S4Vectors/DESCRIPTION0000644000175400017540000000327413175736136015164 0ustar00biocbuildbiocbuildPackage: S4Vectors Title: S4 implementation of vector-like and list-like objects Description: The S4Vectors package defines the Vector and List virtual classes and a set of generic functions that extend the semantic of ordinary vectors and lists in R. Package developers can easily implement vector-like or list-like objects as concrete subclasses of Vector or List. In addition, a few low-level concrete subclasses of general interest (e.g. DataFrame, Rle, and Hits) are implemented in the S4Vectors package itself (many more are implemented in the IRanges package and in other Bioconductor infrastructure packages). Version: 0.16.0 Encoding: UTF-8 Author: H. Pagès, M. Lawrence and P. Aboyoun Maintainer: Bioconductor Package Maintainer biocViews: Infrastructure, DataRepresentation Depends: R (>= 3.3.0), methods, utils, stats, stats4, BiocGenerics (>= 0.23.3) Suggests: IRanges, GenomicRanges, Matrix, ShortRead, graph, data.table, RUnit License: Artistic-2.0 Collate: S4-utils.R show-utils.R utils.R normarg-utils.R LLint-class.R isSorted.R subsetting-utils.R vector-utils.R integer-utils.R character-utils.R eval-utils.R map_ranges_to_runs.R DataTable-class.R Annotated-class.R Vector-class.R Vector-comparison.R Vector-setops.R Vector-merge.R Hits-class.R Hits-comparison.R Hits-setops.R Rle-class.R Rle-utils.R List-class.R List-comparison.R List-utils.R SimpleList-class.R HitsList-class.R DataFrame-class.R expand-methods.R FilterRules-class.R aggregate-methods.R shiftApply-methods.R split-methods.R Pairs-class.R zzz.R NeedsCompilation: yes Packaged: 2017-10-30 23:57:18 UTC; biocbuild S4Vectors/NAMESPACE0000644000175400017540000002135513175714520014666 0ustar00biocbuildbiocbuilduseDynLib(S4Vectors) import(methods) importFrom(utils, head, tail, head.matrix, tail.matrix, getS3method) importFrom(stats, cov, cor, median, quantile, smoothEnds, runmed, window, "window<-", aggregate, na.omit, na.exclude, complete.cases, setNames, terms) importFrom(stats4, summary, update) import(BiocGenerics) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Export S4 classes ### exportClasses( character_OR_NULL, vector_OR_factor, LLint, integer_OR_LLint, DataTable, NSBS, Annotated, DataTable_OR_NULL, Vector, Hits, SelfHits, SortedByQueryHits, SortedByQuerySelfHits, Rle, List, SimpleList, HitsList, SortedByQueryHitsList, DataFrame, expression_OR_function, FilterRules, FilterMatrix, Pairs ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Export S3 methods ### S3method(aggregate, Vector) S3method(anyDuplicated, NSBS) S3method(as.character, LLint) S3method(as.data.frame, Vector) S3method(as.data.frame, DataTable) S3method(as.integer, LLint) S3method(as.list, Vector) S3method(as.logical, LLint) S3method(as.matrix, Vector) S3method(as.numeric, LLint) S3method(as.vector, Rle) S3method(cbind, DataFrame) S3method(diff, Rle) S3method(droplevels, List) S3method(droplevels, Rle) S3method(duplicated, DataTable) S3method(duplicated, Vector) S3method(head, DataTable) S3method(head, LLint) S3method(head, Vector) S3method(intersect, Vector) S3method(levels, Rle) S3method(mean, Rle) S3method(median, Rle) S3method(quantile, Rle) S3method(rbind, DataFrame) S3method(rev, Rle) S3method(setdiff, Vector) S3method(setequal, Vector) S3method(sort, Vector) S3method(summary, Rle) S3method(t, Hits) S3method(t, HitsList) S3method(tail, DataTable) S3method(tail, LLint) S3method(tail, Vector) S3method(union, Vector) S3method(unique, DataTable) S3method(unique, Vector) S3method(window, LLint) S3method(window, Vector) ### We also export them thru the export() directive so that (a) they can be ### called directly, (b) tab-completion on the name of the generic shows them, ### and (c) methods() doesn't asterisk them. export( aggregate.Vector, anyDuplicated.NSBS, as.character.LLint, as.data.frame.Vector, as.data.frame.DataTable, as.integer.LLint, as.list.Vector, as.logical.LLint, as.matrix.Vector, as.numeric.LLint, as.vector.Rle, cbind.DataFrame, diff.Rle, droplevels.Rle, droplevels.List, duplicated.DataTable, duplicated.Vector, head.DataTable, head.LLint, head.Vector, intersect.Vector, levels.Rle, mean.Rle, median.Rle, quantile.Rle, rbind.DataFrame, rev.Rle, setdiff.Vector, setequal.Vector, sort.Vector, summary.Rle, t.Hits, t.HitsList, tail.DataTable, tail.LLint, tail.Vector, union.Vector, unique.DataTable, unique.Vector, window.LLint, window.Vector ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Export S4 methods for generics not defined in S4Vectors ### exportMethods( ## Methods for generics defined in the base package: length, names, "names<-", nrow, ncol, dim, NROW, NCOL, rownames, "rownames<-", colnames, "colnames<-", dimnames, "dimnames<-", is.na, anyNA, as.logical, as.integer, as.numeric, as.complex, as.character, as.raw, as.factor, as.list, as.data.frame, as.matrix, as.table, "[", "[<-", subset, rep.int, c, append, "==", "!=", "<=", ">=", "<", ">", "Ops", "Summary", "!", match, duplicated, unique, anyDuplicated, "%in%", order, sort, is.unsorted, rank, xtfrm, merge, t, by, transform, nchar, substr, substring, levels, "levels<-", droplevels, "[[", "[[<-", "$", "$<-", split, eval, with, within, expand.grid, ## Methods for generics defined in the methods package: coerce, show, ## Methods for generics defined in the utils package: head, tail, ## Methods for generics defined in the stats package: var, cov, cor, sd, window, aggregate, na.omit, na.exclude, complete.cases, ## Methods for generics defined in the stats4 package: summary, ## Methods for generics defined in the BiocGenerics package: cbind, rbind, lapply, sapply, Reduce, Filter, unlist, do.call, union, intersect, setdiff, setequal, xtabs, start, end, width, grep, grepl, updateObject ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Export non-generic functions ### export( ## S4-utils.R: setValidity2, new2, setMethods, ## utils.R: wmsg, .Call2, get_showHeadLines, get_showTailLines, printAtomicVectorInAGrid, ## normarg-utils.R: isTRUEorFALSE, isSingleInteger, isSingleNumber, isSingleString, isSingleNumberOrNA, isSingleStringOrNA, recycleIntegerArg, recycleNumericArg, recycleLogicalArg, recycleCharacterArg, recycleArg, fold, ## LLint-class.R: is.LLint, as.LLint, LLint, ## subsetting-utils.R: normalizeSingleBracketSubscript, normalizeDoubleBracketSubscript, ## integer-utils.R: orderIntegerPairs, matchIntegerPairs, selfmatchIntegerPairs, duplicatedIntegerPairs, orderIntegerQuads, matchIntegerQuads, selfmatchIntegerQuads, duplicatedIntegerQuads, ## character-utils.R: safeExplode, strsplitAsListOfIntegerVectors, svn.time, ## Hits-class.R: queryHits, subjectHits, queryLength, subjectLength, countQueryHits, countSubjectHits, Hits, SelfHits, selectHits, breakTies, remapHits, isSelfHit, isRedundantHit, ## List-class.R: List, phead, ptail, pc, ## SimpleList-class.R: SimpleList, ## HitsList-class.R: HitsList, ## DataFrame-class.R: DataFrame, ## FilterRules-class.R: FilterRules, FilterMatrix, ## Pairs-class.R: Pairs, first, "first<-", second, "second<-", zipup, zipdown ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Export S4 generics obtained by explicit promotion of functions not ### defined in S4Vectors to S4 generics ### export( ## DataTable-class.R: rowSums, colSums, rowMeans, colMeans ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Export S4 generics defined in S4Vectors + export corresponding methods ### export( ## show-utils.R: classNameForDisplay, showAsCell, ## isSorted.R: isConstant, isSorted, isStrictlySorted, ## subsetting-utils.R: NSBS, normalizeSingleBracketReplacementValue, extractROWS, replaceROWS, getListElement, setListElement, ## character-utils.R: unstrsplit, ## DataTable-class.R: ROWNAMES, as.env, ## Annotated-class.R: metadata, "metadata<-", ## Vector-class.R: parallelSlotNames, parallelVectorNames, elementMetadata, mcols, values, "elementMetadata<-", "mcols<-", "values<-", rename, ## Vector-comparison.R: pcompare, selfmatch, findMatches, countMatches, ## Hits-class.R: from, to, nLnode, nRnode, nnode, countLnodeHits, countRnodeHits, ## Rle-class.R: runLength, "runLength<-", runValue, "runValue<-", nrun, Rle, findRun, decode, ## Rle-utils.R: runsum, runmean, runwtsum, runq, ## List-class.R: elementType, elementNROWS, lengths, isEmpty, relistToClass, ## List-comparison.R: pcompareRecursively, ## List-utils.R: endoapply, mendoapply, revElements, ## HitsList-class.R: space, ## expand-methods.R expand, ## FilterRules-class.R: active, "active<-", evalSeparately, subsetByFilter, params, filterRules, ## shiftApply-methods.R: shiftApply ) ### Exactly the same list as above. exportMethods( classNameForDisplay, showAsCell, isConstant, isSorted, isStrictlySorted, NSBS, normalizeSingleBracketReplacementValue, extractROWS, replaceROWS, getListElement, setListElement, unstrsplit, ROWNAMES, as.env, metadata, "metadata<-", parallelSlotNames, parallelVectorNames, elementMetadata, mcols, values, "elementMetadata<-", "mcols<-", "values<-", rename, pcompare, selfmatch, findMatches, countMatches, from, to, nLnode, nRnode, nnode, countLnodeHits, countRnodeHits, runLength, "runLength<-", runValue, "runValue<-", nrun, Rle, findRun, runsum, runmean, runwtsum, runq, elementType, elementNROWS, lengths, isEmpty, relistToClass, pcompareRecursively, endoapply, mendoapply, revElements, space, expand, active, "active<-", evalSeparately, subsetByFilter, params, filterRules, shiftApply ) S4Vectors/NEWS0000644000175400017540000002755213175714520014153 0ustar00biocbuildbiocbuildCHANGES IN VERSION 0.16.0 ------------------------- NEW FEATURES o Introduce FilterResults as generic parent of FilterMatrix. o Optimized subsetting of an Rle object by an integer vector. Speed up is about 3x or more for big objects with respect to BioC 3.5. SIGNIFICANT USER-VISIBLE CHANGES o coerce,list,DataFrame generates "valid" names when list has none. This ends up introducing an inconsistency between DataFrame and data.frame but it is arguably a good one. We shouldn't rely on DataFrame() to generate variable names from scratch anyway. BUG FIXES o Fix showAsCell() on data-frame-like and array-like objects with a single column, and on SplitDataFrameList objects. o Calling DataFrame() with explict 'row.names=NULL' should block rownames inference. o cbind.DataFrame() ensures every argument is a DataFrame, not just first. o rbind_mcols() now is robust to missing 'x'. o Fix extractROWS() for arrays when subscript is a RangeNSBS. o Temporary workaround to make the "union" method for Hits objects work even in the presence of another "union" generic in the cache (which is the case e.g. if the user loads the lubridate package). o A couple of (long-time due) tweaks and fixes to "unlist" method for List objects so that it behaves consistently with "unlist" method for CompressedList objects. o Modify Mini radix C code to accomodate a bug in Apple LLVM version 6.1.0 optimizer. [commit 241150d2b043e8fcf6721005422891baff018586] o Fix match,Pairs,Pairs() [commit a08c12bf4c31b7304d25122c411d882ec52b360c] o Various other minor fixes. CHANGES IN VERSION 0.14.0 ------------------------- NEW FEATURES o Add LLint vectors: similar to ordinary integer vectors (int values at the C level) but store "large integers" i.e. long long int values at the C level. These are 64-bit on Intel platforms vs 32-bit for int values. See ?LLint for more information. This is in preparation for supporting long Vector derivatives (planned for BioC 3.6). o Default "rank" method for Vector objects now supports the same ties method as base::rank() (was only supporting ties methods "first" and "min" until now). o Support x[[i,j]] on DataFrame objects. o Add "transform" methods for DataTable and Vector objects. SIGNIFICANT USER-VISIBLE CHANGES o Rename union classes characterORNULL, vectorORfactor, DataTableORNULL, and expressionORfunction -> character_OR_NULL, vector_OR_factor, DataTable_OR_NULL, and expression_OR_function, respectively. o Remove default "xtfrm" method for Vector objects. Not needed and introduced infinite recursion when calling order(), sort() or rank() on Vector objects that don't have specific order/sort/rank methods. DEPRECATED AND DEFUNCT o Remove compare() (was defunct in BioC 3.4). o Remove elementLengths() (was defunct in BioC 3.4). BUG FIXES o Make showAsCell() robust to nested lists. o Fix bug where subsetting a List object 'x' by a list-like subscript was not always propagating 'mcols(x)'. CHANGES IN VERSION 0.12.0 ------------------------- NEW FEATURES o Add n-ary "merge" method for Vector objects. o "extractROWS" methods for atomic vectors and DataFrame objects now support NAs in the subscript. As a consequence a DataFrame can now be subsetted by row with a subscript that contains NAs. However that will only succeed if all the columns in the DataFrame can also be subsetted with a subscript that contains NAs (e.g. it would fail at the moment if some columns are Rle's but we have plans to make this work in the future). o Add "union", "intersect", "setdiff", and "setequal" methods for Vector objects. o Add coercion from data.table to DataFrame. o Add t() S3 methods for Hits and HitsList. o Add "c" method for Pairs objects. o Add rbind/cbind methods for List, returning a list matrix. o aggregate() now supports named aggregator expressions when 'FUN' is missing. SIGNIFICANT USER-VISIBLE CHANGES o "c" method for Rle objects handles factor data more gracefully. o "eval" method for FilterRules objects now excludes NA results, like subset(), instead of failing on NAs. o Drop "as.env" method for List objects so that as.env() behaves more like as.data.frame() on these objects. o Speed up "replaceROWS" method for Vector objects when 'x' has names. o Optimize selfmatch for factors. DOCUMENTATION IMPROVEMENTS o Add S4QuickOverview vignette. DEPRECATED AND DEFUNCT o elementLengths() and compare() are now defunct (were deprecated in BioC 3.3). o Remove "ifelse" methods for Rle objects (were defunct in BioC 3.3), BUG FIXES o Fix bug in showAsCell(x) when 'x' is an AsIs object. o DataFrame() avoids NULL names when there are no columns. o DataFrame with NULL colnames are now considered invalid. CHANGES IN VERSION 0.10.0 ------------------------- NEW FEATURES o Add SelfHits class, a subclass of Hits for representing objects where the left and right nodes are identical. o Add utilities isSelfHit() and isRedundantHit() to operate on SelfHits objects. o Add new Pairs class that couples two parallel vectors. o head() and tail() now work on a DataTable object and behave like on an ordinary matrix. o Add as.matrix.Vector(). o Add "append" methods for Rle/vector (they promote to Rle). SIGNIFICANT USER-VISIBLE CHANGES o Many changes to the Hits class: - Replace the old Hits class (where the hits had to be sorted by query) with the SortedByQueryHits class. - A new Hits class where the hits can be in any order is re-introduced as the parent of the SortedByQueryHits class. - The Hits() constructor gets the new 'sort.by.query' argument that is FALSE by default. When 'sort.by.query' is set to TRUE, the constructor returns a SortedByQueryHits instance instead of a Hits instance. - Bidirectional coercion is supported between Hits and SortedByQueryHits. When going from Hits to SortedByQueryHits, the hits are sorted by query. - Add "c" method for Hits objects. - Rename Hits slots: queryHits -> from subjectHits -> to queryLength -> nLnode (nb of left nodes) subjectLength -> nRnode (nb of right nodes) - Add updateObject() method to update serialized Hits objects from old (queryHits/subjectHits) to new (from/to) internal representation. - The "show" method for Hits objects now labels columns with from/to by default and switches to queryHits/subjectHits labels only when the object is a SortedByQueryHits object. - New accessors are provided that match the new slot names: from(), to(), nLnode(), nRnode(). The old accessors (queryHits(), subjectHits(), queryLength(), and subjectLength()) are just aliases for the new accessors. Also countQueryHits() and countSubjectHits() are now aliases for new countLnodeHits() and countRnodeHits(). o Transposition of Hits objects now propagates the metadata columns. o Rename elementLengths() -> elementNROWS() (the old name was clearly a misnomer). For backward compatibility the old name still works but is deprecated (now it's just an "alias" for elementNROWS()). o Rename compare() -> pcompare(). For backward compatibility the old name still works but is just an "alias" for pcompare() and is deprecated. o Some refactoring of the Rle() generic and methods: - Remove ellipsis from the argument list of the generic. - Dispatch on 'values' only. - The 'values' and 'lengths' arguments now have explicit default values logical(0) and integer(0) respectively. - Methods have no more 'check' argument but new low-level (non-exported) constructor new_Rle() does and is what should now be used by code that needs this feature. o Optimize subsetting of an Rle object by an Rle subscript: the subscript is no longer decoded (i.e. expanded into an ordinary vector). This reduces memory usage and makes the subsetting much faster e.g. it can be 100x times faster or more if the subscript has many (e.g. thousands) of long runs. o Modify "replaceROWS" methods so that the replaced elements in 'x' get their metadata columns from 'value'. See this thread on bioc-devel: https://stat.ethz.ch/pipermail/bioc-devel/2015-November/008319.html o Remove ellipsis from the argument list of the "head" and "tail" methods for Vector objects. o pc() (parallel combine) now returns a List object only if one of the supplied objects is a List object, otherwise it returns an ordinary list. o The "as.data.frame" method for Vector objects now forwards the 'row.names' argument. o Export the "parallelSlotNames" methods. DEPRECATED AND DEFUNCT o Deprecate elementLengths() in favor of elementNROWS(). New name reflects TRUE semantic. o Deprecate compare() in favor of pcompare(). o After being deprecated in BioC 3.2, the "ifelse" methods for Rle objects are now defunct. o Remove "aggregate" method for vector objects which was an undocumented bad idea from the start. BUG FIXES o Fix 2 long-standing bugs in "as.data.frame" method for List objects: - must always return an ordinary data.frame (was returning a DataFrame when 'use.outer.mcols' was TRUE), - when 'x' has names and 'group_name.as.factor' is TRUE, the levels of the returned group_name col must be identical to 'unique(names(x))' (names of empty list elements in 'x' was not showing up in 'levels(group_name)'). o Fix and improve the elementMetadata/mcols setter method for Vector objects so that the specific methods for GenomicRanges, GAlignments, and GAlignmentPairs objects are not needed anymore and were removed. Note that this change also fixes setting the elementMetadata/mcols of a SummarizedExperiment object with NULL or an ordinary data frame, which was broken until now. o Fix bug in match,ANY,Rle method when supplied 'nomatch' is not NA. o Fix findMatches() for Rle table. o Fix show,DataTable-method to display all rows if <= nhead + ntail + 1 CHANGES IN VERSION 0.4.0 ------------------------ NEW FEATURES o Add isSorted() and isStrictlySorted() generics, plus some methods. o Add low-level wmsg() helper for formatting error/warning messages. o Add pc() function for parallel c() of list-like objects. o Add coerce,Vector,DataFrame; just adds any mcols as columns on top of the coerce,ANY,DataFrame behavior. o [[ on a List object now accepts a numeric- or character-Rle of length 1. o Add "droplevels" methods for Rle, List, and DataFrame objects. o Add table,DataTable and transform,DataTable methods. o Add prototype of a better all.equals() for S4 objects. SIGNIFICANT USER-VISIBLE CHANGES o Move Annotated, DataTable, Vector, Hits, Rle, List, SimpleList, and DataFrame classes from the IRanges package. o Move isConstant(), classNameForDisplay(), and low-level argument checking helpers isSingleNumber(), isSingleString(), etc... from the IRanges package. o Add as.data.frame,List method and remove other inconsistent and not needed anymore "as.data.frame" methods for List subclasses. o Remove useless and thus probably never used aggregate,DataTable method that followed the time-series API. o coerce,ANY,List method now propagates the names. BUG FIXES o Fix bug in coercion from list to SimpleList when the list contains matrices and arrays. o Fix subset() on a zero column DataFrame. o Fix rendering of Date/time classes as DataFrame columns. S4Vectors/R/0000755000175400017540000000000013175714520013642 5ustar00biocbuildbiocbuildS4Vectors/R/Annotated-class.R0000644000175400017540000000201513175714520017003 0ustar00biocbuildbiocbuild### ========================================================================= ### Annotated objects ### ------------------------------------------------------------------------- setClass("Annotated", representation("VIRTUAL", metadata = "list")) setGeneric("metadata", function(x, ...) standardGeneric("metadata")) setMethod("metadata", "Annotated", function(x) { if (is.null(x@metadata) || is.character(x@metadata)) list(metadata = x@metadata) else x@metadata }) setGeneric("metadata<-", function(x, ..., value) standardGeneric("metadata<-")) setReplaceMethod("metadata", "Annotated", function(x, value) { if (!is.list(value)) stop("replacement 'metadata' value must be a list") if (!length(value)) names(value) <- NULL # instead of character() x@metadata <- value x }) S4Vectors/R/DataFrame-class.R0000644000175400017540000006274013175714520016725 0ustar00biocbuildbiocbuild### ========================================================================= ### DataFrame objects ### ------------------------------------------------------------------------- ## A data.frame-like interface for S4 objects that implement length() and `[` ## NOTE: Normal data.frames always have rownames (sometimes as integers), ## but we allow the rownames to be NULL for efficiency. This means that we ## need to store the number of rows (nrows). setClass("DataFrame", representation( rownames = "character_OR_NULL", nrows = "integer" ), prototype(rownames = NULL, nrows = 0L, listData = structure(list(), names = character())), contains = c("DataTable", "SimpleList")) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Accessor methods. ### setMethod("nrow", "DataFrame", function(x) x@nrows) setMethod("ncol", "DataFrame", function(x) length(x)) setMethod("rownames", "DataFrame", function(x, do.NULL = TRUE, prefix = "row") { rn <- x@rownames if (is.null(rn) && !do.NULL) { nr <- NROW(x) if (nr > 0L) rn <- paste(prefix, seq_len(nr), sep = "") else rn <- character(0L) } rn }) setMethod("colnames", "DataFrame", function(x, do.NULL = TRUE, prefix = "col") { if (!identical(do.NULL, TRUE)) warning("do.NULL arg is ignored ", "in this method") cn <- names(x@listData) if (!is.null(cn)) return(cn) if (length(x@listData) != 0L) stop("DataFrame object with NULL colnames, please fix it ", "with colnames(x) <- value") return(character(0)) }) setReplaceMethod("rownames", "DataFrame", function(x, value) { if (!is.null(value)) { if (anyMissing(value)) stop("missing values not allowed in rownames") if (length(value) != nrow(x)) stop("invalid rownames length") if (anyDuplicated(value)) stop("duplicate rownames not allowed") if (!is(value, "XStringSet")) value <- as.character(value) } x@rownames <- value x }) setReplaceMethod("colnames", "DataFrame", function(x, value) { if (!is.character(value)) stop("'value' must be a character vector ", "in colnames(x) <- value") if (length(value) > length(x)) stop("more column names than columns") names(x) <- value x }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Validity. ### .valid.DataFrame.dim <- function(x) { nr <- dim(x)[1L] if (!length(nr) == 1) return("length of 'nrows' slot must be 1") if (nr < 0) return("number of rows must be non-negative") NULL } .valid.DataFrame.rownames <- function(x) { if (is.null(rownames(x))) return(NULL) if (length(rownames(x)) != nrow(x)) return("number of row names and number of rows differ") NULL } .valid.DataFrame.names <- function(x) { if (is.null(names(x))) return("column names should not be NULL") if (length(names(x)) != ncol(x)) return("number of columns and number of column names differ") NULL } .valid.DataFrame <- function(x) { c(.valid.DataFrame.dim(x), .valid.DataFrame.rownames(x), .valid.DataFrame.names(x)) } setValidity2("DataFrame", .valid.DataFrame) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Constructor. ### DataFrame <- function(..., row.names = NULL, check.names = TRUE) { ## build up listData, with names from arguments if (!isTRUEorFALSE(check.names)) stop("'check.names' must be TRUE or FALSE") nr <- 0 listData <- list(...) varlist <- vector("list", length(listData)) if (length(listData) > 0) { dotnames <- names(listData) if (is.null(dotnames)) { emptynames <- rep.int(TRUE, length(listData)) } else { emptynames <- !nzchar(dotnames) } if (any(emptynames)) { qargs <- as.list(substitute(list(...)))[-1L] dotvalues <- sapply(qargs[emptynames], function(arg) deparse(arg)[1L]) names(listData)[emptynames] <- dotvalues } varnames <- as.list(names(listData)) nrows <- ncols <- integer(length(varnames)) for (i in seq_along(listData)) { element <- try(as(listData[[i]], "DataFrame"), silent = TRUE) if (inherits(element, "try-error")) stop("cannot coerce class \"", class(listData[[i]]), "\" to a DataFrame") nrows[i] <- nrow(element) ncols[i] <- ncol(element) varlist[[i]] <- as.list(element, use.names = FALSE) if (!is(listData[[i]], "AsIs")) { if (((length(dim(listData[[i]])) > 1L) || (ncol(element) > 1L) || is.list(listData[[i]]))) { if (emptynames[i]) varnames[[i]] <- colnames(element) else varnames[[i]] <- paste(varnames[[i]], colnames(element), sep = ".") } } if (missing(row.names)) row.names <- rownames(element) } nr <- max(nrows) for (i in which((nrows > 0L) & (nrows < nr) & (nr %% nrows == 0L))) { recycle <- rep(seq_len(nrows[i]), length.out = nr) varlist[[i]] <- lapply(varlist[[i]], `[`, recycle, drop=FALSE) nrows[i] <- nr } if (!all(nrows == nr)) stop("different row counts implied by arguments") varlist <- unlist(varlist, recursive = FALSE, use.names = FALSE) nms <- as.character(unlist(varnames[ncols > 0L])) if (check.names) nms <- make.names(nms, unique = TRUE) names(varlist) <- nms } else names(varlist) <- character(0) if (!is.null(row.names)) { if (anyMissing(row.names)) stop("missing values in 'row.names'") if (length(varlist) && length(row.names) != nr) stop("invalid length of row names") if (anyDuplicated(row.names)) stop("duplicate row names") row.names <- as.character(row.names) } new2("DataFrame", listData=varlist, rownames=row.names, nrows=as.integer(max(nr, length(row.names))), check=FALSE) } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Subsetting. ### setMethod("[[", "DataFrame", function(x, i, j, ...) { if (!missing(j)) { x[[j, ...]][[i]] } else { callNextMethod() } }) setReplaceMethod("[[", "DataFrame", function(x, i, j,..., value) { nrx <- nrow(x) lv <- NROW(value) if (!missing(j) || length(list(...)) > 0) warning("arguments beyond 'i' ignored") if (missing(i)) stop("subscript is missing") if (!is.character(i) && !is.numeric(i)) stop("invalid subscript type") if (length(i) < 1L) stop("attempt to select less than one element") if (length(i) > 1L) stop("attempt to select more than one element") if (is.numeric(i) && (i < 1L || i > ncol(x) + 1L)) stop("subscript out of bounds") if (!is.null(value) && (nrx != lv)) { if ((nrx == 0) || (lv == 0) || (nrx %% lv != 0)) stop(paste(lv, "elements in value to replace", nrx, "elements")) else value <- rep(value, length.out = nrx) } callNextMethod(x, i, value=value) }) setMethod("extractROWS", "DataFrame", function(x, i) { i <- normalizeSingleBracketSubscript(i, x, exact=FALSE, allow.NAs=TRUE, as.NSBS=TRUE) slot(x, "listData", check=FALSE) <- lapply(structure(seq_len(ncol(x)), names=names(x)), function(j) extractROWS(x[[j]], i)) slot(x, "nrows", check=FALSE) <- length(i) if (!is.null(rownames(x))) { slot(x, "rownames", check=FALSE) <- make.unique(extractROWS(rownames(x), i)) } x } ) setMethod("[", "DataFrame", function(x, i, j, ..., drop=TRUE) { if (!isTRUEorFALSE(drop)) stop("'drop' must be TRUE or FALSE") if (length(list(...)) > 0L) warning("parameters in '...' not supported") ## We do list-style subsetting when [ was called with no ','. ## NOTE: matrix-style subsetting by logical matrix not supported. list_style_subsetting <- (nargs() - !missing(drop)) < 3L if (list_style_subsetting || !missing(j)) { if (list_style_subsetting) { if (!missing(drop)) warning("'drop' argument ignored by list-style subsetting") if (missing(i)) return(x) j <- i } if (!is(j, "Ranges")) { xstub <- setNames(seq_along(x), names(x)) j <- normalizeSingleBracketSubscript(j, xstub) } new_listData <- extractROWS(x@listData, j) new_mcols <- extractROWS(mcols(x), j) x <- initialize(x, listData=new_listData, elementMetadata=new_mcols) if (anyDuplicated(names(x))) names(x) <- make.unique(names(x)) if (list_style_subsetting) return(x) } if (!missing(i)) x <- extractROWS(x, i) if (missing(drop)) # drop by default if only one column left drop <- ncol(x) == 1L if (drop) { ## one column left if (ncol(x) == 1L) return(x[[1L]]) ## one row left if (nrow(x) == 1L) return(as(x, "list")) } x } ) setMethod("replaceROWS", "DataFrame", function(x, i, value) { i <- normalizeSingleBracketSubscript(i, x, as.NSBS=TRUE) x_ncol <- ncol(x) value_ncol <- ncol(value) if (value_ncol > x_ncol) stop("provided ", value_ncol, " variables ", "to replace ", x_ncol, " variables") slot(x, "listData", check=FALSE) <- lapply(structure(seq_len(ncol(x)), names=names(x)), function(j) replaceROWS(x[[j]], i, value[[((j - 1L) %% value_ncol) + 1L]])) x } ) setReplaceMethod("[", "DataFrame", function(x, i, j, ..., value) { if (length(list(...)) > 0) warning("parameters in '...' not supported") useI <- FALSE newrn <- newcn <- NULL if (nargs() < 4) { if (missing(i)) { j2 <- seq_len(ncol(x)) } else { if (length(i) == 1) { if (is.logical(i) == 1 && i) i <- rep(i, ncol(x)) } xstub <- setNames(seq_along(x), names(x)) j2 <- normalizeSingleBracketSubscript(i, xstub, allow.append=TRUE) if (is.character(i)) newcn <- i[j2 > ncol(x)] } } else { if (missing(i)) { i2 <- seq_len(nrow(x)) } else { useI <- TRUE i2 <- normalizeSingleBracketSubscript(i, x, allow.append=TRUE) if (is.character(i)) newrn <- i[i2 > nrow(x)] } if (missing(j)) { j2 <- seq_len(ncol(x)) } else { xstub <- setNames(seq_along(x), names(x)) j2 <- normalizeSingleBracketSubscript(j, xstub, allow.append=TRUE) if (is.character(j)) newcn <- j[j2 > ncol(x)] } i <- i2 } j <- j2 if (!length(j)) # nothing to replace return(x) if (is(value, "list") || is(value, "List")) { null <- vapply(value, is.null, logical(1L)) if (any(null)) { ### FIXME: data.frame handles gracefully stop("NULL elements not allowed in list value") } value <- as(value, "DataFrame") } if (!is(value, "DataFrame")) { if (useI) li <- length(i) else li <- nrow(x) lv <- length(value) if (lv > 0L && li != lv) { if (li %% lv != 0) stop(paste(lv, "rows in value to replace", li, " rows")) else value <- rep(value, length.out = li) } ## come up with some default row and col names if (!length(newcn) && max(j) > length(x)) { newcn <- paste("V", seq.int(length(x) + 1L, max(j)), sep = "") if (length(newcn) != sum(j > length(x))) stop("new columns would leave holes after ", "existing columns") } if (useI) { if (length(newrn) == 0L && li > 0L && max(i) > nrow(x)) newrn <- as.character(seq.int(nrow(x) + 1L, max(i))) if (length(x@listData[j][[1]]) == 0L) x@listData[j] <- list(rep(NA, nrow(x))) x@listData[j] <- lapply(x@listData[j], function(y) {y[i] <- value; y}) } else { if (is.null(value)) x@listData[j] <- NULL else x@listData[j] <- list(value) } } else { vc <- seq_len(ncol(value)) if (ncol(value) > length(j)) stop("ncol(x[j]) < ncol(value)") if (ncol(value) < length(j)) vc <- rep(vc, length.out = length(j)) if (useI) li <- length(i) else li <- nrow(x) nrv <- nrow(value) if (li != nrv) { if ((li == 0) || (li %% nrv != 0)) stop(paste(nrv, "rows in value to replace", li, " rows")) else value <- value[rep(seq_len(nrv), length.out = li), , drop=FALSE] } ## attempt to derive new row and col names from value if (!length(newcn) && max(j) > length(x)) { newcn <- rep(names(value), length.out = length(j)) newcn <- newcn[j > length(x)] } if (useI) { if (length(newrn) == 0L && li > 0L && max(i) > nrow(x)) { if (!is.null(rownames(value))) { newrn <- rep(rownames(value), length.out = length(i)) newrn <- newrn[i > nrow(x)] } else newrn <- as.character(seq.int(nrow(x) + 1L, max(i))) } for (k in seq_len(length(j))) { if (j[k] > length(x)) v <- NULL else v <- x@listData[[j[k]]] rv <- value[[vc[k]]] if (length(dim(rv)) == 2) v[i,] <- rv else v[i] <- if (is.null(v)) rv else as(rv, class(v)) x@listData[[j[k]]] <- v } } else { if (is.logical(j)) { for (k in seq_len(length(j))) x@listData[[k]] <- value[[vc[k]]] } else { for (k in seq_len(length(j))) x@listData[[j[k]]] <- value[[vc[k]]] } } } ## update row and col names, making them unique if (length(newcn)) { oldcn <- head(colnames(x), length(x) - length(newcn)) colnames(x) <- make.unique(c(oldcn, newcn)) if (!is.null(mcols(x))) mcols(x)[tail(names(x),length(newcn)),] <- DataFrame(NA) } if (length(newrn)) { notj <- setdiff(seq_len(ncol(x)), j) x@listData[notj] <- lapply(x@listData[notj], function(y) c(y, rep(NA, length(newrn)))) x@rownames <- make.unique(c(rownames(x), newrn)) } x@nrows <- length(x[[1]]) # we should always have a column x }) hasNonDefaultMethod <- function(f, signature) { any(selectMethod(f, signature)@defined != "ANY") } hasS3Method <- function(f, signature) { !is.null(getS3method(f, signature, optional=TRUE)) } droplevels.DataFrame <- function(x, except=NULL) { canDropLevels <- function(xi) { hasNonDefaultMethod(droplevels, class(xi)) || hasS3Method("droplevels", class(xi)) } drop.levels <- vapply(x, canDropLevels, NA) if (!is.null(except)) drop.levels[except] <- FALSE x@listData[drop.levels] <- lapply(x@listData[drop.levels], droplevels) x } setMethod("droplevels", "DataFrame", droplevels.DataFrame) setMethod("rep", "DataFrame", function(x, ...) { x[rep(seq_len(nrow(x)), ...),,drop=FALSE] }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Coercion. ### ## Break DataFrame into a normal R data.frame setAs("DataFrame", "data.frame", function(from) { as.data.frame(from, optional=TRUE) }) injectIntoScope <- function(x, ...) { nms <- sapply(tail(substitute(list(...)), -1), deparse) environment(x) <- list2env(setNames(list(...), nms), parent = environment(x)) x } .as.data.frame.DataFrame <- function(x, row.names=NULL, optional=FALSE, ...) { if (length(list(...))) warning("Arguments in '...' ignored") l <- as(x, "list") if (is.null(row.names)) row.names <- rownames(x) if (!length(l) && is.null(row.names)) row.names <- seq_len(nrow(x)) l <- lapply(l, function(y) { if (is(y, "SimpleList") || is(y, "CompressedList")) y <- as.list(y) if (is.list(y)) y <- I(y) y }) IRanges.data.frame <- injectIntoScope(data.frame, as.data.frame) do.call(IRanges.data.frame, c(l, list(row.names=row.names), check.names=!optional, stringsAsFactors=FALSE)) } setMethod("as.data.frame", "DataFrame", .as.data.frame.DataFrame) setMethod("as.matrix", "DataFrame", function(x) { if (length(x) == 0L) m <- matrix(logical(), nrow = nrow(x), ncol = 0L) else m <- do.call(cbind, as.list(x)) rownames(m) <- rownames(x) m }) ## take data.frames to DataFrames setAs("data.frame", "DataFrame", function(from) { rn <- attributes(from)[["row.names"]] if (is.integer(rn)) rn <- NULL nr <- nrow(from) ### FIXME: this should be: ## from <- as.list(from) ### But unclass() causes deep copy attr(from, "row.names") <- NULL class(from) <- NULL new2("DataFrame", listData=from, nrows=nr, rownames=rn, check=FALSE) }) setAs("data.table", "DataFrame", function(from) { df <- data.table:::as.data.frame.data.table(from) as(df, "DataFrame") } ) setAs("table", "DataFrame", function(from) { df <- as.data.frame(from) factors <- sapply(df, is.factor) factors[1] <- FALSE do.call(DataFrame, c(df[1], lapply(df[factors], Rle), df["Freq"])) }) setOldClass(c("xtabs", "table")) setAs("xtabs", "DataFrame", function(from) { class(from) <- "table" as(from, "DataFrame") }) .defaultAsDataFrame <- function(from) { if (length(dim(from)) == 2L) { df <- as.data.frame(from) if (0L == ncol(from)) ## colnames on matrix with 0 columns are 'NULL' names(df) <- character() as(df, "DataFrame") } else { row.names <- if (!anyDuplicated(names(from))) names(from) else NULL new2("DataFrame", listData = setNames(list(from), "X"), nrows = length(from), rownames = row.names, check=FALSE) } } setAs("ANY", "DataFrame", .defaultAsDataFrame) .VectorAsDataFrame <- function(from) { ans <- .defaultAsDataFrame(from) if (!is.null(mcols(from))) { ans <- cbind(ans, mcols(from)) } ans } ## overriding the default inheritance-based coercion from methods package setAs("SimpleList", "DataFrame", .VectorAsDataFrame) setAs("Vector", "DataFrame", .VectorAsDataFrame) ## note that any element named 'row.names' will be interpreted differently ## is this a bug or a feature? setAs("list", "DataFrame", function(from) { do.call(DataFrame, c(from, check.names = is.null(names(from)))) }) setAs("NULL", "DataFrame", function(from) as(list(), "DataFrame")) ### FIXME: only exists due to annoying S4 warning due to its caching of ### coerce methods. setAs("integer", "DataFrame", function(from) { selectMethod("coerce", c("vector", "DataFrame"))(from) }) setAs("AsIs", "DataFrame", function(from) { df <- new2("DataFrame", nrows = NROW(from), check=FALSE) df[[1]] <- from df }) setAs("ANY", "AsIs", function(from) I(from)) setAs("ANY", "DataTable_OR_NULL", function(from) as(from, "DataFrame")) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Combining. ### cbind.DataFrame <- function(..., deparse.level = 1) { dfs <- lapply(list(...), as, "DataFrame", strict=FALSE) ans <- DataFrame(dfs) mcols(ans) <- do.call(rbind_mcols, dfs) ans } setMethod("cbind", "DataFrame", cbind.DataFrame) rbind.DataFrame <- function(..., deparse.level = 1) { do.call(rbind, lapply(list(...), as, "DataFrame")) } setMethod("rbind", "DataFrame", function(..., deparse.level=1) { args <- list(...) hasrows <- unlist(lapply(args, nrow), use.names=FALSE) > 0L hascols <- unlist(lapply(args, ncol), use.names=FALSE) > 0L if (!any(hasrows | hascols)) { return(DataFrame()) } else if (!any(hasrows)) { return(args[[which(hascols)[1L]]]) } else if (sum(hasrows) == 1) { return(args[[which(hasrows)]]) } else { args <- args[hasrows] } df <- args[[1L]] for (i in 2:length(args)) { if (ncol(df) != ncol(args[[i]])) stop("number of columns for arg ", i, " do not match those of first arg") if (!identical(colnames(df), colnames(args[[i]]))) stop("column names for arg ", i, " do not match those of first arg") } if (ncol(df) == 0) { ans <- DataFrame() ans@nrows <- sum(unlist(lapply(args, nrow), use.names=FALSE)) } else { cols <- lapply(colnames(df), function(cn) { cols <- lapply(args, `[[`, cn) isRle <- vapply(cols, is, logical(1L), "Rle") if (any(isRle) && !all(isRle)) { # would fail dispatch to c,Rle cols[isRle] <- lapply(cols[isRle], decodeRle) } isFactor <- vapply(cols, is.factor, logical(1L)) if (any(isFactor)) { cols <- lapply(cols, as.factor) levs <- unique(unlist(lapply(cols, levels), use.names=FALSE)) cols <- lapply(cols, factor, levs) } rectangular <- length(dim(cols[[1]])) == 2L if (rectangular) { combined <- do.call(rbind, unname(cols)) } else { combined <- do.call(c, unname(cols)) } if (any(isFactor)) combined <- structure(combined, class="factor", levels=levs) combined }) names(cols) <- colnames(df) ans <- new2("DataFrame", listData = cols, nrows = NROW(cols[[1]]), check = FALSE) } rn <- unlist(lapply(args, rownames), use.names=FALSE) if (!is.null(rn)) { if (length(rn) != nrow(ans)) { rn <- NULL } else if (anyDuplicated(rn)) rn <- make.unique(rn, sep = "") } rownames(ans) <- rn if (!is.null(mcols(df))) { df_mcols <- mcols(df) if (all(sapply(args, function(x) identical(mcols(x), df_mcols)))) mcols(ans) <- df_mcols } ans }) S4Vectors/R/DataTable-class.R0000644000175400017540000002704213175714520016716 0ustar00biocbuildbiocbuild### ========================================================================= ### DataTable objects ### ------------------------------------------------------------------------- ### ### DataTable is an API only (i.e. virtual class with no slots) for ### accessing objects with a rectangular shape like DataFrame or ### RangedData objects. It mimics the API for standard data.frame ### objects, except derivatives do not necessarily behave as a list of ### columns. ### setClass("DataTable", representation("VIRTUAL")) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Accessors. ### setMethod("NROW", "DataTable", function(x) nrow(x)) setMethod("NCOL", "DataTable", function(x) ncol(x)) setMethod("dim", "DataTable", function(x) c(nrow(x), ncol(x))) setGeneric("ROWNAMES", function(x) standardGeneric("ROWNAMES")) setMethod("ROWNAMES", "ANY", function (x) if (length(dim(x)) != 0L) rownames(x) else names(x) ) setMethod("ROWNAMES", "DataTable", function(x) rownames(x)) setMethod("dimnames", "DataTable", function(x) { list(rownames(x), colnames(x)) }) setReplaceMethod("dimnames", "DataTable", function(x, value) { if (!is.list(value)) stop("replacement value must be a list") rownames(x) <- value[[1L]] colnames(x) <- value[[2L]] x }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Subsetting. ### head.DataTable <- head.matrix setMethod("head", "DataTable", head.DataTable) tail.DataTable <- tail.matrix setMethod("tail", "DataTable", tail.DataTable) setMethod("subset", "DataTable", function(x, subset, select, drop = FALSE, ...) { i <- evalqForSubset(subset, x, ...) j <- evalqForSelect(select, x, ...) x[i, j, drop=drop] }) setMethod("na.omit", "DataTable", function(object, ...) { attr(object, "row.names") <- rownames(object) object.omit <- stats:::na.omit.data.frame(object) attr(object.omit, "row.names") <- NULL object.omit }) setMethod("na.exclude", "DataTable", function(object, ...) { attr(object, "row.names") <- rownames(object) object.ex <- stats:::na.exclude.data.frame(object) attr(object.ex, "row.names") <- NULL object.ex }) setMethod("is.na", "DataTable", function(x) { na <- do.call(cbind, lapply(seq(ncol(x)), function(xi) is.na(x[[xi]]))) rownames(na) <- rownames(x) na }) setMethod("complete.cases", "DataTable", function(...) { args <- list(...) if (length(args) == 1) { x <- args[[1L]] rowSums(is.na(x)) == 0 } else complete.cases(args[[1L]]) & do.call(complete.cases, args[-1L]) }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Transforming. ### setGeneric("column<-", function(x, name, value) standardGeneric("column<-"), signature="x") setReplaceMethod("column", "DataTable", function(x, name, value) { x[,name] <- value x }) transformColumns <- function(`_data`, ...) { exprs <- as.list(substitute(list(...))[-1L]) if (any(names(exprs) == "")) { stop("all arguments in '...' must be named") } ## elements in '...' can originate from different environments env <- setNames(top_prenv_dots(...), names(exprs)) for (colName in names(exprs)) { # for loop allows inter-arg dependencies value <- safeEval(exprs[[colName]], `_data`, env[[colName]]) column(`_data`, colName) <- value } `_data` } transform.DataTable <- transformColumns setMethod("transform", "DataTable", transform.DataTable) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Combining. ### setMethod("cbind", "DataTable", function(..., deparse.level=1) stop("missing 'cbind' method for DataTable class ", class(list(...)[[1L]]))) setMethod("rbind", "DataTable", function(..., deparse.level=1) stop("missing 'rbind' method for DataTable class ", class(list(...)[[1L]]))) setMethod("cbind2", c("ANY", "DataTable"), function(x, y, ...) { x <- as(x, "DataFrame") cbind(x, y, ...) }) setMethod("cbind2", c("DataTable", "ANY"), function(x, y, ...) { y <- as(y, "DataFrame") cbind(x, y, ...) }) setMethod("cbind2", c("DataTable", "DataTable"), function(x, y, ...) { x <- as(x, "DataFrame") y <- as(y, "DataFrame") cbind(x, y, ...) }) setMethod("rbind2", c("ANY", "DataTable"), function(x, y, ...) { x <- as(x, "DataFrame") rbind(x, y, ...) }) setMethod("rbind2", c("DataTable", "ANY"), function(x, y, ...) { y <- as(y, "DataFrame") rbind(x, y, ...) }) setMethod("rbind2", c("DataTable", "DataTable"), function(x, y, ...) { x <- as(x, "DataFrame") y <- as(y, "DataFrame") rbind(x, y, ...) }) .mergeByHits <- function(x, y, by, all.x=FALSE, all.y=FALSE, sort = TRUE, suffixes = c(".x", ".y")) { nm.x <- colnames(x) nm.y <- colnames(y) cnm <- nm.x %in% nm.y if (any(cnm) && nzchar(suffixes[1L])) nm.x[cnm] <- paste0(nm.x[cnm], suffixes[1L]) cnm <- nm.y %in% nm.x if (any(cnm) && nzchar(suffixes[2L])) nm.y[cnm] <- paste0(nm.y[cnm], suffixes[2L]) if (all.x) { x.alone <- which(countLnodeHits(by) == 0L) } x <- x[c(from(by), if (all.x) x.alone), , drop = FALSE] if (all.y) { y.alone <- which(countRnodeHits(by) == 0L) xa <- x[rep.int(NA_integer_, length(y.alone)), , drop = FALSE] x <- rbind(x, xa) } y <- y[c(to(by), if (all.x) rep.int(NA_integer_, length(x.alone)), if (all.y) y.alone), , drop = FALSE] cbind(x, y) } setMethod("merge", c("DataTable", "DataTable"), function(x, y, by, ...) { if (is(by, "Hits")) { return(.mergeByHits(x, y, by, ...)) } as(merge(as(x, "data.frame"), as(y, "data.frame"), by, ...), class(x)) }) setMethod("merge", c("data.frame", "DataTable"), function(x, y, ...) { as(merge(x, as(y, "data.frame"), ...), class(y)) }) setMethod("merge", c("DataTable", "data.frame"), function(x, y, ...) { as(merge(as(x, "data.frame"), y, ...), class(x)) }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Summary methods. ### ### S3/S4 combo for duplicated.DataTable duplicated.DataTable <- function(x, incomparables=FALSE, fromLast=FALSE, ...) { duplicated(as(x, "data.frame"), incomparables=incomparables, fromLast=fromLast, ...) } setMethod("duplicated", "DataTable", duplicated.DataTable) ### S3/S4 combo for unique.DataTable unique.DataTable <- unique.data.frame setMethod("unique", "DataTable", unique.DataTable) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Comparison methods. ### .sort.Vector <- function(x, decreasing=FALSE, na.last=NA, by) { if (!missing(by)) { i <- orderBy(by, x, decreasing=decreasing, na.last=na.last) } else { i <- order(x, na.last=na.last, decreasing=decreasing) } extractROWS(x, i) } setMethod("sort", "DataTable", .sort.Vector) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Coercion ### setGeneric("as.env", function(x, ...) standardGeneric("as.env")) setMethod("as.env", "NULL", function(x, enclos, tform = identity) { new.env(parent=enclos) }) addSelfRef <- function(x, env) { env$.. <- x env } setMethod("as.env", "DataTable", function(x, enclos = parent.frame(2), tform = identity) { env <- new.env(parent = enclos) lapply(colnames(x), function(col) { colFun <- function() { val <- tform(x[[col]]) rm(list=col, envir=env) assign(col, val, env) val } makeActiveBinding(col, colFun, env) }) addSelfRef(x, env) }) as.data.frame.DataTable <- function(x, row.names=NULL, optional=FALSE, ...) { as.data.frame(x, row.names=NULL, optional=optional, ...) } setMethod("as.data.frame", "DataTable", function(x, row.names=NULL, optional=FALSE, ...) { as.data.frame(as(x, "DataFrame"), row.names=row.names, optional=optional, ...) }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### The "show" method. ### setMethod("show", "DataTable", function(object) { nhead <- get_showHeadLines() ntail <- get_showTailLines() nr <- nrow(object) nc <- ncol(object) cat(class(object), " with ", nr, ifelse(nr == 1, " row and ", " rows and "), nc, ifelse(nc == 1, " column\n", " columns\n"), sep = "") if (nr > 0 && nc > 0) { nms <- rownames(object) if (nr <= (nhead + ntail + 1L)) { out <- as.matrix(format(as.data.frame( lapply(object, showAsCell), optional = TRUE))) if (!is.null(nms)) rownames(out) <- nms } else { out <- rbind(as.matrix(format(as.data.frame( lapply(object, function(x) showAsCell(head(x, nhead))), optional = TRUE))), rbind(rep.int("...", nc)), as.matrix(format(as.data.frame( lapply(object, function(x) showAsCell(tail(x, ntail))), optional = TRUE)))) rownames(out) <- .rownames(nms, nr, nhead, ntail) } classinfo <- matrix(unlist(lapply(object, function(x) { paste0("<", classNameForDisplay(x)[1], ">") }), use.names = FALSE), nrow = 1, dimnames = list("", colnames(out))) out <- rbind(classinfo, out) print(out, quote = FALSE, right = TRUE) } }) .rownames <- function(nms, nrow, nhead, ntail) { p1 <- ifelse (nhead == 0, 0L, 1L) p2 <- ifelse (ntail == 0, 0L, ntail-1L) s1 <- s2 <- character(0) if (is.null(nms)) { if (nhead > 0) s1 <- paste0(as.character(p1:nhead)) if (ntail > 0) s2 <- paste0(as.character((nrow-p2):nrow)) } else { if (nhead > 0) s1 <- paste0(head(nms, nhead)) if (ntail > 0) s2 <- paste0(tail(nms, ntail)) } c(s1, "...", s2) } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Statistical routines ### setMethod("xtabs", signature(data = "DataTable"), function(formula = ~., data, subset, na.action, exclude = c(NA, NaN), drop.unused.levels = FALSE) { data <- as(data, "data.frame") callGeneric() }) setMethod("table", "DataTable", function(...) { table(as.list(cbind(...))) }) ## TODO: lm, glm, loess, ... S4Vectors/R/FilterRules-class.R0000644000175400017540000003470013175714520017334 0ustar00biocbuildbiocbuild### ========================================================================= ### FilterRules objects ### ------------------------------------------------------------------------- setClassUnion("expression_OR_function", c("expression", "function")) setClass("FilterRules", representation(active = "logical"), prototype(elementType = "expression_OR_function"), contains = "SimpleList") ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Accessors. ### setGeneric("active", function(x) standardGeneric("active")) setMethod("active", "FilterRules", function(x) { a <- x@active names(a) <- names(x) a }) setGeneric("active<-", signature="x", function(x, value) standardGeneric("active<-") ) setReplaceMethod("active", "FilterRules", function(x, value) { if (is.numeric(value)) { value <- as.integer(value)[!is.na(value)] if (any(value < 1) || any(value > length(x))) stop("filter index out of range") value <- names(x)[value] } if (is.character(value)) { value <- value[!is.na(value)] ## NA's are dropped filterNames <- names(x) if (length(filterNames) == 0) stop("there are no filter names") if (any(!(value %in% filterNames))) stop("'value' contains invalid filter names") x@active <- filterNames %in% value x } else if (is.logical(value)) { nfilters <- length(x) if (length(value) > nfilters) stop("length of 'value' must not be greater than that of 'filters'") if (anyMissing(value)) stop("'value' cannot contain NA's") if (nfilters && (nfilters %% length(value) != 0)) stop("number of filters not a multiple of 'value' length") x@active <- rep(value, length.out = nfilters) x } else stop("unsupported type of 'value'") }) setMethod("parallelSlotNames", "FilterRules", function(x) c("active", callNextMethod())) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Constructor. ### FilterRules.parseRule <- function(expr) { if (is.character(expr)) { expr <- try(parse(text = expr, srcfile = NULL), silent = TRUE) if (is.character(expr)) stop("failed to parse filter expression: ", expr) expr } else if (is.language(expr) || is.logical(expr)) as.expression(expr) else if (is.function(expr)) as(expr, "FilterClosure") else stop("would not evaluate to logical: ", expr) } ## takes logical expressions, character vectors, or functions to parse FilterRules <- function(exprs = list(), ..., active = TRUE) { exprs <- c(exprs, as.list(substitute(list(...)))[-1L]) if (length(names(exprs)) == 0) { funs <- as.logical(sapply(exprs, is.function)) nonfuns <- exprs[!funs] names(nonfuns) <- unlist(lapply(nonfuns, deparse)) chars <- as.logical(sapply(nonfuns, is.character)) names(nonfuns)[chars] <- unlist(nonfuns[chars]) names(exprs)[!funs] <- names(nonfuns) } exprs <- lapply(exprs, FilterRules.parseRule) active <- rep(active, length.out = length(exprs)) if (!is.logical(active) || anyMissing(active)) stop("'active' must be logical without any missing values") if (length(active) > length(exprs)) stop("length of 'active' is greater than number of rules") if (length(exprs) && length(exprs) %% length(active) > 0) stop("number of rules must be a multiple of length of 'active'") ans <- new_SimpleList_from_list("FilterRules", exprs, active = active) validObject(ans) ans } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Subsetting. ### setReplaceMethod("[[", "FilterRules", function(x, i, j, ..., value) { if (!missing(j) || length(list(...)) > 0) warning("arguments beyond 'i' ignored") if (missing(i)) stop("subscript is missing") rule <- FilterRules.parseRule(value) x <- callNextMethod(x, i, value = rule) if (is.numeric(i) && is.character(value)) names(x)[i] <- value active <- x@active ## in case we expanded names(active) <- names(x)[seq_along(active)] active[[i]] <- TRUE names(active) <- NULL x@active <- active names(x) <- make.names(names(x), unique = TRUE) x }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Validity. ### .valid.FilterRules.active <- function(x) { if (length(active(x)) != length(x)) "length of 'active' must match length of 'filters'" else if (!identical(names(active(x)), names(x))) "names of 'active' must match those of 'filters'" else if (anyMissing(active(x))) "'active' cannot contain NA's" else NULL } .valid.FilterRules.rules <- function(x) { unlist(lapply(x, function(rule) { if (is.function(rule) && length(formals(rule)) < 1) "function rule must take at least one parameter" else NULL })) } .valid.FilterRules <- function(x) c(.valid.FilterRules.active(x), .valid.FilterRules.rules(x)) setValidity2("FilterRules", .valid.FilterRules) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Combining ### setMethod("append", c("FilterRules", "FilterRules"), function(x, values, after=length(x)) { if (!isSingleNumber(after)) stop("'after' must be a single number") ans <- FilterRules(append(as.list(x, use.names = TRUE), as.list(values, use.names = TRUE), after = after)) active(ans) <- structure(append(active(x), active(values), after), names = names(ans)) mcols(ans) <- rbind(mcols(x), mcols(values)) ans }) setMethod("c", "FilterRules", function(x, ..., recursive = FALSE) { if (!identical(recursive, FALSE)) stop("\"c\" method for FilterRules objects ", "does not support the 'recursive' argument") if (missing(x)) args <- unname(list(...)) else args <- unname(list(x, ...)) args <- lapply(args, as, "FilterRules") ans <- FilterRules(unlist(lapply(args, function(x) { elts <- as.list(x) names(elts) <- names(x) elts }), recursive = FALSE)) active(ans) <- structure(unlist(lapply(args, active), use.names = FALSE), names = names(ans)) mcols(ans) <- do.call(rbind, lapply(args, mcols)) ans }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Evaluating ### setMethod("eval", signature(expr="FilterRules", envir="ANY"), function(expr, envir = parent.frame(), enclos = if(is.list(envir) || is.pairlist(envir)) parent.frame() else baseenv()) { result <- rep.int(TRUE, NROW(envir)) rules <- as.list(expr)[active(expr)] for (i in seq_along(rules)) { rule <- rules[[i]] val <- tryCatch(if (is.expression(rule)) eval(rule, envir, enclos) else rule(envir), error = function(e) { stop("Filter '", names(rules)[i], "' failed: ", e$message) }) if (is(val, "Rle")) val <- as.vector(val) if (!is.logical(val)) stop("filter rule evaluated to non-logical: ", names(rules)[i]) if ((NROW(envir) == 0L && length(val) > 0L) || (NROW(envir) > 0L && length(val) == 0L) || (NROW(envir) > 0L && (max(NROW(envir), length(val)) %% min(NROW(envir), length(val)) != 0))) stop("filter rule evaluated to inconsistent length: ", names(rule)[i]) if (anyNA(val)) { val[is.na(val)] <- FALSE } if (length(rules) > 1L) envir <- extractROWS(envir, val) result[result] <- val } result }) setGeneric("evalSeparately", function(expr, envir = parent.frame(), enclos = if (is.list(envir) || is.pairlist(envir)) parent.frame() else baseenv(), ...) standardGeneric("evalSeparately")) setMethod("evalSeparately", "FilterRules", function(expr, envir = parent.frame(), enclos = if (is.list(envir) || is.pairlist(envir)) parent.frame() else baseenv(), serial = FALSE) { if (!isTRUEorFALSE(serial)) stop("'serial' must be TRUE or FALSE") inds <- seq_len(length(expr)) names(inds) <- names(expr) passed <- rep.int(TRUE, NROW(envir)) m <- do.call(cbind, lapply(inds, function(i) { result <- eval(expr[i], envir = envir, enclos = enclos) if (serial) { envir <<- subset(envir, .(result)) passed[passed] <<- result passed } else result })) FilterMatrix(matrix = m, filterRules = expr) }) setGeneric("subsetByFilter", function(x, filter, ...) standardGeneric("subsetByFilter")) setMethod("subsetByFilter", c("ANY", "FilterRules"), function(x, filter) { extractROWS(x, eval(filter, x)) }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Summary ### setMethod("summary", "FilterRules", function(object, subject, serial = FALSE, discarded = FALSE, percent = FALSE) { if (!isTRUEorFALSE(serial)) stop("'serial' must be TRUE or FALSE") mat <- evalSeparately(object, subject, serial = serial) summary(mat, discarded = discarded, percent = percent) }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### FilterRule closures ### setClass("FilterClosure", contains = "function") setClass("GenericFilterClosure", contains = "FilterClosure") setClass("StandardGenericFilterClosure", contains = c("GenericFilterClosure", "standardGeneric")) setAs("standardGeneric", "FilterClosure", function(from) { new("StandardGenericFilterClosure", from) }) setAs("function", "FilterClosure", function(from) { new("FilterClosure", from) }) setGeneric("params", function(x, ...) standardGeneric("params")) setMethod("params", "FilterClosure", function(x) { as.list(environment(x)) }) setMethod("show", "FilterClosure", function(object) { p <- params(object) cat("filter (", paste(names(p), "=", sapply(p, deparse, control = NULL), collapse = ", "), ")\n", sep = "") print(body(object)) }) ### ------------------------------------------------------------------------- ### FilterResults: coordinates results from multiple filters ### setClass("FilterResults", representation(filterRules = "FilterRules")) .valid.FilterMatrix <- function(object) { c(if (!is.logical(object)) "values must be logical", if (!is.null(names(filterRules))) "filterRules must not be named", if (length(object@filterRules) != ncol(object)) "length(filterRules) must equal ncol(object)") } setClass("FilterMatrix", contains = c("matrix", "FilterResults"), validity = .valid.FilterMatrix) setGeneric("filterRules", function(x, ...) standardGeneric("filterRules")) setMethod("filterRules", "FilterResults", function(x) { setNames(x@filterRules, colnames(x)) }) setMethod("[", "FilterMatrix", function(x, i, j, ..., drop = TRUE) { if (!missing(i)) i <- as.vector(i) if (!missing(j)) j <- as.vector(j) ans <- callNextMethod() if (is.matrix(ans)) { filterRules <- filterRules(x) if (!missing(j)) filterRules <- filterRules[j] ans <- FilterMatrix(matrix = ans, filterRules = filterRules) } ans }) setMethod("rbind", "FilterMatrix", function(..., deparse.level = 1) { args <- list(...) ans <- do.call(rbind, lapply(args, as, "matrix")) rulesList <- lapply(args, filterRules) if (any(!sapply(rulesList, identical, rulesList[[1]]))) stop("cannot rbind filter matrices with non-identical rule sets") FilterMatrix(matrix = ans, filterRules = rulesList[[1]]) }) setMethod("cbind", "FilterMatrix", function(..., deparse.level = 1) { args <- list(...) ans <- do.call(cbind, lapply(args, as, "matrix")) rules <- do.call(c, lapply(args, function(x) x@filterRules)) FilterMatrix(matrix = ans, filterRules = rules) }) FilterMatrix <- function(matrix, filterRules) { stopifnot(ncol(matrix) == length(filterRules)) if (is.null(colnames(matrix))) colnames(matrix) <- names(filterRules) else if (!is.null(names(filterRules)) && !identical(names(filterRules), colnames(matrix))) stop("if names(filterRules) and colnames(matrix) are both not NULL,", " the names must match") names(filterRules) <- NULL new("FilterMatrix", matrix, filterRules = filterRules) } setMethod("show", "FilterMatrix", function(object) { cat(class(object), " (", nrow(object), " x ", ncol(object), ")\n", sep = "") mat <- makePrettyMatrixForCompactPrinting(object, function(x) x@.Data) print(mat, quote = FALSE, right = TRUE) }) setMethod("summary", "FilterResults", function(object, discarded = FALSE, percent = FALSE) { if (!isTRUEorFALSE(discarded)) stop("'discarded' must be TRUE or FALSE") if (!isTRUEorFALSE(percent)) stop("'percent' must be TRUE or FALSE") counts <- c("" = nrow(object), colSums(object), "" = sum(rowSums(object) == ncol(object))) if (discarded) { counts <- nrow(object) - counts } if (percent) { round(counts / nrow(object), 3) } else counts }) S4Vectors/R/Hits-class.R0000644000175400017540000006020513175714520016002 0ustar00biocbuildbiocbuild### ========================================================================= ### Hits objects ### ------------------------------------------------------------------------- ### ### Vector of hits between a set of left nodes and a set of right nodes. setClass("Hits", contains="Vector", representation( from="integer", # integer vector of length N to="integer", # integer vector of length N nLnode="integer", # single integer: number of Lnodes ("left nodes") nRnode="integer" # single integer: number of Rnodes ("right nodes") ), prototype( nLnode=0L, nRnode=0L ) ) ### A SelfHits object is a Hits object where the left and right nodes are ### identical. setClass("SelfHits", contains="Hits") ### Hits objects where the hits are sorted by query. Coercion from ### SortedByQueryHits to List takes advantage of this and is very fast. setClass("SortedByQueryHits", contains="Hits") setClass("SortedByQuerySelfHits", contains=c("SelfHits", "SortedByQueryHits")) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### parallelSlotNames() ### ### Combine the new parallel slots with those of the parent class. Make sure ### to put the new parallel slots *first*. setMethod("parallelSlotNames", "Hits", function(x) c("from", "to", callNextMethod()) ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Accessors ### setGeneric("from", function(x, ...) standardGeneric("from")) setMethod("from", "Hits", function(x) x@from) setGeneric("to", function(x, ...) standardGeneric("to")) setMethod("to", "Hits", function(x) x@to) setGeneric("nLnode", function(x, ...) standardGeneric("nLnode")) setMethod("nLnode", "Hits", function(x) x@nLnode) setGeneric("nRnode", function(x, ...) standardGeneric("nRnode")) setMethod("nRnode", "Hits", function(x) x@nRnode) setGeneric("nnode", function(x, ...) standardGeneric("nnode")) setMethod("nnode", "SelfHits", function(x) nLnode(x)) setGeneric("countLnodeHits", function(x, ...) standardGeneric("countLnodeHits")) .count_Lnode_hits <- function(x) tabulate(from(x), nbins=nLnode(x)) setMethod("countLnodeHits", "Hits", .count_Lnode_hits) setGeneric("countRnodeHits", function(x, ...) standardGeneric("countRnodeHits")) .count_Rnode_hits <- function(x) tabulate(to(x), nbins=nRnode(x)) setMethod("countRnodeHits", "Hits", .count_Rnode_hits) ### query/subject API queryHits <- function(x, ...) from(x, ...) subjectHits <- function(x, ...) to(x, ...) queryLength <- function(x, ...) nLnode(x, ...) subjectLength <- function(x, ...) nRnode(x, ...) countQueryHits <- function(x, ...) countLnodeHits(x, ...) countSubjectHits <- function(x, ...) countRnodeHits(x, ...) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Validity ### .valid.Hits.nnode <- function(nnode, side) { if (!isSingleInteger(nnode) || nnode < 0L) { msg <- wmsg("'n", side, "node(x)' must be a single non-negative ", "integer") return(msg) } if (!is.null(attributes(nnode))) { msg <- wmsg("'n", side, "node(x)' must be a single integer with ", "no attributes") return(msg) } NULL } .valid.Hits.from_or_to <- function(from_or_to, nnode, what, side) { if (!(is.integer(from_or_to) && is.null(attributes(from_or_to)))) { msg <- wmsg("'", what, "' must be an integer vector ", "with no attributes") return(msg) } if (anyMissingOrOutside(from_or_to, 1L, nnode)) { msg <- wmsg("'", what, "' must contain non-NA values ", ">= 1 and <= 'n", side, "node(x)'") return(msg) } NULL } .valid.Hits <- function(x) { c(.valid.Hits.nnode(nLnode(x), "L"), .valid.Hits.nnode(nRnode(x), "R"), .valid.Hits.from_or_to(from(x), nLnode(x), "from(x)", "L"), .valid.Hits.from_or_to(to(x), nRnode(x), "to(x)", "R")) } setValidity2("Hits", .valid.Hits) .valid.SelfHits <- function(x) { if (nLnode(x) != nRnode(x)) return("'nLnode(x)' and 'nRnode(x)' must be equal") NULL } setValidity2("SelfHits", .valid.SelfHits) .valid.SortedByQueryHits <- function(x) { if (isNotSorted(from(x))) return("'queryHits(x)' must be sorted") NULL } setValidity2("SortedByQueryHits", .valid.SortedByQueryHits) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Constructors ### ### Very low-level constructor. Doesn't try to sort the hits by query. .new_Hits <- function(Class, from, to, nLnode, nRnode, mcols) { new2(Class, from=from, to=to, nLnode=nLnode, nRnode=nRnode, elementMetadata=mcols, check=TRUE) } ### Low-level constructor. Sort the hits by query if Class extends ### SortedByQueryHits. new_Hits <- function(Class, from=integer(0), to=integer(0), nLnode=0L, nRnode=0L, mcols=NULL) { if (!isSingleString(Class)) stop("'Class' must be a single character string") if (!extends(Class, "Hits")) stop("'Class' must be the name of a class that extends Hits") if (!(is.numeric(from) && is.numeric(to))) stop("'from' and 'to' must be integer vectors") if (!is.integer(from)) from <- as.integer(from) if (!is.integer(to)) to <- as.integer(to) if (!(isSingleNumber(nLnode) && isSingleNumber(nRnode))) stop("'nLnode' and 'nRnode' must be single integers") if (!is.integer(nLnode)) nLnode <- as.integer(nLnode) if (!is.integer(nRnode)) nRnode <- as.integer(nRnode) if (!(is.null(mcols) || is(mcols, "DataFrame"))) stop("'mcols' must be NULL or a DataFrame object") if (!extends(Class, "SortedByQueryHits")) { ## No need to sort the hits by query. ans <- .new_Hits(Class, from, to, nLnode, nRnode, mcols) return(ans) } ## Sort the hits by query. if (!is.null(mcols)) { revmap_envir <- new.env(parent=emptyenv()) } else { revmap_envir <- NULL } ans <- .Call2("Hits_new", Class, from, to, nLnode, nRnode, revmap_envir, PACKAGE="S4Vectors") if (!is.null(mcols)) { if (nrow(mcols) != length(ans)) stop("length of supplied metadata columns ", "must equal number of hits") if (exists("revmap", envir=revmap_envir)) { revmap <- get("revmap", envir=revmap_envir) mcols <- mcols[revmap, , drop=FALSE] } mcols(ans) <- mcols } ans } .make_mcols <- function(...) { if (nargs() == 0L) return(NULL) DataFrame(..., check.names=FALSE) } ### 2 high-level constructors. Hits <- function(from=integer(0), to=integer(0), nLnode=0L, nRnode=0L, ..., sort.by.query=FALSE) { if (!isTRUEorFALSE(sort.by.query)) stop("'sort.by.query' must be TRUE or FALSE") Class <- if (sort.by.query) "SortedByQueryHits" else "Hits" mcols <- .make_mcols(...) new_Hits(Class, from, to, nLnode, nRnode, mcols) } SelfHits <- function(from=integer(0), to=integer(0), nnode=0L, ..., sort.by.query=FALSE) { if (!isTRUEorFALSE(sort.by.query)) stop("'sort.by.query' must be TRUE or FALSE") Class <- if (sort.by.query) "SortedByQuerySelfHits" else "SelfHits" mcols <- .make_mcols(...) new_Hits(Class, from, to, nnode, nnode, mcols) } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Conversion from old to new internal representation ### setMethod("updateObject", "Hits", function(object, ..., verbose=FALSE) { if (is(try(object@queryHits, silent=TRUE), "try-error")) return(object) ans <- new_Hits("SortedByQueryHits", object@queryHits, object@subjectHits, object@queryLength, object@subjectLength, object@elementMetadata) ans@metadata <- object@metadata ans } ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Coercion ### .from_Hits_to_SortedByQueryHits <- function(from) { new_Hits("SortedByQueryHits", from(from), to(from), nLnode(from), nRnode(from), mcols(from)) } setAs("Hits", "SortedByQueryHits", .from_Hits_to_SortedByQueryHits) setMethod("as.matrix", "Hits", function(x) { ans <- cbind(from=from(x), to=to(x)) if (is(x, "SortedByQueryHits")) colnames(ans) <- c("queryHits", "subjectHits") ans } ) setMethod("as.table", "Hits", .count_Lnode_hits) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Subsetting ### ### The "extractROWS" method for Vector objects doesn't test the validity of ### the result so we override it. setMethod("extractROWS", "SortedByQueryHits", function(x, i) { ans <- callNextMethod() pbs <- validObject(ans, test=TRUE) if (is.character(pbs)) stop(wmsg("Problem(s) found when testing validity of ", class(ans), " object returned by subsetting operation: ", paste0(pbs, collapse=", "), ". Make sure to use a ", "subscript that results in a valid ", class(ans), " object.")) ans } ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Displaying ### setMethod("classNameForDisplay", "Hits", function(x) "Hits") setMethod("classNameForDisplay", "SelfHits", function(x) "SelfHits") .make_naked_matrix_from_Hits <- function(x) { x_len <- length(x) x_mcols <- mcols(x) x_nmc <- if (is.null(x_mcols)) 0L else ncol(x_mcols) ans <- cbind(from=as.character(from(x)), to=as.character(to(x))) if (is(x, "SortedByQueryHits")) colnames(ans) <- c("queryHits", "subjectHits") if (x_nmc > 0L) { tmp <- do.call(data.frame, c(lapply(x_mcols, showAsCell), list(check.names=FALSE))) ans <- cbind(ans, `|`=rep.int("|", x_len), as.matrix(tmp)) } ans } showHits <- function(x, margin="", print.classinfo=FALSE, print.nnode=FALSE) { x_class <- class(x) x_len <- length(x) x_mcols <- mcols(x) x_nmc <- if (is.null(x_mcols)) 0L else ncol(x_mcols) cat(classNameForDisplay(x), " object with ", x_len, " hit", ifelse(x_len == 1L, "", "s"), " and ", x_nmc, " metadata column", ifelse(x_nmc == 1L, "", "s"), ":\n", sep="") out <- makePrettyMatrixForCompactPrinting(x, .make_naked_matrix_from_Hits) if (print.classinfo) { .COL2CLASS <- c( from="integer", to="integer" ) if (is(x, "SortedByQueryHits")) names(.COL2CLASS) <- c("queryHits", "subjectHits") classinfo <- makeClassinfoRowForCompactPrinting(x, .COL2CLASS) ## A sanity check, but this should never happen! stopifnot(identical(colnames(classinfo), colnames(out))) out <- rbind(classinfo, out) } if (nrow(out) != 0L) rownames(out) <- paste0(margin, rownames(out)) ## We set 'max' to 'length(out)' to avoid the getOption("max.print") ## limit that would typically be reached when 'showHeadLines' global ## option is set to Inf. print(out, quote=FALSE, right=TRUE, max=length(out)) if (print.nnode) { cat(margin, "-------\n", sep="") if (is(x, "SortedByQueryHits")) { cat(margin, "queryLength: ", nLnode(x), " / subjectLength: ", nRnode(x), "\n", sep="") } else { if (is(x, "SelfHits")) { cat(margin, "nnode: ", nnode(x), "\n", sep="") } else { cat(margin, "nLnode: ", nLnode(x), " / nRnode: ", nRnode(x), "\n", sep="") } } } } setMethod("show", "Hits", function(object) showHits(object, margin=" ", print.classinfo=TRUE, print.nnode=TRUE) ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Combining ### ### Note that supporting "extractROWS" and "c" makes "replaceROWS" (and thus ### "[<-") work out-of-the-box! ### ### 'Class' must be "Hits" or the name of a concrete Hits subclass. ### 'objects' must be a list of Hits objects. ### Returns an instance of class 'Class'. combine_Hits_objects <- function(Class, objects, use.names=TRUE, ignore.mcols=FALSE) { if (!isSingleString(Class)) stop("'Class' must be a single character string") if (!extends(Class, "Hits")) stop("'Class' must be the name of a class that extends Hits") if (!is.list(objects)) stop("'objects' must be a list") if (!isTRUEorFALSE(use.names)) stop("'use.names' must be TRUE or FALSE") ### TODO: Support 'use.names=TRUE'. if (use.names) stop("'use.names=TRUE' is not supported yet") if (!isTRUEorFALSE(ignore.mcols)) stop("'ignore.mcols' must be TRUE or FALSE") NULL_idx <- which(sapply_isNULL(objects)) if (length(NULL_idx) != 0L) objects <- objects[-NULL_idx] if (length(objects) == 0L) return(new(Class)) ## TODO: Implement (in C) fast 'elementIs(objects, class)' in S4Vectors ## that does 'sapply(objects, is, class, USE.NAMES=FALSE)', and use it ## here. 'elementIs(objects, "NULL")' should work and be equivalent to ## 'sapply_isNULL(objects)'. if (!all(vapply(objects, is, logical(1), Class, USE.NAMES=FALSE))) stop("the objects to combine must be ", Class, " objects (or NULLs)") names(objects) <- NULL # so lapply(objects, ...) below returns an # unnamed list ## Combine "nLnode" slots. nLnode_slots <- lapply(objects, function(x) x@nLnode) ans_nLnode <- unlist(nLnode_slots, use.names=FALSE) ## Combine "nRnode" slots. nRnode_slots <- lapply(objects, function(x) x@nRnode) ans_nRnode <- unlist(nRnode_slots, use.names=FALSE) if (!(all(ans_nLnode == ans_nLnode[[1L]]) && all(ans_nRnode == ans_nRnode[[1L]]))) stop(wmsg("the objects to combine are incompatible Hits objects ", "by number of left and/or right nodes")) ans_nLnode <- ans_nLnode[[1L]] ans_nRnode <- ans_nRnode[[1L]] ## Combine "from" slots. from_slots <- lapply(objects, function(x) x@from) ans_from <- unlist(from_slots, use.names=FALSE) ## Combine "to" slots. to_slots <- lapply(objects, function(x) x@to) ans_to <- unlist(to_slots, use.names=FALSE) ## Combine "mcols" slots. if (ignore.mcols) { ans_mcols <- NULL } else { ans_mcols <- do.call(rbind_mcols, objects) } ## Make 'ans' and return it. .new_Hits(Class, ans_from, ans_to, ans_nLnode, ans_nRnode, ans_mcols) } setMethod("c", "Hits", function (x, ..., ignore.mcols=FALSE, recursive=FALSE) { if (!identical(recursive, FALSE)) stop("\"c\" method for Hits objects ", "does not support the 'recursive' argument") if (missing(x)) { objects <- list(...) x <- objects[[1L]] } else { objects <- list(x, ...) } combine_Hits_objects(class(x), objects, use.names=FALSE, ignore.mcols=ignore.mcols) } ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### selectHits() ### selectHits <- function(x, select=c("all", "first", "last", "arbitrary", "count")) { if (!is(x, "Hits")) stop("'x' must be a Hits object") select <- match.arg(select) if (select == "all") return(x) .Call2("select_hits", from(x), to(x), nLnode(x), select, PACKAGE="S4Vectors") } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### breakTies() ### ### Redundant with selectHits. The only difference is that it returns a Hits ### object. That alone doesn't justify introducing a new verb. Should be ### controlled via an extra arg to selectHits() e.g. 'as.Hits' (FALSE by ### default). H.P. -- Oct 16, 2016 breakTies <- function(x, method=c("first", "last")) { if (!is(x, "Hits")) stop("'x' must be a Hits object") method <- match.arg(method) to <- selectHits(x, method) .new_Hits("SortedByQueryHits", which(!is.na(to)), to[!is.na(to)], nLnode(x), nRnode(x), NULL) } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### revmap() ### ### NOT exported (but used in IRanges). ### TODO: Move revmap() generic from AnnotationDbi to S4Vectors, and make this ### the "revmap" method for SortedByQueryHits objects. ### Note that: ### - If 'x' is a valid SortedByQueryHits object (i.e. the hits in it are ### sorted by query), then 'revmap_Hits(x)' returns a SortedByQueryHits ### object where hits are "fully sorted" i.e. sorted by query first and ### then by subject. ### - Because revmap_Hits() reorders the hits by query, doing ### 'revmap_Hits(revmap_Hits(x))' brings back 'x' but with the hits in it ### now "fully sorted". revmap_Hits <- function(x) new_Hits(class(x), to(x), from(x), nRnode(x), nLnode(x), mcols(x)) ### FIXME: Replace this with "revmap" method for Hits objects. t.Hits <- function(x) t(x) setMethod("t", "Hits", revmap_Hits) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Remap the left and/or right nodes of a Hits object. ### ### Returns 'arg' as a NULL, an integer vector, or a factor. .normarg_nodes.remapping <- function(arg, side, old.nnode) { if (is.null(arg)) return(arg) if (!is.factor(arg)) { if (!is.numeric(arg)) stop("'" , side, "nodes.remappping' must be a vector ", "of integers") if (!is.integer(arg)) arg <- as.integer(arg) } if (length(arg) != old.nnode) stop("'" , side, "nodes.remapping' must be of length 'n", side, "node(x)'") arg } .normarg_new.nnode <- function(arg, side, map) { if (!isSingleNumberOrNA(arg)) stop("'new.n", side, "node' must be a single number or NA") if (!is.integer(arg)) arg <- as.integer(arg) if (is.null(map)) return(arg) if (is.factor(map)) { if (is.na(arg)) return(nlevels(map)) if (arg < nlevels(map)) stop("supplied 'new.n", side, "node' must ", "be >= 'nlevels(", side, "nodes.remapping)'") return(arg) } if (is.na(arg)) stop("'new.n", side, "node' must be specified when ", "'" , side, "s.remapping' is specified and is not a factor") arg } remapHits <- function(x, Lnodes.remapping=NULL, new.nLnode=NA, Rnodes.remapping=NULL, new.nRnode=NA, with.counts=FALSE) { if (!is(x, "SortedByQueryHits")) stop("'x' must be a SortedByQueryHits object") Lnodes.remapping <- .normarg_nodes.remapping(Lnodes.remapping, "L", nLnode(x)) new.nLnode <- .normarg_new.nnode(new.nLnode, "L", Lnodes.remapping) Rnodes.remapping <- .normarg_nodes.remapping(Rnodes.remapping, "R", nRnode(x)) new.nRnode <- .normarg_new.nnode(new.nRnode, "R", Rnodes.remapping) if (!isTRUEorFALSE(with.counts)) stop("'with.counts' must be TRUE or FALSE") x_from <- from(x) if (is.null(Lnodes.remapping)) { if (is.na(new.nLnode)) new.nLnode <- nLnode(x) } else { if (is.factor(Lnodes.remapping)) Lnodes.remapping <- as.integer(Lnodes.remapping) if (anyMissingOrOutside(Lnodes.remapping, 1L, new.nLnode)) stop(wmsg("'Lnodes.remapping' cannot contain NAs, or values that ", "are < 1, or > 'new.nLnode'")) x_from <- Lnodes.remapping[x_from] } x_to <- to(x) if (is.null(Rnodes.remapping)) { if (is.na(new.nRnode)) new.nRnode <- nRnode(x) } else { if (is.factor(Rnodes.remapping)) Rnodes.remapping <- as.integer(Rnodes.remapping) if (anyMissingOrOutside(Rnodes.remapping, 1L, new.nRnode)) stop(wmsg("'Rnodes.remapping' cannot contain NAs, or values that ", "are < 1, or > 'new.nRnode'")) x_to <- Rnodes.remapping[x_to] } x_mcols <- mcols(x) add_counts <- function(counts) { if (is.null(x_mcols)) return(DataFrame(counts=counts)) if ("counts" %in% colnames(x_mcols)) warning("'x' has a \"counts\" metadata column, replacing it") x_mcols$counts <- counts x_mcols } if (is.null(Lnodes.remapping) && is.null(Rnodes.remapping)) { if (with.counts) { counts <- rep.int(1L, length(x)) x_mcols <- add_counts(counts) } } else { sm <- selfmatchIntegerPairs(x_from, x_to) if (with.counts) { counts <- tabulate(sm, nbins=length(sm)) x_mcols <- add_counts(counts) keep_idx <- which(counts != 0L) } else { keep_idx <- which(sm == seq_along(sm)) } x_from <- x_from[keep_idx] x_to <- x_to[keep_idx] x_mcols <- extractROWS(x_mcols, keep_idx) } new_Hits(class(x), x_from, x_to, new.nLnode, new.nRnode, x_mcols) } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### SelfHits methods ### ### TODO: Make isSelfHit() and isRedundantHit() generic functions with ### methods for SelfHits objects. ### ### A "self hit" is an edge from a node to itself. For example, the 2nd hit ### in the SelfHits object below is a self hit (from 3rd node to itself): ### SelfHits(c(3, 3, 3, 4, 4), c(2:4, 2:3), 4) isSelfHit <- function(x) { if (!is(x, "SelfHits")) stop("'x' must be a SelfHits object") from(x) == to(x) } ### When there is more than 1 edge between 2 given nodes (regardless of ### orientation), the extra edges are considered to be "redundant hits". For ### example, hits 3, 5, 7, and 8, in the SelfHits object below are redundant ### hits: ### SelftHits(c(3, 3, 3, 3, 3, 4, 4, 4), c(3, 2:4, 2, 2:3, 2), 4, 4) ### Note that this is regardless of the orientation of the edge so hit 7 (edge ### 4-3) is considered to be redundant with hit 4 (edge 3-4). isRedundantHit <- function(x) { if (!is(x, "SelfHits")) stop("'x' must be a SelfHits object") duplicatedIntegerPairs(pmin.int(from(x), to(x)), pmax.int(from(x), to(x))) } ### Specialized constructor. ### Return a SortedByQuerySelfHits object. ### About 10x faster and uses 4x less memory than my first attempt in pure ### R below. ### NOT exported. makeAllGroupInnerHits <- function(group.sizes, hit.type=0L) { if (!is.integer(group.sizes)) stop("'group.sizes' must be an integer vector") if (!isSingleNumber(hit.type)) stop("'hit.type' must be a single integer") if (!is.integer(hit.type)) hit.type <- as.integer(hit.type) .Call2("make_all_group_inner_hits", group.sizes, hit.type, PACKAGE="S4Vectors") } ### Return a SortedByQuerySelfHits object. ### NOT exported. ### TODO: Remove this. makeAllGroupInnerHits.old <- function(GS) { NG <- length(GS) # nb of groups ## First Element In group i.e. first elt associated with each group. FEIG <- cumsum(c(1L, GS[-NG])) GSr <- c(0L, GS[-NG]) CGSr2 <- cumsum(GSr * GSr) GS2 <- GS * GS nnode <- sum(GS) # length of original vector (i.e. before grouping) ## Original Group Size Assignment i.e. group size associated with each ## element in the original vector. OGSA <- rep.int(GS, GS) # is of length 'nnode' ans_from <- rep.int(seq_len(nnode), OGSA) NH <- length(ans_from) # same as sum(GS2) ## Hit Group Assignment i.e. group associated with each hit. HGA <- rep.int(seq_len(NG), GS2) ## Hit Group Size Assignment i.e. group size associated with each hit. HGSA <- GS[HGA] ans_to <- (0:(NH-1L) - CGSr2[HGA]) %% GS[HGA] + FEIG[HGA] SelfHits(ans_from, ans_to, nnode, sort.by.query=TRUE) } S4Vectors/R/Hits-comparison.R0000644000175400017540000000662013175714520017050 0ustar00biocbuildbiocbuild### ========================================================================= ### Comparing and ordering hits ### ------------------------------------------------------------------------- ### .compatible_Hits <- function(x, y) { nLnode(x) == nLnode(y) && nRnode(x) == nRnode(y) } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### pcompare() ### ### Hits are ordered by 'from' first and then by 'to'. ### This way, the space of hits is totally ordered. ### setMethod("pcompare", c("Hits", "Hits"), function(x, y) { if (!.compatible_Hits(x, y)) stop("'x' and 'y' are incompatible Hits objects ", "by number of left and/or right nodes") pcompareIntegerPairs(from(x), to(x), from(y), to(y)) } ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### match() ### setMethod("match", c("Hits", "Hits"), function(x, table, nomatch=NA_integer_, incomparables=NULL, method=c("auto", "quick", "hash")) { if (!.compatible_Hits(x, table)) stop("'x' and 'y' are incompatible Hits objects ", "by number of left and/or right nodes") if (!is.null(incomparables)) stop("\"match\" method for Hits objects ", "only accepts 'incomparables=NULL'") matchIntegerPairs(from(x), to(x), from(table), to(table), nomatch=nomatch, method=method) } ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### selfmatch() ### ### Is this useful? When do we have to deal with duplicated hits in a Hits ### object? Which function return that? Would be good to know the use case. ### If there aren't any (and we don't expect any in the future), maybe we ### should enforce unicity in the validity method for Hits objects. Then ### selfmatch(), duplicated(), and unique() become pointless on Hits objects ### because their output is predictable (and thus they can be implemented ### in a trivial way that is very fast). ### #setMethod("selfmatch", "Hits", # function (x, method=c("auto", "quick", "hash")) # selfmatchIntegerPairs(from(x), to(x), method=method) #) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Ordering hits ### ### order(), sort(), rank() on Hits objects are consistent with the order ### on hits implied by pcompare(). ### ### TODO: Maybe add a method for SortedByQueryHits that takes advantage of ### the fact that Hits objects are already sorted by 'from'. ### 'na.last' is pointless (Hits objects don't contain NAs) so is ignored. ### 'method' is also ignored at the moment. setMethod("order", "Hits", function(..., na.last=TRUE, decreasing=FALSE, method=c("auto", "shell", "radix")) { if (!isTRUEorFALSE(decreasing)) stop("'decreasing' must be TRUE or FALSE") ## All arguments in '...' are guaranteed to be Hits objects. args <- list(...) if (length(args) == 1L) { x <- args[[1L]] return(orderIntegerPairs(from(x), to(x), decreasing=decreasing)) } order_args <- vector("list", 2L * length(args)) idx <- 2L * seq_along(args) order_args[idx - 1L] <- lapply(args, from) order_args[idx] <- lapply(args, to) do.call(order, c(order_args, list(decreasing=decreasing))) } ) S4Vectors/R/Hits-setops.R0000644000175400017540000000132713175714520016212 0ustar00biocbuildbiocbuild### ========================================================================= ### Set operations ### ------------------------------------------------------------------------- ### ### The methods below are endomorphisms with respect to their first argument ### 'x'. They propagate the names and metadata columns. ### setMethod("union", c("Hits", "Hits"), function(x, y) ## callNextMethod() is broken in R <= 3.4 if there is another "union" ## generic in the cache (which is the case e.g. if the user loads the ## lubridate package). So we avoid its use for now. #as(callNextMethod(as(x, "Hits"), as(y, "Hits")), class(x)) as(union.Vector(as(x, "Hits"), as(y, "Hits")), class(x)) ) S4Vectors/R/HitsList-class.R0000644000175400017540000000645113175714520016641 0ustar00biocbuildbiocbuild### ========================================================================= ### HitsList objects ### ------------------------------------------------------------------------- ### FIXME: Rename this class SimpleHitsList and make HitsList a virtual ### class that SimpleHitsList (and possibly CompressedHitsList, defined in ### IRanges) extend directly. setClass("HitsList", contains="SimpleList", representation( subjectOffsets="integer" ), prototype=prototype(elementType="Hits") ) setClass("SelfHitsList", contains="HitsList", prototype=prototype(elementType="SelfHits") ) setClass("SortedByQueryHitsList", contains="HitsList", prototype=prototype(elementType="SortedByQueryHits") ) setClass("SortedByQuerySelfHitsList", contains=c("SelfHitsList", "SortedByQueryHitsList"), prototype=prototype(elementType="SortedByQuerySelfHits") ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Accessors ### setGeneric("space", function(x, ...) standardGeneric("space")) setMethod("space", "HitsList", function(x) { space <- names(x) if (!is.null(space)) space <- rep.int(space, sapply(as.list(x, use.names = FALSE), length)) space }) setMethod("from", "HitsList", function(x) { as.matrix(x)[,1L,drop=TRUE] }) setMethod("to", "HitsList", function(x) { as.matrix(x)[,2L,drop=TRUE] }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Constructor ### ### This constructor always returns a SortedByQueryHitsList instance at the ### moment. ### TODO: Maybe add the 'sort.by.query' argument to let the user choose ### between getting a HitsList or SortedByQueryHitsList instance. HitsList <- function(list_of_hits, subject) { subjectOffsets <- c(0L, head(cumsum(sapply(subject, length)), -1)) subjectToQuery <- seq_along(list_of_hits) if (!is.null(names(list_of_hits)) && !is.null(names(subject))) subjectToQuery <- match(names(list_of_hits), names(subject)) subjectOffsets <- subjectOffsets[subjectToQuery] new_SimpleList_from_list("SortedByQueryHitsList", list_of_hits, subjectOffsets = subjectOffsets) } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Going from Hits to HitsList with extractList() and family. ### setMethod("relistToClass", "Hits", function(x) "HitsList") setMethod("relistToClass", "SortedByQueryHits", function(x) "SortedByQueryHitsList") ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Coercion ### ## return as.matrix as on Hits, with indices adjusted setMethod("as.matrix", "HitsList", function(x) { mats <- lapply(x, as.matrix) mat <- do.call(rbind, mats) rows <- c(0L, head(cumsum(sapply(x, nLnode)), -1)) nr <- sapply(mats, nrow) mat + cbind(rep.int(rows, nr), rep.int(x@subjectOffsets, nr)) }) ## count up the matches for each left node in every matching setMethod("as.table", "HitsList", function(x, ...) { counts <- unlist(lapply(x, as.table)) as.table(array(counts, length(counts), list(range = seq_along(counts)))) }) t.HitsList <- function(x) t(x) setMethod("t", "HitsList", function(x) { x@elements <- lapply(as.list(x, use.names = FALSE), t) x }) ### TODO: many convenience methods S4Vectors/R/LLint-class.R0000644000175400017540000001715413175714520016122 0ustar00biocbuildbiocbuild### ========================================================================= ### LLint objects ### ------------------------------------------------------------------------- ### ### The LLint class is a container for storing a vector of "large integers" ### (i.e. long long int in C). It supports NAs. ### ### We don't support names for now. We will when we need them. setClass("LLint", representation(bytes="raw")) setClassUnion("integer_OR_LLint", c("integer", "LLint")) is.LLint <- function(x) is(x, "LLint") BYTES_PER_LLINT <- .Machine$sizeof.longlong setMethod("length", "LLint", function(x) length(x@bytes) %/% BYTES_PER_LLINT ) ### Called from the .onLoad() hook in zzz.R make_NA_LLint_ <- function() { ans_bytes <- .Call2("make_RAW_from_NA_LLINT", PACKAGE="S4Vectors") new2("LLint", bytes=ans_bytes, check=FALSE) } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Coercion ### .from_logical_to_LLint <- function(from) { .Call2("new_LLint_from_LOGICAL", from, PACKAGE="S4Vectors") } setAs("logical", "LLint", .from_logical_to_LLint) .from_integer_to_LLint <- function(from) { .Call2("new_LLint_from_INTEGER", from, PACKAGE="S4Vectors") } setAs("integer", "LLint", .from_integer_to_LLint) .from_numeric_to_LLint <- function(from) { .Call2("new_LLint_from_NUMERIC", from, PACKAGE="S4Vectors") } setAs("numeric", "LLint", .from_numeric_to_LLint) .from_character_to_LLint <- function(from) { .Call2("new_LLint_from_CHARACTER", from, PACKAGE="S4Vectors") } setAs("character", "LLint", .from_character_to_LLint) as.LLint <- function(x) as(x, "LLint") ### S3/S4 combo for as.logical.LLint .from_LLint_to_logical <- function(x) { .Call2("new_LOGICAL_from_LLint", x, PACKAGE="S4Vectors") } as.logical.LLint <- function(x, ...) .from_LLint_to_logical(x, ...) setMethod("as.logical", "LLint", as.logical.LLint) ### S3/S4 combo for as.integer.LLint .from_LLint_to_integer <- function(x) { .Call2("new_INTEGER_from_LLint", x, PACKAGE="S4Vectors") } as.integer.LLint <- function(x, ...) .from_LLint_to_integer(x, ...) setMethod("as.integer", "LLint", as.integer.LLint) ### S3/S4 combo for as.numeric.LLint .from_LLint_to_numeric <- function(x) { .Call2("new_NUMERIC_from_LLint", x, PACKAGE="S4Vectors") } as.numeric.LLint <- function(x, ...) .from_LLint_to_numeric(x, ...) setMethod("as.numeric", "LLint", as.numeric.LLint) ### S3/S4 combo for as.character.LLint .from_LLint_to_character <- function(x) { .Call2("new_CHARACTER_from_LLint", x, PACKAGE="S4Vectors") } as.character.LLint <- function(x, ...) .from_LLint_to_character(x, ...) setMethod("as.character", "LLint", as.character.LLint) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Constructor ### .MAX_VECTOR_LENGTH <- 2^52 # see R_XLEN_T_MAX in Rinternals.h ### Return a single double value. .normarg_vector_length <- function(length=0L, max_length=.MAX_VECTOR_LENGTH) { if (is.LLint(length)) { if (length(length) != 1L || is.na(length) || length < as.LLint(0L)) stop("invalid 'length' argument") if (length > as.LLint(max_length)) stop("'length' is too big") return(as.double(length)) } if (!isSingleNumber(length) || length < 0L) stop("invalid 'length' argument") if (is.integer(length)) { length <- as.double(length) } else { length <- trunc(length) } if (length > max_length) stop("'length' is too big") length } LLint <- function(length=0L) { max_length <- .MAX_VECTOR_LENGTH / BYTES_PER_LLINT length <- .normarg_vector_length(length, max_length=max_length) ans_bytes <- raw(length * BYTES_PER_LLINT) new2("LLint", bytes=ans_bytes, check=FALSE) } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Displaying ### .show_LLint <- function(x) { x_len <- length(x) if (x_len == 0L) { cat(class(x), "(0)\n", sep="") return() } cat(class(x), " of length ", x_len, ":\n", sep="") print(as.character(x), quote=FALSE, na.print="NA") return() } setMethod("show", "LLint", function(object) .show_LLint(object)) setMethod("showAsCell", "LLint", function(object) as.character(object)) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Combining ### combine_LLint_objects <- function(objects) { if (!is.list(objects)) stop("'objects' must be a list") ## If one of the objects to combine is a character vector, then all the ## objects are coerced to character and combined. if (any(vapply(objects, is.character, logical(1), USE.NAMES=FALSE))) { ans <- unlist(lapply(objects, as.character), use.names=FALSE) return(ans) } ## If one of the objects to combine is a double vector, then all the ## objects are coerced to double and combined. if (any(vapply(objects, is.double, logical(1), USE.NAMES=FALSE))) { ans <- unlist(lapply(objects, as.double), use.names=FALSE) return(ans) } ## Combine "bytes" slots. bytes_slots <- lapply(objects, function(x) { if (is.null(x)) return(NULL) if (is.logical(x) || is.integer(x)) x <- as.LLint(x) if (is.LLint(x)) return(x@bytes) stop(wmsg("cannot combine LLint objects ", "with objects of class ", class(x))) } ) ans_bytes <- unlist(bytes_slots, use.names=FALSE) new2("LLint", bytes=ans_bytes, check=FALSE) } setMethod("c", "LLint", function (x, ..., ignore.mcols=FALSE, recursive=FALSE) { if (!identical(recursive, FALSE)) stop("\"c\" method for LLint objects ", "does not support the 'recursive' argument") if (missing(x)) { objects <- list(...) x <- objects[[1L]] } else { objects <- list(x, ...) } combine_LLint_objects(objects) } ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### is.na(), anyNA() ### setMethod("is.na", "LLint", function(x) is.na(as.logical(x))) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Operations from "Ops" group ### setMethod("Ops", c("LLint", "LLint"), function(e1, e2) { .Call("LLint_Ops", .Generic, e1, e2, PACKAGE="S4Vectors") } ) ### If one operand is LLint and the other one is integer, then the latter ### is coerced to LLint. ### If one operand is LLint and the other one is double, then the former ### is coerced to double. setMethod("Ops", c("LLint", "numeric"), function(e1, e2) { if (is.integer(e2)) { e2 <- as.LLint(e2) } else { ## Suppress "non reversible coercion to double" warning. e1 <- suppressWarnings(as.double(e1)) } callGeneric() } ) setMethod("Ops", c("numeric", "LLint"), function(e1, e2) { if (is.integer(e1)) { e1 <- as.LLint(e1) } else { ## Suppress "non reversible coercion to double" warning. e2 <- suppressWarnings(as.double(e2)) } callGeneric() } ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Operations from "Summary" group ### setMethod("Summary", "LLint", function(x, ..., na.rm=FALSE) { if (length(list(...)) != 0L) stop(wmsg("\"", .Generic, "\" method for LLint objects ", "takes only one object")) if (!isTRUEorFALSE(na.rm)) stop("'na.rm' must be TRUE or FALSE") .Call("LLint_Summary", .Generic, x, na.rm=na.rm, PACKAGE="S4Vectors") } ) S4Vectors/R/List-class.R0000644000175400017540000005666613175714520016026 0ustar00biocbuildbiocbuild### ========================================================================= ### List objects ### ------------------------------------------------------------------------- ### ### List objects are Vector objects with "[[", "elementType" and ### "elementNROWS" methods. ### setClass("List", contains="Vector", representation( "VIRTUAL", elementType="character" ), prototype(elementType="ANY") ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Accessor methods. ### setGeneric("elementType", function(x, ...) standardGeneric("elementType")) setMethod("elementType", "List", function(x) x@elementType) setMethod("elementType", "vector", function(x) storage.mode(x)) setGeneric("elementNROWS", function(x) standardGeneric("elementNROWS")) setMethod("elementNROWS", "ANY", sapply_NROW) ### Used in the SGSeq package! quick_togroup <- function(x) { x_eltNROWS <- elementNROWS(x) rep.int(seq_along(x_eltNROWS), x_eltNROWS) } setMethod("elementNROWS", "List", function(x) { y <- as.list(x) if (length(y) == 0L) { ans <- integer(0) ## We must return a named integer(0) if 'x' is named names(ans) <- names(x) return(ans) } if (length(dim(y[[1L]])) < 2L) return(elementNROWS(y)) return(sapply(y, NROW)) } ) setGeneric("isEmpty", function(x) standardGeneric("isEmpty")) setMethod("isEmpty", "ANY", function(x) { if (is.atomic(x)) return(length(x) == 0L) if (!is.list(x) && !is(x, "List")) stop("isEmpty() is not defined for objects of class ", class(x)) ## Recursive definition if (length(x) == 0) return(logical(0)) sapply(x, function(xx) all(isEmpty(xx))) }) ### A List object is considered empty iff all its elements are empty. setMethod("isEmpty", "List", function(x) all(elementNROWS(x) == 0L)) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### relistToClass() ### ### 'relistToClass(x)' is the opposite of 'elementType(y)' in the sense that ### the former returns the class of the result of relisting (or splitting) ### 'x' while the latter returns the class of the result of unlisting (or ### unsplitting) 'y'. ### ### More formally, if 'x' is an object that is relistable and 'y' a list-like ### object: ### relistToClass(x) == class(relist(x, some_skeleton)) ### elementType(y) == class(unlist(y)) ### ### As a consequence, for any object 'x' for which relistToClass() is defined ### and returns a valid class, 'elementType(new(relistToClass(x)))' should ### return 'class(x)'. ### setGeneric("relistToClass", function(x) standardGeneric("relistToClass")) .selectListClassName <- function(x) { cn <- listClassName("Compressed", x) if (cn == "CompressedList") cn <- listClassName("Simple", x) cn } setMethod("relistToClass", "ANY", function(x) .selectListClassName(class(x))) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Constructor. ### List <- function(...) { args <- list(...) if (length(args) == 1L && is.list(args[[1L]])) args <- args[[1L]] as(args, "List") } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### The "show" method. ### setMethod("show", "List", function(object) { lo <- length(object) cat(classNameForDisplay(object), " of length ", lo, "\n", sep = "") if (!is.null(names(object))) cat(labeledLine("names", names(object))) }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### unlist() ### ### 'inner_names' and 'outer_names' can be either NULL or character vectors. ### If both are character vectors, then they must have the same length. .make_unlisted_names <- function(inner_names, outer_names) { if (is.null(outer_names)) return(inner_names) if (is.null(inner_names)) return(outer_names) ## Replace missing outer names with inner names. no_outer <- is.na(outer_names) | outer_names == "" if (any(no_outer)) { idx <- which(no_outer) outer_names[idx] <- inner_names[idx] } ## Paste *outer* and *inner* names together when both are present. no_inner <- is.na(inner_names) | inner_names == "" both <- !(no_outer | no_inner) if (any(both)) { idx <- which(both) outer_names[idx] <- paste(outer_names[idx], inner_names[idx], sep=".") } outer_names } ### 'unlisted_x' is assumed to have the *inner* names of 'x' on it. set_unlisted_names <- function(unlisted_x, x) { x_names <- names(x) if (is.null(x_names)) return(unlisted_x) inner_names <- ROWNAMES(unlisted_x) outer_names <- rep.int(x_names, elementNROWS(x)) unlisted_names <- .make_unlisted_names(inner_names, outer_names) if (length(dim(unlisted_x)) < 2L) { res <- try(names(unlisted_x) <- unlisted_names, silent=TRUE) what <- "names" } else { res <- try(rownames(unlisted_x) <- unlisted_names, silent=TRUE) what <- "rownames" } if (is(res, "try-error")) warning("failed to set ", what, " on the ", "unlisted ", class(x), " object") unlisted_x } ### If 'use.names' is FALSE or 'x' has no *outer* names, then we propagate ### the *inner* names on the unlisted object. Note that this deviates from ### base::unlist() which doesn't propagate any names (outer or inner) when ### 'use.names' is FALSE. ### Otherwise (i.e. if 'use.names' is TRUE and 'x' has *outer* names), the ### names we propagate are obtained by pasting the *outer* and *inner* names ### together. Note that, unlike base::unlist(), we never mangle the *outer* ### names when they have no corresponding *inner* names (a terrible feature ### of base::unlist()). setMethod("unlist", "List", function(x, recursive=TRUE, use.names=TRUE) { if (!isTRUEorFALSE(use.names)) stop("'use.names' must be TRUE or FALSE") if (length(x) == 0L) { elt_type <- elementType(x) if (isVirtualClass(elt_type)) return(NULL) return(new(elt_type)) } xx <- unname(as.list(x)) if (length(dim(xx[[1L]])) < 2L) { ## This propagates the *inner* names of 'x'. unlisted_x <- do.call(c, xx) } else { ## This propagates the *inner* names of 'x'. unlisted_x <- do.call(rbind, xx) } if (use.names) unlisted_x <- set_unlisted_names(unlisted_x, x) unlisted_x } ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Subsetting. ### ### Assume 'x' and 'i' are parallel List objects (i.e. same length). ### Returns TRUE iff 'i' contains non-NA positive values that are compatible ### with the shape of 'x'. .is_valid_NL_subscript <- function(i, x) { unlisted_i <- unlist(i, use.names=FALSE) if (!is.integer(unlisted_i)) unlisted_i <- as.integer(unlisted_i) if (anyMissingOrOutside(unlisted_i, lower=1L)) return(FALSE) x_eltNROWS <- elementNROWS(x) i_eltNROWS <- elementNROWS(i) if (any(unlisted_i > rep.int(x_eltNROWS, i_eltNROWS))) return(FALSE) return(TRUE) } ### Assume 'x' and 'i' are parallel List objects (i.e. same length). ### Returns the name of one of the 3 supported fast paths ("LL", "NL", "RL") ### or NA if no fast path can be used. .select_fast_path <- function(i, x) { ## LEPType (List Element Pseudo-Type): same as "elementType" except for ## RleList objects. if (is(i, "RleList")) { i_runvals <- runValue(i) i_LEPType <- elementType(i_runvals) } else { i_LEPType <- elementType(i) } if (extends(i_LEPType, "logical")) { ## 'i' is a List of logical vectors or logical-Rle objects. ## We select the "LL" fast path ("Logical List"). return("LL") } if (extends(i_LEPType, "numeric")) { ## 'i' is a List of numeric vectors or numeric-Rle objects. if (is(i, "RleList")) { i2 <- i_runvals } else { i2 <- i } if (.is_valid_NL_subscript(i2, x)) { ## We select the "NL" fast path ("Number List"). return("NL") } } if (extends(i_LEPType, "Ranges")) { ## 'i' is a List of Ranges objects. ## We select the "RL" fast path ("Ranges List"). return("RL") } return(NA_character_) } ### Assume 'x' and 'i' are parallel List objects (i.e. same length). ### Truncate or recycle each list element of 'i' to the length of the ### corresponding element in 'x'. .adjust_elt_lengths <- function(i, x) { x_eltNROWS <- unname(elementNROWS(x)) i_eltNROWS <- unname(elementNROWS(i)) idx <- which(x_eltNROWS != i_eltNROWS) ## FIXME: This is rough and doesn't follow exactly the truncate-or-recycle ## semantic of normalizeSingleBracketSubscript() on a logical vector or ## logical-Rle object. for (k in idx) i[[k]] <- rep(i[[k]], length.out=x_eltNROWS[k]) return(i) } ### Assume 'x' and 'i' are parallel List objects (i.e. same length), ### and 'i' is a List of logical vectors or logical-Rle objects. .unlist_LL_subscript <- function(i, x) { i <- .adjust_elt_lengths(i, x) unlist(i, use.names=FALSE) } ### Assume 'x' and 'i' are parallel List objects (i.e. same length), ### and 'i' is a List of numeric vectors or numeric-Rle objects. .unlist_NL_subscript <- function(i, x) { offsets <- c(0L, end(IRanges::PartitioningByEnd(x))[-length(x)]) i <- i + offsets unlist(i, use.names=FALSE) } ### Assume 'x' and 'i' are parallel List objects (i.e. same length), ### and 'i' is a List of Ranges objects. .unlist_RL_subscript <- function(i, x) { unlisted_i <- unlist(i, use.names=FALSE) offsets <- c(0L, end(IRanges::PartitioningByEnd(x))[-length(x)]) IRanges::shift(unlisted_i, shift=rep.int(offsets, elementNROWS(i))) } ### Fast subset by List of logical vectors or logical-Rle objects. ### Assume 'x' and 'i' are parallel List objects (i.e. same length). ### Propagate 'names(x)' only. Caller is responsible for propagating 'mcols(x)' ### and 'metadata(x)'. .fast_subset_List_by_LL <- function(x, i) { ## Unlist 'x' and 'i'. unlisted_x <- unlist(x, use.names=FALSE) unlisted_i <- .unlist_LL_subscript(i, x) ## Subset. unlisted_ans <- extractROWS(unlisted_x, unlisted_i) ## Relist. group <- rep.int(seq_along(x), elementNROWS(x)) group <- extractROWS(group, unlisted_i) ans_skeleton <- IRanges::PartitioningByEnd(group, NG=length(x), names=names(x)) relist(unlisted_ans, ans_skeleton) } ### Fast subset by List of numeric vectors or numeric-Rle objects. ### Assume 'x' and 'i' are parallel List objects (i.e. same length). ### Propagate 'names(x)' only. Caller is responsible for propagating 'mcols(x)' ### and 'metadata(x)'. .fast_subset_List_by_NL <- function(x, i) { ## Unlist 'x' and 'i'. unlisted_x <- unlist(x, use.names=FALSE) unlisted_i <- .unlist_NL_subscript(i, x) ## Subset. unlisted_ans <- extractROWS(unlisted_x, unlisted_i) ## Relist. ans_breakpoints <- cumsum(unname(elementNROWS(i))) ans_skeleton <- IRanges::PartitioningByEnd(ans_breakpoints, names=names(x)) relist(unlisted_ans, ans_skeleton) } ### Fast subset by List of Ranges objects. ### Assume 'x' and 'i' are parallel List objects (i.e. same length). ### Propagate 'names(x)' only. Caller is responsible for propagating 'mcols(x)' ### and 'metadata(x)'. .fast_subset_List_by_RL <- function(x, i) { ## Unlist 'x' and 'i'. unlisted_x <- unlist(x, use.names=FALSE) unlisted_i <- .unlist_RL_subscript(i, x) ## Subset. unlisted_ans <- extractROWS(unlisted_x, unlisted_i) ## Relist. ans_breakpoints <- cumsum(unlist(sum(width(i)), use.names=FALSE)) ans_skeleton <- IRanges::PartitioningByEnd(ans_breakpoints, names=names(x)) relist(unlisted_ans, ans_skeleton) } ### Subset a List object by a list-like subscript. subset_List_by_List <- function(x, i) { li <- length(i) if (is.null(names(i))) { lx <- length(x) if (li > lx) stop("list-like subscript is longer than ", "list-like object to subset") if (li < lx) x <- x[seq_len(li)] } else { if (is.null(names(x))) stop("cannot subscript an unnamed list-like object ", "by a named list-like object") if (!identical(names(i), names(x))) { i2x <- match(names(i), names(x)) if (anyMissing(i2x)) stop("list-like subscript has names not in ", "list-like object to subset") x <- x[i2x] } } ## From here, 'x' and 'i' are guaranteed to have the same length. if (li == 0L) return(x) if (!is(x, "SimpleList")) { ## We'll try to take a fast path. if (is(i, "List")) { fast_path <- .select_fast_path(i, x) } else { i2 <- as(i, "List") i2_elttype <- elementType(i2) if (length(i2) == li && all(sapply(i, is, i2_elttype))) { fast_path <- .select_fast_path(i2, x) if (!is.na(fast_path)) i <- i2 } else { fast_path <- NA_character_ } } if (!is.na(fast_path)) { fast_path_FUN <- switch(fast_path, LL=.fast_subset_List_by_LL, NL=.fast_subset_List_by_NL, RL=.fast_subset_List_by_RL) ans <- as(fast_path_FUN(x, i), class(x)) # fast path ## Propagate 'metadata(x)' and 'mcols(x)'. metadata(ans) <- metadata(x) mcols(ans) <- mcols(x) return(ans) } } ## Slow path (loops over the list elements of 'x'). for (k in seq_len(li)) x[[k]] <- extractROWS(x[[k]], i[[k]]) return(x) } .adjust_value_length <- function(value, i_len) { value_len <- length(value) if (value_len == i_len) return(value) if (i_len %% value_len != 0L) warning("number of values supplied is not a sub-multiple ", "of the number of values to be replaced") rep(value, length.out=i_len) } ### Assume 'x' and 'i' are parallel List objects (i.e. same length). .fast_lsubset_List_by_List <- function(x, i, value) { ## Unlist 'x', 'i', and 'value'. unlisted_x <- unlist(x, use.names=FALSE) fast_path <- .select_fast_path(i, x) unlist_subscript_FUN <- switch(fast_path, LL=.unlist_LL_subscript, NL=.unlist_NL_subscript, RL=.unlist_RL_subscript) unlisted_i <- unlist_subscript_FUN(i, x) if (length(value) != 1L) { value <- .adjust_value_length(value, length(i)) value <- .adjust_elt_lengths(value, i) } unlisted_value <- unlist(value, use.names=FALSE) ## Subset. unlisted_ans <- replaceROWS(unlisted_x, unlisted_i, unlisted_value) ## Relist. ans <- as(relist(unlisted_ans, x), class(x)) metadata(ans) <- metadata(x) ans } lsubset_List_by_List <- function(x, i, value) { lx <- length(x) li <- length(i) if (li == 0L) { ## Surprisingly, in that case, `[<-` on standard vectors does not ## even look at 'value'. So neither do we... return(x) } lv <- length(value) if (lv == 0L) stop("replacement has length zero") value <- normalizeSingleBracketReplacementValue(value, x) if (is.null(names(i))) { if (li != lx) stop("when list-like subscript is unnamed, it must have the ", "length of list-like object to subset") if (!is(x, "SimpleList")) { ## We'll try to take a fast path. if (is(i, "List")) { fast_path <- .select_fast_path(i, x) } else { i2 <- as(i, "List") i2_elttype <- elementType(i2) if (length(i2) == li && all(sapply(i, is, i2_elttype))) { fast_path <- .select_fast_path(i2, x) if (!is.na(fast_path)) i <- i2 } else { fast_path <- NA_character_ } } if (!is.na(fast_path)) return(.fast_lsubset_List_by_List(x, i, value)) # fast path } i2x <- seq_len(li) } else { if (is.null(names(x))) stop("cannot subset an unnamed list-like object ", "by a named list-like subscript") i2x <- match(names(i), names(x)) if (anyMissing(i2x)) stop("list-like subscript has names not in ", "list-like object to subset") if (anyDuplicated(i2x)) stop("list-like subscript has duplicated names") } value <- .adjust_value_length(value, li) ## Slow path (loops over the list elements of 'x'). for (k in seq_len(li)) x[[i2x[k]]] <- replaceROWS(x[[i2x[k]]], i[[k]], value[[k]]) return(x) } setMethod("[", "List", function(x, i, j, ..., drop=TRUE) { if (!missing(j) || length(list(...)) > 0L) stop("invalid subsetting") if (missing(i)) return(x) if (is.list(i) || (is(i, "List") && !is(i, "Ranges"))) return(subset_List_by_List(x, i)) callNextMethod(x, i) } ) setReplaceMethod("[", "List", function(x, i, j, ..., value) { if (!missing(j) || length(list(...)) > 0L) stop("invalid subsetting") if (!missing(i) && (is.list(i) || (is(i, "List") && !is(i, "Ranges")))) return(lsubset_List_by_List(x, i, value)) callNextMethod(x, i, value=value) } ) setMethod("[[", "List", function(x, i, j, ...) { dotArgs <- list(...) if (length(dotArgs) > 0L) dotArgs <- dotArgs[names(dotArgs) != "exact"] if (!missing(j) || length(dotArgs) > 0L) stop("incorrect number of subscripts") ## '...' is either empty or contains only the 'exact' arg. getListElement(x, i, ...) } ) setMethod("$", "List", function(x, name) x[[name, exact=FALSE]]) setReplaceMethod("[[", "List", function(x, i, j, ..., value) { if (!missing(j) || length(list(...)) > 0) stop("invalid replacement") origLen <- length(x) x <- setListElement(x, i, value) if (origLen < length(x)) x <- rbindRowOfNAsToMetadatacols(x) x }) setReplaceMethod("$", "List", function(x, name, value) { x[[name]] <- value x }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Simple helper functions for some common subsetting operations. ### ### TODO: Move to List-utils.R (Looping methods section). ### ### phead() and ptail(): "parallel" versions of head() and tail() for List ### objects. They're just fast equivalents of 'mapply(head, x, n)' and ### 'mapply(tail, x, n)', respectively. .normarg_n <- function(n, x_eltNROWS) { if (!is.numeric(n)) stop("'n' must be an integer vector") if (!is.integer(n)) n <- as.integer(n) if (any(is.na(n))) stop("'n' cannot contain NAs") n <- pmin(x_eltNROWS, n) neg_idx <- which(n < 0L) if (length(neg_idx) != 0L) n[neg_idx] <- pmax(n[neg_idx] + x_eltNROWS[neg_idx], 0L) n } phead <- function(x, n=6L) { x_eltNROWS <- unname(elementNROWS(x)) n <- .normarg_n(n, x_eltNROWS) unlisted_i <- IRanges::IRanges(start=rep.int(1L, length(n)), width=n) i <- relist(unlisted_i, IRanges::PartitioningByEnd(seq_along(x))) ans <- x[i] mcols(ans) <- mcols(x) ans } ptail <- function(x, n=6L) { x_eltNROWS <- unname(elementNROWS(x)) n <- .normarg_n(n, x_eltNROWS) unlisted_i <- IRanges::IRanges(end=x_eltNROWS, width=n) i <- relist(unlisted_i, IRanges::PartitioningByEnd(seq_along(x))) ans <- x[i] mcols(ans) <- mcols(x) ans } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Coercion. ### setAs("List", "list", function(from) as.list(from)) .as.list.List <- function(x, use.names=TRUE) { if (!isTRUEorFALSE(use.names)) stop("'use.names' must be TRUE or FALSE") ans <- lapply(x, identity) if (!use.names) names(ans) <- NULL ans } setMethod("as.list", "List", .as.list.List) setMethod("parallelVectorNames", "List", function(x) setdiff(callNextMethod(), c("group", "group_name"))) listClassName <- function(impl, element.type) { if (is.null(impl)) impl <- "" listClass <- paste0(impl, "List") if (!is.null(element.type)) { cl <- c(element.type, names(getClass(element.type)@contains)) cl <- capitalize(cl) listClass <- c(paste0(cl, "List"), paste0(cl, "Set"), paste0(impl, cl, "List"), listClass) } clExists <- which(sapply(listClass, isClass) & sapply(listClass, extends, paste0(impl, "List"))) listClass[[clExists[[1L]]]] } setAs("ANY", "List", function(from) { ## since list is directed to SimpleList, we assume 'from' is non-list-like relist(from, IRanges::PartitioningByEnd(seq_along(from), names=names(from))) }) ## Special cased, because integer extends ANY (somehow) and numeric, ## so ambiguities are introduced due to method caching. setAs("integer", "List", getMethod(coerce, c("ANY", "List"))) .make_group_and_group_name <- function(x_eltNROWS, group_name.as.factor=FALSE) { if (!isTRUEorFALSE(group_name.as.factor)) stop("'group_name.as.factor' must be TRUE or FALSE") group <- rep.int(seq_along(x_eltNROWS), x_eltNROWS) x_names <- names(x_eltNROWS) if (is.null(x_names)) { group_name <- rep.int(NA_character_, length(group)) if (group_name.as.factor) group_name <- factor(group_name, levels=character(0)) } else { group_name <- rep.int(x_names, x_eltNROWS) if (group_name.as.factor) group_name <- factor(group_name, levels=unique(x_names)) } data.frame(group=group, group_name=group_name, stringsAsFactors=FALSE) } .as.data.frame.List <- function(x, row.names=NULL, optional=FALSE, ..., value.name="value", use.outer.mcols=FALSE, group_name.as.factor=FALSE) { if (!isSingleString(value.name)) stop("'value.name' must be a single string") if (!isTRUEorFALSE(use.outer.mcols)) stop("'use.outer.mcols' must be TRUE or FALSE") ans <- as.data.frame(unlist(x, use.names=FALSE), row.names=row.names, optional=optional, ...) if (ncol(ans) == 1L) colnames(ans)[1L] <- value.name group_and_group_name <- .make_group_and_group_name(elementNROWS(x), group_name.as.factor) ans <- cbind(group_and_group_name, ans) if (use.outer.mcols) { x_mcols <- mcols(x) if (length(x_mcols) != 0L) { extra_cols <- as.data.frame(x_mcols) extra_cols <- extract_data_frame_rows(extra_cols, ans[[1L]]) ans <- cbind(ans, extra_cols) } } ans } setMethod("as.data.frame", "List", .as.data.frame.List) setAs("List", "data.frame", function(from) as.data.frame(from)) S4Vectors/R/List-comparison.R0000644000175400017540000001726113175714520017057 0ustar00biocbuildbiocbuild### ========================================================================= ### Comparing and ordering List objects ### ------------------------------------------------------------------------- ### ### Method signatures for binary comparison operators. .OP2_SIGNATURES <- list( c("List", "List"), c("List", "list"), c("list", "List") ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### pcompareRecursively() ### ### NOT exported! ### ### By default, List objects pcompare recursively. Exceptions to the rule ### (e.g. Ranges, XString, etc...) must define a "pcompareRecursively" method ### that returns FALSE. ### setGeneric("pcompareRecursively", function(x) standardGeneric("pcompareRecursively") ) setMethod("pcompareRecursively", "List", function(x) TRUE) setMethod("pcompareRecursively", "list", function(x) TRUE) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### .op1_apply() and .op2_apply() internal helpers ### ### Apply a unary operator. .op1_apply <- function(OP1, x, ..., ANS_CONSTRUCTOR) { comp_rec_x <- pcompareRecursively(x) if (!comp_rec_x) { OP1_Vector_method <- selectMethod(OP1, "Vector") return(OP1_Vector_method(x, ...)) } compress_ans <- !is(x, "SimpleList") ## Note that we should just be able to do ## y <- lapply(x, OP1, ...) ## instead of the extremely obfuscated form below (which, in a bug-free ## world, should be equivalent to the simple form above). ## However, because of a regression in R 3.4.2, using the simple form ## above doesn't seem to work properly if OP1 is a generic function with ## dispatch on ... (e.g. order()). The form below seems to work though, ## so we use it as a temporary workaround. y <- lapply(x, function(xi) do.call(OP1, list(xi, ...))) ANS_CONSTRUCTOR(y, compress=compress_ans) } ### Apply a binary operator. .op2_apply <- function(OP2, x, y, ..., ANS_CONSTRUCTOR) { comp_rec_x <- pcompareRecursively(x) comp_rec_y <- pcompareRecursively(y) if (!(comp_rec_x || comp_rec_y)) { OP2_Vector_method <- selectMethod(OP2, c("Vector", "Vector")) return(OP2_Vector_method(x, y, ...)) } if (!comp_rec_x) x <- list(x) if (!comp_rec_y) y <- list(y) compress_ans <- !((is(x, "SimpleList") || is.list(x)) && (is(y, "SimpleList") || is.list(y))) x_len <- length(x) y_len <- length(y) if (x_len == 0L || y_len == 0L) { ans <- ANS_CONSTRUCTOR(compress=compress_ans) } else { ans <- ANS_CONSTRUCTOR(mapply(OP2, x, y, MoreArgs=list(...), SIMPLIFY=FALSE, USE.NAMES=FALSE), compress=compress_ans) } ## 'ans' is guaranteed to have the length of 'x' or 'y'. x_names <- names(x) y_names <- names(y) if (!(is.null(x_names) && is.null(y_names))) { ans_len <- length(ans) if (x_len != y_len) { if (x_len == ans_len) { ans_names <- x_names } else { ans_names <- y_names } } else { if (is.null(x_names)) { ans_names <- y_names } else { ans_names <- x_names } } names(ans) <- ans_names } ans } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Element-wise (aka "parallel") comparison of 2 List objects. ### setMethods("pcompare", .OP2_SIGNATURES, function(x, y) .op2_apply(pcompare, x, y, ANS_CONSTRUCTOR=IRanges::IntegerList) ) setMethods("==", .OP2_SIGNATURES, function(e1, e2) .op2_apply(`==`, e1, e2, ANS_CONSTRUCTOR=IRanges::LogicalList) ) setMethods("<=", .OP2_SIGNATURES, function(e1, e2) .op2_apply(`<=`, e1, e2, ANS_CONSTRUCTOR=IRanges::LogicalList) ) ### The remaining comparison binary operators (!=, >=, <, >) will work ### out-of-the-box on List objects thanks to the "!" methods below and to the ### methods for Vector objects. setMethod("!", "List", function(x) { if (is(x, "RleList")) { ANS_CONSTRUCTOR <- IRanges::RleList } else { ANS_CONSTRUCTOR <- IRanges::LogicalList } .op1_apply(`!`, x, ANS_CONSTRUCTOR=ANS_CONSTRUCTOR) } ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### match() ### setMethods("match", .OP2_SIGNATURES, function(x, table, nomatch=NA_integer_, incomparables=NULL, ...) { if (is(x, "RleList")) { ANS_CONSTRUCTOR <- IRanges::RleList } else { ANS_CONSTRUCTOR <- IRanges::IntegerList } .op2_apply(match, x, table, nomatch=nomatch, incomparables=incomparables, ..., ANS_CONSTRUCTOR=ANS_CONSTRUCTOR) } ) ### 2 of the 3 "match" methods defined above have signatures List,list and ### List,List and therefore are more specific than the 2 methods below. ### So in the methods below 'table' is guaranteed to be a vector that is not ### a list or a Vector that is not a List. setMethods("match", list(c("List", "vector"), c("List", "Vector")), function(x, table, nomatch=NA_integer_, incomparables=NULL, ...) { match(x, list(table), nomatch=nomatch, incomparables=incomparables, ...) } ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### duplicated() & unique() ### .duplicated.List <- function(x, incomparables=FALSE, fromLast=FALSE, ...) { .op1_apply(duplicated, x, incomparables=incomparables, fromLast=fromLast, ..., ANS_CONSTRUCTOR=IRanges::LogicalList) } setMethod("duplicated", "List", .duplicated.List) .unique.List <- function(x, incomparables=FALSE, ...) { if (!pcompareRecursively(x)) return(callNextMethod()) i <- !duplicated(x, incomparables=incomparables, ...) # LogicalList x[i] } setMethod("unique", "List", .unique.List) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### %in% ### ### The "%in%" method for Vector objects calls is.na() internally. setMethod("is.na", "List", function(x) { if (is(x, "RleList")) { ANS_CONSTRUCTOR <- IRanges::RleList } else { ANS_CONSTRUCTOR <- IRanges::LogicalList } .op1_apply(is.na, x, ANS_CONSTRUCTOR=ANS_CONSTRUCTOR) } ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### order() and related methods. ### setMethod("order", "List", function(..., na.last=TRUE, decreasing=FALSE, method=c("auto", "shell", "radix")) { args <- list(...) if (length(args) != 1L) stop("\"order\" method for List objects ", "can only take one input object") .op1_apply(order, args[[1L]], na.last=na.last, decreasing=decreasing, method=method, ANS_CONSTRUCTOR=IRanges::IntegerList) } ) .sort.List <- function(x, decreasing=FALSE, na.last=NA) { if (!pcompareRecursively(x)) return(callNextMethod()) i <- order(x, na.last=na.last, decreasing=decreasing) # IntegerList x[i] } setMethod("sort", "List", .sort.List) setMethod("rank", "List", function(x, na.last=TRUE, ties.method=c("average", "first", "random", "max", "min")) { .op1_apply(rank, x, na.last=na.last, ties.method=ties.method, ANS_CONSTRUCTOR=IRanges::IntegerList) } ) setMethod("is.unsorted", "List", function(x, na.rm = FALSE, strictly = FALSE) { vapply(x, is.unsorted, logical(1L), na.rm=na.rm, strictly=strictly) }) S4Vectors/R/List-utils.R0000644000175400017540000002205613175714520016043 0ustar00biocbuildbiocbuild### ========================================================================= ### Common operations on List objects ### ------------------------------------------------------------------------- ### ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Looping on List objects ### setMethod("lapply", "List", function(X, FUN, ...) { FUN <- match.fun(FUN) ii <- seq_len(length(X)) names(ii) <- names(X) lapply(ii, function(i) FUN(X[[i]], ...)) }) .sapplyDefault <- base::sapply environment(.sapplyDefault) <- topenv() setMethod("sapply", "List", .sapplyDefault) setGeneric("endoapply", signature = "X", function(X, FUN, ...) standardGeneric("endoapply")) setMethod("endoapply", "list", function(X, FUN, ...) lapply(X = X, FUN = match.fun(FUN), ...)) setMethod("endoapply", "data.frame", function(X, FUN, ...) as.data.frame(lapply(X = X, FUN = match.fun(FUN), ...))) setMethod("endoapply", "List", function(X, FUN, ...) { elementTypeX <- elementType(X) FUN <- match.fun(FUN) for (i in seq_len(length(X))) { elt <- FUN(X[[i]], ...) if (!extends(class(elt), elementTypeX)) stop("'FUN' must return elements of class ", elementTypeX) X[[i]] <- elt } X }) setGeneric("revElements", signature="x", function(x, i) standardGeneric("revElements") ) ### These 2 methods explain the concept of revElements() but they are not ### efficient because they loop over the elements of 'x[i]'. ### There is a fast method for CompressedList objects though. setMethod("revElements", "list", function(x, i) { x[i] <- lapply(x[i], revROWS) x } ) setMethod("revElements", "List", function(x, i) { x[i] <- endoapply(x[i], revROWS) x } ) setGeneric("mendoapply", signature = "...", function(FUN, ..., MoreArgs = NULL) standardGeneric("mendoapply")) BiocGenerics:::apply_hotfix73465(getGeneric("mendoapply")) setMethod("mendoapply", "list", function(FUN, ..., MoreArgs = NULL) mapply(FUN = FUN, ..., MoreArgs = MoreArgs, SIMPLIFY = FALSE)) setMethod("mendoapply", "data.frame", function(FUN, ..., MoreArgs = NULL) as.data.frame(mapply(FUN = match.fun(FUN), ..., MoreArgs = MoreArgs, SIMPLIFY = FALSE))) setMethod("mendoapply", "List", function(FUN, ..., MoreArgs = NULL) { X <- list(...)[[1L]] elementTypeX <- elementType(X) FUN <- match.fun(FUN) listData <- mapply(FUN = FUN, ..., MoreArgs = MoreArgs, SIMPLIFY = FALSE) for (i in seq_len(length(listData))) { if (!extends(class(listData[[i]]), elementTypeX)) stop("'FUN' must return elements of class ", elementTypeX) X[[i]] <- listData[[i]] } X }) ### Element-wise c() for list-like objects. ### This is a fast mapply(c, ..., SIMPLIFY=FALSE) but with the following ### differences: ### 1) pc() ignores the supplied objects that are NULL. ### 2) pc() does not recycle its arguments. All the supplied objects must ### have the same length. ### 3) If one of the supplied objects is a List object, then pc() returns a ### List object. ### 4) pc() always returns a homogenous list or List object, that is, an ### object where all the list elements have the same type. pc <- function(...) { args <- unname(list(...)) args <- args[!sapply_isNULL(args)] if (length(args) == 0L) return(list()) if (length(args) == 1L) return(args[[1L]]) args_NROWS <- elementNROWS(args) if (!all(args_NROWS == args_NROWS[[1L]])) stop("all the objects to combine must have the same length") ans_as_List <- any(vapply(args, is, logical(1), "List", USE.NAMES=FALSE)) SPLIT.FUN <- if (ans_as_List) IRanges::splitAsList else split ans_unlisted <- do.call(c, lapply(args, unlist, use.names=FALSE)) f <- structure(unlist(lapply(args, quick_togroup), use.names=FALSE), levels=as.character(seq_along(args[[1L]])), class="factor") setNames(SPLIT.FUN(ans_unlisted, f), names(args[[1L]])) } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Functional programming methods ### ### Copy+pasted to disable forced as.list() coercion .ReduceDefault <- function(f, x, init, right = FALSE, accumulate = FALSE) { mis <- missing(init) len <- length(x) if (len == 0L) return(if (mis) NULL else init) f <- match.fun(f) # if (!is.vector(x) || is.object(x)) # x <- as.list(x) ind <- seq_len(len) if (mis) { if (right) { init <- x[[len]] ind <- ind[-len] } else { init <- x[[1L]] ind <- ind[-1L] } } if (!accumulate) { if (right) { for (i in rev(ind)) init <- f(x[[i]], init) } else { for (i in ind) init <- f(init, x[[i]]) } init } else { len <- length(ind) + 1L out <- vector("list", len) if (mis) { if (right) { out[[len]] <- init for (i in rev(ind)) { init <- f(x[[i]], init) out[[i]] <- init } } else { out[[1L]] <- init for (i in ind) { init <- f(init, x[[i]]) out[[i]] <- init } } } else { if (right) { out[[len]] <- init for (i in rev(ind)) { init <- f(x[[i]], init) out[[i]] <- init } } else { for (i in ind) { out[[i]] <- init init <- f(init, x[[i]]) } out[[len]] <- init } } if (all(lengths(out) == 1L)) out <- unlist(out, recursive = FALSE) out } } setMethod("Reduce", "List", .ReduceDefault) ### Presumably to avoid base::lapply coercion to list. .FilterDefault <- base::Filter environment(.FilterDefault) <- topenv() setMethod("Filter", "List", .FilterDefault) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Evaluating. ### setMethod("within", "List", function(data, expr, ...) { ## cannot use active bindings here, as they break for replacement enclos <- top_prenv(expr) e <- list2env(as.list(data), parent=enclos) safeEval(substitute(expr), e, enclos) l <- mget(ls(e), e) l <- l[!sapply(l, is.null)] nD <- length(del <- setdiff(names(data), (nl <- names(l)))) for (nm in nl) data[[nm]] <- l[[nm]] for (nm in del) data[[nm]] <- NULL data }) setMethod("do.call", c("ANY", "List"), function (what, args, quote = FALSE, envir = parent.frame()) { args <- as.list(args) callGeneric() }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Factors. ### droplevels.List <- function(x, ...) droplevels(x, ...) .droplevels.List <- function(x, except = NULL) { ix <- vapply(x, Has(levels), logical(1L)) ix[except] <- FALSE x[ix] <- lapply(x[ix], droplevels) x } setMethod("droplevels", "List", .droplevels.List) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Summarizing. ### setMethod("anyNA", "List", function(x, recursive=FALSE) { stopifnot(isTRUEorFALSE(recursive)) if (recursive) { anyNA(as.list(x), recursive=TRUE) } else { callNextMethod() } }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Matrix construction ### normBindArgs <- function(..., deparse.level=1L) { stopifnot(isSingleNumber(deparse.level), deparse.level >= 0L, deparse.level <= 2L) args <- list(...) if (deparse.level > 0L) { exprs <- as.list(substitute(list(...)))[-1L] genName <- if (is.null(names(args))) TRUE else names(args) == "" if (deparse.level == 1L && any(genName)) genName <- genName & vapply(exprs, is.name, logical(1L)) if (any(genName)) { if (is.null(names(args))) names(args) <- rep("", length(args)) names(args)[genName] <- as.character(exprs[genName]) } } args } setMethod("rbind", "List", function(..., deparse.level=1L) { args <- normBindArgs(..., deparse.level=deparse.level) do.call(rbind, lapply(args, as.list)) }) setMethod("cbind", "List", function(..., deparse.level=1L) { args <- normBindArgs(..., deparse.level=deparse.level) do.call(cbind, lapply(args, as.list)) }) S4Vectors/R/Pairs-class.R0000644000175400017540000001735413175714520016160 0ustar00biocbuildbiocbuild### ========================================================================= ### Pairs objects ### ------------------------------------------------------------------------- ### ### Two parallel vectors. Could result from "dereferencing" a Hits. ### setClass("Pairs", contains="Vector", representation(first="ANY", second="ANY", NAMES="character_OR_NULL"), prototype(first=logical(0L), second=logical(0L), elementMetadata=DataFrame())) setValidity2("Pairs", .valid.Pairs) .valid.Pairs <- function(object) { c(if (length(object@first) != length(object@second)) "'first' and 'second' must have the same length", if (!is.null(object@NAMES) && length(object@NAMES) != length(object@first)) "'NAMES', if not NULL, must have the same length as object" ) } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Accessors ### setGeneric("first", function(x, ...) standardGeneric("first")) setGeneric("second", function(x, ...) standardGeneric("second")) setMethod("first", "Pairs", function(x) x@first) setMethod("second", "Pairs", function(x) x@second) setGeneric("first<-", function(x, ..., value) standardGeneric("first<-"), signature="x") setGeneric("second<-", function(x, ..., value) standardGeneric("second<-"), signature="x") setReplaceMethod("first", "Pairs", function(x, value) { x@first <- value x }) setReplaceMethod("second", "Pairs", function(x, value) { x@second <- value x }) setMethod("names", "Pairs", function(x) x@NAMES) setReplaceMethod("names", "Pairs", function(x, value) { x@NAMES <- value x }) setMethod("length", "Pairs", function(x) length(first(x))) setMethod("parallelSlotNames", "Pairs", function(x) c("first", "second", "NAMES", callNextMethod())) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Constructor ### Pairs <- function(first, second, ..., names = NULL, hits = NULL) { if (!is.null(hits)) { stopifnot(is(hits, "Hits"), queryLength(hits) == length(first), subjectLength(hits) == length(second)) first <- first[queryHits(hits)] second <- second[subjectHits(hits)] } stopifnot(length(first) == length(second), is.null(names) || length(names) == length(first)) if (!missing(...)) { elementMetadata <- DataFrame(...) } else { elementMetadata <- make_zero_col_DataFrame(length(first)) } new("Pairs", first=first, second=second, NAMES=names, elementMetadata=elementMetadata) } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Comparison ### setMethod("match", c("Pairs", "Pairs"), function(x, table, nomatch = NA_integer_, incomparables = NULL, ...) { if (!is.null(incomparables)) stop("'incomparables' must be NULL") hits <- intersect(findMatches(first(x), first(table), ...), findMatches(second(x), second(table), ...)) ans <- selectHits(hits, "first") if (!identical(nomatch, NA_integer_)) { ans[is.na(ans)] <- nomatch } ans }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Coerce ### ### We use 'zipup' and 'zipdown' because '(un)zip' already taken by utils. ### setGeneric("zipup", function(x, y, ...) standardGeneric("zipup")) setMethod("zipup", c("ANY", "ANY"), function(x, y) { stopifnot(length(x) == length(y)) linear <- append(x, y) collate_subscript <- make_XYZxyz_to_XxYyZz_subscript(length(x)) linear <- linear[collate_subscript] names <- if (!is.null(names(x))) names(x) else names(y) p <- IRanges::PartitioningByWidth(rep(2L, length(x)), names=names) relist(linear, p) }) setMethod("zipup", c("Pairs", "missing"), function(x, y, ...) { zipped <- zipup(first(x), second(x), ...) names(zipped) <- names(x) mcols(zipped) <- mcols(x) zipped }) setGeneric("zipdown", function(x, ...) standardGeneric("zipdown")) setMethod("zipdown", "ANY", function(x) { stopifnot(all(lengths(x) == 2L)) p <- IRanges::PartitioningByEnd(x) v <- unlist(x, use.names=FALSE) Pairs(v[start(p)], v[end(p)], names=names(x)) }) setMethod("zipdown", "List", function(x) { unzipped <- callNextMethod() mcols(unzipped) <- mcols(x) unzipped }) setAs("Pairs", "DataFrame", function(from) { df <- DataFrame(first=first(from), second=second(from), mcols(from), check.names=FALSE) df$names <- names(from) df }) setMethod("as.data.frame", "Pairs", function (x, row.names = NULL, optional = FALSE, ...) { as.data.frame(as(x, "DataFrame"), optional=optional, row.names=row.names, ...) }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Combine ### .unlist_list_of_Pairs <- function(x) { Pairs(do.call(c, lapply(x, first)), do.call(c, lapply(x, second)), do.call(rbind, lapply(x, mcols)), ### FIXME: breaks if only some names are NULL names = unlist(lapply(x, names))) } setMethod("c", "Pairs", function (x, ..., recursive = FALSE) { if (!identical(recursive, FALSE)) stop("'recursive' argument not supported") if (missing(x)) args <- unname(list(...)) else args <- unname(list(x, ...)) .unlist_list_of_Pairs(args) }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Utilities ### setMethod("t", "Pairs", function(x) { tx <- x first(tx) <- second(x) second(tx) <- first(x) tx }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Show ### .makeNakedMatFromPairs <- function(x) { x_len <- length(x) x_mcols <- mcols(x) x_nmc <- if (is.null(x_mcols)) 0L else ncol(x_mcols) ans <- cbind(first = showAsCell(first(x)), second = showAsCell(second(x))) if (x_nmc > 0L) { tmp <- do.call(data.frame, c(lapply(x_mcols, showAsCell), list(check.names = FALSE))) ans <- cbind(ans, `|` = rep.int("|", x_len), as.matrix(tmp)) } ans } showPairs <- function(x, margin = "", print.classinfo = FALSE) { x_class <- class(x) x_len <- length(x) x_mcols <- mcols(x) x_nmc <- if (is.null(x_mcols)) 0L else ncol(x_mcols) cat(x_class, " object with ", x_len, " pair", ifelse(x_len == 1L, "", "s"), " and ", x_nmc, " metadata column", ifelse(x_nmc == 1L, "", "s"), ":\n", sep = "") out <- makePrettyMatrixForCompactPrinting(x, .makeNakedMatFromPairs) if (print.classinfo) { .COL2CLASS <- c(first = class(first(x)), second = class(second(x))) classinfo <- makeClassinfoRowForCompactPrinting(x, .COL2CLASS) stopifnot(identical(colnames(classinfo), colnames(out))) out <- rbind(classinfo, out) } if (nrow(out) != 0L) rownames(out) <- paste0(margin, rownames(out)) print(out, quote = FALSE, right = TRUE, max = length(out)) } setMethod("show", "Pairs", function(object) { showPairs(object, margin = " ", print.classinfo = TRUE) }) S4Vectors/R/Rle-class.R0000644000175400017540000007212513175714520015621 0ustar00biocbuildbiocbuild### ========================================================================= ### Rle objects ### ------------------------------------------------------------------------- ### setClass("Rle", contains="Vector", representation( values="vector_OR_factor", lengths="integer_OR_LLint" ), prototype( values=logical(0), lengths=integer(0) ) ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Getters ### setMethod("length", "Rle", function(x) as.double(.Call2("Rle_length", x, PACKAGE="S4Vectors")) ) setGeneric("runLength", signature = "x", function(x) standardGeneric("runLength")) setMethod("runLength", "Rle", function(x) x@lengths) setGeneric("runValue", signature = "x", function(x) standardGeneric("runValue")) setMethod("runValue", "Rle", function(x) x@values) setGeneric("nrun", signature = "x", function(x) standardGeneric("nrun")) setMethod("nrun", "Rle", function(x) length(runLength(x))) setMethod("start", "Rle", function(x) .Call2("Rle_start", x, PACKAGE="S4Vectors")) setMethod("end", "Rle", function(x) .Call2("Rle_end", x, PACKAGE="S4Vectors")) setMethod("width", "Rle", function(x) runLength(x)) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Validity ### .valid_Rle <- function(x) { msg <- NULL msg <- c(msg, .Call2("Rle_valid", x, PACKAGE="S4Vectors")) ## Too expensive so commented out for now. Maybe do this in C? #run_values <- runValues(x) #if (length(run_values) >= 2 && is.atomic(run_values) && # any(run_values[-1L] == run_values[-length(run_values)])) # msg <- c(msg, "consecutive runs must have different values") msg } setValidity2("Rle", .valid_Rle) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Constructor ### ### Low-level constructor. new_Rle <- function(values=logical(0), lengths=NULL) { stopifnot(is(values, "vector_OR_factor")) if (!is.null(lengths)) { if (!(is.numeric(lengths) || is.LLint(lengths))) stop("'lengths' must be NULL or a numeric or LLint vector") if (anyNA(lengths)) stop("'lengths' cannot contain NAs") if (is.double(lengths)) { suppressWarnings(lengths <- as.LLint(lengths)) if (anyNA(lengths)) stop("Rle vector is too long") } if (length(lengths) == 1L) lengths <- rep.int(lengths, length(values)) } .Call2("Rle_constructor", values, lengths, PACKAGE="S4Vectors") } setGeneric("Rle", signature="values", function(values=logical(0), lengths=NULL) standardGeneric("Rle") ) setMethod("Rle", "ANY", function(values=logical(0), lengths=NULL) new_Rle(values, lengths) ) setMethod("Rle", "Rle", function(values=logical(0), lengths=NULL) { if (!missing(lengths)) stop(wmsg("'lengths' cannot be supplied when calling Rle() ", "on an Rle object")) values } ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Setters ### setGeneric("runLength<-", signature="x", function(x, value) standardGeneric("runLength<-")) setReplaceMethod("runLength", "Rle", function(x, value) Rle(runValue(x), value)) setGeneric("runValue<-", signature="x", function(x, value) standardGeneric("runValue<-")) setReplaceMethod("runValue", "Rle", function(x, value) Rle(value, runLength(x))) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Coercion ### setAs("vector", "Rle", function(from) Rle(from)) setAs("logical", "Rle", function(from) Rle(from)) setAs("integer", "Rle", function(from) Rle(from)) setAs("numeric", "Rle", function(from) Rle(from)) setAs("complex", "Rle", function(from) Rle(from)) setAs("character", "Rle", function(from) Rle(from)) setAs("raw", "Rle", function(from) Rle(from)) setAs("factor", "Rle", function(from) Rle(from)) setAs("Rle", "vector", function(from) as.vector(from)) setAs("Rle", "logical", function(from) as.logical(from)) setAs("Rle", "integer", function(from) as.integer(from)) setAs("Rle", "numeric", function(from) as.numeric(from)) setAs("Rle", "complex", function(from) as.complex(from)) setAs("Rle", "character", function(from) as.character(from)) setAs("Rle", "raw", function(from) as.raw(from)) setAs("Rle", "factor", function(from) as.factor(from)) setAs("Rle", "list", function(from) as.list(from)) setAs("Rle", "data.frame", function(from) as.data.frame(from)) as.vector.Rle <- function(x, mode) rep.int(as.vector(runValue(x), mode), runLength(x)) setMethod("as.vector", "Rle", as.vector.Rle) setMethod("as.factor", "Rle", function(x) rep.int(as.factor(runValue(x)), runLength(x))) asFactorOrFactorRle <- function(x) { if (is(x, "Rle")) { runValue(x) <- as.factor(runValue(x)) x } else { as.factor(x) } } .as.list.Rle <- function(x) as.list(as.vector(x)) setMethod("as.list", "Rle", .as.list.Rle) setGeneric("decode", function(x, ...) standardGeneric("decode")) setMethod("decode", "ANY", identity) decodeRle <- function(x) rep.int(runValue(x), runLength(x)) setMethod("decode", "Rle", decodeRle) .as.data.frame.Rle <- function(x, row.names=NULL, optional=FALSE, ...) { value <- decodeRle(x) as.data.frame(value, row.names=row.names, optional=optional, ...) } setMethod("as.data.frame", "Rle", .as.data.frame.Rle) getStartEndRunAndOffset <- function(x, start, end) { .Call2("Rle_getStartEndRunAndOffset", x, start, end, PACKAGE="S4Vectors") } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Subsetting workhorses ### ### These are the low-level functions that do the real work of subsetting an ### Rle object. The final coercion to class(x) is to make sure that they act ### like an endomorphism on objects that belong to a subclass of Rle (the ### VariantAnnotation package defines Rle subclasses). ### Note that they drop the metadata columns! ### ### TODO: Support NAs in 'pos'. extract_positions_from_Rle <- function(x, pos, method=0L, decoded=FALSE) { if (!is.integer(pos)) stop("'pos' must be an integer vector") if (!isTRUEorFALSE(decoded)) stop("'decoded' must be TRUE or FALSE") #ans <- .Call2("Rle_extract_positions", x, pos, method, PACKAGE="S4Vectors") mapped_pos <- map_positions_to_runs(runLength(x), pos, method=method) ans <- runValue(x)[mapped_pos] if (decoded) return(ans) as(Rle(ans), class(x)) # so the function is an endomorphism } extract_range_from_Rle <- function(x, start, end) { ans <- .Call2("Rle_extract_range", x, start, end, PACKAGE="S4Vectors") as(ans, class(x)) # so the function is an endomorphism } ### NOT exported but used in IRanges package (by "extractROWS" method with ### signature Rle,RangesNSBS). extract_ranges_from_Rle <- function(x, start, width, method=0L, as.list=FALSE) { method <- normarg_method(method) if (!isTRUEorFALSE(as.list)) stop("'as.list' must be TRUE or FALSE") ans <- .Call2("Rle_extract_ranges", x, start, width, method, as.list, PACKAGE="S4Vectors") ## The function must act like an endomorphism. x_class <- class(x) if (!as.list) return(as(ans, x_class)) ## 'ans' is a list of Rle instances. if (x_class == "Rle") return(ans) lapply(ans, as, x_class) } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Subsetting ### setMethod("extractROWS", c("Rle", "ANY"), function (x, i) { i <- normalizeSingleBracketSubscript(i, x, allow.NAs=TRUE, as.NSBS=TRUE) callGeneric() } ) setMethod("extractROWS", c("Rle", "RangeNSBS"), function(x, i) { range <- i@subscript range_start <- range[[1L]] range_end <- range[[2L]] ans <- extract_range_from_Rle(x, range_start, range_end) mcols(ans) <- extractROWS(mcols(x), i) ans } ) setMethod("extractROWS", c("Rle", "NSBS"), function(x, i) { ans <- extract_positions_from_Rle(x, as.integer(i)) mcols(ans) <- extractROWS(mcols(x), i) ans } ) setMethod("[", "Rle", function(x, i, j, ..., drop=getOption("dropRle", default=FALSE)) { if (!missing(j) || length(list(...)) > 0) stop("invalid subsetting") if (!missing(i)) x <- extractROWS(x, i) if (drop) x <- decodeRle(x) x } ) ### The replaced elements in 'x' must get their metadata columns from 'value'. ### See this thread on bioc-devel: ### https://stat.ethz.ch/pipermail/bioc-devel/2015-November/008319.html setMethod("replaceROWS", "Rle", function(x, i, value) { ## FIXME: Right now, the subscript 'i' is turned into an IRanges ## object so we need stuff that lives in the IRanges package for this ## to work. This is ugly/hacky and needs to be fixed (thru a redesign ## of this method). if (!requireNamespace("IRanges", quietly=TRUE)) stop("Couldn't load the IRanges package. You need to install ", "the IRanges\n package in order to replace values in ", "an Rle object.") i <- normalizeSingleBracketSubscript(i, x, as.NSBS=TRUE) lv <- length(value) if (lv != 1L) { ans <- Rle(replaceROWS(decodeRle(x), i, as.vector(value))) mcols(ans) <- replaceROWS(mcols(x), i, mcols(value)) return(ans) } ## From here, 'value' is guaranteed to be of length 1. ## TODO: Maybe make this the coercion method from NSBS to Ranges. if (is(i, "RangesNSBS")) { ir <- i@subscript } else { ir <- as(as.integer(i), "IRanges") } ir <- IRanges::reduce(ir) if (length(ir) == 0L) return(x) isFactorRle <- is.factor(runValue(x)) value <- normalizeSingleBracketReplacementValue(value, x) value2 <- as.vector(value) if (isFactorRle) { value2 <- factor(value2, levels=levels(x)) dummy_value <- factor(levels(x), levels=levels(x)) } if (anyMissingOrOutside(start(ir), 1L, length(x)) || anyMissingOrOutside(end(ir), 1L, length(x))) stop("some ranges are out of bounds") valueWidths <- width(ir) ir <- IRanges::gaps(ir, start=1, end=length(x)) k <- length(ir) start <- start(ir) end <- end(ir) info <- getStartEndRunAndOffset(x, start, end) runStart <- info[["start"]][["run"]] offsetStart <- info[["start"]][["offset"]] runEnd <- info[["end"]][["run"]] offsetEnd <- info[["end"]][["offset"]] if ((length(ir) == 0L) || (start(ir)[1L] != 1L)) { k <- k + 1L runStart <- c(1L, runStart) offsetStart <- c(0L, offsetStart) runEnd <- c(0L, runEnd) offsetEnd <- c(0L, offsetEnd) } if ((length(ir) > 0L) && (end(ir[length(ir)]) != length(x))) { k <- k + 1L runStart <- c(runStart, 1L) offsetStart <- c(offsetStart, 0L) runEnd <- c(runEnd, 0L) offsetEnd <- c(offsetEnd, 0L) } subseqs <- vector("list", length(valueWidths) + k) if (k > 0L) { if (isFactorRle) { subseqs[seq(1L, length(subseqs), by=2L)] <- lapply(seq_len(k), function(i) { ans <- .Call2("Rle_window_aslist", x, runStart[i], runEnd[i], offsetStart[i], offsetEnd[i], PACKAGE="S4Vectors") ans[["values"]] <- dummy_value[ans[["values"]]] ans}) } else { subseqs[seq(1L, length(subseqs), by=2L)] <- lapply(seq_len(k), function(i) .Call2("Rle_window_aslist", x, runStart[i], runEnd[i], offsetStart[i], offsetEnd[i], PACKAGE="S4Vectors")) } } if (length(valueWidths) > 0L) { subseqs[seq(2L, length(subseqs), by=2L)] <- lapply(seq_len(length(valueWidths)), function(i) list(values=value2, lengths=valueWidths[i])) } values <- unlist(lapply(subseqs, "[[", "values")) if (isFactorRle) values <- dummy_value[values] ans <- Rle(values, unlist(lapply(subseqs, "[[", "lengths"))) mcols(ans) <- replaceROWS(mcols(x), i, mcols(value)) ans } ) setReplaceMethod("[", "Rle", function(x, i, j,..., value) { if (!missing(j) || length(list(...)) > 0L) stop("invalid subsetting") i <- normalizeSingleBracketSubscript(i, x, as.NSBS=TRUE) li <- length(i) if (li == 0L) { ## Surprisingly, in that case, `[<-` on standard vectors does not ## even look at 'value'. So neither do we... return(x) } lv <- length(value) if (lv == 0L) stop("replacement has length zero") replaceROWS(x, i, value) } ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Subsetting an object by an Rle subscript. ### ### See R/subsetting-utils.R for more information. ### setClass("RleNSBS", # not exported contains="NSBS", representation( subscript="Rle" # integer-Rle ), prototype( ## Calling Rle(integer(0)) below causes the following error at ## installation time: ## Error in .Call(.NAME, ..., PACKAGE = PACKAGE) : ## "Rle_constructor" not available for .Call() for package ## "S4Vectors" ## Error : unable to load R code in package ‘S4Vectors’ ## ERROR: lazy loading failed for package ‘S4Vectors’ #subscript=Rle(integer(0)) subscript=new2("Rle", values=integer(0), lengths=integer(0), check=FALSE) ) ) ### Construction methods. ### Supplied arguments are trusted so we don't check them! setMethod("NSBS", "Rle", function(i, x, exact=TRUE, strict.upper.bound=TRUE, allow.NAs=FALSE) { x_NROW <- NROW(x) i_vals <- runValue(i) if (is.logical(i_vals) && length(i_vals) != 0L) { if (anyNA(i_vals)) stop("subscript contains NAs") if (length(i) < x_NROW) i <- rep(i, length.out=x_NROW) ## The coercion method from Rle to NormalIRanges is defined in the ## IRanges package. if (requireNamespace("IRanges", quietly=TRUE)) { i <- as(i, "NormalIRanges") ## This will call the "NSBS" method for Ranges objects defined ## in the IRanges package and return a RangesNSBS, or ## RangeNSBS, or NativeNSBS object. return(callGeneric()) } warning(wmsg( "Couldn't load the IRanges package. Installing this package ", "will enable efficient subsetting by a logical-Rle object ", "so is higly recommended." )) i <- which(i) return(callGeneric()) # will return a NativeNSBS object } i_vals <- NSBS(i_vals, x, exact=exact, strict.upper.bound=strict.upper.bound, allow.NAs=allow.NAs) runValue(i) <- as.integer(i_vals) new2("RleNSBS", subscript=i, upper_bound=x_NROW, upper_bound_is_strict=strict.upper.bound, has_NAs=i_vals@has_NAs, check=FALSE) } ) ### Other methods. setMethod("as.integer", "RleNSBS", function(x) decodeRle(x@subscript)) setMethod("length", "RleNSBS", function(x) length(x@subscript)) setMethod("anyDuplicated", "RleNSBS", function(x, incomparables=FALSE, ...) anyDuplicated(x@subscript) ) setMethod("isStrictlySorted", "RleNSBS", function(x) isStrictlySorted(x@subscript) ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Subsetting an Rle object by an Rle subscript. ### ### Simplified version of rep.int() for Rle objects. Handles only the case ### where 'times' has the length of 'x'. .rep_times_Rle <- function(x, times) { breakpoints <- end(x) if (length(times) != last_or(breakpoints, 0L)) stop("invalid 'times' argument") runLength(x) <- groupsum(times, breakpoints) x } setMethod("extractROWS", c("Rle", "RleNSBS"), function(x, i) { rle <- i@subscript .rep_times_Rle(extractROWS(x, runValue(rle)), runLength(rle)) } ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Other subsetting-related operations ### ### S3/S4 combo for rev.Rle rev.Rle <- function(x) { x@values <- rev(runValue(x)) x@lengths <- rev(runLength(x)) x } setMethod("rev", "Rle", rev.Rle) setMethod("rep.int", "Rle", function(x, times) { if (!is.numeric(times)) stop("invalid 'times' argument") if (!is.integer(times)) times <- as.integer(times) if (anyMissingOrOutside(times, 0L)) stop("invalid 'times' argument") x_len <- length(x) times_len <- length(times) if (times_len == x_len) return(.rep_times_Rle(x, times)) if (times_len != 1L) stop("invalid 'times' argument") ans <- Rle(rep.int(runValue(x), times), rep.int(runLength(x), times)) as(ans, class(x)) # rep.int() must act like an endomorphism } ) setMethod("rep", "Rle", function(x, times, length.out, each) { usedEach <- FALSE if (!missing(each) && length(each) > 0) { each <- as.integer(each[1L]) if (!is.na(each)) { if (each < 0) stop("invalid 'each' argument") usedEach <- TRUE if (each == 0) x <- new2(class(x), values=runValue(x)[0L], check=FALSE) else x@lengths <- each[1L] * runLength(x) } } if (!missing(length.out) && length(length.out) > 0) { n <- length(x) length.out <- as.integer(length.out[1L]) if (!is.na(length.out)) { if (length.out == 0) { x <- new2(class(x), values=runValue(x)[0L], check=FALSE) } else if (length.out < n) { x <- window(x, 1, length.out) } else if (length.out > n) { if (n == 0) { x <- Rle(rep(runValue(x), length.out=1), length.out) } else { x <- window(rep.int(x, ceiling(length.out / n)), 1, length.out) } } } } else if (!missing(times)) { if (usedEach && length(times) != 1) stop("invalid 'times' argument") x <- rep.int(x, times) } x }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Combining. ### setMethod("c", "Rle", function(x, ..., recursive = FALSE) { args <- lapply(unname(list(x, ...)), Rle) args_values <- lapply(args, slot, "values") ## use unlist() for factors: ans_values <- unlist(args_values, recursive=FALSE) if (is.null(ans_values)) { ## use c() to get type promotion right in zero-length case: ans_values <- do.call(c, args_values) } ans_lengths <- unlist(lapply(args, slot, "lengths"), recursive=FALSE) Rle(ans_values, ans_lengths) }) setMethod("append", c("Rle", "vector"), function (x, values, after = length(x)) { append(x, Rle(values), after) }) setMethod("append", c("vector", "Rle"), function (x, values, after = length(x)) { append(Rle(x), values, after) }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Other methods. ### setMethod("%in%", "Rle", function(x, table) new_Rle(runValue(x) %in% table, runLength(x))) setGeneric("findRun", signature = "vec", function(x, vec) standardGeneric("findRun")) setMethod("findRun", signature = c(vec = "Rle"), function(x, vec) { runs <- findIntervalAndStartFromWidth(as.integer(x), runLength(vec))[["interval"]] runs[is.na(runs) | x == 0 | x > length(vec)] <- NA runs }) setMethod("is.na", "Rle", function(x) new_Rle(is.na(runValue(x)), runLength(x))) setMethod("anyNA", "Rle", function(x) anyNA(runValue(x))) setMethod("is.unsorted", "Rle", function(x, na.rm = FALSE, strictly = FALSE) { ans <- is.unsorted(runValue(x), na.rm = na.rm, strictly = strictly) if (strictly && !ans) ans <- any(runLength(x) > 1L) ans }) setMethod("match", c("ANY", "Rle"), function(x, table, nomatch=NA_integer_, incomparables=NULL) { m <- match(x, runValue(table), incomparables=incomparables) ans <- start(table)[m] ## 'as.integer(nomatch)[1L]' seems to mimic how base::match() treats ## the 'nomatch' argument. nomatch <- as.integer(nomatch)[1L] if (!is.na(nomatch)) ans[is.na(ans)] <- nomatch ans } ) setMethod("match", c("Rle", "ANY"), function(x, table, nomatch=NA_integer_, incomparables=NULL) { x_run_lens <- runLength(x) x <- runValue(x) m <- callGeneric() Rle(m, x_run_lens) } ) setMethod("match", c("Rle", "Rle"), function(x, table, nomatch=NA_integer_, incomparables=NULL) { x_run_lens <- runLength(x) x <- runValue(x) m <- callGeneric() Rle(m, x_run_lens) } ) setMethod("order", "Rle", function(..., na.last=TRUE, decreasing=FALSE, method=c("auto", "shell", "radix")) { args <- list(...) if (length(args) == 1L) { x <- args[[1L]] o <- order(runValue(x), na.last=na.last, decreasing=decreasing, method=method) mseq(start(x)[o], end(x)[o]) } else { args <- lapply(unname(args), decodeRle) do.call(order, c(args, list(na.last=na.last, decreasing=decreasing, method=method))) } }) .sort.Rle <- function(x, decreasing=FALSE, na.last=NA, ...) { if (is.na(na.last)) { if (anyNA(runValue(x))) x <- x[!is.na(x)] } ord <- base::order(runValue(x), na.last=na.last, decreasing=decreasing) new_Rle(runValue(x)[ord], runLength(x)[ord]) } setMethod("sort", "Rle", .sort.Rle) setMethod("xtfrm", "Rle", function(x) { initialize(x, values=xtfrm(runValue(x))) }) setMethod("rank", "Rle", function (x, na.last = TRUE, ties.method = c("average", "first", "random", "max", "min")) { ties.method <- match.arg(ties.method) if (ties.method == "min" || ties.method == "first") { callNextMethod() } else { x <- as.vector(x) ans <- callGeneric() if (ties.method %in% c("average", "max", "min")) { Rle(ans) } else { ans } } }) setMethod("table", "Rle", function(...) { ## Currently only 1 Rle is supported. An approach for multiple ## Rle's could be disjoin(), findRun() to find matches, then ## xtabs(length ~ value ...). x <- sort(list(...)[[1L]]) if (is.factor(runValue(x))) { dn <- levels(x) tab <- integer(length(dn)) tab[dn %in% runValue(x)] <- runLength(x) dims <- length(dn) } else { dn <- as.character(runValue(x)) tab <- runLength(x) dims <- nrun(x) } ## Adjust 'dn' for consistency with base::table if (length(dn) == 0L) dn <- NULL dn <- list(dn) names(dn) <- .list.names(...) y <- array(tab, dims, dimnames=dn) class(y) <- "table" y } ) .list.names <- function(...) { l <- as.list(substitute(list(...)))[-1L] deparse.level <- 1 nm <- names(l) fixup <- if (is.null(nm)) seq_along(l) else nm == "" dep <- vapply(l[fixup], function(x) switch(deparse.level + 1, "", if (is.symbol(x)) as.character(x) else "", deparse(x, nlines = 1)[1L]), "") if (is.null(nm)) dep else { nm[fixup] <- dep nm } } ### Not exported? Broken on numeric-Rle and factor-Rle. H.P. -- Oct 16, 2016 setMethod("tabulate", "Rle", function (bin, nbins = max(bin, 1L, na.rm = TRUE)) { tabulate2(runValue(bin), nbins, runLength(bin)) }) .duplicated.Rle <- function(x, incomparables=FALSE, fromLast=FALSE) stop("no \"duplicated\" method for Rle objects yet, sorry") setMethod("duplicated", "Rle", .duplicated.Rle) .unique.Rle <- function(x, incomparables=FALSE, ...) unique(runValue(x), incomparables=incomparables, ...) setMethod("unique", "Rle", .unique.Rle) ### S3/S4 combo for anyDuplicated.Rle anyDuplicated.Rle <- function(x, incomparables=FALSE, ...) all(runLength(x) == 1L) && anyDuplicated(runValue(x)) setMethod("anyDuplicated", "Rle", anyDuplicated.Rle) setMethod("isStrictlySorted", "Rle", function(x) all(runLength(x) == 1L) && isStrictlySorted(runValue(x)) ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Set methods ### ### The return values of these do not have any duplicated values, so ### it would obviously be more efficient to return plain vectors. That ### might violate user expectations though. ### setMethod("union", c("Rle", "Rle"), function(x, y) { Rle(union(runValue(x), runValue(y))) }) setMethod("union", c("ANY", "Rle"), function(x, y) { Rle(union(as.vector(x), runValue(y))) }) setMethod("union", c("Rle", "ANY"), function(x, y) { Rle(union(runValue(x), as.vector(y))) }) setMethod("intersect", c("Rle", "Rle"), function(x, y) { Rle(intersect(runValue(x), runValue(y))) }) setMethod("intersect", c("ANY", "Rle"), function(x, y) { Rle(intersect(as.vector(x), runValue(y))) }) setMethod("intersect", c("Rle", "ANY"), function(x, y) { Rle(intersect(runValue(x), as.vector(y))) }) setMethod("setdiff", c("Rle", "Rle"), function(x, y) { Rle(setdiff(runValue(x), runValue(y))) }) setMethod("setdiff", c("ANY", "Rle"), function(x, y) { Rle(setdiff(as.vector(x), runValue(y))) }) setMethod("setdiff", c("Rle", "ANY"), function(x, y) { Rle(setdiff(runValue(x), as.vector(y))) }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### The "show" method ### setMethod("show", "Rle", function(object) { lo <- length(object) nr <- nrun(object) halfWidth <- getOption("width") %/% 2L cat(classNameForDisplay(runValue(object)), "-Rle of length ", as.character(as.LLint(lo)), " with ", nr, ifelse(nr == 1, " run\n", " runs\n"), sep = "") first <- max(1L, halfWidth) showMatrix <- rbind(as.character(head(runLength(object), first)), as.character(head(runValue(object), first))) if (nr > first) { last <- min(nr - first, halfWidth) showMatrix <- cbind(showMatrix, rbind(as.character(tail(runLength(object), last)), as.character(tail(runValue(object), last)))) } if (is.character(runValue(object))) { showMatrix[2L,] <- paste("\"", showMatrix[2L,], "\"", sep = "") } showMatrix <- format(showMatrix, justify = "right") cat(labeledLine(" Lengths", showMatrix[1L,], count = FALSE)) cat(labeledLine(" Values ", showMatrix[2L,], count = FALSE)) if (is.factor(runValue(object))) cat(labeledLine("Levels", levels(object))) }) setMethod("showAsCell", "Rle", function(object) as.vector(object)) S4Vectors/R/Rle-utils.R0000644000175400017540000006301013175714520015645 0ustar00biocbuildbiocbuild### ========================================================================= ### Common operations on Rle objects ### ------------------------------------------------------------------------- ### ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Group generic methods ### .sumprodRle <- function(e1, e2, na.rm = FALSE) { n1 <- length(e1) n2 <- length(e2) if (n1 == 0 || n2 == 0) { ends <- integer(0) which1 <- integer(0) which2 <- integer(0) } else { n <- max(n1, n2) if (max(n1, n2) %% min(n1, n2) != 0) warning("longer object length is not a multiple of shorter object length") if (n1 < n) e1 <- rep(e1, length.out = n) if (n2 < n) e2 <- rep(e2, length.out = n) # ends <- sort(unique(c(end(e1), end(e2)))) ends <- sortedMerge(end(e1), end(e2)) which1 <- findIntervalAndStartFromWidth(ends, runLength(e1))[["interval"]] which2 <- findIntervalAndStartFromWidth(ends, runLength(e2))[["interval"]] } lengths <- diffWithInitialZero(ends) values <- runValue(e1)[which1] * runValue(e2)[which2] sum(lengths * values, na.rm = na.rm) } setMethod("Ops", signature(e1 = "Rle", e2 = "Rle"), function(e1, e2) { n1 <- length(e1) n2 <- length(e2) if (n1 == 0 || n2 == 0) { ends <- integer(0) which1 <- integer(0) which2 <- integer(0) } else { n <- max(n1, n2) if (max(n1, n2) %% min(n1, n2) != 0) warning("longer object length is not a multiple of shorter object length") if (n1 < n) e1 <- rep(e1, length.out = n) if (n2 < n) e2 <- rep(e2, length.out = n) # ends <- sort(unique(c(end(e1), end(e2)))) ends <- sortedMerge(end(e1), end(e2)) which1 <- findIntervalAndStartFromWidth(ends, runLength(e1))[["interval"]] which2 <- findIntervalAndStartFromWidth(ends, runLength(e2))[["interval"]] } new_Rle(callGeneric(runValue(e1)[which1], runValue(e2)[which2]), diffWithInitialZero(ends)) }) setMethod("Ops", signature(e1 = "Rle", e2 = "vector"), function(e1, e2) callGeneric(e1, Rle(e2))) setMethod("Ops", signature(e1 = "vector", e2 = "Rle"), function(e1, e2) callGeneric(Rle(e1), e2)) setMethod("Math", "Rle", function(x) switch(.Generic, cumsum = { whichZero <- which(runValue(x) == 0) widthZero <- runLength(x)[whichZero] startZero <- cumsum(c(1L, runLength(x)))[whichZero] y <- x y@lengths[y@values == 0] <- 1L values <- cumsum(as.vector(y)) lengths <- rep.int(1L, length(values)) lengths[startZero - c(0L, cumsum(head(widthZero, -1) - 1L))] <- widthZero new_Rle(values, lengths) }, cumprod = { whichOne <- which(runValue(x) == 0) widthOne <- runLength(x)[whichOne] startOne <- cumsum(c(1L, runLength(x)))[whichOne] y <- x y@lengths[y@values == 0] <- 1L values <- cumprod(as.vector(y)) lengths <- rep.int(1L, length(values)) lengths[startOne - c(0L, cumsum(head(widthOne, -1) - 1L))] <- widthOne new_Rle(values, lengths) }, new_Rle(callGeneric(runValue(x)), runLength(x)))) setMethod("Math2", "Rle", function(x, digits) { if (missing(digits)) digits <- ifelse(.Generic == "round", 0, 6) new_Rle(callGeneric(runValue(x), digits = digits), runLength(x)) }) setMethod("Summary", "Rle", function(x, ..., na.rm = FALSE) { switch(.Generic, all =, any =, min =, max =, range = callGeneric(runValue(x), ..., na.rm=na.rm), sum = withCallingHandlers({ sum(runValue(x) * runLength(x), ..., na.rm=na.rm) }, warning=function(warn) { msg <- conditionMessage(warn) exp <- gettext("integer overflow - use sum(as.numeric(.))", domain="R") if (msg == exp) { msg <- sub("sum\\(as.numeric\\(.\\)\\)", "runValue(.) <- as.numeric(runValue(.))", msg) warning(simpleWarning(msg, conditionCall(warn))) invokeRestart("muffleWarning") } else { warn } }), prod = prod(runValue(x) ^ runLength(x), ..., na.rm=na.rm)) } ) setMethod("Complex", "Rle", function(z) new_Rle(callGeneric(runValue(z)), runLength(z))) ### S3/S4 combo for summary.Rle summary.Rle <- function(object, ..., digits) { value <- if (is.logical(runValue(object))) c(ValueMode = "logical", { tb <- table(object, exclude = NULL) if (!is.null(n <- dimnames(tb)[[1L]]) && any(iN <- is.na(n))) dimnames(tb)[[1L]][iN] <- "NA's" tb }) else if (is.numeric(runValue(object))) { nas <- is.na(object) object <- object[!nas] qq <- quantile(object) qq <- c(qq[1L:3L], mean(object), qq[4L:5L]) if (!missing(digits)) qq <- signif(qq, digits) names(qq) <- c("Min.", "1st Qu.", "Median", "Mean", "3rd Qu.", "Max.") if (any(nas)) c(qq, `NA's` = sum(nas)) else qq } else c(Length = length(object), Class = class(object), ValueMode = mode(runValue(object))) class(value) <- c("summaryDefault", "table") value } setMethod("summary", "Rle", summary.Rle) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Other logical data methods ### setMethod("!", "Rle", function(x) new_Rle(!runValue(x), runLength(x))) setMethod("which", "Rle", function(x, arr.ind = FALSE) { if (!is.logical(runValue(x))) stop("argument to 'which' is not logical") ok <- runValue(x) ok[is.na(ok)] <- FALSE from <- start(x)[ok] to <- end(x)[ok] if (length(from) == 0) integer(0) else mseq(from, to) }) setMethod("which.max", "Rle", function(x) { start(x)[which.max(runValue(x))] }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Other numerical data methods ### diff.Rle <- function(x, ...) diff(x, ...) .diff.Rle <- function(x, lag = 1, differences = 1) { if (!isSingleNumber(lag) || lag < 1L || !isSingleNumber(differences) || differences < 1L) stop("'lag' and 'differences' must be integers >= 1") lag <- as.integer(lag) differences <- as.integer(differences) if (lag * differences >= length(x)) return(Rle(vector(class(runValue(x))))) for (i in seq_len(differences)) { n <- length(x) x <- window(x, 1L + lag, n) - window(x, 1L, n - lag) } x } setMethod("diff", "Rle", .diff.Rle) .psummary.Rle <- function(FUN, ..., MoreArgs = NULL) { args <- list(...) ends <- end(args[[1L]]) if (length(args) > 1) { for (i in 2:length(args)) ends <- sortedMerge(ends, end(args[[i]])) } new_Rle(do.call(FUN, c(lapply(args, function(x) { runs <- findIntervalAndStartFromWidth(ends, runLength(x))[["interval"]] runValue(x)[runs] }), MoreArgs)), diffWithInitialZero(ends)) } setMethod("pmax", "Rle", function(..., na.rm = FALSE) .psummary.Rle(pmax, ..., MoreArgs = list(na.rm = na.rm))) setMethod("pmin", "Rle", function(..., na.rm = FALSE) .psummary.Rle(pmin, ..., MoreArgs = list(na.rm = na.rm))) setMethod("pmax.int", "Rle", function(..., na.rm = FALSE) .psummary.Rle(pmax.int, ..., MoreArgs = list(na.rm = na.rm))) setMethod("pmin.int", "Rle", function(..., na.rm = FALSE) .psummary.Rle(pmin.int, ..., MoreArgs = list(na.rm = na.rm))) ### S3/S4 combo for mean.Rle .mean.Rle <- function(x, na.rm = FALSE) { if (is.integer(runValue(x))) runValue(x) <- as.double(runValue(x)) if (na.rm) n <- length(x) - sum(runLength(x)[is.na(runValue(x))]) else n <- length(x) sum(x, na.rm = na.rm) / n } mean.Rle <- function(x, ...) .mean.Rle(x, ...) setMethod("mean", "Rle", .mean.Rle) setMethod("var", signature = c(x = "Rle", y = "missing"), function(x, y = NULL, na.rm = FALSE, use) { if (na.rm) n <- length(x) - sum(runLength(x)[is.na(runValue(x))]) else n <- length(x) centeredValues <- runValue(x) - mean(x, na.rm = na.rm) sum(runLength(x) * centeredValues * centeredValues, na.rm = na.rm) / (n - 1) }) setMethod("var", signature = c(x = "Rle", y = "Rle"), function(x, y = NULL, na.rm = FALSE, use) { # Direct change to slots for fast computation x@values <- runValue(x) - mean(x, na.rm = na.rm) y@values <- runValue(y) - mean(y, na.rm = na.rm) z <- x * y if (na.rm) n <- length(z) - sum(runLength(z)[is.na(runValue(z))]) else n <- length(z) sum(z, na.rm = na.rm) / (n - 1) }) setMethod("cov", signature = c(x = "Rle", y = "Rle"), function(x, y = NULL, use = "everything", method = c("pearson", "kendall", "spearman")) { use <- match.arg(use, c("all.obs", "complete.obs", "pairwise.complete.obs", "everything", "na.or.complete")) method <- match.arg(method) if (method != "pearson") stop("only 'pearson' method is supported for Rle objects") na.rm <- use %in% c("complete.obs", "pairwise.complete.obs", "na.or.complete") if (use == "all.obs" && (anyMissing(x) || anyMissing(y))) stop("missing observations in cov/cor") var(x, y, na.rm = na.rm) }) setMethod("cor", signature = c(x = "Rle", y = "Rle"), function(x, y = NULL, use = "everything", method = c("pearson", "kendall", "spearman")) { use <- match.arg(use, c("all.obs", "complete.obs", "pairwise.complete.obs", "everything", "na.or.complete")) method <- match.arg(method) if (method != "pearson") stop("only 'pearson' method is supported for Rle objects") na.rm <- use %in% c("complete.obs", "pairwise.complete.obs", "na.or.complete") isMissing <- is.na(x) | is.na(y) if (any(isMissing)) { if (use == "all.obs") { stop("missing observations in cov/cor") } else if (na.rm) { x <- x[!isMissing] y <- y[!isMissing] } } # Direct change to slots for fast computation x@values <- runValue(x) - mean(x, na.rm = na.rm) y@values <- runValue(y) - mean(y, na.rm = na.rm) .sumprodRle(x, y, na.rm = na.rm) / (sqrt(sum(runLength(x) * runValue(x) * runValue(x), na.rm = na.rm)) * sqrt(sum(runLength(y) * runValue(y) * runValue(y), na.rm = na.rm))) }) setMethod("sd", signature = c(x = "Rle"), function(x, na.rm = FALSE) sqrt(var(x, na.rm = na.rm))) ### S3/S4 combo for median.Rle ### FIXME: code duplication needed for S3 / S4 dispatch ### drop NA's here, so dropRle==TRUE allows x[FALSE][NA] in median.default ### FIXME: Remove these methods in R 3.5 median.Rle <- function(x, na.rm = FALSE, ...) { if (na.rm) x <- x[!is.na(x)] oldOption <- getOption("dropRle") options("dropRle" = TRUE) on.exit(options("dropRle" = oldOption)) NextMethod("median", na.rm=FALSE) } setMethod("median", "Rle", function(x, na.rm = FALSE) { if (na.rm) x <- x[!is.na(x)] oldOption <- getOption("dropRle") options("dropRle" = TRUE) on.exit(options("dropRle" = oldOption)) callNextMethod(x=x, na.rm=FALSE) }) ### FIXME: Remove this in R 3.5 quantile.Rle <- function(x, probs = seq(0, 1, 0.25), na.rm = FALSE, names = TRUE, type = 7, ...) { if (na.rm) x <- x[!is.na(x)] oldOption <- getOption("dropRle") options("dropRle" = TRUE) on.exit(options("dropRle" = oldOption)) NextMethod("quantile", na.rm=FALSE) } ### FIXME: Remove this in R 3.5 setMethod("mad", "Rle", function(x, center = median(x), constant = 1.4826, na.rm = FALSE, low = FALSE, high = FALSE) { if (na.rm) x <- x[!is.na(x)] oldOption <- getOption("dropRle") options("dropRle" = TRUE) on.exit(options("dropRle" = oldOption)) callNextMethod(x=x, center=center, constant=constant, na.rm=FALSE, low=FALSE, high=FALSE) }) setMethod("IQR", "Rle", function(x, na.rm = FALSE) diff(quantile(x, c(0.25, 0.75), na.rm = na.rm, names = FALSE))) ### FIXME: Remove this in R 3.5 setMethod("smoothEnds", "Rle", function(y, k = 3) { oldOption <- getOption("dropRle") options("dropRle" = TRUE) on.exit(options("dropRle" = oldOption)) callNextMethod(y = y, k = k) }) setGeneric("runmean", signature="x", function(x, k, endrule = c("drop", "constant"), ...) standardGeneric("runmean")) setMethod("runmean", "Rle", function(x, k, endrule = c("drop", "constant"), na.rm = FALSE) { sums <- runsum(x, k, endrule, na.rm) if (na.rm) { d <- Rle(rep(1L, length(x))) d[is.na(x)] <- 0L sums / runsum(d, k, endrule, na.rm) } else { sums / k } }) setMethod("runmed", "Rle", function(x, k, endrule = c("median", "keep", "drop", "constant"), algorithm = NULL, print.level = 0) { if (!all(is.finite(as.vector(x)))) stop("NA/NaN/Inf not supported in runmed,Rle-method") endrule <- match.arg(endrule) n <- length(x) k <- normargRunK(k = k, n = n, endrule = endrule) i <- (k + 1L) %/% 2L ans <- runq(x, k = k, i = i) if (endrule == "constant") { runLength(ans)[1L] <- runLength(ans)[1L] + (i - 1L) runLength(ans)[nrun(ans)] <- runLength(ans)[nrun(ans)] + (i - 1L) } else if (endrule != "drop") { ans <- c(head(x, i - 1L), ans, tail(x, i - 1L)) if (endrule == "median") { ans <- smoothEnds(ans, k = k) } } ans }) setGeneric("runsum", signature="x", function(x, k, endrule = c("drop", "constant"), ...) standardGeneric("runsum")) setMethod("runsum", "Rle", function(x, k, endrule = c("drop", "constant"), na.rm = FALSE) { endrule <- match.arg(endrule) n <- length(x) k <- normargRunK(k = k, n = n, endrule = endrule) ans <- .Call2("Rle_runsum", x, as.integer(k), as.logical(na.rm), PACKAGE="S4Vectors") if (endrule == "constant") { j <- (k + 1L) %/% 2L runLength(ans)[1L] <- runLength(ans)[1L] + (j - 1L) runLength(ans)[nrun(ans)] <- runLength(ans)[nrun(ans)] + (j - 1L) } ans }) setGeneric("runwtsum", signature="x", function(x, k, wt, endrule = c("drop", "constant"), ...) standardGeneric("runwtsum")) setMethod("runwtsum", "Rle", function(x, k, wt, endrule = c("drop", "constant"), na.rm = FALSE) { endrule <- match.arg(endrule) n <- length(x) k <- normargRunK(k = k, n = n, endrule = endrule) ans <- .Call2("Rle_runwtsum", x, as.integer(k), as.numeric(wt), as.logical(na.rm), PACKAGE="S4Vectors") if (endrule == "constant") { j <- (k + 1L) %/% 2L runLength(ans)[1L] <- runLength(ans)[1L] + (j - 1L) runLength(ans)[nrun(ans)] <- runLength(ans)[nrun(ans)] + (j - 1L) } ans }) setGeneric("runq", signature="x", function(x, k, i, endrule = c("drop", "constant"), ...) standardGeneric("runq")) setMethod("runq", "Rle", function(x, k, i, endrule = c("drop", "constant"), na.rm = FALSE) { endrule <- match.arg(endrule) n <- length(x) k <- normargRunK(k = k, n = n, endrule = endrule) ans <- .Call2("Rle_runq", x, as.integer(k), as.integer(i), as.logical(na.rm), PACKAGE="S4Vectors") if (endrule == "constant") { j <- (k + 1L) %/% 2L runLength(ans)[1L] <- runLength(ans)[1L] + (j - 1L) runLength(ans)[nrun(ans)] <- runLength(ans)[nrun(ans)] + (j - 1L) } ans }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Other character data methods ### setMethod("nchar", "Rle", function(x, type="chars", allowNA=FALSE, keepNA=NA) new_Rle(nchar(runValue(x), type=type, allowNA=allowNA, keepNA=keepNA), runLength(x)) ) setMethod("substr", "Rle", function(x, start, stop) { if (is.factor(runValue(x))) { levels(x) <- substr(levels(x), start = start, stop = stop) } else { runValue(x) <- substr(runValue(x), start = start, stop = stop) } x }) setMethod("substring", "Rle", function(text, first, last = 1000000L) { if (is.factor(runValue(text))) { levels(text) <- substring(levels(text), first = first, last = last) } else { runValue(text) <- substring(runValue(text), first = first, last = last) } text }) setMethod("chartr", c(old = "ANY", new = "ANY", x = "Rle"), function(old, new, x) { if (is.factor(runValue(x))) { levels(x) <- chartr(old = old, new = new, levels(x)) } else { runValue(x) <- chartr(old = old, new = new, runValue(x)) } x }) setMethod("tolower", "Rle", function(x) { if (is.factor(runValue(x))) { levels(x) <- tolower(levels(x)) } else { runValue(x) <- tolower(runValue(x)) } x }) setMethod("toupper", "Rle", function(x) { if (is.factor(runValue(x))) { levels(x) <- toupper(levels(x)) } else { runValue(x) <- toupper(runValue(x)) } x }) setMethod("sub", signature = c(pattern = "ANY", replacement = "ANY", x = "Rle"), function(pattern, replacement, x, ignore.case = FALSE, perl = FALSE, fixed = FALSE, useBytes = FALSE) { if (is.factor(runValue(x))) { levels(x) <- sub(pattern = pattern, replacement = replacement, x = levels(x), ignore.case = ignore.case, perl = perl, fixed = fixed, useBytes = useBytes) } else { runValue(x) <- sub(pattern = pattern, replacement = replacement, x = runValue(x), ignore.case = ignore.case, perl = perl, fixed = fixed, useBytes = useBytes) } x }) setMethod("gsub", signature = c(pattern = "ANY", replacement = "ANY", x = "Rle"), function(pattern, replacement, x, ignore.case = FALSE, perl = FALSE, fixed = FALSE, useBytes = FALSE) { if (is.factor(runValue(x))) { levels(x) <- gsub(pattern = pattern, replacement = replacement, x = levels(x), ignore.case = ignore.case, perl = perl, fixed = fixed, useBytes = useBytes) } else { runValue(x) <- gsub(pattern = pattern, replacement = replacement, x = runValue(x), ignore.case = ignore.case, perl = perl, fixed = fixed, useBytes = useBytes) } x }) .pasteTwoRles <- function(e1, e2, sep = " ", collapse = NULL) { n1 <- length(e1) n2 <- length(e2) if (n1 == 0 || n2 == 0) { ends <- integer(0) which1 <- integer(0) which2 <- integer(0) } else { n <- max(n1, n2) if (max(n1, n2) %% min(n1, n2) != 0) warning("longer object length is not a multiple of shorter object length") if (n1 < n) e1 <- rep(e1, length.out = n) if (n2 < n) e2 <- rep(e2, length.out = n) # ends <- sort(unique(c(end(e1), end(e2)))) ends <- sortedMerge(end(e1), end(e2)) which1 <- findIntervalAndStartFromWidth(ends, runLength(e1))[["interval"]] which2 <- findIntervalAndStartFromWidth(ends, runLength(e2))[["interval"]] } if (is.null(collapse) && is.factor(runValue(e1)) && is.factor(runValue(e2))) { levelsTable <- expand.grid(levels(e2), levels(e1), KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE) values <- structure((as.integer(runValue(e1)[which1]) - 1L) * nlevels(e2) + as.integer(runValue(e2)[which2]), levels = paste(levelsTable[[2L]], levelsTable[[1L]], sep = sep), class = "factor") } else { values <- paste(runValue(e1)[which1], runValue(e2)[which2], sep = sep, collapse = collapse) } new_Rle(values, diffWithInitialZero(ends)) } setMethod("paste", "Rle", function(..., sep = " ", collapse = NULL) { args <- list(...) ans <- args[[1L]] if (length(args) > 1) { for (i in 2:length(args)) { ans <- .pasteTwoRles(ans, args[[i]], sep = sep, collapse = collapse) } } ans }) setMethod("grepl", c("ANY", "Rle"), function(pattern, x, ignore.case = FALSE, perl = FALSE, fixed = FALSE, useBytes = FALSE) { v <- grepl(pattern, runValue(x), ignore.case, perl, fixed, useBytes) Rle(v, runLength(x)) }) setMethod("grep", c("ANY", "Rle"), function(pattern, x, ignore.case = FALSE, perl = FALSE, value = FALSE, fixed = FALSE, useBytes = FALSE, invert = FALSE) { if (isTRUE(value)) { v <- grep(pattern, x, ignore.case, perl, value=TRUE, fixed, useBytes, invert) Rle(v, runLength(x)) } else { # obviously inefficient Rle(callNextMethod()) } }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Other factor data methods ### ### S3/S4 combo for levels.Rle levels.Rle <- function(x) levels(runValue(x)) setMethod("levels", "Rle", levels.Rle) setReplaceMethod("levels", "Rle", function(x, value) { levels(x@values) <- value if (anyDuplicated(value)) x <- new_Rle(runValue(x), runLength(x)) x }) droplevels.Rle <- function(x, ...) droplevels(x, ...) .droplevels.Rle <- function(x) { if (!is.factor(runValue(x))) { stop("levels can only be dropped when runValue(x) is a factor") } runValue(x) <- droplevels(runValue(x)) x } setMethod("droplevels", "Rle", .droplevels.Rle) S4Vectors/R/S4-utils.R0000644000175400017540000002515713175714520015423 0ustar00biocbuildbiocbuild### ========================================================================= ### Some low-level S4 classes and utilities ### ------------------------------------------------------------------------- ### setClassUnion("character_OR_NULL", c("character", "NULL")) ### WARNING: The behavior of is.vector(), is( , "vector"), is.list(), and ### is( ,"list") makes no sense: ### 1. is.vector(matrix()) is FALSE but is(matrix(), "vector") is TRUE. ### 2. is.list(data.frame()) is TRUE but is(data.frame(), "list") is FALSE. ### 3. is(data.frame(), "list") is FALSE but extends("data.frame", "list") ### is TRUE. ### 4. is.vector(data.frame()) is FALSE but is.list(data.frame()) and ### is.vector(list()) are both TRUE. In other words: a data frame is a ### list and a list is a vector but a data frame is not a vector. ### 5. I'm sure there is more but you get it! ### Building our software on top of such a mess won't give us anything good. ### For example, it's not too surprising that the union class we define below ### is broken: ### 6. is(data.frame(), "vector_OR_factor") is TRUE even though ### is(data.frame(), "vector") and is(data.frame(), "factor") are both ### FALSE. ### Results above obtained with R-3.1.2 and R-3.2.0. ### TODO: Be brave and report this craziness to the R bug tracker. setClassUnion("vector_OR_factor", c("vector", "factor")) ### We define the coercion method below as a workaround to the following ### bug in R: ### ### setClass("A", representation(stuff="numeric")) ### setMethod("as.vector", "A", function(x, mode="any") x@stuff) ### ### a <- new("A", stuff=3:-5) ### > as.vector(a) ### [1] 3 2 1 0 -1 -2 -3 -4 -5 ### > as(a, "vector") ### Error in as.vector(from) : ### no method for coercing this S4 class to a vector ### > selectMethod("coerce", c("A", "vector")) ### Method Definition: ### ### function (from, to, strict = TRUE) ### { ### value <- as.vector(from) ### if (strict) ### attributes(value) <- NULL ### value ### } ### ### ### Signatures: ### from to ### target "A" "vector" ### defined "ANY" "vector" ### > setAs("ANY", "vector", function(from) as.vector(from)) ### > as(a, "vector") ### [1] 3 2 1 0 -1 -2 -3 -4 -5 ### ### ML: The problem is that the default coercion method is defined ### in the methods namespace, which does not see the as.vector() ### generic we define. Solution in this case would probably be to ### make as.vector a dispatching primitive like as.character(), but ### the "mode" argument makes things complicated. setAs("ANY", "vector", function(from) as.vector(from)) coercerToClass <- function(class) { if (extends(class, "vector")) .as <- get(paste0("as.", class)) else .as <- function(from) as(from, class) function(from) { to <- .as(from) if (!is.null(names(from)) && is.null(names(to))) { names(to) <- names(from) } to } } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### setValidity2(), new2() ### ### Give more contol over when object validation should happen. ### .validity_options <- new.env(hash=TRUE, parent=emptyenv()) assign("debug", FALSE, envir=.validity_options) assign("disabled", FALSE, envir=.validity_options) debugValidity <- function(debug) { if (missing(debug)) return(get("debug", envir=.validity_options)) debug <- isTRUE(debug) assign("debug", debug, envir=.validity_options) debug } disableValidity <- function(disabled) { if (missing(disabled)) return(get("disabled", envir=.validity_options)) disabled <- isTRUE(disabled) assign("disabled", disabled, envir=.validity_options) disabled } setValidity2 <- function(Class, valid.func, where=topenv(parent.frame())) { setValidity(Class, function(object) { if (disableValidity()) return(TRUE) if (debugValidity()) { whoami <- paste("validity method for", Class, "object") cat("[debugValidity] Entering ", whoami, "\n", sep="") on.exit(cat("[debugValidity] Leaving ", whoami, "\n", sep="")) } problems <- valid.func(object) if (isTRUE(problems) || length(problems) == 0L) return(TRUE) problems }, where=where ) } new2 <- function(..., check=TRUE) { if (!isTRUEorFALSE(check)) stop("'check' must be TRUE or FALSE") old_val <- disableValidity() on.exit(disableValidity(old_val)) disableValidity(!check) new(...) } stopIfProblems <- function(problems) if (!is.null(problems)) stop(paste(problems, collapse="\n ")) ### 'signatures' must be a list of character vectors. To use when many methods ### share the same implementation. setMethods <- function(f, signatures=list(), definition, where=topenv(parent.frame()), ...) { for (signature in signatures) setMethod(f, signature=signature, definition, where=where, ...) } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### setReplaceAs() ### ### Supplying a "coerce<-" method to the 'replace' argument of setAs() is ### optional but not supplying a "coerce" method (thru the 'def' argument). ### However there are legitimate situations where we want to define a ### "coerce<-" method only. setReplaceAs() can be used for that. ### ### Same interface as setAs() (but no 'replace' argument). setReplaceAs <- function(from, to, def, where=topenv(parent.frame())) { ## Code below taken from setAs() and slightly adapted. args <- formalArgs(def) if (identical(args, c("from", "to", "value"))) { method <- def } else { if (length(args) != 2L) stop(gettextf("the method definition must be a function of 2 ", "arguments, got %d", length(args)), domain=NA) def <- body(def) if (!identical(args, c("from", "value"))) { ll <- list(quote(from), quote(value)) names(ll) <- args def <- substituteDirect(def, ll) warning(gettextf("argument names in method definition changed ", "to agree with 'coerce<-' generic:\n%s", paste(deparse(def), sep="\n ")), domain=NA) } method <- eval(function(from, to, value) NULL) functionBody(method, envir=.GlobalEnv) <- def } setMethod("coerce<-", c(from, to), method, where=where) } ### We also provide 2 canonical "coerce<-" methods that can be used when the ### "from class" is a subclass of the "to class". They do what the methods ### automatically generated by the methods package are expected to do except ### that the latter are broken. See ### https://bugs.r-project.org/bugzilla/show_bug.cgi?id=16421 ### for the bug report. ### Naive/straight-forward implementation (easy to understand so it explains ### the semantic of canonical "coerce<-"). canonical_replace_as <- function(from, to, value) { for (what in slotNames(to)) slot(from, what) <- slot(value, what) from } ### Does the same as canonical_replace_as() but tries to generate only one ### copy of 'from' instead of one copy each time one of its slots is modified. canonical_replace_as_2 <- function(from, to, value) { firstTime <- TRUE for (what in slotNames(to)) { v <- slot(value, what) if (firstTime) { slot(from, what, FALSE) <- v firstTime <- FALSE } else { `slot<-`(from, what, FALSE, v) } } from } ### Usage (assuming B is a subclass of A): ### ### setReplaceAs("B", "A", canonical_replace_as_2) ### ### Note that this is used in the VariantAnnotation package. ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Manipulating the prototype of an S4 class. ### ### Gets or sets the default value of the given slot of the given class by ### reading or altering the prototype of the class. setDefaultSlotValue() is ### typically used in the .onLoad() hook of a package when the DLL of the ### package needs to be loaded *before* the default value of a slot can be ### computed. getDefaultSlotValue <- function(classname, slotname, where=.GlobalEnv) { classdef <- getClass(classname, where=where) if (!(slotname %in% names(attributes(classdef@prototype)))) stop("prototype for class \"", classname, "\" ", "has no \"", slotname, "\" attribute") attr(classdef@prototype, slotname, exact=TRUE) } setDefaultSlotValue <- function(classname, slotname, value, where=.GlobalEnv) { classdef <- getClass(classname, where=where) if (!(slotname %in% names(attributes(classdef@prototype)))) stop("prototype for class \"", classname, "\" ", "has no \"", slotname, "\" attribute") attr(classdef@prototype, slotname) <- value assignClassDef(classname, classdef, where=where) ## Re-compute the complete definition of the class. methods::setValidity() ## does that after calling assignClassDef() so we do it too. resetClass(classname, classdef, where=where) } setPrototypeFromObject <- function(classname, object, where=.GlobalEnv) { classdef <- getClass(classname, where=where) if (class(object) != classname) stop("'object' must be a ", classname, " instance") object_attribs <- attributes(object) object_attribs$class <- NULL ## Sanity check. stopifnot(identical(names(object_attribs), names(attributes(classdef@prototype)))) attributes(classdef@prototype) <- object_attribs assignClassDef(classname, classdef, where=where) ## Re-compute the complete definition of the class. methods::setValidity() ## does that after calling assignClassDef() so we do it too. resetClass(classname, classdef, where=where) } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### allEqualsS4: just a hack that auomatically digs down ### deeply nested objects to detect differences. ### .allEqualS4 <- function(x, y) { eq <- all.equal(x, y) canCompareS4 <- !isTRUE(eq) && isS4(x) && isS4(y) && class(x) == class(y) if (canCompareS4) { child.diffs <- mapply(.allEqualS4, attributes(x), attributes(y), SIMPLIFY=FALSE) child.diffs$class <- NULL dfs <- mapply(function(d, nm) { if (!is.data.frame(d)) { data.frame(comparison = I(list(d))) } else d }, child.diffs, names(child.diffs), SIMPLIFY=FALSE) do.call(rbind, dfs) } else { eq[1] } } allEqualS4 <- function(x, y) { eq <- .allEqualS4(x, y) setNames(eq$comparison, rownames(eq))[sapply(eq$comparison, Negate(isTRUE))] } S4Vectors/R/SimpleList-class.R0000644000175400017540000001641313175714520017162 0ustar00biocbuildbiocbuild### ========================================================================= ### SimpleList objects ### ------------------------------------------------------------------------- setClass("SimpleList", contains="List", representation( listData="list" ) ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Accessor methods. ### setMethod("length", "SimpleList", function(x) length(as.list(x))) setMethod("names", "SimpleList", function(x) names(as.list(x))) setReplaceMethod("names", "SimpleList", function(x, value) { names(x@listData) <- value x }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Constructor. ### ### Low-level. NOT exported. ### Stuff to put in elementMetadata slot can be passed either with ### new_SimpleList_from_list(..., elementMetadata=somestuff) ### or with ### new_SimpleList_from_list(..., mcols=somestuff) ### The latter is the new recommended form. new_SimpleList_from_list <- function(Class, x, ..., mcols) { if (!extends(Class, "SimpleList")) stop("class ", Class, " must extend SimpleList") if (!is.list(x)) stop("'x' must be a list") if (is.array(x)) { # drop any unwanted dimensions tmp_names <- names(x) dim(x) <- NULL # clears the names names(x) <- tmp_names } class(x) <- "list" ans_elementType <- elementType(new(Class)) if (!all(sapply(x, function(xi) extends(class(xi), ans_elementType)))) stop("all elements in 'x' must be ", ans_elementType, " objects") if (missing(mcols)) return(new2(Class, listData=x, ..., check=FALSE)) new2(Class, listData=x, ..., elementMetadata=mcols, check=FALSE) } SimpleList <- function(...) { args <- list(...) ## The extends(class(x), "list") test is NOT equivalent to is.list(x) or ## to is(x, "list") or to inherits(x, "list"). Try for example with ## x <- data.frame() or x <- matrix(list()). We use the former below ## because it seems to closely mimic what the methods package uses for ## checking the "listData" slot of the SimpleList object that we try to ## create later with new(). For example if we were using is.list() instead ## of extends(), the test would pass on matrix(list()) but new() then would ## fail with the following message: ## Error in validObject(.Object) : ## invalid class “SimpleList” object: invalid object for slot "listData" ## in class "SimpleList": got class "matrix", should be or extend class ## "list" if (length(args) == 1L && extends(class(args[[1L]]), "list")) args <- args[[1L]] new2("SimpleList", listData=args, check=FALSE) } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Validity. ### .valid.SimpleList.listData <- function(x) { elementTypeX <- elementType(x) if (!all(sapply(as.list(x), function(xi) extends(class(xi), elementTypeX)))) return(paste("the 'listData' slot must be a list containing", elementTypeX, "objects")) NULL } .valid.SimpleList <- function(x) { c(.valid.SimpleList.listData(x)) } setValidity2("SimpleList", .valid.SimpleList) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### classNameForDisplay() ### setMethod("classNameForDisplay", "SimpleList", function(x) sub("^Simple", "", class(x)) ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Subsetting. ### setMethod("parallelSlotNames", "SimpleList", function(x) c("listData", callNextMethod())) setMethod("getListElement", "SimpleList", function(x, i, exact=TRUE) getListElement(x@listData, i, exact=exact) ) setMethod("setListElement", "SimpleList", function(x, i, value) { x@listData[[i]] <- value x } ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Combining. ### ## NOTE: while the 'c' function does not have an 'x', the generic does ## c() is a primitive, so 'x' can be missing; dispatch is by position, ## although sometimes this does not work so well, so it's best to keep ## names off the parameters whenever feasible. #setMethod("c", "SimpleList", # function(x, ..., recursive = FALSE) { # slot(x, "listData") <- # do.call(c, lapply(unname(list(x, ...)), as.list)) # if (!is.null(mcols(x))) # mcols(x) <- rbind.mcols(x, ...) # x # }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Looping. ### ### TODO: easily generalized to List setMethod("lapply", "SimpleList", function(X, FUN, ...) lapply(as.list(X), FUN = FUN, ...)) setMethod("endoapply", "SimpleList", function(X, FUN, ...) { FUN <- match.fun(FUN) listData <- lapply(X, FUN = FUN, ...) elementTypeX <- elementType(X) if (!all(sapply(listData, function(Xi) extends(class(Xi), elementTypeX)))) stop("all results must be of class '", elementTypeX, "'") slot(X, "listData", check=FALSE) <- listData X }) setMethod("mendoapply", "SimpleList", function(FUN, ..., MoreArgs = NULL) { X <- list(...)[[1L]] elementTypeX <- elementType(X) FUN <- match.fun(FUN) listData <- mapply(FUN = FUN, ..., MoreArgs = MoreArgs, SIMPLIFY = FALSE) if (!all(sapply(listData, function(Xi) extends(class(Xi), elementTypeX)))) stop("all results must be of class '", elementTypeX, "'") slot(X, "listData", check=FALSE) <- listData X }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Coercion. ### .as.list.SimpleList <- function(x, use.names=TRUE) { if (!isTRUEorFALSE(use.names)) stop("'use.names' must be TRUE or FALSE") ans <- x@listData if (!use.names) names(ans) <- NULL ans } setMethod("as.list", "SimpleList", .as.list.SimpleList) setAs("ANY", "SimpleList", function(from) { coerceToSimpleList(from) }) setAs("list", "List", function(from) { coerceToSimpleList(from) }) coerceToSimpleList <- function(from, element.type, ...) { if (missing(element.type)) { if (is(from, "List")) element.type <- from@elementType else if (is.list(from)) element.type <- listElementType(from) else element.type <- class(from) } SimpleListClass <- listClassName("Simple", element.type) if (!is(from, SimpleListClass)) { listData <- as.list(from) if (!is.null(element.type)) listData <- lapply(listData, coercerToClass(element.type), ...) new_SimpleList_from_list(SimpleListClass, listData) } else { from } } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### unique() ### ### TODO: easily generalized to List .unique.SimpleList <- function(x, incomparables=FALSE, ...) { as(lapply(x, unique, incomparables=incomparables, ...), class(x)) } setMethod("unique", "SimpleList", .unique.SimpleList) S4Vectors/R/Vector-class.R0000644000175400017540000004773513175714520016352 0ustar00biocbuildbiocbuild### ========================================================================= ### Vector objects ### ------------------------------------------------------------------------- ### ### The Vector virtual class is a general container for storing a finite ### sequence i.e. an ordered finite collection of elements. ### setClassUnion("DataTable_OR_NULL", c("DataTable", "NULL")) setClass("Vector", contains="Annotated", representation( "VIRTUAL", elementMetadata="DataTable_OR_NULL" ) ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### parallelSlotNames() ### ### For internal use only. ### ### Must return the names of all the slots in Vector object 'x' that are ### "parallel" to 'x'. Slot 'foo' is considered to be "parallel" to 'x' if: ### (a) 'x@foo' is NULL or an object for which NROW() is equal to ### 'length(x)', and ### (b) the i-th element in 'x@foo' describes some component of the i-th ### element in 'x'. ### For example, the "start", "width", "NAMES", and "elementMetadata" slots ### of an IRanges object are parallel to the object. Note that the "NAMES" ### and "elementMetadata" slots can be set to NULL. ### The *first" slot name returned by parallelSlotNames() is used to get the ### length of 'x'. ### setGeneric("parallelSlotNames", function(x) standardGeneric("parallelSlotNames") ) setMethod("parallelSlotNames", "Vector", function(x) "elementMetadata") ### Methods for Vector subclasses only need to specify the parallel slots they ### add to their parent class. See Hits-class.R file for an example. ### parallelVectorNames() is for internal use only. setGeneric("parallelVectorNames", function(x) standardGeneric("parallelVectorNames")) setMethod("parallelVectorNames", "ANY", function(x) setdiff(colnames(as.data.frame(new(class(x)))), "value")) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Getters. ### setMethod("length", "Vector", function(x) NROW(slot(x, parallelSlotNames(x)[[1L]])) ) setMethod("lengths", "Vector", function(x, use.names=TRUE) { if (!isTRUEorFALSE(use.names)) stop("'use.names' must be TRUE or FALSE") ans <- elementNROWS(x) # This is wrong! See ?Vector for the details. if (!use.names) names(ans) <- NULL ans } ) setMethod("NROW", "Vector", function(x) length(x)) setMethod("ROWNAMES", "Vector", function(x) names(x)) ### 3 accessors for the same slot: elementMetadata(), mcols(), and values(). ### mcols() is the recommended one, use of elementMetadata() or values() is ### discouraged. setGeneric("elementMetadata", function(x, use.names=FALSE, ...) standardGeneric("elementMetadata") ) setMethod("elementMetadata", "Vector", function(x, use.names=FALSE, ...) { if (!isTRUEorFALSE(use.names)) stop("'use.names' must be TRUE or FALSE") ans <- x@elementMetadata if (use.names && !is.null(ans)) rownames(ans) <- names(x) ans } ) setGeneric("mcols", function(x, use.names=FALSE, ...) standardGeneric("mcols") ) setMethod("mcols", "Vector", function(x, use.names=FALSE, ...) elementMetadata(x, use.names=use.names, ...) ) setGeneric("values", function(x, ...) standardGeneric("values")) setMethod("values", "Vector", function(x, ...) elementMetadata(x, ...)) setMethod("anyNA", "Vector", function(x, recursive=FALSE) any(is.na(x))) setMethod("is.na", "Vector", function(x) rep(FALSE, length(x))) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Validity. ### .valid.Vector.length <- function(x) { x_len <- length(x) if (!isSingleNumber(x_len) || x_len < 0L) return("'length(x)' must be a single non-negative number") if (!is.null(attributes(x_len))) return("'length(x)' must be a single integer with no attributes") NULL } .valid.Vector.parallelSlots <- function(x) { x_len <- length(x) x_pslotnames <- parallelSlotNames(x) if (!is.character(x_pslotnames) || anyMissing(x_pslotnames) || anyDuplicated(x_pslotnames)) { msg <- c("'parallelSlotNames(x)' must be a character vector ", "with no NAs and no duplicates") return(paste(msg, collapse="")) } if (x_pslotnames[[length(x_pslotnames)]] != "elementMetadata") { msg <- c("last string in 'parallelSlotNames(x)' ", "must be \"elementMetadata\"") return(paste(msg, collapse="")) } for (slotname in x_pslotnames) { tmp <- slot(x, slotname) if (!(is.null(tmp) || NROW(tmp) == x_len)) { if (slotname == "elementMetadata") { what <- "mcols(x)" } else { what <- paste0("x@", slotname) } msg <- c("'", what, "' is not parallel to 'x'") return(paste(msg, collapse="")) } } NULL } .valid.Vector.names <- function(x) { x_names <- names(x) if (is.null(x_names)) return(NULL) if (!is.character(x_names) || !is.null(attributes(x_names))) { msg <- c("'names(x)' must be NULL or a character vector ", "with no attributes") return(paste(msg, collapse="")) } if (length(x_names) != length(x)) return("'names(x)' must be NULL or have the length of 'x'") NULL } .valid.Vector.mcols <- function(x) { x_mcols <- mcols(x) if (!is(x_mcols, "DataTable_OR_NULL")) return("'mcols(x)' must be a DataTable object or NULL") if (is.null(x_mcols)) return(NULL) ## 'x_mcols' is a DataTable object. x_mcols_rownames <- rownames(x_mcols) if (is.null(x_mcols_rownames)) return(NULL) if (!identical(x_mcols_rownames, names(x))) { msg <- c("the rownames of DataTable 'mcols(x)' ", "must match the names of 'x'") return(paste(msg, collapse="")) } NULL } .valid.Vector <- function(x) { c(.valid.Vector.length(x), .valid.Vector.parallelSlots(x), .valid.Vector.names(x), .valid.Vector.mcols(x)) } setValidity2("Vector", .valid.Vector) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Coercion. ### setMethod("as.logical", "Vector", function(x) as.vector(x, mode="logical") ) setMethod("as.integer", "Vector", function(x) as.vector(x, mode="integer") ) setMethod("as.numeric", "Vector", function(x) as.vector(x, mode="numeric") ) ### Even though as.double() is a generic function (as reported by ### 'getGeneric("as.double")', it seems impossible to define methods for this ### generic. Trying to do so like in the code below actually creates an ### "as.numeric" method. #setMethod("as.double", "Vector", # function(x) as.vector(x, mode="double") #) setMethod("as.complex", "Vector", function(x) as.vector(x, mode="complex") ) setMethod("as.character", "Vector", function(x) as.vector(x, mode="character") ) setMethod("as.raw", "Vector", function(x) as.vector(x, mode="raw") ) setAs("Vector", "vector", function(from) as.vector(from)) setAs("Vector", "logical", function(from) as.logical(from)) setAs("Vector", "integer", function(from) as.integer(from)) setAs("Vector", "numeric", function(from) as.numeric(from)) setAs("Vector", "complex", function(from) as.complex(from)) setAs("Vector", "character", function(from) as.character(from)) setAs("Vector", "raw", function(from) as.raw(from)) setAs("Vector", "factor", function(from) as.factor(from)) setAs("Vector", "data.frame", function(from) as.data.frame(from)) ### S3/S4 combo for as.data.frame.Vector as.data.frame.Vector <- function(x, row.names=NULL, optional=FALSE, ...) { as.data.frame(x, row.names=NULL, optional=optional, ...) } setMethod("as.data.frame", "Vector", function(x, row.names=NULL, optional=FALSE, ...) { x <- as.vector(x) as.data.frame(x, row.names=row.names, optional=optional, ...) }) as.matrix.Vector <- function(x, ...) { as.matrix(x) } setMethod("as.matrix", "Vector", function(x) { as.matrix(as.vector(x)) }) classNamespace <- function(x) { pkg <- packageSlot(class(x)) pvnEnv <- .GlobalEnv if (!is.null(pkg)) { pvnEnv <- getNamespace(pkg) } pvnEnv } makeFixedColumnEnv <- function(x, parent, tform = identity) { env <- new.env(parent=parent) pvnEnv <- classNamespace(x) lapply(c("names", parallelVectorNames(x)), function(nm) { accessor <- get(nm, pvnEnv, mode="function") makeActiveBinding(nm, function() { val <- tform(accessor(x)) rm(list=nm, envir=env) assign(nm, val, env) val }, env) }) env } setMethod("as.env", "Vector", function(x, enclos, tform = identity) { addSelfRef(x, makeFixedColumnEnv(x, as.env(mcols(x), enclos, tform), tform)) }) as.list.Vector <- function(x, ...) as.list(x, ...) setMethod("as.list", "Vector", function(x, ...) as.list(as(x, "List"), ...)) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Setters. ### setGeneric("elementMetadata<-", function(x, ..., value) standardGeneric("elementMetadata<-")) ### NOT exported but used in packages IRanges, GenomicRanges, ### SummarizedExperiment, GenomicAlignments, and maybe more... ### 3x faster than new("DataFrame", nrows=nrow). ### 500x faster than DataFrame(matrix(nrow=nrow, ncol=0L)). make_zero_col_DataFrame <- function(nrow) new2("DataFrame", nrows=nrow, check=FALSE) .normalize_mcols_replacement_value <- function(value, x) { x_slots <- getSlots(class(x)) ## Should never happen because 'x' should always be a Vector object so ## should always have the 'elementMetadata' slot. if (!("elementMetadata" %in% names(x_slots))) stop(wmsg("trying to set metadata columns on an object that does ", "not support them (i.e. with no 'elementMetadata' slot)")) mcols_class <- x_slots[["elementMetadata"]] if (is.null(value)) { if (is(NULL, mcols_class)) return(NULL) value <- make_zero_col_DataFrame(length(x)) } value <- as(value, mcols_class, strict=TRUE) ## From here 'value' is guaranteed to be a DataTable object. if (!is.null(rownames(value))) rownames(value) <- NULL V_recycle(value, x, x_what="value", skeleton_what="x") } setReplaceMethod("elementMetadata", "Vector", function(x, ..., value) { value <- .normalize_mcols_replacement_value(value, x) BiocGenerics:::replaceSlots(x, elementMetadata=value, check=FALSE) } ) setGeneric("mcols<-", function(x, ..., value) standardGeneric("mcols<-")) setReplaceMethod("mcols", "Vector", function(x, ..., value) `elementMetadata<-`(x, ..., value=value) ) setGeneric("values<-", function(x, ..., value) standardGeneric("values<-")) setReplaceMethod("values", "Vector", function(x, value) { elementMetadata(x) <- value x }) setGeneric("rename", function(x, ...) standardGeneric("rename")) .renameVector <- function(x, ...) { newNames <- c(...) if (!is.character(newNames) || any(is.na(newNames))) { stop("arguments in '...' must be character and not NA") } badOldNames <- setdiff(names(newNames), names(x)) if (length(badOldNames)) stop("Some 'from' names in value not found on 'x': ", paste(badOldNames, collapse = ", ")) names(x)[match(names(newNames), names(x))] <- newNames x } setMethod("rename", "vector", .renameVector) setMethod("rename", "Vector", .renameVector) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Subsetting. ### ### The "[" and "[<-" methods for Vector objects are just delegating to ### extractROWS() and replaceROWS() for performing the real work. Most of ### the times, the author of a Vector subclass only needs to implement an ### "extractROWS" and "replaceROWS" method for his/her objects. ### setMethod("[", "Vector", subset_along_ROWS) ### We provide a default "extractROWS" method for Vector objects that only ### subsets the individual parallel slots. That should be enough for most ### Vector derivatives that have parallelSlotNames() properly set. setMethod("extractROWS", "Vector", function(x, i) { i <- normalizeSingleBracketSubscript(i, x, as.NSBS=TRUE) x_pslotnames <- parallelSlotNames(x) ans_pslots <- lapply(setNames(x_pslotnames, x_pslotnames), function(slotname) extractROWS(slot(x, slotname), i)) ## Does NOT validate the object before returning it, because, most of ## the times, this is not needed. There are exceptions though. See ## for example the "extractROWS" method for Hits objects. do.call(BiocGenerics:::replaceSlots, c(list(x), ans_pslots, list(check=FALSE))) } ) setReplaceMethod("[", "Vector", function(x, i, j, ..., value) { if (!missing(j) || length(list(...)) > 0L) stop("invalid subsetting") i <- normalizeSingleBracketSubscript(i, x, as.NSBS=TRUE) li <- length(i) if (li == 0L) { ## Surprisingly, in that case, `[<-` on standard vectors does not ## even look at 'value'. So neither do we... return(x) } lv <- NROW(value) if (lv == 0L) stop("replacement has length zero") value <- normalizeSingleBracketReplacementValue(value, x) if (li != lv) { if (li %% lv != 0L) warning("number of values supplied is not a sub-multiple ", "of the number of values to be replaced") value <- extractROWS(value, rep(seq_len(lv), length.out=li)) } replaceROWS(x, i, value) } ) ### Work on any Vector object for which c() and extractROWS() work. ### Assume 'value' is compatible with 'x'. setMethod("replaceROWS", "Vector", function(x, i, value) { i <- normalizeSingleBracketSubscript(i, x, as.NSBS=TRUE) ## --<1>-- Concatenate 'x' and 'value' with c() ----- ## We assume that c() works on objects of class 'class(x)' and that it ## does the right thing i.e. that it returns an object of the same ## class as 'x' and of length 'length(x) + length(value)'. ans <- c(x, value) ## --<2>-- Subset 'c(x, value)' with extractROWS() ----- idx <- replaceROWS(seq_along(x), i, seq_along(value) + length(x)) ## Because of how we constructed it, 'idx' is guaranteed to be a valid ## subscript to use in 'extractROWS(ans, idx)'. By wrapping it inside a ## NativeNSBS object, extractROWS() won't waste time checking it or ## trying to normalize it. idx <- NativeNSBS(idx, length(ans), TRUE, FALSE) ## We assume that extractROWS() works on an object of class 'class(x)'. ans <- extractROWS(ans, idx) ## --<3>-- Restore the original decoration ----- metadata(ans) <- metadata(x) names(ans) <- names(x) ## However, we want the replaced elements in 'x' to get their ## metadata columns from 'value' so we do not restore the original ## metadata columns. See this thread on bioc-devel: ## https://stat.ethz.ch/pipermail/bioc-devel/2015-November/008319.html #mcols(ans) <- mcols(x) ans } ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Convenience wrappers for common subsetting operations. ### ### S3/S4 combo for window.Vector window.Vector <- function(x, ...) window(x, ...) Vector_window <- function(x, start=NA, end=NA, width=NA) { i <- RangeNSBS(x, start=start, end=end, width=width) extractROWS(x, i) } setMethod("window", "Vector", Vector_window) ### S3/S4 combo for head.Vector head.Vector <- function(x, ...) head(x, ...) setMethod("head", "Vector", head_along_ROWS) ## S3/S4 combo for tail.Vector tail.Vector <- function(x, ...) tail(x, ...) setMethod("tail", "Vector", tail_along_ROWS) setMethod("rep.int", "Vector", rep.int_along_ROWS) ## NOT exported. revROWS <- function(x) extractROWS(x, rev(seq_len(NROW(x)))) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Combining. ### ### Somewhat painful that we do not always have a DataFrame in elementMetadata ensureMcols <- function(x) { mc <- mcols(x) if (is.null(mc)) { mc <- make_zero_col_DataFrame(length(x)) } mc } rbind_mcols <- function(x, ...) { args <- c(if (!missing(x)) list(x), list(...)) mcols_list <- lapply(args, mcols) if (length(mcols_list) == 1L) return(mcols_list[[1L]]) mcols_is_null <- sapply(mcols_list, is.null) if (all(mcols_is_null)) return(NULL) mcols_list[mcols_is_null] <- lapply( args[mcols_is_null], function(arg) make_zero_col_DataFrame(length(arg)) ) colnames_list <- lapply(mcols_list, colnames) allCols <- unique(unlist(colnames_list, use.names=FALSE)) fillCols <- function(df) { if (nrow(df)) df[setdiff(allCols, colnames(df))] <- DataFrame(NA) df } do.call(rbind, lapply(mcols_list, fillCols)) } rbindRowOfNAsToMetadatacols <- function(x) { x_mcols <- mcols(x) if (!is.null(x_mcols)) mcols(x)[nrow(x_mcols)+1L,] <- NA x } ### FIXME: This method doesn't work properly on DataTable objects if 'after' ### is >= 1 and < length(x). setMethod("append", c("Vector", "Vector"), function(x, values, after=length(x)) { if (!isSingleNumber(after)) stop("'after' must be a single number") x_len <- length(x) if (after == 0L) c(values, x) else if (after >= x_len) c(x, values) else c(head(x, n=after), values, tail(x, n=-after)) } ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Evaluating. ### setMethod("eval", c("expression", "Vector"), function(expr, envir, enclos = parent.frame()) eval(expr, as.env(envir, enclos)) ) setMethod("eval", c("language", "Vector"), function(expr, envir, enclos = parent.frame()) eval(expr, as.env(envir, enclos)) ) setMethod("with", "Vector", function(data, expr, ...) { safeEval(substitute(expr), data, parent.frame(), ...) }) setReplaceMethod("column", "Vector", function(x, name, value) { if (name %in% parallelVectorNames(x)) { setter <- get(paste0(name, "<-"), classNamespace(x), mode="function") setter(x, value=value) } else { mcols(x)[[name]] <- value x } }) transform.Vector <- transformColumns setMethod("transform", "Vector", transform.Vector) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Utilities. ### setGeneric("expand.grid", signature="...") BiocGenerics:::apply_hotfix73465(getGeneric("expand.grid")) setMethod("expand.grid", "Vector", function(..., KEEP.OUT.ATTRS = TRUE, stringsAsFactors = TRUE) { args <- list(...) inds <- lapply(args, seq_along) grid <- do.call(expand.grid, c(inds, KEEP.OUT.ATTRS=KEEP.OUT.ATTRS, stringsAsFactors=stringsAsFactors)) names(args) <- names(grid) ans <- DataFrame(mapply(`[`, args, grid, SIMPLIFY=FALSE), check.names=FALSE) metadata(ans)$out.attrs <- attr(grid, "out.attrs") ans }) ### FIXME: tapply method still in IRanges setMethod("by", "Vector", function(data, INDICES, FUN, ..., simplify = TRUE) { if (!is.list(INDICES)) { INDICES <- setNames(list(INDICES), deparse(substitute(INDICES))[1L]) } FUNx <- function(i) FUN(extractROWS(data, i), ...) structure(tapply(seq_len(NROW(data)), INDICES, FUNx, simplify = simplify), call = match.call(), class = "by") }) diff.Vector <- function(x, ...) diff(x, ...) S4Vectors/R/Vector-comparison.R0000644000175400017540000002734513175714520017412 0ustar00biocbuildbiocbuild### ========================================================================= ### Comparing, ordering, and tabulating vector-like objects ### ------------------------------------------------------------------------- ### ### Functions/operators for comparing, ordering, tabulating: ### ### pcompare ### == ### != ### <= ### >= ### < ### > ### match ### selfmatch ### duplicated ### unique ### %in% ### findMatches ### countMatches ### order ### sort ### rank ### table ### Method signatures for binary comparison operators. .OP2_SIGNATURES <- list( c("Vector", "Vector"), c("Vector", "ANY"), c("ANY", "Vector") ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Element-wise (aka "parallel") comparison of 2 Vector objects. ### setGeneric("pcompare", function(x, y) standardGeneric("pcompare")) ### The methods below are implemented on top of pcompare(). setMethods("==", .OP2_SIGNATURES, function(e1, e2) { pcompare(e1, e2) == 0L } ) setMethods("<=", .OP2_SIGNATURES, function(e1, e2) { pcompare(e1, e2) <= 0L } ) ### The methods below are implemented on top of == and <=. setMethods("!=", .OP2_SIGNATURES, function(e1, e2) { !(e1 == e2) }) setMethods(">=", .OP2_SIGNATURES, function(e1, e2) { e2 <= e1 }) setMethods("<", .OP2_SIGNATURES, function(e1, e2) { !(e2 <= e1) }) setMethods(">", .OP2_SIGNATURES, function(e1, e2) { !(e1 <= e2) }) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### selfmatch() ### ### The default "selfmatch" method below is implemented on top of match(). ### setGeneric("selfmatch", function(x, ...) standardGeneric("selfmatch")) ### Default "selfmatch" method. Args in ... are propagated to match(). setMethod("selfmatch", "ANY", function(x, ...) match(x, x, ...)) ### Optimized "selfmatch" method for factors. setMethod("selfmatch", "factor", function(x, ..., incomparables = NULL) { ignore.na <- isTRUE(is.na(incomparables)) has.incomparables <- !is.null(incomparables) && !ignore.na if (!missing(...) || has.incomparables || (!ignore.na && anyNA(x)) || is.unsorted(x)) callNextMethod() else as.integer(x) }) ### 'selfmatch_mapping' must be an integer vector like one returned by ### selfmatch(), that is, values are non-NAs and such that any value 'v' in it ### must appear for the first time at *position* 'v'. ### Such a vector can be seen as a many-to-one mapping that maps any position ### in the vector to a lower position and that has the additional property of ### being idempotent. ### More formally, any vector returned by selfmatch() has the 2 following ### properties: ### ### (1) for any 1 <= i <= length(selfmatch_mapping), ### selfmatch_mapping[i] must be >= 1 and <= i ### and ### ### (2) selfmatch_mapping[selfmatch_mapping] is the same as selfmatch_mapping ### ### reverseSelfmatchMapping() creates the "reverse mapping" as an ordinary ### list. reverseSelfmatchMapping <- function(selfmatch_mapping) { ans <- vector(mode="list", length=length(selfmatch_mapping)) sparse_ans <- split(seq_along(selfmatch_mapping), selfmatch_mapping) ans[as.integer(names(sparse_ans))] <- as.list(sparse_ans) ans } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### duplicated() & unique() ### ### The "duplicated" method below is implemented on top of selfmatch(). ### The "unique" method below is implemented on top of duplicated(). ### ### S3/S4 combo for duplicated.Vector duplicated.Vector <- function(x, incomparables=FALSE, ...) duplicated(x, incomparables=incomparables, ...) .duplicated.Vector <- function(x, incomparables=FALSE, ...) { if (!identical(incomparables, FALSE)) stop("the \"duplicated\" method for Vector objects ", "only accepts 'incomparables=FALSE'") args <- list(...) if ("fromLast" %in% names(args)) { fromLast <- args$fromLast if (!isTRUEorFALSE(fromLast)) stop("'fromLast' must be TRUE or FALSE") args$fromLast <- NULL if (fromLast) x <- rev(x) } else { fromLast <- FALSE } xx <- do.call(selfmatch, c(list(x), args)) ans <- xx != seq_along(xx) if (fromLast) ans <- rev(ans) ans } setMethod("duplicated", "Vector", .duplicated.Vector) ### S3/S4 combo for unique.Vector unique.Vector <- function(x, incomparables=FALSE, ...) unique(x, incomparables=incomparables, ...) .unique.Vector <- function(x, incomparables=FALSE, ...) { i <- !duplicated(x, incomparables=incomparables, ...) extractROWS(x, i) } setMethod("unique", "Vector", .unique.Vector) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### %in% ### ### The method below is implemented on top of match(). ### setMethods("%in%", .OP2_SIGNATURES, function(x, table) { match(x, table, nomatch=0L) > 0L } ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### findMatches() & countMatches() ### ### The default "findMatches" and "countMatches" methods below are ### implemented on top of match(). ### setGeneric("findMatches", signature=c("x", "table"), function(x, table, select=c("all", "first", "last"), ...) standardGeneric("findMatches") ) ### Equivalent to 'countQueryHits(findMatches(x, table))' but the default ### "countMatches" method below has a more efficient implementation. setGeneric("countMatches", signature=c("x", "table"), function(x, table, ...) standardGeneric("countMatches") ) ### Problem: using transpose=TRUE generates an invalid SortedByQueryHits ### object (hits are not sorted by query): ### > S4Vectors:::.findAllMatchesInSmallTable(1:6, c(7:5, 4:5), ### transpose=TRUE) ### Hits of length 4 ### queryLength: 5 ### subjectLength: 6 ### queryHits subjectHits ### ### 1 4 4 ### 2 3 5 ### 3 5 5 ### 4 2 6 ### and the cost of ordering them would probably defeat the purpose of the ### "put the smallest object on the right" optimization trick. .findAllMatchesInSmallTable <- function(x, table, ..., transpose=FALSE) { x2 <- match(x, table, ...) table2 <- selfmatch(table, ...) table_low2high <- reverseSelfmatchMapping(table2) hits_per_x <- table_low2high[as.integer(x2)] x_hits <- rep.int(seq_along(hits_per_x), sapply_NROW(hits_per_x)) if (length(x_hits) == 0L) { table_hits <- integer(0) } else { table_hits <- unlist(hits_per_x, use.names=FALSE) } if (transpose) { Hits(table_hits, x_hits, length(table), length(x), sort.by.query=TRUE) } else { Hits(x_hits, table_hits, length(x), length(table), sort.by.query=TRUE) } } ### Default "findMatches" method. Args in ... are propagated to match() and ### selfmatch(). setMethod("findMatches", c("ANY", "ANY"), function(x, table, select=c("all", "first", "last"), ...) { select <- match.arg(select) if (select != "all") stop("'select' is not supported yet. Note that you can use ", "match() if you want to do 'select=\"first\"'. Otherwise ", "you're welcome to request this on the Bioconductor ", "mailing list.") ## "put the smallest object on the right" optimization trick #if (length(x) < length(table)) # return(.findAllMatchesInSmallTable(table, x, ..., transpose=TRUE)) .findAllMatchesInSmallTable(x, table, ...) } ) setMethod("findMatches", c("ANY", "missing"), function(x, table, select=c("all", "first", "last"), ...) { ans <- callGeneric(x, x, select=select, ...) if (!is(ans, "Hits")) # e.g. if 'select' is "first" return(ans) as(ans, "SortedByQuerySelfHits") } ) ### Default "countMatches" method. Args in ... are propagated to match() and ### selfmatch(). .countMatches.default <- function(x, table, ...) { x_len <- length(x) table_len <- length(table) if (x_len <= table_len) { table2 <- match(table, x, ...) # can contain NAs nbins <- x_len x2 <- selfmatch(x, ...) # no NAs } else { table2 <- selfmatch(table, ...) # no NAs nbins <- table_len + 1L x2 <- match(x, table, nomatch=nbins, ...) } tabulate(table2, nbins=nbins)[x2] } setMethod("countMatches", c("ANY", "ANY"), .countMatches.default) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### sort() ### ### The method below is implemented on top of order(). ### sort.Vector <- function(x, decreasing=FALSE, ...) sort(x, decreasing=decreasing, ...) setMethod("sort", "Vector", .sort.Vector) formulaAsListCall <- function(formula) { attr(terms(formula), "variables") } formulaValues <- function(x, formula) { listCall <- formulaAsListCall(formula) vals <- eval(listCall, as.env(x, environment(formula))) names(vals) <- vapply(listCall, function(x) { paste(deparse(x, width.cutoff = 500), collapse = " ") }, character(1L))[-1L] vals } orderBy <- function(formula, x, decreasing=FALSE, na.last=TRUE) { values <- formulaValues(x, formula) do.call(order, c(decreasing=decreasing, na.last=na.last, values)) } setMethod("rank", "Vector", function(x, na.last=TRUE, ties.method=c("average", "first", "last", "random", "max", "min")) { ties.method <- match.arg(ties.method) oo <- order(x, na.last=na.last) ## 'ans' is the reverse permutation of 'oo'. ans <- integer(length(oo)) ans[oo] <- seq_along(oo) if (ties.method == "first") return(ans) ans <- ans[selfmatch(x)] if (ties.method == "min") return(ans) ## Other ties methods. rank(ans, ties.method=ties.method) } ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### table() ### ### The method below is implemented on top of selfmatch(), order(), and ### as.character(). ### ### This is a copy/paste of the list.names() function locally defined inside ### base::table(). .list.names <- function(...) { deparse.level <- 1 l <- as.list(substitute(list(...)))[-1L] nm <- names(l) fixup <- if (is.null(nm)) seq_along(l) else nm == "" dep <- vapply(l[fixup], function(x) switch(deparse.level + 1, "", if (is.symbol(x)) as.character(x) else "", deparse(x, nlines = 1)[1L]), "") if (is.null(nm)) dep else { nm[fixup] <- dep nm } } ### Works on any object for which selfmatch(), order(), and as.character() ### are supported. .compute_table <- function(x) { xx <- selfmatch(x) t <- tabulate(xx, nbins=length(xx)) keep_idx <- which(t != 0L) x2 <- x[keep_idx] t2 <- t[keep_idx] oo <- order(x2) x2 <- x2[oo] t2 <- t2[oo] ans <- array(t2) ## Some "as.character" methods propagate the names (e.g. the method for ## GenomicRanges objects). We drop them. dimnames(ans) <- list(unname(as.character(x2))) ans } setMethod("table", "Vector", function(...) { args <- list(...) if (length(args) != 1L) stop("\"table\" method for Vector objects ", "can only take one input object") x <- args[[1L]] ## Compute the table as an array. ans <- .compute_table(x) ## Some cosmetic adjustments. names(dimnames(ans)) <- .list.names(...) class(ans) <- "table" ans } ) setMethod("xtabs", signature(data = "Vector"), function(formula = ~., data, subset, na.action, exclude = c(NA, NaN), drop.unused.levels = FALSE) { data <- as.env(data, environment(formula), tform=decode) callGeneric() }) S4Vectors/R/Vector-merge.R0000644000175400017540000001535213175714520016332 0ustar00biocbuildbiocbuild### ========================================================================= ### Merging vector-like objects ### ------------------------------------------------------------------------- ### ### Compute the n-ary union (if 'all' is TRUE) or n-ary intersection (if 'all' ### is FALSE) of a list of vector-like objects with no metadata columns. ### The objects must support c() for the n-ary union (i.e. when 'all' is TRUE), ### and %in% and [ for the n-ary intersection (i.e. when 'all' is FALSE). ### They must also support sort() if 'sort' is TRUE, as well as unique(). .merge_naked_objects <- function(naked_objects, all=FALSE, all.x=NA, all.y=NA, sort=TRUE) { if (!isTRUEorFALSE(all)) stop("'all' must be TRUE or FALSE") if (!(is.logical(all.x) && length(all.x) == 1L)) stop("'all.x' must be a single logical") if (!(is.logical(all.y) && length(all.y) == 1L)) stop("'all.y' must be a single logical") if (!isTRUEorFALSE(sort)) stop("'sort' must be TRUE or FALSE") if (length(naked_objects) == 1L) { ## Unary union or intersection. ## 'all', 'all.x', and 'all.y' are ignored. ans <- naked_objects[[1L]] } else if (length(naked_objects) == 2L) { ## Binary union or intersection. ## Behavior is controlled by 'all.x' and 'all.y' (after setting each ## of them to 'all' if it's NA). if (is.na(all.x)) all.x <- all if (is.na(all.y)) all.y <- all x <- naked_objects[[1L]] y <- naked_objects[[2L]] if (all.x && all.y) { ans <- c(x, y) } else if (all.x) { ans <- x } else if (all.y) { ans <- y } else { ans <- x[x %in% y] } } else { ## N-ary union or intersection (N > 2). ## 'all.x' and 'all.y' must be NAs. if (!(is.na(all.x) && is.na(all.y))) stop(wmsg("You need to use 'all' instead of the 'all.x' or ", "'all.y' argument when merging more than 2 objects.")) if (all) { ans <- do.call("c", naked_objects) } else { ans <- naked_objects[[1L]] for (i in 2:length(naked_objects)) ans <- ans[ans %in% naked_objects[[i]]] } } if (sort) ans <- sort(ans) unique(ans) } ### The list can contain NULLs, which are ignored. Non-NULL list elements are ### assumed to be of same lengths. This is not checked. .collapse_list_of_equal_vectors <- function(x, colname) { x <- x[!sapply_isNULL(x)] ans <- x[[1L]] if (length(x) >= 2L) { na_idx <- which(is.na(ans)) for (i in 2:length(x)) { x_elt <- x[[i]] if (is.null(x_elt)) next if (!all(x_elt == ans, na.rm=TRUE)) stop(wmsg("metadata column \"", colname, "\" contains ", "incompatible values across the objects to merge")) if (length(na_idx) != 0L) { ans[na_idx] <- x_elt[na_idx] na_idx <- which(is.na(ans)) } } } ans } .merge_mcols <- function(x, objects) { all_mcolnames <- unique(unlist( lapply(objects, function(object) colnames(mcols(object))) )) if (length(all_mcolnames) == 0L) return(NULL) revmaps <- lapply(objects, match, x=x) merge_mcol <- function(colname) { cols <- mapply( function(object, revmap) { col <- mcols(object)[[colname]] if (is.null(col)) return(NULL) col <- col[revmap] }, objects, revmaps, SIMPLIFY=FALSE ) .collapse_list_of_equal_vectors(cols, colname) } all_mcols <- lapply(setNames(all_mcolnames, all_mcolnames), merge_mcol) DataFrame(all_mcols) } ### 'objects' must be a list of vector-like objects. See .merge_naked_objects() ### above for what operations these objects must support in order for ### .merge_Vector_objects() to work. .merge_Vector_objects <- function(objects, all=FALSE, all.x=NA, all.y=NA, sort=TRUE) { objects <- unname(objects) naked_objects <- lapply(objects, function(object) { mcols(object) <- NULL if (any(duplicated(object))) ## We don't actually apply unique() to the input objects but ## .merge_Vector_objects() behaves like if we did. warning(wmsg("Some of the objects to merge contain ", "duplicated elements. These elements were ", "removed by applying unique() to each object ", "before the merging.")) object } ) ans <- .merge_naked_objects(naked_objects, all=all, all.x=all.x, all.y=all.y, sort=sort) mcols(ans) <- .merge_mcols(ans, objects) ans } ### 3 important differences with base::merge.data.frame(): ### 1) The matching is based on the vector values (vs arbitrary columns for ### base::merge.data.frame()). ### 2) Self merge is a no-op if 'sort=FALSE' (or object already sorted) and ### if the object has no duplicates. ### 3) This an n-ary merge() of vector-like objects (vs binary for ### base::merge.data.frame()). setMethod("merge", c("Vector", "Vector"), function(x, y, ..., all=FALSE, all.x=NA, all.y=NA, sort=TRUE) { if (missing(x)) { if (missing(y)) { objects <- list(...) } else { objects <- list(y, ...) } } else { if (missing(y)) { objects <- list(x, ...) } else { objects <- list(x, y, ...) } } ## .merge_Vector_objects() won't work if some of the objects to merge ## are list-like objects that pcompare recursively. In that case, we ## fallback on base::merge() but this one is a binary merge only. comp_rec <- vapply(objects, function(object) { is.list(object) || is(object, "List") && pcompareRecursively(object) }, logical(1)) if (any(comp_rec)) { if (length(objects) > 2L) stop(wmsg("cannot merge more than 2 objects ", "when some of them are list-like objects")) ans <- base::merge(x, y, all=all, sort=sort) return(ans) } .merge_Vector_objects(objects, all=all, all.x=all.x, all.y=all.y, sort=sort) } ) S4Vectors/R/Vector-setops.R0000644000175400017540000000164013175714520016543 0ustar00biocbuildbiocbuild### ========================================================================= ### Set operations ### ------------------------------------------------------------------------- ### ### The methods below are endomorphisms with respect to their first argument ### 'x'. They propagates the names and metadata columns. ### ### S3/S4 combo for union.Vector union.Vector <- function(x, y) unique(c(x, y)) setMethod("union", c("Vector", "Vector"), union.Vector) ### S3/S4 combo for intersect.Vector intersect.Vector <- function(x, y) unique(x[x %in% y]) setMethod("intersect", c("Vector", "Vector"), intersect.Vector) ### S3/S4 combo for setdiff.Vector setdiff.Vector <- function(x, y) unique(x[!(x %in% y)]) setMethod("setdiff", c("Vector", "Vector"), setdiff.Vector) ### S3/S4 combo for setequal.Vector setequal.Vector <- function(x, y) all(x %in% y) && all(y %in% x) setMethod("setequal", c("Vector", "Vector"), setequal.Vector) S4Vectors/R/aggregate-methods.R0000644000175400017540000002105313175714520017355 0ustar00biocbuildbiocbuild### ========================================================================= ### "aggregate" methods ### ------------------------------------------------------------------------- ### ### This is messy and broken! E.g. ### ### aggregate(DataFrame(state.x77), FUN=mean, start=1:20, width=10) ### ### doesn't work as expected. Or: ### ### aggregate(Rle(2:-2, 5:9), FUN=mean, start=1:20, width=17) ### ### doesn't give the same result as: ### ### aggregate(rep(2:-2, 5:9), FUN=mean, start=1:20, width=17) ### ### See also the FIXME note down below (the one preceding the definition of ### the method for vector) for more mess. ### ### FIXME: Fix the aggregate() mess. Before fixing, it would be good to ### simplify by gettting rid of the 'frequency' and 'delta' arguments. ### Then the 'start', 'end', and 'width' arguments wouldn't be needed ### anymore because the user can aggregate by range by passing ### IRanges(start, end, width) to 'by'. After removing these arguments, ### the remaining arguments would be as in stats:::aggregate.data.frame. ### Finally make sure that, when 'by' is not a Ranges, the "aggregate" method ### for vector objects behaves exactly like stats:::aggregate.data.frame ### (the easiest way would be to delegate to it). ### ### A nice extension would be to have 'by' accept an IntegerList object, not ### just a Ranges (which is a special case of IntegerList), to let the user ### specify the subsets of 'x'. When 'by' is an IntegerList, aggregate() would ### be equivalent to: ### ### sapply(seq_along(by), ### function(i) FUN(x[by[[i]]], ...), simplify=simplify) ### ### This could be how it is implemented, except for the common use case where ### 'by' is a Ranges (needs special treatment in order to remain as fast as it ### is at the moment). This could even be extended to 'by' being a List (e.g. ### CharacterList, RleList, etc...) ### ### Other options (non-exclusive) to explore: ### ### (a) aggregateByRanges() new generic (should go in IRanges). aggregate() ### would simply delegate to it when 'by' is a Ranges object (but that ### means that the "aggregate" methods should also go in IRanges). ### ### (b) lapply/sapply on Views objects (but only works if Views(x, ...) ### works and views can only be created on a few specific types of ### objects). ### setMethod("aggregate", "matrix", stats:::aggregate.default) setMethod("aggregate", "data.frame", stats:::aggregate.data.frame) setMethod("aggregate", "ts", stats:::aggregate.ts) ### S3/S4 combo for aggregate.Vector aggregate.Vector <- function(x, by, FUN, start=NULL, end=NULL, width=NULL, frequency=NULL, delta=NULL, ..., simplify=TRUE) { aggregate(x, by, FUN, start, end, width, frequency, delta, ..., simplify=simplify) } .aggregate.Vector <- function(x, by, FUN, start=NULL, end=NULL, width=NULL, frequency=NULL, delta=NULL, ..., simplify=TRUE) { if (missing(FUN)) { return(aggregateWithDots(x, by, ...)) } else if (!missing(by)) { if (is.list(by)) { ans <- aggregate(as.data.frame(x), by=by, FUN=FUN, ..., simplify=simplify) return(DataFrame(ans)) } else if (is(by, "formula")) { ans <- aggregate(by, as.env(x, environment(by), tform=decode), FUN=FUN, ...) return(DataFrame(ans)) } start <- structure(start(by), names=names(by)) end <- end(by) } else { if (!is.null(width)) { if (is.null(start)) start <- end - width + 1L else if (is.null(end)) end <- start + width - 1L } ## Unlike as.integer(), as( , "integer") propagates the names. start <- as(start, "integer") end <- as(end, "integer") } FUN <- match.fun(FUN) if (length(start) != length(end)) stop("'start', 'end', and 'width' arguments have unequal length") n <- length(start) if (!is.null(names(start))) indices <- structure(seq_len(n), names = names(start)) else indices <- structure(seq_len(n), names = names(end)) if (is.null(frequency) && is.null(delta)) { sapply(indices, function(i) FUN(Vector_window(x, start = start[i], end = end[i]), ...), simplify = simplify) } else { frequency <- rep(frequency, length.out = n) delta <- rep(delta, length.out = n) sapply(indices, function(i) FUN(window(x, start = start[i], end = end[i], frequency = frequency[i], delta = delta[i]), ...), simplify = simplify) } } setMethod("aggregate", "Vector", .aggregate.Vector) .aggregate.Rle <- function(x, by, FUN, start=NULL, end=NULL, width=NULL, frequency=NULL, delta=NULL, ..., simplify=TRUE) { FUN <- match.fun(FUN) if (!missing(by)) { start <- structure(start(by), names=names(by)) end <- end(by) } else { if (!is.null(width)) { if (is.null(start)) start <- end - width + 1L else if (is.null(end)) end <- start + width - 1L } start <- as(start, "integer") end <- as(end, "integer") } if (length(start) != length(end)) stop("'start', 'end', and 'width' arguments have unequal length") n <- length(start) if (!is.null(names(start))) indices <- structure(seq_len(n), names = names(start)) else indices <- structure(seq_len(n), names = names(end)) if (is.null(frequency) && is.null(delta)) { width <- end - start + 1L rle_list <- extract_ranges_from_Rle(x, start, width, as.list=TRUE) names(rle_list) <- names(indices) sapply(rle_list, FUN, ..., simplify = simplify) } else { frequency <- rep(frequency, length.out = n) delta <- rep(delta, length.out = n) sapply(indices, function(i) FUN(window(x, start = start[i], end = end[i], frequency = frequency[i], delta = delta[i]), ...), simplify = simplify) } } setMethod("aggregate", "Rle", .aggregate.Rle) .aggregate.List <- function(x, by, FUN, start=NULL, end=NULL, width=NULL, frequency=NULL, delta=NULL, ..., simplify=TRUE) { if (missing(by) || !requireNamespace("IRanges", quietly=TRUE) || !is(by, "RangesList")) { ans <- callNextMethod() return(ans) } if (length(x) != length(by)) stop("for Ranges 'by', 'length(x) != length(by)'") y <- as.list(x) result <- lapply(structure(seq_len(length(x)), names = names(x)), function(i) aggregate(y[[i]], by = by[[i]], FUN = FUN, frequency = frequency, delta = delta, ..., simplify = simplify)) as(result, "List") } setMethod("aggregate", "List", .aggregate.List) ModelFrame <- function(formula, x) { if (length(formula) != 2L) stop("'formula' must not have a left side") DataFrame(formulaValues(x, formula)) } aggregateWithDots <- function(x, by, FUN, ..., drop = TRUE) { stopifnot(isTRUEorFALSE(drop)) endomorphism <- FALSE if (missing(by)) { if (is(x, "List") && !is(x, "DataTable")) { by <- IRanges::PartitioningByEnd(x) x <- unlist(x, use.names=FALSE) } else { endomorphism <- TRUE by <- x } } if (is(by, "IntegerList")) { by <- IRanges::ManyToManyGrouping(by, nobj=NROW(x)) } if (is(by, "formula")) { by <- ModelFrame(by, x) } else if (is.list(by) || is(by, "DataTable")) { by <- IRanges::FactorList(by, compress=FALSE) } by <- as(by, "Grouping", strict=FALSE) if (IRanges::nobj(by) != NROW(x)) { stop("'by' does not have the same number of objects as 'x'") } if (drop) { by <- by[lengths(by) > 0L] } by <- unname(by) prenvs <- top_prenv_dots(...) exprs <- substitute(list(...))[-1L] envs <- lapply(prenvs, function(p) { as.env(x, p, tform = function(col) IRanges::extractList(col, by)) }) stats <- DataFrame(mapply(safeEval, exprs, envs, SIMPLIFY=FALSE)) if (endomorphism && !is(x, "DataFrame")) { ans <- x[end(IRanges::PartitioningByEnd(by))] mcols(by) <- NULL mcols(ans) <- DataFrame(grouping = by, stats) } else { ans <- DataFrame(by, stats) colnames(ans)[1L] <- "grouping" } ans } S4Vectors/R/character-utils.R0000644000175400017540000000661313175714520017065 0ustar00biocbuildbiocbuild### ========================================================================= ### Some utility functions to operate on strings ### ------------------------------------------------------------------------- ### NOT exported capitalize <- function(x) { substring(x, 1L, 1L) <- toupper(substring(x, 1L, 1L)) x } ### NOT exported ### Reduce size of each input string by keeping only its head and tail ### separated by 3 dots. Each returned strings is guaranteed to have a number ### characters <= width. sketchStr <- function(x, width=23) { if (!is.character(x)) stop("'x' must be a character vector") if (!isSingleNumber(width)) stop("'width' must be a single integer") if (!is.integer(width)) width <- as.integer(width) if (width < 7L) width <- 7L x_nchar <- nchar(x, type="width") idx <- which(x_nchar > width) if (length(idx) != 0L) { xx <- x[idx] xx_nchar <- x_nchar[idx] w1 <- (width - 2L) %/% 2L w2 <- (width - 3L) %/% 2L x[idx] <- paste0(substr(xx, start=1L, stop=w1), "...", substr(xx, start=xx_nchar - w2 + 1L, stop=xx_nchar)) } x } setGeneric("unstrsplit", signature="x", function(x, sep="") standardGeneric("unstrsplit") ) setMethod("unstrsplit", "list", function(x, sep="") .Call2("unstrsplit_list", x, sep, PACKAGE="S4Vectors") ) setMethod("unstrsplit", "character", function(x, sep="") x ) ### Safe alternative to 'strsplit(x, NULL, fixed=TRUE)[[1L]]'. safeExplode <- function(x) { if (!isSingleString(x)) stop("'x' must be a single string") .Call2("safe_strexplode", x, PACKAGE="S4Vectors") } ### strsplitAsListOfIntegerVectors(x) is an alternative to: ### lapply(strsplit(x, ",", fixed=TRUE), as.integer) ### except that: ### - strsplit() accepts NAs, we don't (raise an error); ### - as.integer() introduces NAs by coercion (with a warning), we don't ### (raise an error); ### - as.integer() supports "inaccurate integer conversion in coercion" ### when the value to coerce is > INT_MAX (then it's coerced to INT_MAX), ### we don't (raise an error); ### - as.integer() will coerce non-integer values (e.g. 10.3) to an int ### by truncating them, we don't (raise an error). ### When it fails, strsplit_as_list_of_ints() will print a detailed parse ### error message. ### It's also faster and uses much less memory. E.g. it's 8x faster and uses ### < 1 Mb versus > 60 Mb on the character vector 'biginput' created with: ### library(rtracklayer) ### session <- browserSession() ### genome(session) <- "hg18" ### query <- ucscTableQuery(session, "UCSC Genes") ### tx <- getTable(query) ### biginput <- c(tx$exonStarts, tx$exonEnds) # 133606 elements strsplitAsListOfIntegerVectors <- function(x, sep=",") { if (!is.character(x)) stop("'x' must be a character vector") if (!isSingleString(sep) || nchar(sep) != 1L) stop("'sep' must be a string containing just one single-byte character") ans <- .Call2("strsplit_as_list_of_ints", x, sep, PACKAGE="S4Vectors") names(ans) <- names(x) ans } ### svn.time() returns the time in Subversion format, e.g.: ### "2007-12-07 10:03:15 -0800 (Fri, 07 Dec 2007)" ### The -0800 part will be adjusted if daylight saving time is in effect. ### TODO: Find a better home for this function. svn.time <- function() .Call2("svn_time", PACKAGE="S4Vectors") S4Vectors/R/eval-utils.R0000644000175400017540000000422713175714520016057 0ustar00biocbuildbiocbuild### ========================================================================= ### Helpers for environments and evaluation ### ------------------------------------------------------------------------- safeEval <- function(expr, envir, enclos=parent.env(envir), strict=FALSE) { expr <- eval(call("bquote", expr, enclos)) if (strict) { enclos <- makeGlobalWarningEnv(expr, envir, enclos) } eval(expr, envir, enclos) } makeGlobalWarningEnv <- function(expr, envir, enclos) { envir <- as.env(envir, enclos) globals <- setdiff(all.names(expr, functions=FALSE), ls(envir)) env <- new.env(parent=enclos) lapply(globals, function(g) { makeActiveBinding(g, function() { val <- get(g, enclos) warning("Symbol '", g, "' resolved from calling frame; ", "escape with .(", g, ") for safety.") val }, env) }) env } evalArg <- function(expr, envir, ..., where=parent.frame()) { enclos <- eval(call("top_prenv", expr, where)) expr <- eval(call("substitute", expr), where) safeEval(expr, envir, enclos, ...) } normSubsetIndex <- function(i) { i <- try(as.logical(i), silent=TRUE) if (inherits(i, "try-error")) stop("'subset' must be coercible to logical") i & !is.na(i) } missingArg <- function(arg, where=parent.frame()) { eval(call("missing", arg), where) } evalqForSubset <- function(expr, envir, ...) { if (missingArg(substitute(expr), parent.frame())) { TRUE } else { i <- evalArg(substitute(expr), envir, ..., where=parent.frame()) normSubsetIndex(i) } } evalqForSelect <- function(expr, df, ...) { if (missingArg(substitute(expr), parent.frame())) { rep(TRUE, ncol(df)) } else { nl <- as.list(seq_len(ncol(df))) names(nl) <- colnames(df) evalArg(substitute(expr), nl, ..., where=parent.frame()) } } top_prenv <- function(x, where=parent.frame()) { sym <- substitute(x) if (!is.name(sym)) { stop("'x' did not substitute to a symbol") } if (!is.environment(where)) { stop("'where' must be an environment") } .Call2("top_prenv", sym, where, PACKAGE="S4Vectors") } top_prenv_dots <- function(...) { .Call("top_prenv_dots", environment(), PACKAGE="S4Vectors") } S4Vectors/R/expand-methods.R0000644000175400017540000000522413175714520016710 0ustar00biocbuildbiocbuild### ========================================================================= ### expand methods ### ------------------------------------------------------------------------- ### setGeneric("expand", signature="x", function(x, ...) standardGeneric("expand") ) ## A helper function to do the work .expandOneCol <- function(x, colname, keepEmptyRows) { if (!is(x, "DataFrame")) stop("'x' must be a DataFrame object") if (!isSingleString(colname) && !isSingleNumber(colname)) stop("'colname' must be a single string or number") col <- x[[colname]] if (is.null(col)) stop("'colname' must be a valid colname name or index") if(keepEmptyRows){ col[elementNROWS(col)==0] <- NA } idx <- rep(seq_len(nrow(x)), elementNROWS(col)) ans <- x[idx, ] ans[[colname]] <- unlist(col, use.names=FALSE) ans } ## A better helper .expand <- function(x, colnames, keepEmptyRows){ for(colname in colnames) { x <- .expandOneCol(x, colname, keepEmptyRows) } x } ### FIXME: should make is.recursive a generic in base R isRecursive <- function(x) is.recursive(x) || is(x, "List") defaultIndices <- function(x) { which(vapply(x, isRecursive, logical(1L))) } setMethod("expand", "DataFrame", function(x, colnames, keepEmptyRows = FALSE){ stopifnot(isTRUEorFALSE(keepEmptyRows)) if (missing(colnames)) { colnames <- defaultIndices(x) } .expand(x, colnames, keepEmptyRows) } ) setMethod("expand", "Vector", function(x, colnames, keepEmptyRows = FALSE){ stopifnot(isTRUEorFALSE(keepEmptyRows)) if (missing(colnames)) { colnames <- defaultIndices(mcols(x)) } df <- mcols(x) df[["__index__"]] <- seq_along(x) ex <- .expand(df, colnames, keepEmptyRows) mcols(x) <- NULL ans <- x[ex[["__index__"]]] ex[["__index__"]] <- NULL mcols(ans) <- ex ans } ) ## NOT exported but used in VariantAnnotation package. ## Assume that the named columns have the same geometry and expand ## them simultaneously; this is different from the cartesian product ## expansion above. expandByColumnSet <- function(x, colnames, keepEmptyRows) { if (length(colnames) == 0L) return(x) if(keepEmptyRows) { emptyRows <- elementNROWS(col) == 0L x[emptyRows, colnames] <- rep(NA, sum(emptyRows)) } ans <- x[quick_togroup(x[[colnames[1L]]]),,drop=FALSE] ans[colnames] <- lapply(x[colnames], unlist, use.names = FALSE) ans } S4Vectors/R/integer-utils.R0000644000175400017540000004350413175714520016566 0ustar00biocbuildbiocbuild### ========================================================================= ### Some low-level utility functions to operate on integer vectors ### ------------------------------------------------------------------------- ### ### Unless stated otherwise, the functions defined in this file are not ### exported. ### anyMissingOrOutside <- function(x, lower = -.Machine$integer.max, upper = .Machine$integer.max) { if (!is.integer(x)) stop("'x' must be an integer vector") if (!is.integer(lower)) lower <- as.integer(lower) if (!is.integer(upper)) upper <- as.integer(upper) .Call2("Integer_any_missing_or_outside", x, lower, upper, PACKAGE="S4Vectors") } ### Equivalent to (but much faster than): ### ### diff(c(0L, x)) ### ### except that NAs are not supported. diffWithInitialZero <- function(x) { if (!is.integer(x)) stop("'x' must be an integer vector") .Call2("Integer_diff_with_0", x, PACKAGE="S4Vectors") } ### Equivalent to (but much faster than): ### ### diff(c(x, last)) ### ### except that NAs are not supported. diffWithLast <- function(x, last) { if (!is.integer(x)) stop("'x' must be an integer vector") if (!isSingleInteger(last)) stop("'last' must be a single, non-NA integer") .Call2("Integer_diff_with_last", x, last, PACKAGE="S4Vectors") } ### x: integer vector. ### breakpoints: vector of positions on 'x' in increasing order. ### Equivalent to (but 10x faster than): ### sum(relist(x, PartitioningByEnd(breakpoints))) ### Also equivalent to (but 200x faster than): ### f <- rep(factor(seq_along(breakpoints)), diff(c(0L, breakpoints))) ### vapply(split(x, f, drop=FALSE), sum, integer(1), USE.NAMES=FALSE) groupsum <- function(x, breakpoints) { if (last_or(breakpoints, 0L) != length(x)) stop("invalid 'breakpoints' argument") diffWithInitialZero(cumsum(x)[breakpoints]) } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Fast ordering/comparing of integer pairs. ### .normargIntegerOrFactor <- function(arg, argname) { if (is.factor(arg)) arg <- as.integer(arg) else if (is(arg, "Rle") && (is(runValue(arg), "integer") || is(runValue(arg), "factor"))) arg <- as.integer(arg) else if (!is.integer(arg)) stop("'", argname, "' must be an integer vector or factor") arg } .normargMethod <- function(method=c("auto", "quick", "hash"), a_len) { method <- match.arg(method) if (method == "auto") { if (a_len <= 2^29) method <- "hash" else method <- "quick" } method } pcompareIntegerPairs <- function(a1, b1, a2, b2) { a1 <- .normargIntegerOrFactor(a1, "a1") b1 <- .normargIntegerOrFactor(b1, "b1") if (length(a1) != length(b1)) stop("'a1' and 'b1' must have the same length") a2 <- .normargIntegerOrFactor(a2, "a2") b2 <- .normargIntegerOrFactor(b2, "b2") if (length(a2) != length(b2)) stop("'a2' and 'b2' must have the same length") .Call2("Integer_pcompare2", a1, b1, a2, b2, PACKAGE="S4Vectors") } sortedIntegerPairs <- function(a, b, decreasing=FALSE, strictly=FALSE) { a <- .normargIntegerOrFactor(a, "a") b <- .normargIntegerOrFactor(b, "b") .Call2("Integer_sorted2", a, b, decreasing, strictly, PACKAGE="S4Vectors") } ### Exported! orderIntegerPairs <- function(a, b, decreasing=FALSE) { a <- .normargIntegerOrFactor(a, "a") b <- .normargIntegerOrFactor(b, "b") #.Call2("Integer_order2", a, b, decreasing, PACKAGE="S4Vectors") base::order(a, b, decreasing=decreasing) } .matchIntegerPairs_quick <- function(a1, b1, a2, b2, nomatch=NA_integer_) { .Call2("Integer_match2_quick", a1, b1, a2, b2, nomatch, PACKAGE="S4Vectors") } .matchIntegerPairs_hash <- function(a1, b1, a2, b2, nomatch=NA_integer_) { .Call2("Integer_match2_hash", a1, b1, a2, b2, nomatch, PACKAGE="S4Vectors") } ### Exported! matchIntegerPairs <- function(a1, b1, a2, b2, nomatch=NA_integer_, method=c("auto", "quick", "hash")) { a1 <- .normargIntegerOrFactor(a1, "a1") b1 <- .normargIntegerOrFactor(b1, "b1") if (length(a1) != length(b1)) stop("'a1' and 'b1' must have the same length") a2 <- .normargIntegerOrFactor(a2, "a2") b2 <- .normargIntegerOrFactor(b2, "b2") if (length(a2) != length(b2)) stop("'a2' and 'b2' must have the same length") if (!isSingleNumberOrNA(nomatch)) stop("'nomatch' must be a single number or NA") if (!is.integer(nomatch)) nomatch <- as.integer(nomatch) method <- .normargMethod(method, length(a2)) if (method == "quick") { ans <- .matchIntegerPairs_quick(a1, b1, a2, b2, nomatch=nomatch) } else { ans <- .matchIntegerPairs_hash(a1, b1, a2, b2, nomatch=nomatch) } ans } .selfmatchIntegerPairs_quick <- function(a, b) { .Call2("Integer_selfmatch2_quick", a, b, PACKAGE="S4Vectors") } ### Author: Martin Morgan .selfmatchIntegerPairs_hash <- function(a, b) { .Call2("Integer_selfmatch2_hash", a, b, PACKAGE="S4Vectors") } ### Exported! selfmatchIntegerPairs <- function(a, b, method=c("auto", "quick", "hash")) { a <- .normargIntegerOrFactor(a, "a") b <- .normargIntegerOrFactor(b, "b") if (length(a) != length(b)) stop("'a' and 'b' must have the same length") method <- .normargMethod(method, length(a)) if (method == "quick") { ans <- .selfmatchIntegerPairs_quick(a, b) } else { ans <- .selfmatchIntegerPairs_hash(a, b) } ans } ### Exported! ### ### For 'a' and 'b' integer vectors of equal length with no NAs, ### 'duplicatedIntegerPairs(a, b)' is equivalent to (but much faster than): ### ### duplicated(cbind(a, b)) ### ### For efficiency reasons, we don't support (and don't even check) for NAs. duplicatedIntegerPairs <- function(a, b, fromLast=FALSE, method=c("auto", "quick", "hash")) { a <- .normargIntegerOrFactor(a, "a") b <- .normargIntegerOrFactor(b, "b") if (length(a) != length(b)) stop("'a' and 'b' must have the same length") if (!isTRUEorFALSE(fromLast)) stop("'fromLast' must be TRUE or FALSE") if (length(a) == 0L) return(logical(0L)) if (length(a) == 1L) return(FALSE) ## This is a temporary (and inefficient) workaround until "quick" ## and "hash" methods can natively support fromLast=TRUE. ## TODO: Add support for fromLast=TRUE to "quick" and "hash" methods. if (fromLast) return(rev(duplicatedIntegerPairs(rev(a), rev(b), method=method))) sm <- selfmatchIntegerPairs(a, b, method=method) sm != seq_len(length(sm)) } ### For 'a' and 'b' integer vectors of equal length with no NAs, ### 'runEndsOfIntegerPairs(a, b)' finds the runs of identical rows in ### 'cbind(a, b)' and returns the indices of the last row in each run. ### In other words, it's equivalent to (but much faster than): ### ### cumsum(runLength(Rle(paste(a, b, sep="|")))) ### ### Note that, if the rows in 'cbind(a, b)' are already sorted, then ### 'runEndsOfIntegerPairs(a, b)' returns the indices of the unique rows. ### In other words, 'runEndsOfIntegerPairs()' could be used to efficiently ### extract the unique pairs of integers from a presorted set of pairs. ### However, at the moment (April 2011) using 'duplicatedIntegerPairs()' ### is still faster than using 'runEndsOfIntegerPairs()' for finding the ### duplicated or unique pairs of integers in a presorted set of pairs. ### But this only because 'runEndsOfIntegerPairs()' is not as fast as it ### could/should be (an all-in-C implementation would probably solve this). ### ### For efficiency reasons, we don't support (and don't even check) for NAs. ### TODO: What happens if 'a' and 'b' don't have the same length? Shouldn't ### we check for that? runEndsOfIntegerPairs <- function(a, b) { not_same_as_prev <- diffWithInitialZero(a) != 0L | diffWithInitialZero(b) != 0L if (length(not_same_as_prev) == 0L) return(integer()) which(c(not_same_as_prev[-1L], TRUE)) } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Fast ordering/comparing of integer quadruplets. ### sortedIntegerQuads <- function(a, b, c, d, decreasing=FALSE, strictly=FALSE) { a <- .normargIntegerOrFactor(a, "a") b <- .normargIntegerOrFactor(b, "b") c <- .normargIntegerOrFactor(c, "c") d <- .normargIntegerOrFactor(d, "d") .Call2("Integer_sorted4", a, b, c, d, decreasing, strictly, PACKAGE="S4Vectors") } ### Exported! orderIntegerQuads <- function(a, b, c, d, decreasing=FALSE) { a <- .normargIntegerOrFactor(a, "a") b <- .normargIntegerOrFactor(b, "b") c <- .normargIntegerOrFactor(c, "c") d <- .normargIntegerOrFactor(d, "d") #.Call2("Integer_order4", a, b, c, d, decreasing, PACKAGE="S4Vectors") base::order(a, b, c, d, decreasing=decreasing) } .matchIntegerQuads_quick <- function(a1, b1, c1, d1, a2, b2, c2, d2, nomatch=NA_integer_) { .Call2("Integer_match4_quick", a1, b1, c1, d1, a2, b2, c2, d2, nomatch, PACKAGE="S4Vectors") } .matchIntegerQuads_hash <- function(a1, b1, c1, d1, a2, b2, c2, d2, nomatch=NA_integer_) { .Call2("Integer_match4_hash", a1, b1, c1, d1, a2, b2, c2, d2, nomatch, PACKAGE="S4Vectors") } ### Exported! matchIntegerQuads <- function(a1, b1, c1, d1, a2, b2, c2, d2, nomatch=NA_integer_, method=c("auto", "quick", "hash")) { a1 <- .normargIntegerOrFactor(a1, "a1") b1 <- .normargIntegerOrFactor(b1, "b1") c1 <- .normargIntegerOrFactor(c1, "c1") d1 <- .normargIntegerOrFactor(d1, "d1") if (length(a1) != length(b1) || length(b1) != length(c1) || length(c1) != length(d1)) stop("'a1', 'b1', 'c1' and 'd1' must have the same length") a2 <- .normargIntegerOrFactor(a2, "a2") b2 <- .normargIntegerOrFactor(b2, "b2") c2 <- .normargIntegerOrFactor(c2, "c2") d2 <- .normargIntegerOrFactor(d2, "d2") if (length(a2) != length(b2) || length(b2) != length(c2) || length(c2) != length(d2)) stop("'a2', 'b2', 'c2' and 'd2' must have the same length") if (!isSingleNumberOrNA(nomatch)) stop("'nomatch' must be a single number or NA") if (!is.integer(nomatch)) nomatch <- as.integer(nomatch) method <- .normargMethod(method, length(a2)) if (method == "quick") { ans <- .matchIntegerQuads_quick(a1, b1, c1, d1, a2, b2, c2, d2, nomatch=nomatch) } else { ans <- .matchIntegerQuads_hash(a1, b1, c1, d1, a2, b2, c2, d2, nomatch=nomatch) } ans } .selfmatchIntegerQuads_quick <- function(a, b, c, d) { .Call2("Integer_selfmatch4_quick", a, b, c, d, PACKAGE="S4Vectors") } .selfmatchIntegerQuads_hash <- function(a, b, c, d) { .Call2("Integer_selfmatch4_hash", a, b, c, d, PACKAGE="S4Vectors") } ### Exported! selfmatchIntegerQuads <- function(a, b, c, d, method=c("auto", "quick", "hash")) { a <- .normargIntegerOrFactor(a, "a") b <- .normargIntegerOrFactor(b, "b") c <- .normargIntegerOrFactor(c, "c") d <- .normargIntegerOrFactor(d, "d") if (length(a) != length(b) || length(b) != length(c) || length(c) != length(d)) stop("'a', 'b', 'c' and 'd' must have the same length") method <- .normargMethod(method, length(a)) if (method == "quick") { ans <- .selfmatchIntegerQuads_quick(a, b, c, d) } else { ans <- .selfmatchIntegerQuads_hash(a, b, c, d) } ans } ### Exported! duplicatedIntegerQuads <- function(a, b, c, d, fromLast=FALSE, method=c("auto", "quick", "hash")) { a <- .normargIntegerOrFactor(a, "a") b <- .normargIntegerOrFactor(b, "b") c <- .normargIntegerOrFactor(c, "c") d <- .normargIntegerOrFactor(d, "d") if (length(a) != length(b) || length(b) != length(c) || length(c) != length(d)) stop("'a', 'b', 'c' and 'd' must have the same length") if (!isTRUEorFALSE(fromLast)) stop("'fromLast' must be TRUE or FALSE") if (length(a) == 0L) return(logical(0L)) if (length(a) == 1L) return(FALSE) ## This is a temporary (and inefficient) workaround until "quick" ## and "hash" methods can natively support fromLast=TRUE. ## TODO: Add support for fromLast=TRUE to "quick" and "hash" methods. if (fromLast) return(rev(duplicatedIntegerQuads(rev(a), rev(b), rev(c), rev(d), method=method))) sm <- selfmatchIntegerQuads(a, b, c, d, method=method) sm != seq_len(length(sm)) } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### tabulate2() ### ### An enhanced version of base::tabulate() that: (1) handles integer weights ### (NA and negative weights are OK), and (2) throws an error if 'strict' is ### TRUE and if 'x' contains NAs or values not in the [1, 'nbins'] interval. ### Unlike with base::tabulate(), 'nbins' needs to be specified (no default ### value). Also for now, it only works if 'x' is an integer vector. ### tabulate2 <- function(x, nbins, weight=1L, strict=FALSE) { if (!is.integer(x)) stop("'x' must be an integer vector") if (!isSingleNumber(nbins)) stop("'nbins' must be a single integer") if (!is.integer(nbins)) nbins <- as.integer(nbins) if (!is.integer(weight)) stop("'weight' must be an integer vector") if (!isTRUEorFALSE(strict)) stop("'strict' must be TRUE or FALSE") .Call2("Integer_tabulate2", x, nbins, weight, strict, PACKAGE="S4Vectors") } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Bitwise operations. ### ### The bitwise operations in this section don't treat the integer NA (aka ### NA_integer_) in any particular way: at the C level an NA_integer_ is ### just a 32-bit pattern like any other int in C. ### makePowersOfTwo <- function(nbit) { if (!isSingleInteger(nbit) || nbit < 0L) stop("'nbit' must be a single non-negative integer") if (nbit == 0L) return(integer(0)) as.integer(cumprod(c(1L, rep.int(2L, nbit-1L)))) } ### Returns an integer matrix with 'length(x)' rows and 'length(bitpos)' cols. explodeIntBits <- function(x, bitpos=1:32) { if (!is.integer(x)) stop("'x' must be an integer vector") if (!is.integer(bitpos)) stop("'bitpos' must be an integer vector") ## Old implementation: not very efficient and also broken on NAs and ## negative integers! #if (length(bitpos) == 0L) # return(matrix(nrow=length(x), ncol=0L)) #nbit <- max(bitpos) #if (is.na(nbit) || min(bitpos) <= 0L) # stop("'bitpos' must contain potive values only") #ans <- matrix(nrow=length(x), ncol=nbit) #for (i in seq_len(ncol(ans))) { # ans[ , i] <- x %% 2L # x <- x %/% 2L #} #ans[ , bitpos, drop=FALSE] .Call2("Integer_explode_bits", x, bitpos, PACKAGE="S4Vectors") } ### FIXME: Broken if ncol(x) = 32. implodeIntBits <- function(x) { if (!is.matrix(x)) stop("'x' must be a matrix") tx <- t(x) data <- tx * makePowersOfTwo(nrow(tx)) ## In some circumstances (e.g. if 'tx' has 0 col), the "dim" attribute ## gets lost during the above multiplication. if (is.null(dim(data))) dim(data) <- dim(tx) as.integer(colSums(data)) } intbitsNOT <- function(x) { stop("not yet implemented") # fix implodeIntBits() first! xbits <- explodeIntBits(x) implodeIntBits(!xbits) } intbitsAND <- function(x, y) { stop("not yet implemented") # fix implodeIntBits() first! xbits <- explodeIntBits(x) ybits <- explodeIntBits(y) implodeIntBits(xbits & ybits) } intbitsOR <- function(x, y) { stop("not yet implemented") # fix implodeIntBits() first! xbits <- explodeIntBits(x) ybits <- explodeIntBits(y) implodeIntBits(xbits | ybits) } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Others. ### sortedMerge <- function(x, y) .Call2("Integer_sorted_merge", x, y, PACKAGE="S4Vectors") mseq <- function(from, to) { if (!is.integer(from)) from <- as.integer(from) if (!is.integer(to)) to <- as.integer(to) .Call2("Integer_mseq", from, to, PACKAGE="S4Vectors") } fancy_mseq <- function(lengths, offset=0L, rev=FALSE) { if (!is.integer(lengths)) lengths <- as.integer(lengths) if (!is.integer(offset)) offset <- as.integer(offset) if (!is.logical(rev)) stop("'rev' must be a logical vector") #unlist(lapply(seq_len(length(lengths)), # function(i) { # tmp <- seq_len(lengths[i]) + offset[i] # if (rev[i]) # tmp <- rev(tmp) # tmp # })) .Call2("Integer_fancy_mseq", lengths, offset, rev, PACKAGE="S4Vectors") } make_XYZxyz_to_XxYyZz_subscript <- function(N) as.vector(matrix(seq_len(2L * N), nrow=2L, byrow=TRUE)) findIntervalAndStartFromWidth <- function(x, width) .Call2("findIntervalAndStartFromWidth", x, width, PACKAGE="S4Vectors") ### Reverse an injection from 1:M to 1:N. ### The injection is represented by an integer vector of length M (eventually ### with NAs). Fundamental property: ### ### reverseIntegerInjection(reverseIntegerInjection(injection, N), M) ### ### is the identity function. ### Can be used to efficiently reverse the result of a call to 'order()'. reverseIntegerInjection <- function(injection, N) { M <- length(injection) ans <- rep.int(NA_integer_, N) is_not_na <- !is.na(injection) ans[injection[is_not_na]] <- seq_len(M)[is_not_na] ans } S4Vectors/R/isSorted.R0000644000175400017540000001465613175714520015575 0ustar00biocbuildbiocbuild### ========================================================================= ### isConstant(), isSorted(), isStrictlySorted() ### ------------------------------------------------------------------------- ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### isConstant() ### setGeneric("isConstant", function(x) standardGeneric("isConstant")) ### There are many ways to implement the "isConstant" method for integer ### vectors: ### isConstant1 <- function(x) {length(x) <= 1L || all(x == x[1L])} ### isConstant2 <- function(x) {length(unique(x)) <= 1L} ### isConstant3 <- function(x) {length(x) <= 1L || all(duplicated(x)[-1L])} ### isConstant4 <- function(x) {length(x) <= 1L || ### sum(duplicated(x)) == length(x) - 1L} ### isConstant5 <- function(x) {length(x) <= 1L || min(x) == max(x)} ### isConstant6 <- function(x) {length(x) <= 1L || ### {rx <- range(x); rx[1L] == rx[2L]}} ### Which one is faster is hard to guess. It happens to be isConstant5(): ### it's 2.7x faster than isConstant1(), 6x faster than isConstant2(), 11x ### faster than isConstant3(), 5.2x faster than isConstant4() and 1.6x faster ### than isConstant6(). ### Results obtained on 'x0 <- rep.int(112L, 999999L)' with R-2.13 Under ### development (unstable) (2011-01-08 r53945). ### For this method we use a modified version of isConstant5() above that ### handles NAs. setMethod("isConstant", "integer", function(x) { if (length(x) <= 1L) return(TRUE) x_min <- min(x, na.rm=FALSE) if (!is.na(x_min)) # success means 'x' contains no NAs return(x_min == max(x, na.rm=FALSE)) ## From here 'x' is guaranteed to have a length >= 2 and to contain ## at least an NA. ## 'min(x, na.rm=TRUE)' issues a warning if 'x' contains only NAs. ## In that case, and in that case only, it returns Inf. x_min <- suppressWarnings(min(x, na.rm=TRUE)) if (x_min == Inf) return(NA) ## From here 'x' is guaranteed to contain a mix of NAs and non-NAs. x_max <- max(x, na.rm=TRUE) if (x_min == x_max) return(NA) FALSE } ) ### Like the method for integer vectors this method also uses a comparison ### between min(x) and max(x). In addition it needs to handle rounding errors ### and special values: NA, NaN, Inf and -Inf. ### Using all.equal() ensures that TRUE is returned on c(11/3, 2/3+4/3+5/3). setMethod("isConstant", "numeric", function(x) { if (length(x) <= 1L) return(TRUE) x_min <- min(x, na.rm=FALSE) if (!is.na(x_min)) { # success means 'x' contains no NAs and no NaNs x_max <- max(x, na.rm=FALSE) if (is.finite(x_min) && is.finite(x_max)) return(isTRUE(all.equal(x_min, x_max))) if (x_min == x_max) # both are Inf or both are -Inf return(NA) return(FALSE) } ## From here 'x' is guaranteed to have a length >= 2 and to contain ## at least an NA or NaN. ## 'min(x, na.rm=TRUE)' issues a warning if 'x' contains only NAs ## and NaNs. x_min <- suppressWarnings(min(x, na.rm=TRUE)) if (x_min == Inf) { ## Only possible values in 'x' are NAs, NaNs or Infs. is_in_x <- c(NA, NaN, Inf) %in% x if (is_in_x[2L] && is_in_x[3L]) return(FALSE) return(NA) } ## From here 'x' is guaranteed to contain at least one value that is ## not NA or NaN or Inf. x_max <- max(x, na.rm=TRUE) if (x_max == -Inf) { ## Only possible values in 'x' are NAs, NaNs or -Infs. is_in_x <- c(NA, NaN, -Inf) %in% x if (is_in_x[2L] && is_in_x[3L]) return(FALSE) return(NA) } if (is.infinite(x_min) || is.infinite(x_max)) return(FALSE) if (!isTRUE(all.equal(x_min, x_max))) return(FALSE) if (NaN %in% x) return(FALSE) return(NA) } ) setMethod("isConstant", "array", function(x) isConstant(as.vector(x))) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### isNotSorted(), isNotStrictlySorted() ### ### NOT exported. ### ### isNotStrictlySorted() takes for granted that 'x' contains no NAs (behaviour ### is undefined if this is not the case). This allows isNotStrictlySorted() to ### be MUCH faster than is.unsorted() in some situations: ### > x <- c(99L, 1:1000000) ### > system.time(for (i in 1:1000) isNotStrictlySorted(x)) ### user system elapsed ### 0.004 0.000 0.003 ### > system.time(for (i in 1:1000) is.unsorted(x, strictly=TRUE)) ### user system elapsed ### 6.925 1.756 8.690 ### So let's keep it for now! Until someone has enough time and energy to ### convince the R core team to fix is.unsorted()... ### Note that is.unsorted() does not only have a performance problem: ### a) It also has a semantic problem: is.unsorted(NA) returns NA despite the ### man page stating that all objects of length 0 or 1 are sorted (sounds ### like a fair statement). ### b) The sort()/is.unsorted() APIs and semantics are inconsistent. ### c) Why did they choose to have is.unsorted() instead of is.sorted() in the ### first place? Having is.unsorted( , strictly=TRUE) being a "looser test" ### (or a "weaker condition") than is.unsorted( , strictly=FALSE) is really ### counterintuitive! ### > is.unsorted(c(5L, 5:8), strictly=FALSE) ### [1] FALSE ### > is.unsorted(c(5L, 5:8), strictly=TRUE) ### [1] TRUE ### Common sense would expect to have less objects that are "strictly ### something" than objects that are "just something". ..Internal <- .Internal # a silly trick to keep 'R CMD check' quiet isNotSorted <- function(x) ..Internal(is.unsorted(x, FALSE)) isNotStrictlySorted <- function(x) ..Internal(is.unsorted(x, TRUE)) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### isSorted() ### setGeneric("isSorted", function(x) standardGeneric("isSorted")) setMethod("isSorted", "ANY", function(x) !isNotSorted(x)) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### isStrictlySorted() ### setGeneric("isStrictlySorted", function(x) standardGeneric("isStrictlySorted") ) setMethod("isStrictlySorted", "ANY", function(x) !isNotStrictlySorted(x)) S4Vectors/R/map_ranges_to_runs.R0000644000175400017540000000231613175714520017654 0ustar00biocbuildbiocbuild### ========================================================================= ### map_ranges_to_runs() ### ------------------------------------------------------------------------- ### normarg_method <- function(method) { if (!(isSingleNumber(method) && method >= 0 && method <= 3)) stop("'method' must be a single integer between 0 and 3") if (!is.integer(method)) method <- as.integer(method) method } ### Used in GenomicRanges. map_ranges_to_runs <- function(run_lens, start, width, method=0L) { method <- normarg_method(method) .Call2("map_ranges", run_lens, start, width, method, PACKAGE="S4Vectors") } ### Note that ### ### map_positions_to_runs(run_lengths, pos) ### ### is equivalent to ### ### findInterval(pos - 1L, cumsum(run_lengths)) + 1L ### ### but is more efficient, specially when the number of runs is big and the ### number of positions to map relatively small with respect to the number of ### runs (in which case map_positions_to_runs() can be 10x or 20x faster than ### findInterval()). map_positions_to_runs <- function(run_lens, pos, method=0L) { method <- normarg_method(method) .Call2("map_positions", run_lens, pos, method, PACKAGE="S4Vectors") } S4Vectors/R/normarg-utils.R0000644000175400017540000003161113175714520016572 0ustar00biocbuildbiocbuild### ========================================================================= ### Utility functions for checking/fixing user-supplied arguments ### ------------------------------------------------------------------------- ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### For checking only. ### isTRUEorFALSE <- function(x) { is.logical(x) && length(x) == 1L && !is.na(x) } isSingleInteger <- function(x) { is.integer(x) && length(x) == 1L && !is.na(x) } isSingleNumber <- function(x) { is.numeric(x) && length(x) == 1L && !is.na(x) } isSingleString <- function(x) { is.character(x) && length(x) == 1L && !is.na(x) } ### We want these functions to return TRUE when passed an NA of whatever type. isSingleNumberOrNA <- function(x) { is.atomic(x) && length(x) == 1L && (is.numeric(x) || is.na(x)) } isSingleStringOrNA <- function(x) { is.atomic(x) && length(x) == 1L && (is.character(x) || is.na(x)) } ### NOT exported. anyMissing <- function(x) .Call2("anyMissing", x, PACKAGE="S4Vectors") ### NOT exported. isNumericOrNAs <- function(x) { is.numeric(x) || (is.atomic(x) && is.vector(x) && all(is.na(x))) } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Vertical/horiontal recycling of a vector-like/list-like object. ### ### Vertical recycling (of any vector-like object). ### NOT exported. V_recycle <- function(x, skeleton, x_what="x", skeleton_what="skeleton") { x_NROW <- NROW(x) skeleton_len <- length(skeleton) if (x_NROW == skeleton_len) return(x) if (x_NROW > skeleton_len && x_NROW != 1L) stop(wmsg( "'NROW(", x_what, ")' is greater than ", "'length(", skeleton_what, ")'" )) if (x_NROW == 0L) stop(wmsg( "'NROW(", x_what, ")' is 0 but ", "'length(", skeleton_what, ")' is not" )) if (skeleton_len %% x_NROW != 0L) warning(wmsg( "'length(", skeleton_what, ")' is not a multiple of ", "'NROW(", x_what, ")'" )) idx <- rep(seq_len(x_NROW), length.out=skeleton_len) extractROWS(x, idx) } ### Horizontal recycling (of a list-like object only). ### NOT exported. H_recycle <- function(x, skeleton, x_what="x", skeleton_what="skeleton", more_blahblah=NA) { stopifnot(is.list(x) || is(x, "List")) stopifnot(is.list(skeleton) || is(skeleton, "List")) x_len <- length(x) skeleton_len <- length(skeleton) stopifnot(x_len == skeleton_len) x_what2 <- paste0("some list elements in '", x_what, "'") if (!is.na(more_blahblah)) x_what2 <- paste0(x_what2, " (", more_blahblah, ")") x_eltNROWS <- unname(elementNROWS(x)) skeleton_eltNROWS <- unname(elementNROWS(skeleton)) idx <- which(x_eltNROWS != skeleton_eltNROWS) if (length(idx) == 0L) return(x) longer_idx <- which(x_eltNROWS > skeleton_eltNROWS) shorter_idx <- which(x_eltNROWS < skeleton_eltNROWS) if (length(longer_idx) == 0L && length(shorter_idx) == 0L) return(x) if (length(longer_idx) != 0L) { if (max(x_eltNROWS[longer_idx]) >= 2L) stop(wmsg( x_what2, " are longer than their corresponding ", "list element in '", skeleton_what, "'" )) } if (length(shorter_idx) != 0L) { tmp <- x_eltNROWS[shorter_idx] if (min(tmp) == 0L) stop(wmsg( x_what2, " are of length 0, but their corresponding ", "list element in '", skeleton_what, "' is not" )) if (max(tmp) >= 2L) stop(wmsg( x_what2, " are shorter than their corresponding ", "list element in '", skeleton_what, "', but have ", "a length >= 2. \"Horizontal\" recycling only supports ", "list elements of length 1 at the moment." )) } ## From here 'x[idx]' is guaranteed to contain list elements of length 1. ## We use an "unlist => stretch => relist" algo to perform the horizontal ## recycling. Because of this, the returned value is not necessary of the ## same class as 'x' (e.g. can be an IntegerList if 'x' is an ordinary ## list of integers and 'skeleton' a List object). unlisted_x <- unlist(x, use.names=FALSE) times <- rep.int(1L, length(unlisted_x)) idx2 <- cumsum(x_eltNROWS)[idx] times[idx2] <- skeleton_eltNROWS[idx] unlisted_ans <- rep.int(unlisted_x, times) ans <- relist(unlisted_ans, skeleton) names(ans) <- names(x) ans } ### Performs first vertical then horizontal recycling (of a list-like object ### only). ### NOT exported. VH_recycle <- function(x, skeleton, x_what="x", skeleton_what="skeleton", more_blahblah=NA) { x <- V_recycle(x, skeleton, x_what=x_what, skeleton_what=skeleton_what) H_recycle(x, skeleton, x_what=x_what, skeleton_what=skeleton_what, more_blahblah=more_blahblah) } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### More recycling of a vector-like object. ### ### TODO: This section needs to be cleaned. Some of the stuff in it is ### redundant with and superseded by V_recycle() and/or H_recycle() (defined ### in the previous section). ### ### NOT exported. ### recycleVector() vs rep(x, length.out=length): ### - The former seems a little bit faster (1.5x - 2x). ### - The former will issue a warning that "number of items to replace is not ### a multiple of replacement length". The latter will always remain silent. recycleVector <- function(x, length.out) { if (length(x) == length.out) { x } else { ans <- vector(storage.mode(x), length.out) ans[] <- x ans } } ### Must always drop the names of 'arg'. recycleArg <- function(arg, argname, length.out) { if (length.out == 0L) { if (length(arg) > 1L) stop("invalid length for '", argname, "'") if (length(arg) == 1L && is.na(arg)) stop("'", argname, "' contains NAs") return(recycleVector(arg, length.out)) # drops the names } if (length(arg) == 0L) stop("'", argname, "' has no elements") if (length(arg) > length.out) stop("'", argname, "' is longer than 'x'") if (anyMissing(arg)) stop("'", argname, "' contains NAs") if (length(arg) < length.out) arg <- recycleVector(arg, length.out) # drops the names else arg <- unname(arg) arg } recycleIntegerArg <- function(arg, argname, length.out) { if (!is.numeric(arg)) stop("'", argname, "' must be a vector of integers") if (!is.integer(arg)) arg <- as.integer(arg) recycleArg(arg, argname, length.out) } recycleNumericArg <- function(arg, argname, length.out) { if (!is.numeric(arg)) stop("'", argname, "' must be a numeric vector") recycleArg(arg, argname, length.out) } recycleLogicalArg <- function(arg, argname, length.out) { if (!is.logical(arg)) stop("'", argname, "' must be a logical vector") recycleArg(arg, argname, length.out) } recycleCharacterArg <- function(arg, argname, length.out) { if (!is.character(arg)) stop("'", argname, "' must be a character vector") recycleArg(arg, argname, length.out) } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Normalization of replacement values ### ### NOT exported. normalize_names_replacement_value <- function(value, x) { if (is.null(value)) return(NULL) value <- as.character(value) value_len <- length(value) x_len <- length(x) if (value_len > x_len) stop(wmsg("attempt to set too many names (", value_len, ") ", "on ", class(x), " object of length ", x_len)) if (value_len < x_len) { ## We pad with NA's to mimic what 'names(x) <- value' does on an ## ordinary vector. value <- c(value, rep.int(NA_character_, x_len - value_len)) } value } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Fold a vector-like object. ### ### We use a signature in the style of IRanges::successiveIRanges() or ### IRanges::successiveViews(). ### The current implementation should be fast enough if length(x)/circle.length ### is small (i.e. < 10 or 20). This will actually be the case for the typical ### usecase which is the calculation of "circular coverage vectors", that is, ### we use fold() on the "linear coverage vector" to turn it into a "circular ### coverage vector" of length 'circle.length' where 'circle.length' is the ### length of the circular sequence. fold <- function(x, circle.length, from=1) { if (typeof(x) != "S4" && !is.numeric(x) && !is.complex(x)) stop("'x' must be a vector-like object with elements that can be added") if (!isSingleNumber(circle.length)) stop("'circle.length' must be a single integer") if (!is.integer(circle.length)) circle.length <- as.integer(circle.length) if (circle.length <= 0L) stop("'circle.length' must be positive") if (!isSingleNumber(from)) stop("'from' must be a single integer") if (!is.integer(from)) from <- as.integer(from) from <- 1L + (from - 1L) %% circle.length if (typeof(x) == "S4") { ans <- as(rep.int(0L, circle.length), class(x)) if (length(ans) != circle.length) stop("don't know how to handle 'x' of class ", class(x)) } else { ans <- vector(typeof(x), length=circle.length) } if (from > length(x)) { ## Nothing to fold jj <- seq_len(length(x)) + circle.length - from + 1L ans[jj] <- x return(ans) } if (from > 1L) { ii <- seq_len(from - 1L) jj <- ii + circle.length - from + 1L ans[jj] <- x[ii] } max_from <- length(x) - circle.length + 1L while (from <= max_from) { ii <- from:(from+circle.length-1L) ans[] <- ans[] + x[ii] from <- from + circle.length } if (from > length(x)) return(ans) ii <- from:length(x) jj <- ii - from + 1L ans[jj] <- ans[jj] + x[ii] ans } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Other non exported normarg* functions. ### ### NOT exported. normargSingleStartOrNA <- function(start) { if (!isSingleNumberOrNA(start)) stop("'start' must be a single integer or NA") if (!is.integer(start)) start <- as.integer(start) start } ### NOT exported. normargSingleEndOrNA <- function(end) { if (!isSingleNumberOrNA(end)) stop("'end' must be a single integer or NA") if (!is.integer(end)) end <- as.integer(end) end } ### NOT exported. normargUseNames <- function(use.names) { if (is.null(use.names)) return(TRUE) if (!isTRUEorFALSE(use.names)) stop("'use.names' must be TRUE or FALSE") use.names } ### NOT exported. normargRunK <- function(k, n, endrule) { if (!is.numeric(k)) stop("'k' must be a numeric vector") if (k < 0) stop("'k' must be positive") if ((endrule != "drop") && (k %% 2 == 0)) { k <- 1L + 2L * (k %/% 2L) warning(paste("'k' must be odd when 'endrule != \"drop\"'!", "Changing 'k' to ", k)) } if (k > n) { k <- 1L + 2L * ((n - 1L) %/% 2L) warning("'k' is bigger than 'n'! Changing 'k' to ", k) } as.integer(k) } ### NOT exported. normargSubset2_iOnly <- function(x, i, j, ..., .conditionPrefix=character()) { if (!missing(j) || length(list(...)) > 0) warning(.conditionPrefix, "arguments beyond 'i' ignored") if (missing(i)) stop(.conditionPrefix, "subscript 'i' is missing") if (!is.character(i) && !is.numeric(i)) stop(.conditionPrefix, "invalid subscript 'i' type") if (length(i) < 1L) stop(.conditionPrefix, "attempt to select less than one element") if (length(i) > 1L) stop(.conditionPrefix, "attempt to select more than one element") if (is.numeric(i) && (i < 1L || i > length(x)+1)) stop(.conditionPrefix, "subscript 'i' out of bounds") if (is.character(i)) { i <- match(i, names(x)) if (is.na(i)) i <- length(x) + 1L } i } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Miscellaneous. ### ### NOT exported. numeric2integer <- function(x) { if (is.numeric(x) && !is.integer(x)) as.integer(x) else x } ### NOT exported. extraArgsAsList <- function(.valid.argnames, ...) { args <- list(...) argnames <- names(args) if (length(args) != 0L && (is.null(argnames) || any(argnames %in% c("", NA)))) stop("all extra arguments must be named") if (!is.null(.valid.argnames) && !all(argnames %in% .valid.argnames)) stop("valid extra argument names are ", paste("'", .valid.argnames, "'", sep="", collapse=", ")) if (anyDuplicated(argnames)) stop("argument names must be unique") args } S4Vectors/R/shiftApply-methods.R0000644000175400017540000000420013175714520017545 0ustar00biocbuildbiocbuild### ========================================================================= ### "shiftApply" methods ### ------------------------------------------------------------------------- setGeneric("shiftApply", signature=c("X", "Y"), function(SHIFT, X, Y, FUN, ..., OFFSET=0L, simplify=TRUE, verbose=FALSE) standardGeneric("shiftApply") ) .Vector_shiftApply <- function(SHIFT, X, Y, FUN, ..., OFFSET=0L, simplify=TRUE, verbose=FALSE) { FUN <- match.fun(FUN) N <- length(X) if (N != length(Y)) stop("'X' and 'Y' must be of equal length") if (!is.integer(SHIFT)) SHIFT <- as.integer(SHIFT) if (length(SHIFT) == 0 || anyMissingOrOutside(SHIFT, 0L)) stop("all 'SHIFT' values must be non-negative") if (!is.integer(OFFSET)) OFFSET <- as.integer(OFFSET) if (length(OFFSET) == 0 || anyMissingOrOutside(OFFSET, 0L)) stop("'OFFSET' must be non-negative") ## Perform X setup shiftedStartX <- rep.int(1L + OFFSET, length(SHIFT)) shiftedEndX <- N - SHIFT ## Perform Y setup shiftedStartY <- 1L + SHIFT shiftedEndY <- rep.int(N - OFFSET, length(SHIFT)) if (verbose) { maxI <- length(SHIFT) ans <- sapply(seq_len(length(SHIFT)), function(i) { cat("\r", i, "/", maxI) FUN(Vector_window(X, start = shiftedStartX[i], end = shiftedEndX[i]), Vector_window(Y, start = shiftedStartY[i], end = shiftedEndY[i]), ...) }, simplify = simplify) cat("\n") } else { ans <- sapply(seq_len(length(SHIFT)), function(i) FUN(Vector_window(X, start = shiftedStartX[i], end = shiftedEndX[i]), Vector_window(Y, start = shiftedStartY[i], end = shiftedEndY[i]), ...), simplify = simplify) } ans } setMethod("shiftApply", signature(X="Vector", Y="Vector"), .Vector_shiftApply) setMethod("shiftApply", signature(X="vector", Y="vector"), .Vector_shiftApply) S4Vectors/R/show-utils.R0000644000175400017540000003203713175714520016110 0ustar00biocbuildbiocbuild### ========================================================================= ### Some low-level (not exported) utility functions used by various "show" ### methods ### ------------------------------------------------------------------------- ### ### Unless stated otherwise, nothing in this file is exported. ### ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### selectSome() ### ### taken directly from Biobase, then added 'ellipsisPos' argument selectSome <- function(obj, maxToShow = 5, ellipsis = "...", ellipsisPos = c("middle", "end", "start"), quote=FALSE) { if(is.character(obj) && quote) obj <- sQuote(obj) ellipsisPos <- match.arg(ellipsisPos) len <- length(obj) if (maxToShow < 3) maxToShow <- 3 if (len > maxToShow) { maxToShow <- maxToShow - 1 if (ellipsisPos == "end") { c(head(obj, maxToShow), ellipsis) } else if (ellipsisPos == "start") { c(ellipsis, tail(obj, maxToShow)) } else { bot <- ceiling(maxToShow/2) top <- len - (maxToShow - bot - 1) nms <- obj[c(1:bot, top:len)] c(as.character(nms[1:bot]), ellipsis, as.character(nms[-c(1:bot)])) } } else { obj } } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### labeledLine() ### .qualifyByName <- function(x, qualifier="=") { nms <- names(x) x <- as.character(x) aliased <- nzchar(nms) x[aliased] <- paste0(nms[aliased], qualifier, x[aliased]) x } .padToAlign <- function(x) { whitespace <- paste(rep(" ", getOption("width")), collapse="") padlen <- max(nchar(x)) - nchar(x) substring(whitespace, 1L, padlen) } .ellipsize <- function(obj, width = getOption("width"), sep = " ", ellipsis = "...", pos = c("middle", "end", "start")) { pos <- match.arg(pos) if (is.null(obj)) obj <- "NULL" if (is.factor(obj)) obj <- as.character(obj) ## get order selectSome() would print if (pos == "middle") { if (length(obj) > 2 * width) obj <- c(head(obj, width), tail(obj, width)) half <- seq_len(ceiling(length(obj) / 2L)) ind <- as.vector(rbind(half, length(obj) - half + 1L)) } else if (pos == "end") { obj <- head(obj, width) ind <- seq_len(length(obj)) } else { obj <- tail(obj, width) ind <- rev(seq_len(length(obj))) } str <- encodeString(obj) nc <- cumsum(nchar(str[ind]) + nchar(sep)) - nchar(sep) last <- findInterval(width, nc) if (length(obj) > last) { ## make sure ellipsis fits while (last && (nc[last] + nchar(sep)*2^(last>1) + nchar(ellipsis)) > width) last <- last - 1L if (last == 0) { ## have to truncate the first/last element if (pos == "start") { str <- paste(ellipsis, substring(tail(str, 1L), nchar(tail(str, 1L))-(width-nchar(ellipsis))+1L, nchar(ellipsis)), sep = "") } else { str <- paste(substring(str[1L], 1, width - nchar(ellipsis)), ellipsis, sep = "") } } else if (last == 1) { ## can only show the first/last if (pos == "start") str <- c(ellipsis, tail(str, 1L)) else str <- c(str[1L], ellipsis) } else { str <- selectSome(str, last + 1L, ellipsis, pos) } } paste(str, collapse = sep) } labeledLine <- function(label, els, count = TRUE, labelSep = ":", sep = " ", ellipsis = "...", ellipsisPos = c("middle", "end", "start"), vectorized = FALSE, pad = vectorized) { if (!is.null(els)) { label[count] <- paste(label, "(", if (vectorized) lengths(els) else length(els), ")", sep = "")[count] if (!is.null(names(els))) { els <- .qualifyByName(els) } } label <- paste(label, labelSep, " ", sep = "") if (pad) { label <- paste0(label, .padToAlign(label)) } width <- getOption("width") - nchar(label) ellipsisPos <- match.arg(ellipsisPos) if (vectorized) { .ellipsize <- Vectorize(.ellipsize) } line <- .ellipsize(els, width, sep, ellipsis, ellipsisPos) paste(label, line, "\n", sep = "") } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### get_showHeadLines() and get_showTailLines() ### ### showHeadLines and showTailLines robust to NA, Inf and non-integer .get_showLines <- function(default, option) { opt <- getOption(option, default=default) if (!is.infinite(opt)) opt <- as.integer(opt) if (is.na(opt)) opt <- default opt } ### Exported! get_showHeadLines <- function() .get_showLines(5L, "showHeadLines") ### Exported! get_showTailLines <- function() .get_showLines(5L, "showTailLines") ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Pretty printing ### ### Exported! printAtomicVectorInAGrid <- function(x, prefix="", justify="left") { if (!is.character(x)) x <- setNames(as.character(x), names(x)) ## Nothing to print if length(x) is 0. if (length(x) == 0L) return(invisible(x)) ## Determine the nb of cols in the grid. grid_width <- getOption("width") + 1L - nchar(prefix) cell_width <- max(3L, nchar(x), nchar(names(x))) ncol <- grid_width %/% (cell_width + 1L) ## Determine the nb of rows in the grid. nrow <- length(x) %/% ncol remainder <- length(x) %% ncol if (remainder != 0L) { nrow <- nrow + 1L x <- c(x, character(ncol - remainder)) } ## Print the grid. print_line <- function(y) { cells <- format(y, justify=justify, width=cell_width) cat(prefix, paste0(cells, collapse=" "), "\n", sep="") } print_grid_row <- function(i) { idx <- (i - 1L) * ncol + seq_len(ncol) slice <- x[idx] if (!is.null(names(slice))) print_line(names(slice)) print_line(slice) } n1 <- get_showHeadLines() n2 <- get_showTailLines() if (nrow <= n1 + n2) { for (i in seq_len(nrow)) print_grid_row(i) } else { idx1 <- seq_len(n1) idx2 <- nrow - n2 + seq_len(n2) for (i in idx1) print_grid_row(i) print_line(rep.int("...", ncol)) for (i in idx2) print_grid_row(i) } invisible(x) } ### 'makeNakedMat.FUN' must be a function returning a character matrix. makePrettyMatrixForCompactPrinting <- function(x, makeNakedMat.FUN) { nhead <- get_showHeadLines() ntail <- get_showTailLines() x_NROW <- NROW(x) x_ROWNAMES <- ROWNAMES(x) wrap_in_square_brackets <- function(idx) { if (length(idx) == 0L) return(character(0)) paste0("[", idx, "]") } if (x_NROW <= nhead + ntail + 1L) { ## Compute 'ans' (the matrix). ans <- makeNakedMat.FUN(x) ## Compute 'ans_rownames' (the matrix row names). if (is.null(x_ROWNAMES)) { ans_rownames <- wrap_in_square_brackets(seq_len(x_NROW)) } else { ans_rownames <- x_ROWNAMES } } else { ## Compute 'ans' (the matrix). ans_top <- makeNakedMat.FUN(head(x, n=nhead)) ans_bottom <- makeNakedMat.FUN(tail(x, n=ntail)) ellipses <- rep.int("...", ncol(ans_top)) ellipses[colnames(ans_top) %in% "|"] <- "." ans <- rbind(ans_top, matrix(ellipses, nrow=1L), ans_bottom) ## Compute 'ans_rownames' (the matrix row names). if (is.null(x_ROWNAMES)) { idx1 <- seq(from=1L, by=1L, length.out=nhead) idx2 <- seq(to=x_NROW, by=1L, length.out=ntail) s1 <- wrap_in_square_brackets(idx1) s2 <- wrap_in_square_brackets(idx2) } else { s1 <- head(x_ROWNAMES, n=nhead) s2 <- tail(x_ROWNAMES, n=ntail) } max_width <- max(nchar(s1, type="width"), nchar(s2, type="width")) if (max_width <= 1L) { ellipsis <- "." } else if (max_width == 2L) { ellipsis <- ".." } else { ellipsis <- "..." } ans_rownames <- c(s1, ellipsis, s2) } rownames(ans) <- format(ans_rownames, justify="right") ans } makeClassinfoRowForCompactPrinting <- function(x, col2class) { ans_names <- names(col2class) no_bracket <- ans_names == "" ans_names[no_bracket] <- col2class[no_bracket] left_brackets <- right_brackets <- character(length(col2class)) left_brackets[!no_bracket] <- "<" right_brackets[!no_bracket] <- ">" ans <- paste0(left_brackets, col2class, right_brackets) names(ans) <- ans_names x_mcols <- mcols(x) x_nmc <- if (is.null(x_mcols)) 0L else ncol(x_mcols) if (x_nmc > 0L) { tmp <- sapply(x_mcols, function(xx) paste0("<", classNameForDisplay(xx), ">")) ans <- c(ans, `|`="|", tmp) } matrix(ans, nrow=1L, dimnames=list("", names(ans))) } ### Works as long as length(), "[" and as.numeric() work on 'x'. ### Not exported. toNumSnippet <- function(x, max.width) { if (length(x) <= 2L) return(paste(format(as.numeric(x)), collapse=" ")) if (max.width < 0L) max.width <- 0L ## Elt width and nb of elt to display if they were all 0. elt_width0 <- 1L nelt_to_display0 <- min(length(x), (max.width+1L) %/% (elt_width0+1L)) head_ii0 <- seq_len(nelt_to_display0 %/% 2L) tail_ii0 <- length(x) + head_ii0 - length(head_ii0) ii0 <- c(head_ii0, tail_ii0) ## Effective elt width and nb of elt to display elt_width <- format.info(as.numeric(x[ii0]))[1L] nelt_to_display <- min(length(x), (max.width+1L) %/% (elt_width+1L)) if (nelt_to_display == length(x)) return(paste(format(as.numeric(x), width=elt_width), collapse=" ")) head_ii <- seq_len((nelt_to_display+1L) %/% 2L) tail_ii <- length(x) + seq_len(nelt_to_display %/% 2L) - nelt_to_display %/% 2L ans_head <- format(as.numeric(x[head_ii]), width=elt_width) ans_tail <- format(as.numeric(x[tail_ii]), width=elt_width) ans <- paste(paste(ans_head, collapse=" "), "...", paste(ans_tail, collapse=" ")) if (nchar(ans) <= max.width || length(ans_head) == 0L) return(ans) ans_head <- ans_head[-length(ans_head)] ans <- paste(paste(ans_head, collapse=" "), "...", paste(ans_tail, collapse=" ")) if (nchar(ans) <= max.width || length(ans_tail) == 0L) return(ans) ans_tail <- ans_tail[-length(ans_tail)] paste(paste(ans_head, collapse=" "), "...", paste(ans_tail, collapse=" ")) } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### classNameForDisplay() ### ### Exported! setGeneric("classNameForDisplay", function(x) standardGeneric("classNameForDisplay")) setMethod("classNameForDisplay", "ANY", function(x) { ## Selecting the 1st element guarantees that we return a single string ## (e.g. on an ordered factor, class(x) returns a character vector of ## length 2). class(x)[1L] } ) .drop_AsIs <- function(x) { #x_class <- class(x) #if (x_class[[1L]] == "AsIs") # class(x) <- x_class[-1L] ## Simpler, and probably more robust, than the above. class(x) <- setdiff(class(x), "AsIs") x } setMethod("classNameForDisplay", "AsIs", function(x) classNameForDisplay(.drop_AsIs(x)) ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### showAsCell() ### ### All "showAsCell" methods should return a character vector. ### ### Exported! setGeneric("showAsCell", function(object) standardGeneric("showAsCell") ) ### Must work on any array-like object (i.e. on any object with 2 dimensions ### or more) e.g. ordinary array or matrix, Matrix, data.frame, DataFrame, ### data.table, etc... .showAsCell_array <- function(object) { if (length(dim(object)) > 2L) dim(object) <- c(nrow(object), prod(tail(dim(object), -1L))) object_ncol <- ncol(object) if (object_ncol == 0L) return(rep.int("", nrow(object))) object <- lapply(head(seq_len(object_ncol), 3L), function(i) object[ , i, drop=TRUE]) ans <- do.call(paste, c(object, list(sep=":"))) if (object_ncol > 3L) ans <- paste0(ans, ":...") ans } .default_showAsCell <- function(object) { ## Some objects like SplitDataFrameList are not array-like but have ## a "dim" method that return a matrix! if (length(dim(object)) >= 2L && !is.matrix(dim(object))) return(.showAsCell_array(object)) if (NROW(object) == 0L) return(character(0L)) if (is.list(object) || is(object, "List")) { vapply(object, function(x) { str <- paste(head(unlist(x), 3L), collapse = ",") if (length(x) > 3L) str <- paste0(str, ",...") str }, character(1L)) } else { attempt <- try(as.vector(object), silent=TRUE) if (is(attempt, "try-error")) rep.int("########", length(object)) else attempt } } setMethod("showAsCell", "ANY", .default_showAsCell) setMethod("showAsCell", "AsIs", function(object) showAsCell(.drop_AsIs(object)) ) ### Mmmh... these methods don't return a character vector. Is that ok? setMethod("showAsCell", "Date", function(object) object) setMethod("showAsCell", "POSIXt", function(object) object) S4Vectors/R/split-methods.R0000644000175400017540000000261513175714520016565 0ustar00biocbuildbiocbuild### ========================================================================= ### Split a vector-like object as a list-like object ### ------------------------------------------------------------------------- setMethod("split", c("list", "Vector"), function(x, f, drop=FALSE, ...) split(x, as.vector(f), drop=drop, ...) ) ### The remaining methods delegate to IRanges::splitAsList(). setMethod("split", c("Vector", "ANY"), function(x, f, drop=FALSE) { if (!requireNamespace("IRanges", quietly=TRUE)) stop("Couldn't load the IRanges package. You need to install ", "the IRanges\n package in order to split a Vector object.") IRanges::splitAsList(x, f, drop=drop) } ) setMethod("split", c("ANY", "Vector"), function(x, f, drop=FALSE) { if (!requireNamespace("IRanges", quietly=TRUE)) stop("Couldn't load the IRanges package. You need to install ", "the IRanges\n package in order to split by a Vector object.") IRanges::splitAsList(x, f, drop=drop) } ) setMethod("split", c("Vector", "Vector"), function(x, f, drop=FALSE) { if (!requireNamespace("IRanges", quietly=TRUE)) stop("Couldn't load the IRanges package. You need to install ", "the IRanges\n package in order to split a Vector object.") IRanges::splitAsList(x, f, drop=drop) } ) S4Vectors/R/subsetting-utils.R0000644000175400017540000005214213175714520017316 0ustar00biocbuildbiocbuild### ========================================================================= ### Low-level subsetting utilities ### ------------------------------------------------------------------------- ### ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Formal representation of a Normalized Single Bracket Subscript, i.e. a ### subscript that holds positive integer values that can be used for single ### bracket subsetting ([ or [<-). ### ### NSBS and its subclasses are for internal use only. ### setClass("NSBS", representation( "VIRTUAL", ## 'subscript' is an object that holds integer values >= 1 and ## <= upper_bound, or NA_integer_ values. The precise type of the ## object depends on the NSBS subclass and is specified in the ## definition of the subclass. subscript="ANY", upper_bound="integer", # single integer >= 0 upper_bound_is_strict="logical", # TRUE or FALSE has_NAs="logical" ), prototype( upper_bound=0L, upper_bound_is_strict=TRUE, has_NAs=FALSE ) ) ### There are currently 4 NSBS concrete subclasses: ### - in S4Vectors: ### 1) NativeNSBS: subscript slot is a vector of positive integers ### 2) RangeNSBS: subscript slot is c(start, end) ### 3) RleNSBS: subscript slot is an integer-Rle ### - in IRanges: ### 4) RangesNSBS: subscript slot is an IRanges ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### NSBS API: ### - NSBS() constructor function ### - as.integer() ### - length() ### - anyDuplicated() ### - isStrictlySorted() ### setGeneric("NSBS", signature="i", function(i, x, exact=TRUE, strict.upper.bound=TRUE, allow.NAs=FALSE) standardGeneric("NSBS") ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Default methods. ### ### Used in IRanges. ### We use 'call.=FALSE' to hide the function call because displaying it seems ### to confuse some users. .subscript_error <- function(...) stop(wmsg(...), call.=FALSE) setMethod("NSBS", "NSBS", function(i, x, exact=TRUE, strict.upper.bound=TRUE, allow.NAs=FALSE) { x_NROW <- NROW(x) if (i@upper_bound != x_NROW || i@upper_bound_is_strict < strict.upper.bound) .subscript_error( "subscript is a NSBS object that is incompatible ", "with the current subsetting operation" ) if (!allow.NAs && i@has_NAs) .subscript_error("subscript contains NAs") i } ) ### NSBS concrete subclasses NativeNSBS, RangeNSBS, and RleNSBS override this ### default method. setMethod("as.integer", "NSBS", function(x) as.integer(x@subscript)) ### The 3 default methods below work out-of-the-box on NSBS objects for which ### as.integer() works. However, concrete subclasses RangeNSBS, RleNSBS, and ### RangesNSBS override some of them with more efficient versions that avoid ### expanding 'x' into an integer vector. setMethod("length", "NSBS", function(x) length(as.integer(x))) ## S3/S4 combo for anyDuplicated.NSBS anyDuplicated.NSBS <- function(x, incomparables=FALSE, ...) anyDuplicated(x, incomparables=incomparables, ...) setMethod("anyDuplicated", "NSBS", function(x, incomparables=FALSE, ...) anyDuplicated(as.integer(x))) setMethod("isStrictlySorted", "NSBS", function(x) isStrictlySorted(as.integer(x)) ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### NativeNSBS objects. ### setClass("NativeNSBS", # not exported contains="NSBS", representation( subscript="integer" ), prototype( subscript=integer(0) ) ) ### Construction methods. ### Supplied arguments are trusted so we don't check them! NativeNSBS <- function(subscript, upper_bound, upper_bound_is_strict, has_NAs) new2("NativeNSBS", subscript=subscript, upper_bound=upper_bound, upper_bound_is_strict=upper_bound_is_strict, has_NAs=has_NAs, check=FALSE) setMethod("NSBS", "missing", function(i, x, exact=TRUE, strict.upper.bound=TRUE, allow.NAs=FALSE) { x_NROW <- NROW(x) i <- seq_len(x_NROW) NativeNSBS(i, x_NROW, strict.upper.bound, FALSE) } ) setMethod("NSBS", "NULL", function(i, x, exact=TRUE, strict.upper.bound=TRUE, allow.NAs=FALSE) { x_NROW <- NROW(x) i <- integer(0) NativeNSBS(i, x_NROW, strict.upper.bound, FALSE) } ) .NSBS.numeric <- function(i, x, exact=TRUE, strict.upper.bound=TRUE, allow.NAs=FALSE) { x_NROW <- NROW(x) if (!is.integer(i)) i <- as.integer(i) has_NAs <- anyNA(i) if (!allow.NAs && has_NAs) .subscript_error("subscript contains NAs") ## Strangely, this is much faster than using range(). i_max <- suppressWarnings(max(i, na.rm=TRUE)) i_min <- suppressWarnings(min(i, na.rm=TRUE)) if (strict.upper.bound && i_max > x_NROW) .subscript_error("subscript contains out-of-bounds indices") if (i_min < 0L) { ## Translate into positive indices. i <- seq_len(x_NROW)[i] } else { ## Remove 0's from subscript. zero_idx <- which(!is.na(i) & i == 0L) if (length(zero_idx) != 0L) i <- i[-zero_idx] } NativeNSBS(i, x_NROW, strict.upper.bound, has_NAs) } setMethod("NSBS", "numeric", .NSBS.numeric) setMethod("NSBS", "logical", function(i, x, exact=TRUE, strict.upper.bound=TRUE, allow.NAs=FALSE) { x_NROW <- NROW(x) if (anyNA(i)) .subscript_error("logical subscript contains NAs") li <- length(i) if (strict.upper.bound && li > x_NROW) { if (any(i[(x_NROW+1L):li])) .subscript_error( "subscript is a logical vector with out-of-bounds ", "TRUE values" ) i <- i[seq_len(x_NROW)] } if (li < x_NROW) i <- rep(i, length.out=x_NROW) i <- which(i) NativeNSBS(i, x_NROW, strict.upper.bound, FALSE) } ) .NSBS.characterORfactor <- function(i, x, exact=TRUE, strict.upper.bound=TRUE, allow.NAs=FALSE) { x_NROW <- NROW(x) x_ROWNAMES <- ROWNAMES(x) what <- if (length(dim(x)) != 0L) "rownames" else "names" if (is.null(x_ROWNAMES)) { if (strict.upper.bound) .subscript_error("cannot subset by character when ", what, " are NULL") i <- x_NROW + seq_along(i) return(NativeNSBS(i, x_NROW, FALSE, FALSE)) } if (exact) { i <- match(i, x_ROWNAMES, incomparables=c(NA_character_, "")) } else { i <- pmatch(i, x_ROWNAMES, duplicates.ok=TRUE) } if (!strict.upper.bound) { na_idx <- which(is.na(i)) i[na_idx] <- x_NROW + seq_along(na_idx) return(NativeNSBS(i, x_NROW, FALSE, FALSE)) } has_NAs <- anyNA(i) if (!allow.NAs && has_NAs) .subscript_error("subscript contains invalid ", what) NativeNSBS(i, x_NROW, strict.upper.bound, has_NAs) } setMethod("NSBS", "character", .NSBS.characterORfactor) setMethod("NSBS", "factor", .NSBS.characterORfactor) setMethod("NSBS", "array", function(i, x, exact=TRUE, strict.upper.bound=TRUE, allow.NAs=FALSE) { warning("subscript is an array, passing it thru as.vector() first") i <- as.vector(i) callGeneric() } ) ### Other methods. setMethod("as.integer", "NativeNSBS", function(x) x@subscript) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### RangeNSBS objects. ### setClass("RangeNSBS", # not exported contains="NSBS", representation( subscript="integer" ), prototype( subscript=c(1L, 0L) ) ) ### Constructor. .normarg_range_start <- function(start, argname="start") { if (!isSingleNumberOrNA(start)) .subscript_error("'", argname, "' must be a single number or NA") if (!is.integer(start)) start <- as.integer(start) start } ### Replacement for IRanges:::solveUserSEWForSingleSeq() ### TODO: Get rid of IRanges:::solveUserSEWForSingleSeq() and use RangeNSBS() ### instead. RangeNSBS <- function(x, start=NA, end=NA, width=NA) { x_NROW <- NROW(x) start <- .normarg_range_start(start, "start") end <- .normarg_range_start(end, "end") width <- .normarg_range_start(width, "width") if (is.na(width)) { if (is.na(start)) start <- 1L if (is.na(end)) end <- x_NROW } else if (is.na(start) != is.na(end)) { if (is.na(start)) { start <- end - width + 1L } else { end <- start + width - 1L } } else { if (is.na(start) && is.na(end)) { start <- 1L end <- x_NROW } if (width != end - start + 1L) stop("the supplied 'start', 'end', and 'width' are incompatible") } if (!(start >= 1L && start <= x_NROW + 1L && end <= x_NROW && end >= 0L)) stop("the specified range is out-of-bounds") if (end < start - 1L) stop("the specified range has a negative width") new2("RangeNSBS", subscript=c(start, end), upper_bound=x_NROW, check=FALSE) } setMethod("as.integer", "RangeNSBS", function(x) { range <- x@subscript range_start <- range[[1L]] range_end <- range[[2L]] if (range_end < range_start) return(integer(0)) seq.int(range_start, range_end) } ) setMethod("length", "RangeNSBS", function(x) { range <- x@subscript range_start <- range[[1L]] range_end <- range[[2L]] range_end - range_start + 1L } ) setMethod("anyDuplicated", "RangeNSBS", function(x, incomparables=FALSE, ...) 0L ) setMethod("isStrictlySorted", "RangeNSBS", function(x) TRUE) setMethod("show", "RangeNSBS", function(object) { range <- object@subscript range_start <- range[[1L]] range_end <- range[[2L]] cat(sprintf("%d:%d%s / 1:%d%s\n", range_start, range_end, if (length(object) == 0L) " (empty)" else "", object@upper_bound, if (object@upper_bound == 0L) " (empty)" else "")) } ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### normalizeSingleBracketSubscript() ### normalizeSingleBracketSubscript <- function(i, x, exact=TRUE, allow.append=FALSE, allow.NAs=FALSE, as.NSBS=FALSE) { if (!isTRUEorFALSE(exact)) stop("'exact' must be TRUE or FALSE") if (!isTRUEorFALSE(allow.append)) stop("'allow.append' must be TRUE or FALSE") if (!isTRUEorFALSE(as.NSBS)) stop("'as.NSBS' must be TRUE or FALSE") if (missing(i)) { i <- NSBS( , x, exact=exact, strict.upper.bound=!allow.append, allow.NAs=allow.NAs) } else { i <- NSBS(i, x, exact=exact, strict.upper.bound=!allow.append, allow.NAs=allow.NAs) } if (!as.NSBS) i <- as.integer(i) i } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### normalizeSingleBracketReplacementValue() ### ### Dispatch on the 2nd argument! setGeneric("normalizeSingleBracketReplacementValue", signature="x", function(value, x, i) standardGeneric("normalizeSingleBracketReplacementValue") ) ### Default method. setMethod("normalizeSingleBracketReplacementValue", "ANY", function(value, x) { if (is(value, class(x))) return(value) lv <- length(value) value <- try(as(value, class(x)), silent=TRUE) if (inherits(value, "try-error")) stop("'value' must be a ", class(x), " object (or coercible ", "to a ", class(x), " object)") if (length(value) != lv) stop("coercing replacement value to ", class(x), "\n", " changed its length!\n", " Please do the explicit coercion ", "yourself with something like:\n", " x[...] <- as(value, \"", class(x), "\")\n", " but first make sure this coercion does what you want.") value } ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### extractROWS(), replaceROWS() ### ### 2 internal generics to ease implementation of [ and [<- subsetting for ### Vector subclasses. ### ### A Vector subclass Foo should only need to implement an "extractROWS" and ### "replaceROWS" method to make "[" and "[<-" work out-of-the-box. ### extractROWS() does NOT need to support a missing 'i' so "extractROWS" ### methods don't need to do 'if (missing(i)) return(x)'. ### For replaceROWS(), it's OK to assume that 'value' is "compatible" with 'x' ### i.e. that it has gone thru normalizeSingleBracketReplacementValue(). ### See "extractROWS" and "replaceROWS" methods for Hits objects for an ### example. ### setGeneric("extractROWS", signature=c("x", "i"), function(x, i) standardGeneric("extractROWS") ) setGeneric("replaceROWS", signature="x", function(x, i, value) standardGeneric("replaceROWS") ) .extractROWSWithBracket <- function(x, i) { if (is.null(x) || missing(i)) return(x) ## dynamically call [i,,,..,drop=FALSE] with as many "," as length(dim)-1 ndim <- max(length(dim(x)), 1L) i <- normalizeSingleBracketSubscript(i, x, allow.NAs=TRUE) args <- rep.int(alist(foo=), ndim) args[[1]] <- i args <- c(list(x), args, list(drop=FALSE)) do.call(`[`, args) } .replaceROWSWithBracket <- function(x, i, value) { if (is.null(x)) return(x) ndim <- max(length(dim(x)), 1L) i <- normalizeSingleBracketSubscript(i, x, allow.append=TRUE) args <- rep.int(alist(foo=), ndim) args[[1]] <- i args <- c(list(x), args, list(value=value)) do.call(`[<-`, args) } setMethod("extractROWS", c("ANY", "ANY"), .extractROWSWithBracket) ### NOT exported but used in IRanges package (by "extractROWS" method with ### signature vector_OR_factor,RangesNSBS). extract_ranges_from_vector_OR_factor <- function(x, start, width) { .Call2("vector_OR_factor_extract_ranges", x, start, width, PACKAGE="S4Vectors") } setMethod("extractROWS", c("vector_OR_factor", "RangeNSBS"), function(x, i) { start <- i@subscript[[1L]] width <- i@subscript[[2L]] - start + 1L extract_ranges_from_vector_OR_factor(x, start, width) } ) setMethod("extractROWS", c("array", "RangeNSBS"), .extractROWSWithBracket) setMethod("extractROWS", c("array", "RangesNSBS"), .extractROWSWithBracket) ### NOT exported but will be used in IRanges package (by "extractROWS" method ### with signature LLint,RangesNSBS). extract_ranges_from_LLint <- function(x, start, width) { start <- (start - 1L) * BYTES_PER_LLINT + 1L width <- width * BYTES_PER_LLINT x@bytes <- extract_ranges_from_vector_OR_factor(x@bytes, start, width) x } setMethod("extractROWS", c("LLint", "RangeNSBS"), function(x, i) { start <- i@subscript[[1L]] width <- i@subscript[[2L]] - start + 1L extract_ranges_from_LLint(x, start, width) } ) setMethod("extractROWS", c("LLint", "NSBS"), function(x, i) { start <- as.integer(i) width <- rep.int(1L, length(start)) extract_ranges_from_LLint(x, start, width) } ) setMethod("extractROWS", c("LLint", "ANY"), function (x, i) { ## We don't support NAs in the subscript yet. #i <- normalizeSingleBracketSubscript(i, x, allow.NAs=TRUE, # as.NSBS=TRUE) i <- normalizeSingleBracketSubscript(i, x, as.NSBS=TRUE) callGeneric() } ) subset_along_ROWS <- function(x, i, j, ..., drop=TRUE) { if (!missing(j) || length(list(...)) > 0L) stop("invalid subsetting") if (missing(i)) return(x) extractROWS(x, i) } setMethod("[", "LLint", subset_along_ROWS) setMethod("replaceROWS", "ANY", .replaceROWSWithBracket) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### normalizeDoubleBracketSubscript() ### ### Supported types for 'i': single NA, or numeric or character vector of ### length 1, or numeric- or character-Rle of length 1. ### Always returns a single integer. When called with 'error.if.nomatch=FALSE', ### returns an NA_integer_ if no match is found. Otherwise (the default), ### raises an error if no match is found so the returned integer is guaranteed ### to be a non-NA positive integer referring to a valid position in 'x'. ### normalizeDoubleBracketSubscript <- function(i, x, exact=TRUE, error.if.nomatch=TRUE) { if (!isTRUEorFALSE(exact)) stop("'exact' must be TRUE or FALSE") if (!isTRUEorFALSE(error.if.nomatch)) stop("'error.if.nomatch' must be TRUE or FALSE") if (missing(i)) stop("subscript is missing") subscript_type <- class(i) if (is(i, "Rle")) { i <- decodeRle(i) subscript_type <- paste0(class(i), "-", subscript_type) } if (is.vector(i) && length(i) == 1L && is.na(i)) { if (error.if.nomatch) stop("subsetting by NA returns no match") return(NA_integer_) } if (!is.numeric(i) && !is.character(i)) stop("invalid [[ subscript type: ", subscript_type) if (length(i) < 1L) stop("attempt to extract less than one element") if (length(i) > 1L) stop("attempt to extract more than one element") if (is.numeric(i)) { if (!is.integer(i)) i <- as.integer(i) if (i < 1L || length(x) < i) stop("subscript is out of bounds") return(i) } ## 'i' is a character string x_names <- names(x) if (is.null(x_names)) { if (error.if.nomatch) stop("attempt to extract by name when elements have no names") return(NA_integer_) } #if (i == "") # stop("invalid subscript \"\"") if (exact) { ans <- match(i, x_names, incomparables=c(NA_character_, "")) } else { ## Because 'i' has length 1, it doesn't matter whether we use ## 'duplicates.ok=FALSE' (the default) or 'duplicates.ok=TRUE' but ## the latter seems to be just a little bit faster. ans <- pmatch(i, x_names, duplicates.ok=TRUE) } if (is.na(ans) && error.if.nomatch) stop("subscript \"", i, "\" matches no name") ans } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### 2 internal generics to ease implementation of [[ and [[<- subsetting for ### new List subclasses. ### setGeneric("getListElement", signature="x", function(x, i, exact=TRUE) standardGeneric("getListElement") ) setGeneric("setListElement", signature="x", function(x, i, value) standardGeneric("setListElement") ) setMethod("getListElement", "list", function(x, i, exact=TRUE) { i <- normalizeDoubleBracketSubscript(i, x, exact=exact, error.if.nomatch=FALSE) x[[i]] } ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### window(), head(), tail(), rep.int() ### ### S3/S4 combo for window.LLint window_along_ROWS <- function(x, start=NA, end=NA, width=NA) { i <- RangeNSBS(x, start=start, end=end, width=width) extractROWS(x, i) } window.LLint <- function(x, ...) window_along_ROWS(x, ...) setMethod("window", "LLint", window.LLint) ### S3/S4 combo for head.LLint head_along_ROWS <- function(x, n=6L) { if (!isSingleNumber(n)) stop("'n' must be a single integer") if (!is.integer(n)) n <- as.integer(n) x_NROW <- NROW(x) if (n >= 0L) { n <- min(x_NROW, n) } else { n <- max(0L, x_NROW + n) } window(x, start=1L, width=n) } head.LLint <- function(x, ...) head_along_ROWS(x, ...) setMethod("head", "LLint", head.LLint) ### S3/S4 combo for tail.LLint tail_along_ROWS <- function(x, n=6L) { if (!isSingleNumber(n)) stop("'n' must be a single integer") if (!is.integer(n)) n <- as.integer(n) x_NROW <- NROW(x) if (n >= 0L) { n <- min(x_NROW, n) } else { n <- max(0L, x_NROW + n) } window(x, end=x_NROW, width=n) } tail.LLint <- function(x, ...) tail_along_ROWS(x, ...) setMethod("tail", "LLint", tail.LLint) rep.int_along_ROWS <- function(x, times) { x_len <- length(x) if (!(is.numeric(times) || is.LLint(times))) stop("'times' must be a numeric or LLint vector") times_len <- length(times) if (times_len == 1L) { if (times == 1L) return(x) if (times == 0L) return(extractROWS(x, integer(0))) } if (times_len == x_len) { i <- Rle(seq_len(x_len), times) } else if (times_len == 1L) { if (is.LLint(times)) times <- as.double(times) i <- IRanges::IRanges(rep.int(1L, times), rep.int(x_len, times)) } else { stop("invalid 'times' value") } extractROWS(x, i) } setMethod("rep.int", "LLint", rep.int_along_ROWS) S4Vectors/R/utils.R0000644000175400017540000000242413175714520015127 0ustar00biocbuildbiocbuild### ========================================================================= ### Miscellaneous low-level utils ### ------------------------------------------------------------------------- ### ### Unless stated otherwise, nothing in this file is exported. ### ### Wrap the message in lines that don't exceed the terminal width (obtained ### with 'getOption("width")'). Usage: ### stop(wmsg(...)) ### warning(wmsg(...)) ### message(wmsg(...)) wmsg <- function(...) paste0(strwrap(paste0(c(...), collapse="")), collapse="\n ") errorIfWarning <- function(expr) { old_options <- options(warn=2) on.exit(options(old_options)) eval(expr) } .AEbufs_use_malloc <- function(x) .Call("AEbufs_use_malloc", x, PACKAGE="S4Vectors") .AEbufs_free <- function() .Call("AEbufs_free", PACKAGE="S4Vectors") ### Exported! .Call2 <- function(.NAME, ..., PACKAGE) { ## Uncomment the 2 lines below to switch from R_alloc- to malloc-based ## Auto-Extending buffers. #.AEbufs_use_malloc(TRUE) #on.exit({.AEbufs_free(); .AEbufs_use_malloc(FALSE)}) .Call(.NAME, ..., PACKAGE=PACKAGE) } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Functional fun ### Has <- function(FUN) { function(x) { !is.null(FUN(x)) } } S4Vectors/R/vector-utils.R0000644000175400017540000000652213175714520016432 0ustar00biocbuildbiocbuild### ========================================================================= ### Some low-level (not exported) utility functions to operate on ordinary ### vectors (including lists and data frames) ### ------------------------------------------------------------------------- ### ### Unless stated otherwise, nothing in this file is exported. ### last_or <- function(x, or) { x_len <- length(x) if (x_len != 0L) x[[x_len]] else or } ### TODO: Maybe implement this in C? sapply_isNULL <- function(x) vapply(x, is.null, logical(1), USE.NAMES=FALSE) sapply_NROW <- function(x) { if (!is.list(x)) x <- as.list(x) ans <- try(.Call2("sapply_NROW", x, PACKAGE="S4Vectors"), silent=TRUE) if (!inherits(ans, "try-error")) { names(ans) <- names(x) return(ans) } ## From here, 'length(x)' is guaranteed to be != 0 return(vapply(x, NROW, integer(1))) } listElementType <- function(x) { cl <- lapply(x, class) clnames <- unique(unlist(cl, use.names=FALSE)) if (length(clnames) == 1L) { clnames } else { contains <- lapply(cl, function(x) getClass(x, TRUE)@contains) clnames <- c(clnames, unlist(lapply(contains, names), use.names=FALSE)) cltab <- table(factor(clnames, unique(clnames))) clnames <- names(cltab)[cltab == length(x)] if (length(clnames) > 0L) { clnames[1] } else { NULL } } } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### quick_unlist() and quick_unsplit() ### ### Both functions *assume* that 'x' is a list of length >= 1 with no names, ### and that the list elements in 'x' have the same type. But they don't ### actually check this! ### quick_unlist <- function(x) { x1 <- x[[1L]] if (is.factor(x1)) { ## Fast unlisting of a list of factors that all have the same levels ## in the same order. structure(unlist(x), class="factor", levels=levels(x1)) } else { do.call(c, x) # doesn't work on list of factors } } quick_unsplit <- function(x, f) { idx <- split(seq_along(f), f) idx <- unlist(idx, use.names=FALSE) revidx <- integer(length(idx)) revidx[idx] <- seq_along(idx) quick_unlist(x)[revidx] } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### extract_data_frame_rows() ### ### A fast version of {df <- df[i, , drop=FALSE]; rownames(df) <- NULL}. ### Can be up to 20x or 30x faster when extracting millions of rows. ### What kills [.data.frame is the overhead of propagating the original ### rownames and trying to keep them unique with make.unique(). However, most ### of the time, nobody cares about the rownames so this effort is pointless ### and only a waste of time. ### ### NOT exported. extract_data_frame_rows <- function(x, i) { stopifnot(is.data.frame(x)) ## The commented code should be as fast (or even faster, because 'i' is ## normalized only once) as the code below but unfortunately it's not. ## TODO: Investigate why and make it as fast as the code below. #i <- normalizeSingleBracketSubscript(i, x, exact=FALSE, as.NSBS=TRUE) #data.frame(lapply(x, extractROWS, i), # check.names=FALSE, stringsAsFactors=FALSE) i <- normalizeSingleBracketSubscript(i, x, exact=FALSE) data.frame(lapply(x, "[", i), check.names=FALSE, stringsAsFactors=FALSE) } S4Vectors/R/zzz.R0000644000175400017540000000054713175714520014630 0ustar00biocbuildbiocbuild### .onLoad <- function(libname, pkgname) { ns <- asNamespace(pkgname) objname <- "NA_LLint_" NA_LLint_ <- make_NA_LLint_() assign(objname, NA_LLint_, envir=ns) namespaceExport(ns, objname) } .onUnload <- function(libpath) { library.dynam.unload("S4Vectors", libpath) } .test <- function() BiocGenerics:::testPackage("S4Vectors") S4Vectors/TODO0000644000175400017540000000403713175714520014135 0ustar00biocbuildbiocbuildIRanges before the split (version 1.23.5) ----------------------------------------- R files: 73 files Going to S4Vectors (29 files in total): X S4-utils.R X utils.R X isConstant.R (renamed isSorted.R) X normarg-utils.R X compact_bitvector.R (deleted) X int-utils.R X str-utils.R X vector-utils.R (NEW!) X eval-utils.R X Annotated-class.R X DataTable-API.R (rename DataTable-class.R, put DataTable class def here) X DataTable-stats.R (merge with DataTable-class.R) X subsetting-utils.R X Vector-class.R (there are some leftovers in IRanges/R/Vector-class.R) X Vector-comparison.R X Hits-class.R (there are some leftovers in IRanges/R/Hits-class.R) X Rle-class.R (too big! split in Rle-class.R + Rle-utils.R) X runstat.R (merge with Rle-utils.R) X List-class.R (split in List-class.R + List-utils.R) X endoapply.R (merge with List-utils.R) X funprog-methods.R (merge with List-utils.R) X SimpleList-class.R X DataFrame-class.R List-comparison.R (methods for CompressedList need to stay in IRanges) DataFrame-utils.R expand-methods.R (merge with DataFrame-utils.R) X FilterRules-class.R classNameForDisplay-methods.R (no more, has been diluted in other files) zzz.R C files: - 36 compilation units (.c files) Going to S4Vectors (15 files in total): X safe_arithm.c X sort_utils.c X hash_utils.c X AEbufs.c X SEXP_utils.c (split in SEXP_utils.c + vector_utils.c + eval_utils.c) X anyMissing.c X compact_bitvector.c (deleted) X int_utils.c X str_utils.c X Hits_class.c X Rle_class.c X Rle_utils.c X Vector_class.c (renamed List_class.c) X SimpleList_class.c X DataFrame_class.c - 9 header files (.h files) None goes to S4Vectors. Other TODO items that originated in the IRanges package ------------------------------------------------------- o FilterRules: - refactor, using ShortRead filter framework (becomes FilterList) - support subsetting DataFrame/RangedData directly oooooooooo S4Vectors/build/0000755000175400017540000000000013175736135014546 5ustar00biocbuildbiocbuildS4Vectors/build/vignette.rds0000644000175400017540000000045213175736135017106 0ustar00biocbuildbiocbuilduN@@4sǪ/`41ƨIX'/^R =7l؎E, ];ă}yCaQG/^\ZkX"]gCS8)5ђnLkz2I%i!w{1J7Zc Sf97L fͧ;ޛ^IeB;la :օ5-XU;EY A,bxsV93?VR}S f&,u]Ɖ3`0ÂT/M>VS4Vectors/inst/0000755000175400017540000000000013175714520014416 5ustar00biocbuildbiocbuildS4Vectors/inst/doc/0000755000175400017540000000000013175736135015171 5ustar00biocbuildbiocbuildS4Vectors/inst/doc/HTS_core_package_stack.txt0000644000175400017540000000163413175714520022236 0ustar00biocbuildbiocbuildHTS core package stack ---------------------- as of August 2015 VariantAnnotation | | v v GenomicFeatures BSgenome | | v v rtracklayer | v GenomicAlignments | | v v SummarizedExperiment Rsamtools | | | v v v GenomicRanges Biostrings | | v v GenomeInfoDb XVector | | v v IRanges | v S4Vectors S4Vectors/inst/doc/RleTricks.R0000644000175400017540000000370313175736107017220 0ustar00biocbuildbiocbuild### R code from vignette source 'RleTricks.Rnw' ### Encoding: UTF-8 ################################################### ### code chunk number 1: options ################################################### options(width=60) ################################################### ### code chunk number 2: Rle-rollmean ################################################### rollmeanRle <- function (x, k) { n <- length(x) cumsum(c(Rle(sum(window(x, 1, k))), window(x, k + 1, n) - window(x, 1, n - k))) / k } ################################################### ### code chunk number 3: Rle-rollvar ################################################### rollvarRle <- function(x, k) { n <- length(x) means <- rollmeanRle(x, k) nextMean <- window(means, 2, n - k + 1) cumsum(c(Rle(sum((window(x, 1, k) - means[1])^2)), k * diff(means)^2 - (window(x, 1, n - k) - nextMean)^2 + (window(x, k + 1, n) - nextMean)^2)) / (k - 1) } ################################################### ### code chunk number 4: Rle-rollcov ################################################### rollcovRle <- function(x, y, k) { n <- length(x) meanX <- rollmeanRle(x, k) meanY <- rollmeanRle(y, k) nextMeanX <- window(meanX, 2, n - k + 1) nextMeanY <- window(meanY, 2, n - k + 1) cumsum(c(Rle(sum((window(x, 1, k) - meanX[1]) * (window(y, 1, k) - meanY[1]))), k * diff(meanX) * diff(meanY) - (window(x, 1, n - k) - nextMeanX) * (window(y, 1, n - k) - nextMeanY) + (window(x, k + 1, n) - nextMeanX) * (window(y, k + 1, n) - nextMeanY))) / (k - 1) } ################################################### ### code chunk number 5: Rle-rollsd ################################################### rollsdRle <- function(x, k) { sqrt(rollvarRle(x, k)) } ################################################### ### code chunk number 6: Rle-rollcor ################################################### rollcorRle <- function(x, y, k) { rollcovRle(x, y, k) / (rollsdRle(x, k) * rollsdRle(y, k)) } S4Vectors/inst/doc/RleTricks.Rnw0000644000175400017540000000365013175714520017561 0ustar00biocbuildbiocbuild\documentclass{article} % \VignetteIndexEntry{Rle Tips and Tricks} % \VignetteDepends{} % \VignetteKeywords{Rle} % \VignettePackage{S4Vectors} \usepackage{times} \usepackage{hyperref} \textwidth=6.5in \textheight=8.5in % \parskip=.3cm \oddsidemargin=-.1in \evensidemargin=-.1in \headheight=-.3in \newcommand{\Rfunction}[1]{{\texttt{#1}}} \newcommand{\Robject}[1]{{\texttt{#1}}} \newcommand{\Rpackage}[1]{{\textit{#1}}} \newcommand{\Rmethod}[1]{{\texttt{#1}}} \newcommand{\Rfunarg}[1]{{\texttt{#1}}} \newcommand{\Rclass}[1]{{\textit{#1}}} \newcommand{\Rcode}[1]{{\texttt{#1}}} \newcommand{\software}[1]{\textsf{#1}} \newcommand{\R}{\software{R}} \title{Rle Tips and Tricks} \author{Patrick Aboyoun} \date{\today} \begin{document} \maketitle <>= options(width=60) @ <>= rollmeanRle <- function (x, k) { n <- length(x) cumsum(c(Rle(sum(window(x, 1, k))), window(x, k + 1, n) - window(x, 1, n - k))) / k } @ <>= rollvarRle <- function(x, k) { n <- length(x) means <- rollmeanRle(x, k) nextMean <- window(means, 2, n - k + 1) cumsum(c(Rle(sum((window(x, 1, k) - means[1])^2)), k * diff(means)^2 - (window(x, 1, n - k) - nextMean)^2 + (window(x, k + 1, n) - nextMean)^2)) / (k - 1) } @ <>= rollcovRle <- function(x, y, k) { n <- length(x) meanX <- rollmeanRle(x, k) meanY <- rollmeanRle(y, k) nextMeanX <- window(meanX, 2, n - k + 1) nextMeanY <- window(meanY, 2, n - k + 1) cumsum(c(Rle(sum((window(x, 1, k) - meanX[1]) * (window(y, 1, k) - meanY[1]))), k * diff(meanX) * diff(meanY) - (window(x, 1, n - k) - nextMeanX) * (window(y, 1, n - k) - nextMeanY) + (window(x, k + 1, n) - nextMeanX) * (window(y, k + 1, n) - nextMeanY))) / (k - 1) } @ <>= rollsdRle <- function(x, k) { sqrt(rollvarRle(x, k)) } @ <>= rollcorRle <- function(x, y, k) { rollcovRle(x, y, k) / (rollsdRle(x, k) * rollsdRle(y, k)) } @ \end{document} S4Vectors/inst/doc/RleTricks.pdf0000644000175400017540000012415313175736107017573 0ustar00biocbuildbiocbuild%PDF-1.5 % 5 0 obj << /Length 703 /Filter /FlateDecode >> stream xX[o0~cBg7'H <4Cz[ nc'fH<|h).aԱQ@[DP0CgƩI빉eU}j\JLwZ350hL}D)Ezl 03VJI %KZ .R-m)4NkkOjBZq .f-#O|o\H@ƍVn27w\/-X7y`/X=J9#ʏrFZZIhNѸŠ7]ku,,b4oJ6<'t:i%#=XtelVA;.@cuKB*%*ag5/KNJ>yD;vB+1Nc;6 a ?o endstream endobj 17 0 obj << /Length1 1365 /Length2 5943 /Length3 0 /Length 6882 /Filter /FlateDecode >> stream xڍtT6 E kBo I @0$B"EBJޛR*(M˹k}Zoyf̞F&*hG8 CDrME`17)1o%5 aXf(\[o `0XZV{p'7џM<῜f|!AhO<ap;8##;; wxA߿Vxz9QȀWDRJUw򩪢 a 0 ,&  ?I&_3 F}82@Y Mr$ +Y7wAP$q<Ȁ? 6MTGV;GQ.͡_toزJ. nH8,&wc(#' VTzǟr0:ID[ bFg_y73q1bd%N;0TX0n¿N"kbe񨚶r7G@$w4K)E5NCxlm8},SCρ +ږ^=v,XPPbqhYL˥>&a.4[i M>9!E)z ߒ!n#eْz/j8ҏ6򌫳3V7uט=cVfbz{=pm|o"WD{- /FL_ {7/!%vOr<}|~KM`Z - 7cN@|,%S`16so4[rͫ+Ś}6м>:5z-e/^ tՏ*WXpBjc2A; lpQ0iڹBgʚnaUU^+u39(Iӓf[?z'o eݻpE0EI!/VW|LR [R186vvljfk;̦~EP—:@>w+Ny]}#gV}0ϼr38 zi:d+2EAG7vKU3 0w%N7Mi1#ROStR˾/Wj}HGg{ڿ@54re;بզO}P.g_s*]^1MR\xb<<8 n/y;Mq-6ڟ*iYQ؂0UAN:uHmXz(I]?"WeY_ot&/6jW-RW@|F=Uϛy87x+XF0x8r' ;<]Ԟ2"-m!#fo'ߒed"K7]$FAI9I Dݜ@'S2-[\pT9 Fӌ ??cIjX_; +.k:aI !9Яخ-:Qhv:&xy^!Jr;*lQ,_0WojI4I(g;|MjZhb'(SdI`T {ߌ(LοFd=BɞgSP"A̤pヱ. RZF0@&>:Wh* : v:zEz/ӯO4_.51Z@5ElEG>ascAp =4q$y=$ҵ%sq$qv 7:LP,˼Eomw{3^:J`R 饒|1@&{T;8 Y\A..@M,'EeGR*0F}aծu|{**=)sC\ժCJ~\p.pDTq19s1U $U0L k_i Q)e{|3s{(` ̌n,dilw=m>if%5JfRYԥjV|ǦB܂8n-Nr)vWr eD]'wƺ- P[#k3*UST0>~B?Q(QUkV9n✕ 6}E=,e$=#!H0؋{N(eM䒫'q [LZM۾ZbBGDQGD:z9d|ʠOA%UAЪs/w7lIaf9uA'ԾgWr%RAp`fIaN0Mr\J/C/[)BZ:eeL_ZM]"yu]C#0l/,N*5hZp~;Y9e9\lMrIOt48y*I4UymU}+3U.$ӗQ.~KV'#Qٽuߢ0^MUkqL IeWi.k(ZC=?jc2#dֺ:KXniزDk*e/V;rhרȤW ]1& Lx+)ÓE*5}Դ+mr՝,w^ lH۫[f˒jAאo=sC3 vE/0}[^)wWB_:B#_O7mu^Qۦu;YWx]CM,czjv@1+ik \i)~ f"̒gi`"G&^?um,i +Mhq oi,ܪ.I4 yѾ xjh 6r3&]5 7T'{m@{TnWzpVw``fnx!f+}D'` :&(ptv4a37lnèiR{agV3zŗ>k šrFdEkݲo;LF1hǸ^51;Sm RFZCzaV⸖̃pko? 3YlUoD3J[}S{]ʮLG^3a$JhBbЂ_TuOqaÄPd1I_z%m׶Jy+*˰٥ݦ!j+Ϭ'N9]Y@4 `ׅqh?pn>Z9'5YeMfj7-Yr-vTFXͫ3"/6_yUI8f`7f@O4fZ5X% 6%׿!#v : oIs|9 vصʧ즤mf pP?.돻0W#+Տxv<'pdqyڭ#/n9#ԟreۢR2*0JJЕ2ҭG=񊵫2z<]y?uBVܭٷ8z lNH;5Xh%8f.2 y9q=/}{|*كi m4/tK(Yٶ>(zڗ>ԮeKxbw 9y@Zq]^AօiA7DŰߎ-m-%(c4-56^^yS@gDJ[)4+Фʚ`JۺR)S r|+LM>X\֪R:P(ޘ*7Kͳ;"ʶn /n T/*?[ӟPVtWpsXoU<>i3ޒ2d&iJ 'yB8u'f\۬2؍2ɧ]nrPiK3"unq:7XQz=v=v^9#4w-V'钨bOV+|.UI"g-Ua#"I֫9PB<48r]oJrmUsK&-r n c$M.uP"M/ۙ EBNboj ~+; "KI loUO`7ά>J:gޱOoI 2 LꌉT4*MfÞ:x"mnBsYmpp g鶸T໯>K F=be(dĽ[pv^PW6Cvɫ M!\ %ҔYl<saw ׮7ZiBW h|Ѫ"}lDŽLSҘk皉 l%,NSa'G$o!UV1gqt̓gL:Bw+><2{ $8zfNRݩi%'}˛ 3&ч- qYmc]L?7.9YtLUp ~$/Rh 80%mV < *jDcC@T{.ei\@_0EɲQ|_ N7}PƶĀt CۦՒEpĮ!FQo#2G WM5ҡ9gfcGgZO ̇6#0ű$RB 1Q 6]_L5Ax6:VIH[OԝD25)lDv8AV\@_EL6(J$ )` y^Awi"@cdJe׬."舎<ȡ2ÖѪ 8nhNC`UI]"{" EsF !"eͼua56ěbeSB2Zr5 baN2$]rg xv?~qR[} vH71ߟ0an7\݈]Gtb5!3`iŻr1Qsz%IV| endstream endobj 19 0 obj << /Length1 1670 /Length2 9220 /Length3 0 /Length 10307 /Filter /FlateDecode >> stream xڍT6L#!9t Ѝ 9t3C 5tw4Ht )" ( ȇ{ι5k̳}23hpڸXA\n>@^x@~lff] Ys  l OPt DĀ@?] y.p6;P?l>Hp3f !{CEk@EW 6 {Uۛn'!` j=0@ s2lf=O-<`PC'x(QQhB$p:#bm v[ƃAp pDC< sX=P@k<kw+ƒ{DiNYn# #<sZ?/7wlapCxanP(&lvP@P7ڞwz]_WN ]]\C@aЇlp`Fv08?P?|&?3{ ˫bd\|n~!>HT "w-.m]?}84Υ Z(_|J#O࿳4 )y:9fqCaN4xпjsgա60O { saJ0 amX^2'=l?\Їpk/$ C|B=  xy.x[w7*,m#Q WoBA^CHP=l?lBR;b]IP1al^^?l{w?򯀇v<'1cY? Zc/ͻXG84Ft 9üky^~%1Ov)Lьݙ3"<ۅ|xl- ^[~#jr*s(V Ae]:g8U܉z VsnG'>W3ą؁GeIs~^{=~ؘx|_n?Sbl}߮9 cJ^TRΧi i'>*g"V\i{M'&!0hMa>&Y!G4؞,> M\ĂSk7rHf:biiK;R'd.;.;SUm'ғ <޽ --a 8MWjtDܢ- C!o1C_IejD^b=LNnCޜ$W)F-ܠ|7RnOs)jfAʟur|U,GT+r?~ӭBhs٨=p8 2wT%m\aq3_~=r`ث*ȹ ~p?8Xcx0&,Nh85,7$pjr}HB,%^Mo5<Ge/3u䶄7+_G蹹Wik |fx/f~ħ_гGH%|ӚaSaҴkw;cr NFԏⓟxZ>7,'o 1ahwvՓ]Os+)| _]aƞ!8Yf!D4e*Y{$"QeUGb7^s~d&wI:'=fWm&q|#D+s OxS+touQ .(Ȉʌ5]4-) ý2]DpǼ }rPڶꋄ&BM0^Hޡ~K$s}~.~F2 I cȫl%ɘBp>dXR> AG:9fpzzߎDs4RwP${2=]LzQcދKUx}9i^X K{yo=Q^?Z4.Z*xگdNt1_-e?$! '5WbGƏ{RmJ%l$W~\bM#Wı0Ҏ "0*Ajs4U+clO$T-[l6^QfY[Bܛ asLJV-9tq[j+^  M޲)ɪ2xC9dKzѬ1lꔇgZ 7?f,_J7:sħvp(_Qofq.ɑsed\NgN - _X:]eVar\ߥģ>OY)+ XJ{ i|Tޮ#f)zr3EǗ +zT8UC=Ne4>K5Ql_PK o|ќ:L':|"dah>?QPתV* JnAChHNnSy91ȫp&ŗU>S+f-+ S4ykA~qZڻ+ЩR!q\JzR:Ue =Ոq|ZRƥن)̺Qxsȩ>*;>JC 2 kLS#].6Ֆ67X= fu~ 2 ,.5!KeLIUDB7ejA-͌,vɎetZUCi%dC2š%נkW/ڂzWCZBqv$eic#kyι-`^`cHk#Y!փ>\v\:3cpiEפMQIiVKoqK[ \HI]BM53BpW0Q9_{Y>?Q:OHU.WK}*)%Lo]6'6-+} |AF[ܴmi33Ԟ8K8?|W4e.tޔj&euc \}$1N#rIvZW a*1Q3c5FHiƅ _buTT-Arq2&Xpl'|%{\;JY|D zٻT SEEPVkBJ+Y#9""ܖ#KOjԽ1~!hdL[+l0h6I&ډE,%e̺/l>̓/.#D)5 /U"eV2P0"*.)s$^ @ȁ#C t[i#]q:,dH+xU-Y9ءx|BMԼuKgaJTLҸZ{I!߂2N=R>;Y!4e5*N0+$I#` ٺlhw۸jBĻǼ&q(=4&a!`Oh9ŭԮ$NM:.ϡ˵"nxG]?r VO"aX BO*rn5ăW*ډ~HEĽz4?yGoIū.QOrID0X9jyu'ΧB.R9f/Ɨi:o̢l<ټ U9;I 4Bhd'ftοV6^,2LSB75!X&,8"!]cМدoWjqzY:`V#4io ;T]_5ϧLl"M/u^ :;t-bO%Zpp@1exHEUPy}74 zR/@󬂳CIRtlJ-5y H~ԏoQ\SPUɑP[f6amw)hU!/il$" j,lN>>ZQqmoVni|ڇFn#l^z9Uȧ4 ȅ't׶HeSC)rIŐAY,4װ78DA1J /ط㱔3=vsH2oK 3U"P>gXfj^\S4޷D\_u/.AץI5X^Z[$uۻZfɚmJ'nYG̭bbVɶ{sRLBsrS<M߃YTy}1?1 JhFnUvtG}/^0x e~<ܹSLפtiu=iRI"E4kȫ~-/9f6 "]r'VLR ܖrJ!n۱6kPg:l~=Vߴq[&PoV  |7xV48t/c̆Q8)y IvYn2t"MV0'-yAgůۍ.,XW }D;d:`b",řMм~Ԝpj[]7r֥ 3݀)O`߆zgts85'iǼ_\s?eK yL/T>}EiA-&^#>/@VhrGfTkedj!$R-ȁY6ݨvSZ4JNAN+O,o=/Xzd&%2$Mpp.[6GޒfL"E$fq2P.Wo=I\~ܒȧ\OܖAnt(G NF["710Tjao@ԙed\b ;;IЄ7~ %=da )'JNFn|:k`knyG!CW]?8(Аd,y#ϩnsȸ兤$xf򍇠ÌF5vgz{Jzgy؞mސHqbFs vT$}jbD5--QBFӉTrK0"޵PΚc6fK3 on{LKy.xĬ8(m'n# q͈`䕒~i= jf/^`HҼvÓM ($|bEǝ=}VϙA+slGz-k~Wqؓ-w?E8 ['vOcjcf2~f2c7fm1AzAb1ͤa"ξTo%6|ij)1ZO6L"ZU.:H2i|$~#0U:DmT-_c;@+F0L!\uݚw&x0ڛA8$ 6]h Y}'wʌГD4,בeΙ92Y­35Y_5ʔϳ\~ Q41B-Z%dN&-+y? &[2$ROUIHUcφ֝D/߄};uc7An 4bl͈9%^p&-<:e.4.ca`T,R;rIE?^$MdI+ ץ盳]]ݿ43<_L]k͢, {\ءdk3QrcߺKeOf`:9'JVQ5.PBex̱}0@om5L_Ϥ6L?Zt |tM:2vQ],d_0ф-dS}k'uu> _fnFޖCmfoxq-4A̯yt[!=W F)L2M[e'9X%k!{)\ 6w*?R" l's?9_4rЄnCxYiCcBH RL u"RPyxү֔fBD2Ö,k 6.U鱾9!0lPb?EA5*_XI^eK˻rMI7;XiȥaEYS#D VF|\Z4&2a'4Y5ܐgD _Oʦh Ű3/,=_xqοa>-GDrx{Լ2UҠ?Ǻ7/xK"i䨼Ӕupܘ_Vވt6Ooak߂T}D_;!q,3gQ33r]Q"Xo4ۉ;0J(\GHOd~Q:v y "H%BS{wr=Pa^e*܊~8ױrp@m>#&/M  K߳ŚukˤʧB̯H` fq#QnixiH`!lT7!_k*s$ Tw[OX ߓȨauHP8?7Gԓ)~T\)1T'3*ROps)o8J8fjT;-؝zo**Z 4Fi+/;YH*pk,ui-;&*m˥WO xy QJUԤtVe >L:h:ص)')SNQ8%⍗vMg"B0RXE;ۛ"ꨨ#frݎCtH]w|!7h̝jn y^$L@ )znۚ$YAEKzHzģ<~H+$ZɺBeC2Y~lq5qZoof/e0h=ރfcJLḋ)wG|QPjRô6ًCX$3? -Y%0*Ny.TEB3y[Қn5O碣IqT EzL/Z}ƬK\DPhq6_vLa5,5˃q`ZK8HCW~)T@A(W5c|;N#jEr5W+ƜՓ 6+~M y:h6yT=/Ҷ^PQU,A_VJVPh4PƐ՜=p4{Z[Ltv|JeY`yWS4*eLɆr갔W ?r߭D/{p|[Z/t8ƕ}hH\6?qE|_IDZD-`#y/+O_SD [ RQ[7?\gFf1g^ ۺz`)S'D1JK"YN_Y Ҹ endstream endobj 21 0 obj << /Length1 1539 /Length2 8187 /Length3 0 /Length 9201 /Filter /FlateDecode >> stream xڍP-kp`{pd,-.#ɹ{꽚o]CC,f1JC.,l %uv'  &F  4uyI<)AyW;;'5?;??'~(4O'k30N@`5v 9_t..,,'+az&;t:-(tƂBд9k@,]Mgv~p[/h)T?7_ٿ@ɦ{S'l*Ҋ,..LSů@S;gs)9wi15slrpqfqj{{ W} '=Y- Xpu`]r^v7BagX]f@+g3~>|'Y{_ ey|Y%5$tO\b0sp<\|fQ5U?r`KOSOn?_Ao.eȳh4YS-H6yYWg+A:?;WyVv, Z\̭]גف@U3׭`fgcf>ϊ>/R laܯNN(lBx?㷆,`s =% ׉rX .߈j~j/` rXALV 3_$皜M[C}@sy@ͧj1bw ]Tzf%vUNb=+RtW^ua-jS(& jII5Ez?:zk6Cw:ܺx~ UZZt9F+M@,MY% 3) v|<#a / ػ]ԄWcS/xlb2<@Y3PF<ވ| mztVM)NGP $iSvH~(d6ֱ)_[?bawǩUTS/oHr~.&4Ռ)&QHm1ҚJ[da6v8U(GPIa}E#:IvldeD,:$r|P?I4osL'Տ)OeHf6k1{Šq=@oXϧ뎿eD:j8^҈KKɧz'j٬ ^D2HhfT@ѴuOPm jUô~F֮z <p)Đvu"xH; !%fa4>\l_&(s4qCyu:!d.ܳEw,ZU:rw镔IXTd3A֜$QZ nҶ= ;t){Y(D]F+/˹,ZxLөn7 '{55N1$K42M S&}Uٿ)LoYlc1!\KLǑHvq%SyWO`T݊śbJ3YnPm)d _ )im/lU-tY1\u}q}+?ǯ=p )GFΌ3Qoxi.Z<o$' l1Jk'$dEUQQ/4i kޏ./2^莊k>FLj/0ܚ\^ٳ6Bq=;a=Ĉ4ȇk]gI:xdžIc&j%U5iCWM-هBWNT/~1|rwt aeXi?-\20wfW<]e.C@N&¨dfZjrы͹[Y漏?`֫XfjMIXJԢym ~<ICQ'tn]EBR0^T^֒\K+b?b_:VȮv";ɪ;I]=X.GΈvT %Z^ $(6+` P~V}h֪};: R<[0:DEK𦨱$}ab<^-,ϻ+يI إwW'L n&gMk ?2C\8J⢄~4_g6NC.fMeUj}0a~`ƳHkM) 㺟`Y(թ(U*XNad/eWMY2(cXtoKKSәdx28@&go!6 Gi-6FT/M-U`1?p bTJi(u Nm.P IZˋs;w쬂 O9MJa۔]%yҠD%wnc@¤v;sZgYTInJ儏!;rH. %ϜӅWQ}e]6xs"3s9Ů  {\t}IQ)89,E'~8,RV'u޹ȼD a7EJIqrlAnL- ǷgY§|G1"қw@Y7/r{hx-W#KwDk Ip&=ʁ ~y)!$p*xrՓes֒H`i6 0f,G'!ۋbZ42_%V8qVlj N}UlT1ݥgB,솖&L7YKHԽc9%y.Ӑz99T",?8Lk.Bj `0vg)0f\7d G~qՂji;MЪo %ap$V966:ԯ͚dē-UM)lo6c_$Tf@{mth ӭ.代ʄڳ@ T.4T+Qa_YłdǼwq~pQ-`J>'-QL:n;N˒u줢$sʊKX$:ᾹAC|qJ]5-WeqSpdTl,*j-x%Ivߨn8STRxRn`8J+?q9'j!ﻊ> ]݄LomDo^{uX$Mן y&KaPc<>ᙖwڥxE摆Tzxj5cZ6ϟ;yv16&yx (c7vpO k:e!qhL/hKӏz\UAɊ悔8{ _='J"D [mB|ǰ~;űU`[HLg^ fZ&oJ{B-R9x AByCEiTRޕw*눥NqvS@dїC4&ٛU><$f&;6%Q2|,L//-$"oTW{(L :,TX!".0l">?g_>h1 Ȩ@܍!РBjD.Lj}:o'Fأm0̓ޔԝ+5*~m")<~M!mzH T# $zג+N{w E G#X&Jv5H(U>E䊸Bρ%CDž0x^:IǤYCy3̉J#͌6矁kzWENJTC $O3Y#H=e;+z" s t;XkM9f%L" ']n_MT]θyN/Bkz1r~ax{ۗ}1imq)VzwsEՔzRZ%ֆaS5ߐ2Imi7ɲY^ff&nC _Z;|o.3ǡTOK¡e#*G '.]gWU C@ioXd,cV\W S{|w'5z%7E8\!'IDBjh+hpO!-Rq4 :=lZ\H^ ~v+)Z1 PI[?U˾ڗ\DxoRL$ѕ r,l|ѧ-D,_ *@ڣ'fV4*(0_<=jn2KbjdY~ XaӮ'OMNL%(5q6eOL䔱ώm}cHa%XLصN/[>ؐ'1Wc^WÙ`:gm~LgYzsp%2i& u˃I!ûTـ%qF2e5bcO<\}."!L.54֊)\O$>o7:lVa-RWliEX> DIJLc\\ vxa"'Ye00LFJgnK,<F$-6ރэfIUl" nLLrL*?`DdpGwn}M=[Ң&U.KW*ѱ|'ENϳs}>٘.F%t?éĊ4PW-<H,OR{T- xyډjvXhr&g&35G4i9v-fN@f,2mn? 8J=U"EfDu}ߐEbIFJ(4+%[_7CH\ٔc9/zUր8š7(.LbF+^+.i ʼ" F@ SLn8GuTN>΀(@\һ+f0BpNp)X(;+"V8nz0U+>QpþpxgfqM5Le*B\!]ǕܚdF-KP{txLc4?Bm/'ݟA`F&{VbYFȐ #b9>v&SQUJ NАNA3{23_K7[j% ڄ;r_4 /=g[1.&Pwuԉ-gtduH52^Bd+eMBy`yXnN$"}_ֿX\=W+me70"%OEA$]hG3򅊵{|[}>ut)~qow">dEGNAڑ2 bjÛ!'&wBywieVg~hc4"7i!zU[F+G;>Ce ́Wi`S)c595x;ʻSxK$  S2W7 xc8+jZ2X:+q OtvªSG퓍gY9XSIlpѪYkd!d4S5r QѫX B _Pd$Sڟ 22 VtcWx ׊|bs(Ȋ2{|=ocq*j)?QڀJ-Xtwڲ ahߙ_1:hn̾L ww4D*1`uߥ^4-KX hVIU :S'RO5@Jpx{OwSgwʦD/ԇVB_{E֒^Px;,KƿTY;+W[NBQkqͯp$iɰt-yuOTB-X|Lƒq`9*;9G5Κ{qC<$ endstream endobj 23 0 obj << /Length1 2086 /Length2 12297 /Length3 0 /Length 13564 /Filter /FlateDecode >> stream xڍveXk. !]H!H ]!!H*twt )qpms~kk׺W>k=RiJ PV6 @JYSIK؀@NTZZ-kA3}IOTeG +'8@ 7k0@ qArtt>E#! 9@lmr(V ;5_.P'Avvwww6 (# jЀ@ `* {ȿű]Vi:Z@Ad84NJX9vdnhrvXXAJlP( Eٹ8>ك@v '_ɃSTbluasU%/7OqK9C.v?uޓ#uptw-J:k;XvKzYB t ZNOuz;9:,JZ[@~P]@n*lmA,P{C,OSl0> !Ӝ<:h22:2ZR rrqx9v'?L,Ԩ 0'4y7͟8/װ1_ P/BAi){!`kWCAOk!`4ڬlܼ]d= `5k_cXY;@]]5V tOfntc뿚UOy!̧Np/-;}8t!j.:)Dp=晅{~܏XLI9C4Ueɿщܕ h? |ߔ76!~w7 ]D RxUsaACtW4U01Q\~'#eH\͙4p3?(* U`/:p2 (%~h~!-a46M-dRRNoQ֩űx=)ۨX$1ق ZfR,u331xnH^ N7p.cf_/ɖe``ֲ!"oMaxiPUxP2QғGlb `w`b uEyK*HLIy;M,kogUMJ 1@]zj_[߶0<] LhQ$ >6bZ"M67;^mm1G|TŃa+^x\s,8=3n;q7jcG(ܴ lŬȫe,3ĨYR^[R>y}?E'CE^?k7%_ # zI%߯2]rdx-^P tA?Ω,"S%h>ٯNR4à,%`B읁Aak㺷 f&ɕ{Rn=L/R䩀['t8R%rIYEZ0-x-h؀p^lřUm&͟$ܕ/[9]([ւJs1ϕx!0Q#-*~QҧB1jW||=lcJ\+zsC$EKb萦&KY͜#-IR.| L^iA_(j|)_PQ C`ݡT$ä} @qI;T\fA|<7rm2| g5GS=v[l)SmtxǏnM_Qkit@Ӷ J*F2)>{C>e9L&Zh+rGAD=/<5n0Y#hco8W#\dЩN<ǝV<:yY͌<;DvrbgpU 1M#*؁PTߗ&x1VKOtԿjyϑ+=1*ɧUaEW=}YbviE^^CYfJ! (T7u8t>o wbWh}iZrW@N "PM2tG@w&ih4 ςZ\7*B?ɯ/l/>d7?F̀OxDE9g!JZqx3|Xe˽k :3!_ە^ <0,OFLₐ!vQ'X ?]lXJQ>3f;/f5L0r"h<{LgUm4ş\ qLM/{M&,IJ'B>8x5ǠtzDaDpG* @V:xp+Od!k|ZCAgxK*zrnUaHer$=n^lʦFlr8lɡ#wyiGBv:0[럼 tO^1wѼNn>9o_4Mm]$JbR!B:6.[CDO^.m +ކ$8KWe`c&UsS;sM+ZN(q~͠ӷ DXN|*5!$af䴷񱜉l{p*!4mC\@Cup8T5#]/zj/&C=Wa8;dT:B5!FT( dOɅ'sjNf V24*{`ҧ0slqY+|znƛ:\J`v$cW[ ~=`_ VtBXwX+Or: JA6YB9bxNFȱ{;Awb9^?"vq^ISᅊJA@9=ɶƂ$PQD u-$%h6.}GOi7}J[4݄1&I8 3!9"bm7#h l׸S&;2/):/ >eβU>x[Iȋf )ڝ(M~Xf'BPՙ2"dp*09]N2u²f*<"fmP"~@7]XxU&f-8TOlN_VIAN--$,ˁZOU&CV!!5\YawgB?Kޗ|ݝT¼e8x+¢ LD ;# }DLfyl#I%IS4$gw$W#*yE"] Wk8kF٣L_x'Zuymt1fM`CE{m$E-ΑHjbk?#92."\*T;(w-8lx3ApH+lȊ# hş@׽{ة9V<OVs(fX jEh}1ʮXpN;* 7~&9\^]W3σuj梕ck$h ÿrG+-G4LelLښ+ {۽GcdZ{(eqְH:lrv;Q$rԔσ3L@!HmRLN펿"jZ._y>j Ha;O,kr2T]YͱbSY~KR5#! {Sg Y0Hy9roli ss9ZTAN77}V6YPy'k>xjyg$tG'egȣl޶=!z10Ld7QLh`<,tPWӧ7EY]c⥧Qhm~sxÉx '(}:W$x=c>sfJ4WD$_'tYRAs<9!]{+X%#z6(Hd+-PdIp,CZb3e5znᖕ&օF4_ ہafXI{Y&͖IJ (y%927aZ^2Dz Pݣ"Z>y+՟"ƙ*B$3Wz0Y7\\ Kg? JNUn楷@ڽW d*sewN{eo~D5Dx˽8'h?&"e#=qjd@賆uKvmJ˯!c:arEt^͆&If,7i`5IegH\͈Bzph~ s) wA 6̰iEkz):uȗA L_Jg]RE7/l* ggVQb#;k}MvL9W>fi˘.yeD"UY,gQi͖AuΏ)N)aYk ǝ)XS:~gN`H_ۏt.2o>|맏[j$t!\m&jeLɖр<`Z$+XʋJSpo-v~ _dcTt#&BQɄ@ACz{p`-n i>czty-zJlZMOAIM'[[X8fݾ@pOSk|3_2;$8CC^)A1q}ڥʷA.a~!Lbl4 l 7Vi+|c8-r":%OPHZW>Bhm~;6^e/{*:"0-^Zz.D֋IhQtYE NL}HeicuW\ej uAIsƌޑe|>$Mpcy>0{}Ⱥ>YM 5.>7wzc+jě 7(y5& <:iQrKzzTOz 5ocP0 $7M\s$4~ߏ}Y֨YsinwEr0IĆ{w1ڸZUpL} s O$cC|kxeqFgK2pg[f( vIc7PBkwJ5@ūH `BDԙhI\J,$~TԒ*[Q1FSmZ 1uؖL]=raK_   8w0?z*z}`Gsyb6~OpTێvL7]DMIyOIƙ4A,|7CBୁI.TʞzɷH?I ė('QdN+>-V–^b,9:@g(,y癗1VIs?""I׼}/d%a`gG $=# R{Xxwj9w[ '.>"L7xuDL6h!dP{q݈PdI#dˑ>R:AW$H[ۆ7)L۰IcO$[Ѝơdf%qhjPՄ_Y(͙o%?rAdDLT4ELx&uG9Hԟ&=-R3I6K,,ǠN/9:YC䛺p }Uv &hP2:CZeFwV8ok%'x 7[Zp['=V7hrsV+'T[QϽ-)*W鲤DsC^"1'0# XLk6-ä:#k`xrt^f 錛Luھ+&LU6Yk./I )ɰg(VqwVOV5zu0B({)4Hc܏_C}RhF/nm|f($љ PtZw;=PFq=iT4P_n (}xmYY8}O<nu\ǵ9.ew* /H?.U2ǰ~jO<sad8&KIXI+2s /aL O02^i_~޶n.}25 ìu끔k\UQpxy1%Ad9)[hlNEcP4C8`2(*6NF(67_tt݄LWPLVZa[(8gG o C$b*՟M>ݿXKKEnvND+sbbu vt-:4cp#t4ûtR7J%<_nq|]Ls'?Puɪ^P _nIU_Jz=X *;̃qXq8L84&i7tI`381mKq/\0j*Jf9T򯻶r*&$bdξl,{\Y>4:Rc} ScAdܩc&7+Ϸ>tHM 'e1#=Z" + Wk"Q vd /ȗlc89~<:|"8?ZThG@S&V"+β|sMи u2}[Z7YE?-% _|U-WFZ/#{w-IݧEo.j"^YtWq&F3!7zП'~@#^_KsYMy}=T5Y[j~V} ϰ=lAfE8qYLvkS3 5$栭=%aYsvRh/K&]]er2#9\F@0C:v5 =Q͸=nH?=s8BW3AّF- X ƫ˃ l*Z3F.b5h3N#mO^Ҡ6D_fr$MK"ʚz/gFpvMiifлpܻ{0˳ng\ǐJշր.oEpfa?xL`ɳP k%f^qْ밮v(f9(ɔ"br^}%. D\hdv@tFH0ylOze.ܻ`[uw H9M7 ;eٙ%kQ.{ybdll?ܷ4>>}xmK<@FЇhVRA% m+TjW8A9%XWYtzJ kƖ[n,Ć aJLG*x(S(.'CzCiXر֕LzIWu0E3x3P c,z1a~jPO+)ne6kٹ%Ż8UFaS8˕ɘѪH~%q{dq1 L"~o~IiYXɕ R". ݽV ]FW7Diy voڎy^[ TMR| kVAگ`烥] WP]g#*A z,,g7r%̽j=vcsy4nDX_-}'k<]ds0GY J 1b^al6T6L hW,A4Ə7$[TRRȥD0'h{~7F֨,Y͜RhE:%ƅH}H x|Rx%9,j)簌l?MpZʵUב /)ˉG}AkA~VO̒P!M~Uz< դ|:ZW2";T%Fn;&0aXk)MmԾ1"<NQ!2^g Ix HI SF.ZC.Z5v>ĕԐ2zs:E/] _~0T6Υ=!#<*X̳y?goՊ?kّL(LV'OuPJT^5J o ,7Fsw$Շdp}v>IE{q݉T9oMgWflNg[lD= [YCeyqr4iW6c%eTDnM,uJ{ XyX'Z6DG]m1γ@9:}c>,c^Zµ[+ĩf?$4+cҀeSTV(PHB\xjjF"Rޫyt>!gB7PnM]Ja)/.~"|uƯ}p[gjUxgzgŹո"ye}RIVV7ϩ&/EsA4橭 tKBf0PE_[ []\Zto]3( _V-f8X B5 wһH*?Y=S&~40Yq9OE.cq%mjI$NHfj#~Yƥ}dП1)ԟo<.5\i|yHWiRe3:\Y4,* JK&üErf|ZFl@A곯}ayK j-zIj$lPrP8 &JXB( 7ZX[21Q¨ wc,R* GlJ2uz{ WnqRmy&2a18 R>r(6[`r?[Lt̪VP͊N^vQ*5~X:~ڟڒ1 C&$>ߔgFNzt"C cϽ얻>1.U$U)H\NwO2N)n$)c?`ލ/BNwq41 RBc8_W&ᒑqpeaP5jh9jpmV8ئð[q>ML1HmRL{T?r?ݽeR_Q6ndm'oOٝpK:`Pt C򗠺WjT^YufM׌p V{>ܫ j^TޖX=gaSkظ {a)D]|Lf̎Kڶoes`yrivڔipc]JJ%72xbq@sY*6'RU9?!2Df+TayUhV[_G3lRkZ!?1,]'l65h,46-EVbީWq.g ZTho0!2~A쪞s”!s(0"$InDm؉gn4B]kSv TGP @ q^8yǾ;2' vMb5nT}#"+J=Pk-5^98~>5A{KfUHg3&YNM<1'?#^gI>U>Fjiw2@0l3V"3]eI1`[3C[ 'p~[3jgyӬGэNs~zWsKS~\6 CT(盥"]G9oÆcM1]y>k%ӹ/&*3ʿ2Ze -)a|GЊKxKr{ +{m1{[%- ;$*K/ZpṜ5qj endstream endobj 28 0 obj << /Author()/Title()/Subject()/Creator(LaTeX with hyperref package)/Producer(pdfTeX-1.40.16)/Keywords() /CreationDate (D:20171030195654-04'00') /ModDate (D:20171030195654-04'00') /Trapped /False /PTEX.Fullbanner (This is pdfTeX, Version 3.14159265-2.6-1.40.16 (TeX Live 2015/Debian) kpathsea version 6.2.1) >> endobj 2 0 obj << /Type /ObjStm /N 21 /First 150 /Length 1116 /Filter /FlateDecode >> stream xVO8_GЉ?b'V+eAwKAZeqگ\AvMtibB:vx^]N94Nim~Qˉ"mקQ8M>ѷUV˺(%C2_~vxz!8t(r'5wBh_ewO 6='u:˳dfk;f=lmB%bPMAu{A9_ | Pǿ~>9tKe|iACI%U]PMe=)8_}6P $?F ꦀwSpA#$  xw|tG VN1@bۮ!J)7!%?Q@yIcL,ϳVsе-n43nҹ佭lЍR\¾ф&Mሦ4}\NQ,Mgh33O5UEdS5K3_a=ׯW:&p#vظ07hUrBds|iwz曡%:[?ڊ-cDz[_Vt>|MwWnhѧj!;"iY2ߡցeѧ <983D0C5AD78CBAB71FF9612BCADEE55F>] /Length 85 /Filter /FlateDecode >> stream xʻ@@;o-D kC)2 'X]Q"K#RI'$JF?Vkew ?sG endstream endobj startxref 42780 %%EOF S4Vectors/inst/doc/S4QuickOverview.R0000644000175400017540000002210613175736135020327 0ustar00biocbuildbiocbuild### R code from vignette source 'S4QuickOverview.Rnw' ### Encoding: UTF-8 ################################################### ### code chunk number 1: setup ################################################### options(width=60) library(Matrix) library(IRanges) library(ShortRead) library(graph) ################################################### ### code chunk number 2: S4_object_in_dataset ################################################### library(graph) data(apopGraph) apopGraph ################################################### ### code chunk number 3: S4_object_from_constructor ################################################### library(IRanges) IRanges(start=c(101, 25), end=c(110, 80)) ################################################### ### code chunk number 4: S4_object_from_ceorcion ################################################### library(Matrix) m <- matrix(3:-4, nrow=2) as(m, "Matrix") ################################################### ### code chunk number 5: S4_object_from_high_level_IO_function ################################################### library(ShortRead) path_to_my_data <- system.file( package="ShortRead", "extdata", "Data", "C1-36Firecrest", "Bustard", "GERALD") lane1 <- readFastq(path_to_my_data, pattern="s_1_sequence.txt") lane1 ################################################### ### code chunk number 6: S4_object_inside_another_object ################################################### sread(lane1) ################################################### ### code chunk number 7: getters_and_setters ################################################### ir <- IRanges(start=c(101, 25), end=c(110, 80)) width(ir) width(ir) <- width(ir) - 5 ir ################################################### ### code chunk number 8: specialized_methods ################################################### qa1 <- qa(lane1, lane="lane1") class(qa1) ################################################### ### code chunk number 9: showMethods ################################################### showMethods("qa") ################################################### ### code chunk number 10: showClass ################################################### class(lane1) showClass("ShortReadQ") ################################################### ### code chunk number 11: setClass ################################################### setClass("SNPLocations", slots=c( genome="character", # a single string snpid="character", # a character vector of length N chrom="character", # a character vector of length N pos="integer" # an integer vector of length N ) ) ################################################### ### code chunk number 12: SNPLocations ################################################### SNPLocations <- function(genome, snpid, chrom, pos) new("SNPLocations", genome=genome, snpid=snpid, chrom=chrom, pos=pos) ################################################### ### code chunk number 13: test_SNPLocations ################################################### snplocs <- SNPLocations("hg19", c("rs0001", "rs0002"), c("chr1", "chrX"), c(224033L, 1266886L)) ################################################### ### code chunk number 14: length ################################################### setMethod("length", "SNPLocations", function(x) length(x@snpid)) ################################################### ### code chunk number 15: test_length ################################################### length(snplocs) # just testing ################################################### ### code chunk number 16: genome ################################################### setGeneric("genome", function(x) standardGeneric("genome")) setMethod("genome", "SNPLocations", function(x) x@genome) ################################################### ### code chunk number 17: snpid ################################################### setGeneric("snpid", function(x) standardGeneric("snpid")) setMethod("snpid", "SNPLocations", function(x) x@snpid) ################################################### ### code chunk number 18: chrom ################################################### setGeneric("chrom", function(x) standardGeneric("chrom")) setMethod("chrom", "SNPLocations", function(x) x@chrom) ################################################### ### code chunk number 19: pos ################################################### setGeneric("pos", function(x) standardGeneric("pos")) setMethod("pos", "SNPLocations", function(x) x@pos) ################################################### ### code chunk number 20: test_slot_getters ################################################### genome(snplocs) # just testing snpid(snplocs) # just testing ################################################### ### code chunk number 21: show ################################################### setMethod("show", "SNPLocations", function(object) cat(class(object), "instance with", length(object), "SNPs on genome", genome(object), "\n") ) ################################################### ### code chunk number 22: S4QuickOverview.Rnw:374-375 ################################################### snplocs # just testing ################################################### ### code chunk number 23: validity ################################################### setValidity("SNPLocations", function(object) { if (!is.character(genome(object)) || length(genome(object)) != 1 || is.na(genome(object))) return("'genome' slot must be a single string") slot_lengths <- c(length(snpid(object)), length(chrom(object)), length(pos(object))) if (length(unique(slot_lengths)) != 1) return("lengths of slots 'snpid', 'chrom' and 'pos' differ") TRUE } ) ################################################### ### code chunk number 24: set_chrom ################################################### setGeneric("chrom<-", function(x, value) standardGeneric("chrom<-")) setReplaceMethod("chrom", "SNPLocations", function(x, value) {x@chrom <- value; validObject(x); x}) ################################################### ### code chunk number 25: test_slot_setters ################################################### chrom(snplocs) <- LETTERS[1:2] # repair currently broken object ################################################### ### code chunk number 26: setAs ################################################### setAs("SNPLocations", "data.frame", function(from) data.frame(snpid=snpid(from), chrom=chrom(from), pos=pos(from)) ) ################################################### ### code chunk number 27: test_coercion ################################################### as(snplocs, "data.frame") # testing ################################################### ### code chunk number 28: AnnotatedSNPs ################################################### setClass("AnnotatedSNPs", contains="SNPLocations", slots=c( geneid="character" # a character vector of length N ) ) ################################################### ### code chunk number 29: slot_inheritance ################################################### showClass("AnnotatedSNPs") ################################################### ### code chunk number 30: AnnotatedSNPs ################################################### AnnotatedSNPs <- function(genome, snpid, chrom, pos, geneid) { new("AnnotatedSNPs", SNPLocations(genome, snpid, chrom, pos), geneid=geneid) } ################################################### ### code chunk number 31: method_inheritance ################################################### snps <- AnnotatedSNPs("hg19", c("rs0001", "rs0002"), c("chr1", "chrX"), c(224033L, 1266886L), c("AAU1", "SXW-23")) ################################################### ### code chunk number 32: method_inheritance ################################################### snps ################################################### ### code chunk number 33: as_data_frame_is_not_right ################################################### as(snps, "data.frame") # the 'geneid' slot is ignored ################################################### ### code chunk number 34: S4QuickOverview.Rnw:527-530 ################################################### is(snps, "AnnotatedSNPs") # 'snps' is an AnnotatedSNPs object is(snps, "SNPLocations") # and is also a SNPLocations object class(snps) # but is *not* a SNPLocations *instance* ################################################### ### code chunk number 35: automatic_coercion_method ################################################### as(snps, "SNPLocations") ################################################### ### code chunk number 36: incremental_validity_method ################################################### setValidity("AnnotatedSNPs", function(object) { if (length(object@geneid) != length(object)) return("'geneid' slot must have the length of the object") TRUE } ) S4Vectors/inst/doc/S4QuickOverview.Rnw0000644000175400017540000004024113175714520020666 0ustar00biocbuildbiocbuild%\VignetteIndexEntry{A quick overview of the S4 class system} %\VignetteDepends{methods,Matrix,IRanges,ShortRead,graph} \SweaveOpts{keep.source=TRUE, eps=FALSE, width=9, height=3} \documentclass[9pt]{beamer} \usepackage{slides} \AtBeginSection[] { \begin{frame}{Outline} \tableofcontents[currentsection,currentsubsection] \end{frame} } \title{A quick overview of the S4 class system} \author{Herv\'e Pag\`es\\ \href{mailto:hpages@fredhutch.org}{hpages@fredhutch.org}} %\institute[FHCRC]{Fred Hutchinson Cancer Research Center\\ % Seattle, WA} \date{June 2016} \begin{document} <>= options(width=60) library(Matrix) library(IRanges) library(ShortRead) library(graph) @ \maketitle \frame{\tableofcontents} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{What is S4?} \begin{frame}[fragile] \frametitle{The S4 class system} \begin{block}{} \begin{itemize} \item The \textit{S4 class system} is a set of facilities provided in R for OO programming. \item Implemented in the \Rpackage{methods} package. \item On a fresh \R{} session: \begin{Schunk} \begin{Sinput} > sessionInfo() \end{Sinput} \begin{Soutput} ... attached base packages: [1] stats graphics grDevices utils datasets [6] methods base \end{Soutput} \end{Schunk} \item R also supports an older class system: the \textit{S3 class system}. \end{itemize} \end{block} \end{frame} \begin{frame}[fragile] \frametitle{A different world} \begin{block}{The syntax} \begin{Schunk} \begin{Sinput} > foo(x, ...) \end{Sinput} \end{Schunk} not: \begin{Schunk} \begin{Sinput} > x.foo(...) \end{Sinput} \end{Schunk} like in other OO programming languages. \end{block} \begin{block}{The central concepts} \begin{itemize} \item The core components: \emph{classes}\footnote{also called \emph{formal classes}, to distinguish them from the S3 classes aka \emph{old style classes}}, \emph{generic functions} and \emph{methods} \item The glue: \emph{method dispatch} (supports \emph{simple} and \emph{multiple} dispatch) \end{itemize} \end{block} \end{frame} \begin{frame}[fragile] \frametitle{} \begin{block}{The result} \begin{Schunk} \begin{Sinput} > ls('package:methods') \end{Sinput} \begin{Soutput} [1] "addNextMethod" "allGenerics" [3] "allNames" "Arith" [5] "as" "as<-" [7] "asMethodDefinition" "assignClassDef" ... [211] "testVirtual" "traceOff" [213] "traceOn" "tryNew" [215] "unRematchDefinition" "validObject" [217] "validSlotNames" \end{Soutput} \end{Schunk} \begin{itemize} \item Rich, complex, can be intimidating \item The classes and methods we implement in our packages can be hard to document, especially when the class hierarchy is complicated and multiple dispatch is used \end{itemize} \end{block} \end{frame} \begin{frame}[fragile] \frametitle{S4 in Bioconductor} \begin{block}{} \begin{itemize} \item Heavily used. In BioC 3.3: 3158 classes and 22511 methods defined in 609 packages! (out of 1211 software packages) \item Top 10: 128 classes in \Rpackage{ChemmineOB}, 98 in \Rpackage{flowCore}, 79 in \Rpackage{IRanges}, 68 in \Rpackage{rsbml}, 61 in \Rpackage{ShortRead}, 58 in \Rpackage{Biostrings}, 51 in \Rpackage{rtracklayer}, 50 in \Rpackage{oligoClasses}, 45 in \Rpackage{flowUtils}, and 40 in \Rpackage{BaseSpaceR}. \item For the end-user: it's mostly transparent. But when something goes wrong, error messages issued by the S4 class system can be hard to understand. Also it can be hard to find the documentation for a specific method. \item Most Bioconductor packages use only a small subset of the S4 capabilities (covers 99.99\% of our needs) \end{itemize} \end{block} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{S4 from an end-user point of view} \begin{frame}[fragile] \frametitle{Where do S4 objects come from?} \begin{block}{From a dataset} <>= library(graph) data(apopGraph) apopGraph @ \end{block} \begin{block}{From using an object constructor function} <>= library(IRanges) IRanges(start=c(101, 25), end=c(110, 80)) @ \end{block} \end{frame} \begin{frame}[fragile] \frametitle{} \begin{block}{From a coercion} <>= library(Matrix) m <- matrix(3:-4, nrow=2) as(m, "Matrix") @ \end{block} \begin{block}{From using a specialized high-level constructor} \begin{Schunk} \begin{Sinput} > library(GenomicFeatures) > makeTxDbFromUCSC("sacCer2", tablename="ensGene") \end{Sinput} \begin{Soutput} TxDb object: # Db type: TxDb # Supporting package: GenomicFeatures # Data source: UCSC # Genome: sacCer2 # Organism: Saccharomyces cerevisiae # Taxonomy ID: 4932 # UCSC Table: ensGene # UCSC Track: Ensembl Genes ... \end{Soutput} \end{Schunk} \end{block} \end{frame} \begin{frame}[fragile] \frametitle{} \begin{block}{From using a high-level I/O function} <>= library(ShortRead) path_to_my_data <- system.file( package="ShortRead", "extdata", "Data", "C1-36Firecrest", "Bustard", "GERALD") lane1 <- readFastq(path_to_my_data, pattern="s_1_sequence.txt") lane1 @ \end{block} \begin{block}{Inside another object} <>= sread(lane1) @ \end{block} \end{frame} \begin{frame}[fragile] \frametitle{How to manipulate S4 objects?} \begin{block}{Low-level: getters and setters} <>= ir <- IRanges(start=c(101, 25), end=c(110, 80)) width(ir) width(ir) <- width(ir) - 5 ir @ \end{block} \begin{block}{High-level: plenty of specialized methods} <>= qa1 <- qa(lane1, lane="lane1") class(qa1) @ \end{block} \end{frame} \begin{frame}[fragile] \frametitle{How to find the right man page?} \begin{itemize} \item \Rcode{class?graphNEL} or equivalently \Rcode{?\`{}graphNEL-class\`} for accessing the man page of a class \item \Rcode{?qa} for accessing the man page of a generic function \item The man page for a generic might also document some or all of the methods for this generic. The \textit{See Also:} section might give a clue. Also using \Rcode{showMethods()} can be useful: <>= showMethods("qa") @ \item \Rcode{?\`{}qa,ShortReadQ-method\`} to access the man page for a particular method (might be the same man page as for the generic) \item In doubt: \Rcode{??qa} will search the man pages of all the installed packages and return the list of man pages that contain the string \Rcode{qa} \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{Inspecting objects and discovering methods} \begin{itemize} \item \Rcode{class()} and \Rcode{showClass()} {\footnotesize <>= class(lane1) showClass("ShortReadQ") @ } \item \Rcode{str()} for compact display of the content of an object \item \Rcode{showMethods()} to discover methods \item \Rcode{selectMethod()} to see the code \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Implementing an S4 class (in 4 slides)} \begin{frame}[fragile] \frametitle{Class definition and constructor} \begin{block}{Class definition} {\footnotesize <>= setClass("SNPLocations", slots=c( genome="character", # a single string snpid="character", # a character vector of length N chrom="character", # a character vector of length N pos="integer" # an integer vector of length N ) ) @ } \end{block} \begin{block}{Constructor} {\footnotesize <>= SNPLocations <- function(genome, snpid, chrom, pos) new("SNPLocations", genome=genome, snpid=snpid, chrom=chrom, pos=pos) @ <>= snplocs <- SNPLocations("hg19", c("rs0001", "rs0002"), c("chr1", "chrX"), c(224033L, 1266886L)) @ } \end{block} \end{frame} \begin{frame}[fragile] \frametitle{Getters} \begin{block}{Defining the \Rfunction{length} method} {\footnotesize <>= setMethod("length", "SNPLocations", function(x) length(x@snpid)) @ <>= length(snplocs) # just testing @ } \end{block} \begin{block}{Defining the slot getters} {\footnotesize <>= setGeneric("genome", function(x) standardGeneric("genome")) setMethod("genome", "SNPLocations", function(x) x@genome) @ <>= setGeneric("snpid", function(x) standardGeneric("snpid")) setMethod("snpid", "SNPLocations", function(x) x@snpid) @ <>= setGeneric("chrom", function(x) standardGeneric("chrom")) setMethod("chrom", "SNPLocations", function(x) x@chrom) @ <>= setGeneric("pos", function(x) standardGeneric("pos")) setMethod("pos", "SNPLocations", function(x) x@pos) @ <>= genome(snplocs) # just testing snpid(snplocs) # just testing @ } \end{block} \end{frame} \begin{frame}[fragile] \frametitle{} \begin{block}{Defining the \Rfunction{show} method} {\footnotesize <>= setMethod("show", "SNPLocations", function(object) cat(class(object), "instance with", length(object), "SNPs on genome", genome(object), "\n") ) @ <<>>= snplocs # just testing @ } \end{block} \begin{block}{Defining the \textit{validity method}} {\footnotesize <>= setValidity("SNPLocations", function(object) { if (!is.character(genome(object)) || length(genome(object)) != 1 || is.na(genome(object))) return("'genome' slot must be a single string") slot_lengths <- c(length(snpid(object)), length(chrom(object)), length(pos(object))) if (length(unique(slot_lengths)) != 1) return("lengths of slots 'snpid', 'chrom' and 'pos' differ") TRUE } ) @ \begin{Schunk} \begin{Sinput} > snplocs@chrom <- LETTERS[1:3] # a very bad idea! > validObject(snplocs) \end{Sinput} \begin{Soutput} Error in validObject(snplocs) : invalid class "SNPLocations" object: lengths of slots 'snpid', 'chrom' and 'pos' differ \end{Soutput} \end{Schunk} } \end{block} \end{frame} \begin{frame}[fragile] \frametitle{} \begin{block}{Defining slot setters} {\footnotesize <>= setGeneric("chrom<-", function(x, value) standardGeneric("chrom<-")) setReplaceMethod("chrom", "SNPLocations", function(x, value) {x@chrom <- value; validObject(x); x}) @ <>= chrom(snplocs) <- LETTERS[1:2] # repair currently broken object @ \begin{Schunk} \begin{Sinput} > chrom(snplocs) <- LETTERS[1:3] # try to break it again \end{Sinput} \begin{Soutput} Error in validObject(x) : invalid class "SNPLocations" object: lengths of slots 'snpid', 'chrom' and 'pos' differ \end{Soutput} \end{Schunk} } \end{block} \begin{block}{Defining a coercion method} {\footnotesize <>= setAs("SNPLocations", "data.frame", function(from) data.frame(snpid=snpid(from), chrom=chrom(from), pos=pos(from)) ) @ <>= as(snplocs, "data.frame") # testing @ } \end{block} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Extending an existing class} \begin{frame}[fragile] \frametitle{Slot inheritance} \begin{itemize} \item Most of the time (but not always), the child class will have additional slots: {\footnotesize <>= setClass("AnnotatedSNPs", contains="SNPLocations", slots=c( geneid="character" # a character vector of length N ) ) @ } \item The slots from the parent class are inherited: {\footnotesize <>= showClass("AnnotatedSNPs") @ } \item Constructor: {\footnotesize <>= AnnotatedSNPs <- function(genome, snpid, chrom, pos, geneid) { new("AnnotatedSNPs", SNPLocations(genome, snpid, chrom, pos), geneid=geneid) } @ } \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{Method inheritance} \begin{itemize} \item Let's create an AnnotatedSNPs object: {\footnotesize <>= snps <- AnnotatedSNPs("hg19", c("rs0001", "rs0002"), c("chr1", "chrX"), c(224033L, 1266886L), c("AAU1", "SXW-23")) @ } \item All the methods defined for SNPLocations objects work out-of-the-box: {\footnotesize <>= snps @ } \item But sometimes they don't do the right thing: {\footnotesize <>= as(snps, "data.frame") # the 'geneid' slot is ignored @ } \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{} \begin{itemize} \item Being a SNPLocations \emph{object} vs being a SNPLocations \emph{instance}: {\footnotesize <<>>= is(snps, "AnnotatedSNPs") # 'snps' is an AnnotatedSNPs object is(snps, "SNPLocations") # and is also a SNPLocations object class(snps) # but is *not* a SNPLocations *instance* @ } \item Method overriding: for example we could define a \Rfunction{show} method for AnnotatedSNPs objects. \Rfunction{callNextMethod} can be used in that context to call the method defined for the parent class from within the method for the child class. \item Automatic coercion method: {\footnotesize <>= as(snps, "SNPLocations") @ } \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{Incremental validity method} \begin{itemize} \item The \textit{validity method} for AnnotatedSNPs objects only needs to validate what's not already validated by the \textit{validity method} for SNPLocations objects: {\footnotesize <>= setValidity("AnnotatedSNPs", function(object) { if (length(object@geneid) != length(object)) return("'geneid' slot must have the length of the object") TRUE } ) @ } \item In other words: before an AnnotatedSNPs object can be considered valid, it must first be a valid SNPLocations object. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{What else?} \begin{frame}[fragile] \frametitle{} \begin{block}{Other important S4 features} \begin{itemize} \item \textit{Virtual} classes: equivalent to \textit{abstract} classes in Java \item Class unions (see \Rcode{?setClassUnion}) \item Multiple inheritance: a powerful feature that should be used with caution. If used inappropriately, can lead to a class hierarchy that is very hard to maintain \end{itemize} \end{block} \begin{block}{Resources} \begin{itemize} \item Man pages in the \Rpackage{methods} package: \Rcode{?setClass}, \Rcode{?showMethods}, \Rcode{?selectMethod}, \Rcode{?getMethod}, \Rcode{?is}, \Rcode{?setValidity}, \Rcode{?as} \item The \textit{Extending RangedSummarizedExperiment} section of the \textit{SummarizedExperiment} vignette in the \Rpackage{SummarizedExperiment} package. \item Note: S4 is \emph{not} covered in the \textit{An Introduction to R} or \textit{The R language definition} manuals\footnote{http://cran.fhcrc.org/manuals.html} \item The \emph{Writing R Extensions} manual for details about integrating S4 classes to a package \item The \textit{R Programming for Bioinformatics} book by Robert Gentleman\footnote{http://bioconductor.org/help/publications/books/r-programming-for-bioinformatics/} \end{itemize} \end{block} \end{frame} \end{document} S4Vectors/inst/doc/S4QuickOverview.pdf0000644000175400017540000052735013175736135020712 0ustar00biocbuildbiocbuild%PDF-1.5 % 55 0 obj << /Length 840 /Filter /FlateDecode >> stream xVMo1W19^}$ ꉶmRDҤƻnZ!!y>5-Hӻސ ɐW@4YoT(6V4n=T\QWV ڄvY@_Hh򙌵Ɗrf9і6p P-e|u5á dƺfFbuѵA_O*u1@Kxy^t -9Wifa]$"+k=AWQлȫU@dԔLWtVZ̹uZV{)_l; i_JƺǔcDf[n?}9+';_ig'לjdO0zaAZg/^&v%FɟI[z%!'^6Nxwޖq>v*H~ydy4aVhM"#%bΊc}08kHҁ)h~^ ,!Ue)F=a77e\k}-$$4TO )9/k)>6@RVUxXHh|^B}j(+tlolNlwٜANehvBǯ'SkDŽ_њo-pys F[rN_ b躶ۜmn5?k9<;تooFk.)8P 2k0Z"&iDS+J,Od^(n'v)VulJF\SǪ$ꁸC]h^n*F4r endstream endobj 92 0 obj << /Length 869 /Filter /FlateDecode >> stream xWKO1WzR%Z$RmB RB?3{YRZ(T*ƞm8J8(a8_%88D]R>'NrHiAhcINicM;[bJLN\ QjTThb0Fhd( q &$Y (RREm2n GJyA i(<gd1@/S02v%/ȈnF,11b)]%R8T^Ru-SͶbRF*D'+BHcBT_]*k83Tup̬tsÚP|ȪAu,♔^o/ŸL(|p" <]bID |Ҩ m)o>z1@7n0QY /4i SCGM&J#N?5**Bȝ(I/ta%o؀ÓWɋ@=b=(G ͍5N/i N࿏|A|jDۙ kg~0=o%mz>0%YU7JK$s?1ѷ9[v*<<8=ٻiZW6QO7pxK,ĻԐK2BdqÅ)-I}ϒp3R endstream endobj 125 0 obj << /Length 911 /Filter /FlateDecode >> stream xVKO1Wq=~}BDrH==TK J~g{YBTCjoRD(~-` ^D h ڋLGwDqډ7Y Oh 1D'jt!Fw(218q#B}T(@&4j1{$,'ڜ- |ZGHm1z#.5( 2V;I=CVr~w4 ĀD^#Pd#u70, C7 ҫTbs/\lD@ʙJREOTWYiܸi?^jf;_e4oZϐ gIʢ Ri~]gғO:$ ?O,%:%QFd*#&vI.kw3-,W-P.O1r;V9 ,g cOSzYΪ;݌vqVqx?n)c#:]+Fu>P{{ĴˮD6_SirqŠsI% ^R!吔@UfW(%-2{nx-]xc%yVUi endstream endobj 151 0 obj << /Length 1278 /Filter /FlateDecode >> stream xWKo7W({Iz(ZŰ[%뗔!9j1P+y~3C҂0~(a0>E02*.cO ] g#,{aC,xv(S×LiuLs[6Fl޲<2ϕ "~Z!ALdltްLoE HM!Ehna@Y2xܭrG S_{YP\k#t<: x-QBLD\ĩTH`e^yDLU@ I"sTwihж!33P%ۇs82jP̤sC@UH$-I티j HOw Q?D^tx2IÓF S 48]3 m˃4К E[OY4,8J6h=+^I*BUSûz d2N[)B"Ʊ^qAopE=>+*zkA۾ ..e+OYTcDirאdRcxLT?%Ւ_~m:r.5o ZЋѻSe$%ZB[a[՜}_Mx4ZQg@z?D|vM>^A^Csm 4 ,;5ʶ?ujzf2^j?9 k-=XCh>ÝL%FߝJ8Y˩Y }w gNeUh0qF]Ǥę4ItIqwҸ΢U nq0]m5zF,0Qa̟qo.r%%DYGD;s5.4@Ieo{B/.L$W;_CumRw[ Al)m˭k)3]OՎ1"U L9Q>.18IV|v*n:Mx &\('IҊdcDߟ&Uz_QLCue.HVTϊ{xvGp6'ɐZ.3l x0m%I.Dun21@B]urF=`4Rt~8?ĎOvSs`tcIcK`n>M]G76X4R{G[G顥 endstream endobj 11 0 obj << /Type /ObjStm /N 100 /First 826 /Length 1850 /Filter /FlateDecode >> stream xZr}߯G!=ˉd%%* \I( u# BX6OS3l;LyD1gb3KbHHd ( >`Sl+R.<`BNLN]88 ɒSob9QIɎ ·؀D 3+ɏ\ـ-8 V9(8\L HGP42EE R8Jp1戸5l.fZ36H -U.|@<1d<9P5|R` vp̠0b( e)eduM !ƃ LD!ǘ2ЩqEg&0f:eoʢˈ*fa0A!1꣄> stream xXKo7W(=hh.cߒdؒvZ7vIZ%3yϬd:[} `)eQ\*l999 \ZVYvwɆcTγ>A^H}ɔ\47azr&l%@:x>0"e+Dy!PFk&%˞69!JoX~ۃ@p?Ԅ<[AT}*o ȒIiG'}aA>oMĬV\k0с, ƌp1=@o DsRp-m˼܉誄.bSG{!F@)% 474>D$պSgu 5EOѹ0S8υ _kd? bOtRGuNHGM,4F2d7RP8tݷF )GP咶B$f=ad倶,-w}[(*faqJˮ*NSe/I /c,fpu]u]VAEJ].;dCcX@H+תbɾ">c8yiF&ns%yЍŢL?MDKvD[a[{3i6ZM/grVHCu.""Zנ!U%/FBE!7ѦxR O~X ܌VmzXȅJxЕ].Dy)}~2i|1tn&u&_-F.U_} vO>١H4}c?R7;|`ޅo v݅.BA܈wrGhf}RNՎi^`Q)x}zOMMMo]ۓ(]ؼ "::-kpǢqOU&u,ڼߕ^ FXl㜴 c@i( X* bV]o|dD)g\1P)( |1fg endstream endobj 208 0 obj << /Length 1364 /Filter /FlateDecode >> stream xWYo7~ R +>mHNcb)izFw`G@Ho&`'$jf Ԥ01gDK2oCfHgs@UY@Ari>IdeHJ!{8Y(>?"C̀KhzK$+ @)UơG[*kI@9/(9}"W6np8$ \]=Tvx 2D~(fb(q!h<*,HRhN(#vUp /peK;Eظ57H $1.~A78a_2 H%FҦL~KA笈O&ޅ$]$)'؄*ZdlZTxxU\97`ٔ e\?Gn6<.Gl.ݰz7s:[Ý5ÃcB  8p &Fku͠o/w">:6vY5pdFv ߅عHO'l˝pp HFKh@[B'i<ڒ QGU:MP&f?I4id*2UτƋ{^&S }CLamUi=ө> stream xWo7 _= ${^ĨӠHJΗsڠ] p9'~HB_(R`Эnb3/0gF99B Ic\>/ĴC9F}%1NƆVtwi1"H&'n>JMh)RD`EmoLHCOm1HC.e9cos|C_+ DQ=F>gd>,(75cM3,RTzjq+]]hP+&0pd!)a&idHP"n>xA7eAZ '{VYOZU$@ۊ eL)f;tȩ/zw `)HW9]gDqx7~gY,axǻu]}rU*O[EzUKMUVHlOfYxuIxRJ)2x"z^SHH` ta4ӦUʗwf,:X~xzH[LIHr%N!B;8kd&v:3LJCXv=vҭ`mS nO>&4B@"{ڵVRz zX(dg=mE*42v~@h}r9*A.aXLKZH~8wr#[Ϝ?&5SnFdA8 D Fw\Q'-b6U{~~U3lDt{<{GL{hPvR{ax`.GlֆWyk a&.k,m:n@'WOIUhlC:hh| #Jf)5U5*DAWLk(w#.G*#tpX`'Gbg)pwTu8Snm4W.ǜn7G"%CgH=B%IP7`γ,7vxCS?\Xהܵ-^FAAҷMd-2k[E_/J.7lu唛o6)B>"g&I_!|o>7v])Y:`,2ʺa@dw ezrШlo׀>sNBqx]j $uvOƳnQǪnyCo;$=E&m2A0E%-Kse%L> zg:8bec+( 4)> stream xVMO1Wq=ZTʥ"P-"%)(weI HY3yϟJ%ލԠ,sD0^`ЍXhӳ>N;>C`n $&:a hs1e-6Dĵ sQEob#4r} Vths~7/ŴRᏘcKF.4xz@ϑvVɲ?ؘH M52`!% 'FT]+HTe%)'pmx֛nQqG6,xmEnK49-*8-q*C{hTD),u ,!.2-Fտ9G\˜^ HKҐSҸ$>Q+S9a^)AC)N1<'+gUO$d1a5OѿɇVml;1nVf:zV[ZALgB;+1=v`|/ӽm ߝ3$Hcqfų$eQzPJkb.3ᓁ6_nu⤗cR 脈[3eD5ROQ^T!VHwL(;N@7l,s*ǩ #ORb\gӳn;_UԞn_g\upy> stream xMoFsl/~F| ڇ-FR16}qҠD@;˙wg?)/L~3~qHk}`K%,TأTc!Ո T֧7ѩ %䬦Dz5eb>bQèamXBEJ/8j㈚"PS%O0!ICdI1%ՔIf |vQ={ A{BԞ"=!=͑AM֐)rƣ'Q j"jl&F犋*HL(9Dj_ %ўJ)hOɔ\h?JY_C$Gj#D90L9"IʩR.nFrܰTÀ8`5 @%:$UЦ2%* Gicr- {'jMZD&5K;AF 2Ǭ/1tt)JêedYtZZBﵗzV$T+b`10@ tk͈'ІɮWydM  bTTaa]\4.u"u)kaIIӝzIժ4ٛ67M__/]vwϺm~Z^m+HkMHtmv6$F䄺3=uOg?n^,67JIHi3i'$# "睎ڔ)]B Ah­:FWHqCvBrt\-.R݅R;!e-GvWRc\:J+cK[Ľ$}b'ۻ?Nmp쇛}Xry?3C.k=:=<9κgߗzyx5%<*n-u_X ]'~W|b5g8<ͫWq_^\-I<_l6x{ӳb$?T4i s14;.Ss| vGDi? 7=_WgK$Rw|˻xl zY68T4vQ1&̚׋n~)ao![7o[ -`e2Y,Vl/l/wy1Vܰ ?bon1r:+QBW~!~LIĩI Mf4dFMf4JФ4)_M@ǻhrYp(;!| +sRA1(xoTjQ7F5UW%Vbe2NS)>)q")i*gN9e攙SfN9XNsq~!ƻߔ|Mˡh"d4aﱉ~i`o1V&+Jc6`?6lc'n:lf69p6Ia<6Sa2&3l2&_ gm׻%!B !1#`C0bF ]Eg ḇG͟e-,NX;vaBgRYթJJ \N endstream endobj 290 0 obj << /Length 1232 /Filter /FlateDecode >> stream xXn7}WQ KMEYYvK%A3$Z"u$hÙC΅C:2^DJhL;ŽLՊKG{JY˥e*ַ]i9IiD7)in|͚聍%@:x>0Ty`+D:BL={.5a'9 -5!*}B3xm(PWw(Y(Zr^=ld`!lؕjeq(P]NP%Sd$ 'W"-bEFnk`g"Gxߕ9>C 3H;6wYp24I"N^}&=%dgt(T6QtA9!^bAix҈ʠߜwJ˸xa-<ƴHS o=adak+X(A삐M3aw5 s0H /,X<),}|S,"6eo {1xS.UGseĔ3tq(㺲AXh?E6LIF)p+Ju*RkOZҁy<: :vxylo 8СOѳse0%!%!0LYíl:cǿMͤZg+l 0Bv"׀1kzM6:}Ђ2aB:t͢]Uz2t`^O@ڣZTD@[٠;=[a {R?{P> .t?MXmgҔE#kLY63*/Ng>N`:liȬW!>۶,CM,nr'7CV÷!'}0+T~ g{쓇t?fꅫ!U3tK2ͭ޾X %7uΘfJ,#0<>O,??w;yyXQ,gSjI /M^f6A^[TqPwСR>w],CE"V( u)9ocČݰVPl{V nP~E|[jLy2v#=^ xmj'22P H%(r1} R~ԧ_Wt endstream endobj 315 0 obj << /Length 1329 /Filter /FlateDecode >> stream xWKo7WeT ܥ4 ('7e-n,ٖ 9|R6M(.3<83l{:Y|QSR6sY1TŖ3Iz'eFM:t_25׶bM Y$m*=k2VXدm-$֚)`ȞS68EɦdgKo_9̒ as@CMѼYInL00q(J@m-F@ YO"GH%n;*=Swd!Id JyP тeN F7>P&wr'13ˌ?$Qp m}&G{P,K¿pcD^c4xYa,2Wa *1n1J)шRW<BX%mHxpm :ºVJ43@lҲ;x?j ⢑U0X9|)TnN܂ fW^mA>r%-+kO\$e\5&Jl8cETU䒹#[/ oxk4-`,J U6llG}#8ڷôGẬaxDe|Ɉ Ҋ/yq2Tuv>i Nh4b5×g[AxtR (!B,3]+B?NA8R Z[sEm~M=,:ϔ2RRAF,/-B]IkXn$ixiސߙ~6"y'"  endstream endobj 340 0 obj << /Length 1541 /Filter /FlateDecode >> stream xn6=_AfwT1Z +NqmqesCi'ٺ-V&ytW3"%k.ʰՂmg;%RluRqԌ~z ڀ|ɔ\ےi^` eXHۺdo\+cf Ф(ѳ'!e]0Šs< .ZZXkknQa8k1i:AdDbBtkmhE*^5 %!+2@J~Wdg.36{ͣ c.x9ᣧG{boO/I~֟Q"1BbVI#ATF)00ރ?jQ2@sw/ o=QƖ-AQ/5=-Sz*;?j TrQK7s" f89Hq{VW!~8izKEJɕ'\Fa ^a|4IV;BB <4RUKƆԃ3WP0 ox+4 0%h*+X>Kppઆv8^L\<,`Y\ ߞO_#yr{廇z`3I靈Rj?a,J ,T>cOTJ8)1b) zA+:/I bq_ߠS0ׄ jlsgb:SFڠUw:xrDl8-,<"'uHHR60>ۨsX#A/IUC:{LỉYY~>kb!i_5ylA=9z9n\(Xlk+ʞ2W$QNȚȱWtj-6=2)>}TLaK{5Ϗ=GXѝ#Ljx" eEx1$W7j] 08*^r4QƋВp6T@f`}Gc'+^_'f N9ֿCr'vM%Ӹz (d@H8Q>#<#lݜZxCã֜ۈ%O |iCڭ{et&oNODv5vw&ssoæ& Kd{[t29K&Mv^=߾l3vKsi+ҝ&97Bd|Gǫ+vdU^uejIrԗi>3nrt;DF4I]wڌM堷=Kt&ybo?1P-Lw̖&r63nߋ$RyیO=8+7zž'L^{ 6!S endstream endobj 365 0 obj << /Length 1289 /Filter /FlateDecode >> stream xWKo7 |:G -д zǏkǻ/)8;n]@#")Ej9;c4^@/ g)d#k!-[ut%0,}4luƤ))X86(/TV0Ukװv9a-n6H;n0WK빇7\B6 "{l%fuoCOޱ%hCeQ^ ["'\ 1 ɾ]Ό0m>^S;0IC/'Z&S$EpT{I^l$Jڤ>Y' 76z󇲸;|Cd!Ii <.4 G¿ UJFdYൖ Es둙H 4[$7묔࠶ڪr6LEW4<)eٓ`R^\ JN8Y8GЫ%/4lMLZezk.o4ЋٓRcHB vt nfǎNث oRr_cㅘ_]-?]B;v#kӯ[>z˝?j!, D+$"p!9\$j@F<`ھO[ @Wp>X!L:=]TZYH_g D4(i9 8 켵mଂw`W2RVq U<;Y2p 7nxeBk F7Qj9AzKӤ#tp0톃rtEE24`r]R*:6!c<g|$ȲqB;kۜߴ7ښ?t@21Q>K.SL8xI:'DA?b6؎JY ⱒV QcE7g*v'q`|[\*wqsn 4L *Q^`laתя>-,8;MOHO t}X.دC]dE7)D ܠ?wN>Fݤ"Dx9,f/aٻCdowUiX}XeS׃d!eԾOJ)/!Vۤ ]#)#?47R|/]5 endstream endobj 266 0 obj << /Type /ObjStm /N 100 /First 912 /Length 1628 /Filter /FlateDecode >> stream xZKoFW챽;3y ؇-FR16MƄDBB,7Cqq$;XI.CYJ @FWA&G,#p,2T&()@9KEuwzTІ#]A-%]ܐZ'>*@_N r" šDe I!L95J0 sΈaԕ]HNd(k P#.zL%Ț6rF>0᫤WB{QĒ+$(ñ] RLUKr!ÌT58x YwX<,EYu)Nt9҈O.Wl5[`ϱI+CJ@( `CU,ʠ@pB AҫOUqU(u< -Lf0vՂPLO42XMFMaɫ3(A_ԒxSHiX AACؠC(iZTFTܽҕb&gĬ n,kȬHYϑfڜ4Z;{fzǡݓ6-/79J(}֪ǭW tߝuu7O_XlUݻoGo=zΑaF8R&v6#y#"%8;OR[KIؓ' C ,p"As=(4d2 5F;hmd': {#M# u,p7 C>k3t]drҝuO-|yw!sJ {\:t,+g߽<>,6KGMwگ6CxHu jր˳%rG;_ݸwwŲrh2ܮI߬/7G>qyuxпuquۈ. ]ͅzq1l rҹf,罖Ŕ6-s6&F6,֮AIXE؎:I`9ȾMH9S.E>LLzf3̤g&}0& lcl#zG1.ޘ>{s} G5PT8@5NQSmre+$d4LfŤ3X G=N>&f1}cf3r؇&i,{#C3=_Bm}cl, 6V*X`clxlxbxbxbxbxbxbxbxrDo>>0}E􃏅~L?f1 2X#Sж4x@0xE0^WDxE4hx|H4hxA_I2tDo?>0}ECбw endstream endobj 391 0 obj << /Length 1617 /Filter /FlateDecode >> stream xWYo7~ׯ $՚r ) >ޚ>(, Kr j;I%gf8Õb*'[OOkHa*]8cut%6q;;ϞYmf*ڤ. \ڣ# ژx+LQZk;*dVȵpTиMI^3;CIW #e!#-N,ARtz.),< P z#jF`~VęPT1,`UxD@vWt@* H1¥]$0[w$*p/#!b [I!s2ɤ/o2pp}]W$g I_?9DK I'6bv&ojm (-Wڏ?oHO; 8Qbp"oUIUI4WHPD*Wi UYN&JyD qE]O*i-8m(y)ͨ %=9 ual!,YLQ@dDø uE 3>J_wymAiz\&UE[EvUMM6Hџ$sHFLI 'hw nFT3hZy uhZy{~xxk xcw|K,I(r%.,eac{GctƧ鿕\8lw3XOz9HrUPIOBӮ0A0] N: dA` - v OWw|*Q> `z0տ "bBPt=g,:j;y+F8PáS5WiĿ SSQ9dWh`L  *giZ*$iOy>-'!|5O mRPqfN͊Miy$„=|R7uGi3!hx9gm8-O<㰄.,]xe l.% Ă@q.Q3T!9M2~8:/9*BX4\X,QXОw԰EUeP;뜠 .;K݊ C@t|M97|jxF9ly*%loߠ CPZ.Q;ȒDW+,8+nӱ;G I[am}Vg<QXݰuV>)C;w5ӞbZxPÇ\ G7vcstFO0ZD3_F2߰ |<[~A)`͸~֔s5G+ܹs[rFkF30Tsɻ9[ zثCbGǧg9o͚w$99y~.6w͉]c"[Ј*yjۀ;DyGS #cO4nyMm a3`]}4@2G0;f YM1.kCxmV!!ѵlfϤ!'E3_uWnq i=e_np endstream endobj 417 0 obj << /Length 1387 /Filter /FlateDecode >> stream xWYo7~ׯ 5].MA^e7:l:}gǮdH PZ9? ifz|k0 o™25*g+v=TrmwIi'A9F0|R9Sif V.z7P $eM0Jm\Be 6sDvF7BI1o,fZ )2ȣF &tJ0D =ߒ8#"UJ"|xEjM F1J@ fqQ\I5r܋@?ɶ$At?ICڢаϸ?n PNPmṊ ĻZpjn3Hs GUXjrbKޑ|S@½B@b<hH`{Zg( q, L<Sǹa2Ͷ5twmԛFr0(:,9Ɇ%R`K'!<D5!r܇~g`@?jp\騏*P~YXbE;rw:@e<@; pT7tB.S_OO>";:BGYKh##c4j0EXN 3IsFHͳPPOk߶8GQf\>@=j/ksW m-eM jϐlivzIocT endstream endobj 446 0 obj << /Length 914 /Filter /FlateDecode >> stream xVKo1Wb<~}BB*^PH6m$I;vUggıPH Z=CkGb>ڻl'NiS{nXIB#MtHѮFgbt[lH&ׂBDG}Є #eĞQd{Dݰ J?bjcA0YtiHA\m, d~ ۮ:yA{$^# hrb#@]ЛL8- C7 ҫTbkI/\lL@YJ֯ҸqcxDŽ1"2U˦/e::|E+CQs=u΄,4T~*^xL1F+tE#9' - Zo)W,x̓, xmE@y[ LNqJ+NJJ5**@H;Eyښϕo%KmmQop{-.w2]or%giJii^h2) WJАo@*^'ïakD:7Xq/wћҒEb6Y ١2t5 &f XYc,I(R*ﶫzL{gD/:K8aLJ{KBuNfR Ԛ:uA]`׾tiep A3?UIx~0>*z:fLueIy1:׊_&@ǙFQtsQU9ޝ'9,4B?Ȍ:;E`D.仸n v%Km]6sk/)]o˵i endstream endobj 472 0 obj << /Length 1216 /Filter /FlateDecode >> stream xWKo7WEB %M ǡ@Ӄj;>lqĨ.u1|t-FH[IȵrA\oC+jj-8F'޿5 uF"5!/樍(*G7{sE|@%j-"gF?@ׄVB@nmx Bɓb,?Y:9%B 'f \P{0 ~n&Sg%>`[jAj|B~]fغD5@㒉| e@ܐF*ۆzKVPie?a B|m߭ݱ 0xɆSL^ϓUzLHl1{i%v?`;;lo,m-Xd32ݢ'S DKY>V7Ij}^l4\cK;rƴ޹)Hӎ_PD>xVVlY>2#}- |_:ӴtUhx/+8gy/\raYm³ #뻩Jܘ35@5K} K*eLwLИe"P!_Jb<'uU|L-Gp8\ձ%blK{%s3 /rx}zmUu>[*>kV<p`Xa@pږw@ϹdjCEvYVcl(n;,昍= x|?d럤NF0P endstream endobj 368 0 obj << /Type /ObjStm /N 100 /First 913 /Length 1706 /Filter /FlateDecode >> stream x[Mo7ﯘc{%9/*=A6AH*ҦoV#%,k{, q8$,T\JOe{RJ#P&VT2Xw'N|B7<`( ILWB ;)9{,42:4;3HG#'ge6zW h4\a4 i%d\IqԃQt:)_q OQ5RJNSεP {!%IqR,_J9gSF(;6gDm2eOqNJYgT\C2*c0U_D xPPGPuCxu-<[q#[E0fDQ-. #H}aF-$eAM%Q 4SNwNDX"&=׈R޳⁦:D[d ڢ]@ U"x VZDDtH¨D3#!L$U =clCIӝaAriӏm럮j}XqX{=}F?Mjq7!qN]{J0zL''ԝR|u}bj{zg &}=&ALxX& Ƥ$K ;"ս "e`"΍ ˤ%evLR[K9IIju2ry a[=Ov6`'t݋K =_=_;cI_HFs:]-/6bgmHgV?/~8^Q_q"b=-Z~|[Ɪޯ֧ mf^R^it3,7nZna!!]f c 9^zuqKlFӆοvE:bFw^}\_,djdzck xAwq=_^(l1Y~O [d+he2[Y4bx+W ^1;Ъ4R\A6yƞӾ_\q=p!4.<j02 اI^"y0ٕI#"IB]&׾Ǥ?&1IL?P~ bܛM2:3"HEs߁d&Ij[m Vbe2Y,V7z&ɷQķa*y|G w)?c%¯=áw4EMFS|AGO' b?uJ4%MIƍ=JKg/J2dxەPqRKz7H endstream endobj 498 0 obj << /Length 1215 /Filter /FlateDecode >> stream xXKo#7 W(4ҥ(6m$m$q]?$jf$qO'#LHq2lP"rը jthomFQFD_t6deH/ъhA(+UGwb@[ @H{g߂L7PFkP !fũEz:"Af]IHksD#MѕVJ\@aJ!۠YJk'h+hO !ʡE51DW*@]PC"!\eeX=gs/Lr'D_JMcq샏=i2,zTv)3dȌ.8f-ba$4/Qh!exFQ\l\}d¥lJtyi=)HFzKP5fU mmuȇ6Y7F1Of ]O֐9xf/Xd:t#Fd* R"-mo# 0-zY=_oaN՝iJaa^hBɧ$#ŪvJ? h[i*R0/$5%7BӖ J"Ed݌`( QD間={rhtxAg[_#Ejv> stream xXn7}WCe4b8$w ڠ(Ҵվ$AȒ’bɉ4Mh3 I v@z@`ܨJr%[ڢP[6kUiXg_Ԡ|`R)lצb;XfHRђ f,TRIdQg3vV?~ 05Gjka3D#LyQ^!e k@02 ~7wɋ*qxVʒD*nPM$=M d Tr3Ks"!0:H@ޕQHM@{I4&hQ3v y7>~W`NQduD#hfs-'"5j>$[{-_l[rtFnq`8wᢶ#SAK^@wXL!7,썰D)*IIf|}tڇB)bOHxQ{koI۬jRצ ۯnbFO }Ypڅ 0> (cS^6*Px]}r fS$p f<׼7vAo o!\,ȿt*S+'{*"?]ע̷rJoΤUx~Q7~I(O]-/Ppr~ `sڎ 03uze8/}F.MlEˢ@cU&>F@4q%6@*mI5l?ug( endstream endobj 550 0 obj << /Length 1488 /Filter /FlateDecode >> stream xXo7 B>w:)[XRIl'^c;#%J'g'鼾$I?R"].gO[|m0 o™23`͇l:ds& F Y['6Ii1^hr0I2e ܔnXk5`o0Im-ʔJ1 l;a49?6P>38COskaІȣf:s_k9aE$) ݎqFEPgTM3 r(*▣4wܓ@ $CI)_dڲ԰F q@D7>(kw O03ˌ:Ais}Nx^X~_*Jx5"s )hiH2GܴRε 0W kyZaW.-Ⱥ7\G +JOԇER'J%Q"oMNԱEdU#ƃ޵ .>\[J27,ȸ,J.I}m 3 ?6K HOzcCZ z#FLRAhހh<\1,0.:]T{qْ6.;_t^m qp\p<.$c[HRc )4a 0=%}F?G|M*:aL"ZZ ,0HP%qsZzw M*V[A;5ާ3KwI n7'g0~;VI.uR E?5zX<[u}iLHƟ N?+lt.b%# h! e+x^$y5Y8OR{Ifl"4k.D|rL&&K2}:$^y}XXN݃c& q!ꊷLj>.UHw0p:4Ktz՛ׇ`] /ƾ{6t;$T6^dA\%Uwצ FζoQYpqT ۳jzؠcuW Kd-;5A-GѠ*# o#O e``F\5W7+Ermק垍YLHx*1$SGԳL\de{\?¡v!v̽2SUmѨ4/EqgVh(m'T!mi,HQ߭`+P槿z®KzUex6HzDWzX\.$LHc"k$J,8:=F5tf2GR aIO)Bjjc-*a_}, endstream endobj 580 0 obj << /Length 914 /Filter /FlateDecode >> stream xVKo1Wb<~}BB*^PH6m$I;vUggıPH Z=CkGb>ڻl'NiS{nXIB#MtHѮFgbt[lH&ׂBDG}Є #eĞQd{Dݰ J?bjcA0YtiHA\m, d~ ۮ:yA{$^# hrb#@]ЛL8- C7 ҫTbkI/\lL@YJ֯ҸqcxDŽ1"2U˦/e::|E+CQs=u΄,4T~*^xL1F+tE#9' - Zo)W,x̓, xmE@y[ LNqJ+NJJ5**@H;Eyښϕo%KmmQop{-.w2]or%giJii^h2) WJАo@*^'ïakD:7Xq/wћҒEb6Y ١2t5 &f XYc,I(R*ﶫzL{gD/:K8aLJ{KBuNfR Ԛ:uA]`׾tiep A3?UIx~0>*z:fLueIys2:E Y]&ǙjuOvVrjQ;OrX.9h~`v~~y\wq1$J-} ZZ&}nx-]xc%ei endstream endobj 475 0 obj << /Type /ObjStm /N 100 /First 915 /Length 1589 /Filter /FlateDecode >> stream x[MoEﯨ#\fC"%D|Xhy5[ȞU4ꙭ~_vT(PD,V)͔X` 2l\l ee&c HPg5L\BJ1$sn.b4"ER+ -;M̹ Q 7D"W&ȩFX$tVpI`Ш6l{Ew4@.!LQiᠧFiତ䥂;iihDHZڿfC"9w Jȥ,4dQP5T40G-YF%`pʁJ4xf*< !TRRi j\r5JUL?΁b5'&%p Lpn2QY67)YWEf[FDjQk`?CbL *(H4PRR" #*BPbƙYmP!h~2Ȝ@ xP[*f?  <{{ig)p͢ ˅mm8;[z-7?o?/'e WWs]]eT :C@R:,Y8=3/P>{ff=Zg"&+HӁv$%ˁH ].@L*b#DH&eZ&gRclMbeRC҅`+rLrH&mZ& 9}`VG.&f[W'AD#t= e"!#a #%`y2FNH)ANV$bcwiČǹ{I"pq @3UW\0Y?_zg'8w6Oф3 |~~uQ|9q=l_n ^&{ѷx*b=Q:h;Rf{zEbۭk/]얻!fj '\q> `/_|inݚJ͛víoVnO6oiH86hCZq\*),%0#HV<[v݊V:^ux5k9^]M`I;R`-S-HY)X سSwّOEvddGe,;f1ˎYv̲#}'M } be?3}6Umq[3`bpBd=4H9 ROFԓ m 5ȬAf 2kOKK@c5XAM<\+:1> (*>*[Jt]"Kq!.9%C\r%K/9^rxR9!Rg2RTi'#UکHd*T,Uf2K**#}?[Z IP"H`o]@dDv]@dIvx-:-W8^q<L}2US󌫺pipYG>NDL}p݅˱Lc endstream endobj 607 0 obj << /Length 1412 /Filter /FlateDecode >> stream xWo7 BȾ\I'ۀh]Y*%hC4h2ʕ)Od w;~3(GP]ե3=*SR8Uz ťDqVtB/?mI!siI_(d!qC];X1e2E ˤax3$݌Ϭs{k\ddiI_Ihō`%?_,>'Q0"sR9lbAL2!A qsi Vr&Lk5dTm+X0u`J6Ul҈IӺw xɖ+RPH&S3Xf96 d.ְMn:Ǐfv\UxrTtɟh$'KuIRr:6+Jw\눏`lZby :vyr.'oTtma/z+ \1\p++mUZP)NU1oc &3ZV$5H{6spڗF{׶"2hĆڃCP0@K, M+~KtT"_ݽLߥ|V B3f:3@VfP&?HH&i\LǤ!%$d'Ξ"dV&<"$E@r:~jCb#NϷ S|ҡ- uTyx-&` ^))S`gXEZIGW$tοyKN$G5.ɪV8OxgCQoƦ҂JVu] KǴyqN;f'I:4,two>,)?CYaimV.= \]ŪyEٺTS*%hxs~0] l9G޳C>uzDJAob&|Kռ}ă}Q &v\6Ts⥳Uq>Q"AuҊ6 ,]T*9z{.nv-!? ׀gHd*vȉP&)Ijti]S{1 p˨SA4A k }@~)IeItSE?P> stream xW[o[7 ~|"va˰"+8 {h/׍!ۿ/I]%Il] ̀$>R$%K1R|ߓ[=`_!E TV('V1g}VUVfbtOy>G tBWעYDo[Q$7D 2zj PZk %ώĩ蝒7"}6(uJ,5!/(GW{Wj+)L]~eEv8|]jr#L$=M+ !}$ܖ!|$U^dB)0+&-#gyҘc L(ͤOߕ9>/(2$GF?欴csOք]-4m5ȸ%/>}/<r$G˅Fɥ"Ipt1g'  yhqFB/!&M`h#ϛ"S ̬bǔF\=y#N!Jz;X (58|S(hS,e7(dϺo2ׂjoR8 m BIY!e:c͓ӨձhӳVsAGwx $9)F(k* ^/'/Pk՟#-Q'Yl:gA~cHsAWZy1wpC(˃s^kLU ٳ>5jК'dz)Nѐl^fI~m9Iq#/I!r*v0J#w0jA2(i <}Agm DE(OCcayGzNƦj=M{;6uIHuj'>X\j>5> stream xWoG_Jj#n*/@HݛN8J<Uêa%3RHm6t@?[.Ez3O:r^QG9F0|-҆RXYJ1:*d/ŕ@K ҠZaMb%}!#ѿ#T!/֠ )&ce ʩNVN{ 3uٿ/wNE-uND@ WInz j)VָffUNo^:Ek&0OxA 42G(HKF!3 7T@>L dtCنS8|2Yi'k|m5}}'~p`­, ) dfFm yg X^^x'5PLQ=rߟL"=eO Ȅz2et1Ϛ\4b={OWDmz?/kogw bYoswc C_Ǝ)[DѶ wZ@!n&&o甔E&*zf І' c /_5#T*d1mSCI%@{S(r٢upxT@Uٽ 5ʋ&:iuwQS>>ҴH/ǍXΉ?vv0A#W )kMR/ẛ+* Km}_+jb4T~Kҳdnm~̭W1ˤ|h P +mour&XtVo|4Exи5{-ƣ3(H9^*Vi#']"MW/7{SEH> stream xXo5{{J ԠVp$(wg{{%ZJ>)go3 ™v0ZH6sgs- K?F`]Yu,8(g/TV0Ukװr YJ ) e(ZZ=7*($ nsv(4K?^G(_Щk0Rwl ڐ"|yTm`V@Y0a[ $ 0~QR>}<"kjn: iDU)d ⒣H+W[RD L?C$#D$3 7>z dtCYq)R2JP̤sC@E$G`^'; &tPq"\X<IýEqJ j)V&SAhMF"ǵ%f/$LeS;_&܉psN x47e~)Y0˸;OA NHCO/sh\Z= F†vEɋb,ȟ,MhPr:d֐T] K}%&V1?zmFo-\$U[:Y3X}?+M}QY'[9D7ZpU _BjYOᙡ?CާqİM H'qZDM&$Bo~ak]2 6#!@d_1 Ə0^]nwLJ\E#R`ԣWS?Ԛn^5Kyy aBQ$d"|ؿ\bt5rB+jhXL[aKWʜY߅P|N! H{.L8|wuow~9r/a endstream endobj 583 0 obj << /Type /ObjStm /N 100 /First 912 /Length 1662 /Filter /FlateDecode >> stream x[Mo7W̱3CumT[R +mf5rUg  + |pH*Y3ʪB⢐RLrf%VKaf.B1SHY, $ߘIrCr5J}p[:q2K5bPqfbiSJG* VK&؆)5rcƘބRPLo 7eJ"b(i (UԀnj׈xD ,VKteKz'ղ1bN"=dT8Lg *hE-%P٦W*\mzbI*ST SďdPiȱ"t4sPNɔaA- TAkx'Rg5nє%SK lV)J0J&Dh[IbESKc$X.J Y넚 zՄ 5 `hfj1rH2ʰZ҅zzX5<+ۈyF6^Y ~f@`ȊjaT<ǫf򘍤lՎ{d\1/7t eoT(.^Of 8cg Hr$g Hrx%ˎ/;~9u8L'2zL09&21LLeb*SʘJQyIvb?T;JרkcSd1JASu*NEԩ:^qx+W8^DUd*AUPՍ7 endstream endobj 711 0 obj << /Length 912 /Filter /FlateDecode >> stream xVKO1Wq=~}TVRzh{;Cj>?8J|A XhQx-qB7Zb},f[AVJp8D M|'tr@2}iFЈv9:[bCJ4r.>Ml`BcF7P=bOmwÂRN+1ldѥ\#-3^!YN@Ʒ]U,*4czIIFd#uG7pXj* Wi )'pm09g)yjz~376vL#"S utBG/ie2aΜǹÙ05ʝR {i+^"?O|ѿDE_xĢdI'hKF[oJE= <8yXzBɀVtv[PQ^IrRHwi(O[ybAfRY/ablTsqoq_鵠m_΀, >5! MSubb<%sJ rc"}V DBcj_$|h^޼cQa_念7ﵥ%t )+A#ejƗ :6ۆhwZϐ ř5ϒRi~]gҳO>$~Q2IcR 脈[seD5ROQ^T!VH`>fB \d0l,3H»T'W |6D7c*KjrgHbXtؗ2>T{{´n/}UNͫwIQ7/؟; /D,2]BxҺ-rt德oui endstream endobj 736 0 obj << /Length 2155 /Filter /FlateDecode >> stream x]SF_G{>߷i;IIm>a&#Bއ$#4M2c^^~oV@xdNB iͼX콾L"~iI ²_dՀT)o Ŵ+jwQ] \I빇/ʕJ nPb5b:]/|=G45'{W,Bd^ BXFO[<":iS1ahg֢`JY|->9H*L /[.ce$~8HĤ}"[eiޓ$Y =IėdH<tT"Zd PAt_e)$H:JUf:@/;F2;A5)bhd*Cd1x{nOr~Y[̱[m9\N#KaV_QyM,<; G%m!9dpkpr{=wgB9J@ &aDI+HXo+XP32}!lIAbt*܅[/gg6n/,BWPfs2L|(n6xW|؂LY~Q77־x1ra,J݌|T4 H$"?#(g1V)pA,K2T+$c-Ɩ`6[u*3pO_Ų\ЍLy?޻DԬX[듸;ݝTٽUKrFպuwjcpZ(h?ݓ?ƶLPZԖ*;*Q5VU~zͶK^'94]M\ߴ~bV+JudzR3e7CaaLΧii) Y0ϱT~#;~S\w'rF?Q ʵ$M?uϯQrʉ ^^A7d4ugmwiPJSAJY;1c'\2T߇Ƙtޡбl\sOFCL]V*~]l8duRu|JmjWtY9|c&GH-q}7M?o`0/ѨnO|J1/2̂iz~y(iyV!jfx—z 'I8 wBŔn k`Z?L ?-ZVtZW, |\@c%ɔZ(1"LC_9> stream x36г0S0P04R0!csCB. P*ɥ`ƥU()*Mw pV0wQ6T0tQ0``/ endstream endobj 742 0 obj << /Length 105 /Filter /FlateDecode >> stream x36г0S0P04S0T02U06WH1* -2ɹ\N\ \@a.}O_T.}gC.}hCX.O;: ? \=9 endstream endobj 748 0 obj << /Length 115 /Filter /FlateDecode >> stream x332V0P02T02Q06T01WH1* 2ɹ\N\ \@a.}O_T.}gC.}hCX.Oy{z?` \= 0 endstream endobj 760 0 obj << /Length1 2663 /Length2 17531 /Length3 0 /Length 19065 /Filter /FlateDecode >> stream xڌeT\;@w \{ݝ!Xpww .A^s=FA,UP+ lmx2JL&&V&& es'+o *ֆaH&b2H9[Y<̜͙oQCDEHUߢU ٺ7-hd{v5YX[4Nu-5U2@csgt2)h8;͝* P@t]3]'[s,w&P<6P(>h;IPmƆV؀T젌;F GЃ(ߩNf?T:[o)FtF.@P2, (9Xt.T#gP3~nh@7ҼoEmPC +8kݛ31+^Y\R[,Q3J-HHX/B  uey~QO^2E  )ӠCW }yph/C@TVP/w8)\NGfvS" 4QqJJ]n_w6D!F)'OX˓YOtETtEKҐ>d| mb1[/r:v4A1$hjR)?)˰zdAedrڙ~snEl' ÀS;CiV~kVVeOPKȱ?` F|`pI:` j\%\ å=^PXHZvWPQor(]Gǯo*fܯ~A\d^) SLd0)ALSo8+)}hi'h|WHǘma,Aklxmݏ0ںze/At4gRv~|)pt0&G ~'w AWSwM] [ٰ1=,i>gBc7 Iʟ%EFjB- * 񫲚tU &=q~)0$Pv }um$.ͬvC_{LvN' HDiyz>~+Un- /-` &DoE#ØGytU "eo_6%/7 ț ,|^3c;WF:IQE˒jXF4Eaσ [F(^0بp{YUo/&Gs/+N°x\wـdtQ# 1b48g-*mC{Xjft 1=caRO CzwT#$L< h {I8yߔQ㔍LBr<ʲ%?IBNv}ujg`q$n~~WΝp|pVRq~hT(C=&c 7$؎SԎb66^0^3Ā KMBՁ[,{ X~t>J'XŒFUP0BJ?gE9W3Yp™``5LWSS=ZB5ʅ my\o67 Ϥ_1jL\(#zhǜtiZ Iʥ\RoK+[etjA^;<[Qr3\]u r250Vۑ>z_ȫ4רM g=S-43U߆kwʟŋHv+2[ b7C&;9 'F\W"#/>Y־ց(zlcetfnoZfzTe]-b7klhR$ei,'Ye=YlKyskZh!e6h ԍdSr!L X8Y9X RLFJNgD҄g95`f`IK2{"xj m3GT7 ';4di N1n_1t(_ J޷D "^-v|JmI>va(␩$|lwpϬI7gI~(fQ0B+8#j5G|$Ui)޳"meD 'I;tA_7V=ο8JK#Qi-ys^nޘ{QwEB}0 &  $L]%'vmZÌP.Ԯ;Pɝb+wut9L ;@PN !.mG@)nt`8jsMm^ʐY^KgDkd4̌I w܀hH4t@/GbB0L)>qeb9A؋x'+R!ՎMs9/J<{F%oטPaିy,vR ^4, BDb&Ɍc#5[tw=!,߅qS潉UW$檃va'_.֮ )l[͗'09Y36:8"4YY!DefZEcX},kFQ 7e,[3%`^9泯kZeoaWq%}xg7dN &sO_bIP\G>*[  w3t\*F6ݪE|1Skuj6j{x/#oHOv>Ro~j[e<*vQNnޢpzyf˹`}ݾe:~ ң`lu'SF:Tv^pvuPFkY#PD&Tn_d#{$>'#ǑŒ#Sj:ʵʡX'dtJP]$Kp0YGXMKx_nj4MԤX|<"䇈{'2aff™rʾq4 Q ?sϝ/C*4}R䲲%~7^/v 0o9eH!` E?[wkB~ͻpָYy~94PS*A_m+A O)s"rk6ۋ+W^u`7,%zcYqO3AI|^ZVR]f$r7洘` _QX!".B|4e{׹ ,œ|3_q,|a}ަldo?/so ,>)X5_N(V.Ԇzά_31p!\Gc +Q1KO獃TuE,!aUӎWg O_~XY@b`w-o#vY?ƗESo 7IǍ );J(16@ ^{RWMA:*ɚe]B&")b) 2B/&{Ib::OW?-),TKt҄Uh2GޭK5mUs%anhhlTy,8mcɬ+ S7ZvBNrhюL2;vacVsߴJm^4(බ8(j;ފ|l^FbX=يoT'a|A='Sf?ħ7#D=:8;s+*`Go Hym\7ՙ^\Ĥ!Xbfwxp-E>A}C:V R#hLE0YxyN~}6!v7俊Չ`8.@V: Mk>u,_w\ DdiG лfG{eW0(1\ n Zi+>ₖN!m ԰"LiN*<үRIHH}!{UBgckČ1 XeI^hg@BNflޯUk8NM{&$B$_Qz8{WiJN2{K82v#O4YamՀ@Y]N1IjW_ OK$m!5{57*.y\ 5N3jNd}\OZ-=-G1M v &\<O&KY" J]魼X4ڇS*;fʂaR^PHo1_Z`뮕}}"= Ų; HyGɘ>v<og 3&s ny/|~^t\L{̃c{5?5: avG=%Te o>5L"Q5^ bY=ss@_/~B&Ɏgku-61΋>L;neJn13wgI;$B1d^߫;=mal]5?6Y;6Gh@v*Pxv ڔBApMڑLU'7wW]XK bg"HM|♼9:_e W:m1E]XB1+DX;{؝Ij=j l`d)-fӥTu ۅh-ʏݦ|r"L_p!d oKsJZ朢lyp"M+mUJdӲ0+x+ +t:*֏yw/9L9ʭa[}XQ䯞X*QWv;><MB3in2;q$Ei>t'Z|SO6rwAљ3m8  xQ4ܛL?K w*tHC;#Ư\6b6_?y*?XфzC;;iy-i ,!)mi -em 3ZT=wNj]p1aB{KjOg\2ڍ Y-+˽a%F*tR&\)Lld]m@0}$ZMʥW>\4\3q#0+265v/ kiҫ[څh:S#!kU{1_ju{2K#IC>WTf|c|fK3A!W-T#Q] GT*飷aR.-*X=-!jRV^[Cz&M}\zY߸BV@/k%Ufh{ҐXtoz`?A?ws;M,0 QtJû 2_`t)FC0ȈBZRݎǥKHbuϱj>wL9CCw`'^a6-͡CR5wDW&ؕn=!}̀<ճbiT0m(mkfs#^j,) hww w\v}(X\QX)!L ~~ AiwK,˽w"mwOTR`oa9|n0'33F8#ա!uK7RefW%r+e<աȢI?i݂njsƹ9K RO/3r2WI͘?24$‘Z2bO9yP[.tjN=5_=%c#,<棣;MSpa0n\0AeS ~G@22O ݩ XZ_vQ K㽱F"6!o=H&F]s>+h2 %+О8zD}qUKkrB b/rD Md@,fy,n < 01G*Q4-3QҦ`#9&,e|ᒧrScue0,c J_:1]$>m }2_x"Qanuٞ6VǴk,] i35Z ݜpt T)Zn ~$ IϮ6Ɓ4t)*rkS-q,tkLWua mT,9ҍv"٤'Θc3mTGH1}ΌbK(z5O6oR.M5UK-Hx|@M5:X|2C&},9QDS뚸s.}KҥJXku^rJ3uL W8Y00Pr FAj0Øb?]n/ʼnairKfdc|L!j9sLO{ڽDJG={p 3n\]世*~oAren訿Y#L*uT^ԓ%,hi: ؀yYEJ՟"xGF#qB|2'5C ~u&GD0|#)8&n$A˴޾o'؉"fTYУΰ7'ɤW>OiBsZ]ʵV|hV/:=1)w%ı:qWH=b}_{mԃ_5eOƀ-Xa3O:k֟') 78E,Bَ(^J2<5Qރ`›)Dʺ-+sE~ |MB*DZ. vIeh_偻꨷/ĺVeT6bW5ç.fP(8YE:Qœ[1R( 7`xG˗6 vū[&L0cmb˶*5؊ב/%np0\; %g[WXH6;2{1bZZh^ 6i:~&Slۣig7ځlH]/D{nzA4 "DžHs]+.B)`OÒxn\%jxl%Ow^|>J(IQx.{&Ƈf Bu1RR(zB1I2r J5@ R,6/a8';'u@#^u=vmOP5~en\{gIQ1rHdzV!L᭏!.7geE'4 T6zy!IXBjḮ!+hvhZ+~pćMy\ "CD̋ eL/"SJg88i2`XSNg DR>-X+,[Fu9K;#e֧4[`sУG[+=f&R vG…zx>~LJ$r#e܅%E6@z%Zm$A~HדjNfCZs/&EG\&.N>L+6P!?"SjgvJ\,~̃€*Q\ ,UǤ;{`pDr-OYp&tOt`ptÒ"i~k=x4A D qeƱӑ!gB&F_"tEKWX٪^]HErϑ5629"(SK=;֒ڔ>"nܞ\0Fl[7ӄE,s>.XTF+." L >w4ЦV,.imDXʵ"ԅŬQŪP\g7ޫ}"f`7)f9S 鎸^j ?(ib*Wqq%eA>WW2alrs/ГMd~SD*a*%rB5ח%ӢqLia4L&q6F-R zUy(?@ V2,3otcZݔ\^&@ OspTsd'ǐeÐFEsy+KKt3<`3嚚d-@u6Jy$eBo?)8~ GQJa% {=2LMr MUo5)A8v}kF?J@֔!ȁs{ =)^_؇faυP]̅&X͈ MO# K!zr#aqdo :ہ$ڰkmJ0{ýkMd4|ƜЭ:,ˤ1\mȋ2Rz×Q2S:!?!׻G8զUg\ٿ!XF1Cf ꕌdv}²&hLGx*~{OpފZvɆqNl/L\-WVkf Ă}MtjKt{y&\<SUлtȳ i1)F!d[qcia{+)Hb\?%Wy*Z\Ԩٝlg͚ 1z&zky.\8_OHgel$S 9/= TfnJFwA8hz,B p辀0 G7 qx\q^QJ+q$V ,Ȯup.  {cP5-V5F2 x:T#+KD7 髾_&B(@-W=ۼ~Kc4p*&ݻ}2RS{,C=|qi]Rub"X+9@ -q«$/:8ڱm;J. F5)6XxVeԉUJw| k7p&1 Ӗ$]|1.ӱt+4ڑe0h=r>'C Kۮ44܆v:z@08= L㳹vƾzW iB䈖BY)/UܯK*%d mwE3="0u/I#'=Z$ON9m2MCT:V no599(BU 7&A8S.1׆¦] cG\V8CZCyiX>=x&.Δ/}t7cQ ݶn!&ȡE׃ &{HPCo%|\nN \! 3A ImK`I3|f<XێCdvhu4dh+xOWca"v쒙]sOdSnN<> d-9p,"l*l'*zm6?F6R-[u72 ̢ c4J+qBU`{Zn7֑0Ϗ7&,-\p>QvTxyb‰ h!VhԸhDYG\>Z?Ӓ_UrOwa˿._ǥ4 <4!\@l!Z {hryuu瑙AWaROo~T`# Lfv IO'VMbd*n=z,rDAe`l~M7b\L@ 9΃F&T@&-6}%K=D!9av0YLS EJ+CvVh:YȳeE@2EeDō 1rQa/8~8<9_:S*uCJF 3f@Iٲci+,6bYT~lE'frImQJt{ҩYʺ| /{L1`0 -!VW,)~2a4nH]apӺnu/i lcI+.{2$ K> ZJWjSLI(Uo8Ak!L#  sux+)rQ>ܿ6x-x+1x$Tй1o*ׅ` >c\8OuVd0A(MAT_8KD6ȲM8 w,d ݍ:)>(y@;M %wvooڨRIWoL< ^!QN+Lwh贈sb#X3/vEgSq3Tl2V5ZcM`fh6B϶v_⮝+R'=n 3̘.z;$ba,Ӯ5^ԀYk#YWpSPA$vydj<U~ޑmm&J4D'ĊeV.ǫל*hi/ Iu(f$?rPVf0^>Lʕ[K /IL԰Dm4xz/x zj BGWm*fN bLH,璒ƒ*έ!bJ|Z3uYYH\Hk{ߝN^O=y` υ^BZ Q _эmmƍLhum4(two_o>bFǁ%a 8jh B%>.=lcb";D}j0c8Eޗ A;Kc<mb5> stream xڍT (VwP[` EZx š8-PM9~ZX+䙙=7{PY@@WvV@BQ]9Y@ ?v-  G3j4u*BrnvvN;;qH-9F vzs;??/_b g@d1\+ +Jv\@ JJcEhX]vC,]=LAlrp.qs9eʎ `s8vVW"_M!^`+%PV`ute:X 4s@כL͠I7HLS3ՅlF_i,`!9' vC݋˵ux8,ʰpsdt;d%P~۬@n  Osk_hx9r2Ckq8,e ?Sw `6w(C ˿z`O>~௿B;`;+fTdO ;r;),uV =(vR@@n9Z俲_I;7ym\7W(Bڠ'Wdv_)t=H4dv5_k4;Hlノ-mʿ\ R_c0uv6BB{G _m `cuB,!(&7/M7$%^ ~;o&No;(&~͢YM7AhKP2MPf CG˯`u@$uZ!@h "A l@h ?ZRutB:_rG@Q;Tb\L]wn TB{P^ T__a |_F 'ev b.jSrS-F9J;c>]}o'vVC˶>P|IB'ӺKA:k!9o%鵮>ؗۓyV US["g7B,FZq+$.ı4?3ծ TgQϞ6yFՐ|DD'f6c}v4×~I!kU/*5XBփ+Z.l'3nrSVa#K$/zcڼhkF"B(Jo/$. N )aHïM**Dې } <>Љr][>' /I#yF/IwW? g)%hK$[X(3OWҼg h?`Z*(3+_M[zo)vKkhV|/}#湔 ?Ϥ-g8"~^ܰˏj *_:غ=U3giA,X2 Ds<Kf1a  {` 5m[&m6_PW8TF_ 1&.E2?:ZH(|Q}fz̔┿Y U-SͨtXO!a"x{/YZ=fvjq{,)VTO&%$4|h+۱="HK֮e,/ !<)}`^vWkA&CpwpA㊟H!ȡ*-{ŃSgkI8'*M'>(TI02.Af̄,ٓuz~_[A^yin}% "=:`C6qw%M8ȧ?"JvQWyP oϨvla@v)pB'EݸX uO@LԋF>0c KG$co}+t6R mJY{kI~C&J4:+=BfM,. j|`ʝSǍؙc1h:0Q9TTe('U57 = 'gAЩG=M =v!hțgo=B00G*At\iH >1S$=Bt妓ݬbNK!jd٧.ժ^)LoJlD|p޿KSq硓ڃS0 ?~dNݴ*wh/To[t#dreǙD̮9˷yQ C/]?,;le+߱. jy[ ʲ d-4Swt얜vfMl IqA> ld3"U'bR#΋ Yh#(I; B;[]4sZgk:1߼Bহ Ʃ "mp-Fcny]]\IM>^Jn7yYb5NSMΑ x|z!WH)e*Um5p^´ 6 ԥ/qgskXzY-0uTX҅* ZL`H\ ޲a_RFk8!R&B_7שe`yM†z"\Rŷ|9] ye{oMkuMl^i %8u8I0QSz<~Té8(6,7OUp]ypb}zQvS/y~Rh$+ʜ[~sT DB\5|7뺽9xdbg"6e@,R2<.U%h n) +7'Kaã:-=?'~C &*1Lh뺭C0әg,"dMݰFBݷ>XN- 9 a nS>x8-:ȳPvF\jXtJ n]iɟjN8Vmqw/ş0%$1Ԯ39>ےb&} AcbggY)D$S ZygfΝڵyzB~8FH/m\`^Bo*3$ 6[-"AokYa|gFLVV '7}oɘC|d32w_ 'VH*S$=rnuΟ301zc**DZ]Oge0%0(%O`m5uY)- zKf:L(ċ>_7_yHo{2g׃Ɋ 4Kڐ{L}Uv?dgﱆoX͊os=LP`$OR NTvP">s]L&:2{Jʻ6TPc@3;!y4E-x4+k&L4G 3r{,laf[$ fiu1,%Kق=嬎692/"Ep'n-z]usd&Y 4WdR> .mΗDZy5g4hlM\hWDWHt7D%' E39jS_X #Ԋ;!Qȏ=H}TO >P"rR% i%U}I02&0-2y #-Mfr ]1r `tWB5Je4?Ě"$K\t4}!C2*6XzB>GNsSY{lG:^!$6k:"*y aUn $m?E7иV4ۢGX;'ES˳eJ71jqLm`'7%6h7ijf|OOי[yةU)-h;GbQe_Y)c7-EDҹ=q ^.B)8dw|`~Od2t r(xٔҦ1?.ig|;?zBjo?n.SADcEV8x{򬸌M0g=X+ Ut;P~1ØA#{~o!8 IDUaĴH ¸EZ|*\5ƍ;LTH9ۀlE*5)Ȍԅ*B^퇸xfM@D?lnRep@D_G쮛wϿm+f2ń0%Ǭ{OExsE9ʃ"!IP{CU-~N@ Y`V~&Xjʵדę7:r> c>92»1cW P}L"c' {}|?Z_ K|pAƒryMjTqJ:i;LoLCB^:.7 E] 뭟R̨}_ 0r4}:CIϊnaxkQTLV1۝ĖXu Vb TntO0Å:F·yR(ckmqI.yL0cXؽqUޱ@Vtvq&6qPɉ흆&\Lf?Qj$\w5ր۸Di|oBq'$8-iO1֐UX; ] ƾH`:!4V yZ l2O2fHEyUw>)7D*zN9M M?1wwP-kѡS3l<I '~"SQu۪f-uCrY\$=ĀGoTc/Q¤ 1zX(v-ԍZ4u{l!>Dx_y%^V֠B2ѿ$mR%`:"uxݔf)S4eXBwa"$馚LJb9+p ]7mNmc#RAŬó$4 y6꽻*ȫGD"_iqJ9%πd2JEdץDۭ!^9:WRKd e=YՄ_{#o+z~g7xPC4b;R<A_5e2SGҡfΊ)+$A}zjx|S򽪉, >)Ko /w.aX9>Yn~3J/qo4 ;]ڌtkl1RMg2=}-kɦ ڐ(C=쟋y_2"?NpU2y"-/${nhYʒV*~ 9ox}Tyb_ e吔MQ6U_찣QMMrC9+r5SOuT)0:?2|8GB]I^U蔤rs۠W}{!4% 3]qOva+V'|Ftj+̾"/X;tu(}L3 t6Qd9 nLc-\x ^"R2: ڇr_W CxͪA&m1<|9#v[~Յ,YxJ6rt#*m4k}<:.`1u,%i7ajQD0)/ ;jfzPDQ 4q#)(o͹*]$4Uv8M!]<[YĢn"7݄GHewF-Kx0Vz?! WXL<~;K7OvSՈ(֮ȽU,^TT>Z4||s# gD`bV![IJ5T,-U#aV[: J^8.~R^7VZ7[4Ȱ}o ]e bPLqЯP7uզA8?d6P܄X*ߺ',a8 l=Y fu{WѡqZ"/a_Mba_uߥy08x$u]yNO#S[TK-21m|v巔^܄?-}u<0kN0ʗɸʶ`lJ؄˗GޚU6+VXQ{Жqc_Hd9,OGsr_ `%,D<Q 홻s4VheZטn ͊ ]d>ܼ9ѧ{N/+Y M6=d>}B<Wh1'u6u(ܺmWdɯq,q!H=1"S.b$V~6Y}*;Wfc[D CX;WÜGTⳈzƟQ@4ٷUa,2wQ#Q?w )owJ<. e2qM~A2D"|OBs-U b•zYtS\<Gd$jLs!ʯ҈HyMu+9y@" ڪAt6ϳpck(L0x*o\Yv-ιa*Hʻ;V}̖kW>սOi7{ endstream endobj 764 0 obj << /Length1 1787 /Length2 8903 /Length3 0 /Length 10028 /Filter /FlateDecode >> stream xڍPj6LK7Htttw#KKIIIt"!)ҒJz37;uu?u?30lJ0(O /fa1]YTy0)8MHJ(!vM^ fy@6 `rNȺ= (@w":ڂ\0[Rp ry8Hsp| pG r5/6 ]fy  DdxADs@ +Xngo@'lman Jp_87r!A 9$ !{? et@i b80[/ߵ`ՂܜOYSMKH巛B\@ XMb j ki5v/AE:!J_nZ2zV<||C3DH ؜T~F\2 h-b #R0_7*&2F $@"a悠/?@P"8).@~n`Dt"ػ(H fXă! c4~[? ? f. O?5&=OKu>?5~]m~n`_-,V2ܩ.Fڇg#ZGti0sƃlIw9k* Ԝcݹ!_U HrGǢU~{i2RD]ӥy,ϣD8{8e>jU9C,`N.mpxꋗHezuny}s0N ϊ>36\mwv^G -Q_:I:9<E~>IP_IJqЧsV$ٻ4l$ފ/$rTATy>`)> +}պS(nLޔ{>zȭ4d&7Vj.P&$w}&^kъRY ~ّTe11D-L;O 1Z )"QHmJdP(vyy1yr\ސsFW =3w tMe"v5 ,'*K3p\:gG&SAŒָz9Bp ;-Mnnm}[ŏܼ6k5 c‡NW)Of9>!L*r  {h^C%~94 K$ 8/>$M{S+.-ⱽ0L~M(~%iZ ݿ2ڭ%1V{ZMOsgpi\,=I&Gg\%9ka)sVvQtLvRm뱻H_nF>f[k{XLgdS!DCO  Xci!#&Hy.k'8i`:`=u;MX붔u1^Vj\95do6.p+Ȫ\I4H^*_@Z O0?+ {j4Ç7Ix]CjKe|?u wxLEJB~nZtEd:3 Y-{hH9bnˍZ4-#EGGoʉ Ο&֌DD5nFHKqTT^#>V1h#[Fͺ]Ifbbg ?,#Iմ*v#|9}/ιbcAu~NO2Tm_~ȅMCe]ɵ=YԜzS [w7'{մS#|\oB(rmr)uG/nzI +;gƒ8?#(91 R,), M Ex!yBA8oJ_^ - Ve/to{P96O:jpGymAZ6>,j/sMu/)|_&%l;U48^ۂI EV 6VHo=s* ,3`}Oe_b>Q1..+>AmB ޟ>8zsq%P9.c& *)݉;c/Ě,jcdKX~]?6zB; FW&EJUd#55i[ۯ+4@"Xj_p6y%V[pG)j@V>QU1(@yHF5#h_6U,$OpGƃ!ۭ&)x'`{mV_fZBt[-B|Z7g39x{lԩdL0-v0|)=]]} Z欒fj#9U{w⁘RvPh,y`"գGZ(&.-}3N9c ЁgBKy(&ح,5|`}fvʚkrI&Jx{!N,k2֐Pz9p*"GexDaeW1n3T4=uRJZ@P?fJ7IzikZRzV3GH0:wTuhrJF 3e'`%ťM)k8TIY30yUAٵw:yZƎSvCף%nS:64iKEFLYÚ>b{f01oW- xZF`͒xլRt|jΞJFKǍϨPpx`}6էzg,|h?,6BM@eEzԍH,u!"K O4+fjɩI%Ǒ_'N64zӻ?UA38 yP3W @4EA"I:*Yq~"TmZm$x ݟOPnkrN;, W#G7nQ{(@Dr~2ԩ(qr['ԛӗuo[k=yYb}SE)݁^1BQ-}"vE4kOMO͠r`ʀ ~wS|M`-xxb&Y'@F{Njswwܪ{Lw>P.5a3B7AEcG~{qz(6cfCY4A33VVmwL=J,8kR.:B}` %H4uiYZ^ LOU$08Aǻz[w.([vĝ6>1%kQP~6$%nUKc Y s6QѝYv">?m>Eޝ Ƌ}#EX1hllk_{Nhڬ7݂Tg^~{eyG@vlI')9;?Њ: pzbdkۨڂ>PF|,ĩ5`ç@ YgA0^y#=NsCbupͳJ&K'N"=ޭ JkM|w SKOt(+^(lhEN.:=(?բ^x.8-9q$`X^8@Z#۴(00>r>jQ<NG=_YlPK{N^ @ZeIom`(gW-#*^|ykdX3dDho'4}#_䅏^bu9뇬섇sHwͫOJvO>m)*ǒ@5u=.8A+A2oߖMܘ ,m (eA2@oh%f93N+…C%!^}NDmA<#ZpYUu\Y;36"!3Cr_!Z?\^}y&V0NLl T8ƉT~6Bk S^r~(?p;ȋl%$(ptM`(fWMX*%t~F7hq$t:Frd bՀc<)V'`Z #v3ؑC z^i0K6h'!ddwcB!Pvr9c5\;D9[7M|^[E"Bt_yh:8}C& czzQ|2Gaߙ95xQ&iQ[ۑm>9wH;C/F2<7t'ڑ^YR[4GYP)Xհ<`;"l[((u> ~צf|yݧpxPQX3 K숺Тmg 9{[IUQq8&Tw7'Yn4fgiiz">e8IuUV $m}dSU~U٣[V]n1$8qīf);pl`늳DGѕ9?ST?;/QgquKbt]+^to?t2T0"`ky<ڎh}ScXh8U< бYxV#)pyYGQhlfeԕrI+8!}-ecnBqF9}.ƧHe2>/16|%|" z4EjT̊`^p-W`nK0͇(оįR󮮙 /hr#\|tm*8VJNw[8ojWRHZ1dLNv]iWDq"JN2; }3\s Xa8w endstream endobj 683 0 obj << /Type /ObjStm /N 100 /First 901 /Length 2937 /Filter /FlateDecode >> stream x[[s۶~ׯc;gBo3qfNnhu"E'idNՖk=hKpX,s eR){fA }d. d8BL*#HVcґ!*faJOP , R09`ĉ0 %S!YƒLK90 E˴Q8:=3MG.bd:z5B`PdFB1 Ìscx3r<,3%yYA5+88r^JDH$ XI%(%4iLib$hqt3 ܀ d8P0 D|&H#[|!Wٱ ,hð`e5^#-Y$Wf͢ w=IBȀS,M\m5W,i1(4:!M0P OQ!K* b TRJhVe+BXyKx R G%-A"Te (i[acLg"MgԃǏˆ'Iq_&|NP`##͟VG(E#bQ.2N {}Ɵ-Yݏډ2߳~, m%%v@%AU" ȸ^K0WB}"l |MQ, mƇ`,qUDR|8n|-BW:sCe$J7=:a0Y!UTCtܪ}Ր/i` ȦZoI1;<5Hj%{ZY.Mw>݂]vz"u%q#\Gei;nzؠJ&(&ﵓ>6JĶG{TV PP'ġ҇`ӵ {4{v7Y3ѓSF0.'O۫nLSٛ^5'z!Vi@yE^8@bWO`/&B2ZOaPDeUuMm]}C<Wץ7w/BP7dtWiFt]42ws8qmuRƏB{̢S,oE eōM}PLJh )farDc7ѸA4nCͺ; 1m q[DE4hp+M cy+lN%|%Ƣ5S!ջ+Ы [DQ½^+I6SXiJod,%NV6o?ҪD[[{@*!]Y,X,C VQUAԪ jU"e Qs9cV"2cLeh-,/UGHtkj# o"wJ4$ڥLhph5sln=Z5kx3k{r(r@&E##kȲf Y6VJ:k̳졥?(|*D,ݟ=)ñ8]±ȝ[z"XAq6l#$)5'.duzB'K=8}̃E*5E82q@^,2 lQHBS09OZ.&%{PCy ԩ RFn[JR1v 71bxB=A͕xRݖY .SZ_eq/mQP Ʈf9Er02ѥuhD.;-XRy$%.5*}Q֔$s(foiR6.)J1ePZcJ{s U!%00LVӢ[-[] AKGc]xs-ʚ~S55 i6.fcHk mXnaI.Rr,dfsfU4;p)he Sɂ9-L*nsKEB>\ӪjQF"KuVn lR:6uZ2O;y=<\de%2ˍɳKvH{dZ=N`F~3mntٷ]j/zjGjl"{{t<=f8CC)E_G'q/2EF ~ǟg|?/K?7?-g 5\c| ~OG~ 0YP̈́{>f:|'I'WM7Mx[ \K"ِl%LlOj?Wmߜyǧ|\ygkj@k7}~~{Q#b`߻("NbubL?]h h1,|Xc ?p¯ѢahfKtcihUZ9;5=9qO=y?'EݰtߧNYew{dd~ endstream endobj 766 0 obj << /Length1 2411 /Length2 12904 /Length3 0 /Length 14294 /Filter /FlateDecode >> stream xڍP.L#].{Ѱ[J.g97s0Zj\M `io]=֮fVor_WfgTXzVll-p_* r;׉qpLMC#n;́-1 v,@Έ& `%X#^o`X#vo`8ro')F|ٕE|ʿ8oΧF Z"~023G19 ln/@v)GKboGxYN߻ 'xrv&Z1AnM,CpՖ&x˶MV^V@?,2? n? @p? ia7wdnxЃ;] _j0=jp0Gh\H\@g7Sd֠?f:^_ ׹~2+;;t6\!}r~V? n\#8N]K׿>?; 4C2 im#`ޙ[JvYϒu76KW^*Y%b8 ZMs?g.y-xӣ0ΒTHЌ`hSx{9jv;]t[t4"8#r Q}xLg 7u9g;X-ѕtojyñߐ%$iq羕(O<=F3&oBSR}%i-`v @q(&JHl%" Z9a5Ul ?HpڲCϏR @Ӷ2tELj:&ж1Bb=S[urL*e1kH}ixba(Q&'6d\3 )s.sK(Ve]Ur?<ÁqѪnGtd;:~GY ȧYWf%q![1ID@_b$pCۏ &?.j~acVEE ]ɹ#9œMJ8T[3e[>n8o}QA .@ c^ q^lY^; }_\!RY'`* Ёa(oLbgΪa8yNh޲GU(sm3 `zU-[_ʗmnL è[\*}΅o!\W%d8ŵ.3azjJu64LQ7vdIbgU&EG)'R#]2(mFQkGԤy"P-h)7>֒KO^?):Be{łVlS0d6;ε# WG ^POR0ܵF3y73f9.n ⥵kOW~viɟK~0l5l_:_kߤB+JyGySֲ7.&j3ɇ|S_ËPHPѾ>CLJ|Z9դ`fvvƝ3a?+`Css$peqz {æ:we r%PƊćU^9N0 LX )0|dѹ-E4ƽiS%%S)0 kwE^<曞/\''e%UzҵN$ -Hb4X@(w^n(/׺%q;?;~x:~Ce}IS~qg^G7ÊΡImNmRᗻ O0;jw)U< FGOyje*wDlQKNiK7\٢#6N".CYL8_qfOB1ЩTu-2老~Py"4zم"ySU7v`Kt N2|l8;HJ2䢪38EImV K$w÷ODkNVpr>=iGj ,|4 1tٰbVW} ~ך)3E2ˀ0rQ_7.M)N ,pw3d>clōg:F&$ JZ5B2{#ۿ~Jj. T'b>P5?Hj,fa|U+& /RH}ߺ[! }s=R]2 5lOgAQ=o6'S͙@rmu LjF5J &dN6./Nr}3Tf8F{+&3z&$3dɞHfRFݻ /AЯ͕e#=duFtr\gQMLo"iWMjHn"!C+|&vKX.r2IiQ=g|ɅŤ7kFwCjVN䌒bχhq6c\7+󍉍=ibP&je59X'JNL?/ F2ߪCrm`Au֘BpV?2 j6a *rXPH[7rLuU Y :Grc| D*;}SdE.,ʯ>UT86*bߠ=wb lIE؏V;^s6dRr  ϸqC_{ w?wwSqkvM䫈-L|H/X,ÕO~'_>}[#LRaeG5t% `X 9݄I.yjbϱU2#rgoq?Жa:}\Vsò :. k;d*zJbdXVr@.)%A4A юxw[J='nwad%Ǣ؋ywp=Ps"p֒}䎀 ߃6dD'H%9dd 8.Iv!.BۡJ : 6Ep@5MtaXkxKRߵӼ|M~Ƨ7m>R!|P/XoqP<3ifs9NP!p+['Fq:L巈][t/Kvy4gŎej!mUCEyY rpc A&U⭮Au 7vƪmİ93Bf.YwkF p؂D=Hsm$ds?,[ WHJEp=d`j53w67m[l5;9-aD[ϊ l穃ZvVӏp=v^OBž yxvu&%*-̮Gnt]c L _cs\FIs%eo&69eLt҆V_7+oED @g/t4rNIҮ({qX-]I|i&?ÁJ­)誮+1Uȁ%am0N6(b>L"],c  ^|޵LKx=D9Xs3hci)-FWIV$=y* VD/p(y,B}bmD8qeg۴LoF]DTɩ}RU?o8pÕ#V")=>TƍWg] aX$b/"5^ʸg0!Q]i"雬lu)ٱ>#WxOdÚ&>:MS ѥY+2:ŋ:Gc0CXfffw?V} YʲaCsCd1z9\?2U\**!$Vѻ23ǥLvS'|˱}М;LS=^pWyޣpf\N’i<%A(W)v%*L{C\9)izl5WggWiàzZi3%{7yK2/fvKHQ\Dz$>OCv` uck \8mdL݉$dzV1*Iax;[|\> 4QQjSZL7} 6$۾EaL-'WU>:\דE@xutg^NV\c)I*xuwoXWÃ&v6 2 (I@wcv595,ߞqg I|V2 D0).1{5Z$u0nKei,ŇWn'=8.}IѶ?wϾSq 3sTvLǠD:e?ŦIJWWcHWxb ${X/XptmV7G:aNhPKP_fxáiO&*h"׍M\giR߰X h;1< d,/|j!^N+)Uֈ;Jm1m:zccOSqc8H҉,[u0Ձk2U8Eb;^=M,h*(k˥e-[K2KՀNkyNL68/FDo#lmFgMOԕTh|-Xs%*@>}n.|.iPxGb2!5dy"_3}ivDa4ַ's* mdݴkSgnyѕs褷VOjCq2J.m O6Ԁ8odVjYefꗦ6ZDFx\Oy+%nNs?}aޖZ~:$~ǖvy?[3h@VN}T vH[%]3;/4JyB[+m{olm ">)2SʻqwVǬQ ٜeY|H&aufeͤ}FG׏cQ'vn&?©3F+Kz Oe ߧ\: ߅>}X\s>} +į+ᅗ@02Ӭ%,\1890LC"Փ}mUܖvTɪFVpuOMA}ʑaΐάą2^Y{>Ƣٻ5Pu uo8.O 爳BB9_ꎃfa&;K 8!yIeY3zP>S0Q>]7~ʑT29n/Ҧ<⏿MsbTɯJ[w eჲZUA/4 ~>G5&*i bfY1횡,<8ĕǃkڽx+|24,EGP0RǔILX%wWEԿQૐ=}|x 𢗾 1./ZsmUʱh(B:a$U{ 8&-\$mHi*듀oL\[^T `vxmC[Z$ WGp&KԊU+q%tEZNƶ2X:&+ #7CLA3'߉+6R;ۆ&:[Q#vn &K}FoɊI?3ɶx;t5XϹg+v; kv NJ͉bD%ۧx4)'U aeL)FPTC:[ 8$9ȕ#,?g-֞fqi OP3wpPvqOkxHb׾oE] ʟY* a߻]\9DW50˝!D+^ZufR*YJqΖ>^B(}6҄⢓waAU15\g ֱ?RMP%/:>eI]Rz]!3+> SͰ50Qi%X X(8a-F9SS`s%.!vz{1Yx X1 \cGJ|hP4|sX߼&_`͸R̊DNI3+EC[m'W젷yeY+ܩbվ,qi^QZp=KVX~@fU4/DԴO5cv6 c[vnm0Iyr?&,,Lq0j#% (sM8u *W\G$]g#Ɖ$\=E1m~q[ЗI$Hƥ~3;ppHax*8r*X Kfv&N,;5-6Xg&O/ 勒Km00Sw%N5m./{`+9 v($ZJAOo?讃)IY9GHg2VP=Ŵ/_M{f*Zjm&ZsܔL'xO)sRf& Jp#Qٷ$ djU/ D/׺Qʥvn"Gk( Pt\DKafo|y쑙asZ;,&Q u T(yGBl/{4aҮAP3vfZb{-3O됒= F|Sy*"ԣ~ Rx +Vcf 4l*߰j O^B"8Ln7Υ.A-͐AدC:|\I8 ]Phj"M}}&wW=4K*NiZ8[Q)bwb }-y0[qxt';=oʇIB8+OeDʪ_W#i_K{,KA 릐1*nH-η̘3&v Sv7!ϼ@X9╥Z-WMzѣ3QL}w)q:#f0xڝWF_" dذj܅_'$TAIi #>z]̹*7瑉^uRָRikrn{M46TvcwU_S6~XsJ4՚̅o#X^=kۂ_P؍|h h5{CW<#egWhÔ)iH7oF#i>寏pN{ǟ }ŀ|h}bnliSa|AWjXؠbs9u7y>p%"ܙƫ݃ӵ9v3h~ }5RBlMa [wxht(-c) jT:\0ɋ5qŪ1()zކaRox=`}I~DqO.JqCo;|KEJx7IӠ+qJΜ鋧u즏J u];~p~aw{2Tս#km4$dU,1k. ^^/[dN'^o;::}5ZMQWm<'qxT d4GbAL38G`3 \˥?ЇAe8ĝʃB u]qkzo#=vي?eqY#3s_\!sЙW \hKK_$ oYf#} $ňll*qDҵRbgۿ|a/;YEI.)w͠)I+?UEa*%Km 񓖊DidQZȞW0W*C~kn]bXrGuQghkӎs_&@mϨ a ŁYa% h3 # 5B3)&2"wÂ:a,RD}ccsRXNl2@X߈9_ ?lĻy!ŘEHWVS)*%Tq&^Cqztj`>E!Ԃo1ז,x9tRCt{ZkGX/!(Z}0$H7=xWH#EDN[fL&Ӓ3ntDH?srv Wy! u,l\VğxW$P{g~B :c5B> M'DևjiOwH`*EnOORk0\["r+!i;,pQ{”*%I Z3aFR'⬆1C%?ĺcsH=l]x ΃kzQh:M @ @jȫďOk5뾳VG#Tw։`nMԣ5=:}g>%me4`N]TF!iI|b|#B7fā=^tN<οxX`>;xP'ˆ=Kx E`TDu iфRM Нr.əpqu~A'nX~چ2̹P=rݶhOV>\D{mwR=mQ+FUa ^^}i=SKnC$g[y=ܧ_a_6пrA*5R7yc97YQ֏ͭ/71WDf&v{}UF%X)n!G}p=}9%J;d{kSߊ|^G@ʡTPV,n &NueZ|:͊+~$d4"M(J:> udJgp:POmǠ6a6g(@GkcgŠXئ2-w);8~}&K9&w/? }X{~PZ\#b{[ե"g1.:@'S_) uo,A3bf>bgE ~VygkwV,{p#7=UݦɼVJw2}v:)iy55ǥ)n ֠(UM3dwxȩEGΘdm>{iZm-?2[1U6/Ի]n-# G[-%1zB N=}GM{4ʛA*lJu͹] MZK4AKp po'cs>ӧ%ZKgItYmQսr}M8@8im<|GmzK8[8Qѯ6?-ZRWQ{+6'Zxt?飮TF b1^&#T+6=B45 sXG#%}" C\l LJH>䋼8ߧvN'ei]{d ^~m}tLaOK9gGCO꯵<Ñ~LxT/m4 mD+7ibR1ؐݺ]ZȉnϞ575' \shM-ƺs͐ms2ɬy)m\5DBvd93.eŏ*6]#r jBWRZ8Űbq'N2C>|Z%|.{I旨΢%0/g{Gvڧ" 壼.\h5,W 7aOlC/e4}#LDNq*jM"ɐcpqR %WMb#FcV?ň+(Iq6C w5n(+tB[%= @*-8v9>!Lg>i;5;ʺr+r|Y̜LU>iR׆E?E2?y':Fz{ ȨI\8YAчVnƵ̞ݍm$_{0epX1:21\G+p]0zأ"ͨH1x9FL)=F&DGJg4B! %̆?^iϊ= Wӹ&+8{nEcC=uOhtPJD?4^~O`)2U2bvh/ޫsyro̕M؄1}%XX/1ϒ|]>&W G@3Q z 8Rie"t=H`"$JCS7UfC}㦁w #_*?F:t.5•>;"Q|J<CZqqRY ݗ3<*7Гxr5.a7pKZ^^xm=2H2 I^Ro(0Qq CNb ҙ&;̆y2)Zs<0K2] B0 a+`">ZåDY5z]LBŃقdLroxp ypS'Һ~e+Nv"ebu_5{!XTWtۓ}Pe$vHo,T\_ )3S'7 F !(2PP,驸QݙZO*/J"Ͷ5%fT2c;,~CsOĈ&=k+O1$x7%K|*ShrD:9: O7e'9{z"&԰@Nvj[#OZ[b޹~ܾ:!ݑCͧwK7G8׎s1?<,sYFiՅ֓öҪTdd@ Pٷ&8$]J8;|)0ѳ\^eu`A<{:t $I$==x?TCKS7*IO(?jKJh&#⨁)/:)«v5zF|.i^)oA+P{o\)El)Q\I|^H÷ʧnT':OHmplYLY9H.Nv:3!oM4m- ܁+PTOc"`XIYJfX n$́=6>w>rK^L[r_뵄y*CtZ!M@OZZc e4?!$% =ziÍ1i5,n͒ޕ3ѮA32椻BIm=9)VV& /;FRqN2dVK=~WLy5ɺ9l*Rl1j*(-!r2uG:~8- `!F*ښԜvkh۾G=CZ]?* s3`0oV 3t 4?vؽRyw#ePVA~j ~Lx&b2j WW8bcwIyԎ r(yÖ"%q/zR$x 68> endstream endobj 769 0 obj << /Length1 1531 /Length2 7300 /Length3 0 /Length 8311 /Filter /FlateDecode >> stream xڍwT[>Cw %H"]0P҈ !tH  Z߷f߳gb7PpD8@UpX$ T12 $ 2ܠqLH/.jB۔QhvE໒`qI( IED %>0G PzH _@ XBBw8PA@{ " h(vA<$|}}ݽHg;@_h"}_Muݡ&!PH(mpAp/t7ih#|N0ӯF=L0Oo$ @{('qˌ"(tB7 9A/{(o0 ugGN`0?@0'kp7OYHM@_]ﯞUTD$āb $.;=BwB%Q x;E - 1?KwO鿲bT~xd?{w|QQAT3uyWe 3Z`?0/UQ?&& #`.t_>A//4)*p׸ #Q0KGo1:n0@~+ZBLѯ @!?(PP (r B ѱ>Ca47Wo$=Eމ BSTkUD[&_iܝSsa6.aF9ZS`UV]EKYEXUלYo1GSNna)H#0c:&S|oSk>nmi{[M{K!ޡxY쐞;o=bt,v-3@o%N0M]HT-eQJәy>YjT]ejO/y!s jvG, 3]'<@Ǣrt,x]"~{U(3\_(  9q'."T۷HsV Q'1|Nͦ/qd,gkq7OUFzźnniJ"W݇#3JyP^=g.PVkjw> x /d^|UOݞ\U$RTItl˖%c[:,iyIYR*HJZ.$RPyyIQZY?.W&.1FhL)cVǾ;KMD؃.HLʌ'QF&u֞>].bh]vQX˅@dY.W R?x4ƾLB!OFf DzlmΌII/bBZ6aY͡B\o=ʵRx^_RH w4?1=0Up..SпKyKje1,Or\5yI`MEP/_׊{8g$OrkL4Naq>t1v$]G-/ Nk^z4?[OrpuRqLVϋf/?|R:)Zw[%T`dp=w ;`qP(l؞i8 5k|3b&OdΖb>| 07JYShyMjN0Ur4bڬW-ʅ öڦXZ($5s`/̑r7O*,φw-kknI򰏊pr'nZ6Kyl |5ϭU E+oSU|U jP"Q]cܾ"FyI<3X&2&cqP0m%+y6vnkWD>i|S*O0+jij:>6=pF&bn^GBWUjtlO\?o2߸R&tY:E&[L5=)ni[e =ȷXQZ<^WADȩK UL4·2ܴOs9|h'-}yΟ[ /)lZ-"2sDde☼aw`?(^z#ĦM1(jjYzXphotc t"1ϣ'YLB*${rѼ''cީ5fwluCg"U"Ҹ h9Y>? iR4H,ب*jN4fܚgdYN+s}(;'k}UdU ݭ xtMLJs,ԝ4XH%5^dH&^r,ԳG&X"%*nDg-ORdm$ۿzO3Ŭ'⨶g,c歷KX^'SʽZ[ex< :^lVp" g&5 <b0KL_q*b%q_m >G)H`\ čUۮ-pQ3ɫKsi\GHY mF]`/s(y T\6 hɳ./NEDS\<#%?M fI1 -[G K6 P 1ROo5xr184 :ۇ|]~o?\_pD흷ŦcpPƣxSFsM; VokJ_VP\TxgeSIlX-73Y7J\;oNxV_92N|jH>3z[q^ǜ}$і,-P赉} ]G#kO2rBm^: +K;ՠR^澐4IsZ];/8+n}Vy@Hy^2 nwc!z-[(Tx'9y~F㎪,cuHVVڲh7W E>r/ A pׅԿ3,P#aih|H.P߿aoYT8v";'TGLL' E@-IήhX5-`\#6 7Z4>U7OSH@ naa <0R Q-zmhydޣ'ks٢ &oxȤG;?'{^m+5F2mFu2s]N6)m7 e`n޷ Fo%-_* "3///4Ga)0_p&EtF4;KxR-Ys(t}⽧|p6OtD:Qc Ai&)Ϧ#P63Qe+Gѩ| |G4oOrɌmQR(|ӱúj {)Ђd&?gô(Ok|įUd26BG#:w= ߻bRRfSO޵yz[GFX7*+/msq!5YU^~1A!-49}]'zb "l;=ڔuXg5vIqw*I84HZ6Q£9>GBrư^ %8Lf!f4XBKZZ9[:L=<|k@I "wkƵEARB3~81(f[?y)T {s'8)p}&kϪC]P[JQ1{~y<^MSX;C+ γGZȩ3p f;dŅХe,%S b!$Zdѓ;StLy-;d/kO#'MIqNNl[AAn&Sq}=嗻;L_B)[|NjTQk-? K zd'kZe޶$DkvVSVE ϻlA_ۼUpC$PwZJ$~|"kg{0)5Cd\vηT(}\8т PWPMa5j GiWuB{vXuH\Цɝ|,@Z3u!MLbw4O©ÄHq6-y7e=&ZpM2O(|rhb=jW#wp-"9=ؽ z+NI<%8Vr͹0R/KQr֭ų5BN(Щ&xժD0J㞊[g,h}+}|UBԽ3IDPr90kclՅC"$](Kd[p|Ѭټ JlJ|V%_3۽Ug)(7k,le#C87 =1G-F^Y8!9\on"I>omDiEmqЍz4?rFy GJY=վz,ƶsUY=Em3O=C@q%}fVϧG2"wa&` L8L*4%)'\Sfӥ$ {R%U&"$E{V^#_BtY`B$ &K@o+ڧ{ɘf.u' UrĵIyd'*HN2ۄ9&yAft1`^,Ѳ"OvW\@#Qǃ)rbi+PK~9Oreb1+9`j8Ր{5학zNji?'y |gaYmwίac"Z cg7 3-_]7RhŽ])CyY0UZE3( ;avO˹: k>!m>jxto[<16Zt)HeqQ gg QqxDJoqlIh0s)O0.wNaX. 8|Yk$;iFK%z3aqmu3E!ŹM~Ň)O١LpS2j]r<#߬wiW+%snNDȸ 5TʶeGayrs_|2i|r'JL-iT1[^R*Tnb j"F/UY)bTL1yZI?1y}l-1ª0y'%i*2:>NNGxogU\ttV{Kr3]@pRh#r"IԻoDMU{fɯ Po6#6Ï Z$2/o endstream endobj 771 0 obj << /Length1 1538 /Length2 7383 /Length3 0 /Length 8393 /Filter /FlateDecode >> stream xڍtTZ5JM!N; II ^)ҤKDzW"*H.AUZﭬd3sfd3*hV,(jK@B 0'NiǸ#(01pʦ^( |K@aH/"#Px"`m!w'TD`ثs H8#yu"0DCpϿRsb]@///!]qx!; ~ Ё &D 0rD0Dc 8QW!(:`u kAy9? BHWr#\]-!7VA~!.x'".P@:?w(urG+5+`h$ºO CpQh/_j 4F!jgq*(~+ `]QDzğVG٣{uQx^s頯 tK(z?wO濲JT<\\~y ?~'Jث5F_-꿩?vWCx ۫\=A8ߵ/(rYT̆6IҾT=oV`uK_^Y'Mѯ+1mIugG}TK^HG (֪y4IxRkzZG;>ni sj:Ӄ(O͋-7Db&$.a7&;gͨ%LG^ :"-X)Oܥ yKৡ3?WVF =xS6߭BM'l㗦n~ӺQs2PE,'Ʒߩ`p^p c::JF&vĜ:@=]J@ qQh8uP2V?ezGwL`P1+|85?ۂٷ6ߜ*5JJދ[yq{Q]"PvN-q]yn1F >q% P& KTapmşxɳzK?8f-[Y~M|F Ou~`qKɆWN7/>C_#Bj]}&),kOvX7 ϵ۬,ɶl0\S]Z)@ 2e~Szvлiu#z3Ϲnz2~in9Y+7VQ3*_ Uo'4Mhy1υPe7ZCN5ʏ-qf Ke=v4KӞ9NFZ9!#(Fq҄v=7Z.k.nV `랠Uވ=,26θr+l }"u)y0A@Ƒ䗤V.(AڔK χ%9U7uRcА8$^Ij9(V+Cߔ6.?2$Ji@ L| Da'L0=Ru#` } wePRg$)~U~Ю?y! 31)2pR)I!s|o(Cv χtэYQV( O~Ka{i#Qw'%fR}2UMxqVZl rtDt׿]ˋ Sp9 -~k e>虾nk~f=~N6-"Ε H>J)vUj'IA`H ҥ#nrCfg|YeRܗdȌ9uq}2ٌ0 |ER}Mb"^̮VIJ3tލ7p/xqD'mͼy-Ew1|=r4ØĒ-9NO~լڋ5"ber<~׻ޙ,]{&q,f+{7~+^T=ՒTYloda\í`QRӡgj% ȩ*#&c B|>4w_Tx1xv(x07Z7uɝ>wg< Vi(VzހBP;8ʫTF'kAv6}G=ȉhG/$X,DvۖfQЭˌ޹&bj(-Tf4TlyЭq6.Qf!xٺKJn&(YG蹙4M wJV,ͨf9yT^hD T_e5*ڢyG)9ͱh[?WobN8 N}mҩ4k Bٵ`{,#āŀs\%Z 4l)]p^NC|\uX]6z^vcC!. +aI%=UO- F7k#Naڎ;ɚkb::k}_}߱- x`DgoZy#GϏ%}j~D텣7]8jX>>-,e @w̸bn<@l|MÃ>c.:LkI/t K6Y4 B$ZE 0,1/l&cIw;:Me!N3㨰._yv'wԦؖ8i^ԗEfj8)p6Bx MP\ R t-ܱfC0k{))Z91G,Maac 1[?kf6s,Ww݀nDɽe >{ yJݸC M[dRb *2%.F+-DuBX97/-=;N msKCUjEe6lh/%%[պ]l*[iKTi/j, /ȅ` /f8xQ o7>x>r77M7PqY"Wrkj`֩K:1~N'=ԫB=-3SGMkTZ"hV(?W{_DOg}l"@G`DScT,tcr֋?f#5n@Vsvfk{3f fQaN'иs|G }ia;یH 1"CyMf4MYLju:k[D~җ'>j*=z:;@Iև^tole惜tZx >Yf}l#7 21fz!ϦYQ8lɥ-.fqK ol:\7Qb+D4zcA^L(67!xk'ɞ:DUd\}匣<w@uHE7^em-[$u|!>*C_j$B hGlVx]綒؉Z@G\`#ygH]G V٧`|RUz}L7UdJCs[Xoo}~쐄JW FQY[bT\W.M cilI^&߾9d֗_6õ:/s_AQH7t%5~V*̚;"n^o{|1skvc/7Poo k=GͶGJ3ЎJ9)*J>_4S Jº Bq0d7ٌ*g[mS#)|'s$L^1Lt_" } Rygka>ҩu"ﻟTkM\7TzOSoN:c̻r>L}p͆è۾L wt2Mc)mz(7U8n;6xesw9_Lך GHcUg|*S-$Aɉ%aYK̄H9|}G!K͝hpL_ zh ui7nE tKi-uK,+ X ֽ{f@۩/bV5XK–16R/i]j:\mS朤]5^wBͯ"BJ+U_>."]CR_("t,Zz_ z b(QIIܻ@e{r/1%G9|.3GsO6y25,cA_;}CtV+z7hCzO*dSa/fonAvqY4q}st*rkעslK%&b5KΟKmzG.T!sB[߇*3TI4z%4ޥԝ qb'SpGI0֫)<&JZG&'4h,QSwZz"퇹F݈L!aCºS+b=oqK6Ğ1"'Z| K{FCgujE`xe|1chuz7}Ny[n.꺶u+oi쒉BLyȔҤY܈m蛜5N-H0'fQ"sUg&@YPƋXn> ;'7lu{PdA=]Oy&@V?RHB\\b />w)~wjquĮN} izb?Mim8jNq*LIFcz&vpme%x +$=ts Ar:B⋚nQ]O]S|7Y#۝0Dcހ0*8j>;a v)\iRdƬ՟ۡ;HV\s[) H0V1̖뛕t''s^;7L^.1aY577iru*otΧwb4iƶ_E7ٮP֝xV;4,W18#P(Dٕ2X,4Uc>";gXv.K(il(g4߱|&d᭻q B% =z#C9ʰliIS|a Yx7$FQΠ5Ը戽FOt @ip8,fy9ͨ;qwmG<\؇rK~쪷jTNpZ#EvR &JMIVtI/8M8Ύ?燋/"!NƩq'ptC? 0bʠ!!p|+(m:/ [uWA6)VQ9nz{]+}ƩܟO(+߅3_,yuD+[3-yk: endstream endobj 773 0 obj << /Length1 1996 /Length2 10711 /Length3 0 /Length 11911 /Filter /FlateDecode >> stream xڍTtt !]543tK+%!]*O\z5k1u}*Lf`WTWr0#SSkX9#Sk ?"3$P ȹ8ll<vVV cW+3"3@ :!S=,,!@kJ`a+ jt25-vMm`S+tvgaqssc6sb;Z1ܬ-j@'+ e֘VN;nƎ@`ke 9AR\@f@Gdu@;XF?`cfOllj 7yX,V@3#d+ 7v556% % 0tONVNNVzdU͒ 3qK#, ̭@f0sgy rpJ1!Y\:,del0|oe|!{9Ύ.^:fVwuh7C ?6~C& hI1bb`w f:*V`+ 2 ٧%3\:RC&=z\?lWߔU$bk8Cn"r@*ͬ\+l D3mrrX9Z=2lV Iae䆙@'\B./* 2i\cGGcdAC f@& ;CRȿN "7qX~E7X$C<)xY,* y꿉 Z&Ѣ⃐o&g lᅱ_bX DoEVw_^ِ?ҟib@HS6 Di7A'Ȼ!@?DAd8ο{Trt%la. DdžAr D__Slyzk #/t"/΁MkZoDݘvN>h3 :INnƩg/ɗH-IXw(9d_xԒyp^1K 7{z5F#TVc i8f'E:57MMBkb)JU x$|[jM&\4Y[vсzJo*17WzI\vO>:cI Hp"Tj?db{H.HG=}/uR}$R N&J2nU~3s1x')f$GDPf@/>ŴOO(LA,vEoj=+O(;rߢ">5ҏ~״79tJ煮SQ[s=xn.4 ,.X#n[M%243ޠӕrZ᰻Lpa;m3T,Ăՠ=7nIR  _Pk1B B!Ky m7|ϓb`fPHv'-wa@Ns9ģOr6b PԪqmuو_n2:.lMGrN`Q5, x#y$.Cjdm#v}TM_hi'y8ۉUxx"gxkxl /*[/R7ϯkY\> V0t{>t18lt-QzR]t~HO[.^7e^!L#X,ws{/,➚}"贴{9L,U1 h^߈RchnØ>x˩Yxo&U3꫁tK6Vz;iV̥U֖?R\k|fy5ը+4]^ˤ#2F΋ bF*X]Wm 6S[`\lӘ>se#C _nb˩=͙&TCD7Ԓz{QxE{ܭU4jO۝5 5dnM5wDց:V)ߤ*+]2Yxҝ)&R7+]lֈ#erɬ.t,ř ܔ^gyB;Cby%%516<ь"Cny ZU/ҷRIn=7 1W®7yi +/wRipvh:e4V%+m/=\x[SNycpch˸M4sMʹ2@%XmWJn՘5?/ #Nw[s(-?Th_G߯i]޳lFZu )%e ?ܬDjPxQAS4Dw[Vf찙nXo>gi uN%I9Lb/MTT!lr.p [vHkԑh{5ہsP\,W8S5\)3mz`zpʡ?m},( -ґܱ4ҿx%给IX?_npa,2|xFp(LIQa+ D1xPXνHѡ5(?Fo{m;> 3'NJn"[(}@p\6EgiD t㉂]Z MHOYaEȘ>A]xS_Ds,]BW7ԝcq>uX,Sru(r6U^^BL5x'pe[|+`]AU%cW6|#(VX2cTtsA:7yC#`we++T꫍er NTH{(D ;&EQ BK52+MF4r{` C}.eDx5/{X" %ig+3RJ6d8thp̢%9ad1fڦ]5˧[`'5EM]W6#}&>5JGœͷe"M$K3 X'o w'8㸳pmc`q >_1$ygLa 9n%(xRFՈKh܌1&er{[ͪB(girat6ԟ&|4+s=NuFJ:$1n -K;\s_I虹]FRpW`L58W+$5ih-2i5Ffė`vB١fG\T@ªt[dZd~-?Ej#JịR0ݛXn2Y)xytp8`Vk(>cˢ֝na]QXznSZtfuS(~zeh0q j&SeunzĆ@[gE,Q}gE4r`]GzwY O4K;ћ|^E3}FvRՕxӦʦrgmX15(rb,A|FJQ$C]/L8$߱5Vp؄k|\2 J' sllR&׆u(GQu/ !ƾv}8քW6[SZR[Ntyiqsj mpDSqWs3!$ &%ˑ)qYM]ptX&ި?|#g EȂ\ߙ,WZvA >6#]Ȩ!Q=q @9rհ[ahj5Im!̅M[V.'&jSb."r# 7OzkJ彿1,G1E?tzƯCgZ%)} :Ip0,R42>3?$?_+u+2 ]WYkVM)m68 :({,  $Xet%$.d {\iɌQTJ!n$tIM*FOkHNzfLؙ37W4_hjZXYY>Qfd:@rbh[X:FP9D1<]Ñ:wHt[w5 qkEZK6)F_3*,*8$`&zwUz%Uփ؀_Y>Shٷ $|ŭ;ĐB/.2?msqA}<B{]&Zxqx)xLyH(8+pwJ)x4JHѪ4uܢ!G>zXXe'˗NJe®Kv)K~ƒX&8w)%;.;܎$jtٜ+UO)IW z]%)>c5sonҚ¸*&BRQۤ&Qq<߲(R|x$\8SSk#7wtQ eZ+޲K(EIb:1& R]i=KS8~3}5K F>k>fﴓ D@h.aݸϓ_y/%C3Ր1-j<`^m[ugȼ{ (bq24Yz?;;S;()@!; HД_޸v4 >\-7X+^Kb7 5@.7q ,u>27)4$$NjՠvJ5 Ufzc'SЧAOǤu4V/#vĺS Ŕ~(E!~KK1arH/ec7" k-E:)5e[Z[@6"=m|P͔;oGWtN_Rcz̈́-|L"R)rAjzmA-o^x'j"{QӔ0Dޮq]yC-6v";1`DUx~g#|I?I*Jب:'Z}@s6U|$3՚i%#ϡlqyptj-G1켎ORVRn G)kLiXjlJP At޴Qci|_F)s]in|m#Y@(Ǎ-a[N;`3 L7xE73-+wlڻh]P8m9uyU=I)*2 HYq-Lja@ΚJP-0 ÍOOqjQ1j~pI#8ˎ0#]1|ZT Fs^T|I_o5̊Ge> {008b')5sTV.Si%&1@MtQNzL:rYnޟdΫ>p>'p8c]"1B8\lZB% FaV8` FTڨ$!5hTM_-v2*3*/z&%ZFe kp h\1^$fx ̟:g}l\8y== ]}H*:C{شCZ#;?#;Q#VlO|&h@, rJ4f=Bb;Fkb,FJXf R!!+W~/8DyG/ee4lC0bhil9Erz_7`GYF?U/ IŜ5$ǽ0s|5s 8e12o 5A]R EshAq$VXaz|41BZb1=4rKW{5ܩ̂irei@7lt뚣;OSʮL۫`z& \Kc0ҹW UĥK[Gu?ݔ`/W\&՚n-ʲЪ=LK; ը'u|ny?"7os.!ґ+ F!IM]{D+ ^ϴ CEYGS=qbD~M;cd_zɀtF+\.:;һ×3gl͔ T.\}w^rǧs]ެDu_"1}Hbh͵ ަr.KWJv<.Mݍ=hw |d)ҴMɀPcd!O\Gd06}Y((D>*N$%(k\}A6Y¼O!c liӜ=`q=|΅hp:%7}ftw4:&tsWI>.ש]K h2ݿKDچd;?r"j/>z$//^ϬsBچ֠4junxZ$* '1}u 5S$[F@"XrrSm?-e{4ccXΡ# |\[s GIVx-ӸV>Q:w5%&>ԅu'#55h:{=1jG`6G2-|8hhNvmokܣd%йun|qn}Wi91G *4^5OQ‡ЧΕ2^h|zyUZɤUѢwI}7w3j0y;-`FZ zy%={E@8VTP.e<ɋ;~hJn@FH$ ,u2 x?f{WdL5 J^K]ŻÌ1Jh퐢tJr 1Pz\Z3|HCRX.9+ơ""?u) kn}5/ԏ7PI\Lm7<V 7M2]M!oN-N@)3eNSrZI8xI]MFw(T,vܕ g7atA p`]Juz_AYFj'$Zu)" TLU(*>,*]w󖈽EϵQtY7+=۸o:eYGhCu,~LmI9wPl-}.H(vUo%0 ʹ (;|\jeKm}M]}kRcvg76l&럈FlAϰ}s+DZꍯ[sẅ́Z뉊lÂwo{*Y; Iያk;Kձ|JƎu`MJxZ5T҉X&eהd ]S{q/IiZ4Dz9OQx7꛸Q؏bXO!^g2fo;4N.rVwbyؒzsM]]"&un42[ؐ/\}8'?y_qWdt))BPkPD}CZ!6b"c_Q,UfZm@sDJj}*yءvSPk =y2 CY[XH5!<4k,ן/C #^Fn´[%vl\|'* !`TI!xkW6%ְVllD0J6^cT~)74)ɧ0sMXuq4#rsux_rK%㡠t-5ΏKYEތFj//BG![Q|NG[ ~'9??{ 5T۔LsUҁ5+ѷYͯZ Wexdi?m)%vЙFt'Lt )Gs4E,HeJְˉElȤ*}Z Os*1ˀ#nlfpJӲEE$gJ; \kLNY>/2^MF$|N7@QˏvMA'Ǜux=-Wl4͉+^AA> +'ozk,S KB[~W: }2'd6@*G]I%7řvu1Ev}LXʼosDزlѽ\^nd~‘+9dׁ.0irH 8e߾D IΜ=(ia4ذ}#g#?:`q)c@S  mQՉt C"j@&%wZ>lԄu^`<$K:FgZLӻ ie BDruOS4h(¦a:I"iJOou\ _5i1+--|[~-{qcwұ@^uV}bl7|":D+>k"P>f]mBނ$^dI_KaSs湯{c9(0a'PRCJLPҹN^(>1]`\:\Ө@uNLO` tX+t;=2F2UI}ٲneR⋱gNG_:A}hIr=;uIrf@A; uK:)$[oW/X8Upmȇɼ 9iMs>xsM&܀0,BJSLr~l+T|79%>C=0b >ăBc%9"f*Q"10[r=z$3Cv+ɞG!2Qm9`ځK_|y^*$Y1?-nXIJ>;E V{c|dY;D&;&Ic}s̴߭K(z,[j&ȫrqiMȞo- /}IQ_% R6SJeŭ[ ^ XSzԦf^ ZLm'%|a63IH^FmCB[8'`)Q‘<ֵt3ďNJEkפ:$RUUQA)b?JCd(^"^1udu/w?b_"psnw|!AD> stream xڍT tKJ3tЭtww0 1t"HtJwKH7wZ<;gs`搴Z7'P #y9@ 0 ABD0vY2KN (y8y"܂"@ !M c r;4 lgO5 [XX/w3 lm ZA֖Nm5= "i u{Z w'`381:`P[8Awh+]@U6`7'tx&Cr:XB|;- PSy߆NP% nW9IM%sv9NKM,F 1~'vY:B^-bc.]( \ @ P@\u|\@)@]"@`[ yTapsl0 x`Acề@q?2 `r騩(]:))7 Ka ' " ;Wx?j˥,0&@~5A|f_j?jKg?_ _5 *,k s`w97F k0^1'0uS@~o/6( /~>F<?n"ڀ`' lnT%[7pI= Ap< aH%ΩΩ"! K#h> pi? >< x.G7 3|@pGrv[؃_c^Nf u$||%>O@xl ~#zxnvCFF؃ Xe? < ? a@x? ^3??rQsyP\o?G~@h.r}O[l.?TWï5DG8By)K+{M~6Q.5}Ѩ7ߩJEW,?'{$g..'GT2% 2c-;Tr78~YaZ̰:0/'y $Ró>x MI )#cg"O wz\gqPgGQ^Q'(Pk>4^K90+) iNdpZɉ,>2ڙV4$NhБNhՒ#ږ"扪԰$Q46sޥhu(lO v ӵXJ>ΪXcE-`D}ӸY's[Jj'` z:aww"4 [ɾy3EtjpQi?@-kX~ LFĸN$QL,ʵ kA6P".qbeaŹ1P8>3@" wj&ZF2o|@3R-/M+ Ps)0Z0Y^*2x.> u.nv{e˺Q,oqトP=>@H}2VT0tа ;-ލ9OF~+fIDHU[N޼e7Ԫ꾦)}3XU1eI|i͗i]+.[ZAw~ʡpRM^.&SNM<ފqG+^'ʨGJa@z{sֺkhYWKI {d9][e sEWYW9mAޟZNτAj"+^=4V3[n,q /KR\CTh]nKw<]?Ux^p"1/gne0{uH:_.ȖV4n#Rb\,^}j*5wf޿ }/f{ *b1V#psü zl[x3 Z"Z,%Ѻ%smU  !!Nr*)o#na*9~ Yg%|XN7*84__b_B%R-^w#G#~@z̃-7싾ũ4S$)*p޲h zuWdtez*wǗ[E"҉KHXKb("Pܺ"o};́f,<٩]{!Цm=+?oIBIlqQd8#ت[8 AA$l zc㔉qٙ'lO(-7pwp?ٓ*ztRB\oDr2S IYK-**[FVh, _iz{yיO- ꠌ査g2sqP+ y' i9槃S=$"'#xɕT#]YYdڣǎ4ztU\^sBïl 8he1ljRksh&In)Nh]s5Pdq{I;ֳ $- y 9{ψslG/G F:WTE^3r>uiPo$.f`+Jn/L B]_:Y<ƍE~<. ]X檀īɡ.5LzIC[ z`ST$NIqe>ԟ*xJh8 :C6g?ҸJhq ?ӮużٌZ4>IsٚŧY6 ׮ڞ0 đOK VI܀HqdȴHs:{JH~i8SPhZ3]I a\eǾ }=р|t;bvY\_U1W+[nΦ;)`K{GK|c|uSDA#DHO=Uωm8X<2#b&_GLDzK xז~8B:~zZE~Z0x /W^$x Ϙ2mvD0`}_´wHiڄOH==pg)>~3>"u@ ݈[O1! mq-~te_Q|-25,$|GDg剭@nk؝& .sCPCLx-+7#ĄM`[N UJhXՃRSPW_J鶄kYpݯzyM?.yVA'ۮOvPZQI^dW\iZ1My\42jl'ZъFmsCJ8G|ƫvd^"rSwxhϛmA(N+?+0,Ơɳqrڸ%ݏV%~Z\*G@:4gFBȮ , LRv PjEطush.3 \'yͱ' lII~EVsc0B77OVFaQ#T#]3ēuWW~MtpH%L*֔ z^YDaߢŷU4IEݾo׷ m*u);G䯊 Aq+wu7 s\+J@2H ̭)xd/oؗZ3^?p"G~G6[|Ot7ޔ}70hV1F.ړĵf=g6U|^.`i4[3Zu#|iS_u@|n:=NZpKH# Htekf`*Nٺ[p s n.&kil2+lvZ `myARڃ'.uaܴP ]=u$G #'% 9'(w}jVE']xiwry9o0(z~SOUKՈU 7[,5v,afvU 131}&~od^,؝>&_iLH齪X|+rZ߮:F_`$o{}ax[|{ j `vay%2M~Qk8FjD\Qr4#qō }u紟>'uLeME&W ܈@.(Sg7 !/*!jލfܵC@3X= 1c#xѵ/[3:`xnjF~lKbAh,ֈԤ8'y}&Ih8h wbXJ 'qgX7D^Fw0ERcTljdqO|w^WW6Iߒ~d /ez-I"l^!xN޳j6rw|w ٿsl?JQCկIt&:0gU"p7-1sN%콌m.&S?*RNLjgݛy%zI7ꀳd8p I'">s}QDnl^էE11 )@Qs9;bO#LLç1+Ry;?Հ)%POD] ~\FRmNk|BRhE;V^CSayyTZklR_T&v]Gg.f8%S/\#4 ~ke;[]s^R8D5׷.]cXB@h"s%2"yNNSLxI~8ݬ@OWRF9C4%^kO 7yT+?&`PȔ]}0{_P8o= [oUJvUl0Gq95;b fO`Oh=$5ZI^-EPA Y3L獠Q]x$FSߌLs-jSVca|(H.w*Tӓ,S^CG9;ܾ%mʋH4M̫I,ç-FeVppPA춅ːe0D#nķ枎>GB0{?U+S~V~\|ݳb%\Ryâ]ބ #9ywڽ^PQ+Y\jGe=71{eBiP݅tE ՠDgP4lQ/'*m(AMa'eb?a^U="NZ H=vy^Jzok3e$ $ ݕ+jjKءOeC?`,/v|[ ҊuRPp2\D-Ux֤PXzߴ4TThc}O&L:]~:S; &GdCz֘@.#xַ$jPZ\g $ {Nرc$*8=EZ-Rq{_^D72 /Ic#[/u+XEX̦l&/ +חKƹEH;vzԗZY򒻡OfEpOCMAub֗˴ʅXcy 04FFS%nWZ Gfu`倸8^-9Xqc#'l9T@*,fH ꨬ'\*GdzJib+^H!a#?X,]rp]-sZ$~n q, 7$ntŧ rpj͖lEZ1dMf^"VE,;] )@lŭs;`H~,͂9>sաd5,2tcxJ/2"Ht[ٌ?M+x%N^3K03(:ځ[u4-jPUY`H0BvP7lcI>Z{ʲډ을/,!}_$#LcX(3.7z.CE89A30#WoH#Kex'TrӔ1*;\(̬Rq%u۳"$xXM:+iA_{9w4( l`CK3n}M#X_t`ʼ%S<7 {?%|G,^zvAm $aC_ۥiOC3?9~"H#*(?| #e1;ܷc'[L6';,ܚLb +7ZW⎦ef_綪|:2a/V+P6I i!9- &if_fEͅbLb:=7}xc;n# IhvF7&Pz|^Dv."v;#!o48NU̙$G$_csLw%I Yoe&4_G⃜c:GZmz j`(pv) x`ywPbF!>{1GahwqFȷ8Qǂ!GwN>z6y_쌓eO o2S_j,~N?Y /AP3kSw[yx ׁ V.Hx2$=/B)>pҕv9"+[%:\v{P wo(ٕ["P*Th%, Fy˕ugbjưT&`g(k[5-Z:âh,NZa$nyrU>ۃk$)tǽkR-fRuFupV,gu_@lCƒ{ԃ.<+Sv᤯-! ] Ս[mwo|Lg51f ټ 7vt&I`J| _؟PTZ4nӾt4Xp|ž&>md5E#=Oyd\'A n&hYSj`1=+@G_*Vg6mhxۭ$!GToѣ*e`O(KshJ|"[ND}9d f>T@ B$vts-㛶pW& c?qđg̑85B:gvPISR#MS#Q1|$;#v}T||-*Y}` +*w+O[RwuU{+Wd#FޥVXU |65?"G d<&NJL7Ѥ}r|c0d5:71*1;l=m0x+0yO{Fq͐!m fZ^y9g(܇K߁f<j <:Z{r7T6/8[|Wn(#͍}P I%1z/j!mF2@3HDA@^>N&BwUu"45mۗĿo$6z%m!p9mr fŨe'Q*wS($^*&>AUvT?eBC[;!)K5M_WEN6}}\*RA,CvF2zW6D~KBqЏ · Lz_П(A3^Zhpj3Bi֎?,RpbՋGNJ"S; ~G9!btf5ʰĈsU)-)L7!Hc3-m"(2գ,^"74zp<XwrJ.(ډEjy]4VŠ{9v"_!s+~f?Op9Qpf-Oy yn"Uhe/aZxH~(Jhlkӯyi9#&yW&f潀W0)qo֜18x}N'Fe SmثmH$P ~~/ڑQ!™Pc`=57@Dl8y\†TgZ?Mzi/$m6Hr-f9+ԥM_5.-kr\y~K4/"բa56E;x'>UXB@Pʫob endstream endobj 777 0 obj << /Length1 2429 /Length2 15376 /Length3 0 /Length 16799 /Filter /FlateDecode >> stream xڍPڶ- =@n @N݂;$XwϹ'ޫ{LSZP3;]ؘY|VVfVVvD** -bD*- @WLl`sqظxYY쬬|c0qr@D*qG/g+8КxvAf&EW+8-@ tV,,&v.Ζ W+4U0@ʘV ,\=Ldw{ٛuY#_ 2`76f_@;99{- [ @YJӕ`bodkb 6 @JT`.幘9]]]@Wp%.9mbdm<} ,@aȢirrJ,B#XYYy9@'̊^8:8,E@@;"d 0ZD;<c/Cz;z1{,:z ?:11O7wпI񔵷p+IѠw,%V܀ yv¬Vr[M6znWtm בUV| D-mFhr5{1%ق*.+ҁϕ p* wFI{3;7 F\\6A4zf{W \rsXD X ^`aHAl?"qX p?AgPgPT vΧqX4 p}b:O9Z(lS '_;? 7? 8%O0UeK>!\}frGnpX[Ri&j6V^V@Xe@0S@pWmØ\_G6 |#w3%}48?:Qs8<,'ۿ=WNpuk?,N8 47ht LO\`.@;_6@  f6eqrc\=? x` ;4 tWܜms_P~@Oڲ@uChC(Ձv:Ϛs*\ ]mvh$:i{\DGjDNYS!bL"NZA6P퐽rTNn*X_=*7&×Tk^*4c  Lsa]1/<o0f~%2 q}c\G@IO u99G#v*S^#J j_x!fntRB$Uj:Pz y3pywʁCyjZތ&g`V޻ ʢ*9N?O@V;b /:ԛyt =&Ɛ%XaU9kg".iPd+i /~4v%󉅜 H5B$>KW|FL3gҩY7p^BnxKbVY[uL>zG:+%!CI e.w[o(cB8%܂)R5<[g٢%ʻk^\_{gyh"t,׃p\fyY9eLi,շ]R|w+la00_@ր?Mh1$p&20>wma4{l$^I´׾θ%}79sf ̏y(y'jĐKl&+E$x冓ISBOGYK|hրL"T&XK;{xGl@(~/Rߎ!.d%ff<~Asa,/3[ˋ \|팿7V3Z"WꌌE=@n!pKPB%(qmP)8o+C*$ei^܁ [DԋgѺRrlڞS]tZ0ӄpDw9AQ:s_וFs 6n,9,mm{պ..KB4 o20-=CrAMɱ'W"{w-k ,C: doJ.q78upe\: pW{2*/=yu0lO012 m5cëVN\Oz[3ԡFM)CE͹Ātf'?rۇ. ?(qd Թ4 \`M 2\xWdG e;51b#y@t&Œ TnC|hz42 ӄWKL354K׏yг|b-2PԦ~V;{@Ob>v?j7ɼ P@bJgBELyJy\\5s+Wb. +;aVжm\QMU5)""o`\uggKW(~>yk;x*߭т#./I*[!F?ۂ1 o#xr|ٛ-ZJGXx[2g\trJgGl~yƁVDwͣS nd מԚIȣ/}d#^i\ԶH Ez9-r%otq0z1I$F6!h%>&[ڱVX#9QV44LjoM6SX5aQ+I@^B`}Ur?H] m&6!cYɸA~.\9nP5?w:Y|6*j8Vn ˙ vPێ>J%eOJ€P>yb;Fo@>{B+7Lc'VJ!E(\Gjؼ6NM Dd~%JGeuwKC9(k)T6wgz WA:I[D 2xO{H,Vh̳h_+&tzr)7N3_mUH`Br sFNd`P|B_7L)kLR[vV+z} : 'Tdy0G!u~PSnXw }q~ڤ-N+ULn Bч1rtL~`Th0{qBZrgEE-$X$\OS%*ItNJ_Fi[zL|ECq*XAI{ΤUʊ\Sο"̡,z22o,̔&i! \@5|Fͬ/3TY_'cg]b YJ,&tG ٟh;kOe|o&gõV)d1+{:QiqMtI\{C7YxV6Ci!vFdrlA,ys o(f6c Q0~fe C!T]U|̱WYY%AsYNF?|/Ո;C*!~d R^1eJ1gȜF-GEbij7 ,CR!h>~_#uƤB,U?"?;vvV}M\&=pRiŘ̛)%G'^^/TʱAFZ;[IEÁ&Jڋ)1VOF,y[-_5Xtؐ ެyܮoʏE?.s|U|y?`z5;ijŕy'бbӄh-7ܑt;>bSly^'E(Buh`0@,G%Gà<0!C~F&ig$lEZFJa1c6k⊇ڍ++$R!9jt 9%SAȸyg@x&},0Ͽ=P x*uE^F NEmegmGoDy'D͙8A/ ]I+# LK_-s6Iy]wiE)(F]D!~n]n<a8oV?HĦ~$~p 9 mtQ@=zF\yE'0[QPDŽPTlu3~ +PxDm6`snALY/5}&qy:$0(FFxnVIk+K揯W+;*N%)T>?׷ݕiAz}˻QK=|y0oĨ~YiWǑ}W5龟 Gd.أ~OZʠZbhE5@HH"wPPLSZ"IRڗXoQKg i'T:> )tUzT MMV8>Z>A `䁢O3*~pp676ܛc!:Ptb- &j[!K;AV}my}DC>t>-AjEv`QQWj[|K9U>L[>1-Y|s Vi&F6Hh'_GFFS'O{aKvCY'9X?VNuW4~*PERCNOa8v*rf%Y8ĬK <*Y~ԀP:b:}/KmE%P6~cmc:$sd}1KCE@iNv6Ex캧ڟcV1sp G (ZrjKQ ?)]ro +)I96Q!kf Fo]q'o6.I~$ee[3_L7/Z?Y V(6X3KAsHL?&ѼkV9yԑ$rxrЛBB0@I:{1U R'Wk R#J1 ҼVÑui?50M;T}8Q$;DtCSڧr5E QMHT_7;"qXtv%ea1+ï;bЎUap,qc\}#lZ?J܌CqnDQ1u'mE]앛 51Akv/e}Q&˫ŬlƇV=-fΏkr"#&=7o/&fxbdȣKL,셔#u#9#),R% +zO KvlHw Ǜe%Q4c LTS.f:l2(8d6aWَ+Ek3u[ 8;bFIZ0^֢:*C{#'Y!e Gwծ贼MtLV߮L[˝) ]5}c\dg G~p ga5}+gjl$4˞P( 6< YbC[e鲎Bl6JRQN5#C DGAp᧞S)E w u ```.V8K7c;S.oɕIJ/wW\ё+ҮiH͂Lc"9 k z6 , ^<#J #G3&bG<‰m5Xh)ϢY9,_}qRFg[i~hZ2*E.^A}G6)X|6vPcbQ3&n^ Mf\Xˇ Y^ĩY_a샚T;%mX*2!ۃXѵ}3W[E(WnPLqYke( =FQƯFΤ+2L{Sd`:/ЙH,G54;[\y~KC}ﬦv qn%ApOoPofOI-Uz@n{b]x*(ȶ]e]VFDg&5]8]vMlCL4^:HJNAIJF ̮D4<܍-0S̄˦m3i76:q 57;V #o*( {¾̤tAS]JR ~s zWgYTO)y81)n eRfxn_*wL!߬w*QDv~l|;r*w3691va!*Tr^e@H%TO&kMU/㇝\CL>\"l@}[8-&5)b]DƦ0.&܌ !%s dFIncmTc ʝwKueEKA{^!3._`|]#p)sP%fHsYptqӢ.A*~}Fx_y1T>3gW w_͆I㐋bݖ3L+/" -:-kZ?4%O[inQ^Yiߕa*xjW*ʾ{ZoD@qRgE io|Vj߃C'F<<ZjƦ`r4n[F0(lsBAx=֭%0Oܩ=̱]d>|- \uxi!-(K\knp$2fX\%!#]fT!\GAk|B@;k|R I H U 7C- ";$%aIGI% C؆t-.ȿd(x8&NUɟp~>\uѭ p"®6P> 9|xɑNe;;PS,*p[GY4=KK`Խ[FhpICBQ탏sVG B]4ښVyin;V [$O=LV0e_-E#B+5ru>8jo"{q|24D9>O823[}=gdzkz$9Uf#bGI I)naPXLl.=+s9&e^Uw}DZH耊3Z&`T̀,+n9ɷO?clMܨ^@&P,/] ~dc[~n>WЇ=tjkU~h)@պ5oojj?#4gF@V/oG9R{NK]~VGh^Lj{:(oU_F ,QIEW nZ(_-+е7|9uGdvx]94bR3OB{S\XSF͊]MKq] |ӋHƀFF}!9;C̶TrjڋmYt$R;;%WO8;Z7^V(OWvq3,~eJ>q/tmt T~]{=+U&A\9uJ?~IU|鋦MrQ@$*M!2RT˝ew:4,A)CX.sI_񱥖8ǖ\=%1~)njy 8thD͸ZeZҙڨx]DUqpj) ~ؿ%P*)tw*OU xݣJ3ws{ڟV\^:ڑ2Mf+k[T$q^?u`nݦ֐Hَ~6&N38 D\ɿgE@@L(OjGܨ7l|phV?rn@5hJ}=skvQHu~wD`{̀@K U}$vTI^7, 8oG z{lZ$DpI_XLG0yE VpRyҕ)72ye%-rQ$l w}>BxfE_kvJc֤糸/5 l;&"0*ejkM^G98<'n 5TE4:CߘXA7^Wģs{^ {c4m :|ޢƾ,9]p\kjVEnH6#%վ`y qMv3""85:-_C|RWVxen>KH\d ֯M(Pe'넰WU(M4c Mp.X ~<H$Jy[&\lԂ?%4We}Zh=xrx}o|Wa2G }[<%cz ک$hʼnU{RQ5^d$ѷĔ-^^)&/Pީk̎kG=@#~8EQw}`s(F[Y [;{Q ibRmGnΆK;qCF j&id{E*۵ t]oһ`WA;kRAO#ᆫԓ#/ nIapbcr[;vk񏴒>I$?yܵ`ʢX?)l}RBV:B-UBkPx-@ԬT oG)4;&y+-Yt:U=D5dS~JLE-2} l/mRE7/"yIo[\B'!E7ʐRI7rP$jZ} Nf^b v)WTS~9Q> XW->f cfybީT#d a;|~ȎRrz]F@p眺SurGRHwsI23MXŅ՞d?bZ7 =)$ӳ/df- M%^))qPI+49ם0U.[봋-x;7Ub鏩 rkmek(xqB\h1Kg:FE9tBZ@t|7uQp ݄Cn7:y.֊ykAEss/hx?KDEW+4>BOHgs]8JZ*A:[DΟf[.dtg-P: v/e(}žã82'h=jn> c$!v ~|h=:㿁!BmfU!mvMy n>v%f`YAրnɀgo3+Ζ}-%9"Ÿ1G)`Is~༃Ӕ4EțIu]v $+I/e "Ͱj!ї#;nWʎ%p 2(mGc'#F.EmkԛklNq(nD0X<%!=.h(ٴ~Vd_Mʏ^Þz8 kV}\#%-~9 II^@EFdVD!t #&1/G:v"*# 2~j E(K=%\{K5aOTٜx^/RgդY o˶H1+hBZh8*>vLt{)mMO?.RsIS xʴZΊAxaRB')sȉXbwzH`êgv|LTq:opQ/wsq6d'M͇ 4I'P#Zۨ;ߢR dTPwt28} FoE(6띐Ռ:SD;$ ~gNHFWW6M76PERaLd>ߘ/3͕t2YޟN2g#">IwFkCӤwTFtXMbhjٝ&JP"Bw~chU6n\DA ]N[eRJ AdA`Temvǟ$"JRJJh́qq۰(WpŠ[F4v9 q?nv%7YEI2}ZG3+vaܜ;1 nt;˺SPgLߔ["MB"S'ÕFWC!O X䥬x:wu^r[=ߎ/O\P؃п2٢j)M?\+p#/DOG>f9 ?4g?9 zo@IxA>K)zc{@;NfD3Ymi333px1D)ÈP={e2Q.cKKLFi(*90n7ِ4Dg%I$TGn"?47RLNg֓^F-wx-SY4&}nǾ}K*B$nAn'Wxna3`BXkP3Sm&yFYg&´\?"MLCpSMԸ6vMgW4# 5GN4och20O!;Zy]Ec `RU|.EqS?@x1D`33I ?;;"jXo U[g h)Vgnz\iڶKO}PʲpBm7me%a`?G6;4r=zŕXV-L>"H\W >廣zJ-+/zi&bl/͊o~n~wwIOڄ1ؗŐjvcDv#N.KURf|=+{DcXŚcw8sɼ.c Nk<h󈬝P՘85 JoR?lsz5\~cMoYgRbAz'6 63/ ov, $ci=pls,0R"ZH53r߂'= .o2dy@`֤B+QZ-=&\kK؊x7[W11W=iOz:A<9l*{ `o"y(<7u6΀cSQ}yU|##{[ܵ>m\{/MFIlMԶ&LʥڃK^B':)5_8DcaBZMRhY2ZPT4Ѡ]S#/~uW |}mʭ-sq/J^sh7-_Fł\p da&j%*;QQ]\M=QX'%rM@lfoͼKD*T˖[,Ohxȥ L[}SiÇ#Z9A]}y.Y.0e+X)3$hm=rP_؜F)%gURh WڸF~g]ֻD+ ,yZIyMߞ j[4ЂI|F;մg0л\R[Ceg Q;{Wa2_MTJI;x߭z> A֡V)2.F0TIh6' C>I EZEMgwE3+r\{'hX^M*[ j4܂]x~Kotm(ӧ.GeЃx:y_:%6蠤$Meoߗ Ubİpj9-Sd: p I[w՝ӉZ%ei)S }Oq,ݭsX)ϐx^nyOlCLbDFv25V10 Oe=mYz+-SS%NcԖMA!R}r'%r75{@"{2&;۸0uU5,*2թ-R<^QW@cYEl\ڎ(;F)5i_:Ǡ֟V5pPꅵ{ķ*M5. -UiO FܷZ]0Q*MUW6B||8m-~ /vn&41ܡ4]**_&SᘲXA3죠gF|j_( z69;=]ǎT f6}d=dӤ/Z@ "w߳ endstream endobj 779 0 obj << /Length1 1313 /Length2 6528 /Length3 0 /Length 7434 /Filter /FlateDecode >> stream xڍwTT{6%- 000t7"ҡ t7 *H#Hw}k}:ks޿>k]y!yx%ZZb~~!^~~A\C Kb FA0PD; 4ܡ!$??@_?@8BZ 8 ˢwF@_ 1yg0b H݉@(n #]t:2Ov#< @gxqY?zP-v0XWoW 3yC`; QEz!@u=(;s @E^+lD_a )0 ]۽L %A` _E]`Ww_;?:{0 /$/ ^|z~*uCw\_7D}El?`?/9,àϗOQW^WŐO^_A @@X /w] 4Ubk2-߱w!M]o ENH mfm?@g@ F7gU [Ց;7t!H[?߾0[8׆n>wWn@`;;`G)_g Sl==ƻ#^J0 l;5 sk:/Y~e$w ~y+UT>eik߭*ȆDƟ~WV/Vq'GɻGr+hqhx V]Q[5X޸}Kz٩UU831Z*3 ,96)1<؜$^_ONdk~!`Ÿ\[3%- z7߂ݓ^unMSwSCR=lGbb֪GCQ dCnE;, Ws͋c]R-NA gľ 6fx=5߶ׅGnp^ 57qR^>b_dY<2)U 8yk콺'|>oy@SĠ=LB* @dz; uyo2$(r]II_\yg'[9 KQqf% cߣ5izkrQeaam:Z|k=JޙLUԬ!DfhPjFS2ag0yQ^G>4-H}ښֱ# |9"#7^펳O+ F =ꁨoV":.^dd;|s;*Cr|Y Ϋj`ޤw O.ԪJZOxu0|ﻗd1-U4&1txG \=[G_uxi ߳WETs>x/K;7p8 P_%aY_oj| H%{"~%]t.V*Lecw22f '{Jcsk !E3 ߇YTf*M>ɉQcoZ5}hզNCΫ|Ŝ roƽV[bOifӽ:hauݻ\\imy[C' &rpwk0~eAon*OfK}2+0LxI#8+rPڟ mc`^F޻9j8>y-Z3aDa(:C)Sb34=h c߂pFg^%홨e,$ ̙qL1 ~Srw?&e< aJ$멏L܄;DاG+[ҾIǘͽ79syS!9BVV}HE.r_EE|7F|! })1<&(nA0](2MMj:Ll#ij.IO?V,H5n3lg<{y!om. aa +CFi~&t8pQ)h۵|,1M]]PíI޿IjVq?Fxǖ',ɔt^ݢU,|ʼvt攍yICբe+^;K(۩KΦDNדSnh-Nj: hd["fkFϾ涱jC~I8l }y9\f7?q37W?zVim{d!flPZkL/ͷPes_,Əy,*(oozKL^nd?ph!B84WMsqd#Y*'A?i|?AͲ ڨQ|Ka%{Jã}<-gMzK2m?iqQĿ}rN/LjQķXSGj/]j^x%}AGJfE%a*#.1GLhd7k!&_y~uk9)ZۙWװԇKx؇γyB]=f+3 5q#Ih_nt ^Lpw̾:hW\0%N1 2Qf0,v\ O'8'/FZ6+Uŕ3 :$̮ra1Ir EEuȚrR'qg5z9.JeC^O?5V!ÖS+灷PGͧ9urdsb}]4wZ0o{INlՉ.]VcO^"|#Y 6^|Ѐ39݀:O8(k(}LJGlµFH5:hen~m lD]ucjP ;Ȃ]=X Ba刨U# XY9a?O:lq1+lʪF~O|i@]eq+N;݇xc"Eb DrfT1qr)wDClAHĭz9ƙ77quAcgV_~!ikyܡ|BS\?vO^ ~$ӹi  ^ΕMuWH%΃ CeG|[zBODd6߬WpaIG:gхE+(S5&kf$ Ikbyn-(.LD79IM-i/ 9;  )E(imMcXеøTꒉ&w9zC} ub/n2_>+Z©Ȟ֫T"U=AM ZKla!TSO&!C.e/qi1]]e^PgdjF~16v 8]&ڥv;1SeU+򀫝X\_9L VFayصx,֦W-ͻ0'5d!/FTȭp2d+\o0h`%db>RW<%~Fg)h:Ì>)z{(86^JsLQq,%Svz[}~=\Ū"Z|9uگbPkϗ X11NcjRD\⓺uFS +-ݬ8޳E/WI>Qʈ0T<xr;O,'zFfwQ|Dv-'qvUp.QU(9]e LUDAfElνeS/%wIyvɈԄbʽAw {tG.^'9<ɩ/V&URRk)lRh>./b8B۲ {y!Oao~Zmn\bR`.l\⦆n'r+zGB H/4Æ v>wёs.x|=3Ytuf0i/[-4ģL Au/<Ԟa0Q@gfAua|]3ζN8SΫ Aåay(D$ |&T"ʤ;y1+{)gǨ2rdWU pF޸+{k;@,M2NЊR'kyBʄnK4 *yn-SqSx=z{e7ȣ54 SR;<w$kx]4s=N}oqVʛ^LHF>_`fxI\A~"s=fez2\ ӛKP,,V9;_(-mg1z< pvQ~hk[;hu؂aĸdW,N#+n򄾤.E*; 57ӏ_!u>fZYd#ؚl{q;#m$T]3`&SbGiʔȕMLm34iٞaAfit3B2*[@7|NQIdž5lc CcQwOh4iUkO0qSM0g[MoVi,D*3_BbKa?gj_ZXnbՎaM$$={4iQ{H9Lk<B@/eJUozE PůVF-㗋FPlj?XG}F-e{ -@m7`E!{%Vq-_ +圗SRm[M-jk JBWG80ccUK Cmn GkDXdmQd==)7ȁ᳠@כEDIRS3Dtg V*b))ags`Fm7nLCo!=qYSAX Pr2AN`| S+t5vnD-8Mٿ]F|q,1s4U!q]wdO"Z :ܦ:*8Uȶd d>` QQƃ85RdH֧2?"16ךD19P;^ʣr&2Y Xt@B\aM b _>Uf@h ~UThѳK2"OX_RUs:ӡ3e۷\{Q)KlVPYc:,?>~BADOS\3-[蛮/#sھ!~;e%dg$" B?2H=殙0)@yrJFG{| YKTT8@AsBSf8 7VGNƍK{mo\!ǭI3n o/9*ۻoGUj-c yԫi U! M'¿9,^~1ieMF8cvSkbK[.ɩl'| ,n*D1A x=uF:ڷ5 TGyEؓҙo/+QH~.T3wƖ'qMo-vԲUr&tJ߬(e3'st.|H,=iJ`ywA dwmc`bd3KxDLR:8-W+/ )˲PĆ5=e٭;qG`ȂW͠W3c?OC}2u˨I!QdqJ}-' OmXKQ endstream endobj 798 0 obj << /Author(Herv\351 Pag\350s hpages@fredhutch.org)/Title(A quick overview of the S4 class system)/Subject()/Creator(LaTeX with Beamer class version 3.36)/Producer(pdfTeX-1.40.16)/Keywords() /CreationDate (D:20171030195716-04'00') /ModDate (D:20171030195716-04'00') /Trapped /False /PTEX.Fullbanner (This is pdfTeX, Version 3.14159265-2.6-1.40.16 (TeX Live 2015/Debian) kpathsea version 6.2.1) >> endobj 768 0 obj << /Type /ObjStm /N 44 /First 382 /Length 2041 /Filter /FlateDecode >> stream xڭY[s۶~ׯc4dƗ8qc;n\Lh∔_vJIIdo 1Fc2"''ZsxJb5a! fgVJ-'.# •= ;Dqa@ p$"'`Ɣ&;|%ь%AmZ 4=88.11!iE$,ᰁ̀}pNJD1 +ztZ,8bc5Q@A͖(xJnSY0Āt8lg@o98)E4^g/{tz7__8D uO qCgztiE[_n%=*hI֣|"REK_tC7-=SzF{zA?K:iN4l<$ 8zS{Oof9<-qw맴ѿNN3:w.Tw~᳘]—UMsZ߃xn]{}?|֏N>.5Fk(W~*fo~x{bv²Qf j=7j{m[t-}9鏦n |z{- dYV[Ƞfn`Mm!jh.6^qqC941RwG)cE'`YEuۨONL$xkp~zxk0ؐ |%``.wU ]QuuzC%i_Ӯo_FRӱKl[7UZנƱ=|c2j }M$ H0ϜA[CdC"}C c0| t1M:~i*<_%ǃ_.0=hKp ͉'Hc Qnc!9n~)M6 /=!􏣏uO0<nAb^Vp_9G?m cUQkEK! 3?;9|tFL.vGo6c;ot;z‡)-|g6K [6\te_Tc{uzVL/INp"N}ƼTsCҔ G^ێ>9+]rRs[0Y+9jakȕlp5/UGeϫ>Ӌ8Q7tLaf}p a 6G[[eMOIQin;y+AV}T6, qm*5o;dq=ǀf|E(.Qܬ=ﷶoS=/pPeLu YEqonߢAN@]GoUgRݸ_oqx<۬2(zI.[ܥ wМQ:Ò)T ,e?8$YC3 ш5\L]vmP Tj,RH2e;Rz(.8eW-ܵ^-?mSawH%0Pb ֌k( JiL a REutgW_1l (D|i6:M0KIUN䓊$ {[0lԋǻ.rӏ_nʏ˗/bR~̀x^@qxP"P(( Jeer@(9C5J8 XDb"C0±ED,b2e!,qyk#.qy#.GoO~XݧR endstream endobj 799 0 obj << /Type /XRef /Index [0 800] /Size 800 /W [1 3 1] /Root 797 0 R /Info 798 0 R /ID [<4710E85CFEF951227BE5C16015352DA9> <4710E85CFEF951227BE5C16015352DA9>] /Length 1720 /Filter /FlateDecode >> stream x%9\Y{v{k}w}nodÑ $I "  &AG's&yt]SV[unPonw]7}-R7yhnЍt% ]Xg=hܞ1O4l[8;?[L`!`c b0XXh/.]_؄ؑni{o[ ]|L"޸,<.`{0؇888888Ӹ]%+Lᔋl'zxWxfn`۸Y<#<<3ÕԍiMv-SB*OєJJJQIQIQIQIQIimODl9G{c{X||vN(((蠬Hݚ3.c0mHHHHHHHHHHHHHQgQgQgQgQgQq }t]((((((((((((((((UDccam' xb1`)&ѢiAW=4T4T>y^4FcqL`!&u3_Z/zk[۰;0؃ǪmWqpGpp'ppgppQ+;vkx!nnfqO/7Rwo[6mggvyyyyyyyy\\^ERwc^Tzy:u?=C^X1d1d1d1d1d1d1d1d1d1d1d1d1d1d1d1d =r+|/Đ@YYYYYYYYYYYYYYYYy&u/'h#****+"k2 $&߀JJJJJJJJJJJJJJJJsw ]SB !C!b1B !C!bՅǯP !C!b1B !C!b1B !>0> CBA t:BA t:B!F6_B0mX?ўb~X?a~X? Eyjn+|Y%XI@Z*sP^uH`C:T 0omma[kXX؀؄؂؆؁؅i`?]>\AaQqIiYy\E\e0__k ό?3qwq ^ޢa~}}0wTsWsWsWsWsWsWskUkUku_k/:vdjjjڨ+ ڨڨڨڨڨڨڨڨڨڨڨڨڨڨڨڨ/hײ$l̪֯֯֯֯֯֯֯֯֯֯֯֯֯֯֯֯֯ͫU;2w5wM']]]]殺o09_RY;OSJ?bҠKÔ7 r;IFoXhVa56b6c bڟ;0vg߃o|1_ڗv endstream endobj startxref 173872 %%EOF S4Vectors/inst/include/0000755000175400017540000000000013175714520016041 5ustar00biocbuildbiocbuildS4Vectors/inst/include/S4Vectors_defines.h0000644000175400017540000000646613175714520021557 0ustar00biocbuildbiocbuild/***************************************************************************** S4Vectors C interface: typedefs and defines ------------------------------------------- The S4Vectors C interface is split in 2 files: 1. S4Vectors_defines.h (this file): contains the typedefs and defines of the interface. 2. S4Vectors_interface.h (in this directory): contains the prototypes of the S4Vectors C routines that are part of the interface. Please consult S4Vectors_interface.h for how to use this interface in your package. *****************************************************************************/ #ifndef S4VECTORS_DEFINES_H #define S4VECTORS_DEFINES_H #include #include #define NA_LLINT LLONG_MIN /* Get or set i-th element from int or long long int array 'x'. GET_INT_OR_LLINT() always returns a long long int. SET_INT_OR_LLINT() can take a value 'v' that is int or long long int but is not safe if 'is_llint' is 0 and 'v' is a long long int. */ #define GET_INT_OR_LLINT(x, is_llint, i) \ ((is_llint) ? ((const long long int *)(x))[i] \ : (long long int) ((const int *)(x))[i]) #define SET_INT_OR_LLINT(x, is_llint, i, v) \ { \ if (is_llint) \ ((long long int *)(x))[i] = (v); \ else \ ((int *)(x))[i] = (v); \ } /* Hash table -- modified from R_HOME/src/main/unique.c */ struct htab { int K, M; unsigned int Mminus1; int *buckets; }; /* * Auto-Extending buffers used for temporary storage of incoming data whose * size is not known in advance: * * o IntAE: Auto-Extending buffer of ints; * o IntAEAE: Auto-Extending buffer of Auto-Extending buffers of ints; * o IntPairAE: Auto-Extending buffer of pairs of ints; * o IntPairAEAE: Auto-Extending buffer of Auto-Extending buffers of pairs * of ints; * o LLongAE: Auto-Extending buffer of long long ints; * o CharAE: Auto-Extending buffer of chars; * o CharAEAE: Auto-Extending buffer of Auto-Extending buffers of chars. * * Some differences between AE buffers and SEXP: (a) AE buffers auto-extend * i.e. they automatically reallocate when more room is needed to add a new * element, (b) they are faster, and (c) they don't require any * PROTECT/UNPROTECT mechanism. */ typedef struct int_ae { size_t _buflength; size_t _nelt; int *elts; } IntAE; typedef struct int_aeae { size_t _buflength; size_t _nelt; IntAE **elts; } IntAEAE; typedef struct intpair_ae { IntAE *a; IntAE *b; } IntPairAE; typedef struct intpair_aeae { size_t _buflength; size_t _nelt; IntPairAE **elts; } IntPairAEAE; typedef struct longlong_ae { size_t _buflength; size_t _nelt; long long int *elts; } LLongAE; typedef struct char_ae { size_t _buflength; size_t _nelt; char *elts; } CharAE; typedef struct char_aeae { size_t _buflength; size_t _nelt; CharAE **elts; } CharAEAE; /* * Holder structs. */ typedef struct chars_holder { const char *ptr; int length; } Chars_holder; typedef struct ints_holder { const int *ptr; int length; } Ints_holder; typedef struct doubles_holder { const double *ptr; int length; } Doubles_holder; /* * Hit selection modes. */ #define ALL_HITS 1 #define FIRST_HIT 2 #define LAST_HIT 3 #define ARBITRARY_HIT 4 #define COUNT_HITS 5 #endif S4Vectors/inst/include/S4Vectors_interface.h0000644000175400017540000002107513175714520022073 0ustar00biocbuildbiocbuild/***************************************************************************** S4Vectors C interface: prototypes --------------------------------- The S4Vectors C interface is split in 2 files: 1. S4Vectors_defines.h (in this directory): contains the typedefs and defines of the interface. 2. S4Vectors_interface.h (this file): contains the prototypes of the S4Vectors C routines that are part of the interface. *****************************************************************************/ #include "S4Vectors_defines.h" /* * Safe signed integer arithmetic. * (see safe_arithm.c) */ void reset_ovflow_flag(); int get_ovflow_flag(); int safe_int_add( int x, int y ); int safe_int_mult( int x, int y ); /* * Low-level sorting utilities. * (see sort_utils.c) */ void sort_int_array( int *x, size_t nelt, int desc ); void get_order_of_int_array( const int *x, int nelt, int desc, int *out, int out_shift ); int sort_ints( int *base, int base_len, const int *x, int desc, int use_radix, unsigned short int *rxbuf1, int *rxbuf2 ); void get_order_of_int_pairs( const int *a, const int *b, int nelt, int a_desc, int b_desc, int *out, int out_shift ); int sort_int_pairs( int *base, int base_len, const int *a, const int *b, int a_desc, int b_desc, int use_radix, unsigned short int *rxbuf1, int *rxbuf2 ); void get_matches_of_ordered_int_pairs( const int *a1, const int *b1, const int *o1, int nelt1, const int *a2, const int *b2, const int *o2, int nelt2, int nomatch, int *out, int out_shift ); void get_order_of_int_quads( const int *a, const int *b, const int *c, const int *d, int nelt, int a_desc, int b_desc, int c_desc, int d_desc, int *out, int out_shift ); int sort_int_quads( int *base, int base_len, const int *a, const int *b, const int *c, const int *d, int a_desc, int b_desc, int c_desc, int d_desc, int use_radix, unsigned short int *rxbuf1, int *rxbuf2 ); void get_matches_of_ordered_int_quads( const int *a1, const int *b1, const int *c1, const int *d1, const int *o1, int nelt1, const int *a2, const int *b2, const int *c2, const int *d2, const int *o2, int nelt2, int nomatch, int *out, int out_shift ); /* * Hash table management. * (see hash_utils.c) */ struct htab new_htab(int n); int get_hbucket_val( const struct htab *htab, int bucket_idx ); void set_hbucket_val( struct htab *htab, int bucket_idx, int val ); /* * Low-level manipulation of the Auto-Extending buffers. * (see AEbufs.c) */ size_t increase_buflength(size_t buflength); size_t IntAE_get_nelt(const IntAE *ae); size_t IntAE_set_nelt( IntAE *ae, size_t nelt ); void IntAE_set_val( const IntAE *ae, int val ); void IntAE_extend( IntAE *ae, size_t new_buflength ); void IntAE_insert_at( IntAE *ae, size_t at, int val ); IntAE *new_IntAE( size_t buflength, size_t nelt, int val ); void IntAE_append( IntAE *ae, const int *newvals, size_t nnewval ); void IntAE_delete_at( IntAE *ae, size_t at, size_t nelt ); void IntAE_shift( const IntAE *ae, size_t offset, int shift ); void IntAE_sum_and_shift( const IntAE *ae1, const IntAE *ae2, int shift ); void IntAE_qsort( const IntAE *ae, size_t offset, int desc ); void IntAE_uniq( IntAE *ae, size_t offset ); SEXP new_INTEGER_from_IntAE(const IntAE *ae); IntAE *new_IntAE_from_INTEGER(SEXP x); IntAE *new_IntAE_from_CHARACTER( SEXP x, int keyshift ); size_t IntAEAE_get_nelt(const IntAEAE *aeae); size_t IntAEAE_set_nelt( IntAEAE *aeae, size_t nelt ); void IntAEAE_extend( IntAEAE *aeae, size_t new_buflength ); void IntAEAE_insert_at( IntAEAE *aeae, size_t at, IntAE *ae ); IntAEAE *new_IntAEAE( size_t buflength, size_t nelt ); void IntAEAE_pappend( const IntAEAE *aeae1, const IntAEAE *aeae2 ); void IntAEAE_shift( const IntAEAE *aeae, int shift ); void IntAEAE_sum_and_shift( const IntAEAE *aeae1, const IntAEAE *aeae2, int shift ); SEXP new_LIST_from_IntAEAE( const IntAEAE *aeae, int mode ); IntAEAE *new_IntAEAE_from_LIST(SEXP x); SEXP IntAEAE_toEnvir( const IntAEAE *aeae, SEXP envir, int keyshift ); size_t IntPairAE_get_nelt(const IntPairAE *ae); size_t IntPairAE_set_nelt( IntPairAE *ae, size_t nelt ); void IntPairAE_extend( IntPairAE *ae, size_t new_buflength ); void IntPairAE_insert_at( IntPairAE *ae, size_t at, int a, int b ); IntPairAE *new_IntPairAE( size_t buflength, size_t nelt ); size_t IntPairAEAE_get_nelt(const IntPairAEAE *aeae); size_t IntPairAEAE_set_nelt( IntPairAEAE *aeae, size_t nelt ); void IntPairAEAE_extend( IntPairAEAE *aeae, size_t new_buflength ); void IntPairAEAE_insert_at( IntPairAEAE *aeae, size_t at, IntPairAE *ae ); IntPairAEAE *new_IntPairAEAE( size_t buflength, size_t nelt ); size_t LLongAE_get_nelt(const LLongAE *ae); size_t LLongAE_set_nelt( LLongAE *ae, size_t nelt ); void LLongAE_set_val( const LLongAE *ae, long long val ); void LLongAE_extend( LLongAE *ae, size_t new_buflength ); void LLongAE_insert_at( LLongAE *ae, size_t at, long long val ); LLongAE *new_LLongAE( size_t buflength, size_t nelt, long long val ); size_t CharAE_get_nelt(const CharAE *ae); size_t CharAE_set_nelt( CharAE *ae, size_t nelt ); void CharAE_extend( CharAE *ae, size_t new_buflength ); void CharAE_insert_at( CharAE *ae, size_t at, char c ); CharAE *new_CharAE(size_t buflength); CharAE *new_CharAE_from_string(const char *string); void CharAE_append_string( CharAE *ae, const char *string ); void CharAE_delete_at( CharAE *ae, size_t at, size_t nelt ); SEXP new_CHARSXP_from_CharAE(const CharAE *ae); SEXP new_RAW_from_CharAE(const CharAE *ae); SEXP new_LOGICAL_from_CharAE(const CharAE *ae); size_t CharAEAE_get_nelt(const CharAEAE *aeae); size_t CharAEAE_set_nelt( CharAEAE *aeae, size_t nelt ); void CharAEAE_extend( CharAEAE *aeae, size_t new_buflength ); void CharAEAE_insert_at( CharAEAE *aeae, size_t at, CharAE *ae ); CharAEAE *new_CharAEAE( size_t buflength, size_t nelt ); void CharAEAE_append_string( CharAEAE *aeae, const char *string ); SEXP new_CHARACTER_from_CharAEAE(const CharAEAE *aeae); /* * SEXP_utils.c */ const char *get_classname(SEXP x); /* * LLint_class.c */ int is_LLint(SEXP x); R_xlen_t get_LLint_length(SEXP x); long long int *get_LLint_dataptr(SEXP x); SEXP alloc_LLint(const char *classname, R_xlen_t length); /* * subsetting_utils.c */ int copy_vector_block( SEXP dest, int dest_offset, SEXP src, int src_offset, int block_width ); int copy_vector_positions( SEXP dest, int dest_offset, SEXP src, const int *pos, int npos ); int copy_vector_ranges( SEXP dest, int dest_offset, SEXP src, const int *start, const int *width, int nranges ); /* * vector_utils.c */ int vector_memcmp( SEXP x1, int x1_offset, SEXP x2, int x2_offset, int nelt ); SEXP list_as_data_frame( SEXP x, int nrow ); /* * integer_utils.c */ int check_integer_pairs( SEXP a, SEXP b, const int **a_p, const int **b_p, const char *a_argname, const char *b_argname ); SEXP find_interv_and_start_from_width( const int *x, int x_len, const int *width, int width_len ); /* * Low-level manipulation of Hits objects. * (see Hits_class.c) */ SEXP new_Hits( int *from, const int *to, int nhit, int nLnode, int nRnode, int already_sorted ); int get_select_mode(SEXP select); /* * Low-level manipulation of Rle objects. * (see Rle_class.c) */ SEXP construct_logical_Rle( R_xlen_t nrun_in, const int *values_in, const void *lengths_in, int lengths_in_is_L ); SEXP construct_integer_Rle( R_xlen_t nrun_in, const int *values_in, const void *lengths_in, int lengths_in_is_L ); SEXP construct_numeric_Rle( R_xlen_t nrun_in, const double *values_in, const void *lengths_in, int lengths_in_is_L ); SEXP construct_complex_Rle( R_xlen_t nrun_in, const Rcomplex *values_in, const void *lengths_in, int lengths_in_is_L ); SEXP construct_character_Rle( SEXP values_in, const void *lengths_in, int lengths_in_is_L ); SEXP construct_raw_Rle( R_xlen_t nrun_in, const Rbyte *values_in, const void *lengths_in, int lengths_in_is_L ); SEXP construct_Rle( SEXP values_in, const void *lengths_in, int lengths_in_is_L ); /* * Low-level manipulation of Vector objects. * (see List_class.c) */ const char *get_List_elementType(SEXP x); void set_List_elementType(SEXP x, const char *type); /* * Low-level manipulation of SimpleList objects. * (see SimpleList_class.c) */ SEXP new_SimpleList(const char *classname, SEXP listData); /* * Low-level manipulation of DataFrame objects. * (see DataFrame_class.c) */ SEXP new_DataFrame(const char *classname, SEXP vars, SEXP rownames, SEXP nrows); S4Vectors/inst/include/_S4Vectors_stubs.c0000644000175400017540000003750313175714520021430 0ustar00biocbuildbiocbuild#include "S4Vectors_interface.h" #define DEFINE_CCALLABLE_STUB(retT, stubname, Targs, args) \ typedef retT(*__ ## stubname ## _funtype__)Targs; \ retT stubname Targs \ { \ static __ ## stubname ## _funtype__ fun = NULL; \ if (fun == NULL) \ fun = (__ ## stubname ## _funtype__) R_GetCCallable("S4Vectors", "_" #stubname); \ return fun args; \ } /* * Using the above macro when retT (the returned type) is void will make Sun * Studio 12 C compiler unhappy. So we need to use the following macro to * handle that case. */ #define DEFINE_NOVALUE_CCALLABLE_STUB(stubname, Targs, args) \ typedef void(*__ ## stubname ## _funtype__)Targs; \ void stubname Targs \ { \ static __ ## stubname ## _funtype__ fun = NULL; \ if (fun == NULL) \ fun = (__ ## stubname ## _funtype__) R_GetCCallable("S4Vectors", "_" #stubname); \ fun args; \ return; \ } /* * Stubs for callables defined in safe_arithm.c */ DEFINE_NOVALUE_CCALLABLE_STUB(reset_ovflow_flag, (), () ) DEFINE_CCALLABLE_STUB(int, get_ovflow_flag, (), () ) DEFINE_CCALLABLE_STUB(int, safe_int_add, (int x, int y), ( x, y) ) DEFINE_CCALLABLE_STUB(int, safe_int_mult, (int x, int y), ( x, y) ) /* * Stubs for callables defined in sort_utils.c */ DEFINE_NOVALUE_CCALLABLE_STUB(sort_int_array, (int *x, size_t nelt, int desc), ( x, nelt, desc) ) DEFINE_NOVALUE_CCALLABLE_STUB(get_order_of_int_array, (const int *x, int nelt, int desc, int *out, int out_shift), ( x, nelt, desc, out, out_shift) ) DEFINE_CCALLABLE_STUB(int, sort_ints, (int *base, int base_len, const int *x, int desc, int use_radix, unsigned short int *rxbuf1, int *rxbuf2), ( base, base_len, x, desc, use_radix, rxbuf1, rxbuf2) ) DEFINE_NOVALUE_CCALLABLE_STUB(get_order_of_int_pairs, (const int *a, const int *b, int nelt, int a_desc, int b_desc, int *out, int out_shift), ( a, b, nelt, a_desc, b_desc, out, out_shift) ) DEFINE_CCALLABLE_STUB(int, sort_int_pairs, (int *base, int base_len, const int *a, const int *b, int a_desc, int b_desc, int use_radix, unsigned short int *rxbuf1, int *rxbuf2), ( base, base_len, a, b, a_desc, b_desc, use_radix, rxbuf1, rxbuf2) ) DEFINE_NOVALUE_CCALLABLE_STUB(get_matches_of_ordered_int_pairs, (const int *a1, const int *b1, const int *o1, int nelt1, const int *a2, const int *b2, const int *o2, int nelt2, int nomatch, int *out, int out_shift), ( a1, b1, o1, nelt1, a2, b2, o2, nelt2, nomatch, out, out_shift) ) DEFINE_NOVALUE_CCALLABLE_STUB(get_order_of_int_quads, (const int *a, const int *b, const int *c, const int *d, int nelt, int a_desc, int b_desc, int c_desc, int d_desc, int *out, int out_shift), ( a, b, c, d, nelt, a_desc, b_desc, c_desc, d_desc, out, out_shift) ) DEFINE_CCALLABLE_STUB(int, sort_int_quads, (int *base, int base_len, const int *a, const int *b, const int *c, const int *d, int a_desc, int b_desc, int c_desc, int d_desc, int use_radix, unsigned short int *rxbuf1, int *rxbuf2), ( base, base_len, a, b, c, d, a_desc, b_desc, c_desc, d_desc, use_radix, rxbuf1, rxbuf2) ) DEFINE_NOVALUE_CCALLABLE_STUB(get_matches_of_ordered_int_quads, (const int *a1, const int *b1, const int *c1, const int *d1, const int *o1, int nelt1, const int *a2, const int *b2, const int *c2, const int *d2, const int *o2, int nelt2, int nomatch, int *out, int out_shift), ( a1, b1, c1, d1, o1, nelt1, a2, b2, c2, d2, o2, nelt2, nomatch, out, out_shift) ) /* * Stubs for callables defined in hash_utils.c */ DEFINE_CCALLABLE_STUB(struct htab, new_htab, (int n), ( n) ) DEFINE_CCALLABLE_STUB(int, get_hbucket_val, (const struct htab *htab, int bucket_idx), ( htab, bucket_idx) ) DEFINE_NOVALUE_CCALLABLE_STUB(set_hbucket_val, (struct htab *htab, int bucket_idx, int val), ( htab, bucket_idx, val) ) /* * Stubs for callables defined in AEbufs.c */ DEFINE_CCALLABLE_STUB(size_t, increase_buflength, (size_t buflength), ( buflength) ) DEFINE_CCALLABLE_STUB(size_t, IntAE_get_nelt, (const IntAE *ae), ( ae) ) DEFINE_CCALLABLE_STUB(size_t, IntAE_set_nelt, (IntAE *ae, size_t nelt), ( ae, nelt) ) DEFINE_NOVALUE_CCALLABLE_STUB(IntAE_set_val, (const IntAE *ae, int val), ( ae, val) ) DEFINE_NOVALUE_CCALLABLE_STUB(IntAE_extend, (IntAE *ae, size_t new_buflength), ( ae, new_buflength) ) DEFINE_NOVALUE_CCALLABLE_STUB(IntAE_insert_at, (IntAE *ae, size_t at, int val), ( ae, at, val) ) DEFINE_CCALLABLE_STUB(IntAE *, new_IntAE, (size_t buflength, size_t nelt, int val), ( buflength, nelt, val) ) DEFINE_NOVALUE_CCALLABLE_STUB(IntAE_append, (IntAE *ae, const int *newvals, size_t nnewval), ( ae, newvals, nnewval) ) DEFINE_NOVALUE_CCALLABLE_STUB(IntAE_delete_at, (IntAE *ae, size_t at, size_t nelt), ( ae, at, nelt) ) DEFINE_NOVALUE_CCALLABLE_STUB(IntAE_shift, (const IntAE *ae, size_t offset, int shift), ( ae, offset, shift) ) DEFINE_NOVALUE_CCALLABLE_STUB(IntAE_sum_and_shift, (const IntAE *ae1, const IntAE *ae2, int shift), ( ae1, ae2, shift) ) DEFINE_NOVALUE_CCALLABLE_STUB(IntAE_qsort, (const IntAE *ae, size_t offset, int desc), ( ae, offset, desc) ) DEFINE_NOVALUE_CCALLABLE_STUB(IntAE_uniq, (IntAE *ae, size_t offset), ( ae, offset) ) DEFINE_CCALLABLE_STUB(SEXP, new_INTEGER_from_IntAE, (const IntAE *ae), ( ae) ) DEFINE_CCALLABLE_STUB(IntAE *, new_IntAE_from_INTEGER, (SEXP x), ( x) ) DEFINE_CCALLABLE_STUB(IntAE *, new_IntAE_from_CHARACTER, (SEXP x, int keyshift), ( x, keyshift) ) DEFINE_CCALLABLE_STUB(size_t, IntAEAE_get_nelt, (const IntAEAE *aeae), ( aeae) ) DEFINE_CCALLABLE_STUB(size_t, IntAEAE_set_nelt, (IntAEAE *aeae, size_t nelt), ( aeae, nelt) ) DEFINE_NOVALUE_CCALLABLE_STUB(IntAEAE_extend, (IntAEAE *aeae, size_t new_buflength), ( aeae, new_buflength) ) DEFINE_NOVALUE_CCALLABLE_STUB(IntAEAE_insert_at, (IntAEAE *aeae, size_t at, IntAE *ae), ( aeae, at, ae) ) DEFINE_CCALLABLE_STUB(IntAEAE *, new_IntAEAE, (size_t buflength, size_t nelt), ( buflength, nelt) ) DEFINE_NOVALUE_CCALLABLE_STUB(IntAEAE_pappend, (const IntAEAE *aeae1, const IntAEAE *aeae2), ( aeae1, aeae2) ) DEFINE_NOVALUE_CCALLABLE_STUB(IntAEAE_shift, (const IntAEAE *aeae, int shift), ( aeae, shift) ) DEFINE_NOVALUE_CCALLABLE_STUB(IntAEAE_sum_and_shift, (const IntAEAE *aeae1, const IntAEAE *aeae2, int shift), ( aeae1, aeae2, shift) ) DEFINE_CCALLABLE_STUB(SEXP, new_LIST_from_IntAEAE, (const IntAEAE *aeae, int mode), ( aeae, mode) ) DEFINE_CCALLABLE_STUB(IntAEAE *, new_IntAEAE_from_LIST, (SEXP x), ( x) ) DEFINE_CCALLABLE_STUB(SEXP, IntAEAE_toEnvir, (const IntAEAE *aeae, SEXP envir, int keyshift), ( aeae, envir, keyshift) ) DEFINE_CCALLABLE_STUB(size_t, IntPairAE_get_nelt, (const IntPairAE *ae), ( ae) ) DEFINE_CCALLABLE_STUB(size_t, IntPairAE_set_nelt, (IntPairAE *ae, size_t nelt), ( ae, nelt) ) DEFINE_NOVALUE_CCALLABLE_STUB(IntPairAE_extend, (IntPairAE *ae, size_t new_buflength), ( ae, new_buflength) ) DEFINE_NOVALUE_CCALLABLE_STUB(IntPairAE_insert_at, (IntPairAE *ae, size_t at, int a, int b), ( ae, at, a, b) ) DEFINE_CCALLABLE_STUB(IntPairAE *, new_IntPairAE, (size_t buflength, size_t nelt), ( buflength, nelt) ) DEFINE_CCALLABLE_STUB(size_t, IntPairAEAE_get_nelt, (const IntPairAEAE *aeae), ( aeae) ) DEFINE_CCALLABLE_STUB(size_t, IntPairAEAE_set_nelt, (IntPairAEAE *aeae, size_t nelt), ( aeae, nelt) ) DEFINE_NOVALUE_CCALLABLE_STUB(IntPairAEAE_extend, (IntPairAEAE *aeae, size_t new_buflength), ( aeae, new_buflength) ) DEFINE_NOVALUE_CCALLABLE_STUB(IntPairAEAE_insert_at, (IntPairAEAE *aeae, size_t at, IntPairAE *ae), ( aeae, at, ae) ) DEFINE_CCALLABLE_STUB(IntPairAEAE *, new_IntPairAEAE, (size_t buflength, size_t nelt), ( buflength, nelt) ) DEFINE_CCALLABLE_STUB(size_t, LLongAE_get_nelt, (const LLongAE *ae), ( ae) ) DEFINE_CCALLABLE_STUB(size_t, LLongAE_set_nelt, (LLongAE *ae, size_t nelt), ( ae, nelt) ) DEFINE_NOVALUE_CCALLABLE_STUB(LLongAE_set_val, (const LLongAE *ae, long long val), ( ae, val) ) DEFINE_NOVALUE_CCALLABLE_STUB(LLongAE_extend, (LLongAE *ae, size_t new_buflength), ( ae, new_buflength) ) DEFINE_NOVALUE_CCALLABLE_STUB(LLongAE_insert_at, (LLongAE *ae, size_t at, long long val), ( ae, at, val) ) DEFINE_CCALLABLE_STUB(LLongAE *, new_LLongAE, (size_t buflength, size_t nelt, long long val), ( buflength, nelt, val) ) DEFINE_CCALLABLE_STUB(size_t, CharAE_get_nelt, (const CharAE *ae), ( ae) ) DEFINE_CCALLABLE_STUB(size_t, CharAE_set_nelt, (CharAE *ae, size_t nelt), ( ae, nelt) ) DEFINE_NOVALUE_CCALLABLE_STUB(CharAE_extend, (CharAE *ae, size_t new_buflength), ( ae, new_buflength) ) DEFINE_NOVALUE_CCALLABLE_STUB(CharAE_insert_at, (CharAE *ae, size_t at, char c), ( ae, at, c) ) DEFINE_CCALLABLE_STUB(CharAE *, new_CharAE, (size_t buflength), ( buflength) ) DEFINE_CCALLABLE_STUB(CharAE *, new_CharAE_from_string, (const char *string), ( string) ) DEFINE_NOVALUE_CCALLABLE_STUB(CharAE_append_string, (CharAE *ae, const char *string), ( ae, string) ) DEFINE_NOVALUE_CCALLABLE_STUB(CharAE_delete_at, (CharAE *ae, size_t at, size_t nelt), ( ae, at, nelt) ) DEFINE_CCALLABLE_STUB(SEXP, new_CHARSXP_from_CharAE, (const CharAE *ae), ( ae) ) DEFINE_CCALLABLE_STUB(SEXP, new_RAW_from_CharAE, (const CharAE *ae), ( ae) ) DEFINE_CCALLABLE_STUB(SEXP, new_LOGICAL_from_CharAE, (const CharAE *ae), ( ae) ) DEFINE_CCALLABLE_STUB(size_t, CharAEAE_get_nelt, (const CharAEAE *aeae), ( aeae) ) DEFINE_CCALLABLE_STUB(size_t, CharAEAE_set_nelt, (CharAEAE *aeae, size_t nelt), ( aeae, nelt) ) DEFINE_NOVALUE_CCALLABLE_STUB(CharAEAE_extend, (CharAEAE *aeae, size_t new_buflength), ( aeae, new_buflength) ) DEFINE_NOVALUE_CCALLABLE_STUB(CharAEAE_insert_at, (CharAEAE *aeae, size_t at, CharAE *ae), ( aeae, at, ae) ) DEFINE_CCALLABLE_STUB(CharAEAE *, new_CharAEAE, (size_t buflength, size_t nelt), ( buflength, nelt) ) DEFINE_NOVALUE_CCALLABLE_STUB(CharAEAE_append_string, (CharAEAE *aeae, const char *string), ( aeae, string) ) DEFINE_CCALLABLE_STUB(SEXP, new_CHARACTER_from_CharAEAE, (const CharAEAE *aeae), ( aeae) ) /* * Stubs for callables defined in SEXP_utils.c */ DEFINE_CCALLABLE_STUB(const char *, get_classname, (SEXP x), ( x) ) /* * Stubs for callables defined in LLint_class.c */ DEFINE_CCALLABLE_STUB(int, is_LLint, (SEXP x), ( x) ) DEFINE_CCALLABLE_STUB(R_xlen_t, get_LLint_length, (SEXP x), ( x) ) DEFINE_CCALLABLE_STUB(long long int *, get_LLint_dataptr, (SEXP x), ( x) ) DEFINE_CCALLABLE_STUB(SEXP, alloc_LLint, (const char *classname, R_xlen_t length), ( classname, length) ) /* * Stubs for callables defined in subsetting_utils.c */ DEFINE_CCALLABLE_STUB(int, copy_vector_block, (SEXP dest, int dest_offset, SEXP src, int src_offset, int block_width), ( dest, dest_offset, src, src_offset, block_width) ) DEFINE_CCALLABLE_STUB(int, copy_vector_positions, (SEXP dest, int dest_offset, SEXP src, const int *pos, int npos), ( dest, dest_offset, src, pos, npos) ) DEFINE_CCALLABLE_STUB(int, copy_vector_ranges, (SEXP dest, int dest_offset, SEXP src, const int *start, const int *width, int nranges), ( dest, dest_offset, src, start, width, nranges) ) /* * Stubs for callables defined in vector_utils.c */ DEFINE_CCALLABLE_STUB(int, vector_memcmp, (SEXP x1, int x1_offset, SEXP x2, int x2_offset, int nelt), ( x1, x1_offset, x2, x2_offset, nelt) ) DEFINE_CCALLABLE_STUB(SEXP, list_as_data_frame, (SEXP x, int nrow), ( x, nrow) ) /* * Stubs for callables defined in integer_utils.c */ DEFINE_CCALLABLE_STUB(int, check_integer_pairs, (SEXP a, SEXP b, const int **a_p, const int **b_p, const char *a_argname, const char *b_argname), ( a, b, a_p, b_p, a_argname, b_argname) ) DEFINE_CCALLABLE_STUB(SEXP, find_interv_and_start_from_width, (const int *x, int x_len, const int *width, int width_len), ( x, x_len, width, width_len) ) /* * Stubs for callables defined in Hits_class.c */ DEFINE_CCALLABLE_STUB(SEXP, new_Hits, (int *from, const int *to, int nhit, int nLnode, int nRnode, int already_sorted), ( from, to, nhit, nLnode, nRnode, already_sorted) ) DEFINE_CCALLABLE_STUB(int, get_select_mode, (SEXP select), ( select) ) /* * Stubs for callables defined in Rle_class.c */ DEFINE_CCALLABLE_STUB(SEXP, construct_logical_Rle, (R_xlen_t nrun_in, const int *values_in, const void *lengths_in, int lengths_in_is_L), ( nrun_in, values_in, lengths_in, lengths_in_is_L) ) DEFINE_CCALLABLE_STUB(SEXP, construct_integer_Rle, (R_xlen_t nrun_in, const int *values_in, const void *lengths_in, int lengths_in_is_L), ( nrun_in, values_in, lengths_in, lengths_in_is_L) ) DEFINE_CCALLABLE_STUB(SEXP, construct_numeric_Rle, (R_xlen_t nrun_in, const double *values_in, const void *lengths_in, int lengths_in_is_L), ( nrun_in, values_in, lengths_in, lengths_in_is_L) ) DEFINE_CCALLABLE_STUB(SEXP, construct_complex_Rle, (R_xlen_t nrun_in, const Rcomplex *values_in, const void *lengths_in, int lengths_in_is_L), ( nrun_in, values_in, lengths_in, lengths_in_is_L) ) DEFINE_CCALLABLE_STUB(SEXP, construct_character_Rle, (SEXP values_in, const void *lengths_in, int lengths_in_is_L), ( values_in, lengths_in, lengths_in_is_L) ) DEFINE_CCALLABLE_STUB(SEXP, construct_raw_Rle, (R_xlen_t nrun_in, const Rbyte *values_in, const void *lengths_in, int lengths_in_is_L), ( nrun_in, values_in, lengths_in, lengths_in_is_L) ) DEFINE_CCALLABLE_STUB(SEXP, construct_Rle, (SEXP values_in, const void *lengths_in, int lengths_in_is_L), ( values_in, lengths_in, lengths_in_is_L) ) /* * Stubs for callables defined in List_class.c */ DEFINE_CCALLABLE_STUB(const char *, get_List_elementType, (SEXP x), ( x) ) DEFINE_NOVALUE_CCALLABLE_STUB(set_List_elementType, (SEXP x, const char *type), ( x, type) ) /* * Stubs for callables defined in SimpleList_class.c */ DEFINE_CCALLABLE_STUB(SEXP, new_SimpleList, (const char *classname, SEXP listData), ( classname, listData) ) /* * Stubs for callables defined in DataFrame_class.c */ DEFINE_CCALLABLE_STUB(SEXP, new_DataFrame, (const char *classname, SEXP vars, SEXP rownames, SEXP nrows), ( classname, vars, rownames, nrows) ) S4Vectors/inst/unitTests/0000755000175400017540000000000013175714520016420 5ustar00biocbuildbiocbuildS4Vectors/inst/unitTests/test_DataFrame-class.R0000644000175400017540000003527713175714520022547 0ustar00biocbuildbiocbuildtest_DataFrame_construction <- function() { score <- c(X=1L, Y=3L, Z=NA) counts <- c(10L, 2L, NA) ## na in rn checkException(DataFrame(score, row.names = c("a", NA, "b")), silent = TRUE) ## invalid rn length checkException(DataFrame(score, row.names = "a"), silent = TRUE) ## dups in rn checkException(DataFrame(score, row.names = c("a", "b", "a")), silent = TRUE) DF <- DataFrame() # no args checkTrue(validObject(DF)) row.names <- c("one", "two", "three") DF <- DataFrame(row.names = row.names) # no args, but row.names checkTrue(validObject(DF)) checkIdentical(rownames(DF), row.names) DF <- DataFrame(score) # single, unnamed arg checkTrue(validObject(DF)) checkIdentical(DF[["score"]], score) DF <- DataFrame(score, row.names = row.names) #with row names checkTrue(validObject(DF)) checkIdentical(rownames(DF), row.names) DF <- DataFrame(vals = score) # named vector arg checkTrue(validObject(DF)) checkIdentical(DF[["vals"]], score) DF <- DataFrame(counts, vals = score) # mixed named and unnamed checkTrue(validObject(DF)) checkIdentical(DF[["vals"]], score) checkIdentical(DF[["counts"]], counts) DF <- DataFrame(score + score) # unnamed arg with invalid name expression checkTrue(validObject(DF)) checkIdentical(DF[["score...score"]], score + score) mat <- cbind(score) DF <- DataFrame(mat) # single column matrix with column name checkTrue(validObject(DF)) checkIdentical(DF[["score"]], unname(score)) mat <- cbind(score, counts) DF <- DataFrame(mat) # two column matrix with col names checkTrue(validObject(DF)) checkIdentical(DF[["score"]], unname(score)) checkIdentical(DF[["counts"]], counts) colnames(mat) <- NULL DF <- DataFrame(mat) # two column matrix without col names checkTrue(validObject(DF)) checkIdentical(DF[["V1"]], unname(score)) sw <- DataFrame(swiss, row.names = rownames(swiss)) # a data.frame checkIdentical(as.data.frame(sw), swiss) rownames(swiss) <- NULL # strip row names to make them comparable sw <- DataFrame(swiss) # a data.frame checkIdentical(as.data.frame(sw), swiss) sw <- DataFrame(swiss[1:3,], score = unname(score)) checkIdentical(as.data.frame(sw), data.frame(swiss[1:3,], score)) sw <- DataFrame(score = score, swiss = swiss[1:3,]) # named data.frame/matrix checkIdentical(as.data.frame(sw), data.frame(score = score, swiss = swiss[1:3,])) ## identity df <- DataFrame(A=I(list(1:3))) checkIdentical(as.data.frame(df), data.frame(A=I(list(1:3)))) ## recycling DF <- DataFrame(1, score) checkIdentical(DF[[1]], rep(1, 3)) checkIdentical(DF[[2]], score) } test_DataFrame_coerce <- function() { ## need to introduce character() dim names checkTrue(validObject(as(matrix(0L, 0L, 0L), "DataFrame"))) score <- c(X=1L, Y=3L, Z=NA) DF <- as(score, "DataFrame") checkTrue(validObject(DF)) checkIdentical(DF[[1]], score) } test_DataFrame_subset <- function() { data(swiss) sw <- DataFrame(swiss) rn <- rownames(swiss) checkException(sw[list()], silent = TRUE) # non-atomic checkException(sw[NA], silent = TRUE) # column indices cannot be NA checkException(sw[100], silent = TRUE) # out of bounds col checkException(sw[,100], silent = TRUE) checkException(sw[1000,], silent = TRUE) # out of bounds row oldOpts <- options(warn=2) checkException(sw[1:3, drop=TRUE], silent = TRUE) # drop ignored checkException(sw[drop=TRUE], silent = TRUE) checkException(sw[foo = "bar"], silent = TRUE) # invalid argument options(oldOpts) checkException(sw[,"Fert"], silent = TRUE) # bad column name sw <- DataFrame(swiss) checkIdentical(sw[], sw) # identity subset checkIdentical(sw[,], sw) checkIdentical(sw[NULL], DataFrame(swiss[NULL])) # NULL subsetting checkIdentical(sw[,NULL], DataFrame(swiss[,NULL])) checkIdentical(as.data.frame(sw[NULL,]), structure(data.frame(swiss[NULL,]), row.names = character())) rownames(sw) <- rn ## select columns checkIdentical(as.data.frame(sw[1:3]), swiss[1:3]) checkIdentical(as.data.frame(sw[, 1:3]), swiss[1:3]) ## select rows checkIdentical(as.data.frame(sw[1:3,]), swiss[1:3,]) checkIdentical(as.data.frame(sw[1:3,]), swiss[1:3,]) checkIdentical(as.data.frame(sw[sw[["Education"]] == 7,]), swiss[swiss[["Education"]] == 7,]) checkIdentical(as.data.frame(sw[Rle(sw[["Education"]] == 7),]), swiss[swiss[["Education"]] == 7,]) ## select rows and columns checkIdentical(as.data.frame(sw[4:5, 1:3]), swiss[4:5,1:3]) checkIdentical(as.data.frame(sw[1]), swiss[1]) # a one-column data frame checkIdentical(sw[,"Fertility"], swiss[,"Fertility"]) ## the same checkIdentical(as.data.frame(sw[, 1, drop = FALSE]), swiss[, 1, drop = FALSE]) checkIdentical(sw[, 1], swiss[,1]) # a (unnamed) vector checkIdentical(sw[[1]], swiss[[1]]) # the same checkIdentical(sw[["Fertility"]], swiss[["Fertility"]]) checkIdentical(sw[["Fert"]], swiss[["Fert"]]) # should return 'NULL' checkIdentical(sw[,c(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE)], swiss[,c(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE)]) checkIdentical(as.data.frame(sw[1,]), swiss[1,]) # a one-row data frame checkIdentical(sw[1,, drop=TRUE], swiss[1,, drop=TRUE]) # a list ## duplicate row, unique row names are created checkIdentical(as.data.frame(sw[c(1, 1:2),]), swiss[c(1,1:2),]) ## NOTE: NA subsetting not yet supported for XVectors ##checkIdentical(as.data.frame(sw[c(1, NA, 1:2, NA),]), # mixin some NAs ## swiss[c(1, NA, 1:2, NA),]) checkIdentical(as.data.frame(sw["Courtelary",]), swiss["Courtelary",]) subswiss <- swiss[1:5,1:4] subsw <- sw[1:5,1:4] checkIdentical(as.data.frame(subsw["C",]), subswiss["C",]) # partially matches ## NOTE: NA subsetting not yet supported for XVectors ##checkIdentical(as.data.frame(subsw["foo",]), # bad row name ## subswiss["foo",]) ##checkIdentical(as.data.frame(sw[match("C", row.names(sw)), ]), ## swiss[match("C", row.names(sw)), ]) # no exact match } test_DataFrame_dimnames_replace <- function() { data(swiss) cn <- paste("X", seq_len(ncol(swiss)), sep = ".") sw <- DataFrame(swiss) colnames(sw) <- cn checkIdentical(colnames(sw), cn) cn <- as.character(seq_len(ncol(swiss))) colnames(sw) <- cn colnames(swiss) <- cn checkIdentical(colnames(sw), colnames(swiss)) colnames(sw) <- cn[1] colnames(swiss) <- cn[1] checkIdentical(colnames(sw), colnames(swiss)) rn <- seq(nrow(sw)) rownames(sw) <- rn checkIdentical(rownames(sw), as.character(rn)) checkException(rownames(sw) <- rn[1], silent = TRUE) checkException(rownames(sw) <- rep(rn[1], nrow(sw)), silent = TRUE) rn[1] <- NA checkException(rownames(sw) <- rn, silent = TRUE) } test_DataFrame_replace <- function() { score <- c(1L, 3L, NA) counts <- c(10L, 2L, NA) DF <- DataFrame(score) # single, unnamed arg DF[["counts"]] <- counts checkIdentical(DF[["counts"]], counts) DF[[3]] <- score checkIdentical(DF[[3]], score) DF[[3]] <- NULL # deletion DF[["counts"]] <- NULL DF$counts <- counts checkIdentical(DF$counts, counts) checkException(DF[[13]] <- counts, silent = TRUE) # index must be < length+1 checkException(DF[["tooshort"]] <- counts[1:2], silent = TRUE) sw <- DataFrame(swiss, row.names = rownames(swiss)) # a data.frame sw1 <- sw; swiss1 <- swiss sw1[] <- 1L; swiss1[] <- 1L checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[] <- 1; swiss1[] <- 1 checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1["Education"] <- 1; swiss1["Education"] <- 1 checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[,"Education"] <- 1; swiss1[,"Education"] <- 1 checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1["Courtelary",] <- 1; swiss1["Courtelary",] <- 1 checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[1:3] <- 1; swiss1[1:3] <- 1 checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[,1:3] <- 1; swiss1[,1:3] <- 1 checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[2:4,1:3] <- 1; swiss1[2:4,1:3] <- 1 checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[2:4,-c(2,4,5)] <- 1; swiss1[2:4,-c(2,4,5)] <- 1 checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[,1:3] <- sw1[,2:4]; swiss1[,1:3] <- swiss1[,2:4] checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[2:4,] <- sw1[1:3,]; swiss1[2:4,] <- swiss1[1:3,] checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[2:4,1:3] <- sw1[1:3,2:4]; swiss1[2:4,1:3] <- swiss1[1:3,2:4] checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1["NewCity",] <- NA; swiss1["NewCity",] <- NA checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[nrow(sw1)+(1:2),] <- NA; swiss1[nrow(swiss1)+(1:2),] <- NA checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1["NewCol"] <- seq(nrow(sw1)); swiss1["NewCol"] <- seq(nrow(sw1)) checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[ncol(sw1)+1L] <- seq(nrow(sw1)); swiss1[ncol(swiss1)+1L] <- seq(nrow(sw1)) checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[,"NewCol"] <- seq(nrow(sw1)); swiss1[,"NewCol"] <- seq(nrow(sw1)) checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1["NewCity","NewCol"] <- 0 swiss1["NewCity","NewCol"] <- 0 checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1["NewCity",] <- DataFrame(NA); swiss1["NewCity",] <- data.frame(NA) checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[nrow(sw1)+(1:2),] <- DataFrame(NA) swiss1[nrow(swiss1)+(1:2),] <- data.frame(NA) checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1["NewCol"] <- DataFrame(seq(nrow(sw1))) swiss1["NewCol"] <- data.frame(seq(nrow(sw1))) checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[ncol(sw1)+1L] <- DataFrame(seq(nrow(sw1))) swiss1[ncol(swiss1)+1L] <- data.frame(seq(nrow(sw1))) checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1[,"NewCol"] <- DataFrame(seq(nrow(sw1))) swiss1[,"NewCol"] <- data.frame(seq(nrow(sw1))) checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw; swiss1 <- swiss sw1["NewCity","NewCol"] <- DataFrame(0) swiss1["NewCity","NewCol"] <- data.frame(0) checkIdentical(as.data.frame(sw1), swiss1) sw1 <- sw mcols(sw1) <- DataFrame(id = seq_len(ncol(sw1))) sw1["NewCol"] <- DataFrame(seq(nrow(sw1))) checkIdentical(mcols(sw1, use.names=TRUE), DataFrame(id = c(seq_len(ncol(sw1)-1), NA), row.names = colnames(sw1))) } ## combining test_DataFrame_combine <- function() { data(swiss) rn <- rownames(swiss) sw <- DataFrame(swiss, row.names=rn) swisssplit <- split(swiss, swiss$Education) ## rbind checkIdentical(rbind(DataFrame(), DataFrame()), DataFrame()) score <- c(X=1L, Y=3L, Z=NA) DF <- DataFrame(score) checkIdentical(rbind(DF, DF)[[1]], c(score, score)) zr <- sw[FALSE,] checkIdentical(rbind(DataFrame(), zr, zr[,1:2]), zr) checkIdentical(as.data.frame(rbind(DataFrame(), zr, sw)), swiss) target <- do.call(rbind, swisssplit) current <- do.call(rbind, lapply(swisssplit, DataFrame)) rownames(target) <- rownames(current) <- NULL checkIdentical(target, as.data.frame(current)) DF <- DataFrame(A=I(list(1:3))) df <- as.data.frame(DF) checkIdentical(as.data.frame(rbind(DF, DF)), rbind(df, df)) ## combining factors df1 <- data.frame(species = c("Mouse", "Chicken"), n = c(5, 6)) DF1 <- DataFrame(df1) df2 <- data.frame(species = c("Human", "Chimp"), n = c(1, 2)) DF2 <- DataFrame(df2) df12 <- rbind(df1, df2) rownames(df12) <- NULL checkIdentical(as.data.frame(rbind(DF1, DF2)), df12) checkIdentical(rownames(rbind(sw, DataFrame(swiss))), rownames(rbind(swiss, swiss))) checkIdentical(rownames(do.call(rbind, lapply(swisssplit, DataFrame))), unlist(lapply(swisssplit, rownames), use.names=FALSE)) checkException(rbind(sw[,1:2], sw), silent = TRUE) other <- sw colnames(other)[1] <- "foo" checkException(rbind(other, sw), silent = TRUE) } test_DataFrame_looping <- function() { data(iris) actual <- by(iris, iris$Species, nrow) ## a bit tricky because of the 'call' attribute attr(actual, "call")[[1]] <- as.name("by") iris <- DataFrame(iris, row.names=rownames(iris)) checkIdentical(actual, by(iris, iris$Species, nrow)) } test_DataFrame_annotation <- function() { df <- DataFrame(x = c(1L, 3L, NA), y = c(10L, 2L, NA)) mcols(df) <- DataFrame(a = 1:2) checkIdentical(mcols(df)[,1], 1:2) checkIdentical(mcols(df[2:1])[,1], 2:1) checkIdentical(mcols(cbind(df,df))[,1], rep(1:2,2)) df$z <- 1:3 checkIdentical(mcols(df, use.names=TRUE), DataFrame(a = c(1L, 2L, NA), row.names = c("x", "y", "z"))) } ## '[<-' setter test_DataFrame_Setter <- function() { .SingleBracket <- function(df0, df1, idx) { target <- df0 for (i in seq_len(length(df0))[idx]) target[[i]] <- df1[[i]] df <- df0 df[idx] <- df1[idx] stopifnot(identical(target, df)) df <- DataFrame(df0) df[idx] <- DataFrame(df1)[idx] if (!identical(DataFrame(target), df)) FALSE else TRUE } df0 <- data.frame(x=11:12, y=21:22, z=31:32) df1 <- data.frame(matrix(LETTERS[1:6], ncol=3)) checkTrue(.SingleBracket(df0, df1, c(FALSE, FALSE, TRUE))) checkTrue(.SingleBracket(df0, df1, c(TRUE, FALSE, TRUE))) checkTrue(.SingleBracket(df0, df1, c(TRUE, TRUE, TRUE))) checkTrue(.SingleBracket(df0, df1, TRUE)) target <- df0 target[] <- df1[] df <- DataFrame(df0) df[] <- DataFrame(df1)[] checkIdentical(DataFrame(target), df) for (i in c('a', 'c', 'e')) { DF <- DataFrame(A=1:5, row.names=letters[1:5]) df <- data.frame(A=1:5, row.names=letters[1:5]) DF[i, 'B'] <- df[i, 'B'] <- 1 checkIdentical(as.data.frame(DF), df) } } test_DataFrame_droplevels <- function() { df <- DataFrame(state.name, state.region, state.region.rle=Rle(state.region)) df2 <- head(df) checkIdentical(lapply(droplevels(df2), levels), list(state.name=NULL, state.region=c("South", "West"), state.region.rle=c("South", "West"))) } test_DataFrame_transform <- function() { DF <- DataFrame(state.name, state.region, state.area) df <- as.data.frame(DF) checkIdentical(transform(DF), DF) TF <- transform(DF, log.area = log(state.area), ratio = log.area / state.area) tf <- transform(transform(df, log.area = log(state.area)), ratio = log.area / state.area) checkIdentical(tf, as.data.frame(TF)) } S4Vectors/inst/unitTests/test_DataTable-class.R0000644000175400017540000000176413175714520022536 0ustar00biocbuildbiocbuildtest_DataTable_basic <- function() { x <- DataFrame(a = 1:10, b = 11:20) y <- as.data.frame(x) checkIdentical(x[,1], y[,1]) checkIdentical(as.data.frame(x[,2:1]), y[,2:1]) # checkIdentical(as.data.frame(cbind(x,x)), cbind(y,y)) checkIdentical(dim(x), dim(y)) checkIdentical(nrow(x), nrow(y)) checkIdentical(ncol(x), ncol(y)) checkIdentical(as.data.frame(head(x)), head(y)) checkIdentical(as.data.frame(rbind(x,x)), rbind(y,y)) # checkIdentical(as.data.frame(tail(x)), tail(y)) } test_DataTable_subset <- function() { y <- airquality rownames(y) <- as.character(seq_len(nrow(y))) x <- as(y, "DataFrame") checkIdentical(as.data.frame(subset(x, Temp > 80, select = c(Ozone, Temp))), subset(y, Temp > 80, select = c(Ozone, Temp))) checkIdentical(as.data.frame(subset(x, Day == 1, select = -Temp)), subset(y, Day == 1, select = -Temp)) checkIdentical(as.data.frame(subset(x, select = Ozone:Wind)), subset(y, select = Ozone:Wind)) } S4Vectors/inst/unitTests/test_FilterRules.R0000644000175400017540000001206213175714520022043 0ustar00biocbuildbiocbuildtest_FilterRules_construct <- function() { ## as a simple character vector filts <- c("peaks", "promoters") parsedFilts <- list(peaks = expression(peaks), promoters = expression(promoters)) filters <- FilterRules() checkTrue(validObject(filters)) checkIdentical(as.list(filters), list()) filters <- FilterRules(filts) checkTrue(validObject(filters)) checkIdentical(as.list(filters), parsedFilts) checkIdentical(active(filters), structure(rep(TRUE, 2), names=filts)) ## with functions and expressions filts <- c(parsedFilts, list(find_eboxes = function(rd) rep(FALSE, nrow(rd)))) filters <- FilterRules(filts, active = FALSE) checkTrue(validObject(filters)) filts$find_eboxes <- new("FilterClosure", filts$find_eboxes) checkIdentical(as.list(filters), filts) checkIdentical(active(filters), structure(rep(FALSE, 3), names=names(filts))) ## direct, quoted args (character literal parsed) filters <- FilterRules(under_peaks = peaks, in_promoters = "promoters") filts <- list(under_peaks = expression(peaks), in_promoters = expression(promoters)) checkTrue(validObject(filters)) checkIdentical(as.list(filters), filts) ## mix them up filters <- FilterRules(filts, diffexp = de) checkTrue(validObject(filters)) checkIdentical(as.list(filters), c(filts, list(diffexp = expression(de)))) filts <- as.list(filters) checkException(FilterRules(c(filts, 1)), silent = TRUE) checkException(FilterRules(filts, active = filts), silent = TRUE) checkException(FilterRules(list(find_eboxes = function() NULL)), silent = TRUE) } test_FilterRules_append <- function() { filts <- c("peaks", "promoters") filts2 <- c("introns", "exons") filters <- FilterRules(filts) filters2 <- FilterRules(filts2, active=FALSE) both <- append(filters, filters2) checkTrue(validObject(both)) bothFilts <- structure(list(quote(peaks), quote(promoters), quote(introns), quote(exons)), names = c(filts, filts2)) checkIdentical(unlist(as.list(both)), bothFilts) bothActive <- structure(c(TRUE, TRUE, FALSE, FALSE), names = names(bothFilts)) checkIdentical(active(both), bothActive) both <- c(filters, filters2) checkTrue(validObject(both)) checkIdentical(unlist(as.list(both)), bothFilts) checkIdentical(active(both), bothActive) filters[["cons"]] <- "cons" filts <- list(peaks = quote(peaks), promoters = quote(promoters)) filts <- c(filts, cons = quote(cons)) checkIdentical(unlist(as.list(filters)), filts) filters[["cons"]] <- quote(cons) checkIdentical(unlist(as.list(filters)), filts) filters[["cons"]] <- expression(cons) checkIdentical(unlist(as.list(filters)), filts) fun <- function(rd) rep(FALSE, nrow(rd)) filters[[4]] <- fun filts <- c(filts, X = new("FilterClosure", fun)) checkIdentical(unlist(as.list(filters)), filts) checkException(filters[[]] <- "threeprime", silent = TRUE) checkException(filters[[1]] <- 2, silent = TRUE) checkException(filters[[1]] <- list(quote(foo), quote(bar)), silent = TRUE) } test_FilterRules_subset <- function() { filts <- c("peaks", "promoters", "introns") filters <- FilterRules(filts) checkIdentical(sapply(unlist(filters[1:2]), deparse), structure(filts[1:2], names = filts[1:2])) checkIdentical(sapply(unlist(filters[]),deparse), structure(filts, names = filts)) checkException(filters[1,2], silent = TRUE) } test_FilterRules_active <- function() { filts <- c("peaks", "promoters", "introns") filters <- FilterRules(filts) ## set the active state directly active(filters) <- FALSE checkIdentical(active(filters), structure(rep(FALSE, 3), names = filts)) active(filters) <- TRUE checkIdentical(active(filters), structure(rep(TRUE, 3), names = filts)) active(filters) <- c(FALSE, FALSE, TRUE) checkIdentical(active(filters), structure(c(FALSE, FALSE, TRUE), names = filts)) active(filters)["promoters"] <- TRUE checkIdentical(active(filters), structure(c(FALSE, TRUE, TRUE), names = filts)) checkException(active(filters) <- rep(FALSE, 2), silent = TRUE) checkException(active(filters) <- rep(FALSE, 5), silent = TRUE) checkException(active(filters)["introns"] <- NA, silent = TRUE) ## toggle the active state by name or index active(filters) <- c(NA, 2) # NA's are dropped checkIdentical(active(filters), structure(c(FALSE, TRUE, FALSE), names = filts)) active(filters) <- c("peaks", NA) checkIdentical(active(filters), structure(c(TRUE, FALSE, FALSE), names = filts)) checkException(active(filters) <- "foo", silent = TRUE) checkException(active(filters) <- 15, silent = TRUE) } test_FilterRules_annotation <- function() { filts <- c("peaks", "promoters") filters <- FilterRules(filts) mcols(filters) <- DataFrame(a = 1:2) checkIdentical(mcols(filters)[,1], 1:2) checkIdentical(mcols(filters[2:1])[,1], 2:1) checkIdentical(mcols(c(filters,filters))[,1], rep(1:2,2)) checkIdentical(mcols(append(filters,filters))[,1], rep(1:2,2)) } S4Vectors/inst/unitTests/test_Hits-class.R0000644000175400017540000001452113175714520021617 0ustar00biocbuildbiocbuildtest_Hits_constructor <- function() { from <- c(5, 2, 3, 3, 3, 2) to <- c(11, 15, 5, 4, 5, 11) id <- letters[1:6] hits0 <- Hits(from, to, 7, 15, id, sort.by.query=FALSE) checkTrue(validObject(hits0, complete=TRUE)) checkTrue(class(hits0) == "Hits") checkIdentical(as.integer(from), from(hits0)) checkIdentical(as.integer(to), to(hits0)) checkIdentical(7L, nLnode(hits0)) checkIdentical(15L, nRnode(hits0)) checkIdentical(id, mcols(hits0)$id) hits1 <- Hits(from, to, 7, 15, id, sort.by.query=TRUE) checkTrue(validObject(hits1, complete=TRUE)) checkTrue(class(hits1) == "SortedByQueryHits") checkIdentical(c(2L, 2L, 3L, 3L, 3L, 5L), from(hits1)) checkIdentical(c(15L, 11L, 5L, 4L, 5L, 11L), to(hits1)) checkIdentical(7L, nLnode(hits1)) checkIdentical(15L, nRnode(hits1)) checkIdentical(c("b", "f", "c", "d", "e", "a"), mcols(hits1)$id) ## By default, 'sort.by.query' is FALSE. checkIdentical(hits0, Hits(from, to, 7, 15, id)) } test_Hits_coercion <- function() { ## sparse from <- c(1L, 1L, 3L) to <- 1:3 hits <- Hits(from, to, 3, 3) checkIdentical(as.matrix(hits), cbind(from=from, to=to)) checkIdentical(as.table(hits), c(2L, 0L, 1L)) checkIdentical(as.table(t(hits)), c(1L, 1L, 1L)) hits <- Hits(from, to, 3, 3, sort.by.query=TRUE) checkIdentical(as.matrix(hits), cbind(queryHits=from, subjectHits=to)) checkIdentical(as.table(hits), c(2L, 0L, 1L)) checkIdentical(as.table(t(hits)), c(1L, 1L, 1L)) ## dense from <- rep(1:2, each=2) to <- rep(1:2, 2) hits <- Hits(from, to, 3, 2) checkIdentical(as.matrix(hits), cbind(from=from, to=to)) checkIdentical(as.table(hits), c(2L, 2L, 0L)) checkIdentical(as.table(t(hits)), c(2L, 2L)) hits <- Hits(from, to, 3, 2, sort.by.query=TRUE) checkIdentical(as.matrix(hits), cbind(queryHits=from, subjectHits=to)) checkIdentical(as.table(hits), c(2L, 2L, 0L)) checkIdentical(as.table(t(hits)), c(2L, 2L)) } test_remapHits <- function() { from0 <- c(1L, 1L, 2L, 3L, 3L) to0 <- c(1L, 2L, 5L, 2L, 4L) hits0 <- Hits(from0, to0, 3L, 6L, sort.by.query=TRUE) ## No remapping (i.e. map is missing or is the identity function). checkIdentical(remapHits(hits0), hits0) Lnodes.remapping1 <- seq_len(nLnode(hits0)) new.nLnode1 <- nLnode(hits0) Rnodes.remapping1 <- seq_len(nRnode(hits0)) new.nRnode1 <- nRnode(hits0) hits10 <- remapHits(hits0, Lnodes.remapping=Lnodes.remapping1, new.nLnode=new.nLnode1) checkIdentical(hits10, hits0) hits01 <- remapHits(hits0, Rnodes.remapping=Rnodes.remapping1, new.nRnode=new.nRnode1) checkIdentical(hits01, hits0) hits11 <- remapHits(hits0, Lnodes.remapping=Lnodes.remapping1, new.nLnode=new.nLnode1, Rnodes.remapping=Rnodes.remapping1, new.nRnode=new.nRnode1) checkIdentical(hits11, hits0) ## With maps that add a fixed offset to from(x), and a fixed offset ## to to(x). Lnodes.remapping2 <- Lnodes.remapping1 + 20L new.nLnode2 <- new.nLnode1 + 20L Rnodes.remapping2 <- Rnodes.remapping1 + 30L new.nRnode2 <- new.nRnode1 + 30L hits20 <- remapHits(hits0, Lnodes.remapping=Lnodes.remapping2, new.nLnode=new.nLnode2) expected_hits20 <- Hits(from0 + 20L, to0, 23, 6, sort.by.query=TRUE) checkIdentical(hits20, expected_hits20) hits02 <- remapHits(hits0, Rnodes.remapping=Rnodes.remapping2, new.nRnode=new.nRnode2) expected_hits02 <- Hits(from0, to0 + 30L, 3, 36, sort.by.query=TRUE) checkIdentical(hits02, expected_hits02) hits22 <- remapHits(hits0, Lnodes.remapping=Lnodes.remapping2, new.nLnode=new.nLnode2, Rnodes.remapping=Rnodes.remapping2, new.nRnode=new.nRnode2) expected_hits22 <- Hits(from0 + 20L, to0 + 30L, 23, 36, sort.by.query=TRUE) checkIdentical(hits22, expected_hits22) ## With injective and non-ascending maps. Lnodes.remapping3 <- 100L * rev(Lnodes.remapping1) + Lnodes.remapping1 new.nLnode3 <- 400L Rnodes.remapping3 <- 100L * rev(Rnodes.remapping1) + Rnodes.remapping1 new.nRnode3 <- 700L hits30 <- remapHits(hits0, Lnodes.remapping=Lnodes.remapping3, new.nLnode=new.nLnode3) expected_hits30 <- Hits(c(103, 103, 202, 301, 301), c( 2, 4, 5, 1, 2), 400, 6, sort.by.query=TRUE) checkIdentical(hits30, expected_hits30) hits03 <- remapHits(hits0, Rnodes.remapping=Rnodes.remapping3, new.nRnode=new.nRnode3) expected_hits03 <- Hits(from0, c(502, 601, 205, 304, 502), 3, 700, sort.by.query=TRUE) checkIdentical(t(hits03), t(expected_hits03)) hits33 <- remapHits(hits0, Lnodes.remapping=Lnodes.remapping3, new.nLnode=new.nLnode3, Rnodes.remapping=Rnodes.remapping3, new.nRnode=new.nRnode3) expected_hits33 <- Hits(c(103, 103, 202, 301, 301), c(304, 502, 205, 502, 601), 400, 700, sort.by.query=TRUE) checkIdentical(t(hits33), t(expected_hits33)) ## With non-injective maps (as factors). Lnodes.remapping4 <- factor(c("B", "A", "B"), levels=c("A", "B")) Rnodes.remapping4 <- factor(c("a", "b", "a", "b", "a", "b"), levels=c("a", "b")) hits40 <- remapHits(hits0, Lnodes.remapping=Lnodes.remapping4) expected_hits40 <- Hits(c(1, 2, 2, 2), c(5, 1, 2, 4), 2, 6, sort.by.query=TRUE) checkIdentical(hits40, expected_hits40) hits04 <- remapHits(hits0, Rnodes.remapping=Rnodes.remapping4) expected_hits04 <- Hits(c(1, 1, 2, 3), c(1, 2, 1, 2), 3, 2, sort.by.query=TRUE) checkIdentical(hits04, expected_hits04) hits44 <- remapHits(hits0, Lnodes.remapping=Lnodes.remapping4, Rnodes.remapping=Rnodes.remapping4) expected_hits44 <- Hits(c(1, 2, 2), c(1, 1, 2), 2, 2, sort.by.query=TRUE) checkIdentical(hits44, expected_hits44) } S4Vectors/inst/unitTests/test_List-class.R0000644000175400017540000002734313175714520021631 0ustar00biocbuildbiocbuild### NOTE: List is an abstract type, so we just test with IntegerList library(IRanges) test_List_replace_names <- function() { int1 <- c(1L,2L,3L,5L,2L,8L) int2 <- c(15L,45L,20L,1L,15L,100L,80L,5L) for (compress in c(TRUE, FALSE)) { collection <- IntegerList(int1, int2, compress=compress) names(collection) <- c("one", "two") checkIdentical(names(collection), c("one", "two")) names(collection) <- NULL checkIdentical(names(collection), NULL) names(collection) <- "one" checkIdentical(names(collection), c("one", NA)) checkException(names(collection) <- c("one", "two", "three"), silent=TRUE) } } test_List_unlist <- function() { for (compress in c(TRUE, FALSE)) { x0 <- list(c(a=1L), 21:23, 33L) x <- IntegerList(x0, compress=compress) target <- unlist(x0) current <- unlist(x) checkIdentical(target, current) names(x0) <- names(x) <- LETTERS[1:3] target <- unlist(x0) names(target)[2:4] <- "B" # base::unlist() behaviour not what we want! current <- unlist(x) checkIdentical(target, current) names(x0)[2] <- names(x)[2] <- "" target <- unlist(x0) current <- unlist(x) checkIdentical(target, current) names(x0)[2] <- names(x)[2] <- NA target <- unlist(x0) names(target)[2:4] <- "" # base::unlist() behaviour not what we want! current <- unlist(x) checkIdentical(target, current) names(x0[[2]])[] <- names(x[[2]])[] <- NA target <- unlist(x0) names(target)[2:4] <- NA # base::unlist() behaviour not what we want! current <- unlist(x) checkIdentical(target, current) names(x0[[2]]) <- names(x[[2]]) <- "b" target <- unlist(x0) names(target)[2:4] <- c("b", NA, NA) # base::unlist() behaviour not what # we want! current <- unlist(x) checkIdentical(target, current) names(x0[[2]])[] <- names(x[[2]])[] <- "a" target <- unlist(x0) names(target)[2:4] <- "a" # base::unlist() behaviour not what we want! current <- unlist(x) checkIdentical(target, current) names(x0)[2] <- names(x)[2] <- "A" target <- unlist(x0) current <- unlist(x) checkIdentical(target, current) } } test_List_extraction <- function() { int1 <- c(1L,2L,3L,5L,2L,8L) int2 <- c(15L,45L,20L,1L,15L,100L,80L,5L) for (compress in c(TRUE, FALSE)) { collection <- IntegerList(int1, int2, compress=compress) checkException(collection[[]], silent=TRUE) checkException(collection[[1, 2]], silent=TRUE) checkException(collection[[numeric()]], silent=TRUE) checkException(collection[[NULL]], silent=TRUE) checkException(collection[[c(1,2)]], silent=TRUE) checkException(collection[[-1]], silent=TRUE) checkException(collection[[5]], silent=TRUE) checkIdentical(collection[[NA_integer_]], NULL) checkIdentical(collection[[1]], int1) checkIdentical(collection[[2]], int2) checkIdentical(collection[["1"]], NULL) checkIdentical(collection$foo, NULL) checkIdentical(IntegerList(one=int1, int2, compress=compress)[["one"]], int1) checkIdentical(IntegerList(one=int1, int2, compress=compress)$one, int1) } } test_List_subset <- function() { int1 <- c(1L,2L,3L,5L,2L,8L) int2 <- c(15L,45L,20L,1L,15L,100L,80L,5L) for (compress in c(TRUE, FALSE)) { collection <- IntegerList(one=int1, int2, compress=compress) unnamed <- IntegerList(int1, int2, compress=compress) checkException(collection[1,2], silent=TRUE) if (compress) { checkException(collection[5], silent=TRUE) checkException(collection[c(NA, 2)], silent=TRUE) checkException(collection[c(TRUE, TRUE, TRUE)], silent=TRUE) checkException(unnamed["one"], silent=TRUE) } checkException(collection[c(-1,2)], silent=TRUE) empty <- IntegerList(compress=compress) names(empty) <- character(0) checkIdentical(collection[0], empty) checkIdentical(collection[numeric()], empty) checkIdentical(collection[logical()], empty) checkIdentical(collection[character()], empty) checkIdentical(collection[NULL], empty) checkIdentical(collection[], collection) checkIdentical(collection[FALSE], empty) checkIdentical(collection[c(FALSE, FALSE)], empty) checkIdentical(collection[list()], empty) checkIdentical(collection[TRUE], collection) checkIdentical(collection[c(TRUE, FALSE)], IntegerList(one=int1, compress=compress)) rl2 <- IntegerList(int2, compress=compress) names(rl2) <- "" checkIdentical(collection[2], rl2) checkIdentical(collection[c(2,1)], IntegerList(int2, one=int1, compress=compress)) checkIdentical(collection[-1], rl2) checkIdentical(collection["one"], IntegerList(one=int1, compress=compress)) } } test_List_replace <- function() { int1 <- c(1L,2L,3L,5L,2L,8L) int2 <- c(15L,45L,20L,1L,15L,100L,80L,5L) for (compress in c(TRUE, FALSE)) { collection <- IntegerList(one=int1, int2, compress=compress) checkException(collection[1,2] <- 1L, silent=TRUE) checkException(collection[c(-1,2)] <- 1L, silent=TRUE) newcollection <- collection newcollection[list()] <- 1L checkIdentical(newcollection, collection) newcollection <- collection newcollection[] <- collection checkIdentical(newcollection, collection) newcollection1 <- newcollection2 <- collection newcollection1[2:1] <- collection checkIdentical(newcollection1, IntegerList(one=int2, int1, compress=compress)) newcollection2[] <- collection[2:1] checkIdentical(newcollection2, newcollection1) value <- IntegerList(1:10, compress=compress) newcollection <- collection newcollection[TRUE] <- value checkIdentical(newcollection, IntegerList(one=1:10, 1:10, compress=compress)) newcollection <- collection newcollection[c(TRUE, FALSE)] <- value checkIdentical(newcollection, IntegerList(one=1:10, int2, compress=compress)) newcollection <- collection newcollection["one"] <- value checkIdentical(newcollection, IntegerList(one=1:10, int2, compress=compress)) newcollection <- collection newcollection[list(6:5, TRUE)] <- list(-1:-2, -99:-100) checkIdentical(newcollection, IntegerList(one=c(1,2,3,5,-2,-1), rep(c(-99,-100), 4), compress=compress)) collection <- IntegerList(one=int1, two=int2, compress=compress) newcollection <- collection newcollection[c("two", "one")] <- collection checkIdentical(newcollection, IntegerList(one=int2, two=int1, compress=compress)) newcollection <- collection newcollection[list(two=6:5, one=TRUE)] <- list(-1:-2, -99:-100) checkIdentical(newcollection, IntegerList(one=rep(c(-99,-100), 3), two=c(15,45,20,1,-2,-1,80,5), compress=compress)) collection <- IntegerList(one=c(a=1,b=2), two=c(d=1,b=0,a=5), compress=compress) newcollection1 <- newcollection2 <- collection newcollection1[list(two=2, one=2:1)] <- list(99, 11:12) checkIdentical(newcollection1, IntegerList(one=c(a=12,b=11), two=c(d=1,b=99,a=5), compress=compress)) newcollection2[list(two="b", one=c("b", "a"))] <- list(99, 11:12) checkIdentical(newcollection2, newcollection1) } } test_List_combine <- function() { int1 <- c(1L,2L,3L,5L,2L,8L) int2 <- c(15L,45L,20L,1L,15L,100L,80L,5L) for (compress in c(TRUE, FALSE)) { col1 <- IntegerList(one=int1, int2, compress=compress) col2 <- IntegerList(two=int2, one=int1, compress=compress) col3 <- IntegerList(int2, compress=compress) if (compress) checkException(append(col1, col2, c(1,2,3)), silent=TRUE) checkException(append(col1, col2, col3), silent=TRUE) checkIdentical(append(col1, col2), IntegerList(one=int1, int2, two=int2, one=int1, compress=compress)) checkIdentical(append(col1, col2, 1), IntegerList(one=int1, two=int2, one=int1, int2, compress=compress)) checkIdentical(append(col1, col2, 0), IntegerList(two=int2, one=int1, one=int1, int2, compress=compress)) checkIdentical(append(append(col1, col2), col3), IntegerList(one=int1, int2, two=int2, one=int1, int2, compress=compress)) ## for 'c' checkIdentical(c(col1, col2, col3), IntegerList(one=int1, int2, two=int2, one=int1, int2, compress=compress)) if (compress) { checkException(c(col1, int2), silent=TRUE) checkException(c(col1, col2, recursive=TRUE), silent=TRUE) } } } test_List_apply <- function() { int1 <- c(1L,2L,3L,5L,2L,8L) int2 <- c(15L,45L,20L,1L,15L,100L,80L,5L) for (compress in c(TRUE, FALSE)) { col1 <- IntegerList(one=int1, int2, compress=compress) checkIdentical(lapply(col1, mean), list(one=mean(int1), mean(int2))) checkException(lapply(col1, 2), silent=TRUE) } } test_List_annotation <- function() { int1 <- c(1L,2L,3L,5L,2L,8L) int2 <- c(15L,45L,20L,1L,15L,100L,80L,5L) for (compress in c(TRUE, FALSE)) { ilist <- IntegerList(int1, int2, compress=compress) mcols(ilist) <- DataFrame(a=1:2) checkIdentical(mcols(ilist)[,1], 1:2) checkIdentical(mcols(ilist[2:1])[,1], 2:1) checkIdentical(mcols(c(ilist,ilist))[,1], rep(1:2,2)) checkIdentical(mcols(append(ilist,ilist))[,1], rep(1:2,2)) } } test_List_as.data.frame <- function() { for (compress in c(TRUE, FALSE)) { x <- IntegerList(c=11:12, a=21:23, b=integer(0), a=41L, compress=compress) ## empty-ish if (compress) { # currently fail when 'x' is a SimpleList because # unlist() is broken on an empty SimpleList, so skip it current <- as.data.frame(x[0]) target <- data.frame(group=integer(0), group_name=character(0), value=integer(0), stringsAsFactors=FALSE) checkIdentical(target, current) } ## group, group_name, value current <- as.data.frame(x) target <- data.frame(group=c(1L, 1L, 2L, 2L, 2L, 4L), group_name=c("c", "c", "a", "a", "a", "a"), value=c(11:12, 21:23, 41L), stringsAsFactors=FALSE) checkIdentical(target, current) current <- as.data.frame(x, group_name.as.factor=TRUE) target$group_name <- factor(target$group_name, levels=unique(names(x))) checkIdentical(target, current) current <- as.data.frame(unname(x)) target$group_name <- rep(NA_character_, 6) checkIdentical(target, current) current <- as.data.frame(unname(x), group_name.as.factor=TRUE) target$group_name <- factor(target$group_name, levels=character(0)) checkIdentical(target, current) current <- as.data.frame(x, value.name="test") checkIdentical(unlist(x, use.names=FALSE), current$test) ## outer mcols mcols(x) <- DataFrame(stuff=LETTERS[4:1], range=IRanges(1:4, 10)) current <- as.data.frame(x, use.outer.mcols=TRUE) target <- data.frame(group=c(1L, 1L, 2L, 2L, 2L, 4L), group_name=c("c", "c", "a", "a", "a", "a"), value=c(11:12, 21:23, 41L), stringsAsFactors=FALSE) target <- cbind(target, as.data.frame(mcols(x))[current$group, , drop=FALSE]) rownames(target) <- NULL checkIdentical(target, current) ## relist mcols(x) <- NULL current <- as.data.frame(x) if (compress) checkIdentical(relist(current$value, x), x) } } S4Vectors/inst/unitTests/test_List-utils.R0000644000175400017540000000217613175714520021661 0ustar00biocbuildbiocbuild### NOTE: List is an abstract type, so we just test with IntegerList library(IRanges) test_List_funprog <- function() { int1 <- c(1L,2L,3L,5L,2L,8L) int2 <- c(15L,45L,20L,1L,15L,100L) for (compress in c(TRUE, FALSE)) { collection <- IntegerList(int1, int2, int1, compress=compress) addcollect <- IntegerList(int2, int1, int1, compress=compress) checkIdentical(Reduce("+", collection), Reduce("+", list(int1, int2, int1))) checkIdentical(as.list(Filter(function(x) mean(x) > 10, collection)), Filter(function(x) mean(x) > 10, list(int1, int2, int1))) checkIdentical(Find(function(x) mean(x) > 10, collection), Find(function(x) mean(x) > 10, list(int1, int2, int1))) checkIdentical(Map("+", collection, addcollect), Map("+", list(int1, int2, int1), list(int2, int1, int1))) checkIdentical(mapply("+", collection, addcollect), mapply("+", list(int1, int2, int1), list(int2, int1, int1))) checkIdentical(Position(function(x) mean(x) > 10, collection), Position(function(x) mean(x) > 10, list(int1, int2, int1))) } } S4Vectors/inst/unitTests/test_Pairs-class.R0000644000175400017540000000101013175714520021753 0ustar00biocbuildbiocbuildtest_Pairs <- function() { score <- rnorm(10) p <- Pairs(1:10, Rle(1L, 10), score=score, names=letters[1:10]) checkIdentical(first(p), 1:10) checkIdentical(mcols(p)$score, score) checkIdentical(p %in% p[1:5], c(rep(TRUE, 5), rep(FALSE, 5))) checkIdentical(as.data.frame(p), data.frame(first=first(p), second=second(p), score, names=names(p), stringsAsFactors=FALSE)) z <- zipup(p) first(p) <- Rle(1:10) checkIdentical(zipdown(z), p) } S4Vectors/inst/unitTests/test_Rle-class.R0000644000175400017540000002347013175714520021435 0ustar00biocbuildbiocbuildtest_Rle_construction <- function() { empty <- Rle() checkTrue(validObject(empty)) checkIdentical(Rle(), new("Rle")) checkIdentical(length(empty), 0L) x <- Rle(rep(6:10, 1:5)) checkTrue(validObject(x)) checkIdentical(x, Rle(6:10, 1:5)) y <- Rle(factor(rep(letters, 1:26))) checkTrue(validObject(y)) checkIdentical(y, Rle(factor(letters), 1:26)) checkIdentical(Rle(c(TRUE, TRUE, FALSE, FALSE, FALSE, NA, NA, NA)), Rle(c(TRUE, FALSE, NA), c(2, 3, 3))) checkIdentical(Rle(c(1L, 1L, 1L, 2L, 2L, NA, NA, NA)), Rle(c(1L, 2L, NA), c(3, 2, 3))) checkIdentical(Rle(c(1, 1, 1, 2, 2, NA, NA, NA)), Rle(c(1, 2, NA), c(3, 2, 3))) checkIdentical(Rle(c("a", "a", "b", "b", "b", NA, NA, NA)), Rle(c("a", "b", NA), c(2, 3, 3))) } test_Rle_replace <- function() { x <- Rle(1:26, 1:26) runValue(x) <- letters checkTrue(validObject(x)) checkIdentical(x, Rle(letters, 1:26)) runLength(x) <- 26:1 checkTrue(validObject(x)) checkIdentical(x, Rle(letters, 26:1)) } test_Rle_coercion <- function() { x <- rep(6:10, 1:5) xRle <- Rle(x) y <- c(TRUE,TRUE,FALSE,FALSE,TRUE,FALSE,TRUE,TRUE,TRUE) yRle <- Rle(y) checkIdentical(x, as.vector(xRle)) checkIdentical(as.integer(x), as.integer(xRle)) checkIdentical(as.numeric(x), as.numeric(xRle)) checkIdentical(as.complex(x), as.complex(xRle)) checkIdentical(as.factor(x), as.factor(xRle)) checkIdentical(y, as.vector(yRle)) checkIdentical(as.logical(y), as.logical(yRle)) checkIdentical(as.character(y), as.character(yRle)) checkIdentical(as.raw(y), as.raw(yRle)) checkIdentical(as.factor(y), as.factor(yRle)) } test_extract_ranges_from_Rle <- function() { extract_ranges_from_Rle <- S4Vectors:::extract_ranges_from_Rle # Extract single range. x <- Rle() for (method in 0:3) { current <- extract_ranges_from_Rle(x, 1L, 0L, method) checkIdentical(x, current) checkException(extract_ranges_from_Rle(x, 1L, 1L, method), silent=TRUE) checkException(extract_ranges_from_Rle(x, 0L, 0L, method), silent=TRUE) checkException(extract_ranges_from_Rle(x, 0L, 1L, method), silent=TRUE) } x <- Rle(0.8, 10L) for (method in 0:3) { target <- Rle(numeric(0)) for (start in 1:11) { current <- extract_ranges_from_Rle(x, start, 0L, method) checkIdentical(target, current) } checkException(extract_ranges_from_Rle(x, 0L, 0L, method), silent=TRUE) checkException(extract_ranges_from_Rle(x, 12L, 1L, method), silent=TRUE) target <- Rle(0.8) for (start in 1:10) { current <- extract_ranges_from_Rle(x, start, 1L, method) checkIdentical(target, current) } checkException(extract_ranges_from_Rle(x, 0L, 1L, method), silent=TRUE) checkException(extract_ranges_from_Rle(x, 11L, 1L, method), silent=TRUE) } # Extract multiple ranges. x <- Rle(factor(letters[1:3], levels=rev(letters)), 7:5) start <- 1L width <- length(x) for (method in 0:3) { current <- extract_ranges_from_Rle(x, start, width, method) checkIdentical(x, current) } start <- seq_along(x) width <- rep(1L, length(start)) for (method in 0:3) { current <- extract_ranges_from_Rle(x, start, width, method) checkIdentical(x, current) } start <- seq_len(length(x) + 1L) width <- rep(0L, length(start)) target <- Rle(factor(levels=rev(letters))) for (method in 0:3) { current <- extract_ranges_from_Rle(x, start, width, method) checkIdentical(target, current) } start <- seq_len(length(x) - 5L) width <- rep(c(6L, 2L, 7L), length.out=length(start)) target <- S4Vectors:::extract_ranges_from_vector_OR_factor( S4Vectors:::decodeRle(x), start, width) for (method in 0:3) { current <- extract_ranges_from_Rle(x, start, width, method) checkIdentical(target, S4Vectors:::decodeRle(current)) } start <- rev(start) width <- rev(width) target <- S4Vectors:::extract_ranges_from_vector_OR_factor( S4Vectors:::decodeRle(x), start, width) for (method in 0:3) { current <- extract_ranges_from_Rle(x, start, width, method) checkIdentical(target, S4Vectors:::decodeRle(current)) } } test_Rle_general <- function() { x <- rep(6:10, 1:5) xRle <- Rle(x) checkIdentical(unique(x), unique(xRle)) checkIdentical(x[c(3,2,4,6)], as.vector(xRle[c(3,2,4,6)])) checkIdentical(append(x,x), as.vector(append(xRle,xRle))) checkIdentical(append(x,x,3), as.vector(append(xRle,xRle,3))) checkIdentical(c(x,x) %in% c(7:9), as.vector(c(xRle,xRle)) %in% c(7:9)) checkIdentical(c(x, x), as.vector(c(xRle, xRle))) checkIdentical(is.na(c(NA, x, NA, NA, NA, x, NA)), as.vector(is.na(c(Rle(NA), xRle, Rle(NA, 3), xRle, Rle(NA))))) checkIdentical(is.unsorted(c(1,2,2,3)), is.unsorted(Rle(c(1,2,2,3)))) checkIdentical(is.unsorted(c(1,2,2,3), strictly = TRUE), is.unsorted(Rle(c(1,2,2,3)), strictly = TRUE)) checkIdentical(length(x), length(xRle)) checkIdentical(match(c(x,x), c(7:9)), as.vector(match(c(xRle,xRle), c(7:9)))) checkIdentical(rep(x, times = 2), as.vector(rep(xRle, times = 2))) checkIdentical(rep(x, times = x), as.vector(rep(xRle, times = x))) checkIdentical(rep(x, length.out = 20), as.vector(rep(xRle, length.out = 20))) checkIdentical(rep(x, each = 2), as.vector(rep(xRle, each = 2))) checkIdentical(rep(x, x, 20), as.vector(rep(xRle, x, 20))) checkException(rep(xRle, x, each = 2), silent = TRUE) checkIdentical(rep(x, 2, each = 2), as.vector(rep(xRle, 2, each = 2))) checkIdentical(rep(x, length.out = 20, each = 2), as.vector(rep(xRle, length.out = 20, each = 2))) checkIdentical(rep(x, x, 20, 2), as.vector(rep(xRle, x, 20, 2))) checkIdentical(rep.int(x, times = 2), as.vector(rep.int(xRle, times = 2))) checkIdentical(rev(x), as.vector(rev(xRle))) library(IRanges) checkIdentical(as.vector(xRle[IRanges(start=1:3, width=1:3)]), x[c(1,2,3,3,4,5)]) z <- x z[] <- rev(z) zRle <- xRle zRle[] <- rev(zRle) checkIdentical(z, as.vector(zRle)) z <- x z[c(1,5,3)] <- 3:1 zRle <- xRle zRle[c(1,5,3)] <- 3:1 checkIdentical(z, as.vector(zRle)) z <- x z[1:5] <- 0L zRle <- xRle zRle[IRanges(start=1:3, width=1:3)] <- 0L checkIdentical(z, as.vector(zRle)) checkIdentical(sort(c(x,x)), as.vector(sort(c(xRle,xRle)))) checkIdentical(as.vector(subset(xRle, rep(c(TRUE, FALSE), length.out = length(.(x))))), subset(x, rep(c(TRUE, FALSE), length.out = length(x)))) checkIdentical(as.vector(window(x, start = 3, end = 13)), as.vector(window(xRle, start = 3, end = 13))) z <- x z[3:13] <- 0L zRle <- xRle window(zRle, start = 3, end = 13) <- 0L checkIdentical(z, as.vector(zRle)) } ## --------------------------------------------- ## table() and sort() ## --------------------------------------------- test_Rle_sort <- function() { ## atomic ix <- c(NA, 3L, NA) nx <- c(2, 5, 1, 2, NA, 5, NA) cx <- c("c", "B", NA, "a") lx <- c(FALSE, FALSE, NA, TRUE, NA) checkIdentical(sort(nx), as.numeric(sort(Rle(nx)))) checkIdentical(sort(nx, na.last=TRUE), as.numeric(sort(Rle(nx), na.last=TRUE))) checkIdentical(sort(nx, na.last=FALSE), as.numeric(sort(Rle(nx), na.last=FALSE))) checkIdentical(sort(ix), as.integer(sort(Rle(ix)))) checkIdentical(sort(cx), as.character(sort(Rle(cx)))) checkIdentical(sort(lx), as.logical(sort(Rle(lx)))) checkIdentical(sort(numeric()), as.numeric(sort(Rle(numeric())))) checkIdentical(sort(character()), as.character(sort(Rle(character())))) ## factor nf <- factor(nx) checkIdentical(sort(nf), as.factor(sort(Rle(nf)))) checkIdentical(sort(nf, decreasing=TRUE, na.last=TRUE), as.factor(sort(Rle(nf), decreasing=TRUE, na.last=TRUE))) checkIdentical(sort(nf, na.last=FALSE), as.factor(sort(Rle(nf), na.last=FALSE))) checkIdentical(sort(factor()), as.factor(sort(Rle(factor())))) ## factor, unused levels nf <- factor(nx, levels=1:6) checkIdentical(levels(sort(nf)), levels(sort(Rle(nf)))) } test_Rle_table <- function() { ## atomic ix <- c(NA, 3L, NA) nx <- c(2, 5, 1, 2, NA, 5, NA) cx <- c("c", "B", NA, "a") lx <- c(FALSE, FALSE, NA, TRUE, NA) checkIdentical(table(ix), table("ix"=Rle(ix))) checkIdentical(table(nx), table("nx"=Rle(nx))) checkIdentical(table(cx), table("cx"=Rle(cx))) checkIdentical(table(lx), table("lx"=Rle(lx))) checkIdentical(table(numeric()), table(Rle(numeric()))) checkIdentical(table(character()), table(Rle(character()))) ## factor nf <- factor(nx) checkIdentical(table("nx"=nx), table("nx"=Rle(nx))) checkIdentical(table(factor()), table(Rle(factor()))) ## factor, unused levels nf <- factor(nx, levels=1:6) cf <- factor(cx, levels=c("a", "c", "B", "b")) checkIdentical(as.factor(table(nf)), as.factor(table(Rle(nf)))) checkIdentical(as.factor(table(cf)), as.factor(table(Rle(cf)))) } test_Rle_Integer_overflow <- function() { x0 <- Rle(as.integer(c(1,(2^31)-1,1))) checkIdentical(NA_integer_, suppressWarnings(sum(x0))) testWarning <- NULL suppressWarnings(withCallingHandlers({sum(x0) }, warning=function(warn) { msg <- conditionMessage(warn) exp <- gettext("integer overflow - use runValue(.) <- as.numeric(runValue(.))", domain="R") if (msg == exp) testWarning <<- TRUE })) checkTrue(testWarning) x <- Rle(c(1,(2^31)-1,1)) checkIdentical(mean(x0), mean(x)) } S4Vectors/inst/unitTests/test_Rle-utils.R0000644000175400017540000005762313175714520021477 0ustar00biocbuildbiocbuildlibrary(IRanges) # many tests in this file use functionalities defined # in IRanges test_Rle_groupGeneric <- function() { set.seed(0) x <- sample(0:3, 50, replace = TRUE) xRle <- Rle(x) checkIdentical(numeric(0) + 1, as.vector(Rle(numeric(0)) + 1)) checkIdentical(x + 1, as.vector(xRle + 1)) checkIdentical(2 * x + 3, as.vector(2 * xRle + 3)) checkIdentical(x[(x > 0) & (x < 3)], as.vector(xRle[(xRle > 0) & (xRle < 3)])) checkIdentical(log(x), as.vector(log(xRle))) checkIdentical(range(x), range(xRle)) checkIdentical(sum(x), sum(xRle)) checkIdentical(prod(x), prod(xRle)) checkIdentical(cumsum(x), as.vector(cumsum(xRle))) checkIdentical(cumprod(x), as.vector(cumprod(xRle))) checkIdentical(round(x + .25), as.vector(round(xRle + .25))) checkIdentical(signif(x + .25), as.vector(signif(xRle + .25))) checkIdentical(Im(x + 5i), as.vector(Im(xRle + 5i))) } test_Rle_general <- function() { x <- rep(6:10, 1:5) xRle <- Rle(x) checkIdentical(aggregate(xRle, IRanges(start = 3:6, end = 13:10), FUN = mean), aggregate(xRle, FUN = mean, start = 3:6, width = seq(11, 5, by = -2))) exp <- c(mean(x[3:13]), mean(x[4:12]), mean(x[5:11]), mean(x[6:10])) agg <- aggregate(xRle, FUN = function(x) x, start = 3:6, end = 13:10) checkEquals(exp, aggregate(xRle, FUN = mean, start = 3:6, end = 13:10)) checkEquals(as.vector(aggregate.ts(ts(x, frequency = 5), FUN = mean)), aggregate(xRle, FUN = mean, start = c(1, 6, 11), end = c(5, 10, 15))) #checkIdentical(findRange(c(1, 3, 5), xRle), IRanges(start = c(1,2,4), width = 1:3)) #checkIdentical(head(x, 8), as.vector(head(xRle, 8))) #checkIdentical(head(x, -3), as.vector(head(xRle, -3))) #checkException(split(Rle(1:26), integer()), silent = TRUE) #checkException(split(Rle(1:26), Rle()), silent = TRUE) #checkIdentical(lapply(as.list(split(Rle(1:26), letters)), as.vector), # split(1:26, letters)) #checkIdentical(lapply(as.list(split(Rle(1:26), Rle(letters))), as.vector), # split(1:26, letters)) #checkIdentical(lapply(as.list(split(Rle(1:26), letters[1:2])), as.vector), # split(1:26, letters[1:2])) #checkIdentical(lapply(as.list(split(Rle(1:26), Rle(letters[1:2]))), as.vector), # split(1:26, letters[1:2])) #checkIdentical(lapply(as.list(split(Rle(integer()), letters)), as.vector), # split(integer(), letters)) #checkIdentical(lapply(as.list(split(Rle(integer()), Rle(letters))), as.vector), # split(integer(), letters)) #checkIdentical(splitRanges(Rle(letters, 1:26)), # split(IRanges(end = cumsum(1:26), width = 1:26), letters)) checkIdentical(summary(x), summary(xRle)) #checkIdentical(tail(x, 8), as.vector(tail(xRle, 8))) #checkIdentical(tail(x, -3), as.vector(tail(xRle, -3))) #checkException(tapply(xRle), silent = TRUE) #checkIdentical(tapply(x, x), tapply(xRle, xRle)) #checkIdentical(tapply(x, x, mean), tapply(xRle, xRle, mean)) #checkIdentical(tapply(xRle, x, mean), tapply(xRle, xRle, mean)) #checkIdentical(tapply(x, x, mean, simplify = FALSE), # tapply(xRle, xRle, mean, simplify = FALSE)) #checkIdentical(tapply(xRle, x, mean, simplify = FALSE), # tapply(xRle, xRle, mean, simplify = FALSE)) } test_Rle_logical <- function() { checkIdentical(logical(), as.vector(Rle(logical()))) x <- c(TRUE,TRUE,FALSE,FALSE,TRUE,FALSE,TRUE,TRUE,TRUE) xRle <- Rle(x) checkIdentical(!x, as.vector(!x)) checkIdentical(which(x), as.vector(which(x))) checkIdentical(as(xRle, "IRanges"), IRanges(start = c(1,5,7), width = c(2, 1, 3))) } test_Rle_numerical <- function() { checkIdentical(numeric(), as.vector(Rle(numeric()))) x <- cumsum(cumsum(1:10)) xRle <- Rle(x) checkIdentical(pmax(x, rev(x)), as.vector(pmax(xRle, rev(xRle)))) checkIdentical(pmin(x, rev(x)), as.vector(pmin(xRle, rev(xRle)))) checkIdentical(pmax.int(x, rev(x)), as.vector(pmax.int(xRle, rev(xRle)))) checkIdentical(pmin.int(x, rev(x)), as.vector(pmin.int(xRle, rev(xRle)))) checkIdentical(diff(x), as.vector(diff(xRle))) checkIdentical(diff(x, lag = 2), as.vector(diff(xRle, lag = 2))) checkIdentical(diff(x, differences = 2), as.vector(diff(xRle, differences = 2))) checkIdentical(diff(x, lag = 2, differences = 2), as.vector(diff(xRle, lag = 2, differences = 2))) x <- rep(c(1.2, 3.4, NA, 7.8, 9.0), 1:5) y <- x - rev(x) xRle <- Rle(x) yRle <- Rle(y) checkIdentical(mean(x), mean(xRle)) checkIdentical(mean(x, na.rm = TRUE), mean(xRle, na.rm = TRUE)) checkIdentical(var(x), var(xRle)) checkEqualsNumeric(var(x, na.rm = TRUE), var(xRle, na.rm = TRUE)) checkIdentical(var(x, y), var(xRle, yRle)) checkEqualsNumeric(var(x, y, na.rm = TRUE), var(xRle, yRle, na.rm = TRUE)) checkIdentical(cov(x, y), cov(xRle, yRle)) checkEqualsNumeric(cov(x, y, use = "complete"), cov(xRle, yRle, use = "complete")) checkIdentical(cor(x, y), cor(xRle, yRle)) checkEqualsNumeric(cor(x, y, use = "complete"), cor(xRle, yRle, use = "complete")) checkIdentical(sd(x), sd(xRle)) checkEqualsNumeric(sd(x, na.rm = TRUE), sd(xRle, na.rm = TRUE)) checkIdentical(median(x), median(xRle)) checkIdentical(median(x, na.rm = TRUE), median(xRle, na.rm = TRUE)) checkIdentical(quantile(x, na.rm = TRUE), quantile(xRle, na.rm = TRUE)) checkIdentical(mad(x), mad(xRle)) checkIdentical(mad(x, na.rm = TRUE), mad(xRle, na.rm = TRUE)) checkIdentical(IQR(x, na.rm = TRUE), IQR(xRle, na.rm = TRUE)) y <- (-20:20)^2 y[c(1,10,21,41)] <- c(100L, 30L, 400L, 470L) checkEqualsNumeric(smoothEnds(y), as.vector(smoothEnds(Rle(y)))) checkEqualsNumeric(runmed(y, 7), as.vector(runmed(Rle(y), 7))) checkEqualsNumeric(runmed(y, 11), as.vector(runmed(Rle(y), 11))) checkEqualsNumeric(runmed(y, 7, "keep"), as.vector(runmed(Rle(y), 7, "keep"))) checkEqualsNumeric(runmed(y, 11, "keep"), as.vector(runmed(Rle(y), 11, "keep"))) checkEqualsNumeric(runmed(y, 7, "constant"), as.vector(runmed(Rle(y), 7, "constant"))) checkEqualsNumeric(runmed(y, 11, "constant"), as.vector(runmed(Rle(y), 11, "constant"))) x <- rep(c(1.2, 3.4, 5.6, 7.8, 9.0), 1:5) y <- rep(1:5, c(4, 2, 5, 1, 3)) xRle <- Rle(x) yRle <- Rle(y) checkEqualsNumeric(sapply(1:13, function(i) sum(window(x, i, i + 2))), as.numeric(runsum(xRle, k = 3))) # checkEqualsNumeric(sapply(1:13, function(i) sum(window(rev(x), i, i + 2))), # as.numeric(runsum(rev(xRle), k = 3))) checkEqualsNumeric(sapply(1:13, function(i) sum(window(y, i, i + 2))), as.integer(runsum(yRle, k = 3))) checkEqualsNumeric(sapply(1:13, function(i) sum(window(rev(y), i, i + 2))), as.integer(runsum(rev(yRle), k = 3))) checkEqualsNumeric(sapply(1:13, function(i) mean(window(x, i, i + 2))), as.numeric(runmean(xRle, k = 3))) checkEqualsNumeric(sapply(1:13, function(i) mean(window(rev(x), i, i + 2))), as.numeric(runmean(rev(xRle), k = 3))) checkEqualsNumeric(sapply(1:13, function(i) mean(window(y, i, i + 2))), as.numeric(runmean(yRle, k = 3))) checkEqualsNumeric(sapply(1:13, function(i) mean(window(rev(y), i, i + 2))), as.numeric(runmean(rev(yRle), k = 3))) checkEqualsNumeric(sapply(1:13, function(i) sum(window(x, i, i + 2))), as.numeric(runwtsum(xRle, k = 3, wt = rep(1,3)))) checkEqualsNumeric(sapply(1:13, function(i) sum(window(x, i, i + 2)/3)), as.numeric(runwtsum(xRle, k = 3, wt = rep(1/3,3)))) checkEqualsNumeric(sapply(1:13, function(i) sum(window(y, i, i + 2))), as.numeric(runwtsum(yRle, k = 3, wt = rep(1,3)))) checkEqualsNumeric(sapply(1:13, function(i) sum(window(y, i, i + 2)/3)), as.numeric(runwtsum(yRle, k = 3, wt = rep(1/3,3)))) checkEqualsNumeric(sapply(1:13, function(i) min(window(x, i, i + 2))), as.numeric(runq(xRle, k = 3, i = 1))) checkEqualsNumeric(sapply(1:13, function(i) median(window(x, i, i + 2))), as.numeric(runq(xRle, k = 3, i = 2))) checkEqualsNumeric(sapply(1:13, function(i) max(window(x, i, i + 2))), as.numeric(runq(xRle, k = 3, i = 3))) checkIdentical(runq(xRle, k = 3, i = 2), rev(runq(rev(xRle), k = 3, i = 2))) checkEqualsNumeric(sapply(1:13, function(i) min(window(y, i, i + 2))), as.numeric(runq(yRle, k = 3, i = 1))) checkEqualsNumeric(sapply(1:13, function(i) median(window(y, i, i + 2))), as.numeric(runq(yRle, k = 3, i = 2))) checkEqualsNumeric(sapply(1:13, function(i) max(window(y, i, i + 2))), as.numeric(runq(yRle, k = 3, i = 3))) checkIdentical(runq(yRle, k = 3, i = 2), rev(runq(rev(yRle), k = 3, i = 2))) } test_Rle_character <- function() { checkIdentical(character(), as.vector(Rle(character()))) txt <- c("The", "licenses", "for", "most", "software", "are", "designed", "to", "take", "away", "your", "freedom", "to", "share", "and", "change", "it.", "", "By", "contrast,", "the", "GNU", "General", "Public", "License", "is", "intended", "to", "guarantee", "your", "freedom", "to", "share", "and", "change", "free", "software", "--", "to", "make", "sure", "the", "software", "is", "free", "for", "all", "its", "users") txt <- rep(txt, seq_len(length(txt))) txtRle <- Rle(txt) checkIdentical(nchar(txt), as.vector(nchar(txtRle))) checkIdentical(substr(txt, 3, 7), as.vector(substr(txtRle, 3, 7))) checkIdentical(substring(txt, 4, 9), as.vector(substring(txtRle, 4, 9))) checkIdentical(chartr("@!*", "alo", txt), as.vector(chartr("@!*", "alo", txtRle))) checkIdentical(tolower(txt), as.vector(tolower(txtRle))) checkIdentical(toupper(txt), as.vector(toupper(txtRle))) checkIdentical(sub("[b-e]",".", txt), as.vector(sub("[b-e]",".", txtRle))) checkIdentical(gsub("[b-e]",".", txt), as.vector(gsub("[b-e]",".", txtRle))) checkIdentical(paste(txt, rev(txt), sep = "|"), as.vector(paste(txtRle, rev(txtRle), sep = "|"))) modifyFactor <- function(x, FUN, ...) { levels(x) <- FUN(levels(x), ...) x } fac <- factor(txt) facRle <- Rle(fac) checkIdentical(modifyFactor(fac, substr, 3, 7), as.factor(substr(facRle, 3, 7))) checkIdentical(modifyFactor(fac, substring, 4, 9), as.factor(substring(facRle, 4, 9))) checkIdentical(modifyFactor(fac, chartr, old = "@!*", new = "alo"), as.factor(chartr("@!*", "alo", facRle))) checkIdentical(modifyFactor(fac, tolower), as.factor(tolower(facRle))) checkIdentical(modifyFactor(fac, toupper), as.factor(toupper(facRle))) checkIdentical(modifyFactor(fac, sub, pattern = "[b-e]", replacement = "."), as.factor(sub("[b-e]",".", facRle))) checkIdentical(modifyFactor(fac, gsub, pattern = "[b-e]", replacement = "."), as.factor(gsub("[b-e]",".", facRle))) checkTrue(is.factor(runValue(paste(facRle, rev(facRle), sep = "|")))) } test_Rle_factor <- function() { checkIdentical(factor(character()), as.factor(Rle(factor(character())))) x <- factor(rep(letters, 1:26)) xRle <- Rle(x) checkIdentical(levels(x), levels(xRle)) levels(x) <- LETTERS levels(xRle) <- LETTERS checkIdentical(levels(x), levels(xRle)) checkIdentical(nlevels(x), 26L) xRle[] <- xRle checkIdentical(Rle(x), xRle) checkIdentical(x, xRle[TRUE,drop=TRUE]) } ## --------------------------------------------- ## runsum(), runmean(), runwtsum() ## --------------------------------------------- .naive_runsum <- function(x, k, na.rm=FALSE) sapply(0:(length(x)-k), function(offset) sum(x[1:k + offset], na.rm=na.rm)) checkIdenticalIfNaNsWereNAs <- function(x, y) { x[is.nan(x)] <- NA_real_ y[is.nan(y)] <- NA_real_ checkIdentical(x, y) } test_Rle_runsum_real <- function() { x0 <- c(NA, NaN, Inf, -Inf) x <- Rle(x0) ## na.rm = TRUE target1 <- .naive_runsum(x0, 4, na.rm=TRUE) target2 <- .naive_runsum(x, 4, na.rm=TRUE) checkIdenticalIfNaNsWereNAs(target1, target2) current <- as.vector(runsum(x, 4, na.rm=TRUE)) checkIdenticalIfNaNsWereNAs(target1, current) ## na.rm = FALSE target1 <- .naive_runsum(x0, 4, na.rm=FALSE) target2 <- .naive_runsum(x, 4, na.rm=FALSE) checkIdenticalIfNaNsWereNAs(target1, target2) current <- as.vector(runsum(x, 4, na.rm=FALSE)) checkIdenticalIfNaNsWereNAs(target1, current) x0 <- c(NA, Inf, NA, -Inf, Inf, -Inf, NaN, Inf, NaN, -Inf) x <- Rle(x0) for (k in 1:2) { target1 <- .naive_runsum(x0, k, na.rm=TRUE) target2 <- .naive_runsum(x, k, na.rm=TRUE) checkIdenticalIfNaNsWereNAs(target1, target2) current <- as.vector(runsum(x, k, na.rm=TRUE)) checkIdenticalIfNaNsWereNAs(target1, current) target1 <- .naive_runsum(x0, k, na.rm=FALSE) target2 <- .naive_runsum(x, k, na.rm=FALSE) checkIdenticalIfNaNsWereNAs(target1, target2) current <- as.vector(runsum(x, k, na.rm=FALSE)) checkIdenticalIfNaNsWereNAs(target1, current) } ## NOTE : Inconsistent behavior in base::sum() ## sum(x, y) and x + y: ## > sum(NaN, NA) ## [1] NA ## > NaN + NA ## [1] NaN ## also between sum(c(x, y)) and sum(x, y): ## This inconsistency only exists on linux, not Mac or Windows ## > sum(c(NaN, NA)) ## [1] NaN ## > sum(NaN, NA) ## [1] NA ## x0 <- c(NA, NaN, NA) ## x <- Rle(x0) ## target1 <- c(x0[1] + x0[2], x0[2] + x0[3]) ## target2 <- as.vector(c(x[1] + x[2], x[2] + x[3])) ## checkIdentical(target1, target2) ## current <- as.vector(runsum(x, k=2, na.rm=FALSE)) ## checkIdentical(target1, current) } test_Rle_runsum_integer <- function() { x0 <- c(NA_integer_, 1L, 1L) x <- Rle(x0) for (k in 1:3) { target1 <- .naive_runsum(x0, k, na.rm=TRUE) target2 <- .naive_runsum(x, k, na.rm=TRUE) checkIdentical(target1, target2) current <- as.vector(runsum(x, k, na.rm=TRUE)) checkIdentical(target1, current) target1 <- .naive_runsum(x0, k, na.rm=FALSE) target2 <- .naive_runsum(x, k, na.rm=FALSE) checkIdentical(target1, target2) current <- as.vector(runsum(x, k, na.rm=FALSE)) checkIdentical(target1, current) } x0 <- c(1L, NA_integer_, 1L) x <- Rle(x0) for (k in 1:3) { target1 <- .naive_runsum(x0, k, na.rm=TRUE) target2 <- .naive_runsum(x, k, na.rm=TRUE) checkIdentical(target1, target2) current <- as.vector(runsum(x, k, na.rm=TRUE)) checkIdentical(target1, current) target1 <- .naive_runsum(x0, k, na.rm=FALSE) target2 <- .naive_runsum(x, k, na.rm=FALSE) checkIdentical(target1, target2) current <- as.vector(runsum(x, k, na.rm=FALSE)) checkIdentical(target1, current) } } .naive_runmean <- function(x, k, na.rm=FALSE) sapply(0:(length(x)-k), function(offset) mean(x[1:k + offset], na.rm=na.rm)) test_Rle_runmean <- function() { x0 <- c(NA, 1, 1) x <- Rle(x0) for (k in 1:3) { target1 <- .naive_runmean(x0, k, na.rm=TRUE) target2 <- .naive_runmean(x, k, na.rm=TRUE) checkIdentical(target1, target2) current <- as.vector(runmean(x, k, na.rm=TRUE)) checkIdentical(target1, current) target1 <- .naive_runmean(x0, k, na.rm=FALSE) target2 <- .naive_runmean(x, k, na.rm=FALSE) checkIdentical(target1, target2) current <- as.vector(runmean(x, k, na.rm=FALSE)) checkIdentical(target1, current) } x0 <- c(0, NA, NaN, 0, NA, Inf, 0, NA, -Inf, 0, Inf, -Inf) x <- Rle(x0) for (k in 1:2) { target1 <- .naive_runmean(x0, k, na.rm=TRUE) target2 <- .naive_runmean(x, k, na.rm=TRUE) checkIdentical(target1, target2) current <- as.vector(runmean(x, k, na.rm=TRUE)) checkIdentical(target1, current) target1 <- .naive_runmean(x0, k, na.rm=FALSE) target2 <- .naive_runmean(x, k, na.rm=FALSE) checkIdentical(target1, target2) #current <- as.vector(runmean(x, k, na.rm=FALSE)) #checkIdentical(target1, current) } } .naive_runwtsum <- function(x, k, wt, na.rm=FALSE) sapply(0:(length(x)-k), function(offset) { xwt <- x[1:k + offset] * wt sum(xwt, na.rm=na.rm)}) test_Rle_runwtsum_real <- function() { x0 <- c(NA, NaN, Inf, -Inf) x <- Rle(x0) wt <- rep(1, 4) target1 <- .naive_runwtsum(x0, 4, wt, na.rm=TRUE) target2 <- .naive_runwtsum(x, 4, wt, na.rm=TRUE) checkIdentical(target1, target2) current <- as.vector(runwtsum(x, 4, wt, na.rm=TRUE)) checkIdentical(target1, current) target1 <- .naive_runwtsum(x0, 4, wt, na.rm=FALSE) target2 <- .naive_runwtsum(x, 4, wt, na.rm=FALSE) checkIdentical(target1, target2) #current <- as.vector(runwtsum(x, 4, wt, na.rm=FALSE)) #checkIdentical(target1, current) x0 <- c(NA, Inf, NA, -Inf, Inf, -Inf, NaN, Inf, NaN, -Inf) x <- Rle(x0) for (k in 1:2) { if (k==1) wt <- 1 else wt <- c(1, 1) target1 <- .naive_runwtsum(x0, k, wt, na.rm=TRUE) target2 <- .naive_runwtsum(x, k, wt, na.rm=TRUE) checkIdentical(target1, target2) current <- as.vector(runwtsum(x, k, wt, na.rm=TRUE)) checkIdentical(target1, current) target1 <- .naive_runwtsum(x0, k, wt, na.rm=FALSE) target2 <- .naive_runwtsum(x, k, wt, na.rm=FALSE) checkIdentical(target1, target2) current <- as.vector(runwtsum(x, k, wt, na.rm=FALSE)) checkIdentical(target1, current) } x0 <- c(1, NA, 1, NaN, 1, NA) x <- Rle(x0) for (k in 1:2) { if (k==1) wt <- 2 else wt <- c(1, 1) target1 <- .naive_runwtsum(x0, k, wt, na.rm=FALSE) target2 <- .naive_runwtsum(x, k, wt, na.rm=FALSE) checkIdentical(target1, target2) current <- as.vector(runwtsum(x, k, wt, na.rm=FALSE)) checkIdentical(target1, current) } } test_Rle_runwtsum_integer <- function() { x0 <- c(NA_integer_, 1L, 1L) x <- Rle(x0) iwt <- rep(2L, 3) for (k in 1:3) { wt <- iwt[1:k] target1 <- .naive_runwtsum(x0, k, wt, na.rm=TRUE) target2 <- .naive_runwtsum(x, k, wt, na.rm=TRUE) checkIdentical(target1, target2) current <- as.vector(runwtsum(x, k, wt, na.rm=TRUE)) checkIdentical(as.numeric(target1), current) target1 <- .naive_runwtsum(x0, k, wt, na.rm=FALSE) target2 <- .naive_runwtsum(x, k, wt, na.rm=FALSE) checkIdentical(target1, target2) current <- as.vector(runwtsum(x, k, wt, na.rm=FALSE)) checkIdentical(as.numeric(target1), current) } x0 <- c(1L, NA_integer_, 1L) x <- Rle(x0) iwt <- rep(2L, 3) for (k in 1:3) { wt <- iwt[1:k] target1 <- .naive_runwtsum(x0, k, wt, na.rm=TRUE) target2 <- .naive_runwtsum(x, k, wt, na.rm=TRUE) checkIdentical(target1, target2) current <- as.vector(runwtsum(x, k, wt, na.rm=TRUE)) checkIdentical(as.numeric(target1), current) target1 <- .naive_runwtsum(x0, k, wt, na.rm=FALSE) target2 <- .naive_runwtsum(x, k, wt, na.rm=FALSE) checkIdentical(target1, target2) current <- as.vector(runwtsum(x, k, wt, na.rm=FALSE)) checkIdentical(as.numeric(target1), current) } } .naive_runq <- function(x, k, i, na.rm=FALSE) sapply(0:(length(x)-k), function(offset) { xsub <- x[1:k + offset] if (!na.rm) { ## Manually handle NA's because they are not allowed ## in 'x' of quantile(x, ...) when na.rm=FALSE. if (any(is.na(xsub))) NA else quantile(xsub, probs=i/k, na.rm=na.rm, names=FALSE, type=3) } else { ## If all NA's, just return first NA value. ## Not handled in quantile(). if (all(is.na(xsub))) { xsub[1] } else { xsub <- xsub[!is.na(xsub)] quantile(xsub, probs=i/k, na.rm=na.rm, names=FALSE, type=3) } } }, USE.NAMES=FALSE) test_Rle_runq_real <- function() { x0 <- c(NA_real_) x <- Rle(x0) k <- length(x); i <- 1 target1 <- as.numeric(.naive_runq(x0, k, i, na.rm=TRUE)) current <- as.numeric(runq(x, k, i, na.rm=TRUE)) checkIdentical(target1, current) x0 <- c(3, NA, 1, NaN, 4, Inf, 2, -Inf) x <- Rle(x0) k <- length(x) for (i in c(1, length(x))) { target1 <- as.numeric(.naive_runq(x0, k, i, na.rm=TRUE)) current <- as.numeric(runq(x, k, i, na.rm=TRUE)) checkIdentical(target1, current) target1 <- as.numeric(.naive_runq(x0, k, i, na.rm=FALSE)) current <- as.numeric(runq(x, k, i, na.rm=FALSE)) checkIdentical(target1, current) } x0 <- c(3, NA, 1, NaN, 4, Inf, 2, -Inf) x <- Rle(x0) i <- 1 ## NOTE : special case k=1, returns NA not NaN target1 <- c(3, NA, 1, NA, 4, Inf, 2, -Inf) current <- as.numeric(runq(x, k=1, i=1, na.rm=TRUE)) checkIdentical(target1, current) for (k in c(2:length(x))) { target1 <- as.numeric(.naive_runq(x0, k, i, na.rm=TRUE)) current <- as.numeric(runq(x, k, i, na.rm=TRUE)) checkIdentical(target1, current) target1 <- as.numeric(.naive_runq(x0, k, i, na.rm=FALSE)) current <- as.numeric(runq(x, k, i, na.rm=FALSE)) checkIdentical(target1, current) } x0 <- c(1, 2, 3, 4, 5) x <- Rle(x0) k <- length(x); i <- 4 target1 <- .naive_runq(x0, k, i, na.rm=TRUE) current <- as.vector(runq(x, k, i, na.rm=TRUE)) checkIdentical(target1, current) x0 <- c(1, 2, 3, NA, NA) x <- Rle(x0) k <- length(x); i <- 4 target1 <- .naive_runq(x0, k, i, na.rm=TRUE) current <- as.vector(runq(x, k, i, na.rm=TRUE)) checkIdentical(target1, current) } test_Rle_runq_integer <- function() { x0 <- c(NA_integer_) x <- Rle(x0) k <- length(x); i <- 1 target1 <- as.numeric(.naive_runq(x0, k, i, na.rm=TRUE)) current <- as.numeric(runq(x, k, i, na.rm=TRUE)) checkIdentical(target1, current) x0 <- NA_integer_ x <- Rle(x0) k <- i <- 1 target1 <- unlist(.naive_runq(x0, k, i, na.rm=TRUE)) target2 <- as.vector(do.call(c, (.naive_runq(x, k, i, na.rm=TRUE)))) checkIdentical(target1, target2) current <- as.vector(runq(x, k, i, na.rm=TRUE)) checkIdentical(as.integer(unname(target1)), current) x0 <- c(NA_integer_, 2L, 1L) x <- Rle(x0) k <- 3 for (i in 1:3) { target1 <- unlist(.naive_runq(x0, k, i, na.rm=TRUE)) current <- as.vector(runq(x, k, i, na.rm=TRUE)) checkIdentical(unname(target1), current) target1 <- unlist(.naive_runq(x0, k, i, na.rm=FALSE)) current <- as.integer(runq(x, k, i, na.rm=FALSE)) checkIdentical(as.integer(target1), current) } x0 <- c(3L, 2L, NA_integer_, NA_integer_, 1L, 2L) x <- Rle(x0) i <- 1 for (k in 1:6) { target1 <- unlist(.naive_runq(x0, k, i, na.rm=TRUE)) current <- as.vector(runq(x, k, i, na.rm=TRUE)) checkIdentical(target1, current) target1 <- unlist(.naive_runq(x0, k, i, na.rm=FALSE)) current <- as.integer(runq(x, k, i, na.rm=FALSE)) checkIdentical(as.integer(target1), current) } } S4Vectors/inst/unitTests/test_Vector-merge.R0000644000175400017540000000467213175714520022152 0ustar00biocbuildbiocbuildtest_Vector_merge <- function() { library(GenomicRanges) ## Binary merge gr <- GRanges(c("chr1:1-1000", "chr1:2000-3000"), a=1:2, b=2:1) gr2 <- GRanges(c("chr1:1-1000", "chr1:2000-3000"), c=c(1,3), d=c(3,1)) target <- granges(gr) mcols(target) <- DataFrame(a=1:2, b=2:1, c=c(1,3), d=c(3,1)) current <- merge(gr, gr2) checkIdentical(target, current) gr <- GRanges(c("chr1:1-1000", "chr1:2000-3000", "chr1:1-10"), a=1:3, b=c(2,1,3)) target <- granges(gr2) mcols(target) <- DataFrame(a=1:2, b=c(2,1), c=c(1,3), d=c(3,1)) current <- merge(gr, gr2) checkIdentical(target, current) current <- merge(gr, gr2, all.y=TRUE) checkIdentical(target, current) target <- granges(gr) mcols(target) <- DataFrame(a=1:3, b=c(2,1,3), c=c(1,3,NA), d=c(3,1,NA)) current <- merge(gr, gr2, all=TRUE, sort=FALSE) checkIdentical(target, current) current <- merge(gr, gr2, all.x=TRUE, sort=FALSE) checkIdentical(target, current) target <- sort(target) current <- merge(gr, gr2, all=TRUE, sort=TRUE) checkIdentical(target, current) x <- GRanges(c("chr1:1-1000", "chr2:2000-3000"), score=c(0.45, 0.1), a1=c(5L, 7L), a2=c(6, 8)) y <- GRanges(c("chr2:150-151", "chr1:1-10", "chr2:2000-3000", "chr2:2000-3000"), score=c(0.7, 0.82, 0.1, 0.2), b1=c(0L, 5L, 1L, 7L), b2=c(1, -2, 1, 1.5)) checkException(merge(x, y[-3])) target0 <- c(granges(x), granges(y[-4]))[c(4, 1, 3, 2)] mcols(target0) <- DataFrame(score=c(0.82, 0.45, 0.7, 0.1), a1=c(NA, 5L, NA, 7L), a2=c(NA, 6, NA, 8), b1=c(5L, NA, 0L, 1L), b2=c(-2, NA, 1, 1)) current <- merge(x, y[-4], all=TRUE) checkIdentical(target0, current) current <- merge(x, y[-4], all.x=TRUE) checkIdentical(target0[c(2, 4)], current) current <- merge(x, y[-4], all.y=TRUE) target <- target0[c(3, 4, 1)] seqlevels(target) <- seqlevels(current) checkIdentical(target, current) current <- merge(x, y[-4]) checkIdentical(target0[4], current) ## Self merge is a no-op if 'sort=FALSE' (or object already sorted) and ## if the object has no duplicates checkIdentical(x, merge(x, x)) ## N-ary merge current <- merge(x, y[-4], x, all=TRUE) checkIdentical(target0, current) } S4Vectors/inst/unitTests/test_expand-methods.R0000644000175400017540000000373713175714520022534 0ustar00biocbuildbiocbuildlibrary(IRanges) # for CharacterList test_expand <- function(){ ## setup aa <- CharacterList("a", paste0("d", 1:2), paste0("b", 1:3), c(), "c") bb <- CharacterList(paste0("sna", 1:2),"foo", paste0("bar", 1:3),c(),"hica") df <- DataFrame(aa=aa, bb=bb, cc=11:15) ## tests ## test one col without dropping res1 <- expand(df, colnames="aa", keepEmptyRows=TRUE) checkTrue(dim(res1)[1]==8) checkTrue(dim(res1)[2]==3) checkIdentical(res1$aa,c("a","d1","d2","b1","b2","b3",NA,"c")) checkIdentical(res1$bb[[4]],c("bar1","bar2","bar3")) ## test one col with dropping res2 <- expand(df, colnames="aa", keepEmptyRows=FALSE) checkTrue(dim(res2)[1]==7) checkTrue(dim(res2)[2]==3) checkIdentical(res2$aa,c("a","d1","d2","b1","b2","b3","c")) checkIdentical(res2$bb[[4]],c("bar1","bar2","bar3")) ## test two columns no dropping res3 <- expand(df, colnames=c("aa","bb"), keepEmptyRows=TRUE) checkTrue(dim(res3)[1]==15) checkTrue(dim(res3)[2]==3) checkIdentical(res3$aa, c("a","a","d1","d2","b1","b1","b1","b2","b2","b2","b3","b3","b3",NA,"c")) checkIdentical(as.character(as.data.frame(res3[14,])), c(NA, NA, "14")) ## test two columns with dropping res4 <- expand(df, colnames=c("aa","bb"), keepEmptyRows=FALSE) checkTrue(dim(res4)[1]==14) checkTrue(dim(res4)[2]==3) checkIdentical(res4$aa, c("a","a","d1","d2","b1","b1","b1","b2","b2","b2","b3","b3","b3","c")) ## inverted order (different sorting of 2 cols, no dropping res5 <- expand(df, colnames=c("bb","aa"), keepEmptyRows=TRUE) checkTrue(dim(res5)[1]==15) checkTrue(dim(res5)[2]==3) checkIdentical(res5$aa, c("a","a","d1","d2","b1","b2","b3","b1","b2","b3","b1","b2","b3",NA,"c")) ## inverted order (different sorting of 2 cols, with dropping res6 <- expand(df, colnames=c("bb","aa"), keepEmptyRows=FALSE) checkTrue(dim(res6)[1]==14) checkTrue(dim(res6)[2]==3) checkIdentical(res6$aa, c("a","a","d1","d2","b1","b2","b3","b1","b2","b3","b1","b2","b3","c")) } S4Vectors/inst/unitTests/test_map_ranges_to_runs.R0000644000175400017540000000543213175714520023473 0ustar00biocbuildbiocbuildmap_ranges_to_runs <- S4Vectors:::map_ranges_to_runs map_positions_to_runs <- S4Vectors:::map_positions_to_runs test_map_ranges_to_runs <- function() { test_all_methods <- function(target, run_lens, start, width) { for (method in 1:3) { current <- map_ranges_to_runs(run_lens, start, width, method) checkIdentical(target, current) } current <- map_ranges_to_runs(run_lens, start, width) checkIdentical(target, current) } ## 0 range to map target <- list(integer(0), integer(0), integer(0), integer(0)) test_all_methods(target, integer(0), integer(0), integer(0)) test_all_methods(target, 15:10, integer(0), integer(0)) ## 1 range to map target <- list(0L, 6L, 0L, 0L) test_all_methods(target, 15:10, 1L, sum(15:10)) target <- list(0L, 1L, 0L, 12L) test_all_methods(target, 15:10, 1L, 3L) target <- list(0L, 1L, 12L, 0L) test_all_methods(target, 15:10, 13L, 3L) target <- list(0L, 2L, 13L, 13L) test_all_methods(target, 15:10, 14L, 3L) target <- list(0L, 2L, 13L, 0L) test_all_methods(target, 15:10, 14L, 16L) target <- list(0L, 3L, 14L, 12L) test_all_methods(target, 15:10, 15L, 16L) target <- list(1L, 2L, 0L, 11L) test_all_methods(target, 15:10, 16L, 16L) target <- list(5L, 1L, 8L, 0L) test_all_methods(target, 15:10, 74L, 2L) target <- list(5L, 1L, 0L, 0L) test_all_methods(target, 15:10, 66L, 10L) target <- list(1L, 3L, 11L, 2L) test_all_methods(target, c(9L, 15L, 17L, 11L), 21L, 30L) ## more than 1 range to map start <- 74:1 width <- rep.int(2L, length(start)) current <- map_ranges_to_runs(15:10, start, width) for (i in seq_along(start)) { target_i <- map_ranges_to_runs(15:10, start[i], width[i]) current_i <- lapply(current, `[[`, i) checkIdentical(target_i, current_i) } } test_map_positions_to_runs <- function() { test_all_methods <- function(run_lens, pos) { run_breakpoints <- cumsum(run_lens) target <- findInterval(pos - 1L, run_breakpoints) + 1L width <- rep.int(1L, length(pos)) for (method in 1:3) { current <- map_positions_to_runs(run_lens, pos, method) checkIdentical(target, current) current <- map_ranges_to_runs(run_lens, pos, width, method) checkIdentical(target, current[[1L]] + 1L) } current <- map_positions_to_runs(run_lens, pos) checkIdentical(target, current) current <- map_ranges_to_runs(run_lens, pos, width) checkIdentical(target, current[[1L]] + 1L) } test_all_methods(integer(0), integer(0)) test_all_methods(15:10, integer(0)) test_all_methods(15:10, seq_len(sum(15:10))) test_all_methods(15:10, rev(seq_len(sum(15:10)))) } S4Vectors/man/0000755000175400017540000000000013175714520014214 5ustar00biocbuildbiocbuildS4Vectors/man/Annotated-class.Rd0000644000175400017540000000215413175714520017525 0ustar00biocbuildbiocbuild\name{Annotated-class} \docType{class} \alias{Annotated} \alias{Annotated-class} % accessors \alias{metadata} \alias{metadata,Annotated-method} \alias{metadata<-} \alias{metadata<-,Annotated-method} \title{Annotated class} \description{The virtual class \code{Annotated} is used to standardize the storage of metadata with a subclass.} \details{ The \code{Annotated} class supports the storage of global metadata in a subclass. This is done through the \code{metadata} slot that stores a list object. } \section{Accessors}{ In the following code snippets, \code{x} is an \code{Annotated} object. \describe{ \item{}{\code{metadata(x)}, \code{metadata(x) <- value}: Get or set the list holding arbitrary R objects as annotations. May be, and often is, empty. } } } \author{P. Aboyoun} \seealso{ The \link{Vector} class, which extends Annotated directly. } \examples{ showClass("Annotated") # shows (some of) the known subclasses ## If the IRanges package was not already loaded, this will show ## more subclasses: library(IRanges) showClass("Annotated") } \keyword{methods} \keyword{classes} S4Vectors/man/DataFrame-class.Rd0000644000175400017540000002221213175714520017431 0ustar00biocbuildbiocbuild\name{DataFrame-class} \docType{class} \alias{class:DataFrame} \alias{DataFrame-class} % accessor \alias{nrow,DataFrame-method} \alias{ncol,DataFrame-method} \alias{rownames,DataFrame-method} \alias{colnames,DataFrame-method} \alias{rownames<-,DataFrame-method} \alias{colnames<-,DataFrame-method} % constructor \alias{DataFrame} % subsetting \alias{[,DataFrame-method} \alias{[<-,DataFrame-method} \alias{[[<-,DataFrame-method} \alias{[[,DataFrame-method} % coercion \alias{as.data.frame,DataFrame-method} \alias{as.matrix,DataFrame-method} \alias{coerce,matrix,DataFrame-method} \alias{coerce,vector,DataFrame-method} \alias{coerce,list,DataFrame-method} \alias{coerce,integer,DataFrame-method} \alias{coerce,Vector,DataFrame-method} \alias{coerce,data.frame,DataFrame-method} \alias{coerce,data.table,DataFrame-method} \alias{coerce,NULL,DataFrame-method} \alias{coerce,table,DataFrame-method} \alias{coerce,AsIs,DataFrame-method} \alias{coerce,DataFrame,data.frame-method} \alias{coerce,xtabs,DataFrame-method} \alias{coerce,ANY,DataFrame-method} \alias{coerce,SimpleList,DataFrame-method} \alias{coerce,ANY,AsIs-method} % combining \alias{cbind,DataFrame-method} \alias{rbind,DataFrame-method} \title{DataFrame objects} \description{ The \code{DataFrame} class extends the \link{DataTable} virtual class and supports the storage of any type of object (with \code{length} and \code{[} methods) as columns. } \details{ On the whole, the \code{DataFrame} behaves very similarly to \code{data.frame}, in terms of construction, subsetting, splitting, combining, etc. The most notable exception is that the row names are optional. This means calling \code{rownames(x)} will return \code{NULL} if there are no row names. Of course, it could return \code{seq_len(nrow(x))}, but returning \code{NULL} informs, for example, combination functions that no row names are desired (they are often a luxury when dealing with large data). As \code{DataFrame} derives from \code{\linkS4class{Vector}}, it is possible to set an \code{annotation} string. Also, another \code{DataFrame} can hold metadata on the columns. For a class to be supported as a column, it must have \code{length} and \code{[} methods, where \code{[} supports subsetting only by \code{i} and respects \code{drop=FALSE}. Optionally, a method may be defined for the \code{showAsCell} generic, which should return a vector of the same length as the subset of the column passed to it. This vector is then placed into a \code{data.frame} and converted to text with \code{format}. Thus, each element of the vector should be some simple, usually character, representation of the corresponding element in the column. } \section{Constructor}{ \describe{\code{DataFrame(..., row.names = NULL, check.names = TRUE)}: Constructs a \code{DataFrame} in similar fashion to \code{\link{data.frame}}. Each argument in \code{...} is coerced to a \code{DataFrame} and combined column-wise. No special effort is expended to automatically determine the row names from the arguments. The row names should be given in \code{row.names}; otherwise, there are no row names. This is by design, as row names are normally undesirable when data is large. If \code{check.names} is \code{TRUE}, the column names will be checked for syntactic validity and made unique, if necessary. To store an object of a class that does not support coercion to \code{DataFrame}, wrap it in \code{I()}. The class must still have methods for \code{length} and \code{[}. } } \section{Accessors}{ In the following code snippets, \code{x} is a \code{DataFrame}. \describe{ \item{}{\code{dim(x)}: Get the length two integer vector indicating in the first and second element the number of rows and columns, respectively. } \item{}{\code{dimnames(x)}, \code{dimnames(x) <- value}: Get and set the two element list containing the row names (character vector of length \code{nrow(x)} or \code{NULL}) and the column names (character vector of length \code{ncol(x)}). } } } \section{Coercion}{ \describe{ \item{}{\code{as(from, "DataFrame")}: By default, constructs a new \code{DataFrame} with \code{from} as its only column. If \code{from} is a \code{matrix} or \code{data.frame}, all of its columns become columns in the new \code{DataFrame}. If \code{from} is a list, each element becomes a column, recycling as necessary. Note that for the \code{DataFrame} to behave correctly, each column object must support element-wise subsetting via the \code{[} method and return the number of elements with \code{length}. It is recommended to use the \code{DataFrame} constructor, rather than this interface. } \item{}{\code{as.list(x)}: Coerces \code{x}, a \code{DataFrame}, to a \code{list}. } \item{}{\code{as.data.frame(x, row.names=NULL, optional=FALSE)}: Coerces \code{x}, a \code{DataFrame}, to a \code{data.frame}. Each column is coerced to a \code{data.frame} and then column bound together. If \code{row.names} is \code{NULL}, they are retrieved from \code{x}, if it has any. Otherwise, they are inferred by the \code{data.frame} constructor. NOTE: conversion of \code{x} to a \code{data.frame} is not supported if \code{x} contains any \code{list}, \code{SimpleList}, or \code{CompressedList} columns. } \item{}{\code{as(from, "data.frame")}: Coerces a \code{DataFrame} to a \code{data.frame} by calling \code{as.data.frame(from)}. } \item{}{\code{as.matrix(x)}: Coerces the \code{DataFrame} to a \code{matrix}, if possible. } } } \section{Subsetting}{ In the following code snippets, \code{x} is a \code{DataFrame}. \describe{ \item{}{\code{x[i,j,drop]}: Behaves very similarly to the \code{\link{[.data.frame}} method, except \code{i} can be a logical \code{Rle} object and subsetting by \code{matrix} indices is not supported. Indices containing \code{NA}'s are also not supported. } \item{}{\code{x[i,j] <- value}: Behaves very similarly to the \code{\link{[<-.data.frame}} method. } \item{}{\code{x[[i]]}: Behaves very similarly to the \code{\link{[[.data.frame}} method, except arguments \code{j} and \code{exact} are not supported. Column name matching is always exact. Subsetting by matrices is not supported. } \item{}{\code{x[[i]] <- value}: Behaves very similarly to the \code{\link{[[<-.data.frame}} method, except argument \code{j} is not supported. } } } \section{Combining}{ In the following code snippets, \code{x} is a \code{DataFrame}. \describe{ \item{}{ \code{rbind(...)}: Creates a new \code{DataFrame} by combining the rows of the \code{DataFrame} objects in \code{...}. Very similar to \code{\link{rbind.data.frame}}, except in the handling of row names. If all elements have row names, they are concatenated and made unique. Otherwise, the result does not have row names. Currently, factors are not handled well (their levels are dropped). This is not a high priority until there is an \code{XFactor} class. } \item{}{ \code{cbind(...)}: Creates a new \code{DataFrame} by combining the columns of the \code{DataFrame} objects in \code{...}. Very similar to \code{\link{cbind.data.frame}}, except row names, if any, are dropped. Consider the \code{DataFrame} as an alternative that allows one to specify row names. } } } \author{Michael Lawrence} \seealso{ \itemize{ \item \link{DataTable} and \link{SimpleList} which DataFrame extends directly. } } \examples{ score <- c(1L, 3L, NA) counts <- c(10L, 2L, NA) row.names <- c("one", "two", "three") df <- DataFrame(score) # single column df[["score"]] df <- DataFrame(score, row.names = row.names) #with row names rownames(df) df <- DataFrame(vals = score) # explicit naming df[["vals"]] # arrays ary <- array(1:4, c(2,1,2)) sw <- DataFrame(I(ary)) # a data.frame sw <- DataFrame(swiss) as.data.frame(sw) # swiss, without row names # now with row names sw <- DataFrame(swiss, row.names = rownames(swiss)) as.data.frame(sw) # swiss # subsetting sw[] # identity subset sw[,] # same sw[NULL] # no columns sw[,NULL] # no columns sw[NULL,] # no rows ## select columns sw[1:3] sw[,1:3] # same as above sw[,"Fertility"] sw[,c(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE)] ## select rows and columns sw[4:5, 1:3] sw[1] # one-column DataFrame ## the same sw[, 1, drop = FALSE] sw[, 1] # a (unnamed) vector sw[[1]] # the same sw[["Fertility"]] sw[["Fert"]] # should return 'NULL' sw[1,] # a one-row DataFrame sw[1,, drop=TRUE] # a list ## duplicate row, unique row names are created sw[c(1, 1:2),] ## indexing by row names sw["Courtelary",] subsw <- sw[1:5,1:4] subsw["C",] # partially matches ## row and column names cn <- paste("X", seq_len(ncol(swiss)), sep = ".") colnames(sw) <- cn colnames(sw) rn <- seq(nrow(sw)) rownames(sw) <- rn rownames(sw) ## column replacement df[["counts"]] <- counts df[["counts"]] df[[3]] <- score df[["X"]] df[[3]] <- NULL # deletion } \keyword{classes} \keyword{methods} S4Vectors/man/DataTable-class.Rd0000644000175400017540000001656113175714520017440 0ustar00biocbuildbiocbuild\name{DataTable-class} \docType{class} \alias{class:DataTable} \alias{DataTable-class} \alias{DataTable} \alias{NROW,DataTable-method} \alias{NCOL,DataTable-method} \alias{dim,DataTable-method} \alias{ROWNAMES} \alias{ROWNAMES,ANY-method} \alias{ROWNAMES,DataTable-method} \alias{dimnames,DataTable-method} \alias{dimnames<-,DataTable-method} \alias{head.DataTable} \alias{tail.DataTable} \alias{subset,DataTable-method} \alias{na.omit,DataTable-method} \alias{na.exclude,DataTable-method} \alias{is.na,DataTable-method} \alias{complete.cases,DataTable-method} \alias{cbind,DataTable-method} \alias{rbind,DataTable-method} \alias{merge,DataTable,DataTable-method} \alias{merge,data.frame,DataTable-method} \alias{merge,DataTable,data.frame-method} \alias{by,DataTable-method} \alias{transform.DataTable} \alias{transform,DataTable-method} \alias{duplicated,DataTable-method} \alias{duplicated.DataTable} \alias{unique,DataTable-method} \alias{unique.DataTable} \alias{as.env} \alias{as.env,NULL-method} \alias{as.env,DataTable-method} \alias{as.data.frame.DataTable} \alias{as.data.frame,DataTable-method} \alias{show,DataTable-method} \alias{xtabs,DataTable-method} \title{DataTable objects} \description{ DataTable is an API only (i.e. virtual class with no slots) for accessing objects with a rectangular shape like \link{DataFrame} or \link[IRanges]{RangedData} objects. It mimics the API for standard \link{data.frame} objects. } \section{Accessors}{ In the following code snippets, \code{x} is a \code{DataTable}. \describe{ \item{}{ \code{nrow(x)}, \code{ncol(x)}: Get the number of rows and columns, respectively. } \item{}{ \code{NROW(x)}, \code{NCOL(x)}: Same as \code{nrow(x)} and \code{ncol(x)}, respectively. } \item{}{ \code{dim(x)}: Length two integer vector defined as \code{c(nrow(x), ncol(x))}. } \item{}{ \code{rownames(x)}, \code{colnames(x)}: Get the names of the rows and columns, respectively. } \item{}{ \code{dimnames(x)}: Length two list of character vectors defined as \code{list(rownames(x), colnames(x))}. } } } \section{Subsetting}{ In the code snippets below, \code{x} is a DataTable object. \describe{ \item{}{ \code{x[i, j, drop=TRUE]}: Return a new DataTable object made of the selected rows and columns. For single column selection, the \code{drop} argument specifies whether or not to coerce the returned sequence to a standard vector. } \item{}{ \code{head(x, n=6L)}: If \code{n} is non-negative, returns the first n rows of the DataTable object. If \code{n} is negative, returns all but the last \code{abs(n)} rows of the DataTable object. } \item{}{ \code{tail(x, n=6L)}: If \code{n} is non-negative, returns the last n rows of the DataTable object. If \code{n} is negative, returns all but the first \code{abs(n)} rows of the DataTable object. } \item{}{ \code{subset(x, subset, select, drop=FALSE)}: Return a new DataTable object using: \describe{ \item{subset}{logical expression indicating rows to keep, where missing values are taken as FALSE.} \item{select}{expression indicating columns to keep.} \item{drop}{passed on to \code{[} indexing operator.} } } \item{}{ \code{\link[stats:na.fail]{na.omit}(object)}: Returns a subset with incomplete cases removed. } \item{}{ \code{\link[stats:na.fail]{na.exclude}(object)}: Returns a subset with incomplete cases removed (but to be included with NAs in statistical results). } \item{}{ \code{\link[base:NA]{is.na}(x)}: Returns a logical matrix indicating which cells are missing. } \item{}{ \code{\link[stats]{complete.cases}(x)}: Returns a logical vector identifying which cases have no missing values. } } } \section{Combining}{ In the code snippets below, \code{x} is a DataTable object. \describe{ \item{}{ \code{cbind(...)}: Creates a new \code{DataTable} by combining the columns of the \code{DataTable} objects in \code{...}. } \item{}{ \code{rbind(...)}: Creates a new \code{DataTable} by combining the rows of the \code{DataTable} objects in \code{...}. } \item{}{ \code{merge(x, y, ...)}: Merges two \code{DataTable} objects \code{x} and \code{y}, with arguments in \code{...} being the same as those allowed by the base \code{\link{merge}}. It is allowed for either \code{x} or \code{y} to be a \code{data.frame}. } } } \section{Looping}{ In the code snippets below, \code{x} is a DataTable object. \describe{ \item{}{ \code{by(data, INDICES, FUN, ..., simplify = TRUE)}: Apply \code{FUN} to each group of \code{data}, a \code{DataTable}, formed by the factor (or list of factors) \code{INDICES}. Exactly the same contract as \code{\link{as.data.frame}}. } } } \section{Transforming}{ In the code snippets below, \code{x} is a DataTable object. \describe{ \item{}{ \code{transform(`_data`, ...)}: adds or replaces columns based on expressions in \code{\dots}. See \code{\link{transform}}. } } } \section{Utilities}{ \describe{ \item{}{\code{duplicated(x)}: Returns a logical vector indicating the rows that are identical to a previous row. } \item{}{\code{unique(x)}: Returns a new \code{DataTable} after removing the duplicated rows from \code{x}. } \item{}{ \code{show(x)}: By default the \code{show} method displays 5 head and 5 tail lines. The number of lines can be altered by setting the global options \code{showHeadLines} and \code{showTailLines}. If the object length is less than the sum of the options, the full object is displayed. These options affect GRanges, GAlignments, Ranges, DataTable and XString objects. } } } \section{Coercion}{ \describe{ \item{}{\code{as.env(x, enclos = parent.frame())}: Creates an environment from \code{x} with a symbol for each \code{colnames(x)}. The values are not actually copied into the environment. Rather, they are dynamically bound using \code{\link{makeActiveBinding}}. This prevents unnecessary copying of the data from the external vectors into R vectors. The values are cached, so that the data is not copied every time the symbol is accessed. } \item{}{\code{as.data.frame(x, row.names=NULL, optional=FALSE, ...)}: Creates the corresponding data.frame. } } } \section{Statistical modeling with DataTable}{ A number of wrappers are implemented for performing statistical procedures, such as model fitting, with DataTable objects. \subsection{Tabulation}{ \describe{ \item{}{\code{xtabs(formula = ~., data, subset, na.action, exclude = c(NA, NaN), drop.unused.levels = FALSE)}: Like the original \code{\link{xtabs}}, except \code{data} is a \code{DataTable}. } } } } \seealso{ \itemize{ \item \link{DataFrame} for an implementation that mimics \code{data.frame}. \item \link{data.frame} } } \examples{ showClass("DataTable") # shows (some of) the known subclasses library(IRanges) df <- DataFrame(as.data.frame(UCBAdmissions)) xtabs(Freq ~ Gender + Admit, df) } \keyword{methods} \keyword{classes} S4Vectors/man/FilterMatrix-class.Rd0000644000175400017540000000403413175714520020221 0ustar00biocbuildbiocbuild\name{FilterMatrix-class} \docType{class} \alias{FilterMatrix-class} % accessors \alias{filterRules,FilterMatrix-method} \alias{filterRules} % subsetting \alias{[,FilterMatrix-method} % splitting and combining \alias{cbind,FilterMatrix-method} \alias{rbind,FilterMatrix-method} % constructor \alias{FilterMatrix} % utilities \alias{show,FilterMatrix-method} \alias{summary,FilterMatrix-method} \title{Matrix for Filter Results} \description{A \code{FilterMatrix} object is a matrix meant for storing the logical output of a set of \code{\linkS4class{FilterRules}}, where each rule corresponds to a column. The \code{FilterRules} are stored within the \code{FilterMatrix} object, for the sake of provenance. In general, a \code{FilterMatrix} behaves like an ordinary \code{\link{matrix}}. } \section{Accessor methods}{ In the code snippets below, \code{x} is a \code{FilterMatrix} object. \describe{ \item{}{\code{filterRules(x)}: Get the \code{FilterRules} corresponding to the columns of the matrix. } } } \section{Constructor}{ \describe{ \item{}{ \code{FilterMatrix(matrix, filterRules)}: Constructs a \code{FilterMatrix}, from a given \code{matrix} and \code{filterRules}. Not usually called by the user, see \code{\link{evalSeparately}}. } } } \section{Utilities}{ \describe{ \item{}{ \code{summary(object, discarded = FALSE, percent = FALSE)}: Returns a numeric vector containing the total number of records (\code{nrow}), the number passed by each filter, and the number of records that passed every filter. If \code{discarded} is \code{TRUE}, then the numbers are inverted (i.e., the values are subtracted from the number of rows). If \code{percent} is \code{TRUE}, then the numbers are percent of total. } } } \author{ Michael Lawrence } \seealso{ \itemize{ \item \code{\link{evalSeparately}} is the typical way to generate this object. \item \link{FilterRules} objects. } } \keyword{classes} \keyword{methods} S4Vectors/man/FilterRules-class.Rd0000644000175400017540000002260713175714520020055 0ustar00biocbuildbiocbuild\name{FilterRules-class} \docType{class} \alias{class:expression_OR_function} \alias{expression_OR_function-class} \alias{expression_OR_function} \alias{class:FilterRules} \alias{FilterRules-class} % accessors \alias{active} \alias{active,FilterRules-method} \alias{active<-} \alias{active<-,FilterRules-method} % subsetting \alias{[,FilterRules-method} \alias{[[<-,FilterRules-method} \alias{subsetByFilter} \alias{subsetByFilter,ANY,FilterRules-method} % splitting and combining \alias{append,FilterRules,FilterRules-method} \alias{c,FilterRules-method} % evaluating \alias{eval,FilterRules,ANY-method} \alias{evalSeparately} \alias{evalSeparately,FilterRules-method} % constructor \alias{FilterRules} % general \alias{summary,FilterRules-method} % filter closures \alias{params} \alias{params,FilterClosure-method} \alias{coerce,standardGeneric,FilterClosure-method} \alias{coerce,function,FilterClosure-method} \alias{show,FilterClosure-method} \title{Collection of Filter Rules} \description{A \code{FilterRules} object is a collection of filter rules, which can be either \code{expression} or \code{function} objects. Rules can be disabled/enabled individually, facilitating experimenting with different combinations of filters.} \details{ It is common to split a dataset into subsets during data analysis. When data is large, however, representing subsets (e.g. by logical vectors) and storing them as copies might become too costly in terms of space. The \code{FilterRules} class represents subsets as lightweight \code{expression} and/or \code{function} objects. Subsets can then be calculated when needed (on the fly). This avoids copying and storing a large number of subsets. Although it might take longer to frequently recalculate a subset, it often is a relatively fast operation and the space savings tend to be more than worth it when data is large. Rules may be either expressions or functions. Evaluating an expression or invoking a function should result in a logical vector. Expressions are often more convenient, but functions (i.e. closures) are generally safer and more powerful, because the user can specify the enclosing environment. If a rule is an expression, it is evaluated inside the \code{envir} argument to the \code{eval} method (see below). If a function, it is invoked with \code{envir} as its only argument. See examples. } \section{Accessor methods}{ In the code snippets below, \code{x} is a \code{FilterRules} object. \describe{ \item{}{\code{active(x)}: Get the logical vector of length \code{length(x)}, where \code{TRUE} for an element indicates that the corresponding rule in \code{x} is active (and inactive otherwise). Note that \code{names(active(x))} is equal to \code{names(x)}.} \item{}{\code{active(x) <- value}: Replace the active state of the filter rules. If \code{value} is a logical vector, it should be of length \code{length(x)} and indicate which rules are active. Otherwise, it can be either numeric or character vector, in which case it sets the indicated rules (after dropping NA's) to active and all others to inactive. See examples.} } } \section{Constructor}{ \describe{ \item{}{ \code{FilterRules(exprs = list(), ..., active = TRUE)}: Constructs a \code{FilterRules} with the rules given in the list \code{exprs} or in \code{...}. The initial active state of the rules is given by \code{active}, which is recycled as necessary. Elements in \code{exprs} may be either character (parsed into an expression), a language object (coerced to an expression), an expression, or a function that takes at least one argument. \strong{IMPORTANTLY}, all arguments in \code{...} are \strong{\code{quote()}}'d and then coerced to an expression. So, for example, character data is only parsed if it is a literal. The names of the filters are taken from the names of \code{exprs} and \code{...}, if given. Otherwise, the character vectors take themselves as their name and the others are deparsed (before any coercion). Thus, it is recommended to always specify meaningful names. In any case, the names are made valid and unique. } } } \section{Subsetting and Replacement}{ In the code snippets below, \code{x} is a \code{FilterRules} object. \describe{ \item{}{ \code{x[i]}: Subsets the filter rules using the same interface as for \code{\linkS4class{Vector}}. } \item{}{ \code{x[[i]]}: Extracts an expression or function via the same interface as for \code{\linkS4class{List}}. } \item{}{ \code{x[[i]] <- value}: The same interface as for \code{\linkS4class{List}}. The default active state for new rules is \code{TRUE}. } } } \section{Combining}{ In the code snippets below, \code{x} is a \code{FilterRules} object. \describe{ \item{}{\code{append(x, values, after = length(x))}: Appends the \code{values} \code{FilterRules} instance onto \code{x} at the index given by \code{after}. } \item{}{\code{c(x, ..., recursive = FALSE)}: Concatenates the \code{FilterRule} instances in \code{...} onto the end of \code{x}. } } } \section{Evaluating}{ \describe{ \item{}{ \code{eval(expr, envir = parent.frame(), enclos = if (is.list(envir) || is.pairlist(envir)) parent.frame() else baseenv())}: Evaluates a \code{FilterRules} instance (passed as the \code{expr} argument). Expression rules are evaluated in \code{envir}, while function rules are invoked with \code{envir} as their only argument. The evaluation of a rule should yield a logical vector. The results from the rule evaluations are combined via the AND operation (i.e. \code{&}) so that a single logical vector is returned from \code{eval}. } \item{}{ \code{evalSeparately(expr, envir = parent.frame(), enclos = if (is.list(envir) || is.pairlist(envir)) parent.frame() else baseenv())}: Evaluates separately each rule in a \code{FilterRules} instance (passed as the \code{expr} argument). Expression rules are evaluated in \code{envir}, while function rules are invoked with \code{envir} as their only argument. The evaluation of a rule should yield a logical vector. The results from the rule evaluations are combined into a logical matrix, with a column for each rule. This is essentially the parallel evaluator, while \code{eval} is the serial evaluator. } \item{}{ \code{subsetByFilter(x, filter)}: Evaluates \code{filter} on \code{x} and uses the result to subset \code{x}. The result contains only the elements in \code{x} for which \code{filter} evaluates to \code{TRUE}. } \item{}{\code{summary(object, subject)}: Returns an integer vector with the number of elements in \code{subject} that pass each rule in \code{object}, along with a count of the elements that pass all filters. } } } \section{Filter Closures}{ When a closure (function) is included as a filter in a \code{FilterRules} object, it is converted to a \code{FilterClosure}, which is currently nothing more than a marker class that extends \code{function}. When a \code{FilterClosure} filter is extracted, there are some accessors and utilities for manipulating it: \describe{ \item{}{\code{params}: Gets a named list of the objects that are present in the enclosing environment (without inheritance). This assumes that a filter is constructed via a constructor function, and the objects in the frame of the constructor (typically, the formal arguments) are the parameters of the filter. } } } \author{ Michael Lawrence } \seealso{ \link{FilterMatrix} objects for storing the logical output of a set of FilterRules objects. } \examples{ ## constructing a FilterRules instance ## an empty set of filters filters <- FilterRules() ## as a simple character vector filts <- c("peaks", "promoters") filters <- FilterRules(filts) active(filters) # all TRUE ## with functions and expressions filts <- list(peaks = expression(peaks), promoters = expression(promoters), find_eboxes = function(rd) rep(FALSE, nrow(rd))) filters <- FilterRules(filts, active = FALSE) active(filters) # all FALSE ## direct, quoted args (character literal parsed) filters <- FilterRules(under_peaks = peaks, in_promoters = "promoters") filts <- list(under_peaks = expression(peaks), in_promoters = expression(promoters)) ## specify both exprs and additional args filters <- FilterRules(filts, diffexp = de) filts <- c("promoters", "peaks", "introns") filters <- FilterRules(filts) ## evaluation df <- DataFrame(peaks = c(TRUE, TRUE, FALSE, FALSE), promoters = c(TRUE, FALSE, FALSE, TRUE), introns = c(TRUE, FALSE, FALSE, FALSE)) eval(filters, df) fm <- evalSeparately(filters, df) identical(filterRules(fm), filters) summary(fm) summary(fm, percent = TRUE) fm <- evalSeparately(filters, df, serial = TRUE) ## set the active state directly active(filters) <- FALSE # all FALSE active(filters) <- TRUE # all TRUE active(filters) <- c(FALSE, FALSE, TRUE) active(filters)["promoters"] <- TRUE # use a filter name ## toggle the active state by name or index active(filters) <- c(NA, 2) # NA's are dropped active(filters) <- c("peaks", NA) } \keyword{classes} \keyword{methods} S4Vectors/man/Hits-class.Rd0000644000175400017540000002510513175714520016520 0ustar00biocbuildbiocbuild\name{Hits-class} \docType{class} \alias{class:Hits} \alias{Hits-class} \alias{Hits} \alias{class:SelfHits} \alias{SelfHits-class} \alias{SelfHits} \alias{class:SortedByQueryHits} \alias{SortedByQueryHits-class} \alias{SortedByQueryHits} \alias{class:SortedByQuerySelfHits} \alias{SortedByQuerySelfHits-class} \alias{SortedByQuerySelfHits} \alias{parallelSlotNames,Hits-method} % accessors \alias{from} \alias{from,Hits-method} \alias{to} \alias{to,Hits-method} \alias{nLnode} \alias{nLnode,Hits-method} \alias{nRnode} \alias{nRnode,Hits-method} \alias{nnode} \alias{nnode,SelfHits-method} \alias{countLnodeHits} \alias{countLnodeHits,Hits-method} \alias{countRnodeHits} \alias{countRnodeHits,Hits-method} \alias{queryHits} \alias{subjectHits} \alias{queryLength} \alias{subjectLength} \alias{countQueryHits} \alias{countSubjectHits} % updateObject \alias{updateObject,Hits-method} % coercion \alias{coerce,Hits,SortedByQueryHits-method} \alias{as.matrix,Hits-method} \alias{as.table,Hits-method} % subsetting \alias{extractROWS,classNameForDisplay,ANY-method} % displaying \alias{classNameForDisplay,Hits-method} \alias{classNameForDisplay,SelfHits-method} \alias{show,Hits-method} % commbining \alias{c,Hits-method} % other transformations \alias{t,Hits-method} \alias{remapHits} \alias{breakTies} % SelfHits \alias{isSelfHit} \alias{isRedundantHit} \title{Hits objects} \description{ The Hits class is a container for representing a set of hits between a set of \emph{left nodes} and a set of \emph{right nodes}. Note that only the hits are stored in the object. No information about the left or right nodes is stored, except their number. For example, the \code{\link[IRanges]{findOverlaps}} function, defined and documented in the \pkg{IRanges} package, returns the hits between the \code{query} and \code{subject} arguments in a \code{Hits} object. } \usage{ ## Constructor functions Hits(from=integer(0), to=integer(0), nLnode=0L, nRnode=0L, ..., sort.by.query=FALSE) SelfHits(from=integer(0), to=integer(0), nnode=0L, ..., sort.by.query=FALSE) } \arguments{ \item{from, to}{ 2 integer vectors of the same length. The values in \code{from} must be >= 1 and <= \code{nLnode}. The values in \code{to} must be >= 1 and <= \code{nRnode}. } \item{nLnode, nRnode}{ Number of left and right nodes. } \item{...}{ Metadata columns to set on the Hits object. All the metadata columns must be vector-like objects of the same length as \code{from} and \code{to}. } \item{sort.by.query}{ Should the hits in the returned object be sorted by query? If yes, then a SortedByQueryHits object is returned (SortedByQueryHits is a subclass of Hits). } \item{nnode}{ Number of nodes. } } \section{Accessors}{ In the code snippets below, \code{x} is a \code{Hits} object. \describe{ \item{}{\code{length(x)}: get the number of hits} \item{}{\code{from(x)}: Equivalent to \code{as.data.frame(x)[[1]]}.} \item{}{\code{to(x)}: Equivalent to \code{as.data.frame(x)[[2]]}.} \item{}{\code{nLnode(x)}, \code{nrow(x)}: get the number of left nodes} \item{}{\code{nRnode(x)}, \code{ncol(x)}: get the number of right nodes} \item{}{\code{countLnodeHits(x)}: Counts the number of hits for each left node, returning an integer vector. } \item{}{\code{countRnodeHits(x)}: Counts the number of hits for each right node, returning an integer vector. } } The following accessors are just aliases for the above accessors: \describe{ \item{}{\code{queryHits(x)}: alias for \code{from(x)}.} \item{}{\code{subjectHits(x)}: alias for \code{to(x)}.} \item{}{\code{queryLength(x)}: alias for \code{nLnode(x)}.} \item{}{\code{subjectLength(x)}: alias for \code{nRnode(x)}.} \item{}{\code{countQueryHits(x)}: alias for \code{countLnodeHits(x)}.} \item{}{\code{countSubjectHits(x)}: alias for \code{countRnodeHits(x)}.} } } \section{Coercion}{ In the code snippets below, \code{x} is a \code{Hits} object. \describe{ \item{}{\code{as.matrix(x)}: Coerces \code{x} to a two column integer matrix, with each row representing a hit between a left node (first column) and a right node (second column). } \item{}{\code{as.table(x)}: Counts the number of hits for each left node in \code{x} and outputs the counts as a \code{table}. } } } \section{Subsetting}{ In the code snippets below, \code{x} is a \code{Hits} object. \describe{ \item{}{\code{x[i]}: Subset the Hits object.} } } \section{Other transformations}{ In the code snippets below, \code{x} is a \code{Hits} object. \describe{ \item{}{\code{t(x)}: Transpose \code{x} by interchanging the left and right nodes. This allows, for example, counting the number of hits for each right node using \code{as.table}. } \item{}{\code{remapHits(x, Lnodes.remapping=NULL, new.nLnode=NA, Rnodes.remapping=NULL, new.nRnode=NA)}: Only supports SortedByQueryHits objects at the moment. Remaps the left and/or right nodes in \code{x}. The left nodes are remapped thru the map specified via the \code{Lnodes.remapping} and \code{new.nLnode} arguments. The right nodes are remapped thru the map specified via the \code{Rnodes.remapping} and \code{new.nRnode} arguments. \code{Lnodes.remapping} must represent a function defined on the 1..M interval that takes values in the 1..N interval, where N is \code{nLnode(x)} and M is the value specified by the user via the \code{new.nLnode} argument. Note that this mapping function doesn't need to be injective or surjective. Also it is not represented by an R function but by an integer vector of length M with no NAs. More precisely \code{Lnodes.remapping} can be NULL (identity map), or a vector of \code{nLnode(x)} non-NA integers that are >= 1 and <= \code{new.nLnode}, or a factor of length \code{nLnode(x)} with no NAs (a factor is treated as an integer vector, and, if missing, \code{new.nLnode} is taken to be its number of levels). Note that a factor will typically be used to represent a mapping function that is not injective. The same applies to the \code{Rnodes.remapping}. \code{remapHits} returns a Hits object where \code{from(x)} and \code{to(x)} have been remapped thru the 2 specified maps. This remapping is actually only the 1st step of the transformation, and is followed by 2 additional steps: (2) the removal of duplicated hits, and (3) the reordering of the hits (first by query hits, then by subject hits). Note that if the 2 maps are injective then the remapping won't introduce duplicated hits, so, in that case, step (2) is a no-op (but is still performed). Also if the "query map" is strictly ascending and the "subject map" ascending then the remapping will preserve the order of the hits, so, in that case, step (3) is also a no-op (but is still performed). } \item{}{\code{breakTies(x, method=c("first", "last"))}: Restrict the hits so that every left node maps to at most one right node. If \code{method} is \dQuote{first}, for each left node, select the edge with the first (lowest rank) right node, if any. If \code{method} is \dQuote{last}, select the edge with the last (highest rank) right node. } } } \section{SelfHits}{ A SelfHits object is a Hits object where the left and right nodes are identical. For a SelfHits object \code{x}, \code{nLnode(x)} is equal to \code{nRnode(x)}. The object can be seen as an oriented graph where \code{nLnode} is the nb of nodes and the hits are the (oriented) edges. SelfHits objects support the same set of accessors as Hits objects plus the \code{nnode()} accessor that is equivalent to \code{nLnode()} and \code{nRnode()}. We also provide two little utilities to operate on a SelfHits object \code{x}: \describe{ \item{}{\code{isSelfHit(x)}: A \emph{self hit} is an edge from a node to itself. \code{isSelfHit(x)} returns a logical vector \emph{parallel} to \code{x} indicating which elements in \code{x} are self hits. } \item{}{\code{isRedundantHit(x)}: When there is more than 1 edge between 2 given nodes (regardless of orientation), the extra edges are considered to be \emph{redundant hits}. \code{isRedundantHit(x)} returns a logical vector \emph{parallel} to \code{x} indicating which elements in \code{x} are redundant hits. } } } \author{Michael Lawrence and Hervé Pagès} \seealso{ \itemize{ \item \link{Hits-comparison} for comparing and ordering hits. \item The \code{\link[IRanges]{findOverlaps}} function in the \pkg{IRanges} package which returns SortedByQueryHits object. \item \link[IRanges]{Hits-examples} in the \pkg{IRanges} package, for some examples of Hits object basic manipulation. \item \link[IRanges]{setops-methods} in the \pkg{IRanges} package, for set operations on Hits objects. } } \examples{ from <- c(5, 2, 3, 3, 3, 2) to <- c(11, 15, 5, 4, 5, 11) id <- letters[1:6] Hits(from, to, 7, 15, id) Hits(from, to, 7, 15, id, sort.by.query=TRUE) ## --------------------------------------------------------------------- ## selectHits() ## --------------------------------------------------------------------- x <- c("a", "b", "a", "c", "d") table <- c("a", "e", "d", "a", "a", "d") hits <- findMatches(x, table) # sorts the hits by query hits selectHits(hits, select="all") # no-op selectHits(hits, select="first") selectHits(hits, select="last") selectHits(hits, select="arbitrary") selectHits(hits, select="count") ## --------------------------------------------------------------------- ## remapHits() ## --------------------------------------------------------------------- Lnodes.remapping <- factor(c(a="A", b="B", c="C", d="D")[x], levels=LETTERS[1:4]) remapHits(hits, Lnodes.remapping=Lnodes.remapping) ## See ?`Hits-examples` in the IRanges package for more examples of basic ## manipulation of Hits objects. ## --------------------------------------------------------------------- ## SelfHits objects ## --------------------------------------------------------------------- hits2 <- SelfHits(c(2, 3, 3, 3, 3, 3, 4, 4, 4), c(4, 3, 2:4, 2, 2:3, 2), 4) ## Hits 2 and 4 are self hits (from 3rd node to itself): which(isSelfHit(hits2)) ## Hits 4, 6, 7, 8, and 9, are redundant hits: which(isRedundantHit(hits2)) hits3 <- findMatches(x) hits3[!isSelfHit(hits3)] hits3[!(isSelfHit(hits3) | isRedundantHit(hits3))] } \keyword{methods} \keyword{classes} S4Vectors/man/Hits-comparison.Rd0000644000175400017540000001102213175714520017556 0ustar00biocbuildbiocbuild\name{Hits-comparison} \alias{Hits-comparison} \alias{pcompare,Hits,Hits-method} \alias{match,Hits,Hits-method} \alias{order,Hits-method} \title{Comparing and ordering hits} \description{ \code{==}, \code{!=}, \code{<=}, \code{>=}, \code{<}, \code{>}, \code{match()}, \code{\%in\%}, \code{order()}, \code{sort()}, and \code{rank()} can be used on \link{Hits} objects to compare and order hits. Note that only the \code{"pcompare"}, \code{"match"}, and \code{"order"} methods are actually defined for \link{Hits} objects. This is all what is needed to make all the other comparing and ordering operations (i.e. \code{==}, \code{!=}, \code{<=}, \code{>=}, \code{<}, \code{>}, \code{\%in\%}, \code{sort()}, and \code{rank()}) work on these objects (see \code{?`\link{Vector-comparison}`} for more information about this). } \usage{ \S4method{pcompare}{Hits,Hits}(x, y) \S4method{match}{Hits,Hits}(x, table, nomatch=NA_integer_, incomparables=NULL, method=c("auto", "quick", "hash")) \S4method{order}{Hits}(..., na.last=TRUE, decreasing=FALSE, method=c("auto", "shell", "radix")) } \arguments{ \item{x, y, table}{ \emph{Compatible} \link{Hits} objects, that is, \link{Hits} objects with the same subject and query lengths. } \item{nomatch}{ The value to be returned in the case when no match is found. It is coerced to an \code{integer}. } \item{incomparables}{ Not supported. } \item{method}{ For \code{match}: Use a Quicksort-based (\code{method="quick"}) or a hash-based (\code{method="hash"}) algorithm. The latter tends to give better performance, except maybe for some pathological input that we've not encountered so far. When \code{method="auto"} is specified, the most efficient algorithm will be used, that is, the hash-based algorithm if \code{length(x) <= 2^29}, otherwise the Quicksort-based algorithm. For \code{order}: The \code{method} argument is ignored. } \item{...}{ One or more \link{Hits} objects. The additional \link{Hits} objects are used to break ties. } \item{na.last}{ Ignored. } \item{decreasing}{ \code{TRUE} or \code{FALSE}. } } \details{ Only hits that belong to \link{Hits} objects with same subject and query lengths can be compared. Hits are ordered by query hit first, and then by subject hit. On a \link{Hits} object, \code{order}, \code{sort}, and \code{rank} are consistent with this order. \describe{ \item{}{ \code{pcompare(x, y)}: Performs element-wise (aka "parallel") comparison of 2 \link{Hits} objects \code{x} and \code{y}, that is, returns an integer vector where the i-th element is less than, equal to, or greater than zero if \code{x[i]} is considered to be respectively less than, equal to, or greater than \code{y[i]}. See \code{?`\link{Vector-comparison}`} for how \code{x} or \code{y} is recycled when the 2 objects don't have the same length. } \item{}{ \code{match(x, table, nomatch=NA_integer_, method=c("auto", "quick", "hash"))}: Returns an integer vector of the length of \code{x}, containing the index of the first matching hit in \code{table} (or \code{nomatch} if there is no matching hit) for each hit in \code{x}. } \item{}{ \code{order(...)}: Returns a permutation which rearranges its first argument (a \link{Hits} object) into ascending order, breaking ties by further arguments (also \link{Hits} objects). } } } \author{Hervé Pagès} \seealso{ \itemize{ \item \link{Hits} objects. \item \link{Vector-comparison} for general information about comparing, ordering, and tabulating vector-like objects. } } \examples{ ## --------------------------------------------------------------------- ## A. ELEMENT-WISE (AKA "PARALLEL") COMPARISON OF 2 Hits OBJECTS ## --------------------------------------------------------------------- hits <- Hits(c(2, 4, 4, 4, 5, 5), c(3, 1, 3, 2, 3, 2), 6, 3) hits pcompare(hits, hits[3]) pcompare(hits[3], hits) hits == hits[3] hits != hits[3] hits >= hits[3] hits < hits[3] ## --------------------------------------------------------------------- ## B. match(), %in% ## --------------------------------------------------------------------- table <- hits[-c(1, 3)] match(hits, table) hits \%in\% table ## --------------------------------------------------------------------- ## C. order(), sort(), rank() ## --------------------------------------------------------------------- order(hits) sort(hits) rank(hits) } \keyword{methods} S4Vectors/man/Hits-setops.Rd0000644000175400017540000000417413175714520016733 0ustar00biocbuildbiocbuild\name{Hits-setops} \alias{Hits-setops} \alias{union,Hits,Hits-method} \title{Set operations on Hits objects} \description{ Perform set operations on \link{Hits} objects. } \details{ \code{union(x, y)}, \code{intersect(x, y)}, \code{setdiff(x, y)}, and \code{setequal(x, y)} work on \link{Hits} objects \code{x} and \code{y} only if the objects are \emph{compatible Hits objects}, that is, if they have the same subject and query lengths. These operations return respectively the union, intersection, (asymmetric!) difference, and equality of the \emph{sets} of hits in \code{x} and \code{y}. } \value{ \code{union} returns a \link{Hits} object obtained by appending to \code{x} the hits in \code{y} that are not already in \code{x}. \code{intersect} returns a \link{Hits} object obtained by keeping only the hits in \code{x} that are also in \code{y}. \code{setdiff} returns a \link{Hits} object obtained by dropping from \code{x} the hits that are in \code{y}. \code{setequal} returns \code{TRUE} if \code{x} and \code{y} contain the same \emph{sets} of hits and \code{FALSE} otherwise. \code{union}, \code{intersect}, and \code{setdiff} propagate the names and metadata columns of their first argument (\code{x}). } \author{Hervé Pagès and Michael Lawrence} \seealso{ \itemize{ \item \link{Hits} objects. \item \link{Hits-comparison} for comparing and ordering hits. \item \code{BiocGenerics::\link[BiocGenerics]{union}}, \code{BiocGenerics::\link[BiocGenerics]{intersect}}, and \code{BiocGenerics::\link[BiocGenerics]{setdiff}} in the \pkg{BiocGenerics} package for general information about these generic functions. } } \examples{ x <- Hits(c(2, 4, 4, 4, 5, 5), c(3, 1, 3, 2, 3, 2), 6, 3, score=11:16) x y <- Hits(c(1, 3, 4, 4, 5, 5, 5), c(3, 3, 2, 1, 2, 1, 3), 6, 3, score=21:27) y union(x, y) union(y, x) # same hits as in union(x, y), but in different order intersect(x, y) intersect(y, x) # same hits as in intersect(x, y), but in # different order setdiff(x, y) setdiff(y, x) setequal(x, y) } \keyword{methods} S4Vectors/man/HitsList-class.Rd0000644000175400017540000000626013175714520017355 0ustar00biocbuildbiocbuild\name{HitsList-class} \docType{class} \alias{class:HitsList} \alias{HitsList-class} \alias{HitsList} \alias{class:SelfHitsList} \alias{SelfHitsList-class} \alias{SelfHitsList} \alias{class:SortedByQueryHitsList} \alias{SortedByQueryHitsList-class} \alias{SortedByQueryHitsList} \alias{class:SortedByQuerySelfHitsList} \alias{SortedByQuerySelfHitsList-class} \alias{SortedByQuerySelfHitsList} % accessors \alias{space} \alias{space,HitsList-method} \alias{subjectHits,HitsList-method} \alias{queryHits,HitsList-method} \alias{relistToClass,Hits-method} \alias{relistToClass,SortedByQueryHits-method} % coercion \alias{as.matrix,HitsList-method} \alias{as.table,HitsList-method} \alias{t,HitsList-method} \title{List of Hits objects} \description{ The HitsList class stores a set of \link{Hits} objects. It's typically used to represent the result of \code{\link[IRanges]{findOverlaps}} on two \link[IRanges]{RangesList} objects. } \details{ Roughly the same set of utilities are provided for HitsList as for \link{Hits}: The \code{as.matrix} method coerces a HitsList object in a similar way to \link{Hits}, except a column is prepended that indicates which space (or element in the query \link[IRanges]{RangesList}) to which the row corresponds. The \code{as.table} method flattens or unlists the list, counts the number of hits for each query range and outputs the counts as a \code{table}, which has the same shape as from a single \link{Hits} object. To transpose a HitsList object \code{x}, so that the subject and query in each space are interchanged, call \code{t(x)}. This allows, for example, counting the number of hits for each subject element using \code{as.table}. } \section{Accessors}{ \describe{ \item{}{\code{queryHits(x)}: Equivalent to \code{unname(as.matrix(x)[,1])}. } \item{}{\code{subjectHits(x)}: Equivalent to \code{unname(as.matrix(x)[,2])}. } \item{}{\code{space(x)}: gets the character vector naming the space in the query \link[IRanges]{RangesList} for each hit, or \code{NULL} if the query did not have any names. } } } \section{Coercion}{ In the code snippets below, \code{x} is a HitsList object. \describe{ \item{}{\code{as.matrix(x)}: calls \code{as.matrix} on each \link{Hits}, combines them row-wise and offsets the indices so that they are aligned with the result of calling \code{unlist} on the query and subject. } \item{}{\code{as.table(x)}: counts the number of hits for each query element in \code{x} and outputs the counts as a \code{table}, which is aligned with the result of calling \code{unlist} on the query. } \item{}{\code{t(x)}: Interchange the query and subject in each space of \code{x}, returns a transposed HitsList object.} } } \note{This class is highly experimental. It has not been well tested and may disappear at any time.} \author{ Michael Lawrence } \seealso{ \itemize{ \item \code{\link[IRanges]{findOverlaps}} in the \pkg{IRanges} package, which returns a HitsList object when the query and subject are \link[IRanges]{RangesList} objects. } } \keyword{methods} \keyword{classes} S4Vectors/man/LLint-class.Rd0000644000175400017540000001652513175714520016641 0ustar00biocbuildbiocbuild\name{LLint-class} \docType{class} \alias{class:LLint} \alias{LLint-class} \alias{is.LLint} \alias{length,LLint-method} \alias{coerce,logical,LLint-method} \alias{coerce,integer,LLint-method} \alias{coerce,numeric,LLint-method} \alias{coerce,character,LLint-method} \alias{as.LLint} \alias{as.logical.LLint} \alias{as.logical,LLint-method} \alias{as.integer.LLint} \alias{as.integer,LLint-method} \alias{as.numeric.LLint} \alias{as.numeric,LLint-method} \alias{as.character.LLint} \alias{as.character,LLint-method} \alias{LLint} \alias{show,LLint-method} \alias{showAsCell,LLint-method} \alias{c,LLint-method} \alias{NA_LLint_} \alias{is.na,LLint-method} \alias{Ops,LLint,LLint-method} \alias{Ops,LLint,numeric-method} \alias{Ops,numeric,LLint-method} \alias{Summary,LLint-method} \title{LLint vectors} \description{ The LLint class is a container for storing a vector of \emph{large integers} (i.e. long long int values at the C level). } \usage{ LLint(length=0L) as.LLint(x) is.LLint(x) } \arguments{ \item{length}{ A non-negative number (i.e. integer, double, or LLint value) specifying the desired length. } \item{x}{ Object to be coerced or tested. } } \details{ LLint vectors aim to provide the same functionality as integer vectors in base R but their values are stored as long long int values at the C level vs int values for integer vectors. Note that on Intel platforms long long int values are 64-bit and int values 32-bit only. Therefore LLint vectors can hold values in the +/-9.223e18 range (approximately) vs +/-2.147e9 only for integer vectors. NAs are supported and the \code{NA_LLint_} constant is predefined for convenience as \code{as(NA, "LLint")}. Names are not supported for now. Coercions from/to logical, integer, double, and character are supported. Operations from the \code{\link{Arith}}, \code{\link{Compare}} and \code{\link{Summary}} groups are supported. More operations coming soon... } \author{Hervé Pagès} \seealso{ \itemize{ \item \link[base]{integer} vectors in base R. \item The \code{\link{Arith}}, \code{\link{Compare}} and \code{\link{Summary}} group generics in the \pkg{methods} package. } } \examples{ ## A long long int uses 8 bytes (i.e. 64 bits) in C: .Machine$sizeof.longlong ## --------------------------------------------------------------------- ## SIMPLE EXAMPLES ## --------------------------------------------------------------------- LLint() LLint(10) as.LLint(3e9) as.LLint("3000000000") x <- as.LLint(1:10 * 111111111) x * x 5 * x # result as vector of doubles (i.e. 'x' coerced to double) 5L * x # result as LLint vector (i.e. 5L coerced to LLint vector) max(x) min(x) range(x) sum(x) x <- as.LLint(1:20) prod(x) x <- as.LLint(1:21) prod(x) # result is out of LLint range (+/-9.223e18) prod(as.numeric(x)) x <- as.LLint(1:75000) sum(x * x * x) == sum(x) * sum(x) ## Note that max(), min() and range() *always* return an LLint vector ## when called on an LLint vector, even when the vector is empty: max(LLint()) # NA with no warning min(LLint()) # NA with no warning ## This differs from how max(), min() and range() behave on an empty ## integer vector: max(integer()) # -Inf with a warning min(integer()) # Inf with a warning ## --------------------------------------------------------------------- ## GOING FROM STRINGS TO INTEGERS ## --------------------------------------------------------------------- ## as.integer() behaves like as.integer(as.double()) on a character ## vector. With the following consequence: s <- "-2.9999999999999999" as.integer(s) # -3 ## as.LLint() converts the string *directly* to LLint, without ## coercing to double first: as.LLint(s) # decimal part ignored ## --------------------------------------------------------------------- ## GOING FROM DOUBLE-PRECISION VALUES TO INTEGERS AND VICE-VERSA ## --------------------------------------------------------------------- ## Be aware that a double-precision value is not guaranteed to represent ## exactly an integer > 2^53. This can cause some surprises: 2^53 == 2^53 + 1 # TRUE, yep! ## And therefore: as.LLint(2^53) == as.LLint(2^53 + 1) # also TRUE ## This can be even more disturbing when passing a big literal integer ## value because the R parser will turn it into a double-precision value ## before passing it to as.LLint(): x1 <- as.LLint(9007199254740992) # same as as.LLint(2^53) x1 x2 <- as.LLint(9007199254740993) # same as as.LLint(2^53 + 1) x2 x1 == x2 # still TRUE ## However, no precision is lost if a string literal is used instead: x1 <- as.LLint("9007199254740992") x1 x2 <- as.LLint("9007199254740993") x2 x1 == x2 # FALSE x2 - x1 d1 <- as.double(x1) d2 <- as.double(x2) # warning! d1 == d2 # TRUE ## --------------------------------------------------------------------- ## LLint IS IMPLEMENTED AS AN S4 CLASS ## --------------------------------------------------------------------- class(LLint(10)) typeof(LLint(10)) # S4 storage.mode(LLint(10)) # S4 is.vector(LLint(10)) # FALSE is.atomic(LLint(10)) # FALSE ## This means that an LLint vector cannot go in an ordinary data ## frame: \dontrun{ data.frame(id=as.LLint(1:5)) # error! } ## A DataFrame needs to be used instead: DataFrame(id=as.LLint(1:5)) ## --------------------------------------------------------------------- ## SANITY CHECKS ## --------------------------------------------------------------------- x <- as.integer(c(0, 1, -1, -3, NA, -99)) y <- as.integer(c(-6, NA, -4:3, 0, 1999, 6:10, NA)) xx <- as.LLint(x) yy <- as.LLint(y) ## Operations from "Arith" group: stopifnot(identical(x + y, as.integer(xx + yy))) stopifnot(identical(as.LLint(y + x), yy + xx)) stopifnot(identical(x - y, as.integer(xx - yy))) stopifnot(identical(as.LLint(y - x), yy - xx)) stopifnot(identical(x * y, as.integer(xx * yy))) stopifnot(identical(as.LLint(y * x), yy * xx)) stopifnot(identical(x / y, xx / yy)) stopifnot(identical(y / x, yy / xx)) stopifnot(identical(x \%/\% y, as.integer(xx \%/\% yy))) stopifnot(identical(as.LLint(y \%/\% x), yy \%/\% xx)) stopifnot(identical(x \%\% y, as.integer(xx \%\% yy))) stopifnot(identical(as.LLint(y \%\% x), yy \%\% xx)) stopifnot(identical(x ^ y, xx ^ yy)) stopifnot(identical(y ^ x, yy ^ xx)) ## Operations from "Compare" group: stopifnot(identical(x == y, xx == yy)) stopifnot(identical(y == x, yy == xx)) stopifnot(identical(x != y, xx != yy)) stopifnot(identical(y != x, yy != xx)) stopifnot(identical(x <= y, xx <= yy)) stopifnot(identical(y <= x, yy <= xx)) stopifnot(identical(x >= y, xx >= yy)) stopifnot(identical(y >= x, yy >= xx)) stopifnot(identical(x < y, xx < yy)) stopifnot(identical(y < x, yy < xx)) stopifnot(identical(x > y, xx > yy)) stopifnot(identical(y > x, yy > xx)) ## Operations from "Summary" group: stopifnot(identical(max(y), as.integer(max(yy)))) stopifnot(identical(max(y, na.rm=TRUE), as.integer(max(yy, na.rm=TRUE)))) stopifnot(identical(min(y), as.integer(min(yy)))) stopifnot(identical(min(y, na.rm=TRUE), as.integer(min(yy, na.rm=TRUE)))) stopifnot(identical(range(y), as.integer(range(yy)))) stopifnot(identical(range(y, na.rm=TRUE), as.integer(range(yy, na.rm=TRUE)))) stopifnot(identical(sum(y), as.integer(sum(yy)))) stopifnot(identical(sum(y, na.rm=TRUE), as.integer(sum(yy, na.rm=TRUE)))) stopifnot(identical(prod(y), as.double(prod(yy)))) stopifnot(identical(prod(y, na.rm=TRUE), as.double(prod(yy, na.rm=TRUE)))) } \keyword{methods} \keyword{classes} S4Vectors/man/List-class.Rd0000644000175400017540000002402213175714520016521 0ustar00biocbuildbiocbuild\name{List-class} \docType{class} % List class, functions and methods: \alias{class:List} \alias{List-class} \alias{List} \alias{elementType} \alias{elementType,List-method} \alias{elementType,vector-method} \alias{elementNROWS} \alias{elementNROWS,ANY-method} \alias{elementNROWS,List-method} \alias{isEmpty} \alias{isEmpty,ANY-method} \alias{isEmpty,List-method} \alias{parallelVectorNames,List-method} \alias{relistToClass} \alias{relistToClass,ANY-method} \alias{coerce,List,list-method} \alias{coerce,ANY,List-method} \alias{coerce,integer,List-method} \alias{as.list,List-method} \alias{unlist,List-method} \alias{as.data.frame,List-method} \alias{coerce,List,data.frame-method} \alias{[,List-method} \alias{[<-,List-method} \alias{[[,List-method} \alias{[[<-,List-method} \alias{$,List-method} \alias{$<-,List-method} \alias{show,List-method} \title{List objects} \description{ List objects are \link{Vector} objects with a \code{"[["}, \code{elementType} and \code{elementNROWS} method. The List class serves a similar role as \link[base]{list} in base R. It adds one slot, the \code{elementType} slot, to the two slots shared by all \link{Vector} objects. The \code{elementType} slot is the preferred location for List subclasses to store the type of data represented in the sequence. It is designed to take a character of length 1 representing the class of the sequence elements. While the List class performs no validity checking based on \code{elementType}, if a subclass expects elements to be of a given type, that subclass is expected to perform the necessary validity checking. For example, the subclass \link[IRanges]{IntegerList} (defined in the \pkg{IRanges} package) has \code{elementType = "integer"} and its validity method checks if this condition is TRUE. To be functional, a class that inherits from List must define at least a \code{"[["} method (in addition to the minimum set of \link{Vector} methods). } \section{Construction}{ List objects are typically constructed using one of the 3 following methods: \itemize{ \item Use of a constructor function. Many constructor functions are provided for List objects e.g. \code{List}, \code{\link[IRanges]{IntegerList}}, \code{\link[IRanges]{RleList}}, \code{\link[IRanges]{RangesList}}, \code{\link[GenomicRanges]{GRangesList}}, etc... Which one to use depends on the particular type of List object to construct. The name of a constructor function is always the name of a valid class. If it's the name of a \emph{concrete} class (e.g. the \code{\link[GenomicRanges]{GRangesList}} constructor defined in the \pkg{GenomicRanges} package), then the constructor function returns an instance of that class. If it's the name of a \emph{virtual} class (e.g. the \code{List} constructor defined in this package, or the \code{\link[IRanges]{IntegerList}} or \code{\link[IRanges]{RleList}} or \code{\link[IRanges]{RangesList}} constructors defined in the \pkg{IRanges} package), then the returned object belongs to a concrete subclass of that virtual class. Which subclass exactly depends on each constructor function (see man page of a particular constructor function for the details). \item Coercion to List or to a List subclass. Many coercion methods are provided to turn any object into a List object. One general and convenient way to convert any vector-like object into a List is to call \code{as(x, "List")}. This will typically yield an object from a subclass of \link[IRanges]{CompressedList}. \item Use of \code{\link[IRanges]{extractList}}. This function, defined in the \pkg{IRanges} package, extracts user-specified groups of elements from a vector-like object and returns them in a List (or sometimes list) object. } } \section{Accessors}{ In the following code snippets, \code{x} is a List object. \describe{ \item{}{ \code{length(x)}: Get the number of list elements in \code{x}. } \item{}{ \code{names(x)}, \code{names(x) <- value}: Get or set the names of the elements in the List. } \item{}{ \code{mcols(x, use.names=FALSE)}, \code{mcols(x) <- value}: Get or set the metadata columns. See \link{Vector} man page for more information. } \item{}{ \code{elementType(x)}: Get the scalar string naming the class from which all elements must derive. } \item{}{ \code{elementNROWS(x)}: Get the length (or nb of row for a matrix-like object) of each of the elements. Equivalent to \code{sapply(x, NROW)}. } \item{}{ \code{isEmpty(x)}: Returns a logical indicating either if the sequence has no elements or if all its elements are empty. } } } \section{Coercion}{ To List. \describe{ \item{}{ \code{as(x, "List")}: Converts a vector-like object into a List, usually a \link[IRanges]{CompressedList} derivative. One notable exception is when \code{x} is an ordinary list, in which case \code{as(x, "List")} returns a \link{SimpleList} derivative. To explicitly request a \link{SimpleList} derivative, call \code{as(x, "SimpleList")}. See \code{?\link[IRanges]{CompressedList}} (you might need to load the \pkg{IRanges} package first) and \code{?\link{SimpleList}} for more information about the CompressedList and SimpleList representations. } } From List. In the code snippets below, \code{x} is a List object. \describe{ \item{}{ \code{as.list(x, ...)}, \code{as(from, "list")}: Turns \code{x} into an ordinary list. } \item{}{ \code{unlist(x, recursive=TRUE, use.names=TRUE)}: Concatenates the elements of \code{x} into a single vector-like object (of class \code{elementType(x)}). } \item{}{ \code{as.data.frame(x, row.names=NULL, optional=FALSE , value.name="value", use.outer.mcols=FALSE, group_name.as.factor=FALSE, ...)}: Coerces a \code{List} to a \code{data.frame}. The result has the same length as unlisted \code{x} with two additional columns, \code{group} and \code{group_name}. \code{group} is an \code{integer} that indicates which list element the record came from. \code{group_name} holds the list name associated with each record; value is \code{character} by default and \code{factor} when \code{group_name.as.factor} is TRUE. When \code{use.outer.mcols} is TRUE the metadata columns on the outer list elements of \code{x} are replicated out and included in the \code{data.frame}. List objects that unlist to a single vector (column) are given the column name `value` by default. A custom name can be provided in \code{value.name}. Splitting values in the resulting \code{data.frame} by the original groups in \code{x} should be done using the \code{group} column as the \code{f} argument to \code{splitAsList}. To relist data, use \code{x} as the \code{skeleton} argument to \code{relist}. } } } \section{Subsetting}{ In the code snippets below, \code{x} is a List object. \describe{ \item{}{ \code{x[i]}: Return a new List object made of the list elements selected by subscript \code{i}. Subscript \code{i} can be of any type supported by subsetting of a Vector object (see \link{Vector} man page for the details), plus the following types: \link[IRanges]{IntegerList}, \link[IRanges]{LogicalList}, \link[IRanges]{CharacterList}, integer-\link[IRanges]{RleList}, logical-\link[IRanges]{RleList}, character-\link[IRanges]{RleList}, and \link[IRanges]{RangesList}. Those additional types perform subsetting within the list elements rather than across them. } \item{}{ \code{x[i] <- value}: Replacement version of \code{x[i]}. } \item{}{ \code{x[[i]]}: Return the selected list element \code{i}, where \code{i} is an numeric or character vector of length 1. } \item{}{ \code{x[[i]] <- value}: Replacement version of \code{x[[i]]}. } \item{}{ \code{x$name}, \code{x$name <- value}: Similar to \code{x[[name]]} and \code{x[[name]] <- value}, but \code{name} is taken literally as an element name. } } } \section{relistToClass}{ \code{relistToClass(x)} is the opposite of \code{elementType(y)} in the sense that the former returns the class of the result of relisting (or splitting) \code{x} while the latter returns the class of the result of unlisting (or unsplitting) \code{y}. More formally, if \code{x} is an object that is relistable and \code{y} a list-like object: \preformatted{ relistToClass(x) is class(relist(x, some_skeleton)) elementType(y) is class(unlist(y)) } As a consequence, for any object \code{x} for which \code{relistToClass(x)} is defined and returns a valid class, \code{elementType(new(relistToClass(x)))} should return \code{class(x)}. } \author{P. Aboyoun and H. Pagès} \seealso{ \itemize{ \item \link{List-utils} for common operations on List objects. \item \link{Vector} objects for the parent class. \item The \link{SimpleList} class for a direct extension of the List class. \item The \link[IRanges]{CompressedList} class defined in the \pkg{IRanges} package for another direct extension of the List class. \item The \link[IRanges]{IntegerList}, \link[IRanges]{RleList}, and \link[IRanges]{IRanges} classes and constructors defined in the \pkg{IRanges} package for more examples of concrete List subclasses. \item The \link[IRanges]{extractList} function defined in the \pkg{IRanges} package for grouping elements of a vector-like object into a list-like object. } } \examples{ showClass("List") # shows (some of) the known subclasses } \keyword{methods} \keyword{classes} S4Vectors/man/List-utils.Rd0000644000175400017540000001774713175714520016574 0ustar00biocbuildbiocbuild\name{List-utils} \alias{List-utils} \alias{lapply,List-method} \alias{sapply,List-method} \alias{endoapply} \alias{endoapply,list-method} \alias{endoapply,data.frame-method} \alias{endoapply,List-method} \alias{revElements} \alias{revElements,list-method} \alias{revElements,List-method} \alias{mendoapply} \alias{mendoapply,list-method} \alias{mendoapply,data.frame-method} \alias{mendoapply,List-method} \alias{pc} \alias{Reduce,List-method} \alias{Filter,List-method} \alias{Find,List-method} \alias{Map,List-method} \alias{Position,List-method} \alias{within,List-method} \alias{rbind,List-method} \alias{cbind,List-method} \alias{droplevels.List} \alias{droplevels,List-method} \title{Common operations on List objects} \description{ Various functions and methods for looping on \link{List} objects, functional programming on \link{List} objects, and evaluation of an expression in a \link{List} object. } \usage{ ## Looping on List objects: ## ------------------------ \S4method{lapply}{List}(X, FUN, ...) \S4method{sapply}{List}(X, FUN, ..., simplify=TRUE, USE.NAMES=TRUE) endoapply(X, FUN, ...) revElements(x, i) mendoapply(FUN, ..., MoreArgs=NULL) pc(...) ## Functional programming methods for List objects: ## ------------------------------------------------ \S4method{Reduce}{List}(f, x, init, right=FALSE, accumulate=FALSE) \S4method{Filter}{List}(f, x) \S4method{Find}{List}(f, x, right=FALSE, nomatch=NULL) \S4method{Map}{List}(f, ...) \S4method{Position}{List}(f, x, right=FALSE, nomatch=NA_integer_) ## Evaluation of an expression in a List object: ## --------------------------------------------- \S4method{within}{List}(data, expr, ...) ## Constructing list matrices: ## --------------------------------------------- \S4method{rbind}{List}(..., deparse.level=1L) \S4method{cbind}{List}(..., deparse.level=1L) } \arguments{ \item{X, x}{ A list, data.frame or \link{List} object. } \item{FUN}{ The function to be applied to each element of \code{X} (for \code{endoapply}) or for the elements in \code{...} (for \code{mendoapply}). } \item{...}{ For \code{lapply}, \code{sapply}, and \code{endoapply}, optional arguments to \code{FUN}. For \code{mendoapply}, a set of list, data.frame or \link{List} objects to compute over. For \code{pc}, one or more list-like objects. For \code{Map}, one or more \link{List} objects. (FIXME: Mixing \link{List} objects with ordinary lists doesn't seem to work properly at the moment.) } \item{simplify, USE.NAMES}{ See \code{?base::\link[base]{sapply}} for a description of these arguments. } \item{MoreArgs}{ A list of other arguments to \code{FUN}. } \item{i}{ Index specifying the elements to replace. Can be anything supported by \code{`[<-`}. } \item{f, init, right, accumulate, nomatch}{ See \code{?base::\link[base]{Reduce}} for a description of these arguments. } \item{data}{ A \link{List} object. } \item{expr}{ Expression to evaluate. } \item{deparse.level}{ See \code{?base::\link[base]{rbind}} for a description of this argument. } } \details{ \subsection{Looping on List objects}{ Like the standard \code{\link[base]{lapply}} function defined in the \pkg{base} package, the \code{lapply} method for \link{List} objects returns a list of the same length as \code{X}, with each element being the result of applying \code{FUN} to the corresponding element of \code{X}. Like the standard \code{\link[base]{sapply}} function defined in the \pkg{base} package, the \code{sapply} method for \link{List} objects is a user-friendly version of \code{lapply} by default returning a vector or matrix if appropriate. \code{endoapply} and \code{mendoapply} perform the endomorphic equivalents of \code{\link[base]{lapply}} and \code{\link[base]{mapply}} by returning objects of the same class as the inputs rather than a list. \code{revElements(x, i)} reverses the list elements in \code{x} specified by \code{i}. It's equivalent to, but faster than, doing \code{x[i] <- endoapply(x[i], rev)}. \code{pc(...)} combines list-like objects in an element-wise fashion. It's similar to, but faster than, \code{mapply(c, ..., SIMPLIFY=FALSE)}. With the following differences: \enumerate{ \item \code{pc()} ignores the supplied objects that are NULL. \item \code{pc()} does not recycle its arguments. All the supplied objects must have the same length. \item If one of the supplied objects is a \link{List} object, then \code{pc()} returns a \link{List} object. \item \code{pc()} always returns a homogenous list or \link{List} object, that is, an object where all the list elements have the same type. } } \subsection{Functional programming methods for List objects}{ The R base package defines some higher-order functions that are commonly found in Functional Programming Languages. See \code{?base::\link[base]{Reduce}} for the details, and, in particular, for a description of their arguments. The \pkg{S4Vectors} package provides methods for \link{List} objects, so, in addition to be an ordinary vector or list, the \code{x} argument can also be a \link{List} object. } \subsection{Evaluation of an expression in a List object}{ \code{within} evaluates \code{expr} within \code{as.env(data)} via \code{eval(data)}. Similar to \code{with}, except assignments made during evaluation are taken as assignments into \code{data}, i.e., new symbols have their value appended to \code{data}, and assigning new values to existing symbols results in replacement. } \subsection{Binding Lists into a matrix}{ There are methods for \code{cbind} and \code{rbind} that will bind multiple lists together into a basic list matrix. The usual geometric constraints apply. In the future, this might return a List (+ dimensions), but for now the return value is an ordinary list. } } \value{ \code{endoapply} returns an object of the same class as \code{X}, each element of which is the result of applying \code{FUN} to the corresponding element of \code{X}. \code{mendoapply} returns an object of the same class as the first object specified in \code{\dots}, each element of which is the result of applying \code{FUN} to the corresponding elements of \code{\dots}. \code{pc} returns a list or List object of the same length as the input objects. See \code{?base::\link[base]{Reduce}} for the value returned by the functional programming methods. See \code{?base::\link[base]{within}} for the value returned by \code{within}. \code{cbind} and \code{rbind} return a list matrix. } \author{P. Aboyoun and H. Pagès} \seealso{ \itemize{ \item The \link{List} class. \item \code{base::\link[base]{lapply}} and \code{base::\link[base]{mapply}} for the default \code{lapply} and \code{mapply} methods. \item \code{base::\link[base]{Reduce}} for the default functional programming methods. \item \code{base::\link[base]{within}} for the default \code{within} method. \item \code{base::\link[base]{cbind}} and \code{base::\link[base]{rbind}} for the default matrix binding methods. } } \examples{ a <- data.frame(x = 1:10, y = rnorm(10)) b <- data.frame(x = 1:10, y = rnorm(10)) endoapply(a, function(x) (x - mean(x))/sd(x)) mendoapply(function(e1, e2) (e1 - mean(e1)) * (e2 - mean(e2)), a, b) x <- list(a=11:13, b=26:21, c=letters) y <- list(-(5:1), c("foo", "bar"), 0.25) pc(x, y) library(IRanges) x <- IntegerList(a=11:13, b=26:21, c=31:36, d=4:2) y <- NumericList(-(5:1), 1:2, numeric(0), 0.25) pc(x, y) Reduce("+", x) Filter(is.unsorted, x) pos1 <- Position(is.unsorted, x) stopifnot(identical(Find(is.unsorted, x), x[[pos1]])) pos2 <- Position(is.unsorted, x, right=TRUE) stopifnot(identical(Find(is.unsorted, x, right=TRUE), x[[pos2]])) y <- x * 1000L Map("c", x, y) rbind(x, y) cbind(x, y) } \keyword{utilities} \keyword{methods} S4Vectors/man/Pairs-class.Rd0000644000175400017540000000677213175714520016700 0ustar00biocbuildbiocbuild\name{Pairs-class} \docType{class} \alias{class:Pairs} \alias{Pairs-class} % accessors \alias{first} \alias{first,Pairs-method} \alias{second} \alias{second,Pairs-method} \alias{first<-} \alias{first<-,Pairs-method} \alias{second<-} \alias{second<-,Pairs-method} \alias{names,Pairs-method} \alias{names<-,Pairs-method} \alias{length,Pairs-method} \alias{parallelSlotNames,Pairs-method} % comparison \alias{match,Pairs,Pairs-method} % constructor \alias{Pairs} % coercion \alias{zipup,Pairs,missing-method} \alias{coerce,Pairs,DataFrame-method} \alias{as.data.frame,Pairs-method} % combination \alias{c,Pairs-method} % displaying \alias{show,Pairs-method} \title{Pairs objects} \description{ \code{Pairs} is a \code{Vector} that stores two parallel vectors (any object that can be a column in a \code{\linkS4class{DataFrame}}). It provides conveniences for performing binary operations on the vectors, as well as for converting between an equivalent list representation. Virtually all of the typical R vector operations should behave as expected. A typical use case is representing the pairing from a \code{\link[IRanges]{findOverlaps}} call, for which \code{\link[IRanges]{findOverlapPairs}} is a shortcut. } \section{Constructor}{ \describe{ \item{}{ \code{Pairs(first, second, ..., names = NULL, hits = NULL)}: Constructs a Pairs object by aligning the vectors \code{first} and \code{second}. The vectors must have the same length, unless \code{hits} is specified. Arguments in \code{\dots} are combined as columns in the \code{mcols} of the result. The \code{names} argument specifies the names on the result. If \code{hits} is not \code{NULL}, it should be a \code{\linkS4class{Hits}} object that collates the elements in \code{first} and \code{second} to produce the corresponding pairs. } } } \section{Accessors}{ In the code snippets below, \code{x} is a \code{Pairs} object. \describe{ \item{}{\code{names(x)}, \code{names(x) <- value}: get or set the names} \item{}{\code{first(x)}, \code{first(x) <- value}: get or set the first member of each pair} \item{}{\code{second(x)}, \code{second(x) <- value}: get or set the second member of each pair} } } \section{Coercion}{ \describe{ \item{}{\code{zipup(x)}: Interleaves the \code{Pairs} object \code{x} into a list, where each element is composed of a pair. The type of list depends on the type of the elements. } \item{}{\code{zipdown(x)}: The inverse of \code{zipup()}. Converts \code{x}, a list where every element is of length 2, to a \code{Pairs} object, by assuming that each element of the list represents a pair. } } } \section{Subsetting}{ In the code snippets below, \code{x} is a \code{Pairs} object. \describe{ \item{}{\code{x[i]}: Subset the Pairs object.} } } \author{Michael Lawrence} \seealso{ \itemize{ \item \link{Hits-class}, a typical way to define a pairing. \item \code{\link[IRanges]{findOverlapPairs}} in the \pkg{IRanges} package, which generates an instance of this class based on overlaps. \item \link[IRanges]{setops-methods} in the \pkg{IRanges} package, for set operations on Pairs objects. } } \examples{ p <- Pairs(1:10, Rle(1L, 10), score=rnorm(10), names=letters[1:10]) identical(first(p), 1:10) mcols(p)$score p %in% p[1:5] as.data.frame(p) z <- zipup(p) first(p) <- Rle(1:10) identical(zipdown(z), p) } \keyword{methods} \keyword{classes} S4Vectors/man/Rle-class.Rd0000644000175400017540000002677713175714520016353 0ustar00biocbuildbiocbuild\name{Rle-class} \docType{class} \alias{class:Rle} \alias{Rle-class} \alias{Rle} \alias{Rle,ANY-method} \alias{Rle,Rle-method} \alias{length,Rle-method} \alias{runLength} \alias{runLength,Rle-method} \alias{runValue} \alias{runValue,Rle-method} \alias{nrun} \alias{nrun,Rle-method} \alias{start,Rle-method} \alias{end,Rle-method} \alias{width,Rle-method} \alias{runLength<-} \alias{runLength<-,Rle-method} \alias{runValue<-} \alias{runValue<-,Rle-method} \alias{as.vector,Rle-method} \alias{as.vector.Rle} \alias{as.factor,Rle-method} \alias{as.data.frame,Rle-method} \alias{as.list,Rle-method} \alias{coerce,Rle,list-method} \alias{coerce,vector,Rle-method} \alias{coerce,logical,Rle-method} \alias{coerce,integer,Rle-method} \alias{coerce,numeric,Rle-method} \alias{coerce,complex,Rle-method} \alias{coerce,character,Rle-method} \alias{coerce,raw,Rle-method} \alias{coerce,factor,Rle-method} \alias{coerce,Rle,vector-method} \alias{coerce,Rle,logical-method} \alias{coerce,Rle,integer-method} \alias{coerce,Rle,numeric-method} \alias{coerce,Rle,complex-method} \alias{coerce,Rle,character-method} \alias{coerce,Rle,raw-method} \alias{coerce,Rle,factor-method} \alias{coerce,Rle,data.frame-method} \alias{decode} \alias{decode,ANY-method} \alias{decode,Rle-method} \alias{extractROWS,Rle,ANY-method} \alias{[,Rle-method} \alias{[<-,Rle-method} \alias{extractROWS,Rle,RangeNSBS-method} \alias{rev.Rle} \alias{rev,Rle-method} \alias{rep.int,Rle-method} \alias{rep,Rle-method} \alias{c,Rle-method} \alias{append,Rle,vector-method} \alias{append,vector,Rle-method} \alias{\%in\%,Rle,ANY-method} \alias{findRun} \alias{findRun,Rle-method} \alias{is.na,Rle-method} \alias{is.unsorted,Rle-method} \alias{match,Rle,ANY-method} \alias{show,Rle-method} \alias{showAsCell,Rle-method} \alias{sort,Rle-method} \alias{table,Rle-method} \alias{tabulate,Rle-method} \alias{unique,Rle-method} \alias{duplicated,Rle-method} \alias{setdiff,Rle,Rle-method} \alias{setdiff,ANY,Rle-method} \alias{setdiff,Rle,ANY-method} \alias{intersect,Rle,Rle-method} \alias{intersect,ANY,Rle-method} \alias{intersect,Rle,ANY-method} \alias{union,Rle,Rle-method} \alias{union,ANY,Rle-method} \alias{union,Rle,ANY-method} \title{Rle objects} \description{ The Rle class is a general container for storing an atomic vector that is stored in a run-length encoding format. It is based on the \code{\link[base]{rle}} function from the base package. } \section{Constructor}{ \describe{ \item{}{ \code{Rle(values, lengths)}: This constructor creates an Rle instance out of an atomic vector or factor object \code{values} and an integer or numeric vector \code{lengths} with all positive elements that represent how many times each value is repeated. The length of these two vectors must be the same. \code{lengths} can be missing in which case \code{values} is turned into an Rle. } } } \section{Getters}{ In the code snippets below, \code{x} is an Rle object: \describe{ \item{}{ \code{runLength(x)}: Returns the run lengths for \code{x}. } \item{}{ \code{runValue(x)}: Returns the run values for \code{x}. } \item{}{ \code{nrun(x)}: Returns the number of runs in \code{x}. } \item{}{ \code{start(x)}: Returns the starts of the runs for \code{x}. } \item{}{ \code{end(x)}: Returns the ends of the runs for \code{x}. } \item{}{ \code{width(x)}: Same as \code{runLength(x)}. } } } \section{Setters}{ In the code snippets below, \code{x} is an Rle object: \describe{ \item{}{ \code{runLength(x) <- value}: Replaces \code{x} with a new Rle object using run values \code{runValue(x)} and run lengths \code{value}. } \item{}{ \code{runValue(x) <- value}: Replaces \code{x} with a new Rle object using run values \code{value} and run lengths \code{runLength(x)}. } } } \section{Coercion}{ \subsection{From atomic vector to Rle}{ In the code snippets below, \code{from} is an atomic vector: \describe{ \item{}{ \code{as(from, "Rle")}: This coercion creates an Rle instances out of an atomic vector \code{from}. } } } \subsection{From Rle to other objects}{ In the code snippets below, \code{x} and \code{from} are Rle objects: \describe{ \item{}{ \code{as.vector(x, mode="any")}, \code{as(from, "vector")}: Creates an atomic vector based on the values contained in \code{x}. The vector will be coerced to the requested \code{mode}, unless \code{mode} is "any", in which case the most appropriate type is chosen. } \item{}{ \code{as.factor(x)}, \code{as(from, "factor")}: Creates a factor object based on the values contained in \code{x}. } \item{}{ \code{as.data.frame(x)}, \code{as(from, "data.frame")}: Creates a \code{data.frame} with a single column holding the result of \code{as.vector(x)}. } \item{}{ \code{decode(x)}: Converts an Rle to its native form, such as an atomic vector or factor. Calling \code{decode} on a non-Rle will return \code{x} by default, so it is generally safe for ensuring that an object is native. } } } } \section{General Methods}{ In the code snippets below, \code{x} is an Rle object: \describe{ \item{}{ \code{x[i, drop=getOption("dropRle", default=FALSE)]}: Subsets \code{x} by index \code{i}, where \code{i} can be positive integers, negative integers, a logical vector of the same length as \code{x}, an Rle object of the same length as \code{x} containing logical values, or an \link[IRanges]{IRanges} object. When \code{drop=FALSE} returns an Rle object. When \code{drop=TRUE}, returns an atomic vector. } \item{}{ \code{x[i] <- value}: Replaces elements in \code{x} specified by \code{i} with corresponding elements in \code{value}. Supports the same types for \code{i} as \code{x[i]}. } \item{}{ \code{x \%in\% table}: Returns a logical Rle representing set membership in \code{table}. } \item{}{ \code{append(x, values, after = length(x))}: Insert one Rle into another Rle. \describe{ \item{\code{values}}{the Rle to insert.} \item{\code{after}}{the subscript in \code{x} after which the values are to be inserted.} } } \item{}{ \code{c(x, ...)}: Combines a set of Rle objects. } \item{}{ \code{findRun(x, vec)}: Returns an integer vector indicating the run indices in Rle \code{vec} that are referenced by the indices in the integer vector \code{x}. } \item{}{ \code{head(x, n = 6L)}: If \code{n} is non-negative, returns the first n elements of \code{x}. If \code{n} is negative, returns all but the last \code{abs(n)} elements of \code{x}. } \item{}{ \code{is.na(x)}: Returns a logical Rle indicating with values are \code{NA}. } \item{}{ \code{is.unsorted(x, na.rm = FALSE, strictly = FALSE)}: Returns a logical value specifying if \code{x} is unsorted. \describe{ \item{\code{na.rm}}{remove missing values from check.} \item{\code{strictly}}{check for _strictly_ increasing values.} } } \item{}{ \code{length(x)}: Returns the underlying vector length of \code{x}. } \item{}{ \code{match(x, table, nomatch = NA_integer_, incomparables = NULL)}: Matches the values in \code{x} to \code{table}: \describe{ \item{\code{table}}{the values to be matched against.} \item{\code{nomatch}}{the value to be returned in the case when no match is found.} \item{\code{incomparables}}{a vector of values that cannot be matched. Any value in \code{x} matching a value in this vector is assigned the \code{nomatch} value.} } } \item{}{ \code{rep(x, times, length.out, each)}, \code{rep.int(x, times)}: Repeats the values in \code{x} through one of the following conventions: \describe{ \item{\code{times}}{Vector giving the number of times to repeat each element if of length \code{length(x)}, or to repeat the whole vector if of length 1.} \item{\code{length.out}}{Non-negative integer. The desired length of the output vector.} \item{\code{each}}{Non-negative integer. Each element of \code{x} is repeated \code{each} times.} } } \item{}{ \code{rev(x)}: Reverses the order of the values in \code{x}. } \item{}{ \code{show(object)}: Prints out the Rle object in a user-friendly way. } \item{}{ \code{order(..., na.last=TRUE, decreasing=FALSE, method=c("auto", "shell", "radix"))}: Returns a permutation which rearranges its first argument into ascending or descending order, breaking ties by further arguments. See \code{\link[BiocGenerics]{order}}. } \item{}{ \code{sort(x, decreasing=FALSE, na.last=NA)}: Sorts the values in \code{x}. \describe{ \item{\code{decreasing}}{If \code{TRUE}, sort values in decreasing order. If \code{FALSE}, sort values in increasing order.} \item{\code{na.last}}{If \code{TRUE}, missing values are placed last. If \code{FALSE}, they are placed first. If \code{NA}, they are removed.} } } \item{}{ \code{subset(x, subset)}: Returns a new Rle object made of the subset using logical vector \code{subset}. } \item{}{ \code{table(...)}: Returns a table containing the counts of the unique values. Supported arguments include \code{useNA} with values of `no' and `ifany'. Multiple Rle's must be combined with \code{c()} before calling \code{table}. } \item{}{ \code{tabulate(bin, nbins = max(bin, 1L, na.rm = TRUE))}: Just like \code{\link{tabulate}}, except optimized for Rle. } \item{}{ \code{tail(x, n = 6L)}: If \code{n} is non-negative, returns the last n elements of \code{x}. If \code{n} is negative, returns all but the first \code{abs(n)} elements of \code{x}. } \item{}{ \code{unique(x, incomparables = FALSE, ...)}: Returns the unique run values. The \code{incomparables} argument takes a vector of values that cannot be compared with \code{FALSE} being a special value that means that all values can be compared. } } } \section{Set Operations}{ In the code snippets below, \code{x} and \code{y} are Rle object or some other vector-like object: \describe{ \item{}{ \code{setdiff(x, y)}: Returns the unique elements in \code{x} that are not in \code{y}. } \item{}{ \code{union(x, y)}: Returns the unique elements in either \code{x} or \code{y}. } \item{}{ \code{intersect(x, y)}: Returns the unique elements in both \code{x} and \code{y}. } } } \author{P. Aboyoun} \seealso{ \link{Rle-utils}, \link{Rle-runstat}, and \link[S4Vectors]{aggregate} for more operations on Rle objects. \code{\link[base]{rle}} \link{Vector-class} } \examples{ x <- Rle(10:1, 1:10) x runLength(x) runValue(x) nrun(x) diff(x) unique(x) sort(x) x[c(1,3,5,7,9)] x > 4 x2 <- Rle(LETTERS[c(21:26, 25:26)], 8:1) table(x2) y <- Rle(c(TRUE,TRUE,FALSE,FALSE,TRUE,FALSE,TRUE,TRUE,TRUE)) y as.vector(y) rep(y, 10) c(y, x > 5) } \keyword{methods} \keyword{classes} S4Vectors/man/Rle-runstat.Rd0000644000175400017540000001120313175714520016720 0ustar00biocbuildbiocbuild\name{Rle-runstat} \alias{Rle-runstat} \alias{runsum} \alias{runsum,Rle-method} \alias{runmean} \alias{runmean,Rle-method} \alias{smoothEnds,Rle-method} \alias{runmed,Rle-method} \alias{runwtsum} \alias{runwtsum,Rle-method} \alias{runq} \alias{runq,Rle-method} \title{Fixed-width running window summaries} \description{ The \code{runsum}, \code{runmean}, \code{runmed}, \code{runwtsum}, \code{runq} functions calculate the sum, mean, median, weighted sum, and order statistic for fixed width running windows. } \usage{ runsum(x, k, endrule = c("drop", "constant"), ...) runmean(x, k, endrule = c("drop", "constant"), ...) \S4method{smoothEnds}{Rle}(y, k = 3) \S4method{runmed}{Rle}(x, k, endrule = c("median", "keep", "drop", "constant"), algorithm = NULL, print.level = 0) runwtsum(x, k, wt, endrule = c("drop", "constant"), ...) runq(x, k, i, endrule = c("drop", "constant"), ...) } \arguments{ \item{x,y}{ The data object. } \item{k}{ An integer indicating the fixed width of the running window. Must be odd when \code{endrule != "drop"}. } \item{endrule}{ A character string indicating how the values at the beginning and the end (of the data) should be treated. \describe{ \item{\code{"median"}}{see \code{\link[stats]{runmed}};} \item{\code{"keep"}}{see \code{\link[stats]{runmed}};} \item{\code{"drop"}}{do not extend the running statistics to be the same length as the underlying vectors;} \item{\code{"constant"}}{copies running statistic to the first values and analogously for the last ones making the smoothed ends \emph{constant}.} } } \item{wt}{ A numeric vector of length \code{k} that provides the weights to use. } \item{i}{ An integer in [0, k] indicating which order statistic to calculate. } \item{algorithm,print.level}{ See \code{?stats::\link[stats]{runmed}} for a description of these arguments. } \item{\dots}{Additional arguments passed to methods. Specifically, \code{na.rm}. When \code{na.rm = TRUE}, the \code{NA} and \code{NaN} values are removed. When \code{na.rm = FALSE}, \code{NA} is returned if either \code{NA} or \code{NaN} are in the specified window. } } \details{ The \code{runsum}, \code{runmean}, \code{runmed}, \code{runwtsum}, and \code{runq} functions provide efficient methods for calculating the specified numeric summary by performing the looping in compiled code. } \value{ An object of the same class as \code{x}. } \author{P. Aboyoun and V. Obenchain} \seealso{ \code{\link[stats]{runmed}}, \link{Rle-class}, \link[IRanges]{RleList-class} } \examples{ x <- Rle(1:10, 1:10) runsum(x, k = 3) runsum(x, k = 3, endrule = "constant") runmean(x, k = 3) runwtsum(x, k = 3, wt = c(0.25, 0.5, 0.25)) runq(x, k = 5, i = 3, endrule = "constant") ## Missing and non-finite values x <- Rle(c(1, 2, NA, 0, 3, Inf, 4, NaN)) runsum(x, k = 2) runsum(x, k = 2, na.rm = TRUE) runmean(x, k = 2, na.rm = TRUE) runwtsum(x, k = 2, wt = c(0.25, 0.5), na.rm = TRUE) runq(x, k = 2, i = 2, na.rm = TRUE) ## max value in window ## The .naive_runsum() function demonstrates the semantics of ## runsum(). This test ensures the behavior is consistent with ## base::sum(). .naive_runsum <- function(x, k, na.rm=FALSE) sapply(0:(length(x)-k), function(offset) sum(x[1:k + offset], na.rm=na.rm)) x0 <- c(1, Inf, 3, 4, 5, NA) x <- Rle(x0) target1 <- .naive_runsum(x0, 3, na.rm = TRUE) target2 <- .naive_runsum(x, 3, na.rm = TRUE) stopifnot(target1 == target2) current <- as.vector(runsum(x, 3, na.rm = TRUE)) stopifnot(target1 == current) ## runmean() and runwtsum() : x <- Rle(c(2, 1, NA, 0, 1, -Inf)) runmean(x, k = 3) runmean(x, k = 3, na.rm = TRUE) runwtsum(x, k = 3, wt = c(0.25, 0.50, 0.25)) runwtsum(x, k = 3, wt = c(0.25, 0.50, 0.25), na.rm = TRUE) ## runq() : runq(x, k = 3, i = 1, na.rm = TRUE) ## smallest value in window runq(x, k = 3, i = 3, na.rm = TRUE) ## largest value in window ## When na.rm = TRUE, it is possible the number of non-NA ## values in the window will be less than the 'i' specified. ## Here we request the 4th smallest value in the window, ## which tranlates to the value at the 4/5 (0.8) percentile. x <- Rle(c(1, 2, 3, 4, 5)) runq(x, k=length(x), i=4, na.rm=TRUE) ## The same request on a Rle with two missing values ## finds the value at the 0.8 percentile of the vector ## at the new length of 3 after the NA's have been removed. ## This translates to round((0.8) * 3). x <- Rle(c(1, 2, 3, NA, NA)) runq(x, k=length(x), i=4, na.rm=TRUE) } \keyword{methods} \keyword{algebra} \keyword{arith} S4Vectors/man/Rle-utils.Rd0000644000175400017540000002437113175714520016372 0ustar00biocbuildbiocbuild\name{Rle-utils} \alias{Rle-utils} \alias{Ops,Rle,Rle-method} \alias{Ops,Rle,vector-method} \alias{Ops,vector,Rle-method} \alias{Math,Rle-method} \alias{Math2,Rle-method} \alias{Summary,Rle-method} \alias{Complex,Rle-method} \alias{summary.Rle} \alias{summary,Rle-method} \alias{!,Rle-method} \alias{which,Rle-method} \alias{pmax,Rle-method} \alias{pmin,Rle-method} \alias{pmax.int,Rle-method} \alias{pmin.int,Rle-method} \alias{which.max,Rle-method} \alias{diff.Rle} \alias{diff,Rle-method} \alias{mean.Rle} \alias{mean,Rle-method} \alias{var,Rle,missing-method} \alias{var,Rle,Rle-method} \alias{cov,Rle,Rle-method} \alias{cor,Rle,Rle-method} \alias{sd,Rle-method} \alias{median.Rle} \alias{median,Rle-method} \alias{quantile.Rle} \alias{quantile,Rle-method} \alias{mad.Rle} \alias{mad,Rle-method} \alias{IQR,Rle-method} \alias{nchar,Rle-method} \alias{substr,Rle-method} \alias{substring,Rle-method} \alias{chartr,ANY,ANY,Rle-method} \alias{tolower,Rle-method} \alias{toupper,Rle-method} \alias{sub,ANY,ANY,Rle-method} \alias{gsub,ANY,ANY,Rle-method} \alias{paste,Rle-method} \alias{levels.Rle} \alias{levels,Rle-method} \alias{levels<-,Rle-method} \alias{droplevels.Rle} \alias{droplevels,Rle-method} \title{Common operations on Rle objects} \description{ Common operations on \link{Rle} objects. } \section{Group Generics}{ Rle objects have support for S4 group generic functionality: \describe{ \item{\code{Arith}}{\code{"+"}, \code{"-"}, \code{"*"}, \code{"^"}, \code{"\%\%"}, \code{"\%/\%"}, \code{"/"}} \item{\code{Compare}}{\code{"=="}, \code{">"}, \code{"<"}, \code{"!="}, \code{"<="}, \code{">="}} \item{\code{Logic}}{\code{"&"}, \code{"|"}} \item{\code{Ops}}{\code{"Arith"}, \code{"Compare"}, \code{"Logic"}} \item{\code{Math}}{\code{"abs"}, \code{"sign"}, \code{"sqrt"}, \code{"ceiling"}, \code{"floor"}, \code{"trunc"}, \code{"cummax"}, \code{"cummin"}, \code{"cumprod"}, \code{"cumsum"}, \code{"log"}, \code{"log10"}, \code{"log2"}, \code{"log1p"}, \code{"acos"}, \code{"acosh"}, \code{"asin"}, \code{"asinh"}, \code{"atan"}, \code{"atanh"}, \code{"exp"}, \code{"expm1"}, \code{"cos"}, \code{"cosh"}, \code{"sin"}, \code{"sinh"}, \code{"tan"}, \code{"tanh"}, \code{"gamma"}, \code{"lgamma"}, \code{"digamma"}, \code{"trigamma"}} \item{\code{Math2}}{\code{"round"}, \code{"signif"}} \item{\code{Summary}}{\code{"max"}, \code{"min"}, \code{"range"}, \code{"prod"}, \code{"sum"}, \code{"any"}, \code{"all"}} \item{\code{Complex}}{\code{"Arg"}, \code{"Conj"}, \code{"Im"}, \code{"Mod"}, \code{"Re"}} } See \link[methods]{S4groupGeneric} for more details. } \section{Summary}{ In the code snippets below, \code{x} is an Rle object: \describe{ \item{}{ \code{summary(object, ..., digits = max(3, getOption("digits") - 3))}: Summarizes the Rle object using an atomic vector convention. The \code{digits} argument is used for number formatting with \code{signif()}. } } } \section{Logical Data Methods}{ In the code snippets below, \code{x} is an Rle object: \describe{ \item{}{ \code{!x}: Returns logical negation (NOT) of \code{x}. } \item{}{ \code{which(x)}: Returns an integer vector representing the \code{TRUE} indices of \code{x}. } } } \section{Numerical Data Methods}{ In the code snippets below, \code{x} is an Rle object: \describe{ \item{}{ \code{diff(x, lag = 1, differences = 1}: Returns suitably lagged and iterated differences of \code{x}. \describe{ \item{\code{lag}}{An integer indicating which lag to use.} \item{\code{differences}}{An integer indicating the order of the difference.} } } \item{}{ \code{pmax(..., na.rm = FALSE)}, \code{pmax.int(..., na.rm = FALSE)}: Parallel maxima of the Rle input values. Removes \code{NA}s when \code{na.rm = TRUE}. } \item{}{ \code{pmin(..., na.rm = FALSE)}, \code{pmin.int(..., na.rm = FALSE)}: Parallel minima of the Rle input values. Removes \code{NA}s when \code{na.rm = TRUE}. } \item{}{ \code{which.max(x)}: Returns the index of the first element matching the maximum value of \code{x}. } \item{}{ \code{mean(x, na.rm = FALSE)}: Calculates the mean of \code{x}. Removes \code{NA}s when \code{na.rm = TRUE}. } \item{}{ \code{var(x, y = NULL, na.rm = FALSE)}: Calculates the variance of \code{x} or covariance of \code{x} and \code{y} if both are supplied. Removes \code{NA}s when \code{na.rm = TRUE}. } \item{}{ \code{cov(x, y, use = "everything")}, \code{cor(x, y, use = "everything")}: Calculates the covariance and correlation respectively of Rle objects \code{x} and \code{y}. The \code{use} argument is an optional character string giving a method for computing covariances in the presence of missing values. This must be (an abbreviation of) one of the strings \code{"everything"}, \code{"all.obs"}, \code{"complete.obs"}, \code{"na.or.complete"}, or \code{"pairwise.complete.obs"}. } \item{}{ \code{sd(x, na.rm = FALSE)}: Calculates the standard deviation of \code{x}. Removes \code{NA}s when \code{na.rm = TRUE}. } \item{}{ \code{median(x, na.rm = FALSE)}: Calculates the median of \code{x}. Removes \code{NA}s when \code{na.rm = TRUE}. } \item{}{ \code{quantile(x, probs = seq(0, 1, 0.25), na.rm = FALSE, names = TRUE, type = 7, ...)}: Calculates the specified quantiles of \code{x}. \describe{ \item{\code{probs}}{A numeric vector of probabilities with values in [0,1].} \item{\code{na.rm}}{If \code{TRUE}, removes \code{NA}s from \code{x} before the quantiles are computed.} \item{\code{names}}{If \code{TRUE}, the result has names describing the quantiles.} \item{\code{type}}{An integer between 1 and 9 selecting one of the nine quantile algorithms detailed in \code{\link[stats]{quantile}}.} \item{\dots}{Further arguments passed to or from other methods.} } } \item{}{ \code{mad(x, center = median(x), constant = 1.4826, na.rm = FALSE, low = FALSE, high = FALSE)}: Calculates the median absolute deviation of \code{x}. \describe{ \item{\code{center}}{The center to calculate the deviation from.} \item{\code{constant}}{The scale factor.} \item{\code{na.rm}}{If \code{TRUE}, removes \code{NA}s from \code{x} before the mad is computed.} \item{\code{low}}{If \code{TRUE}, compute the 'lo-median'.} \item{\code{high}}{If \code{TRUE}, compute the 'hi-median'.} } } \item{}{ \code{IQR(x, na.rm = FALSE)}: Calculates the interquartile range of \code{x}. \describe{ \item{\code{na.rm}}{If \code{TRUE}, removes \code{NA}s from \code{x} before the IQR is computed.} } } \item{}{ \code{smoothEnds(y, k = 3)}: Smooth end points of an Rle \code{y} using subsequently smaller medians and Tukey's end point rule at the very end. \describe{ \item{\code{k}}{An integer indicating the width of largest median window; must be odd.} } } } } \section{Character Data Methods}{ In the code snippets below, \code{x} is an Rle object: \describe{ \item{}{ \code{nchar(x, type = "chars", allowNA = FALSE)}: Returns an integer Rle representing the number of characters in the corresponding values of \code{x}. \describe{ \item{\code{type}}{One of \code{c("bytes", "chars", "width")}.} \item{\code{allowNA}}{Should \code{NA} be returned for invalid multibyte strings rather than throwing an error?} } } \item{}{ \code{substr(x, start, stop)}, \code{substring(text, first, last = 1000000L)}: Returns a character or factor Rle containing the specified substrings beginning at \code{start}/\code{first} and ending at \code{stop}/\code{last}. } \item{}{ \code{chartr(old, new, x)}: Returns a character or factor Rle containing a translated version of \code{x}. \describe{ \item{\code{old}}{A character string specifying the characters to be translated.} \item{\code{new}}{A character string specifying the translations.} } } \item{}{ \code{tolower(x)}: Returns a character or factor Rle containing a lower case version of \code{x}. } \item{}{ \code{toupper(x)}: Returns a character or factor Rle containing an upper case version of \code{x}. } \item{}{ \code{sub(pattern, replacement, x, ignore.case = FALSE, perl = FALSE, fixed = FALSE, useBytes = FALSE)}: Returns a character or factor Rle containing replacements based on matches determined by regular expression matching. See \code{\link{sub}} for a description of the arguments. } \item{}{ \code{gsub(pattern, replacement, x, ignore.case = FALSE, perl = FALSE, fixed = FALSE, useBytes = FALSE)}: Returns a character or factor Rle containing replacements based on matches determined by regular expression matching. See \code{\link{gsub}} for a description of the arguments. } \item{}{ \code{paste(..., sep = " ", collapse = NULL)}: Returns a character or factor Rle containing a concatenation of the values in \code{...}. } } } \section{Factor Data Methods}{ In the code snippets below, \code{x} is an Rle object: \describe{ \item{}{ \code{levels(x)}, \code{levels(x) <- value}: Gets and sets the factor levels, respectively. } \item{}{ \code{nlevels(x)}: Returns the number of factor levels. } \item{}{ \code{droplevels(x)}: Drops unused factor levels. } } } \author{P. Aboyoun} \seealso{ \link{Rle} objects \link[methods]{S4groupGeneric} } \examples{ x <- Rle(10:1, 1:10) x sqrt(x) x^2 + 2 * x + 1 range(x) sum(x) mean(x) z <- c("the", "quick", "red", "fox", "jumps", "over", "the", "lazy", "brown", "dog") z <- Rle(z, seq_len(length(z))) chartr("a", "@", z) toupper(z) } \keyword{utilities} \keyword{methods} \keyword{arith} S4Vectors/man/S4Vectors-internals.Rd0000644000175400017540000000232613175714520020337 0ustar00biocbuildbiocbuild\name{S4Vectors internals} % Stuff from R/S4-utils.R: \alias{class:character_OR_NULL} \alias{character_OR_NULL-class} \alias{character_OR_NULL} \alias{class:vector_OR_factor} \alias{vector_OR_factor-class} \alias{vector_OR_factor} \alias{coerce,ANY,vector-method} \alias{setValidity2} \alias{new2} \alias{setMethods} % Stuff from R/utils.R: \alias{wmsg} \alias{.Call2} \alias{get_showHeadLines} \alias{get_showTailLines} \alias{printAtomicVectorInAGrid} % Low-level helper functions from R/normarg-utils.R: \alias{isTRUEorFALSE} \alias{isSingleInteger} \alias{isSingleNumber} \alias{isSingleString} \alias{isSingleNumberOrNA} \alias{isSingleStringOrNA} \alias{recycleIntegerArg} \alias{recycleNumericArg} \alias{recycleCharacterArg} \alias{recycleLogicalArg} \alias{recycleArg} \alias{fold} \alias{duplicatedIntegerPairs} \alias{duplicatedIntegerQuads} \alias{matchIntegerPairs} \alias{matchIntegerQuads} \alias{orderIntegerPairs} \alias{orderIntegerQuads} \alias{selfmatchIntegerPairs} \alias{selfmatchIntegerQuads} \title{S4Vectors internals} \description{ Objects, classes and methods defined in the \pkg{S4Vectors} package that are not intended to be used directly. } \keyword{internal} \keyword{classes} \keyword{methods} S4Vectors/man/SimpleList-class.Rd0000644000175400017540000000555413175714520017704 0ustar00biocbuildbiocbuild\name{SimpleList-class} \docType{class} \alias{class:SimpleList} \alias{SimpleList} \alias{SimpleList-class} % accessors \alias{length,SimpleList-method} \alias{names,SimpleList-method} \alias{names<-,SimpleList-method} % coercion \alias{as.list,SimpleList-method} \alias{coerce,ANY,SimpleList-method} \alias{coerce,list,List-method} % combining \alias{c,SimpleList-method} % looping \alias{lapply,SimpleList-method} \alias{endoapply,SimpleList-method} \alias{mendoapply,SimpleList-method} % displaying \alias{classNameForDisplay,SimpleList-method} \title{SimpleList objects} \description{ The (non-virtual) SimpleList class extends the \link{List} virtual class. } \details{ The SimpleList class is the simplest, most generic concrete implementation of the \link{List} abstraction. It provides an implementation that subclasses can easily extend. In a SimpleList object the list elements are stored internally in an ordinary list. } \section{Constructor}{ See the \link{List} man page for a quick overview of how to construct \link{List} objects in general. The following constructor is provided for SimpleList objects: \describe{ \item{}{\code{SimpleList(...)}: Takes possibly named objects as elements for the new SimpleList object. } } } \section{Accessors}{ Same as for \link{List} objects. See the \link{List} man page for more information. } \section{Coercion}{ All the coercions documented in the \link{List} man page apply to \link{SimpleList} objects. } \section{Subsetting}{ Same as for \link{List} objects. See the \link{List} man page for more information. } \section{Looping and functional programming}{ Same as for \link{List} objects. See \code{?`\link{List-utils}`} for more information. } \section{Displaying}{ When a SimpleList object is displayed, the "Simple" prefix is removed from the real class name of the object. See \code{\link{classNameForDisplay}} for more information about this. } \seealso{ \itemize{ \item \link{List} objects for the parent class. \item The \link[IRanges]{CompressedList} class defined in the \pkg{IRanges} package for a more efficient alternative to SimpleList. \item The \link[IRanges]{SimpleIntegerList} class defined in the \pkg{IRanges} package for a SimpleList subclass example. \item The \link{DataFrame} class for another SimpleList subclass example. } } \examples{ ## Displaying a SimpleList object: x1 <- SimpleList(a=letters, i=Rle(22:20, 4:2)) class(x1) ## The "Simple" prefix is removed from the real class name of the ## object: x1 library(IRanges) x2 <- IntegerList(11:12, integer(0), 3:-2, compress=FALSE) class(x2) ## The "Simple" prefix is removed from the real class name of the ## object: x2 ## This is controlled by internal helper classNameForDisplay(): classNameForDisplay(x2) } \keyword{methods} \keyword{classes} S4Vectors/man/Vector-class.Rd0000644000175400017540000002251713175714520017057 0ustar00biocbuildbiocbuild\name{Vector-class} \docType{class} \alias{class:DataTable_OR_NULL} \alias{DataTable_OR_NULL-class} \alias{DataTable_OR_NULL} % Vector class, functions and methods: \alias{class:Vector} \alias{Vector-class} \alias{Vector} \alias{length,Vector-method} \alias{lengths,Vector-method} \alias{NROW,Vector-method} \alias{ROWNAMES,Vector-method} \alias{elementMetadata} \alias{elementMetadata,Vector-method} \alias{mcols} \alias{mcols,Vector-method} \alias{values} \alias{values,Vector-method} \alias{elementMetadata<-} \alias{elementMetadata<-,Vector-method} \alias{mcols<-} \alias{mcols<-,Vector-method} \alias{values<-} \alias{values<-,Vector-method} \alias{rename} \alias{rename,vector-method} \alias{rename,Vector-method} \alias{as.logical,Vector-method} \alias{as.integer,Vector-method} \alias{as.numeric,Vector-method} \alias{as.double,Vector-method} \alias{as.complex,Vector-method} \alias{as.character,Vector-method} \alias{as.raw,Vector-method} \alias{as.matrix,Vector-method} \alias{as.matrix.Vector} \alias{coerce,Vector,vector-method} \alias{coerce,Vector,logical-method} \alias{coerce,Vector,integer-method} \alias{coerce,Vector,numeric-method} \alias{coerce,Vector,double-method} \alias{coerce,Vector,complex-method} \alias{coerce,Vector,character-method} \alias{coerce,Vector,raw-method} \alias{coerce,Vector,factor-method} \alias{coerce,Vector,data.frame-method} \alias{as.data.frame.Vector} \alias{as.data.frame,Vector-method} \alias{as.env,Vector-method} \alias{as.list.Vector} \alias{as.list,Vector-method} \alias{[,Vector-method} \alias{[<-,Vector-method} \alias{replaceROWS,Vector-method} \alias{window} \alias{window.Vector} \alias{window,Vector-method} \alias{head} \alias{head.Vector} \alias{head,Vector-method} \alias{tail} \alias{tail.Vector} \alias{tail,Vector-method} \alias{append,Vector,Vector-method} \alias{expand.grid} \alias{expand.grid,Vector-method} \alias{parallelVectorNames} \alias{parallelVectorNames,ANY-method} \alias{anyNA,Vector-method} \alias{is.na,Vector-method} \title{Vector objects} \description{ The Vector virtual class serves as the heart of the S4Vectors package and has over 90 subclasses. It serves a similar role as \link[base]{vector} in base R. The Vector class supports the storage of \emph{global} and \emph{element-wise} metadata: \enumerate{ \item The \emph{global} metadata annotates the object as a whole: this metadata is accessed via the \code{metadata} accessor and is represented as an ordinary list; \item The \emph{element-wise} metadata annotates individual elements of the object: this metadata is accessed via the \code{mcols} accessor (\code{mcols} stands for \emph{metadata columns}) and is represented as a \link{DataTable} object (i.e. as an instance of a concrete subclass of \link{DataTable}, e.g. a \link{DataFrame} object), with a row for each element and a column for each metadata variable. Note that the element-wise metadata can also be \code{NULL}. } To be functional, a class that inherits from Vector must define at least a \code{length} and a \code{"["} method. } \section{Accessors}{ In the following code snippets, \code{x} is a Vector object. \describe{ \item{}{ \code{length(x)}: Get the number of elements in \code{x}. } \item{}{ \code{lengths(x, use.names=TRUE)}: Get the length of each of the elements. Note: The \code{lengths} method for Vector objects is currently defined as an alias for \code{\link{elementNROWS}} (with addition of the \code{use.names} argument), so is equivalent to \code{sapply(x, NROW)}, not to \code{sapply(x, length)}. See \code{?BiocGenerics::\link[BiocGenerics]{lengths}} in the \pkg{BiocGenerics} package for more information about this. } \item{}{ \code{NROW(x)}: Defined as \code{length(x)} for any Vector object that is \emph{not} a \link{DataTable} object. If \code{x} is a \link{DataTable} object, then it's defined as \code{nrow(x)}. } \item{}{ \code{names(x)}, \code{names(x) <- value}: Get or set the names of the elements in the Vector. } \item{}{ \code{rename(x, value, ...)}: Replace the names of \code{x} according to a mapping defined by a named character vector, formed by concatenating \code{value} with any arguments in \code{...}. The names of the character vector indicate the source names, and the corresponding values the destination names. This also works on a plain old \code{vector}. } \item{}{ \code{nlevels(x)}: Returns the number of factor levels. } \item{}{ \code{mcols(x, use.names=FALSE)}, \code{mcols(x) <- value}: Get or set the metadata columns. If \code{use.names=TRUE} and the metadata columns are not \code{NULL}, then the names of \code{x} are propagated as the row names of the returned \link{DataTable} object. When setting the metadata columns, the supplied value must be \code{NULL} or a \link{DataTable} object holding element-wise metadata. } \item{}{ \code{elementMetadata(x, use.names=FALSE)}, \code{elementMetadata(x) <- value}, \code{values(x, use.names=FALSE)}, \code{values(x) <- value}: Alternatives to \code{mcols} functions. Their use is discouraged. } } } \section{Coercion}{ \describe{ \item{}{\code{as(from, "data.frame")}, \code{as.data.frame(from)}: Coerces \code{from}, a \code{Vector}, to a \code{data.frame} by first coercing the \code{Vector} to a \code{vector} via \code{as.vector}. Note that many \code{Vector} derivatives do not support \code{as.vector}, so this coercion is possible only for certain types. } \item{}{\code{as.env(x)}: Constructs an environment object containing the elements of \code{mcols(x)}. } } } \section{Subsetting}{ In the code snippets below, \code{x} is a Vector object or regular R vector object. The R vector object methods for \code{window} are defined in this package and the remaining methods are defined in base R. \describe{ \item{}{ \code{x[i, drop=TRUE]}: If defined, returns a new Vector object made of selected elements \code{i}, which can be missing; an NA-free logical, numeric, or character vector; or a logical Rle object. The \code{drop} argument specifies whether or not to coerce the returned sequence to an ordinary vector. } \item{}{ \code{x[i] <- value}: Replacement version of \code{x[i]}. } } } \section{Combining}{ In the code snippets below, \code{x} is a Vector object. \describe{ \item{}{ \code{c(x, ...)}: Combine \code{x} and the Vector objects in \code{...} together. Any object in \code{...} must belong to the same class as \code{x}, or to one of its subclasses, or must be \code{NULL}. The result is an object of the same class as \code{x}. } \item{}{\code{append(x, values, after = length(x))}: Insert the \code{Vector} \code{values} onto \code{x} at the position given by \code{after}. \code{values} must have an \code{elementType} that extends that of \code{x}. } \item{}{\code{expand.grid(...)}: Find cartesian product of every vector in \code{\dots} and return a data.frame, each column of which corresponds to an argument. See \code{\link[base]{expand.grid}}. } } } \section{Displaying}{ [FOR ADVANCED USERS OR DEVELOPERS] Displaying of a Vector object is controlled by 2 internal helpers, \code{classNameForDisplay} and \code{showAsCell}. For most objects \code{classNameForDisplay(x)} just returns \code{class(x)}. However, for some objects it can return the name of a parent class that is more suitable for display because it's simpler and as informative as the real class name. See \link{SimpleList} objects (defined in this package) and \link[IRanges]{CompressedList} objects (defined in the \pkg{IRanges} package) for examples of objects for which \code{classNameForDisplay} returns the name of a parent class. \code{showAsCell(x)} produces a character vector \emph{parallel} to \code{x} (i.e. with one string per vector element in \code{x}) that contains compact string representations of each elements in \code{x}. Note that \code{classNameForDisplay} and \code{showAsCell} are generic functions so developers can implement methods to control how their own Vector extension gets displayed. } \seealso{ \itemize{ \item \link{Rle}, \link{Hits}, \link[IRanges]{IRanges} and \link[XVector]{XRaw} for example implementations. \item \link{Vector-comparison} for comparing, ordering, and tabulating vector-like objects. \item \link{Vector-setops} for set operations on vector-like objects. \item \link{Vector-merge} for merging vector-like objects. \item \link{List} for a direct Vector extension that serves a similar role as \link[base]{list} in base R. \item \link[IRanges]{extractList} for grouping elements of a vector-like object into a list-like object. \item \link{DataTable} which is the type of objects returned by the \code{mcols} accessor. \item The \link{Annotated} class, which Vector extends. } } \examples{ showClass("Vector") # shows (some of) the known subclasses } \keyword{methods} \keyword{classes} S4Vectors/man/Vector-comparison.Rd0000644000175400017540000003273613175714520020130 0ustar00biocbuildbiocbuild\name{Vector-comparison} \alias{Vector-comparison} \alias{pcompare} \alias{==,Vector,Vector-method} \alias{==,Vector,ANY-method} \alias{==,ANY,Vector-method} \alias{<=,Vector,Vector-method} \alias{<=,Vector,ANY-method} \alias{<=,ANY,Vector-method} \alias{!=,Vector,Vector-method} \alias{!=,Vector,ANY-method} \alias{!=,ANY,Vector-method} \alias{>=,Vector,Vector-method} \alias{>=,Vector,ANY-method} \alias{>=,ANY,Vector-method} \alias{<,Vector,Vector-method} \alias{<,Vector,ANY-method} \alias{<,ANY,Vector-method} \alias{>,Vector,Vector-method} \alias{>,Vector,ANY-method} \alias{>,ANY,Vector-method} \alias{selfmatch} \alias{selfmatch,ANY-method} \alias{selfmatch,factor-method} \alias{duplicated,Vector-method} \alias{duplicated.Vector} \alias{unique,Vector-method} \alias{unique.Vector} \alias{\%in\%,Vector,Vector-method} \alias{\%in\%,Vector,ANY-method} \alias{\%in\%,ANY,Vector-method} \alias{findMatches} \alias{findMatches,ANY,ANY-method} \alias{findMatches,ANY,missing-method} \alias{countMatches} \alias{countMatches,ANY,ANY-method} \alias{sort,Vector-method} \alias{sort.Vector} \alias{rank,Vector-method} \alias{table,Vector-method} \title{Compare, order, tabulate vector-like objects} \description{ Generic functions and methods for comparing, ordering, and tabulating vector-like objects. } \usage{ ## Element-wise (aka "parallel") comparison of 2 Vector objects ## ------------------------------------------------------------ pcompare(x, y) \S4method{==}{Vector,Vector}(e1, e2) \S4method{==}{Vector,ANY}(e1, e2) \S4method{==}{ANY,Vector}(e1, e2) \S4method{<=}{Vector,Vector}(e1, e2) \S4method{<=}{Vector,ANY}(e1, e2) \S4method{<=}{ANY,Vector}(e1, e2) \S4method{!=}{Vector,Vector}(e1, e2) \S4method{!=}{Vector,ANY}(e1, e2) \S4method{!=}{ANY,Vector}(e1, e2) \S4method{>=}{Vector,Vector}(e1, e2) \S4method{>=}{Vector,ANY}(e1, e2) \S4method{>=}{ANY,Vector}(e1, e2) \S4method{<}{Vector,Vector}(e1, e2) \S4method{<}{Vector,ANY}(e1, e2) \S4method{<}{ANY,Vector}(e1, e2) \S4method{>}{Vector,Vector}(e1, e2) \S4method{>}{Vector,ANY}(e1, e2) \S4method{>}{ANY,Vector}(e1, e2) ## selfmatch() ## ----------- selfmatch(x, ...) ## duplicated() & unique() ## ----------------------- \S4method{duplicated}{Vector}(x, incomparables=FALSE, ...) \S4method{unique}{Vector}(x, incomparables=FALSE, ...) ## %in% ## ---- \S4method{\%in\%}{Vector,Vector}(x, table) \S4method{\%in\%}{Vector,ANY}(x, table) \S4method{\%in\%}{ANY,Vector}(x, table) ## findMatches() & countMatches() ## ------------------------------ findMatches(x, table, select=c("all", "first", "last"), ...) countMatches(x, table, ...) ## sort() ## ------ \S4method{sort}{Vector}(x, decreasing=FALSE, na.last=NA, by) ## table() ## ------- \S4method{table}{Vector}(...) } \arguments{ \item{x, y, e1, e2, table}{ Vector-like objects. } \item{incomparables}{ The \code{duplicated} method for \link{Vector} objects does NOT support this argument. The \code{unique} method for \link{Vector} objects, which is implemented on top of \code{duplicated}, propagates this argument to its call to \code{duplicated}. See \code{?base::\link[base]{duplicated}} and \code{?base::\link[base]{unique}} for more information about this argument. } \item{select}{ Only \code{select="all"} is supported at the moment. Note that you can use \code{match} if you want to do \code{select="first"}. Otherwise you're welcome to request this on the Bioconductor mailing list. } \item{decreasing, na.last}{ See \code{?base::\link[base]{sort}}. } \item{by}{A formula referencing the metadata columns by which to sort, e.g., \code{~ x + y} sorts by column \dQuote{x}, breaking ties with column \dQuote{y}. } \item{...}{ A \link{Vector} object for \code{table} (the \code{table} method for \link{Vector} objects can only take one input object). Otherwise, extra arguments supported by specific methods. In particular: \itemize{ \item The default \code{selfmatch} method, which is implemented on top of \code{match}, propagates the extra arguments to its call to \code{match}. \item The \code{duplicated} method for \link{Vector} objects, which is implemented on top of \code{selfmatch}, accepts extra argument \code{fromLast} and propagates the other extra arguments to its call to \code{selfmatch}. See \code{?base::\link[base]{duplicated}} for more information about this argument. \item The \code{unique} method for \link{Vector} objects, which is implemented on top of \code{duplicated}, propagates the extra arguments to its call to \code{duplicated}. \item The default \code{findMatches} and \code{countMatches} methods, which are implemented on top of \code{match} and \code{selfmatch}, propagate the extra arguments to their calls to \code{match} and \code{selfmatch}. \item The \code{sort} method for \link{Vector} objects, which is implemented on top of \code{order}, only accepts extra argument \code{na.last} and propagates it to its call to \code{order}. } } } \details{ Doing \code{pcompare(x, y)} on 2 vector-like objects \code{x} and \code{y} of length 1 must return an integer less than, equal to, or greater than zero if the single element in \code{x} is considered to be respectively less than, equal to, or greater than the single element in \code{y}. If \code{x} or \code{y} have a length != 1, then they are typically expected to have the same length so \code{pcompare(x, y)} can operate element-wise, that is, in that case it returns an integer vector of the same length as \code{x} and \code{y} where the i-th element is the result of compairing \code{x[i]} and \code{y[i]}. If \code{x} and \code{y} don't have the same length and are not zero-length vectors, then the shortest is first recycled to the length of the longest. If one of them is a zero-length vector then \code{pcompare(x, y)} returns a zero-length integer vector. \code{selfmatch(x, ...)} is equivalent to \code{match(x, x, ...)}. This is actually how the default method is implemented. However note that \code{selfmatch(x, ...)} will typically be more efficient than \code{match(x, x, ...)} on vector-like objects for which a specific \code{selfmatch} method is implemented. \code{findMatches} is an enhanced version of \code{match} which, by default (i.e. if \code{select="all"}), returns all the matches in a \link{Hits} object. \code{countMatches} returns an integer vector of the length of \code{x} containing the number of matches in \code{table} for each element in \code{x}. } \value{ For \code{pcompare}: see Details section above. For \code{selfmatch}: an integer vector of the same length as \code{x}. For \code{duplicated}, \code{unique}, and \code{\%in\%}: see \code{?BiocGenerics::\link[BiocGenerics]{duplicated}}, \code{?BiocGenerics::\link[BiocGenerics]{unique}}, and \code{?`\link{\%in\%}`}. For \code{findMatches}: a \link{Hits} object by default (i.e. if \code{select="all"}). For \code{countMatches}: an integer vector of the length of \code{x} containing the number of matches in \code{table} for each element in \code{x}. For \code{sort}: see \code{?BiocGenerics::\link[BiocGenerics]{sort}}. For \code{table}: a 1D array of integer values promoted to the \code{"table"} class. See \code{?BiocGeneric::\link[BiocGenerics]{table}} for more information. } \note{ The following notes are for developers who want to implement comparing, ordering, and tabulating methods for their own \link{Vector} subclass: \enumerate{ \item The 6 traditional binary comparison operators are: \code{==}, \code{!=}, \code{<=}, \code{>=}, \code{<}, and \code{>}. The \pkg{S4Vectors} package provides the following methods for these operators: \preformatted{ setMethod("==", c("Vector", "Vector"), function(e1, e2) { pcompare(e1, e2) == 0L } ) setMethod("<=", c("Vector", "Vector"), function(e1, e2) { pcompare(e1, e2) <= 0L } ) setMethod("!=", c("Vector", "Vector"), function(e1, e2) { !(e1 == e2) } ) setMethod(">=", c("Vector", "Vector"), function(e1, e2) { e2 <= e1 } ) setMethod("<", c("Vector", "Vector"), function(e1, e2) { !(e2 <= e1) } ) setMethod(">", c("Vector", "Vector"), function(e1, e2) { !(e1 <= e2) } ) } With these definitions, the 6 binary operators work out-of-the-box on \link{Vector} objects for which \code{pcompare} works the expected way. If \code{pcompare} is not implemented, then it's enough to implement \code{==} and \code{<=} methods to have the 4 remaining operators (\code{!=}, \code{>=}, \code{<}, and \code{>}) work out-of-the-box. \item The \pkg{S4Vectors} package provides no \code{pcompare} method for \link{Vector} objects. Specific \code{pcompare} methods need to be implemented for specific \link{Vector} subclasses (e.g. for \link{Hits} and \link[IRanges]{Ranges} objects). These specific methods must obey the rules described in the Details section above. \item The \code{duplicated}, \code{unique}, and \code{\%in\%} methods for \link{Vector} objects are implemented on top of \code{selfmatch}, \code{duplicated}, and \code{match}, respectively, so they work out-of-the-box on \link{Vector} objects for which \code{selfmatch}, \code{duplicated}, and \code{match} work the expected way. \item Also the default \code{findMatches} and \code{countMatches} methods are implemented on top of \code{match} and \code{selfmatch} so they work out-of-the-box on \link{Vector} objects for which those things work the expected way. \item However, since \code{selfmatch} itself is also implemented on top of \code{match}, then having \code{match} work the expected way is actually enough to get \code{selfmatch}, \code{duplicated}, \code{unique}, \code{\%in\%}, \code{findMatches}, and \code{countMatches} work out-of-the-box on \link{Vector} objects. \item The \code{sort} method for \link{Vector} objects is implemented on top of \code{order}, so it works out-of-the-box on \link{Vector} objects for which \code{order} works the expected way. \item The \code{table} method for \link{Vector} objects is implemented on top of \code{selfmatch}, \code{order}, and \code{as.character}, so it works out-of-the-box on a \link{Vector} object for which those things work the expected way. \item The \pkg{S4Vectors} package provides no \code{match} or \code{order} methods for \link{Vector} objects. Specific methods need to be implemented for specific \link{Vector} subclasses (e.g. for \link{Hits} and \link[IRanges]{Ranges} objects). } } \author{Hervé Pagès} \seealso{ \itemize{ \item The \link{Vector} class. \item \link{Hits-comparison} for comparing and ordering hits. \item \link{Vector-setops} for set operations on vector-like objects. \item \link{Vector-merge} for merging vector-like objects. \item \link[IRanges]{Ranges-comparison} in the \pkg{IRanges} package for comparing and ordering ranges. \item \code{\link{==}} and \code{\link{\%in\%}} in the \pkg{base} package, and \code{BiocGenerics::\link[BiocGenerics]{match}}, \code{BiocGenerics::\link[BiocGenerics]{duplicated}}, \code{BiocGenerics::\link[BiocGenerics]{unique}}, \code{BiocGenerics::\link[BiocGenerics]{order}}, \code{BiocGenerics::\link[BiocGenerics]{sort}}, \code{BiocGenerics::\link[BiocGenerics]{rank}} in the \pkg{BiocGenerics} package for general information about the comparison/ordering operators and functions. \item The \link{Hits} class. \item \code{BiocGeneric::\link[BiocGenerics]{table}} in the \pkg{BiocGenerics} package. } } \examples{ ## --------------------------------------------------------------------- ## A. SIMPLE EXAMPLES ## --------------------------------------------------------------------- y <- c(16L, -3L, -2L, 15L, 15L, 0L, 8L, 15L, -2L) selfmatch(y) x <- c(unique(y), 999L) findMatches(x, y) countMatches(x, y) ## See ?`Ranges-comparison` for more examples (on Ranges objects). You ## might need to load the IRanges package first. ## --------------------------------------------------------------------- ## B. FOR DEVELOPERS: HOW TO IMPLEMENT THE BINARY COMPARISON OPERATORS ## FOR YOUR Vector SUBCLASS ## --------------------------------------------------------------------- ## The answer is: don't implement them. Just implement pcompare() and the ## binary comparison operators will work out-of-the-box. Here is an ## example: ## (1) Implement a simple Vector subclass. setClass("Raw", contains="Vector", representation(data="raw")) setMethod("length", "Raw", function(x) length(x@data)) setMethod("[", "Raw", function(x, i, j, ..., drop) { x@data <- x@data[i]; x } ) x <- new("Raw", data=charToRaw("AB.x0a-BAA+C")) stopifnot(identical(length(x), 12L)) stopifnot(identical(x[7:3], new("Raw", data=charToRaw("-a0x.")))) ## (2) Implement a "pcompare" method for Raw objects. setMethod("pcompare", c("Raw", "Raw"), function(x, y) {as.integer(x@data) - as.integer(y@data)} ) stopifnot(identical(which(x == x[1]), c(1L, 9L, 10L))) stopifnot(identical(x[x < x[5]], new("Raw", data=charToRaw(".-+")))) } \keyword{methods} S4Vectors/man/Vector-merge.Rd0000644000175400017540000000627313175714520017052 0ustar00biocbuildbiocbuild\name{Vector-merge} \alias{Vector-merge} \alias{merge} \alias{merge,Vector,Vector-method} \title{Merge vector-like objects} \description{ A \code{merge} method for vector-like objects. } \usage{ \S4method{merge}{Vector,Vector}(x, y, ..., all=FALSE, all.x=NA, all.y=NA, sort=TRUE) } \arguments{ \item{x, y, ...}{ Vector-like objects, typically all of the same class and typically not list-like objects (even though some list-like objects like \link[IRanges]{Ranges} and \link[Biostrings]{DNAStringSet} are supported). Duplicated elements in each object are removed with a warning. } \item{all}{ \code{TRUE} or \code{FALSE}. Whether the vector elements in the result should be the union (when \code{all=TRUE}) or intersection (when \code{all=FALSE}) of the vector elements in \code{x}, \code{y}, \code{...}. } \item{all.x, all.y}{ To be used only when merging 2 objects (binary merge). Both \code{all.x} and \code{all.y} must be single logicals. If any of them is \code{NA}, then it's set to the value of \code{all}. Setting both of them to \code{TRUE} or both of them to \code{FALSE} is equivalent to setting \code{all} to \code{TRUE} or to \code{FALSE}, respectively (see above). If \code{all.x} is \code{TRUE} and \code{all.y} is \code{FALSE} then the vector elements in the result will be the unique elements in \code{x}. If \code{all.x} is \code{FALSE} and \code{all.y} is \code{TRUE} then the vector elements in the result will be the unique elements in \code{y}. } \item{sort}{ Whether to sort the merged result. } } \details{ This \code{merge} method acts much like \code{\link{merge.data.frame}}, except for 3 important differences: \enumerate{ \item The matching is based on the vector values, not arbitrary columns in a table. \item Self merging is a no-op if \code{sort=FALSE} (or object already sorted) and if the object has no duplicates. \item This \code{merge} method accepts an arbitrary number of vector-like objects (n-ary merge). } If some of the objects to merge are list-like objects not supported by the method described here, then the merging is simply done by calling \code{base::merge()} on the objects. This might succeed or not... } \value{ A vector-like object of the same class as the input objects (if they all have the same class) containing the merged vector values and metadata columns. } \seealso{ \itemize{ \item The \link{Vector} class. \item \link{Vector-comparison} for comparing and ordering vector-like objects. \item \link{Vector-setops} for set operations on vector-like objects. } } \examples{ library(GenomicRanges) x <- GRanges(c("chr1:1-1000", "chr2:2000-3000"), score=c(0.45, 0.1), a1=c(5L, 7L), a2=c(6, 8)) y <- GRanges(c("chr2:150-151", "chr1:1-10", "chr2:2000-3000"), score=c(0.7, 0.82, 0.1), b1=c(0L, 5L, 1L), b2=c(1, -2, 1)) merge(x, y) merge(x, y, all=TRUE) merge(x, y, all.x=TRUE) merge(x, y, all.y=TRUE) ## Shared metadata columns must agree: mcols(x)$score[2] <- 0.11 #merge(x, y) # error! ## NAs agree with anything: mcols(x)$score[2] <- NA merge(x, y) } \keyword{methods} S4Vectors/man/Vector-setops.Rd0000644000175400017540000000553013175714520017263 0ustar00biocbuildbiocbuild\name{Vector-setops} \alias{Vector-setops} \alias{union.Vector} \alias{union,Vector,Vector-method} \alias{intersect.Vector} \alias{intersect,Vector,Vector-method} \alias{setdiff.Vector} \alias{setdiff,Vector,Vector-method} \alias{setequal.Vector} \alias{setequal,Vector,Vector-method} \title{Set operations on vector-like objects} \description{ Perform set operations on \link{Vector} objects. } \usage{ \S4method{union}{Vector,Vector}(x, y) \S4method{intersect}{Vector,Vector}(x, y) \S4method{setdiff}{Vector,Vector}(x, y) \S4method{setequal}{Vector,Vector}(x, y) } \arguments{ \item{x, y}{ Vector-like objects. } } \details{ The \code{union}, \code{intersect}, and \code{setdiff} methods for \link{Vector} objects return a \link{Vector} object containing respectively the union, intersection, and (asymmetric!) difference of the 2 sets of vector elements in \code{x} and \code{y}. The \code{setequal} method for \link{Vector} objects checks for \emph{set equality} between \code{x} and \code{y}. They're defined as follow: \preformatted{ setMethod("union", c("Vector", "Vector"), function(x, y) unique(c(x, y)) ) setMethod("intersect", c("Vector", "Vector"), function(x, y) unique(x[x \%in\% y]) ) setMethod("setdiff", c("Vector", "Vector"), function(x, y) unique(x[!(x \%in\% y)]) ) setMethod("setequal", c("Vector", "Vector"), function(x, y) all(x \%in\% y) && all(y \%in\% x) ) } so they work out-of-the-box on \link{Vector} objects for which \code{c}, \code{unique}, and \code{\%in\%} are defined. } \value{ \code{union} returns a \link{Vector} object obtained by appending to \code{x} the elements in \code{y} that are not already in \code{x}. \code{intersect} returns a \link{Vector} object obtained by keeping only the elements in \code{x} that are also in \code{y}. \code{setdiff} returns a \link{Vector} object obtained by dropping from \code{x} the elements that are in \code{y}. \code{setequal} returns \code{TRUE} if \code{x} and \code{y} contain the same \emph{sets} of vector elements and \code{FALSE} otherwise. \code{union}, \code{intersect}, and \code{setdiff} propagate the names and metadata columns of their first argument (\code{x}). } \author{Hervé Pagès} \seealso{ \itemize{ \item \link{Vector-comparison} for comparing and ordering vector-like objects. \item \link{Vector-merge} for merging vector-like objects. \item \link{Vector} objects. \item \code{BiocGenerics::\link[BiocGenerics]{union}}, \code{BiocGenerics::\link[BiocGenerics]{intersect}}, and \code{BiocGenerics::\link[BiocGenerics]{setdiff}} in the \pkg{BiocGenerics} package for general information about these generic functions. } } \examples{ ## See ?`Hits-setops` for some examples. } \keyword{methods} S4Vectors/man/aggregate-methods.Rd0000644000175400017540000000716713175714520020105 0ustar00biocbuildbiocbuild\name{aggregate-methods} \alias{aggregate-methods} \alias{aggregate} \alias{aggregate,matrix-method} \alias{aggregate,data.frame-method} \alias{aggregate,ts-method} \alias{aggregate.Vector} \alias{aggregate,Vector-method} \alias{aggregate,Rle-method} \alias{aggregate,List-method} \title{Compute summary statistics of subsets of vector-like objects} \description{ The \pkg{S4Vectors} package defines \code{\link[stats]{aggregate}} methods for \link{Vector}, \link{Rle}, and \link{List} objects. } \usage{ \S4method{aggregate}{Vector}(x, by, FUN, start=NULL, end=NULL, width=NULL, frequency=NULL, delta=NULL, ..., simplify=TRUE) \S4method{aggregate}{Rle}(x, by, FUN, start=NULL, end=NULL, width=NULL, frequency=NULL, delta=NULL, ..., simplify=TRUE) \S4method{aggregate}{List}(x, by, FUN, start=NULL, end=NULL, width=NULL, frequency=NULL, delta=NULL, ..., simplify=TRUE) } \arguments{ \item{x}{ A \link{Vector}, \link{Rle}, or \link{List} object. } \item{by}{ An object with \code{\link[BiocGenerics]{start}}, \code{\link[BiocGenerics]{end}}, and \code{\link[BiocGenerics]{width}} methods. If \code{x} is a \link{List} object, the \code{by} parameter can be a \link[IRanges]{RangesList} object to aggregate within the list elements rather than across them. When \code{by} is a \link[IRanges]{RangesList} object, the output is either a \link[IRanges]{SimpleAtomicList} object, if possible, or a \link{SimpleList} object, if not. } \item{FUN}{ The function, found via \code{match.fun}, to be applied to each subset of \code{x}. } \item{start, end, width}{ The start, end, and width of the subsets. If \code{by} is missing, then two of the three must be supplied and have the same length. } \item{frequency, delta}{ Optional arguments that specify the sampling frequency and increment within the subsets (in the same fashion as \code{\link[stats]{window}} from the \pkg{stats} package does). } \item{...}{ Optional arguments to \code{FUN}. } \item{simplify}{ A logical value specifying whether the result should be simplified to a vector or matrix if possible. } } \details{ Subsets of \code{x} can be specified either via the \code{by} argument or via the \code{start}, \code{end}, \code{width}, \code{frequency}, and \code{delta} arguments. For example, if \code{start} and \code{end} are specified, then: \preformatted{ aggregate(x, FUN=FUN, start=start, end=end, ..., simplify=simplify) } is equivalent to: \preformatted{ sapply(seq_along(start), function(i) FUN(x[start[i]:end[i]], ...), simplify=simplify) } (replace \code{x[start[i]:end[i]]} with 2D-style subsetting \code{x[start[i]:end[i], ]} if \code{x} is a \link{DataFrame} object). } \seealso{ \itemize{ \item The \code{\link[stats]{aggregate}} function in the \pkg{stats} package. \item \link{Vector}, \link{Rle}, \link{List}, and \link{DataFrame} objects. \item The \code{\link[BiocGenerics]{start}}, \code{\link[BiocGenerics]{end}}, and \code{\link[BiocGenerics]{width}} generic functions defined in the \pkg{BiocGenerics} package. } } \examples{ x <- Rle(10:2, 1:9) aggregate(x, x > 4, mean) aggregate(x, FUN=mean, start=1:26, width=20) ## Note that aggregate() works on a DataFrame object the same way it ## works on an ordinary data frame: aggregate(DataFrame(state.x77), list(Region=state.region), mean) aggregate(weight ~ feed, data=DataFrame(chickwts), mean) library(IRanges) by <- IRanges(start=1:26, width=20, names=LETTERS) aggregate(x, by, is.unsorted) } \keyword{methods} \keyword{utilities} S4Vectors/man/character-utils.Rd0000644000175400017540000001115113175714520017574 0ustar00biocbuildbiocbuild\name{character-utils} \alias{unstrsplit} \alias{unstrsplit,list-method} \alias{unstrsplit,character-method} \alias{safeExplode} \alias{strsplitAsListOfIntegerVectors} \alias{svn.time} \title{Some utility functions to operate on strings} \description{ Some low-level string utilities that operate on ordinary character vectors. For more advanced string manipulations, see the \pkg{Biostrings} package. } \usage{ unstrsplit(x, sep="") # 'sep' default is "" (empty string) strsplitAsListOfIntegerVectors(x, sep=",") # 'sep' default is "," } \arguments{ \item{x}{ For \code{unstrsplit}: A list-like object where each list element is a character vector, or a character vector (identity). For \code{strsplitAsListOfIntegerVectors}: A character vector where each element is a string containing comma-separated decimal integer values. } \item{sep}{ A single string containing the separator character. For \code{strsplitAsListOfIntegerVectors}, the separator must be a single-byte character. } } \details{ \subsection{unstrsplit}{ \code{unstrsplit(x, sep)} is equivalent to (but much faster than) \code{sapply(x, paste0, collapse=sep)}. It's performing the reverse transformation of \code{\link{strsplit}( , fixed=TRUE)}, that is, if \code{x} is a character vector with no NAs and \code{sep} a single string, then \code{unstrsplit(strsplit(x, split=sep, fixed=TRUE), sep)} is identical to \code{x}. A notable exception to this though is when \code{strsplit} finds a match at the end of a string, in which case the last element of the output (which should normally be an empty string) is not returned (see \code{?strsplit} for the details). } \subsection{strsplitAsListOfIntegerVectors}{ \code{strsplitAsListOfIntegerVectors} is similar to the \code{strsplitAsListOfIntegerVectors2} function shown in the Examples section below, except that the former generally raises an error where the latter would have inserted an \code{NA} in the returned object. More precisely: \itemize{ \item The latter accepts NAs in the input, the former doesn't (raises an error). \item The latter introduces NAs by coercion (with a warning), the former doesn't (raises an error). \item The latter supports "inaccurate integer conversion in coercion" when the value to coerce is > INT_MAX (then it's coerced to INT_MAX), the former doesn't (raises an error). \item The latter coerces non-integer values (e.g. 10.3) to an int by truncating them, the former doesn't (raises an error). } When it fails, \code{strsplitAsListOfIntegerVectors} will print an informative error message. Finally, \code{strsplitAsListOfIntegerVectors} is faster and uses much less memory than \code{strsplitAsListOfIntegerVectors2}. } } \value{ \code{unstrsplit} returns a character vector with one string per list element in \code{x}. \code{strsplitAsListOfIntegerVectors} returns a list where each list element is an integer vector. There is one list element per string in \code{x}. } \author{Hervé Pagès} \seealso{ \itemize{ \item The \code{\link[base]{strsplit}} function in the \pkg{base} package. } } \examples{ ## --------------------------------------------------------------------- ## unstrsplit() ## --------------------------------------------------------------------- x <- list(A=c("abc", "XY"), B=NULL, C=letters[1:4]) unstrsplit(x) unstrsplit(x, sep=",") unstrsplit(x, sep=" => ") data(islands) x <- names(islands) y <- strsplit(x, split=" ", fixed=TRUE) x2 <- unstrsplit(y, sep=" ") stopifnot(identical(x, x2)) ## But... names(x) <- x y <- strsplit(x, split="in", fixed=TRUE) x2 <- unstrsplit(y, sep="in") y[x != x2] ## In other words: strsplit() behavior sucks :-/ ## --------------------------------------------------------------------- ## strsplitAsListOfIntegerVectors() ## --------------------------------------------------------------------- x <- c("1116,0,-19", " +55291 , 2476,", "19184,4269,5659,6470,6721,7469,14601", "7778889, 426900, -4833,5659,6470,6721,7096", "19184 , -99999") y <- strsplitAsListOfIntegerVectors(x) y ## In normal situations (i.e. when the input is well-formed), ## strsplitAsListOfIntegerVectors() does actually the same as the ## function below but is more efficient (both in speed and memory ## footprint): strsplitAsListOfIntegerVectors2 <- function(x, sep=",") { tmp <- strsplit(x, sep, fixed=TRUE) lapply(tmp, as.integer) } y2 <- strsplitAsListOfIntegerVectors2(x) stopifnot(identical(y, y2)) } \keyword{utilities} S4Vectors/man/expand-methods.Rd0000644000175400017540000000414713175714520017431 0ustar00biocbuildbiocbuild\name{expand} \alias{expand} \alias{expand,DataFrame-method} \alias{expand,Vector-method} \title{Unlist the list-like columns of a DataFrame object} \description{ \code{expand} transforms a \link{DataFrame} object into a new \link{DataFrame} object where the columns specified by the user are unlisted. The transformed \link{DataFrame} object has the same colnames as the original but typically more rows. } \usage{ \S4method{expand}{DataFrame}(x, colnames, keepEmptyRows = FALSE) } \arguments{ \item{x}{ A \link{DataFrame} object with list-like columns or a \link{Vector} object with list-like metadata columns (i.e. with list-like columns in \code{mcols(x)}). } \item{colnames}{ A \code{character} or \code{numeric} vector containing the names or indices of the list-like columns to unlist. The order in which columns are unlisted is controlled by the column order in this vector. This defaults to all of the recursive (list-like) columns in \code{x}. } \item{keepEmptyRows}{ A \code{logical} indicating if rows containing empty list elements in the specified \code{colnames} should be retained or dropped. When \code{TRUE}, list elements are replaced with NA and all rows are kept. When \code{FALSE}, rows with empty list elements in the \code{colnames} columns are dropped. } } \value{ A \link{DataFrame} object that has been expanded row-wise to match the length of the unlisted columns. } \seealso{ \itemize{ \item \link{DataFrame} objects. } } \examples{ library(IRanges) aa <- CharacterList("a", paste0("d", 1:2), paste0("b", 1:3), c(), "c") bb <- CharacterList(paste0("sna", 1:2),"foo", paste0("bar",1:3),c(),"hica") df <- DataFrame(aa=aa, bb=bb, cc=11:15) ## Expand by all list-like columns (aa, bb), dropping rows with empty ## list elements: expand(df) ## Expand the aa column only: expand(df, colnames="aa", keepEmptyRows=TRUE) expand(df, colnames="aa", keepEmptyRows=FALSE) ## Expand the aa and then the bb column: expand(df, colnames=c("aa","bb"), keepEmptyRows=TRUE) expand(df, colnames=c("aa","bb"), keepEmptyRows=FALSE) } \keyword{methods} S4Vectors/man/isSorted.Rd0000644000175400017540000000634613175714520016310 0ustar00biocbuildbiocbuild\name{isSorted} \alias{isSorted} \alias{isSorted,ANY-method} \alias{isConstant} \alias{isConstant,integer-method} \alias{isConstant,numeric-method} \alias{isConstant,array-method} \alias{isStrictlySorted} \alias{isStrictlySorted,ANY-method} \title{Test if a vector-like object is sorted} \description{ \code{isSorted} and \code{isStrictlySorted} test if a vector-like object is sorted or strictly sorted, respectively. \code{isConstant} tests if a vector-like or array-like object is constant. Currently only \code{isConstant} methods for vectors or arrays of type integer or double are implemented. } \usage{ isSorted(x) isStrictlySorted(x) isConstant(x) } \arguments{ \item{x}{ A vector-like object. Can also be an array-like object for \code{isConstant}. } } \details{ Vector-like objects of length 0 or 1 are always considered to be sorted, strictly sorted, and constant. Strictly sorted and constant objects are particular cases of sorted objects. \code{isStrictlySorted(x)} is equivalent to \code{isSorted(x) && !anyDuplicated(x)} } \value{ A single logical i.e. \code{TRUE}, \code{FALSE} or \code{NA}. } \author{Hervé Pagès} \seealso{ \itemize{ \item \code{\link{is.unsorted}}. \item \code{\link{duplicated}} and \code{\link{unique}}. \item \code{\link{all.equal}}. \item \code{\link{NA}} and \code{\link{is.finite}}. } } \examples{ ## --------------------------------------------------------------------- ## A. isSorted() and isStrictlySorted() ## --------------------------------------------------------------------- x <- 1:10 isSorted(x) # TRUE isSorted(-x) # FALSE isSorted(rev(x)) # FALSE isSorted(-rev(x)) # TRUE isStrictlySorted(x) # TRUE x2 <- rep(x, each=2) isSorted(x2) # TRUE isStrictlySorted(x2) # FALSE ## --------------------------------------------------------------------- ## B. "isConstant" METHOD FOR integer VECTORS ## --------------------------------------------------------------------- ## On a vector with no NAs: stopifnot(isConstant(rep(-29L, 10000))) ## On a vector with NAs: stopifnot(!isConstant(c(0L, NA, -29L))) stopifnot(is.na(isConstant(c(-29L, -29L, NA)))) ## On a vector of length <= 1: stopifnot(isConstant(NA_integer_)) ## --------------------------------------------------------------------- ## C. "isConstant" METHOD FOR numeric VECTORS ## --------------------------------------------------------------------- ## This method does its best to handle rounding errors and special ## values NA, NaN, Inf and -Inf in a way that "makes sense". ## Below we only illustrate handling of rounding errors. ## Here values in 'x' are "conceptually" the same: x <- c(11/3, 2/3 + 4/3 + 5/3, 50 + 11/3 - 50, 7.00001 - 1000003/300000) ## However, due to machine rounding errors, they are not *strictly* ## equal: duplicated(x) unique(x) ## only *nearly* equal: all.equal(x, rep(11/3, 4)) # TRUE ## 'isConstant(x)' uses 'all.equal()' internally to decide whether ## the values in 'x' are all the same or not: stopifnot(isConstant(x)) ## This is not perfect though: isConstant((x - 11/3) * 1e8) # FALSE on Intel Pentium paltforms # (but this is highly machine dependent!) } \keyword{utilities} S4Vectors/man/shiftApply-methods.Rd0000644000175400017540000000406113175714520020270 0ustar00biocbuildbiocbuild\name{shiftApply-methods} \alias{shiftApply-methods} \alias{shiftApply} \alias{shiftApply,Vector,Vector-method} \alias{shiftApply,vector,vector-method} \title{Apply a function over subsequences of 2 vector-like objects} \description{ \code{shiftApply} loops and applies a function overs subsequences of vector-like objects \code{X} and \code{Y}. } \usage{ shiftApply(SHIFT, X, Y, FUN, ..., OFFSET=0L, simplify=TRUE, verbose=FALSE) } \arguments{ \item{SHIFT}{A non-negative integer vector of shift values.} \item{X, Y}{The vector-like objects to shift.} \item{FUN}{The function, found via \code{match.fun}, to be applied to each set of shifted vectors.} \item{...}{Further arguments for \code{FUN}.} \item{OFFSET}{A non-negative integer offset to maintain throughout the shift operations.} \item{simplify}{A logical value specifying whether or not the result should be simplified to a vector or matrix if possible.} \item{verbose}{A logical value specifying whether or not to print the \code{i} indices to track the iterations.} } \details{ Let \code{i} be the indices in \code{SHIFT}, \code{X_i = window(X, 1 + OFFSET, length(X) - SHIFT[i])}, and \code{Y_i = window(Y, 1 + SHIFT[i], length(Y) - OFFSET)}. \code{shiftApply} calculates the set of \code{FUN(X_i, Y_i, ...)} values and returns the results in a convenient form. } \seealso{ \itemize{ \item The \code{\link[S4Vectors]{window}} and \code{\link[S4Vectors]{aggregate}} methods for vector-like objects defined in the \pkg{S4Vectors} package. \item \link{Vector} and \link{Rle} objects. } } \examples{ set.seed(0) lambda <- c(rep(0.001, 4500), seq(0.001, 10, length = 500), seq(10, 0.001, length = 500)) xRle <- Rle(rpois(1e7, lambda)) yRle <- Rle(rpois(1e7, lambda[c(251:length(lambda), 1:250)])) cor(xRle, yRle) shifts <- seq(235, 265, by=3) corrs <- shiftApply(shifts, yRle, xRle, FUN=cor) cor(xRle, yRle) shiftApply(249:251, yRle, xRle, FUN=function(x, y) var(x, y) / (sd(x) * sd(y))) } \keyword{methods} \keyword{utilities} S4Vectors/man/show-utils.Rd0000644000175400017540000000106213175714520016620 0ustar00biocbuildbiocbuild\name{show-utils} \alias{show-utils} \alias{classNameForDisplay} \alias{classNameForDisplay,ANY-method} \alias{classNameForDisplay,AsIs-method} \alias{showAsCell} \alias{showAsCell,ANY-method} \alias{showAsCell,AsIs-method} \alias{showAsCell,Date-method} \alias{showAsCell,POSIXt-method} \title{Display utilities} \description{ Low-level utility functions and classes defined in the \pkg{S4Vectors} package to support display of vector-like objects. They are not intended to be used directly. } \keyword{utilities} \keyword{classes} \keyword{methods} S4Vectors/man/split-methods.Rd0000644000175400017540000000412113175714520017275 0ustar00biocbuildbiocbuild\name{split-methods} \alias{split-methods} \alias{split} \alias{split,Vector,ANY-method} \alias{split,ANY,Vector-method} \alias{split,Vector,Vector-method} \alias{split,list,Vector-method} \title{Divide a vector-like object into groups} \description{ \code{split} divides the data in a vector-like object \code{x} into the groups defined by \code{f}. NOTE: This man page is for the \code{split} methods defined in the \pkg{S4Vectors} package. See \code{?base::\link[base]{split}} for the default method (defined in the \pkg{base} package). } \usage{ \S4method{split}{Vector,ANY}(x, f, drop=FALSE) \S4method{split}{ANY,Vector}(x, f, drop=FALSE) \S4method{split}{Vector,Vector}(x, f, drop=FALSE) \S4method{split}{list,Vector}(x, f, drop=FALSE, ...) } \arguments{ \item{x, f}{ 2 vector-like objects of the same length. \code{f} will typically be a factor, but not necessarily. } \item{drop}{ Logical indicating if levels that do not occur should be dropped (if \code{f} is a factor). } \item{...}{ Arguments passed to \code{base::\link{split}} (see Details below). } } \details{ The first 3 methods just delegate to the \code{IRanges::\link[IRanges]{splitAsList}} function defined in the \pkg{IRanges} package. The method for \code{list} does: \preformatted{ split(x, as.vector(f), drop=drop, ...) } } \value{ All these methods behave like \code{base::\link{split}} except that the first 3 methods return a \link{List} object instead of an ordinary list. } \seealso{ \itemize{ \item The \code{\link[base]{split}} function in the \pkg{base} package. \item The \code{\link[IRanges]{splitAsList}} function in the \pkg{IRanges} package. \item \link{Vector} and \link{List} objects. \item \link{Rle} and \link{DataFrame} objects. } } \examples{ ## On an Rle object: x <- Rle(101:105, 6:2) split(x, c("B", "B", "A", "B", "A")) ## On a DataFrame object: groups <- c("group1", "group2") DF <- DataFrame( a=letters[1:10], i=101:110, group=rep(factor(groups, levels=groups), c(3, 7)) ) split(DF, DF$group) } \keyword{manip} S4Vectors/man/subsetting-utils.Rd0000644000175400017540000000247213175714520020035 0ustar00biocbuildbiocbuild\name{subsetting-utils} \alias{subsetting-utils} \alias{class:NSBS} \alias{NSBS-class} \alias{length,NSBS-method} \alias{anyDuplicated.NSBS} \alias{anyDuplicated,NSBS-method} \alias{isStrictlySorted,NSBS-method} \alias{NSBS} \alias{NSBS,NSBS-method} \alias{NSBS,missing-method} \alias{NSBS,NULL-method} \alias{NSBS,numeric-method} \alias{NSBS,logical-method} \alias{NSBS,character-method} \alias{NSBS,factor-method} \alias{NSBS,array-method} \alias{as.integer,NativeNSBS-method} \alias{as.integer,RangeNSBS-method} \alias{length,RangeNSBS-method} \alias{anyDuplicated,RangeNSBS-method} \alias{isStrictlySorted,RangeNSBS-method} \alias{show,RangeNSBS-method} \alias{normalizeSingleBracketSubscript} \alias{normalizeSingleBracketReplacementValue} \alias{normalizeSingleBracketReplacementValue,ANY-method} \alias{extractROWS} \alias{extractROWS,ANY,ANY-method} \alias{extractROWS,vector_OR_factor,RangeNSBS-method} \alias{replaceROWS} \alias{replaceROWS,ANY-method} \alias{normalizeDoubleBracketSubscript} \alias{getListElement} \alias{setListElement} \title{Subsetting utilities} \description{ Low-level utility functions and classes defined in the \pkg{S4Vectors} package to support subsetting of vector-like objects. They are not intended to be used directly. } \keyword{utilities} \keyword{classes} \keyword{methods} S4Vectors/man/zip-methods.Rd0000644000175400017540000000214013175714520016743 0ustar00biocbuildbiocbuild\name{zip-methods} \alias{zipup} \alias{zipup,ANY,ANY-method} \alias{zipdown} \alias{zipdown,ANY-method} \alias{zipdown,List-method} \title{Convert between parallel vectors and lists} \description{ The \code{zipup} and \code{zipdown} functions convert between two parallel vectors and a list of doublets (elements of length 2). The metaphor, borrowed from Python's \code{zip}, is that of a zipper. The \code{zipup} function interleaves the elements of the parallel vectors into a list of doublets. The inverse operation is \code{zipdown}, which returns a \code{\linkS4class{Pairs}} object. } \usage{ zipup(x, y, ...) zipdown(x, ...) } \arguments{ \item{x,y}{ For \code{zipup}, any vector-like object. For \code{zipdown}, a doublet list. } \item{\dots}{ Arguments passed to methods. } } \value{ For \code{zipup}, a list-like object, where every element is of length 2. For \code{zipdown}, a \code{\linkS4class{Pairs}} object. } \seealso{ \itemize{ \item \linkS4class{Pairs} objects. } } \examples{ z <- zipup(1:10, Rle(1L, 10)) pairs <- zipdown(z) } \keyword{methods} S4Vectors/src/0000755000175400017540000000000013175736135014236 5ustar00biocbuildbiocbuildS4Vectors/src/AEbufs.c0000644000175400017540000007771413175736136015570 0ustar00biocbuildbiocbuild/**************************************************************************** * Auto-Extending buffers * * Author: H. Pag\`es * ****************************************************************************/ #include "S4Vectors.h" #include /* for malloc, free, realloc */ #include /* for INT_MAX */ #define MAX_BUFLENGTH_INC 33554432ULL // 2^25 /* IMPORTANT: Keep MAX_BUFLENGTH <= R_XLEN_T_MAX (i.e. 2^52, see Rinternals.h) otherwise casting a buffer length (size_t) to R_xlen_t will not do the right thing (undefined behavior). For now we set MAX_BUFLENGTH to 4294967296 only (i.e. 2^32). This is big enough to support buffers of the length of the human genome. */ #define MAX_BUFLENGTH (128ULL * MAX_BUFLENGTH_INC) // 2^32 /* Guaranteed to return a value > 'buflength', or to raise an error. */ size_t _increase_buflength(size_t buflength) { if (buflength >= MAX_BUFLENGTH) error("S4Vectors internal error in _increase_buflength(): " "MAX_BUFLENGTH reached"); if (buflength == 0) return 128; if (buflength <= MAX_BUFLENGTH_INC) return 2 * buflength; buflength += MAX_BUFLENGTH_INC; if (buflength <= MAX_BUFLENGTH) return buflength; return MAX_BUFLENGTH; } /**************************************************************************** * Low-level memory management. */ static int use_malloc = 0; SEXP AEbufs_use_malloc(SEXP x) { use_malloc = LOGICAL(x)[0]; return R_NilValue; } static void *alloc2(size_t nmemb, size_t memb_size) { void *ptr; if (nmemb > MAX_BUFLENGTH) error("S4Vectors internal error in alloc2(): " "buffer is too big"); if (use_malloc) { //printf("alloc2: nmemb=%d\n", nmemb); memb_size *= nmemb; ptr = malloc(memb_size); if (ptr == NULL) error("S4Vectors internal error in alloc2(): " "cannot allocate memory"); } else { ptr = (void *) R_alloc(nmemb, (int) memb_size); } return ptr; } /* 'new_nmemb' must be > 'old_nmemb'. */ static void *realloc2(void *ptr, size_t old_nmemb, size_t new_nmemb, size_t memb_size) { void *new_ptr; if (new_nmemb > MAX_BUFLENGTH) error("S4Vectors internal error in realloc2(): " "buffer is too big"); if (new_nmemb <= old_nmemb) error("S4Vectors internal error in realloc2(): " "'new_nmemb' must be > 'old_nmemb'"); if (old_nmemb == 0) return alloc2(new_nmemb, memb_size); if (use_malloc) { //printf("realloc2: new_nmemb=%lu old_nmemb=%lu\n", // new_nmemb, old_nmemb); memb_size *= new_nmemb; new_ptr = realloc(ptr, memb_size); if (new_ptr == NULL) error("S4Vectors internal error in realloc2(): " "cannot reallocate memory"); } else { new_ptr = (void *) R_alloc(new_nmemb, (int) memb_size); memcpy(new_ptr, ptr, old_nmemb * memb_size); } return new_ptr; } /**************************************************************************** * IntAE buffers */ #define INTAE_POOL_MAXLEN 256 static IntAE *IntAE_pool[INTAE_POOL_MAXLEN]; static int IntAE_pool_len = 0; size_t _IntAE_get_nelt(const IntAE *ae) { return ae->_nelt; } size_t _IntAE_set_nelt(IntAE *ae, size_t nelt) { if (nelt > ae->_buflength) error("S4Vectors internal error in _IntAE_set_nelt(): " "trying to set a nb of buffer elements that exceeds " "the buffer length"); return ae->_nelt = nelt; } static IntAE *new_empty_IntAE() { IntAE *ae; if (use_malloc && IntAE_pool_len >= INTAE_POOL_MAXLEN) error("S4Vectors internal error in new_empty_IntAE(): " "IntAE pool is full"); ae = (IntAE *) alloc2(1, sizeof(IntAE)); ae->_buflength = ae->_nelt = 0; if (use_malloc) IntAE_pool[IntAE_pool_len++] = ae; return ae; } void _IntAE_set_val(const IntAE *ae, int val) { size_t ae_nelt, i; int *elt_p; ae_nelt = _IntAE_get_nelt(ae); elt_p = ae->elts; for (i = 0; i < ae_nelt; i++) *(elt_p++) = val; return; } void _IntAE_extend(IntAE *ae, size_t new_buflength) { ae->elts = (int *) realloc2(ae->elts, ae->_buflength, new_buflength, sizeof(int)); ae->_buflength = new_buflength; return; } static int IntAE_extend_if_full(IntAE *ae) { if (_IntAE_get_nelt(ae) < ae->_buflength) return 0; _IntAE_extend(ae, _increase_buflength(ae->_buflength)); return 1; } void _IntAE_insert_at(IntAE *ae, size_t at, int val) { size_t ae_nelt, i; int *elt1_p; const int *elt2_p; ae_nelt = _IntAE_get_nelt(ae); if (at > ae_nelt) error("S4Vectors internal error in _IntAE_insert_at(): " "trying to insert a buffer element at an invalid " "buffer position"); IntAE_extend_if_full(ae); elt1_p = ae->elts + ae_nelt; elt2_p = elt1_p - 1; for (i = ae_nelt; i > at; i--) *(elt1_p--) = *(elt2_p--); *elt1_p = val; _IntAE_set_nelt(ae, ae_nelt + 1); return; } IntAE *_new_IntAE(size_t buflength, size_t nelt, int val) { IntAE *ae; ae = new_empty_IntAE(); if (buflength != 0) { _IntAE_extend(ae, buflength); _IntAE_set_nelt(ae, nelt); _IntAE_set_val(ae, val); } return ae; } void _IntAE_append(IntAE *ae, const int *newvals, size_t nnewval) { size_t ae_nelt, new_nelt; int *dest; ae_nelt = _IntAE_get_nelt(ae); new_nelt = ae_nelt + nnewval; if (new_nelt > ae->_buflength) _IntAE_extend(ae, new_nelt); dest = ae->elts + ae_nelt; memcpy(dest, newvals, nnewval * sizeof(int)); _IntAE_set_nelt(ae, new_nelt); return; } /* * Delete 'nelt' elements, starting at position 'at'. * Calling _IntAE_delete_at(x, at, nelt) is equivalent to calling * _IntAE_delete_at(x, at, 1) nelt times. */ void _IntAE_delete_at(IntAE *ae, size_t at, size_t nelt) { int *elt1_p; const int *elt2_p; size_t ae_nelt, i2; if (nelt == 0) return; elt1_p = ae->elts + at; elt2_p = elt1_p + nelt; ae_nelt = _IntAE_get_nelt(ae); for (i2 = at + nelt; i2 < ae_nelt; i2++) *(elt1_p++) = *(elt2_p++); _IntAE_set_nelt(ae, ae_nelt - nelt); return; } void _IntAE_shift(const IntAE *ae, size_t offset, int shift) { size_t ae_nelt, i; int *elt_p; ae_nelt = _IntAE_get_nelt(ae); elt_p = ae->elts + offset; for (i = offset; i < ae_nelt; i++) *(elt_p++) += shift; return; } /* * Left and right IntAE buffers must have the same length. */ void _IntAE_sum_and_shift(const IntAE *ae1, const IntAE *ae2, int shift) { size_t ae1_nelt, ae2_nelt, i; int *elt1_p; const int *elt2_p; ae1_nelt = _IntAE_get_nelt(ae1); ae2_nelt = _IntAE_get_nelt(ae2); if (ae1_nelt != ae2_nelt) error("S4Vectors internal error in _IntAE_sum_and_shift(): " "the 2 IntAE buffers to sum must have the same length"); elt1_p = ae1->elts; elt2_p = ae2->elts; for (i = 0; i < ae1_nelt; i++) *(elt1_p++) += *(elt2_p++) + shift; return; } void _IntAE_qsort(const IntAE *ae, size_t offset, int desc) { size_t ae_nelt; ae_nelt = _IntAE_get_nelt(ae); if (offset > ae_nelt) error("S4Vectors internal error in _IntAE_qsort(): " "'offset' must be < nb of elements in buffer"); _sort_int_array(ae->elts + offset, ae_nelt - offset, desc); return; } /* * Delete repeated elements i.e. same semantic as 'uniq' command in Unix. * To get the R unique() behavior (modulo re-ordering of the elements), call * _IntAE_qsort() first. */ void _IntAE_uniq(IntAE *ae, size_t offset) { size_t ae_nelt, i2; int *elt1_p; const int *elt2_p; ae_nelt = _IntAE_get_nelt(ae); if (offset > ae_nelt) error("S4Vectors internal error in _IntAE_uniq(): " "'offset' must be < nb of elements in buffer"); if (ae_nelt - offset <= 1) return; elt1_p = ae->elts + offset; elt2_p = elt1_p + 1; for (i2 = offset + 1; i2 < ae_nelt; i2++) { if (*elt2_p != *elt1_p) *(++elt1_p) = *elt2_p; elt2_p++; } _IntAE_set_nelt(ae, elt1_p - ae->elts + 1); return; } SEXP _new_INTEGER_from_IntAE(const IntAE *ae) { size_t ae_nelt; SEXP ans; ae_nelt = _IntAE_get_nelt(ae); /* ae_nelt <= R_XLEN_T_MAX so casting is safe. */ PROTECT(ans = NEW_INTEGER((R_xlen_t) ae_nelt)); memcpy(INTEGER(ans), ae->elts, ae_nelt * sizeof(int)); UNPROTECT(1); return ans; } IntAE *_new_IntAE_from_INTEGER(SEXP x) { size_t x_len; IntAE *ae; /* Casting R_xlen_t to size_t is safe. */ x_len = (size_t) XLENGTH(x); ae = _new_IntAE(x_len, 0, 0); _IntAE_append(ae, INTEGER(x), x_len); return ae; } IntAE *_new_IntAE_from_CHARACTER(SEXP x, int keyshift) { size_t x_len, i; IntAE *ae; int *elt_p; /* Casting R_xlen_t to size_t is safe. */ x_len = (size_t) XLENGTH(x); ae = _new_IntAE(x_len, 0, 0); elt_p = ae->elts; for (i = 0; i < x_len; i++) { sscanf(CHAR(STRING_ELT(x, i)), "%d", elt_p); *(elt_p++) += keyshift; } _IntAE_set_nelt(ae, x_len); return ae; } /* Must be used on a malloc-based IntAE */ static void IntAE_free(IntAE *ae) { if (ae->_buflength != 0) free(ae->elts); free(ae); return; } static void flush_IntAE_pool() { IntAE *ae; while (IntAE_pool_len > 0) { IntAE_pool_len--; ae = IntAE_pool[IntAE_pool_len]; IntAE_free(ae); } return; } static int remove_from_IntAE_pool(const IntAE *ae) { int i; IntAE **ae1_p, **ae2_p; i = IntAE_pool_len; while (--i >= 0 && IntAE_pool[i] != ae) {;} if (i < 0) return -1; ae1_p = IntAE_pool + i; ae2_p = ae1_p + 1; for (i = i + 1; i < IntAE_pool_len; i++) *(ae1_p++) = *(ae2_p++); IntAE_pool_len--; return 0; } /**************************************************************************** * IntAEAE buffers */ #define INTAEAE_POOL_MAXLEN 256 static IntAEAE *IntAEAE_pool[INTAEAE_POOL_MAXLEN]; static int IntAEAE_pool_len = 0; size_t _IntAEAE_get_nelt(const IntAEAE *aeae) { return aeae->_nelt; } size_t _IntAEAE_set_nelt(IntAEAE *aeae, size_t nelt) { if (nelt > aeae->_buflength) error("S4Vectors internal error in _IntAEAE_set_nelt(): " "trying to set a nb of buffer elements that exceeds " "the buffer length"); return aeae->_nelt = nelt; } static IntAEAE *new_empty_IntAEAE() { IntAEAE *aeae; if (use_malloc && IntAEAE_pool_len >= INTAEAE_POOL_MAXLEN) error("S4Vectors internal error in new_empty_IntAEAE(): " "IntAEAE pool is full"); aeae = (IntAEAE *) alloc2(1, sizeof(IntAEAE)); aeae->_buflength = aeae->_nelt = 0; if (use_malloc) IntAEAE_pool[IntAEAE_pool_len++] = aeae; return aeae; } void _IntAEAE_extend(IntAEAE *aeae, size_t new_buflength) { size_t old_buflength, i; old_buflength = aeae->_buflength; aeae->elts = (IntAE **) realloc2(aeae->elts, old_buflength, new_buflength, sizeof(IntAE *)); for (i = old_buflength; i < new_buflength; i++) aeae->elts[i] = NULL; aeae->_buflength = new_buflength; return; } static int IntAEAE_extend_if_full(IntAEAE *aeae) { if (_IntAEAE_get_nelt(aeae) < aeae->_buflength) return 0; _IntAEAE_extend(aeae, _increase_buflength(aeae->_buflength)); return 1; } void _IntAEAE_insert_at(IntAEAE *aeae, size_t at, IntAE *ae) { size_t aeae_nelt, i; IntAE **ae1_p, **ae2_p; aeae_nelt = _IntAEAE_get_nelt(aeae); if (at > aeae_nelt) error("S4Vectors internal error in _IntAEAE_insert_at(): " "trying to insert a buffer element at an invalid " "buffer position"); IntAEAE_extend_if_full(aeae); if (use_malloc && remove_from_IntAE_pool(ae) == -1) error("S4Vectors internal error in _IntAEAE_insert_at(): " "IntAE to insert cannot be found in pool for removal"); ae1_p = aeae->elts + aeae_nelt; ae2_p = ae1_p - 1; for (i = aeae_nelt; i > at; i--) *(ae1_p--) = *(ae2_p--); *ae1_p = ae; _IntAEAE_set_nelt(aeae, aeae_nelt + 1); return; } IntAEAE *_new_IntAEAE(size_t buflength, size_t nelt) { IntAEAE *aeae; size_t i; IntAE *ae; aeae = new_empty_IntAEAE(); if (buflength != 0) { _IntAEAE_extend(aeae, buflength); for (i = 0; i < nelt; i++) { ae = new_empty_IntAE(); _IntAEAE_insert_at(aeae, i, ae); } } return aeae; } /* * Parallel append: left and right IntAEAE buffers must have the same length. */ void _IntAEAE_pappend(const IntAEAE *aeae1, const IntAEAE *aeae2) { size_t aeae1_nelt, aeae2_nelt, i; IntAE *ae1; const IntAE *ae2; aeae1_nelt = _IntAEAE_get_nelt(aeae1); aeae2_nelt = _IntAEAE_get_nelt(aeae2); if (aeae1_nelt != aeae2_nelt) error("S4Vectors internal error in _IntAEAE_pappend(): " "the 2 IntAEAE buffers to pappend must have " "the same length"); for (i = 0; i < aeae1_nelt; i++) { ae1 = aeae1->elts[i]; ae2 = aeae2->elts[i]; _IntAE_append(ae1, ae2->elts, _IntAE_get_nelt(ae2)); } return; } void _IntAEAE_shift(const IntAEAE *aeae, int shift) { size_t aeae_nelt, i; IntAE *ae; aeae_nelt = _IntAEAE_get_nelt(aeae); for (i = 0; i < aeae_nelt; i++) { ae = aeae->elts[i]; _IntAE_shift(ae, 0, shift); } return; } /* * Left and right IntAEAE buffers must have the same length. */ void _IntAEAE_sum_and_shift(const IntAEAE *aeae1, const IntAEAE *aeae2, int shift) { size_t aeae1_nelt, aeae2_nelt, i; IntAE *ae1; const IntAE *ae2; aeae1_nelt = _IntAEAE_get_nelt(aeae1); aeae2_nelt = _IntAEAE_get_nelt(aeae2); if (aeae1_nelt != aeae2_nelt) error("S4Vectors internal error in _IntAEAE_sum_and_shift(): " "the 2 IntAEAE buffers to sum_and_shift must have " "the same length"); for (i = 0; i < aeae1_nelt; i++) { ae1 = aeae1->elts[i]; ae2 = aeae2->elts[i]; _IntAE_sum_and_shift(ae1, ae2, shift); } return; } /* * 'mode' controls how empty list elements should be represented: * 0 -> integer(0); 1 -> NULL; 2 -> NA */ SEXP _new_LIST_from_IntAEAE(const IntAEAE *aeae, int mode) { size_t aeae_nelt, i; SEXP ans, ans_elt; const IntAE *ae; aeae_nelt = _IntAEAE_get_nelt(aeae); /* ae_nelt <= R_XLEN_T_MAX so casting is safe. */ PROTECT(ans = NEW_LIST((R_xlen_t) aeae_nelt)); for (i = 0; i < aeae_nelt; i++) { ae = aeae->elts[i]; if (_IntAE_get_nelt(ae) != 0 || mode == 0) { PROTECT(ans_elt = _new_INTEGER_from_IntAE(ae)); } else if (mode == 1) { continue; } else { // Not sure new LOGICALs are initialized with NAs, // need to check! If not, then LOGICAL(ans_elt)[0] must // be set to NA but I don't know how to do this :-/ PROTECT(ans_elt = NEW_LOGICAL(1)); } SET_VECTOR_ELT(ans, i, ans_elt); UNPROTECT(1); } UNPROTECT(1); return ans; } IntAEAE *_new_IntAEAE_from_LIST(SEXP x) { size_t x_len; IntAEAE *aeae; size_t i; SEXP x_elt; IntAE *ae; /* Casting R_xlen_t to size_t is safe. */ x_len = (size_t) XLENGTH(x); aeae = _new_IntAEAE(x_len, 0); for (i = 0; i < aeae->_buflength; i++) { x_elt = VECTOR_ELT(x, i); if (TYPEOF(x_elt) != INTSXP) error("S4Vectors internal error in " "_new_IntAEAE_from_LIST(): " "not all elements in the list " "are integer vectors"); ae = _new_IntAE_from_INTEGER(x_elt); _IntAEAE_insert_at(aeae, i, ae); } return aeae; } SEXP _IntAEAE_toEnvir(const IntAEAE *aeae, SEXP envir, int keyshift) { size_t aeae_nelt, i; const IntAE *ae; char key[11]; SEXP value; aeae_nelt = _IntAEAE_get_nelt(aeae); for (i = 0; i < aeae_nelt; i++) { ae = aeae->elts[i]; if (_IntAE_get_nelt(ae) == 0) continue; //snprintf(key, sizeof(key), "%d", i + keyshift); snprintf(key, sizeof(key), "%010lu", i + keyshift); PROTECT(value = _new_INTEGER_from_IntAE(ae)); defineVar(install(key), value, envir); UNPROTECT(1); } return envir; } /* Must be used on a malloc-based IntAEAE */ static void IntAEAE_free(IntAEAE *aeae) { size_t buflength, i; IntAE *ae; buflength = aeae->_buflength; for (i = 0; i < buflength; i++) { ae = aeae->elts[i]; if (ae != NULL) IntAE_free(ae); } if (buflength != 0) free(aeae->elts); free(aeae); return; } static void flush_IntAEAE_pool() { IntAEAE *aeae; while (IntAEAE_pool_len > 0) { IntAEAE_pool_len--; aeae = IntAEAE_pool[IntAEAE_pool_len]; IntAEAE_free(aeae); } return; } /**************************************************************************** * IntPairAE buffers */ #define INTPAIRAE_POOL_MAXLEN 256 static IntPairAE *IntPairAE_pool[INTPAIRAE_POOL_MAXLEN]; static int IntPairAE_pool_len = 0; size_t _IntPairAE_get_nelt(const IntPairAE *ae) { return _IntAE_get_nelt(ae->a); } size_t _IntPairAE_set_nelt(IntPairAE *ae, size_t nelt) { _IntAE_set_nelt(ae->a, nelt); _IntAE_set_nelt(ae->b, nelt); return nelt; } static IntPairAE *new_empty_IntPairAE() { IntAE *a, *b; IntPairAE *ae; if (use_malloc && IntPairAE_pool_len >= INTPAIRAE_POOL_MAXLEN) error("S4Vectors internal error in new_empty_IntPairAE(): " "IntPairAE pool is full"); a = new_empty_IntAE(); b = new_empty_IntAE(); ae = (IntPairAE *) alloc2(1, sizeof(IntPairAE)); ae->a = a; ae->b = b; if (use_malloc) { if (remove_from_IntAE_pool(a) == -1 || remove_from_IntAE_pool(b) == -1) error("S4Vectors internal error " "in new_empty_IntPairAE(): " "IntAEs to stick in IntPairAE cannot be found " "in pool for removal"); IntPairAE_pool[IntPairAE_pool_len++] = ae; } return ae; } void _IntPairAE_extend(IntPairAE *ae, size_t new_buflength) { _IntAE_extend(ae->a, new_buflength); _IntAE_extend(ae->b, new_buflength); return; } void _IntPairAE_insert_at(IntPairAE *ae, size_t at, int a, int b) { _IntAE_insert_at(ae->a, at, a); _IntAE_insert_at(ae->b, at, b); return; } IntPairAE *_new_IntPairAE(size_t buflength, size_t nelt) { IntPairAE *ae; ae = new_empty_IntPairAE(); if (buflength != 0) { _IntPairAE_extend(ae, buflength); /* Elements are NOT initialized. */ _IntPairAE_set_nelt(ae, nelt); } return ae; } /* Must be used on a malloc-based IntPairAE */ static void IntPairAE_free(IntPairAE *ae) { IntAE_free(ae->a); IntAE_free(ae->b); free(ae); return; } static void flush_IntPairAE_pool() { IntPairAE *ae; while (IntPairAE_pool_len > 0) { IntPairAE_pool_len--; ae = IntPairAE_pool[IntPairAE_pool_len]; IntPairAE_free(ae); } return; } static int remove_from_IntPairAE_pool(const IntPairAE *ae) { int i; IntPairAE **ae1_p, **ae2_p; i = IntPairAE_pool_len; while (--i >= 0 && IntPairAE_pool[i] != ae) {;} if (i < 0) return -1; ae1_p = IntPairAE_pool + i; ae2_p = ae1_p + 1; for (i = i + 1; i < IntPairAE_pool_len; i++) *(ae1_p++) = *(ae2_p++); IntPairAE_pool_len--; return 0; } /**************************************************************************** * IntPairAEAE buffers */ #define INTPAIRAEAE_POOL_MAXLEN 256 static IntPairAEAE *IntPairAEAE_pool[INTPAIRAEAE_POOL_MAXLEN]; static int IntPairAEAE_pool_len = 0; size_t _IntPairAEAE_get_nelt(const IntPairAEAE *aeae) { return aeae->_nelt; } size_t _IntPairAEAE_set_nelt(IntPairAEAE *aeae, size_t nelt) { if (nelt > aeae->_buflength) error("S4Vectors internal error in _IntPairAEAE_set_nelt(): " "trying to set a nb of buffer elements that exceeds " "the buffer length"); return aeae->_nelt = nelt; } static IntPairAEAE *new_empty_IntPairAEAE() { IntPairAEAE *aeae; if (use_malloc && IntPairAEAE_pool_len >= INTPAIRAEAE_POOL_MAXLEN) error("S4Vectors internal error in new_empty_IntPairAEAE(): " "IntPairAEAE pool is full"); aeae = (IntPairAEAE *) alloc2(1, sizeof(IntPairAEAE)); aeae->_buflength = aeae->_nelt = 0; if (use_malloc) IntPairAEAE_pool[IntPairAEAE_pool_len++] = aeae; return aeae; } void _IntPairAEAE_extend(IntPairAEAE *aeae, size_t new_buflength) { size_t old_buflength, i; old_buflength = aeae->_buflength; aeae->elts = (IntPairAE **) realloc2(aeae->elts, old_buflength, new_buflength, sizeof(IntPairAE *)); for (i = old_buflength; i < new_buflength; i++) aeae->elts[i] = NULL; aeae->_buflength = new_buflength; return; } static int IntPairAEAE_extend_if_full(IntPairAEAE *aeae) { if (_IntPairAEAE_get_nelt(aeae) < aeae->_buflength) return 0; _IntPairAEAE_extend(aeae, _increase_buflength(aeae->_buflength)); return 1; } void _IntPairAEAE_insert_at(IntPairAEAE *aeae, size_t at, IntPairAE *ae) { size_t aeae_nelt, i; IntPairAE **ae1_p, **ae2_p; aeae_nelt = _IntPairAEAE_get_nelt(aeae); if (at > aeae_nelt) error("S4Vectors internal error in _IntPairAEAE_insert_at(): " "trying to insert a buffer element at an invalid " "buffer position"); IntPairAEAE_extend_if_full(aeae); if (use_malloc && remove_from_IntPairAE_pool(ae) == -1) error("S4Vectors internal error in _IntPairAEAE_insert_at(): " "IntPairAE to insert cannot be found in pool for " "removal"); ae1_p = aeae->elts + aeae_nelt; ae2_p = ae1_p - 1; for (i = aeae_nelt; i > at; i--) *(ae1_p--) = *(ae2_p--); *ae1_p = ae; _IntPairAEAE_set_nelt(aeae, aeae_nelt + 1); return; } IntPairAEAE *_new_IntPairAEAE(size_t buflength, size_t nelt) { IntPairAEAE *aeae; size_t i; IntPairAE *ae; aeae = new_empty_IntPairAEAE(); if (buflength != 0) { _IntPairAEAE_extend(aeae, buflength); for (i = 0; i < nelt; i++) { ae = new_empty_IntPairAE(); _IntPairAEAE_insert_at(aeae, i, ae); } } return aeae; } /* Must be used on a malloc-based IntPairAEAE */ static void IntPairAEAE_free(IntPairAEAE *aeae) { size_t buflength, i; IntPairAE *ae; buflength = aeae->_buflength; for (i = 0; i < buflength; i++) { ae = aeae->elts[i]; if (ae != NULL) IntPairAE_free(ae); } if (buflength != 0) free(aeae->elts); free(aeae); return; } static void flush_IntPairAEAE_pool() { IntPairAEAE *aeae; while (IntPairAEAE_pool_len > 0) { IntPairAEAE_pool_len--; aeae = IntPairAEAE_pool[IntPairAEAE_pool_len]; IntPairAEAE_free(aeae); } return; } /**************************************************************************** * LLongAE buffers */ #define LLONGAE_POOL_MAXLEN 256 static LLongAE *LLongAE_pool[LLONGAE_POOL_MAXLEN]; static int LLongAE_pool_len = 0; size_t _LLongAE_get_nelt(const LLongAE *ae) { return ae->_nelt; } size_t _LLongAE_set_nelt(LLongAE *ae, size_t nelt) { if (nelt > ae->_buflength) error("S4Vectors internal error in _LLongAE_set_nelt(): " "trying to set a nb of buffer elements that exceeds " "the buffer length"); return ae->_nelt = nelt; } static LLongAE *new_empty_LLongAE() { LLongAE *ae; if (use_malloc && LLongAE_pool_len >= LLONGAE_POOL_MAXLEN) error("S4Vectors internal error in new_empty_LLongAE(): " "LLongAE pool is full"); ae = (LLongAE *) alloc2(1, sizeof(LLongAE)); ae->_buflength = ae->_nelt = 0; if (use_malloc) LLongAE_pool[LLongAE_pool_len++] = ae; return ae; } void _LLongAE_set_val(const LLongAE *ae, long long val) { size_t ae_nelt, i; long long *elt_p; ae_nelt = _LLongAE_get_nelt(ae); elt_p = ae->elts; for (i = 0; i < ae_nelt; i++) *(elt_p++) = val; return; } void _LLongAE_extend(LLongAE *ae, size_t new_buflength) { ae->elts = (long long *) realloc2(ae->elts, ae->_buflength, new_buflength, sizeof(long long)); ae->_buflength = new_buflength; return; } static int LLongAE_extend_if_full(LLongAE *ae) { if (_LLongAE_get_nelt(ae) < ae->_buflength) return 0; _LLongAE_extend(ae, _increase_buflength(ae->_buflength)); return 1; } void _LLongAE_insert_at(LLongAE *ae, size_t at, long long val) { size_t ae_nelt, i; long long *elt1_p; const long long *elt2_p; ae_nelt = _LLongAE_get_nelt(ae); if (at > ae_nelt) error("S4Vectors internal error in _LLongAE_insert_at(): " "trying to insert a buffer element at an invalid " "buffer position"); LLongAE_extend_if_full(ae); elt1_p = ae->elts + ae_nelt; elt2_p = elt1_p - 1; for (i = ae_nelt; i > at; i--) *(elt1_p--) = *(elt2_p--); *elt1_p = val; _LLongAE_set_nelt(ae, ae_nelt + 1); return; } LLongAE *_new_LLongAE(size_t buflength, size_t nelt, long long val) { LLongAE *ae; ae = new_empty_LLongAE(); if (buflength != 0) { _LLongAE_extend(ae, buflength); _LLongAE_set_nelt(ae, nelt); _LLongAE_set_val(ae, val); } return ae; } /* Must be used on a malloc-based LLongAE */ static void LLongAE_free(LLongAE *ae) { if (ae->_buflength != 0) free(ae->elts); free(ae); return; } static void flush_LLongAE_pool() { LLongAE *ae; while (LLongAE_pool_len > 0) { LLongAE_pool_len--; ae = LLongAE_pool[LLongAE_pool_len]; LLongAE_free(ae); } return; } /**************************************************************************** * CharAE buffers */ #define CHARAE_POOL_MAXLEN 256 static CharAE *CharAE_pool[CHARAE_POOL_MAXLEN]; static int CharAE_pool_len = 0; size_t _CharAE_get_nelt(const CharAE *ae) { return ae->_nelt; } size_t _CharAE_set_nelt(CharAE *ae, size_t nelt) { if (nelt > ae->_buflength) error("S4Vectors internal error in _CharAE_set_nelt(): " "trying to set a nb of buffer elements that exceeds " "the buffer length"); return ae->_nelt = nelt; } static CharAE *new_empty_CharAE() { CharAE *ae; if (use_malloc && CharAE_pool_len >= CHARAE_POOL_MAXLEN) error("S4Vectors internal error in new_empty_CharAE(): " "CharAE pool is full"); ae = (CharAE *) alloc2(1, sizeof(CharAE)); ae->_buflength = ae->_nelt = 0; if (use_malloc) CharAE_pool[CharAE_pool_len++] = ae; return ae; } void _CharAE_extend(CharAE *ae, size_t new_buflength) { ae->elts = (char *) realloc2(ae->elts, ae->_buflength, new_buflength, sizeof(char)); ae->_buflength = new_buflength; return; } static int CharAE_extend_if_full(CharAE *ae) { if (_CharAE_get_nelt(ae) < ae->_buflength) return 0; _CharAE_extend(ae, _increase_buflength(ae->_buflength)); return 1; } void _CharAE_insert_at(CharAE *ae, size_t at, char c) { size_t ae_nelt, i; char *elt1_p; const char *elt2_p; ae_nelt = _CharAE_get_nelt(ae); if (at > ae_nelt) error("S4Vectors internal error in _CharAE_insert_at(): " "trying to insert a buffer element at an invalid " "buffer position"); CharAE_extend_if_full(ae); elt1_p = ae->elts + ae_nelt; elt2_p = elt1_p - 1; for (i = ae_nelt; i > at; i--) *(elt1_p--) = *(elt2_p--); *elt1_p = c; _CharAE_set_nelt(ae, ae_nelt + 1); return; } CharAE *_new_CharAE(size_t buflength) { CharAE *ae; ae = new_empty_CharAE(); if (buflength != 0) _CharAE_extend(ae, buflength); return ae; } CharAE *_new_CharAE_from_string(const char *string) { CharAE *ae; ae = _new_CharAE(strlen(string)); _CharAE_set_nelt(ae, ae->_buflength); memcpy(ae->elts, string, ae->_buflength); return ae; } void _CharAE_append_string(CharAE *ae, const char *string) { size_t nnewval, ae_nelt, new_nelt; char *dest; nnewval = strlen(string); ae_nelt = _CharAE_get_nelt(ae); new_nelt = ae_nelt + nnewval; if (new_nelt > ae->_buflength) _CharAE_extend(ae, new_nelt); dest = ae->elts + ae_nelt; memcpy(dest, string, sizeof(char) * nnewval); _CharAE_set_nelt(ae, new_nelt); return; } /* * Delete 'nelt' elements, starting at position 'at'. * Calling _CharAE_delete_at(x, at, nelt) is equivalent to calling * _CharAE_delete_at(x, at, 1) nelt times. */ void _CharAE_delete_at(CharAE *ae, size_t at, size_t nelt) { char *c1_p; const char *c2_p; size_t ae_nelt, i2; if (nelt == 0) return; c1_p = ae->elts + at; c2_p = c1_p + nelt; ae_nelt = _CharAE_get_nelt(ae); for (i2 = at + nelt; i2 < ae_nelt; i2++) *(c1_p++) = *(c2_p++); _CharAE_set_nelt(ae, ae_nelt - nelt); return; } SEXP _new_CHARSXP_from_CharAE(const CharAE *ae) { size_t ae_nelt; ae_nelt = _CharAE_get_nelt(ae); if (ae_nelt > INT_MAX) error("S4Vectors internal error in " "_new_CHARSXP_from_CharAE: character " "buffer is too long for mkCharLen()"); return mkCharLen(ae->elts, (int) ae_nelt); } SEXP _new_RAW_from_CharAE(const CharAE *ae) { size_t ae_nelt; SEXP ans; if (sizeof(Rbyte) != sizeof(char)) // should never happen! error("_new_RAW_from_CharAE(): sizeof(Rbyte) != sizeof(char)"); ae_nelt = _CharAE_get_nelt(ae); /* ae_nelt <= R_XLEN_T_MAX so casting is safe. */ PROTECT(ans = NEW_RAW((R_xlen_t) ae_nelt)); memcpy(RAW(ans), ae->elts, ae_nelt * sizeof(char)); UNPROTECT(1); return ans; } /* only until we have a bitset or something smaller than char */ SEXP _new_LOGICAL_from_CharAE(const CharAE *ae) { size_t ae_nelt, i; SEXP ans; const char *elt_p; ae_nelt = _CharAE_get_nelt(ae); /* ae_nelt <= R_XLEN_T_MAX so casting is safe. */ PROTECT(ans = NEW_LOGICAL((R_xlen_t) ae_nelt)); elt_p = ae->elts; for (i = 0; i < ae_nelt; i++) LOGICAL(ans)[i] = *(elt_p++); UNPROTECT(1); return ans; } /* Must be used on a malloc-based CharAE */ static void CharAE_free(CharAE *ae) { if (ae->_buflength != 0) free(ae->elts); free(ae); return; } static void flush_CharAE_pool() { CharAE *ae; while (CharAE_pool_len > 0) { CharAE_pool_len--; ae = CharAE_pool[CharAE_pool_len]; CharAE_free(ae); } return; } static int remove_from_CharAE_pool(const CharAE *ae) { int i; CharAE **ae1_p, **ae2_p; i = CharAE_pool_len; while (--i >= 0 && CharAE_pool[i] != ae) {;} if (i < 0) return -1; ae1_p = CharAE_pool + i; ae2_p = ae1_p + 1; for (i = i + 1; i < CharAE_pool_len; i++) *(ae1_p++) = *(ae2_p++); CharAE_pool_len--; return 0; } /**************************************************************************** * CharAEAE buffers */ #define CHARAEAE_POOL_MAXLEN 256 static CharAEAE *CharAEAE_pool[CHARAEAE_POOL_MAXLEN]; static int CharAEAE_pool_len = 0; size_t _CharAEAE_get_nelt(const CharAEAE *aeae) { return aeae->_nelt; } size_t _CharAEAE_set_nelt(CharAEAE *aeae, size_t nelt) { if (nelt > aeae->_buflength) error("S4Vectors internal error in _CharAEAE_set_nelt(): " "trying to set a nb of buffer elements that exceeds " "the buffer length"); return aeae->_nelt = nelt; } static CharAEAE *new_empty_CharAEAE() { CharAEAE *aeae; if (use_malloc && CharAEAE_pool_len >= CHARAEAE_POOL_MAXLEN) error("S4Vectors internal error in new_empty_CharAEAE(): " "CharAEAE pool is full"); aeae = (CharAEAE *) alloc2(1, sizeof(CharAEAE)); aeae->_buflength = aeae->_nelt = 0; if (use_malloc) CharAEAE_pool[CharAEAE_pool_len++] = aeae; return aeae; } void _CharAEAE_extend(CharAEAE *aeae, size_t new_buflength) { size_t old_buflength, i; old_buflength = aeae->_buflength; aeae->elts = (CharAE **) realloc2(aeae->elts, old_buflength, new_buflength, sizeof(CharAE *)); for (i = old_buflength; i < new_buflength; i++) aeae->elts[i] = NULL; aeae->_buflength = new_buflength; return; } static int CharAEAE_extend_if_full(CharAEAE *aeae) { if (_CharAEAE_get_nelt(aeae) < aeae->_buflength) return 0; _CharAEAE_extend(aeae, _increase_buflength(aeae->_buflength)); return 1; } void _CharAEAE_insert_at(CharAEAE *aeae, size_t at, CharAE *ae) { size_t aeae_nelt, i; CharAE **ae1_p, **ae2_p; aeae_nelt = _CharAEAE_get_nelt(aeae); if (at > aeae_nelt) error("S4Vectors internal error in _CharAEAE_insert_at(): " "trying to insert a buffer element at an invalid " "buffer position"); CharAEAE_extend_if_full(aeae); if (use_malloc && remove_from_CharAE_pool(ae) == -1) error("S4Vectors internal error in _CharAEAE_insert_at(): " "CharAE to insert cannot be found in pool for removal"); ae1_p = aeae->elts + aeae_nelt; ae2_p = ae1_p - 1; for (i = aeae_nelt; i > at; i--) *(ae1_p--) = *(ae2_p--); *ae1_p = ae; _CharAEAE_set_nelt(aeae, aeae_nelt + 1); return; } CharAEAE *_new_CharAEAE(size_t buflength, size_t nelt) { CharAEAE *aeae; size_t i; CharAE *ae; aeae = new_empty_CharAEAE(); if (buflength != 0) { _CharAEAE_extend(aeae, buflength); for (i = 0; i < nelt; i++) { ae = new_empty_CharAE(); _CharAEAE_insert_at(aeae, i, ae); } } return aeae; } void _CharAEAE_append_string(CharAEAE *aeae, const char *string) { CharAE *ae; ae = _new_CharAE_from_string(string); _CharAEAE_insert_at(aeae, _CharAEAE_get_nelt(aeae), ae); return; } SEXP _new_CHARACTER_from_CharAEAE(const CharAEAE *aeae) { size_t aeae_nelt, i; SEXP ans, ans_elt; CharAE *ae; aeae_nelt = _CharAEAE_get_nelt(aeae); /* ae_nelt <= R_XLEN_T_MAX so casting is safe. */ PROTECT(ans = NEW_CHARACTER((R_xlen_t) aeae_nelt)); for (i = 0; i < aeae_nelt; i++) { ae = aeae->elts[i]; PROTECT(ans_elt = _new_CHARSXP_from_CharAE(ae)); SET_STRING_ELT(ans, i, ans_elt); UNPROTECT(1); } UNPROTECT(1); return ans; } /* Must be used on a malloc-based CharAEAE */ static void CharAEAE_free(CharAEAE *aeae) { size_t buflength, i; CharAE *ae; buflength = aeae->_buflength; for (i = 0; i < buflength; i++) { ae = aeae->elts[i]; if (ae != NULL) CharAE_free(ae); } if (buflength != 0) free(aeae->elts); free(aeae); return; } static void flush_CharAEAE_pool() { CharAEAE *aeae; while (CharAEAE_pool_len > 0) { CharAEAE_pool_len--; aeae = CharAEAE_pool[CharAEAE_pool_len]; CharAEAE_free(aeae); } return; } /**************************************************************************** * Freeing the malloc-based AEbufs. */ SEXP AEbufs_free() { flush_IntAE_pool(); flush_IntAEAE_pool(); flush_IntPairAE_pool(); flush_IntPairAEAE_pool(); flush_LLongAE_pool(); flush_CharAE_pool(); flush_CharAEAE_pool(); return R_NilValue; } S4Vectors/src/DataFrame_class.c0000644000175400017540000000145713175736136017423 0ustar00biocbuildbiocbuild/**************************************************************************** * Low-level manipulation of DataFrame objects ****************************************************************************/ #include "S4Vectors.h" static SEXP rownames_symbol = NULL, nrows_symbol = NULL; static void set_DataFrame_rownames(SEXP x, SEXP value) { INIT_STATIC_SYMBOL(rownames) SET_SLOT(x, rownames_symbol, value); } static void set_DataFrame_nrows(SEXP x, SEXP value) { INIT_STATIC_SYMBOL(nrows) SET_SLOT(x, nrows_symbol, value); } SEXP _new_DataFrame(const char *classname, SEXP vars, SEXP rownames, SEXP nrows) { SEXP ans; PROTECT(ans = _new_SimpleList(classname, vars)); set_DataFrame_rownames(ans, rownames); set_DataFrame_nrows(ans, nrows); UNPROTECT(1); return ans; } S4Vectors/src/Hits_class.c0000644000175400017540000002360613175736136016506 0ustar00biocbuildbiocbuild/**************************************************************************** * Low-level manipulation of Hits objects * ****************************************************************************/ #include "S4Vectors.h" /**************************************************************************** * C-level constructors */ static SEXP new_Hits0(const char *classname, SEXP from, SEXP to, int nLnode, int nRnode) { SEXP classdef, ans, ans_nLnode, ans_nRnode; PROTECT(classdef = MAKE_CLASS(classname)); PROTECT(ans = NEW_OBJECT(classdef)); SET_SLOT(ans, install("from"), from); SET_SLOT(ans, install("to"), to); PROTECT(ans_nLnode = ScalarInteger(nLnode)); SET_SLOT(ans, install("nLnode"), ans_nLnode); UNPROTECT(1); PROTECT(ans_nRnode = ScalarInteger(nRnode)); SET_SLOT(ans, install("nRnode"), ans_nRnode); UNPROTECT(1); UNPROTECT(2); return ans; } static SEXP new_Hits1(const char *classname, const int *from, const int *to, int nhit, int nLnode, int nRnode) { SEXP ans_from, ans_to, ans; size_t n; PROTECT(ans_from = NEW_INTEGER(nhit)); PROTECT(ans_to = NEW_INTEGER(nhit)); n = sizeof(int) * nhit; memcpy(INTEGER(ans_from), from, n); memcpy(INTEGER(ans_to), to, n); ans = new_Hits0(classname, ans_from, ans_to, nLnode, nRnode); UNPROTECT(2); return ans; } /**************************************************************************** * High-level user-friendly constructor */ /* Based on qsort(). Time is O(nhit*log(nhit)). If 'revmap' is not NULL, then 'from_in' is not modified. */ static void qsort_hits(int *from_in, const int *to_in, int *from_out, int *to_out, int nhit, int *revmap) { int k; if (revmap == NULL) revmap = to_out; _get_order_of_int_array(from_in, nhit, 0, revmap, 0); for (k = 0; k < nhit; k++) from_out[k] = from_in[revmap[k]]; if (revmap == to_out) { memcpy(from_in, revmap, sizeof(int) * nhit); revmap = from_in; } for (k = 0; k < nhit; k++) to_out[k] = to_in[revmap[k]++]; return; } /* Tabulated sorting. Time is O(nhit). WARNINGS: 'nhit' MUST be >= 'nLnode'. 'from_in' is ALWAYS modified. */ static void tsort_hits(int *from_in, const int *to_in, int *from_out, int *to_out, int nhit, int nLnode, int *revmap) { int i, k, offset, count, prev_offset, j; /* Compute nb of hits per left node. We need a place for this so we temporarily use 'from_out' which is assumed to have at least 'nLnode' elements. */ for (i = 0; i < nLnode; i++) from_out[i] = 0; for (k = 0; k < nhit; k++) from_out[--from_in[k]]++; /* make 'from_in[k]' 0-based */ /* Replace counts with offsets. */ offset = 0; for (i = 0; i < nLnode; i++) { count = from_out[i]; from_out[i] = offset; offset += count; } /* Fill 'to_out' and 'revmap'. */ for (k = 0; k < nhit; k++) { offset = from_out[from_in[k]]++; to_out[offset] = to_in[k]; if (revmap != NULL) revmap[offset] = k + 1; } /* Fill 'from_out'. */ memcpy(from_in, from_out, sizeof(int) * nLnode); k = offset = 0; for (i = 1; i <= nLnode; i++) { prev_offset = offset; offset = from_in[i - 1]; for (j = prev_offset; j < offset; j++) from_out[k++] = i; } return; } SEXP _new_Hits(int *from, const int *to, int nhit, int nLnode, int nRnode, int already_sorted) { SEXP ans_from, ans_to, ans; int *from_out, *to_out; if (already_sorted || nhit <= 1 || nLnode <= 1) return new_Hits1("SortedByQueryHits", from, to, nhit, nLnode, nRnode); PROTECT(ans_from = NEW_INTEGER(nhit)); PROTECT(ans_to = NEW_INTEGER(nhit)); from_out = INTEGER(ans_from); to_out = INTEGER(ans_to); if (nhit >= nLnode) tsort_hits(from, to, from_out, to_out, nhit, nLnode, NULL); else qsort_hits(from, to, from_out, to_out, nhit, NULL); ans = new_Hits0("SortedByQueryHits", ans_from, ans_to, nLnode, nRnode); UNPROTECT(2); return ans; } static SEXP new_Hits_with_revmap(const char *classname, const int *from, const int *to, int nhit, int nLnode, int nRnode, int *revmap) { SEXP ans_from, ans_to, ans; int *from2, *from_out, *to_out; if (revmap == NULL || nhit >= nLnode) { from2 = (int *) R_alloc(sizeof(int), nhit); memcpy(from2, from, sizeof(int) * nhit); } if (revmap == NULL) return _new_Hits(from2, to, nhit, nLnode, nRnode, 0); PROTECT(ans_from = NEW_INTEGER(nhit)); PROTECT(ans_to = NEW_INTEGER(nhit)); from_out = INTEGER(ans_from); to_out = INTEGER(ans_to); if (nhit >= nLnode) { tsort_hits(from2, to, from_out, to_out, nhit, nLnode, revmap); } else { qsort_hits((int *) from, to, from_out, to_out, nhit, revmap); } ans = new_Hits0(classname, ans_from, ans_to, nLnode, nRnode); UNPROTECT(2); return ans; } static int get_nnode(SEXP len, const char *side) { int len0; if (!IS_INTEGER(len) || LENGTH(len) != 1) error("'n%snode(x)' must be a single integer", side); len0 = INTEGER(len)[0]; if (len0 == NA_INTEGER || len0 < 0) error("'n%snode(x)' must be a single non-negative integer", side); return len0; } /* Return 1 if 'from' is already sorted and 0 otherwise. */ static int check_hits(const int *from, const int *to, int nhit, int nLnode, int nRnode) { int already_sorted, prev_i, k, i, j; already_sorted = 1; prev_i = -1; for (k = 0; k < nhit; k++, from++, to++) { i = *from; if (i == NA_INTEGER || i < 1 || i > nLnode) error("'from(x)' must contain non-NA values " ">= 1 and <= 'nLnode(x)'"); if (i < prev_i) already_sorted = 0; prev_i = i; j = *to; if (j == NA_INTEGER || j < 1 || j > nRnode) error("'to(x)' must contain non-NA values " ">= 1 and <= 'nRnode(x)'"); } return already_sorted; } /* --- .Call ENTRY POINT --- */ SEXP Hits_new(SEXP Class, SEXP from, SEXP to, SEXP nLnode, SEXP nRnode, SEXP revmap_envir) { const char *classname; int nhit, nLnode0, nRnode0, already_sorted, *revmap_p; const int *from_p, *to_p; SEXP ans, revmap, symbol; classname = CHAR(STRING_ELT(Class, 0)); nhit = _check_integer_pairs(from, to, &from_p, &to_p, "from(x)", "to(x)"); nLnode0 = get_nnode(nLnode, "L"); nRnode0 = get_nnode(nRnode, "R"); already_sorted = check_hits(from_p, to_p, nhit, nLnode0, nRnode0); if (already_sorted) return new_Hits1(classname, from_p, to_p, nhit, nLnode0, nRnode0); if (revmap_envir == R_NilValue) { revmap_p = NULL; } else { PROTECT(revmap = NEW_INTEGER(nhit)); revmap_p = INTEGER(revmap); } PROTECT(ans = new_Hits_with_revmap(classname, from_p, to_p, nhit, nLnode0, nRnode0, revmap_p)); if (revmap_envir == R_NilValue) { UNPROTECT(1); return ans; } PROTECT(symbol = mkChar("revmap")); defineVar(install(translateChar(symbol)), revmap, revmap_envir); UNPROTECT(3); return ans; } /**************************************************************************** * select_hits() */ int _get_select_mode(SEXP select) { const char *select0; if (!IS_CHARACTER(select) || LENGTH(select) != 1) error("'select' must be a single string"); select = STRING_ELT(select, 0); if (select == NA_STRING) error("'select' cannot be NA"); select0 = CHAR(select); if (strcmp(select0, "all") == 0) return ALL_HITS; if (strcmp(select0, "first") == 0) return FIRST_HIT; if (strcmp(select0, "last") == 0) return LAST_HIT; if (strcmp(select0, "arbitrary") == 0) return ARBITRARY_HIT; if (strcmp(select0, "count") == 0) return COUNT_HITS; error("'select' must be \"all\", \"first\", " "\"last\", \"arbitrary\", or \"count\""); return 0; } /* --- .Call ENTRY POINT --- */ SEXP select_hits(SEXP from, SEXP to, SEXP nLnode, SEXP select) { int nhit, annRnode, select_mode, init_val, i, k, j1; const int *from_p, *to_p; SEXP ans; nhit = _check_integer_pairs(from, to, &from_p, &to_p, "from(x)", "to(x)"); annRnode = INTEGER(nLnode)[0]; select_mode = _get_select_mode(select); PROTECT(ans = NEW_INTEGER(annRnode)); init_val = select_mode == COUNT_HITS ? 0 : NA_INTEGER; for (i = 0; i < annRnode; i++) INTEGER(ans)[i] = init_val; for (k = 0; k < nhit; k++, from_p++, to_p++) { i = *from_p - 1; if (select_mode == COUNT_HITS) { INTEGER(ans)[i]++; continue; } j1 = *to_p; if (INTEGER(ans)[i] == NA_INTEGER || (select_mode == FIRST_HIT) == (j1 < INTEGER(ans)[i])) INTEGER(ans)[i] = j1; } UNPROTECT(1); return ans; } /**************************************************************************** * make_all_group_inner_hits() * * --- .Call ENTRY POINT --- * 'hit_type' must be 0, -1 or 1 (single integer). */ SEXP make_all_group_inner_hits(SEXP group_sizes, SEXP hit_type) { int ngroup, htype, annRnode, i, j, k, gs, nhit, iofeig, *left, *right; const int *group_sizes_elt; SEXP ans_from, ans_to, ans; ngroup = LENGTH(group_sizes); htype = INTEGER(hit_type)[0]; for (i = annRnode = 0, group_sizes_elt = INTEGER(group_sizes); i < ngroup; i++, group_sizes_elt++) { gs = *group_sizes_elt; if (gs == NA_INTEGER || gs < 0) error("'group_sizes' contains NAs or negative values"); nhit = htype == 0 ? gs * gs : (gs * (gs - 1)) / 2; annRnode += nhit; } PROTECT(ans_from = NEW_INTEGER(annRnode)); PROTECT(ans_to = NEW_INTEGER(annRnode)); left = INTEGER(ans_from); right = INTEGER(ans_to); iofeig = 0; /* 0-based Index Of First Element In Group */ for (i = 0, group_sizes_elt = INTEGER(group_sizes); i < ngroup; i++, group_sizes_elt++) { gs = *group_sizes_elt; if (htype > 0) { for (j = 1; j < gs; j++) { for (k = j + 1; k <= gs; k++) { *(left++) = j + iofeig; *(right++) = k + iofeig; } } } else if (htype < 0) { for (j = 2; j <= gs; j++) { for (k = 1; k < j; k++) { *(left++) = j + iofeig; *(right++) = k + iofeig; } } } else { for (j = 1; j <= gs; j++) { for (k = 1; k <= gs; k++) { *(left++) = j + iofeig; *(right++) = k + iofeig; } } } iofeig += gs; } ans = new_Hits0("SortedByQuerySelfHits", ans_from, ans_to, iofeig, iofeig); UNPROTECT(2); return ans; } S4Vectors/src/LLint_class.c0000644000175400017540000004213413175736136016616 0ustar00biocbuildbiocbuild/**************************************************************************** * Low-level manipulation of LLint objects * * Author: H. Pag\`es * ****************************************************************************/ #include "S4Vectors.h" #include /* for isspace() and isdigit() */ #define BYTES_PER_LLINT (sizeof(long long int) / sizeof(char)) #define NEW_LLINT(n) _alloc_LLint("LLint", (n)) #define LLINT(x) _get_LLint_dataptr(x) int _is_LLint(SEXP x) { return isObject(x) && strcmp(CHAR(STRING_ELT(GET_CLASS(x), 0)), "LLint") == 0; } /* --- .Call ENTRY POINT --- */ SEXP make_RAW_from_NA_LLINT() { SEXP ans; PROTECT(ans = NEW_RAW(BYTES_PER_LLINT)); *((long long int *) RAW(ans)) = NA_LLINT; UNPROTECT(1); return ans; } /**************************************************************************** * C-level getters and setter. */ static SEXP bytes_symbol = NULL; static SEXP get_LLint_bytes(SEXP x) { INIT_STATIC_SYMBOL(bytes) return GET_SLOT(x, bytes_symbol); } R_xlen_t _get_LLint_length(SEXP x) { return XLENGTH(get_LLint_bytes(x)) / BYTES_PER_LLINT; } long long int *_get_LLint_dataptr(SEXP x) { return (long long int *) RAW(get_LLint_bytes(x)); } static void set_LLint_bytes(SEXP x, SEXP value) { INIT_STATIC_SYMBOL(bytes) SET_SLOT(x, bytes_symbol, value); return; } /**************************************************************************** * C-level constructors. * * Be aware that these functions do NOT duplicate their arguments before * putting them in the slots of the returned object. * Thus they cannot be made .Call entry points! */ static SEXP new_LLint_from_bytes(const char *classname, SEXP bytes) { SEXP classdef, ans; PROTECT(classdef = MAKE_CLASS(classname)); PROTECT(ans = NEW_OBJECT(classdef)); set_LLint_bytes(ans, bytes); UNPROTECT(2); return ans; } /* Allocation WITHOUT initialization. */ SEXP _alloc_LLint(const char *classname, R_xlen_t length) { SEXP bytes, ans; PROTECT(bytes = NEW_RAW(length * BYTES_PER_LLINT)); PROTECT(ans = new_LLint_from_bytes(classname, bytes)); UNPROTECT(2); return ans; } /**************************************************************************** * Low-level coercion helper functions */ static void from_ints_to_llints(const int *from, long long int *to, R_xlen_t n) { R_xlen_t i; int from_elt; for (i = 0; i < n; i++, from++, to++) { from_elt = *from; if (from_elt == NA_INTEGER) { *to = NA_LLINT; continue; } *to = (long long int) from_elt; } return; } static void from_doubles_to_llints(const double *from, long long int *to, R_xlen_t n) { int first_time; R_xlen_t i; double from_elt; first_time = 1; for (i = 0; i < n; i++, from++, to++) { from_elt = *from; if (from_elt == NA_REAL) { *to = NA_LLINT; continue; } if (from_elt > (double) LLONG_MAX || from_elt < (double) -LLONG_MAX) { if (first_time) { warning("out-of-range values coerced to NAs " "in coercion to LLint"); first_time = 0; } *to = NA_LLINT; continue; } *to = (long long int) from_elt; } return; } /* Return 1 if the string to parse 's' contains a number that is syntactically correct AND cannot be represented by a long long int (overflow). Otherwise return 0. */ static int scan_llint(const char *s, long long int *out) { char c, sign; long long int val; *out = NA_LLINT; /* Skip leading spaces. */ while (isspace(c = *(s++))) {}; if (c == '\0') return 0; /* syntactically incorrect */ /* Scan unary +/- sign. */ if (c == '+' || c == '-') { sign = c; c = *(s++); } else { sign = '+'; } if (!isdigit(c)) return 0; /* syntactically incorrect */ /* Scan digits. */ _reset_ovflow_flag(); val = c - '0'; while (isdigit(c = *(s++))) { val = _safe_llint_mult(val, 10LL); val = _safe_llint_add(val, (long long int) c - '0'); } if (sign == '-') val = -val; if (c == '\0') goto syntactically_correct; /* Scan decimal part. */ if (c == '.') { /* Decimal part is ignored. */ while (isdigit(c = *(s++))) {}; if (c == '\0') goto syntactically_correct; } /* Skip trailing spaces. */ if (isspace(c)) while (isspace(c = *(s++))) {}; if (c != '\0') return 0; /* syntactically incorrect */ syntactically_correct: *out = val; return _get_ovflow_flag(); } static void from_STRSXP_to_llints(SEXP from, long long int *to) { R_xlen_t n, i; int first_time1, first_time2; SEXP from_elt; n = XLENGTH(from); first_time1 = first_time2 = 1; for (i = 0; i < n; i++, to++) { from_elt = STRING_ELT(from, i); if (from_elt == NA_STRING) { *to = NA_LLINT; continue; } if (scan_llint(CHAR(from_elt), to)) { /* syntactically correct number but overflow */ if (first_time1) { warning("out-of-range values coerced to NAs " "in coercion to LLint"); first_time1 = 0; } continue; } if (*to != NA_LLINT) continue; if (first_time2) { /* syntactically incorrect number */ warning("syntactically incorrect numbers " "coerced to NAs in coercion to LLint"); first_time2 = 0; } } return; } static void from_llints_to_bools(const long long int *from, int *to, R_xlen_t n) { R_xlen_t i; long long int from_elt; for (i = 0; i < n; i++, from++, to++) { from_elt = *from; if (from_elt == NA_LLINT) { *to = NA_LOGICAL; continue; } *to = from_elt != 0LL; } return; } static void from_llints_to_ints(const long long int *from, int *to, R_xlen_t n) { int first_time; R_xlen_t i; long long int from_elt; first_time = 1; for (i = 0; i < n; i++, from++, to++) { from_elt = *from; if (from_elt == NA_LLINT) { *to = NA_INTEGER; continue; } if (from_elt > (long long int) INT_MAX || from_elt < (long long int) -INT_MAX) { if (first_time) { warning("out-of-range values coerced to NAs " "in coercion to integer"); first_time = 0; } *to = NA_INTEGER; continue; } *to = (int) from_elt; } return; } static void from_llints_to_doubles(const long long int *from, double *to, R_xlen_t n) { int first_time; R_xlen_t i; long long int from_elt; first_time = 1; for (i = 0; i < n; i++, from++, to++) { from_elt = *from; if (from_elt == NA_LLINT) { *to = NA_REAL; continue; } *to = (double) from_elt; if (first_time && (long long int) *to != from_elt) { warning("non reversible coercion to double " "(integer values > 2^53 cannot be exactly\n" " represented by double values)"); first_time = 0; } } return; } static void from_llints_to_STRSXP(const long long int *from, SEXP to) { R_xlen_t n, i; long long int from_elt; /* LLONG_MAX is 19 digits + sign + terminating null byte */ char val_buf[21]; SEXP to_elt; n = XLENGTH(to); for (i = 0; i < n; i++, from++) { from_elt = *from; if (from_elt == NA_LLINT) { SET_STRING_ELT(to, i, NA_STRING); continue; } /* sprintf() should always succeed here but we check for an error anyway, just to be safe. */ if (sprintf(val_buf, "%lld", from_elt) < 0) error("S4Vectors internal error in " "from_llints_to_STRSXP(): " "sprintf() returned a negative value"); PROTECT(to_elt = mkChar(val_buf)); SET_STRING_ELT(to, i, to_elt); UNPROTECT(1); } return; } /**************************************************************************** * Coercion. */ static SEXP new_LLint_from_ints(const int *x, R_xlen_t x_len) { SEXP ans; PROTECT(ans = NEW_LLINT(x_len)); from_ints_to_llints(x, LLINT(ans), x_len); UNPROTECT(1); return ans; } /* --- .Call ENTRY POINT --- */ SEXP new_LLint_from_LOGICAL(SEXP x) { return new_LLint_from_ints(LOGICAL(x), XLENGTH(x)); } /* --- .Call ENTRY POINT --- */ SEXP new_LLint_from_INTEGER(SEXP x) { return new_LLint_from_ints(INTEGER(x), XLENGTH(x)); } /* --- .Call ENTRY POINT --- */ SEXP new_LLint_from_NUMERIC(SEXP x) { R_xlen_t x_len; SEXP ans; x_len = XLENGTH(x); PROTECT(ans = NEW_LLINT(x_len)); from_doubles_to_llints(REAL(x), LLINT(ans), x_len); UNPROTECT(1); return ans; } /* --- .Call ENTRY POINT --- */ SEXP new_LLint_from_CHARACTER(SEXP x) { R_xlen_t x_len; SEXP ans; x_len = XLENGTH(x); PROTECT(ans = NEW_LLINT(x_len)); from_STRSXP_to_llints(x, LLINT(ans)); UNPROTECT(1); return ans; } /* --- .Call ENTRY POINT --- */ SEXP new_LOGICAL_from_LLint(SEXP x) { R_xlen_t ans_len; SEXP ans; ans_len = _get_LLint_length(x); PROTECT(ans = NEW_LOGICAL(ans_len)); from_llints_to_bools(LLINT(x), LOGICAL(ans), ans_len); UNPROTECT(1); return ans; } /* --- .Call ENTRY POINT --- */ SEXP new_INTEGER_from_LLint(SEXP x) { R_xlen_t ans_len; SEXP ans; ans_len = _get_LLint_length(x); PROTECT(ans = NEW_INTEGER(ans_len)); from_llints_to_ints(LLINT(x), INTEGER(ans), ans_len); UNPROTECT(1); return ans; } /* --- .Call ENTRY POINT --- */ SEXP new_NUMERIC_from_LLint(SEXP x) { R_xlen_t ans_len; SEXP ans; ans_len = _get_LLint_length(x); PROTECT(ans = NEW_NUMERIC(ans_len)); from_llints_to_doubles(LLINT(x), REAL(ans), ans_len); UNPROTECT(1); return ans; } /* --- .Call ENTRY POINT --- */ SEXP new_CHARACTER_from_LLint(SEXP x) { R_xlen_t ans_len; SEXP ans; ans_len = _get_LLint_length(x); PROTECT(ans = NEW_CHARACTER(ans_len)); from_llints_to_STRSXP(LLINT(x), ans); UNPROTECT(1); return ans; } /**************************************************************************** * Operations from "Ops" group */ static void print_not_multiple_warning() { warning("longer object length is not a multiple " "of shorter object length"); return; } static R_xlen_t compute_ans_length(R_xlen_t e1_len, R_xlen_t e2_len) { if (e1_len == 0 || e2_len == 0) return 0; if (e1_len >= e2_len) { if (e1_len % e2_len != 0) print_not_multiple_warning(); return e1_len; } if (e2_len % e1_len != 0) print_not_multiple_warning(); return e2_len; } static long long int llint_div(long long int x, long long int y) { long long int z; if (x == NA_LLINT || y == NA_LLINT || y == 0LL) return NA_LLINT; z = x / y; if (x == 0LL || (x > 0LL) == (y > 0LL) || y * z == x) return z; return z - 1LL; } static long long int llint_mod(long long int x, long long int y) { long long int z; if (x == NA_LLINT || y == NA_LLINT || y == 0LL) return NA_LLINT; z = x % y; /* The contortions below are meant to make sure that the result has the sign of 'y'. */ if (z == 0LL || (z > 0LL) == (y > 0LL)) return z; /* z and y have opposite signs. */ return z + y; /* same sign as 'y' */ } static double llint_div_as_double(long long int x, long long int y) { if (x == NA_LLINT || y == NA_LLINT) return NA_REAL; return (double) x / (double) y; } static double llint_pow_as_double(long long int x, long long int y) { if (x == 1LL || y == 0LL) return 1.0; if (x == NA_LLINT || y == NA_LLINT) return NA_REAL; return pow((double) x, (double) y); } typedef long long int (*Arith1FunType)(long long int x, long long int y); typedef double (*Arith2FunType)(long long int x, long long int y); static Arith1FunType get_arith1_fun(const char *generic) { if (strcmp(generic, "+") == 0) return _safe_llint_add; if (strcmp(generic, "-") == 0) return _safe_llint_subtract; if (strcmp(generic, "*") == 0) return _safe_llint_mult; if (strcmp(generic, "%/%") == 0) return llint_div; if (strcmp(generic, "%%") == 0) return llint_mod; return NULL; } static Arith2FunType get_arith2_fun(const char *generic) { if (strcmp(generic, "/") == 0) return llint_div_as_double; if (strcmp(generic, "^") == 0) return llint_pow_as_double; return NULL; } static void llints_arith1(Arith1FunType arith_fun, const long long int *x, R_xlen_t x_len, const long long int *y, R_xlen_t y_len, long long int *out, R_xlen_t out_len) { R_xlen_t i, j, k; _reset_ovflow_flag(); for (i = j = k = 0; k < out_len; i++, j++, k++) { if (i >= x_len) i = 0; if (j >= y_len) j = 0; out[k] = arith_fun(x[i], y[j]); } if (_get_ovflow_flag()) warning("NAs produced by LLint overflow"); return; } static void llints_arith2(Arith2FunType arith_fun, const long long int *x, R_xlen_t x_len, const long long int *y, R_xlen_t y_len, double *out, R_xlen_t out_len) { R_xlen_t i, j, k; for (i = j = k = 0; k < out_len; i++, j++, k++) { if (i >= x_len) i = 0; if (j >= y_len) j = 0; out[k] = arith_fun(x[i], y[j]); } return; } /* Operations from "Compare" group */ #define EQ_OP 1 /* equal to */ #define NEQ_OP 2 /* not equal to */ #define LEQ_OP 3 /* less than or equal to */ #define GEQ_OP 4 /* greater than or equal to */ #define LT_OP 5 /* less than */ #define GT_OP 6 /* greater than */ static int get_compare_op(const char *generic) { if (strcmp(generic, "==") == 0) return EQ_OP; if (strcmp(generic, "!=") == 0) return NEQ_OP; if (strcmp(generic, "<=") == 0) return LEQ_OP; if (strcmp(generic, ">=") == 0) return GEQ_OP; if (strcmp(generic, "<") == 0) return LT_OP; if (strcmp(generic, ">") == 0) return GT_OP; return 0; } static void llints_compare(int op, const long long int *x, R_xlen_t x_len, const long long int *y, R_xlen_t y_len, int *out, R_xlen_t out_len) { R_xlen_t i, j, k; long long int x_elt, y_elt; for (i = j = k = 0; k < out_len; i++, j++, k++) { if (i >= x_len) i = 0; if (j >= y_len) j = 0; x_elt = x[i]; y_elt = y[j]; if (x_elt == NA_LLINT || y_elt == NA_LLINT) { out[k] = NA_LOGICAL; continue; } switch (op) { case EQ_OP: out[k] = x_elt == y_elt; break; case NEQ_OP: out[k] = x_elt != y_elt; break; case LEQ_OP: out[k] = x_elt <= y_elt; break; case GEQ_OP: out[k] = x_elt >= y_elt; break; case LT_OP: out[k] = x_elt < y_elt; break; case GT_OP: out[k] = x_elt > y_elt; break; } } return; } /* --- .Call ENTRY POINT --- */ SEXP LLint_Ops(SEXP Generic, SEXP e1, SEXP e2) { R_xlen_t e1_len, e2_len, ans_len; const long long int *e1_elts, *e2_elts; const char *generic; Arith1FunType arith1_fun; Arith2FunType arith2_fun; int compare_op; SEXP ans; e1_len = _get_LLint_length(e1); e2_len = _get_LLint_length(e2); ans_len = compute_ans_length(e1_len, e2_len); e1_elts = LLINT(e1); e2_elts = LLINT(e2); generic = CHAR(STRING_ELT(Generic, 0)); /* Operations from "Arith" group */ arith1_fun = get_arith1_fun(generic); if (arith1_fun != NULL) { PROTECT(ans = NEW_LLINT(ans_len)); llints_arith1(arith1_fun, e1_elts, e1_len, e2_elts, e2_len, LLINT(ans), ans_len); UNPROTECT(1); return ans; } arith2_fun = get_arith2_fun(generic); if (arith2_fun != NULL) { PROTECT(ans = NEW_NUMERIC(ans_len)); llints_arith2(arith2_fun, e1_elts, e1_len, e2_elts, e2_len, REAL(ans), ans_len); UNPROTECT(1); return ans; } /* Operations from "Compare" group */ compare_op = get_compare_op(generic); if (compare_op != 0) { PROTECT(ans = NEW_LOGICAL(ans_len)); llints_compare(compare_op, e1_elts, e1_len, e2_elts, e2_len, LOGICAL(ans), ans_len); UNPROTECT(1); return ans; } error("\"%s\": operation not supported on LLint objects", generic); return R_NilValue; } /**************************************************************************** * Operations from "Summary" group */ #define MAX_OP 1 #define MIN_OP 2 #define SUM_OP 3 #define PROD_OP 4 static int get_summary_op(const char *generic) { if (strcmp(generic, "max") == 0) return MAX_OP; if (strcmp(generic, "min") == 0) return MIN_OP; if (strcmp(generic, "sum") == 0) return SUM_OP; if (strcmp(generic, "prod") == 0) return PROD_OP; return 0; } static long long int llints_summary(int op, const long long int *in, R_xlen_t in_len, int na_rm) { R_xlen_t i; long long int res, in_elt; switch (op) { case MAX_OP: case MIN_OP: res = NA_LLINT; break; case SUM_OP: res = 0LL; break; case PROD_OP: res = 1LL; break; } for (i = 0; i < in_len; i++) { in_elt = in[i]; if (in_elt == NA_LLINT) { if (na_rm) continue; return NA_LLINT; } switch (op) { case MAX_OP: if (res == NA_LLINT || in_elt > res) res = in_elt; break; case MIN_OP: if (res == NA_LLINT || in_elt < res) res = in_elt; break; case SUM_OP: res = _safe_llint_add(res, in_elt); if (res == NA_LLINT) { warning("LLint overflow - " "use sum(as.numeric(.))"); return res; } break; case PROD_OP: res = _safe_llint_mult(res, in_elt); if (res == NA_LLINT) { warning("LLint overflow - " "use prod(as.numeric(.))"); return res; } break; } } return res; } SEXP LLint_Summary(SEXP Generic, SEXP x, SEXP na_rm) { R_xlen_t x_len; const long long int *x_elts; const char *generic; int summary_op; SEXP ans; x_len = _get_LLint_length(x); x_elts = LLINT(x); generic = CHAR(STRING_ELT(Generic, 0)); summary_op = get_summary_op(generic); if (summary_op != 0) { PROTECT(ans = NEW_LLINT(1)); LLINT(ans)[0] = llints_summary(summary_op, x_elts, x_len, LOGICAL(na_rm)[0]); UNPROTECT(1); return ans; } if (strcmp(generic, "range") == 0) { PROTECT(ans = NEW_LLINT(2)); LLINT(ans)[0] = llints_summary(MIN_OP, x_elts, x_len, LOGICAL(na_rm)[0]); LLINT(ans)[1] = llints_summary(MAX_OP, x_elts, x_len, LOGICAL(na_rm)[0]); UNPROTECT(1); return ans; } error("\"%s\": operation not supported on LLint objects", generic); return R_NilValue; } S4Vectors/src/List_class.c0000644000175400017540000000165513175736136016512 0ustar00biocbuildbiocbuild/**************************************************************************** * Low-level manipulation of List objects * * Authors: P. Aboyoun, M. Lawrence, and H. Pag\`es * ****************************************************************************/ #include "S4Vectors.h" /**************************************************************************** * C-level slot getters. */ static SEXP elementType_symbol = NULL; const char *_get_List_elementType(SEXP x) { INIT_STATIC_SYMBOL(elementType) return CHAR(STRING_ELT(GET_SLOT(x, elementType_symbol), 0)); } /**************************************************************************** * C-level slot setters. */ void _set_List_elementType(SEXP x, const char *type) { SEXP value; INIT_STATIC_SYMBOL(elementType) PROTECT(value = mkString(type)); SET_SLOT(x, elementType_symbol, value); UNPROTECT(1); return; } S4Vectors/src/R_init_S4Vectors.c0000644000175400017540000001675513175736136017561 0ustar00biocbuildbiocbuild#include "S4Vectors.h" #define CALLMETHOD_DEF(fun, numArgs) {#fun, (DL_FUNC) &fun, numArgs} #define REGISTER_CCALLABLE(fun) \ R_RegisterCCallable("S4Vectors", #fun, (DL_FUNC) &fun) static const R_CallMethodDef callMethods[] = { /* sort_utils.c */ CALLMETHOD_DEF(test_sort_ushort_array, 2), /* AEbufs.c */ CALLMETHOD_DEF(AEbufs_use_malloc, 1), CALLMETHOD_DEF(AEbufs_free, 0), /* anyMissing.c */ CALLMETHOD_DEF(anyMissing, 1), /* LLint_class.c */ CALLMETHOD_DEF(make_RAW_from_NA_LLINT, 0), CALLMETHOD_DEF(new_LLint_from_LOGICAL, 1), CALLMETHOD_DEF(new_LLint_from_INTEGER, 1), CALLMETHOD_DEF(new_LLint_from_NUMERIC, 1), CALLMETHOD_DEF(new_LLint_from_CHARACTER, 1), CALLMETHOD_DEF(new_LOGICAL_from_LLint, 1), CALLMETHOD_DEF(new_INTEGER_from_LLint, 1), CALLMETHOD_DEF(new_NUMERIC_from_LLint, 1), CALLMETHOD_DEF(new_CHARACTER_from_LLint, 1), /* subsetting_utils.c */ CALLMETHOD_DEF(vector_OR_factor_extract_positions, 2), CALLMETHOD_DEF(vector_OR_factor_extract_ranges, 3), /* vector_utils.c */ CALLMETHOD_DEF(sapply_NROW, 1), /* logical_utils.c */ CALLMETHOD_DEF(logical_sum, 2), CALLMETHOD_DEF(logical2_sum, 2), /* integer_utils.c */ CALLMETHOD_DEF(Integer_any_missing_or_outside, 3), CALLMETHOD_DEF(Integer_diff_with_0, 1), CALLMETHOD_DEF(Integer_diff_with_last, 2), CALLMETHOD_DEF(Integer_order, 3), CALLMETHOD_DEF(Integer_pcompare2, 4), CALLMETHOD_DEF(Integer_sorted2, 4), CALLMETHOD_DEF(Integer_order2, 4), CALLMETHOD_DEF(Integer_match2_quick, 5), CALLMETHOD_DEF(Integer_selfmatch2_quick, 2), CALLMETHOD_DEF(Integer_match2_hash, 5), CALLMETHOD_DEF(Integer_selfmatch2_hash, 2), CALLMETHOD_DEF(Integer_sorted4, 6), CALLMETHOD_DEF(Integer_order4, 6), CALLMETHOD_DEF(Integer_match4_quick, 9), CALLMETHOD_DEF(Integer_selfmatch4_quick, 4), CALLMETHOD_DEF(Integer_match4_hash, 9), CALLMETHOD_DEF(Integer_selfmatch4_hash, 4), CALLMETHOD_DEF(Integer_tabulate2, 4), CALLMETHOD_DEF(Integer_explode_bits, 2), CALLMETHOD_DEF(Integer_sorted_merge, 2), CALLMETHOD_DEF(Integer_mseq, 2), CALLMETHOD_DEF(Integer_fancy_mseq, 3), CALLMETHOD_DEF(findIntervalAndStartFromWidth, 2), /* character_utils.c */ CALLMETHOD_DEF(unstrsplit_list, 2), CALLMETHOD_DEF(safe_strexplode, 1), CALLMETHOD_DEF(strsplit_as_list_of_ints, 2), CALLMETHOD_DEF(svn_time, 0), /* eval_utils.c */ CALLMETHOD_DEF(top_prenv, 2), CALLMETHOD_DEF(top_prenv_dots, 1), /* map_ranges_to_runs.c */ CALLMETHOD_DEF(map_ranges, 4), CALLMETHOD_DEF(map_positions, 3), /* Hits_class.c */ CALLMETHOD_DEF(Hits_new, 6), CALLMETHOD_DEF(select_hits, 4), CALLMETHOD_DEF(make_all_group_inner_hits, 2), /* Rle_class.c */ CALLMETHOD_DEF(Rle_length, 1), CALLMETHOD_DEF(Rle_valid, 1), CALLMETHOD_DEF(Rle_constructor, 2), CALLMETHOD_DEF(Rle_start, 1), CALLMETHOD_DEF(Rle_end, 1), CALLMETHOD_DEF(Rle_extract_range, 3), CALLMETHOD_DEF(Rle_extract_ranges, 5), CALLMETHOD_DEF(Rle_extract_positions, 3), CALLMETHOD_DEF(Rle_getStartEndRunAndOffset, 3), CALLMETHOD_DEF(Rle_window_aslist, 5), /* Rle_utils.c */ CALLMETHOD_DEF(Rle_runsum, 3), CALLMETHOD_DEF(Rle_runwtsum, 4), CALLMETHOD_DEF(Rle_runq, 4), {NULL, NULL, 0} }; void R_init_S4Vectors(DllInfo *info) { R_registerRoutines(info, NULL, callMethods, NULL, NULL); /* safe_arithm.c */ REGISTER_CCALLABLE(_reset_ovflow_flag); REGISTER_CCALLABLE(_get_ovflow_flag); REGISTER_CCALLABLE(_safe_int_add); REGISTER_CCALLABLE(_safe_int_mult); /* sort_utils.c */ REGISTER_CCALLABLE(_sort_ints); REGISTER_CCALLABLE(_get_order_of_int_array); REGISTER_CCALLABLE(_sort_int_array); REGISTER_CCALLABLE(_get_order_of_int_pairs); REGISTER_CCALLABLE(_sort_int_pairs); REGISTER_CCALLABLE(_get_matches_of_ordered_int_pairs); REGISTER_CCALLABLE(_get_order_of_int_quads); REGISTER_CCALLABLE(_get_matches_of_ordered_int_quads); /* hash_utils.c */ REGISTER_CCALLABLE(_new_htab); REGISTER_CCALLABLE(_get_hbucket_val); REGISTER_CCALLABLE(_set_hbucket_val); /* AEbufs.c */ REGISTER_CCALLABLE(_increase_buflength); REGISTER_CCALLABLE(_IntAE_get_nelt); REGISTER_CCALLABLE(_IntAE_set_nelt); REGISTER_CCALLABLE(_IntAE_set_val); REGISTER_CCALLABLE(_IntAE_extend); REGISTER_CCALLABLE(_IntAE_insert_at); REGISTER_CCALLABLE(_new_IntAE); REGISTER_CCALLABLE(_IntAE_append); REGISTER_CCALLABLE(_IntAE_delete_at); REGISTER_CCALLABLE(_IntAE_shift); REGISTER_CCALLABLE(_IntAE_sum_and_shift); REGISTER_CCALLABLE(_IntAE_qsort); REGISTER_CCALLABLE(_IntAE_uniq); REGISTER_CCALLABLE(_new_INTEGER_from_IntAE); REGISTER_CCALLABLE(_new_IntAE_from_INTEGER); REGISTER_CCALLABLE(_new_IntAE_from_CHARACTER); REGISTER_CCALLABLE(_IntAEAE_get_nelt); REGISTER_CCALLABLE(_IntAEAE_set_nelt); REGISTER_CCALLABLE(_IntAEAE_extend); REGISTER_CCALLABLE(_IntAEAE_insert_at); REGISTER_CCALLABLE(_new_IntAEAE); REGISTER_CCALLABLE(_IntAEAE_pappend); REGISTER_CCALLABLE(_IntAEAE_shift); REGISTER_CCALLABLE(_IntAEAE_sum_and_shift); REGISTER_CCALLABLE(_new_LIST_from_IntAEAE); REGISTER_CCALLABLE(_new_IntAEAE_from_LIST); REGISTER_CCALLABLE(_IntAEAE_toEnvir); REGISTER_CCALLABLE(_IntPairAE_get_nelt); REGISTER_CCALLABLE(_IntPairAE_set_nelt); REGISTER_CCALLABLE(_IntPairAE_extend); REGISTER_CCALLABLE(_IntPairAE_insert_at); REGISTER_CCALLABLE(_new_IntPairAE); REGISTER_CCALLABLE(_IntPairAEAE_get_nelt); REGISTER_CCALLABLE(_IntPairAEAE_set_nelt); REGISTER_CCALLABLE(_IntPairAEAE_extend); REGISTER_CCALLABLE(_IntPairAEAE_insert_at); REGISTER_CCALLABLE(_new_IntPairAEAE); REGISTER_CCALLABLE(_LLongAE_get_nelt); REGISTER_CCALLABLE(_LLongAE_set_nelt); REGISTER_CCALLABLE(_LLongAE_set_val); REGISTER_CCALLABLE(_LLongAE_extend); REGISTER_CCALLABLE(_LLongAE_insert_at); REGISTER_CCALLABLE(_new_LLongAE); REGISTER_CCALLABLE(_CharAE_get_nelt); REGISTER_CCALLABLE(_CharAE_set_nelt); REGISTER_CCALLABLE(_CharAE_extend); REGISTER_CCALLABLE(_CharAE_insert_at); REGISTER_CCALLABLE(_new_CharAE); REGISTER_CCALLABLE(_new_CharAE_from_string); REGISTER_CCALLABLE(_CharAE_append_string); REGISTER_CCALLABLE(_CharAE_delete_at); REGISTER_CCALLABLE(_new_CHARSXP_from_CharAE); REGISTER_CCALLABLE(_new_RAW_from_CharAE); REGISTER_CCALLABLE(_new_LOGICAL_from_CharAE); REGISTER_CCALLABLE(_CharAEAE_get_nelt); REGISTER_CCALLABLE(_CharAEAE_set_nelt); REGISTER_CCALLABLE(_CharAEAE_extend); REGISTER_CCALLABLE(_CharAEAE_insert_at); REGISTER_CCALLABLE(_new_CharAEAE); REGISTER_CCALLABLE(_CharAEAE_append_string); REGISTER_CCALLABLE(_new_CHARACTER_from_CharAEAE); /* SEXP_utils.c */ REGISTER_CCALLABLE(_get_classname); /* LLint_class.c */ REGISTER_CCALLABLE(_is_LLint); REGISTER_CCALLABLE(_get_LLint_length); REGISTER_CCALLABLE(_get_LLint_dataptr); REGISTER_CCALLABLE(_alloc_LLint); /* subsetting_utils.c */ REGISTER_CCALLABLE(_copy_vector_block); REGISTER_CCALLABLE(_copy_vector_positions); REGISTER_CCALLABLE(_copy_vector_ranges); /* vector_utils.c */ REGISTER_CCALLABLE(_vector_memcmp); REGISTER_CCALLABLE(_list_as_data_frame); /* integer_utils.c */ REGISTER_CCALLABLE(_check_integer_pairs); REGISTER_CCALLABLE(_find_interv_and_start_from_width); /* Hits_class.c */ REGISTER_CCALLABLE(_new_Hits); REGISTER_CCALLABLE(_get_select_mode); /* Rle_class.c */ REGISTER_CCALLABLE(_construct_logical_Rle); REGISTER_CCALLABLE(_construct_integer_Rle); REGISTER_CCALLABLE(_construct_numeric_Rle); REGISTER_CCALLABLE(_construct_complex_Rle); REGISTER_CCALLABLE(_construct_character_Rle); REGISTER_CCALLABLE(_construct_raw_Rle); REGISTER_CCALLABLE(_construct_Rle); /* List_class.c */ REGISTER_CCALLABLE(_get_List_elementType); REGISTER_CCALLABLE(_set_List_elementType); /* SimpleList_class.c */ REGISTER_CCALLABLE(_new_SimpleList); /* DataFrame_class.c */ REGISTER_CCALLABLE(_new_DataFrame); return; } S4Vectors/src/Rle_class.c0000644000175400017540000010575413175736136016326 0ustar00biocbuildbiocbuild#include "S4Vectors.h" static SEXP _new_Rle(SEXP values, SEXP lengths) { SEXP classdef, ans; PROTECT(classdef = MAKE_CLASS("Rle")); PROTECT(ans = NEW_OBJECT(classdef)); SET_SLOT(ans, install("values"), values); SET_SLOT(ans, install("lengths"), lengths); UNPROTECT(2); return ans; } /**************************************************************************** * Rle_length() */ static long long int sum_int_lengths(const int *lengths, R_xlen_t nrun) { long long int sum; R_xlen_t i; sum = 0; for (i = 0; i < nrun; i++, lengths++) sum += *lengths; return sum; } static long long int sum_llint_lengths(const long long int *lengths, R_xlen_t nrun) { long long int sum; R_xlen_t i; sum = 0; for (i = 0; i < nrun; i++, lengths++) sum += *lengths; return sum; } /* --- .Call ENTRY POINT --- */ SEXP Rle_length(SEXP x) { SEXP x_lengths, ans; R_xlen_t x_nrun; void *x_lengths_dataptr; long long int sum; x_lengths = GET_SLOT(x, install("lengths")); if (IS_INTEGER(x_lengths)) { x_nrun = XLENGTH(x_lengths); x_lengths_dataptr = INTEGER(x_lengths); sum = sum_int_lengths(x_lengths_dataptr, x_nrun); } else if (_is_LLint(x_lengths)) { x_nrun = _get_LLint_length(x_lengths); x_lengths_dataptr = _get_LLint_dataptr(x_lengths); sum = sum_llint_lengths(x_lengths_dataptr, x_nrun); } else { error("S4Vectors internal error in Rle_length(): " "'runLengths(x)' is not an integer\n" " or LLint vector"); } if (sum < 0) error("S4Vectors internal error in Rle_length(): " "Rle vector has a negative length"); if (sum > R_XLEN_T_MAX) error("S4Vectors internal error in Rle_length(): " "Rle vector is too long"); PROTECT(ans = _alloc_LLint("LLint", 1)); _get_LLint_dataptr(ans)[0] = sum; UNPROTECT(1); return ans; } /**************************************************************************** * Rle_valid() */ static char validity_msg[200]; static int check_int_lengths(const int *lengths, R_xlen_t nrun) { R_xlen_t i; for (i = 0; i < nrun; i++, lengths++) { if (*lengths == NA_INTEGER) { snprintf(validity_msg, sizeof(validity_msg), "some run lengths are NA"); return 1; } if (*lengths <= 0) { snprintf(validity_msg, sizeof(validity_msg), "some run lengths are non-positive"); return 1; } } return 0; } static int check_llint_lengths(const long long int *lengths, R_xlen_t nrun) { int no_big_lengths; R_xlen_t i; no_big_lengths = 1; for (i = 0; i < nrun; i++, lengths++) { if (*lengths == NA_LLINT) { snprintf(validity_msg, sizeof(validity_msg), "some run lengths are NA"); return 1; } if (*lengths <= 0) { snprintf(validity_msg, sizeof(validity_msg), "some run lengths are non-positive"); return 1; } if (*lengths > INT_MAX) no_big_lengths = 0; } if (no_big_lengths) { snprintf(validity_msg, sizeof(validity_msg), "the run lengths are stored in an LLint vector\n" " when they could be in an integer vector"); return 1; } return 0; } static int valid_run_lengths(SEXP lengths) { R_xlen_t nrun; void *lengths_dataptr; if (IS_INTEGER(lengths)) { nrun = XLENGTH(lengths); lengths_dataptr = INTEGER(lengths); return check_int_lengths(lengths_dataptr, nrun); } if (_is_LLint(lengths)) { nrun = _get_LLint_length(lengths); lengths_dataptr = _get_LLint_dataptr(lengths); return check_llint_lengths(lengths_dataptr, nrun); } snprintf(validity_msg, sizeof(validity_msg), "'runLengths(x)' must be an integer or LLint vector"); return 1; } /* --- .Call ENTRY POINT --- */ SEXP Rle_valid(SEXP x) { SEXP x_lengths; /* Check 'lengths' slot. */ x_lengths = GET_SLOT(x, install("lengths")); if (valid_run_lengths(x_lengths)) return mkString(validity_msg); return R_NilValue; } /**************************************************************************** * Low-level helpers used by "The C level Rle smart constructors". */ #define CHECK_RUN_LENGTH_IN(len_in, lengths_in_is_L) \ { \ if (lengths_in_is_L) { \ if ((len_in) == NA_LLINT) \ error("some run lengths are NA"); \ if ((len_in) > R_XLEN_T_MAX) \ error("Rle vector is too long"); \ } else { \ if ((len_in) == NA_INTEGER) \ error("some run lengths are NA"); \ } \ if ((len_in) == 0) \ continue; \ if ((len_in) < 0) \ error("some run lengths are negative"); \ } static R_xlen_t check_integer_runs(R_xlen_t nrun_in, const int *values_in, const void *lengths_in, int lengths_in_is_L, unsigned long long int *max_len_out_p) { R_xlen_t nrun_out, i; int not_empty; long long int len_in; unsigned long long int sum, len_out; int val_in, val_out; nrun_out = 0; not_empty = 0; *max_len_out_p = 0; sum = 0; for (i = 0, len_in = 1; i < nrun_in; i++, values_in++) { if (lengths_in != NULL) { len_in = GET_INT_OR_LLINT(lengths_in, lengths_in_is_L, i); CHECK_RUN_LENGTH_IN(len_in, lengths_in_is_L); } val_in = *values_in; if (not_empty) { if (val_in == val_out) { sum += len_in; if (sum > R_XLEN_T_MAX) error("Rle vector is too long"); len_out += len_in; continue; } /* End of run */ if (len_out > *max_len_out_p) *max_len_out_p = len_out; nrun_out++; } else { not_empty = 1; } /* Beginning of run */ sum += len_out = len_in; if (sum > R_XLEN_T_MAX) error("Rle vector is too long"); val_out = val_in; } if (not_empty) { /* End of run */ if (len_out > *max_len_out_p) *max_len_out_p = len_out; nrun_out++; } return nrun_out; } static void fill_integer_runs(R_xlen_t nrun_in, const int *values_in, const void *lengths_in, int lengths_in_is_L, int *values_out, void *lengths_out, int lengths_out_is_L) { R_xlen_t nrun_out, i; int not_empty; long long int len_in, len_out; int val_in, val_out; nrun_out = 0; not_empty = 0; for (i = 0, len_in = 1; i < nrun_in; i++, values_in++) { if (lengths_in != NULL) { len_in = GET_INT_OR_LLINT(lengths_in, lengths_in_is_L, i); if (len_in == 0) continue; } val_in = *values_in; if (not_empty) { if (val_in == val_out) { len_out += len_in; continue; } /* End of run */ SET_INT_OR_LLINT(lengths_out, lengths_out_is_L, nrun_out, len_out); values_out[nrun_out] = val_out; nrun_out++; } else { not_empty = 1; } /* Beginning of run */ len_out = len_in; val_out = val_in; } if (not_empty) { /* End of run */ SET_INT_OR_LLINT(lengths_out, lengths_out_is_L, nrun_out, len_out); values_out[nrun_out] = val_out; } return; } #define SAME_DOUBLE_VALS(x, y) \ ((x) == (y) || (R_IsNA(x) && R_IsNA(y)) || (R_IsNaN(x) && R_IsNaN(y))) static R_xlen_t check_numeric_runs(R_xlen_t nrun_in, const double *values_in, const void *lengths_in, int lengths_in_is_L, unsigned long long int *max_len_out_p) { R_xlen_t nrun_out, i; int not_empty; long long int len_in; unsigned long long int sum, len_out; double val_in, val_out; nrun_out = 0; not_empty = 0; *max_len_out_p = 0; sum = 0; for (i = 0, len_in = 1; i < nrun_in; i++, values_in++) { if (lengths_in != NULL) { len_in = GET_INT_OR_LLINT(lengths_in, lengths_in_is_L, i); CHECK_RUN_LENGTH_IN(len_in, lengths_in_is_L); } val_in = *values_in; if (not_empty) { if (SAME_DOUBLE_VALS(val_in, val_out)) { sum += len_in; if (sum > R_XLEN_T_MAX) error("Rle vector is too long"); len_out += len_in; continue; } /* End of run */ if (len_out > *max_len_out_p) *max_len_out_p = len_out; nrun_out++; } else { not_empty = 1; } /* Beginning of run */ sum += len_out = len_in; if (sum > R_XLEN_T_MAX) error("Rle vector is too long"); val_out = val_in; } if (not_empty) { /* End of run */ if (len_out > *max_len_out_p) *max_len_out_p = len_out; nrun_out++; } return nrun_out; } static void fill_numeric_runs(R_xlen_t nrun_in, const double *values_in, const void *lengths_in, int lengths_in_is_L, double *values_out, void *lengths_out, int lengths_out_is_L) { R_xlen_t nrun_out, i; int not_empty; long long int len_in, len_out; double val_in, val_out; nrun_out = 0; not_empty = 0; for (i = 0, len_in = 1; i < nrun_in; i++, values_in++) { if (lengths_in != NULL) { len_in = GET_INT_OR_LLINT(lengths_in, lengths_in_is_L, i); if (len_in == 0) continue; } val_in = *values_in; if (not_empty) { if (SAME_DOUBLE_VALS(val_in, val_out)) { len_out += len_in; continue; } /* End of run */ SET_INT_OR_LLINT(lengths_out, lengths_out_is_L, nrun_out, len_out); values_out[nrun_out] = val_out; nrun_out++; } else { not_empty = 1; } /* Beginning of run */ len_out = len_in; val_out = val_in; } if (not_empty) { /* End of run */ SET_INT_OR_LLINT(lengths_out, lengths_out_is_L, nrun_out, len_out); values_out[nrun_out] = val_out; } return; } static R_xlen_t check_complex_runs(R_xlen_t nrun_in, const Rcomplex *values_in, const void *lengths_in, int lengths_in_is_L, unsigned long long int *max_len_out_p) { R_xlen_t nrun_out, i; int not_empty; long long int len_in; unsigned long long int sum, len_out; Rcomplex val_in, val_out; nrun_out = 0; not_empty = 0; *max_len_out_p = 0; sum = 0; for (i = 0, len_in = 1; i < nrun_in; i++, values_in++) { if (lengths_in != NULL) { len_in = GET_INT_OR_LLINT(lengths_in, lengths_in_is_L, i); CHECK_RUN_LENGTH_IN(len_in, lengths_in_is_L); } val_in = *values_in; if (not_empty) { if (SAME_DOUBLE_VALS(val_in.r, val_out.r) && SAME_DOUBLE_VALS(val_in.i, val_out.i)) { sum += len_in; if (sum > R_XLEN_T_MAX) error("Rle vector is too long"); len_out += len_in; continue; } /* End of run */ if (len_out > *max_len_out_p) *max_len_out_p = len_out; nrun_out++; } else { not_empty = 1; } /* Beginning of run */ sum += len_out = len_in; if (sum > R_XLEN_T_MAX) error("Rle vector is too long"); val_out = val_in; } if (not_empty) { /* End of run */ if (len_out > *max_len_out_p) *max_len_out_p = len_out; nrun_out++; } return nrun_out; } static void fill_complex_runs(R_xlen_t nrun_in, const Rcomplex *values_in, const void *lengths_in, int lengths_in_is_L, Rcomplex *values_out, void *lengths_out, int lengths_out_is_L) { R_xlen_t nrun_out, i; int not_empty; long long int len_in, len_out; Rcomplex val_in, val_out; nrun_out = 0; not_empty = 0; for (i = 0, len_in = 1; i < nrun_in; i++, values_in++) { if (lengths_in != NULL) { len_in = GET_INT_OR_LLINT(lengths_in, lengths_in_is_L, i); if (len_in == 0) continue; } val_in = *values_in; if (not_empty) { if (SAME_DOUBLE_VALS(val_in.r, val_out.r) && SAME_DOUBLE_VALS(val_in.i, val_out.i)) { len_out += len_in; continue; } /* End of run */ SET_INT_OR_LLINT(lengths_out, lengths_out_is_L, nrun_out, len_out); values_out[nrun_out] = val_out; nrun_out++; } else { not_empty = 1; } /* Beginning of run */ len_out = len_in; val_out = val_in; } if (not_empty) { /* End of run */ SET_INT_OR_LLINT(lengths_out, lengths_out_is_L, nrun_out, len_out); values_out[nrun_out] = val_out; } return; } static R_xlen_t check_character_runs( SEXP values_in, const void *lengths_in, int lengths_in_is_L, unsigned long long int *max_len_out_p) { R_xlen_t nrun_in, nrun_out, i; int not_empty; long long int len_in; unsigned long long int sum, len_out; SEXP val_in, val_out; nrun_in = XLENGTH(values_in); nrun_out = 0; not_empty = 0; *max_len_out_p = 0; sum = 0; for (i = 0, len_in = 1; i < nrun_in; i++) { if (lengths_in != NULL) { len_in = GET_INT_OR_LLINT(lengths_in, lengths_in_is_L, i); CHECK_RUN_LENGTH_IN(len_in, lengths_in_is_L); } val_in = STRING_ELT(values_in, i); if (not_empty) { if (val_in == val_out) { sum += len_in; if (sum > R_XLEN_T_MAX) error("Rle vector is too long"); len_out += len_in; continue; } /* End of run */ if (len_out > *max_len_out_p) *max_len_out_p = len_out; nrun_out++; } else { not_empty = 1; } /* Beginning of run */ sum += len_out = len_in; if (sum > R_XLEN_T_MAX) error("Rle vector is too long"); val_out = val_in; } if (not_empty) { /* End of run */ if (len_out > *max_len_out_p) *max_len_out_p = len_out; nrun_out++; } return nrun_out; } static void fill_character_runs( SEXP values_in, const void *lengths_in, int lengths_in_is_L, SEXP values_out, void *lengths_out, int lengths_out_is_L) { R_xlen_t nrun_in, nrun_out, i; int not_empty; long long int len_in, len_out; SEXP val_in, val_out; nrun_in = XLENGTH(values_in); nrun_out = 0; not_empty = 0; for (i = 0, len_in = 1; i < nrun_in; i++) { if (lengths_in != NULL) { len_in = GET_INT_OR_LLINT(lengths_in, lengths_in_is_L, i); if (len_in == 0) continue; } val_in = STRING_ELT(values_in, i); if (not_empty) { if (val_in == val_out) { len_out += len_in; continue; } /* End of run */ SET_INT_OR_LLINT(lengths_out, lengths_out_is_L, nrun_out, len_out); SET_STRING_ELT(values_out, nrun_out, val_out); nrun_out++; } else { not_empty = 1; } /* Beginning of run */ len_out = len_in; val_out = val_in; } if (not_empty) { /* End of run */ SET_INT_OR_LLINT(lengths_out, lengths_out_is_L, nrun_out, len_out); SET_STRING_ELT(values_out, nrun_out, val_out); } return; } static R_xlen_t check_raw_runs(R_xlen_t nrun_in, const Rbyte *values_in, const void *lengths_in, int lengths_in_is_L, unsigned long long int *max_len_out_p) { R_xlen_t nrun_out, i; int not_empty; long long int len_in; unsigned long long int sum, len_out; Rbyte val_in, val_out; nrun_out = 0; not_empty = 0; *max_len_out_p = 0; sum = 0; for (i = 0, len_in = 1; i < nrun_in; i++, values_in++) { if (lengths_in != NULL) { len_in = GET_INT_OR_LLINT(lengths_in, lengths_in_is_L, i); CHECK_RUN_LENGTH_IN(len_in, lengths_in_is_L); } val_in = *values_in; if (not_empty) { if (val_in == val_out) { sum += len_in; if (sum > R_XLEN_T_MAX) error("Rle vector is too long"); len_out += len_in; continue; } /* End of run */ if (len_out > *max_len_out_p) *max_len_out_p = len_out; nrun_out++; } else { not_empty = 1; } /* Beginning of run */ sum += len_out = len_in; if (sum > R_XLEN_T_MAX) error("Rle vector is too long"); val_out = val_in; } if (not_empty) { /* End of run */ if (len_out > *max_len_out_p) *max_len_out_p = len_out; nrun_out++; } return nrun_out; } static void fill_raw_runs(R_xlen_t nrun_in, const Rbyte *values_in, const void *lengths_in, int lengths_in_is_L, Rbyte *values_out, void *lengths_out, int lengths_out_is_L) { R_xlen_t nrun_out, i; int not_empty; long long int len_in, len_out; Rbyte val_in, val_out; nrun_out = 0; not_empty = 0; for (i = 0, len_in = 1; i < nrun_in; i++, values_in++) { if (lengths_in != NULL) { len_in = GET_INT_OR_LLINT(lengths_in, lengths_in_is_L, i); if (len_in == 0) continue; } val_in = *values_in; if (not_empty) { if (val_in == val_out) { len_out += len_in; continue; } /* End of run */ SET_INT_OR_LLINT(lengths_out, lengths_out_is_L, nrun_out, len_out); values_out[nrun_out] = val_out; nrun_out++; } else { not_empty = 1; } /* Beginning of run */ len_out = len_in; val_out = val_in; } if (not_empty) { /* End of run */ SET_INT_OR_LLINT(lengths_out, lengths_out_is_L, nrun_out, len_out); values_out[nrun_out] = val_out; } return; } /**************************************************************************** * The C level Rle smart constructors. */ static SEXP alloc_lengths(R_xlen_t nrun_out, int lengths_out_is_L, void **dataptr_p) { SEXP lengths; /* No need to PROTECT() */ if (lengths_out_is_L) { lengths = _alloc_LLint("LLint", nrun_out); *dataptr_p = _get_LLint_dataptr(lengths); } else { lengths = NEW_INTEGER(nrun_out); *dataptr_p = INTEGER(lengths); } return lengths; } SEXP _construct_logical_Rle(R_xlen_t nrun_in, const int *values_in, const void *lengths_in, int lengths_in_is_L) { R_xlen_t nrun_out; unsigned long long int max_len_out; void *lengths_out; int lengths_out_is_L; int *values_out; SEXP ans_lengths, ans_values, ans; nrun_out = check_integer_runs(nrun_in, values_in, lengths_in, lengths_in_is_L, &max_len_out); lengths_out_is_L = max_len_out > INT_MAX; PROTECT(ans_values = NEW_LOGICAL(nrun_out)); values_out = LOGICAL(ans_values); PROTECT(ans_lengths = alloc_lengths(nrun_out, lengths_out_is_L, &lengths_out)); fill_integer_runs(nrun_in, values_in, lengths_in, lengths_in_is_L, values_out, lengths_out, lengths_out_is_L); PROTECT(ans = _new_Rle(ans_values, ans_lengths)); UNPROTECT(3); return ans; } SEXP _construct_integer_Rle(R_xlen_t nrun_in, const int *values_in, const void *lengths_in, int lengths_in_is_L) { R_xlen_t nrun_out; unsigned long long int max_len_out; void *lengths_out; int lengths_out_is_L; int *values_out; SEXP ans_lengths, ans_values, ans; nrun_out = check_integer_runs(nrun_in, values_in, lengths_in, lengths_in_is_L, &max_len_out); lengths_out_is_L = max_len_out > INT_MAX; PROTECT(ans_values = NEW_INTEGER(nrun_out)); values_out = INTEGER(ans_values); PROTECT(ans_lengths = alloc_lengths(nrun_out, lengths_out_is_L, &lengths_out)); fill_integer_runs(nrun_in, values_in, lengths_in, lengths_in_is_L, values_out, lengths_out, lengths_out_is_L); PROTECT(ans = _new_Rle(ans_values, ans_lengths)); UNPROTECT(3); return ans; } SEXP _construct_numeric_Rle(R_xlen_t nrun_in, const double *values_in, const void *lengths_in, int lengths_in_is_L) { R_xlen_t nrun_out; unsigned long long int max_len_out; void *lengths_out; int lengths_out_is_L; double *values_out; SEXP ans_lengths, ans_values, ans; nrun_out = check_numeric_runs(nrun_in, values_in, lengths_in, lengths_in_is_L, &max_len_out); lengths_out_is_L = max_len_out > INT_MAX; PROTECT(ans_values = NEW_NUMERIC(nrun_out)); values_out = REAL(ans_values); PROTECT(ans_lengths = alloc_lengths(nrun_out, lengths_out_is_L, &lengths_out)); fill_numeric_runs(nrun_in, values_in, lengths_in, lengths_in_is_L, values_out, lengths_out, lengths_out_is_L); PROTECT(ans = _new_Rle(ans_values, ans_lengths)); UNPROTECT(3); return ans; } SEXP _construct_complex_Rle(R_xlen_t nrun_in, const Rcomplex *values_in, const void *lengths_in, int lengths_in_is_L) { R_xlen_t nrun_out; unsigned long long int max_len_out; void *lengths_out; int lengths_out_is_L; Rcomplex *values_out; SEXP ans_lengths, ans_values, ans; nrun_out = check_complex_runs(nrun_in, values_in, lengths_in, lengths_in_is_L, &max_len_out); lengths_out_is_L = max_len_out > INT_MAX; PROTECT(ans_values = NEW_COMPLEX(nrun_out)); values_out = COMPLEX(ans_values); PROTECT(ans_lengths = alloc_lengths(nrun_out, lengths_out_is_L, &lengths_out)); fill_complex_runs(nrun_in, values_in, lengths_in, lengths_in_is_L, values_out, lengths_out, lengths_out_is_L); PROTECT(ans = _new_Rle(ans_values, ans_lengths)); UNPROTECT(3); return ans; } SEXP _construct_character_Rle(SEXP values_in, const void *lengths_in, int lengths_in_is_L) { R_xlen_t nrun_out; unsigned long long int max_len_out; void *lengths_out; int lengths_out_is_L; SEXP ans_lengths, ans_values, ans; nrun_out = check_character_runs( values_in, lengths_in, lengths_in_is_L, &max_len_out); lengths_out_is_L = max_len_out > INT_MAX; PROTECT(ans_values = NEW_CHARACTER(nrun_out)); PROTECT(ans_lengths = alloc_lengths(nrun_out, lengths_out_is_L, &lengths_out)); fill_character_runs(values_in, lengths_in, lengths_in_is_L, ans_values, lengths_out, lengths_out_is_L); PROTECT(ans = _new_Rle(ans_values, ans_lengths)); UNPROTECT(3); return ans; } SEXP _construct_raw_Rle(R_xlen_t nrun_in, const Rbyte *values_in, const void *lengths_in, int lengths_in_is_L) { R_xlen_t nrun_out; unsigned long long int max_len_out; void *lengths_out; int lengths_out_is_L; Rbyte *values_out; SEXP ans_lengths, ans_values, ans; nrun_out = check_raw_runs(nrun_in, values_in, lengths_in, lengths_in_is_L, &max_len_out); lengths_out_is_L = max_len_out > INT_MAX; PROTECT(ans_values = NEW_RAW(nrun_out)); values_out = RAW(ans_values); PROTECT(ans_lengths = alloc_lengths(nrun_out, lengths_out_is_L, &lengths_out)); fill_raw_runs(nrun_in, values_in, lengths_in, lengths_in_is_L, values_out, lengths_out, lengths_out_is_L); PROTECT(ans = _new_Rle(ans_values, ans_lengths)); UNPROTECT(3); return ans; } SEXP _construct_Rle(SEXP values_in, const void *lengths_in, int lengths_in_is_L) { R_xlen_t nrun_in; SEXP ans, ans_values, ans_values_class, ans_values_levels; nrun_in = XLENGTH(values_in); switch (TYPEOF(values_in)) { case LGLSXP: PROTECT(ans = _construct_logical_Rle(nrun_in, LOGICAL(values_in), lengths_in, lengths_in_is_L)); break; case INTSXP: PROTECT(ans = _construct_integer_Rle(nrun_in, INTEGER(values_in), lengths_in, lengths_in_is_L)); /* 'values_in' could be a factor in which case we need to propagate its levels. */ if (isFactor(values_in)) { ans_values = GET_SLOT(ans, install("values")); /* Levels must be set before class. */ PROTECT(ans_values_levels = duplicate(GET_LEVELS(values_in))); SET_LEVELS(ans_values, ans_values_levels); UNPROTECT(1); PROTECT(ans_values_class = duplicate(GET_CLASS(values_in))); SET_CLASS(ans_values, ans_values_class); UNPROTECT(1); } break; case REALSXP: PROTECT(ans = _construct_numeric_Rle(nrun_in, REAL(values_in), lengths_in, lengths_in_is_L)); break; case CPLXSXP: PROTECT(ans = _construct_complex_Rle(nrun_in, COMPLEX(values_in), lengths_in, lengths_in_is_L)); break; case STRSXP: PROTECT(ans = _construct_character_Rle( values_in, lengths_in, lengths_in_is_L)); break; case RAWSXP: PROTECT(ans = _construct_raw_Rle(nrun_in, RAW(values_in), lengths_in, lengths_in_is_L)); break; default: error("Rle of type '%s' is not supported", CHAR(type2str(TYPEOF(values_in)))); } UNPROTECT(1); return ans; } /**************************************************************************** * The Rle constructor. * * --- .Call ENTRY POINT --- * Args: * lengths_in: An integer or LLint vector of the same length as 'values' * with no NAs or negative values, or a NULL. If NULL then * all the runs are considered to be of length 1 like if * lengths_in was 'rep(1, length(values))'. */ SEXP Rle_constructor(SEXP values_in, SEXP lengths_in) { R_xlen_t nrun_in, lengths_in_len; /* If lengths_in_is_L == 1 then 'lengths_in_dataptr' points to an array of long long ints. Otherwise it points to an array of ints. */ int lengths_in_is_L; const void *lengths_in_dataptr; nrun_in = XLENGTH(values_in); lengths_in_is_L = 0; if (isNull(lengths_in)) { lengths_in_dataptr = NULL; } else { if (IS_INTEGER(lengths_in)) { lengths_in_len = XLENGTH(lengths_in); lengths_in_dataptr = INTEGER(lengths_in); } else if (_is_LLint(lengths_in)) { lengths_in_is_L = 1; lengths_in_len = _get_LLint_length(lengths_in); lengths_in_dataptr = _get_LLint_dataptr(lengths_in); } else { error("the supplied 'lengths' must be an integer or " "LLint vector, or a NULL"); } if (nrun_in != lengths_in_len) error("'length(values)' != 'length(lengths)'"); } return _construct_Rle(values_in, lengths_in_dataptr, lengths_in_is_L); } /**************************************************************************** * The Rle start() and end() getters (.Call ENTRY POINTS). */ SEXP Rle_start(SEXP x) { int i, nrun, *len_elt, *prev_start, *curr_start; SEXP lengths, ans; lengths = GET_SLOT(x, install("lengths")); nrun = LENGTH(lengths); PROTECT(ans = NEW_INTEGER(nrun)); if (nrun > 0) { INTEGER(ans)[0] = 1; for(i = 1, len_elt = INTEGER(lengths), prev_start = INTEGER(ans), curr_start = INTEGER(ans) + 1; i < nrun; i++, len_elt++, prev_start++, curr_start++) { *curr_start = *prev_start + *len_elt; } } UNPROTECT(1); return ans; } SEXP Rle_end(SEXP x) { int i, nrun, *len_elt, *prev_end, *curr_end; SEXP lengths, ans; lengths = GET_SLOT(x, install("lengths")); nrun = LENGTH(lengths); PROTECT(ans = NEW_INTEGER(nrun)); if (nrun > 0) { INTEGER(ans)[0] = INTEGER(lengths)[0]; for(i = 1, len_elt = INTEGER(lengths) + 1, prev_end = INTEGER(ans), curr_end = INTEGER(ans) + 1; i < nrun; i++, len_elt++, prev_end++, curr_end++) { *curr_end = *prev_end + *len_elt; } } UNPROTECT(1); return ans; } /**************************************************************************** * Rle_extract_range(), Rle_extract_ranges(), and Rle_extract_positions() */ static SEXP extract_Rle_mapped_range(SEXP x_values, const int *x_lengths, int mapped_range_start, int mapped_range_span, int mapped_range_Ltrim, int mapped_range_Rtrim) { SEXP ans_values, ans_lengths, ans; PROTECT(ans_values = _subset_vector_OR_factor_by_ranges(x_values, &mapped_range_start, &mapped_range_span, 1)); PROTECT(ans_lengths = NEW_INTEGER(mapped_range_span)); if (mapped_range_span != 0) { memcpy(INTEGER(ans_lengths), x_lengths + mapped_range_start - 1, sizeof(int) * mapped_range_span); INTEGER(ans_lengths)[0] -= mapped_range_Ltrim; INTEGER(ans_lengths)[mapped_range_span - 1] -= mapped_range_Rtrim; } PROTECT(ans = _new_Rle(ans_values, ans_lengths)); UNPROTECT(3); return ans; } /* * Extract 'nranges' Rle's from 'x'. The i-th Rle to extract corresponds to * the i-th "mapped range", which is defined by 4 int values: * 1. mapped_range_start[i]: The first run in 'x' spanned by the mapped * range (specified as 1-based index). * 2. mapped_range_span[i]: The nb of runs in 'x' spanned by the mapped * range. * 3. mapped_range_Ltrim[i]: The nb of unspanned positions in the first * spanned run. * 4. mapped_range_Rtrim[i]: The nb of unspanned positions in the last * spanned run. * If 'as_list' is TRUE, then the extracted Rle's are returned in a list of * length 'nranges'. Otherwise, the single Rle obtained by concatenating them * all together is returned. */ static SEXP subset_Rle_by_mapped_ranges(SEXP x, const int *mapped_range_start, const int *mapped_range_span, const int *mapped_range_Ltrim, const int *mapped_range_Rtrim, int nranges, int as_list) { SEXP x_values, x_lengths, tmp_values, ans, ans_elt; int tmp_nrun, *tmp_lengths, i, n; x_values = GET_SLOT(x, install("values")); x_lengths = GET_SLOT(x, install("lengths")); if (as_list == 1) { PROTECT(ans = NEW_LIST(nranges)); for (i = 0; i < nranges; i++) { PROTECT(ans_elt = extract_Rle_mapped_range(x_values, INTEGER(x_lengths), mapped_range_start[i], mapped_range_span[i], mapped_range_Ltrim[i], mapped_range_Rtrim[i])); SET_VECTOR_ELT(ans, i, ans_elt); UNPROTECT(1); } UNPROTECT(1); return ans; } if (nranges == 1) return extract_Rle_mapped_range(x_values, INTEGER(x_lengths), mapped_range_start[0], mapped_range_span[0], mapped_range_Ltrim[0], mapped_range_Rtrim[0]); PROTECT(tmp_values = _subset_vector_OR_factor_by_ranges(x_values, mapped_range_start, mapped_range_span, nranges)); tmp_nrun = LENGTH(tmp_values); tmp_lengths = (int *) R_alloc(sizeof(int), tmp_nrun); for (i = tmp_nrun = 0; i < nranges; i++) { n = mapped_range_span[i]; if (n == 0) continue; memcpy(tmp_lengths + tmp_nrun, INTEGER(x_lengths) + mapped_range_start[i] - 1, sizeof(int) * n); tmp_lengths[tmp_nrun] -= mapped_range_Ltrim[i]; tmp_nrun += n; tmp_lengths[tmp_nrun - 1] -= mapped_range_Rtrim[i]; } PROTECT(ans = _construct_Rle(tmp_values, tmp_lengths, 0)); UNPROTECT(2); return ans; } static SEXP subset_Rle_by_mapped_pos(SEXP x, const int *mapped_pos, int npos) { SEXP x_values, tmp_values, ans; x_values = GET_SLOT(x, install("values")); PROTECT(tmp_values = _subset_vector_OR_factor_by_positions(x_values, mapped_pos, npos)); PROTECT(ans = _construct_Rle(tmp_values, NULL, 0)); UNPROTECT(2); return ans; } SEXP _subset_Rle_by_ranges(SEXP x, const int *start, const int *width, int nranges, int method, int as_list) { SEXP x_lengths; int x_nrun, *mapped_range_start, *mapped_range_span, *mapped_range_Ltrim, *mapped_range_Rtrim, i; const char *errmsg; x_lengths = GET_SLOT(x, install("lengths")); x_nrun = LENGTH(x_lengths); mapped_range_start = (int *) R_alloc(sizeof(int), nranges); mapped_range_span = (int *) R_alloc(sizeof(int), nranges); mapped_range_Ltrim = (int *) R_alloc(sizeof(int), nranges); mapped_range_Rtrim = (int *) R_alloc(sizeof(int), nranges); errmsg = _ranges_mapper(INTEGER(x_lengths), x_nrun, start, width, nranges, mapped_range_start, /* will be filled with offsets */ mapped_range_span, mapped_range_Ltrim, mapped_range_Rtrim, method); if (errmsg != NULL) error(errmsg); for (i = 0; i < nranges; i++) mapped_range_start[i]++; /* add 1 to get the starts */ return subset_Rle_by_mapped_ranges(x, mapped_range_start, mapped_range_span, mapped_range_Ltrim, mapped_range_Rtrim, nranges, as_list); } SEXP _subset_Rle_by_positions(SEXP x, const int *pos, int npos, int method) { SEXP x_lengths; int x_nrun, *mapped_pos; const char *errmsg; x_lengths = GET_SLOT(x, install("lengths")); x_nrun = LENGTH(x_lengths); mapped_pos = (int *) R_alloc(sizeof(int), npos); errmsg = _positions_mapper(INTEGER(x_lengths), x_nrun, pos, npos, mapped_pos, method); if (errmsg != NULL) error(errmsg); return subset_Rle_by_mapped_pos(x, mapped_pos, npos); } /* --- .Call ENTRY POINT --- */ SEXP Rle_extract_range(SEXP x, SEXP start, SEXP end) { int nranges, x_nrun, mapped_range_offset, mapped_range_span, mapped_range_Ltrim, mapped_range_Rtrim; const int *range_start_p, *range_end_p; SEXP x_values, x_lengths; const char *errmsg; nranges = _check_integer_pairs(start, end, &range_start_p, &range_end_p, "start", "end"); if (nranges != 1) error("'start' and 'end' must be of length 1"); x_values = GET_SLOT(x, install("values")); x_lengths = GET_SLOT(x, install("lengths")); x_nrun = LENGTH(x_lengths); errmsg = _simple_range_mapper(INTEGER(x_lengths), x_nrun, range_start_p[0], range_end_p[0], &mapped_range_offset, &mapped_range_span, &mapped_range_Ltrim, &mapped_range_Rtrim); if (errmsg != NULL) error(errmsg); mapped_range_offset++; /* add 1 to get the start */ return extract_Rle_mapped_range(x_values, INTEGER(x_lengths), mapped_range_offset, mapped_range_span, mapped_range_Ltrim, mapped_range_Rtrim); } /* --- .Call ENTRY POINT --- */ SEXP Rle_extract_ranges(SEXP x, SEXP start, SEXP width, SEXP method, SEXP as_list) { int nranges; const int *start_p, *width_p; nranges = _check_integer_pairs(start, width, &start_p, &width_p, "start", "width"); return _subset_Rle_by_ranges(x, start_p, width_p, nranges, INTEGER(method)[0], LOGICAL(as_list)[0]); } /* --- .Call ENTRY POINT --- */ SEXP Rle_extract_positions(SEXP x, SEXP pos, SEXP method) { int npos; npos = LENGTH(pos); return _subset_Rle_by_positions(x, INTEGER(pos), npos, INTEGER(method)[0]); } /**************************************************************************** * Rle_getStartEndRunAndOffset() */ static SEXP get_StartEndRunAndOffset_from_runLength( const int *runlength, int runlength_len, const int *start, const int *end, int length) { int i, *soff_elt, *eoff_elt; const int *start_elt, *end_elt, *erun_elt; SEXP info_start, info_end, ans, ans_names; SEXP ans_start, ans_start_names, ans_end, ans_end_names; SEXP start_run, start_offset, end_run, end_offset; PROTECT(info_start = _find_interv_and_start_from_width(start, length, runlength, runlength_len)); PROTECT(info_end = _find_interv_and_start_from_width(end, length, runlength, runlength_len)); start_run = VECTOR_ELT(info_start, 0); start_offset = VECTOR_ELT(info_start, 1); end_run = VECTOR_ELT(info_end, 0); end_offset = VECTOR_ELT(info_end, 1); for (i = 0, start_elt = start, end_elt = end, soff_elt = INTEGER(start_offset), eoff_elt = INTEGER(end_offset), erun_elt = INTEGER(end_run); i < length; i++, start_elt++, end_elt++, soff_elt++, eoff_elt++, erun_elt++) { *soff_elt = *start_elt - *soff_elt; *eoff_elt = *eoff_elt + runlength[*erun_elt - 1] - 1 - *end_elt; } PROTECT(ans_start = NEW_LIST(2)); PROTECT(ans_start_names = NEW_CHARACTER(2)); SET_VECTOR_ELT(ans_start, 0, start_run); SET_VECTOR_ELT(ans_start, 1, start_offset); SET_STRING_ELT(ans_start_names, 0, mkChar("run")); SET_STRING_ELT(ans_start_names, 1, mkChar("offset")); SET_NAMES(ans_start, ans_start_names); PROTECT(ans_end = NEW_LIST(2)); PROTECT(ans_end_names = NEW_CHARACTER(2)); SET_VECTOR_ELT(ans_end, 0, end_run); SET_VECTOR_ELT(ans_end, 1, end_offset); SET_STRING_ELT(ans_end_names, 0, mkChar("run")); SET_STRING_ELT(ans_end_names, 1, mkChar("offset")); SET_NAMES(ans_end, ans_end_names); PROTECT(ans = NEW_LIST(2)); PROTECT(ans_names = NEW_CHARACTER(2)); SET_VECTOR_ELT(ans, 0, ans_start); SET_VECTOR_ELT(ans, 1, ans_end); SET_STRING_ELT(ans_names, 0, mkChar("start")); SET_STRING_ELT(ans_names, 1, mkChar("end")); SET_NAMES(ans, ans_names); UNPROTECT(8); return ans; } /* --- .Call ENTRY POINT --- */ SEXP Rle_getStartEndRunAndOffset(SEXP x, SEXP start, SEXP end) { int n; SEXP lengths; n = LENGTH(start); if (LENGTH(end) != n) error("length of 'start' must equal length of 'end'"); lengths = GET_SLOT(x, install("lengths")); return get_StartEndRunAndOffset_from_runLength( INTEGER(lengths), LENGTH(lengths), INTEGER(start), INTEGER(end), n); } /* --- .Call ENTRY POINT --- */ SEXP Rle_window_aslist(SEXP x, SEXP runStart, SEXP runEnd, SEXP offsetStart, SEXP offsetEnd) { SEXP values, lengths, runWidth, ans, ans_names, ans_values, ans_lengths; values = GET_SLOT(x, install("values")); lengths = GET_SLOT(x, install("lengths")); if (!IS_INTEGER(runStart) || LENGTH(runStart) != 1 || INTEGER(runStart)[0] == NA_INTEGER || INTEGER(runStart)[0] < 1) error("invalid 'runStart' argument"); if (!IS_INTEGER(runEnd) || LENGTH(runEnd) != 1 || INTEGER(runEnd)[0] == NA_INTEGER || (INTEGER(runEnd)[0] + 1) < INTEGER(runStart)[0] || INTEGER(runEnd)[0] > LENGTH(values)) error("invalid 'runWidth' argument"); PROTECT(runWidth = NEW_INTEGER(1)); INTEGER(runWidth)[0] = INTEGER(runEnd)[0] - INTEGER(runStart)[0] + 1; PROTECT(ans = NEW_LIST(2)); PROTECT(ans_names = NEW_CHARACTER(2)); PROTECT(ans_values = vector_OR_factor_extract_ranges(values, runStart, runWidth)); PROTECT(ans_lengths = vector_OR_factor_extract_ranges(lengths, runStart, runWidth)); if (INTEGER(runWidth)[0] > 0) { INTEGER(ans_lengths)[0] -= INTEGER(offsetStart)[0]; INTEGER(ans_lengths)[INTEGER(runWidth)[0] - 1] -= INTEGER(offsetEnd)[0]; } SET_VECTOR_ELT(ans, 0, ans_values); SET_VECTOR_ELT(ans, 1, ans_lengths); SET_STRING_ELT(ans_names, 0, mkChar("values")); SET_STRING_ELT(ans_names, 1, mkChar("lengths")); SET_NAMES(ans, ans_names); UNPROTECT(5); return ans; } S4Vectors/src/Rle_utils.c0000644000175400017540000005005313175736136016350 0ustar00biocbuildbiocbuild#include "S4Vectors.h" #include /* * roundingScale() function taken from the src/lib/common.c file from the Kent * source tree: * http://genome-source.cse.ucsc.edu/gitweb/?p=kent.git;a=blob_plain;f=src/lib/common.c */ static int roundingScale(int a, int p, int q) /* returns rounded a*p/q */ { if (a > 100000 || p > 100000) { double x = a; x *= p; x /= q; return round(x); } else return (a*p + q/2)/q; } static R_xlen_t compute_nrun_out(int nrun_in, const void *lengths_in, int lengths_in_is_L, int k) { R_xlen_t nrun_out, i; long long int len_in; nrun_out = 0; for (i = 0; i < nrun_in; i++) { len_in = GET_INT_OR_LLINT(lengths_in, lengths_in_is_L, i); nrun_out += k < len_in ? k : len_in; } if (nrun_out < k) error("S4Vectors internal error in compute_nrun_out(): " "k > length of Rle vector"); nrun_out -= k - 1; return nrun_out; } static void compute_runsum_integer_runs(R_xlen_t nrun_in, const int *values_in, const void *lengths_in, int lengths_in_is_L, int k, int narm, R_xlen_t nrun_out, int *values_out, void *lengths_out) { R_xlen_t i, j, i2; long long int len_in, offset_in_run, k2, times; int val_in, val_out, val2_in; j = 0; for (i = 0; i < nrun_in; i++) { len_in = GET_INT_OR_LLINT(lengths_in, lengths_in_is_L, i); val_in = values_in[i]; if (narm && val_in == NA_INTEGER) val_in = 0; if (k <= len_in) { values_out[j] = _safe_int_mult(k, val_in); offset_in_run = len_in - k + 1; SET_INT_OR_LLINT(lengths_out, lengths_in_is_L, j, offset_in_run); if (++j == nrun_out) return; if (j % 500000 == 0) R_CheckUserInterrupt(); } else { offset_in_run = 0; } while (offset_in_run < len_in) { k2 = len_in - offset_in_run; /* < k */ val_out = _safe_int_mult(k2, val_in); i2 = i; do { i2++; k2 += times = GET_INT_OR_LLINT(lengths_in, lengths_in_is_L, i2); if (k2 > k) times -= k2 - k; val2_in = values_in[i2]; if (narm && val2_in == NA_INTEGER) val2_in = 0; val_out = _safe_int_add(val_out, _safe_int_mult(times, val2_in)); } while (k2 < k); values_out[j] = val_out; SET_INT_OR_LLINT(lengths_out, lengths_in_is_L, j, 1); if (++j == nrun_out) return; if (j % 500000 == 0) R_CheckUserInterrupt(); offset_in_run++; } } return; } static void compute_runsum_numeric_runs(R_xlen_t nrun_in, const double *values_in, const void *lengths_in, int lengths_in_is_L, int k, int narm, R_xlen_t nrun_out, double *values_out, void *lengths_out) { R_xlen_t i, j, i2; long long int len_in, offset_in_run, k2, times; double val_in, val_out, val2_in; j = 0; for (i = 0; i < nrun_in; i++) { len_in = GET_INT_OR_LLINT(lengths_in, lengths_in_is_L, i); val_in = values_in[i]; if (narm && ISNAN(val_in)) val_in = 0.0; if (k <= len_in) { values_out[j] = k * val_in; offset_in_run = len_in - k + 1; SET_INT_OR_LLINT(lengths_out, lengths_in_is_L, j, offset_in_run); if (++j == nrun_out) return; if (j % 500000 == 0) R_CheckUserInterrupt(); } else { offset_in_run = 0; } while (offset_in_run < len_in) { k2 = len_in - offset_in_run; /* < k */ val_out = k2 * val_in; i2 = i; do { i2++; k2 += times = GET_INT_OR_LLINT(lengths_in, lengths_in_is_L, i2); if (k2 > k) times -= k2 - k; val2_in = values_in[i2]; if (narm && ISNAN(val2_in)) val2_in = 0.0; val_out += times * val2_in; } while (k2 < k); values_out[j] = val_out; SET_INT_OR_LLINT(lengths_out, lengths_in_is_L, j, 1); if (++j == nrun_out) return; if (j % 500000 == 0) R_CheckUserInterrupt(); offset_in_run++; } } return; } /* * --- .Call ENTRY POINT --- */ SEXP Rle_runsum(SEXP x, SEXP k, SEXP na_rm) { int k0, narm, lengths_in_is_L; SEXP x_lengths, x_values; R_xlen_t nrun_in, nrun_out; const void *lengths_in; void *lengths_out, *values_out; if (!IS_INTEGER(k) || LENGTH(k) != 1 || (k0 = INTEGER(k)[0]) == NA_INTEGER || k0 <= 0) error("'k' must be a positive integer"); if (!IS_LOGICAL(na_rm) || LENGTH(na_rm) != 1 || (narm = LOGICAL(na_rm)[0]) == NA_LOGICAL) error("'na_rm' must be TRUE or FALSE"); x_lengths = GET_SLOT(x, install("lengths")); if (IS_INTEGER(x_lengths)) { nrun_in = XLENGTH(x_lengths); lengths_in = INTEGER(x_lengths); lengths_in_is_L = 0; } else { nrun_in = _get_LLint_length(x_lengths); lengths_in = _get_LLint_dataptr(x_lengths); lengths_in_is_L = 1; } nrun_out = compute_nrun_out(nrun_in, lengths_in, lengths_in_is_L, k0); if (lengths_in_is_L) { lengths_out = (long long int *) R_alloc(nrun_out, sizeof(long long int)); } else { lengths_out = (int *) R_alloc(nrun_out, sizeof(int)); } x_values = GET_SLOT(x, install("values")); if (IS_INTEGER(x_values)) { values_out = (int *) R_alloc(nrun_out, sizeof(int)); _reset_ovflow_flag(); compute_runsum_integer_runs(nrun_in, INTEGER(x_values), lengths_in, lengths_in_is_L, k0, narm, nrun_out, values_out, lengths_out); if (_get_ovflow_flag()) warning("NAs produced by integer overflow. " "You can use:\n" " runValue(x) <- as.numeric(runValue(x))\n" " runsum(x, ...)\n" " to work around it."); return _construct_integer_Rle(nrun_out, values_out, lengths_out, lengths_in_is_L); } if (IS_NUMERIC(x_values)) { values_out = (double *) R_alloc(nrun_out, sizeof(double)); compute_runsum_numeric_runs(nrun_in, REAL(x_values), lengths_in, lengths_in_is_L, k0, narm, nrun_out, values_out, lengths_out); return _construct_numeric_Rle(nrun_out, values_out, lengths_out, lengths_in_is_L); } error("runsum only supported for integer- and numeric-Rle vectors"); return R_NilValue; } SEXP Rle_integer_runwtsum(SEXP x, SEXP k, SEXP wt, SEXP na_rm) { int i, j, nrun, window_len, buf_len, ans_len; int start_offset, curr_offset; double stat; int stat_na; int *curr_value_na, *values_elt_na; int *lengths_elt, *curr_length, *buf_lengths, *buf_lengths_elt; int *values_elt, *curr_value; double *wt_elt, *buf_values, *buf_values_elt; SEXP values, lengths; SEXP orig_values, na_index; const int narm = LOGICAL(na_rm)[0]; if (!IS_INTEGER(k) || LENGTH(k) != 1 || INTEGER(k)[0] == NA_INTEGER || INTEGER(k)[0] <= 0) error("'k' must be a positive integer"); /* Set NA values to 0 * Create NA index : 1 = NA; 0 = not NA */ orig_values = GET_SLOT(x, install("values")); values = PROTECT(Rf_allocVector(INTSXP, LENGTH(orig_values))); na_index = PROTECT(Rf_allocVector(INTSXP, LENGTH(orig_values))); int *vlu = INTEGER(orig_values); for(i = 0; i < LENGTH(orig_values); i++) { if (vlu[i] == NA_INTEGER) { INTEGER(na_index)[i] = 1; INTEGER(values)[i] = 0; } else { INTEGER(na_index)[i] = 0; INTEGER(values)[i] = INTEGER(orig_values)[i]; } } lengths = GET_SLOT(x, install("lengths")); nrun = LENGTH(lengths); window_len = INTEGER(k)[0]; if (!IS_NUMERIC(wt) || LENGTH(wt) != window_len) error("'wt' must be a numeric vector of length 'k'"); ans_len = 0; buf_len = - window_len + 1; for(i = 0, lengths_elt = INTEGER(lengths); i < nrun; i++, lengths_elt++) { buf_len += *lengths_elt; if (window_len < *lengths_elt) buf_len -= *lengths_elt - window_len; } buf_values = NULL; buf_lengths = NULL; if (buf_len > 0) { buf_values = (double *) R_alloc((long) buf_len, sizeof(double)); buf_lengths = (int *) R_alloc((long) buf_len, sizeof(int)); memset(buf_lengths, 0, buf_len * sizeof(int)); buf_values_elt = buf_values; buf_lengths_elt = buf_lengths; values_elt = INTEGER(values); values_elt_na = INTEGER(na_index); lengths_elt = INTEGER(lengths); start_offset = INTEGER(lengths)[0]; for (i = 0; i < buf_len; i++) { if (i % 100000 == 99999) R_CheckUserInterrupt(); /* calculate stat */ stat = 0; stat_na = 0; curr_value = values_elt; curr_value_na = values_elt_na; curr_length = lengths_elt; curr_offset = start_offset; for (j = 0, wt_elt = REAL(wt); j < window_len; j++, wt_elt++) { stat += (*wt_elt) * (*curr_value); stat_na += *curr_value_na; curr_offset--; if (curr_offset == 0) { curr_value++; curr_value_na++; curr_length++; curr_offset = *curr_length; } } /* assign value */ if (ans_len == 0) { ans_len = 1; } else { /* increment values and lengths based on stat */ if (narm | (stat_na == 0)) { if (stat != *buf_values_elt) { ans_len++; buf_values_elt++; buf_lengths_elt++; } } else { if ((stat_na != 0) && (*buf_values_elt != NA_REAL)) { ans_len++; buf_values_elt++; buf_lengths_elt++; } } } /* NA handling */ if (!narm && (stat_na != 0)) *buf_values_elt = NA_REAL; else *buf_values_elt = stat; /* determine length */ if (window_len < start_offset) { *buf_lengths_elt += *lengths_elt - window_len + 1; start_offset = window_len - 1; } else { *buf_lengths_elt += 1; start_offset--; } /* move pointers if end of run */ if (start_offset == 0) { values_elt++; values_elt_na++; lengths_elt++; start_offset = *lengths_elt; } } } UNPROTECT(2); return _construct_numeric_Rle(ans_len, buf_values, buf_lengths, 0); } SEXP Rle_real_runwtsum(SEXP x, SEXP k, SEXP wt, SEXP na_rm) { int i, j, nrun, window_len, buf_len, ans_len; int start_offset, curr_offset; double stat; int *lengths_elt, *curr_length, *buf_lengths, *buf_lengths_elt; double *values_elt, *curr_value; double *wt_elt, *buf_values, *buf_values_elt; SEXP values, lengths; SEXP orig_values; const int narm = LOGICAL(na_rm)[0]; if (!IS_INTEGER(k) || LENGTH(k) != 1 || INTEGER(k)[0] == NA_INTEGER || INTEGER(k)[0] <= 0) error("'k' must be a positive integer"); window_len = INTEGER(k)[0]; if (!IS_NUMERIC(wt) || LENGTH(wt) != window_len) error("'wt' must be a numeric vector of length 'k'"); if (narm) { /* set NA and NaN values to 0 */ orig_values = GET_SLOT(x, install("values")); values = PROTECT(Rf_allocVector(REALSXP, LENGTH(orig_values))); double *vlu = REAL(orig_values); for(i = 0; i < LENGTH(orig_values); i++) { if (ISNAN(vlu[i])) REAL(values)[i] = 0; else REAL(values)[i] = REAL(orig_values)[i]; } } else { values = GET_SLOT(x, install("values")); } lengths = GET_SLOT(x, install("lengths")); nrun = LENGTH(lengths); ans_len = 0; buf_len = - window_len + 1; for(i = 0, lengths_elt = INTEGER(lengths); i < nrun; i++, lengths_elt++) { buf_len += *lengths_elt; if (window_len < *lengths_elt) buf_len -= *lengths_elt - window_len; } buf_values = NULL; buf_lengths = NULL; if (buf_len > 0) { buf_values = (double *) R_alloc((long) buf_len, sizeof(double)); buf_lengths = (int *) R_alloc((long) buf_len, sizeof(int)); memset(buf_lengths, 0, buf_len * sizeof(int)); buf_values_elt = buf_values; buf_lengths_elt = buf_lengths; values_elt = REAL(values); lengths_elt = INTEGER(lengths); start_offset = INTEGER(lengths)[0]; for (i = 0; i < buf_len; i++) { if (i % 100000 == 99999) R_CheckUserInterrupt(); /* calculate stat */ stat = 0; curr_value = values_elt; curr_length = lengths_elt; curr_offset = start_offset; for (j = 0, wt_elt = REAL(wt); j < window_len; j++, wt_elt++) { stat += (*wt_elt) * (*curr_value); curr_offset--; if (curr_offset == 0) { curr_value++; curr_length++; curr_offset = *curr_length; } } /* assign value */ if (ans_len == 0) { ans_len = 1; } else if (!R_FINITE(stat) && !R_FINITE(*buf_values_elt)) { if ((R_IsNA(stat) && !R_IsNA(*buf_values_elt)) || (!R_IsNA(stat) && R_IsNA(*buf_values_elt)) || (R_IsNaN(stat) && !R_IsNaN(*buf_values_elt)) || (!R_IsNaN(stat) && R_IsNaN(*buf_values_elt)) || ((stat == R_PosInf) && (*buf_values_elt != R_PosInf)) || ((stat != R_PosInf) && (*buf_values_elt == R_PosInf)) || ((stat == R_NegInf) && (*buf_values_elt != R_NegInf)) || ((stat != R_NegInf) && (*buf_values_elt == R_NegInf))) { ans_len++; buf_values_elt++; buf_lengths_elt++; } } else { if (stat != *buf_values_elt) { ans_len++; buf_values_elt++; buf_lengths_elt++; } } *buf_values_elt = stat; /* determine length */ if (window_len < start_offset) { *buf_lengths_elt += *lengths_elt - window_len + 1; start_offset = window_len - 1; } else { *buf_lengths_elt += 1; start_offset--; } /* move pointers if end of run */ if (start_offset == 0) { values_elt++; lengths_elt++; start_offset = *lengths_elt; } } } if (narm) UNPROTECT(1); return _construct_numeric_Rle(ans_len, buf_values, buf_lengths, 0); } /* * --- .Call ENTRY POINT --- */ SEXP Rle_runwtsum(SEXP x, SEXP k, SEXP wt, SEXP na_rm) { SEXP ans = R_NilValue; switch(TYPEOF(GET_SLOT(x, install("values")))) { case INTSXP: PROTECT(ans = Rle_integer_runwtsum(x, k, wt, na_rm)); break; case REALSXP: PROTECT(ans = Rle_real_runwtsum(x, k, wt, na_rm)); break; default: error("runwtsum only supported for integer and numeric Rle objects"); } UNPROTECT(1); return ans; } SEXP Rle_integer_runq(SEXP x, SEXP k, SEXP which, SEXP na_rm) { int i, j, nrun, window_len, buf_len, ans_len; int start_offset, curr_offset; int q_index; int stat, count_na, window_len_na; int *lengths_elt, *curr_length, *buf_lengths, *buf_lengths_elt; int *window, *values_elt, *curr_value, *buf_values, *buf_values_elt; SEXP values, lengths; const int narm = LOGICAL(na_rm)[0]; const int constw = INTEGER(which)[0]; const int constk = INTEGER(k)[0]; if (!IS_INTEGER(k) || LENGTH(k) != 1 || INTEGER(k)[0] == NA_INTEGER || INTEGER(k)[0] <= 0) error("'k' must be a positive integer"); if (!IS_INTEGER(which) || LENGTH(which) != 1 || INTEGER(which)[0] == NA_INTEGER || INTEGER(which)[0] < 1 || INTEGER(which)[0] > INTEGER(k)[0]) error("'i' must be an integer in [0, k]"); values = GET_SLOT(x, install("values")); lengths = GET_SLOT(x, install("lengths")); nrun = LENGTH(lengths); window_len = INTEGER(k)[0]; ans_len = 0; buf_len = - window_len + 1; for(i = 0, lengths_elt = INTEGER(lengths); i < nrun; i++, lengths_elt++) { buf_len += *lengths_elt; if (window_len < *lengths_elt) buf_len -= *lengths_elt - window_len; } buf_values = NULL; buf_lengths = NULL; if (buf_len > 0) { window = (int *) R_alloc(window_len, sizeof(int)); buf_values = (int *) R_alloc((long) buf_len, sizeof(int)); buf_lengths = (int *) R_alloc((long) buf_len, sizeof(int)); memset(buf_lengths, 0, buf_len * sizeof(int)); buf_values_elt = buf_values; buf_lengths_elt = buf_lengths; values_elt = INTEGER(values); lengths_elt = INTEGER(lengths); start_offset = INTEGER(lengths)[0]; for (i = 0; i < buf_len; i++) { if (i % 100000 == 99999) R_CheckUserInterrupt(); /* create window */ count_na = 0; curr_value = values_elt; curr_length = lengths_elt; curr_offset = start_offset; window_len_na = INTEGER(k)[0]; q_index = INTEGER(which)[0] - 1; for(j = 0; j < window_len; j++) { if (*curr_value == NA_INTEGER) count_na += 1; window[j] = *curr_value; curr_offset--; if (curr_offset == 0) { curr_value++; curr_length++; curr_offset = *curr_length; } } /* calculate stat */ if (!narm && count_na > 0) { stat = NA_INTEGER; } else { /* NA handling */ if (count_na != 0) { window_len_na = window_len - count_na; q_index = roundingScale(window_len_na, constw, constk); if (q_index > 0) q_index = q_index - 1; } /* If window shrank to 0, return NA. */ if (window_len_na == 0) { stat = NA_INTEGER; } else { /* NA's sorted last in iPsort */ iPsort(window, window_len, q_index); stat = window[q_index]; } } if (ans_len == 0) { ans_len = 1; } else if (stat != *buf_values_elt) { ans_len++; buf_values_elt++; buf_lengths_elt++; } *buf_values_elt = stat; /* determine length */ if (window_len < start_offset) { *buf_lengths_elt += *lengths_elt - window_len + 1; start_offset = window_len - 1; } else { *buf_lengths_elt += 1; start_offset--; } /* move pointers if end of run */ if (start_offset == 0) { values_elt++; lengths_elt++; start_offset = *lengths_elt; } } } return _construct_integer_Rle(ans_len, buf_values, buf_lengths, 0); } SEXP Rle_real_runq(SEXP x, SEXP k, SEXP which, SEXP na_rm) { int i, j, nrun, window_len, buf_len, ans_len; int start_offset, curr_offset; int q_index; double stat; int count_na, window_len_na; int *lengths_elt, *curr_length, *buf_lengths, *buf_lengths_elt; double *window, *values_elt, *curr_value, *buf_values, *buf_values_elt; SEXP values, lengths; const int narm = LOGICAL(na_rm)[0]; const int constw = INTEGER(which)[0]; const int constk = INTEGER(k)[0]; if (!IS_INTEGER(k) || LENGTH(k) != 1 || INTEGER(k)[0] == NA_INTEGER || INTEGER(k)[0] <= 0) error("'k' must be a positive integer"); if (!IS_INTEGER(which) || LENGTH(which) != 1 || INTEGER(which)[0] == NA_INTEGER || INTEGER(which)[0] < 1 || INTEGER(which)[0] > INTEGER(k)[0]) error("'which' must be an integer in [0, k]"); values = GET_SLOT(x, install("values")); lengths = GET_SLOT(x, install("lengths")); nrun = LENGTH(lengths); window_len = INTEGER(k)[0]; ans_len = 0; buf_len = - window_len + 1; for(i = 0, lengths_elt = INTEGER(lengths); i < nrun; i++, lengths_elt++) { buf_len += *lengths_elt; if (window_len < *lengths_elt) buf_len -= *lengths_elt - window_len; } buf_values = NULL; buf_lengths = NULL; if (buf_len > 0) { window = (double *) R_alloc(window_len, sizeof(double)); buf_values = (double *) R_alloc((long) buf_len, sizeof(double)); buf_lengths = (int *) R_alloc((long) buf_len, sizeof(int)); memset(buf_lengths, 0, buf_len * sizeof(int)); buf_values_elt = buf_values; buf_lengths_elt = buf_lengths; values_elt = REAL(values); lengths_elt = INTEGER(lengths); start_offset = INTEGER(lengths)[0]; for (i = 0; i < buf_len; i++) { if (i % 100000 == 99999) R_CheckUserInterrupt(); /* create window */ count_na = 0; curr_value = values_elt; curr_length = lengths_elt; curr_offset = start_offset; window_len_na = INTEGER(k)[0]; q_index = INTEGER(which)[0] - 1; for(j = 0; j < window_len; j++) { if (ISNAN(*curr_value)) count_na += 1; window[j] = *curr_value; curr_offset--; if (curr_offset == 0) { curr_value++; curr_length++; curr_offset = *curr_length; } } /* calculate stat */ if (!narm && count_na > 0) { stat = NA_REAL; } else { /* NA handling */ if (count_na != 0) window_len_na = window_len - count_na; q_index = roundingScale(window_len_na, constw, constk); if (q_index >0) q_index = q_index - 1; /* If window shrank to 0, return NA. */ if (window_len_na == 0) { stat = NA_REAL; } else { /* NA's sorted last in rPsort */ rPsort(window, window_len, q_index); stat = window[q_index]; } } if (ans_len == 0) { ans_len = 1; } else if (stat != *buf_values_elt) { ans_len++; buf_values_elt++; buf_lengths_elt++; } *buf_values_elt = stat; /* determine length */ if (window_len < start_offset) { *buf_lengths_elt += *lengths_elt - window_len + 1; start_offset = window_len - 1; } else { *buf_lengths_elt += 1; start_offset--; } /* move pointers if end of run */ if (start_offset == 0) { values_elt++; lengths_elt++; start_offset = *lengths_elt; } } } return _construct_numeric_Rle(ans_len, buf_values, buf_lengths, 0); } /* * --- .Call ENTRY POINT --- */ SEXP Rle_runq(SEXP x, SEXP k, SEXP which, SEXP na_rm) { SEXP ans = R_NilValue; switch(TYPEOF(GET_SLOT(x, install("values")))) { case INTSXP: PROTECT(ans = Rle_integer_runq(x, k, which, na_rm)); break; case REALSXP: PROTECT(ans = Rle_real_runq(x, k, which, na_rm)); break; default: error("runq only supported for integer and numeric Rle objects"); } UNPROTECT(1); return ans; } S4Vectors/src/S4Vectors.h0000644000175400017540000003275513175736136016260 0ustar00biocbuildbiocbuild#include "../inst/include/S4Vectors_defines.h" #include #define INIT_STATIC_SYMBOL(NAME) \ { \ if (NAME ## _symbol == NULL) \ NAME ## _symbol = install(# NAME); \ } /* safe_arithm.c */ void _reset_ovflow_flag(); int _get_ovflow_flag(); int _safe_int_add( int x, int y ); int _safe_int_subtract( int x, int y ); int _safe_int_mult( int x, int y ); long long int _safe_llint_add( long long int x, long long int y ); long long int _safe_llint_subtract( long long int x, long long int y ); long long int _safe_llint_mult( long long int x, long long int y ); /* sort_utils.c */ SEXP test_sort_ushort_array( SEXP x, SEXP desc ); void _sort_int_array( int *x, size_t nelt, int desc ); void _get_order_of_int_array( const int *x, int nelt, int desc, int *out, int out_shift ); int _sort_ints( int *base, int base_len, const int *x, int desc, int use_radix, unsigned short int *rxbuf1, int *rxbuf2 ); void _pcompare_int_pairs( const int *a1, const int *b1, int nelt1, const int *a2, const int *b2, int nelt2, int *out, int out_len, int with_warning ); int _int_pairs_are_sorted( const int *a, const int *b, int nelt, int desc, int strict ); void _get_order_of_int_pairs( const int *a, const int *b, int nelt, int a_desc, int b_desc, int *out, int out_shift ); int _sort_int_pairs( int *base, int base_len, const int *a, const int *b, int a_desc, int b_desc, int use_radix, unsigned short int *rxbuf1, int *rxbuf2 ); void _get_matches_of_ordered_int_pairs( const int *a1, const int *b1, const int *o1, int nelt1, const int *a2, const int *b2, const int *o2, int nelt2, int nomatch, int *out, int out_shift ); int _int_quads_are_sorted( const int *a, const int *b, const int *c, const int *d, int nelt, int desc, int strict ); void _get_order_of_int_quads( const int *a, const int *b, const int *c, const int *d, int nelt, int a_desc, int b_desc, int c_desc, int d_desc, int *out, int out_shift ); int _sort_int_quads( int *base, int base_len, const int *a, const int *b, const int *c, const int *d, int a_desc, int b_desc, int c_desc, int d_desc, int use_radix, unsigned short int *rxbuf1, int *rxbuf2 ); void _get_matches_of_ordered_int_quads( const int *a1, const int *b1, const int *c1, const int *d1, const int *o1, int nelt1, const int *a2, const int *b2, const int *c2, const int *d2, const int *o2, int nelt2, int nomatch, int *out, int out_shift ); /* hash_utils.c */ struct htab _new_htab(int n); int _get_hbucket_val( const struct htab *htab, int bucket_idx ); void _set_hbucket_val( struct htab *htab, int bucket_idx, int val ); /* AEbufs.c */ SEXP AEbufs_use_malloc(SEXP x); size_t _increase_buflength(size_t buflength); size_t _IntAE_get_nelt(const IntAE *ae); size_t _IntAE_set_nelt( IntAE *ae, size_t nelt ); void _IntAE_set_val( const IntAE *ae, int val ); void _IntAE_extend( IntAE *ae, size_t new_buflength ); void _IntAE_insert_at( IntAE *ae, size_t at, int val ); IntAE *_new_IntAE( size_t buflength, size_t nelt, int val ); void _IntAE_append( IntAE *ae, const int *newvals, size_t nnewval ); void _IntAE_delete_at( IntAE *ae, size_t at, size_t nelt ); void _IntAE_shift( const IntAE *ae, size_t offset, int shift ); void _IntAE_sum_and_shift( const IntAE *ae1, const IntAE *ae2, int shift ); void _IntAE_qsort( const IntAE *ae, size_t offset, int desc ); void _IntAE_uniq( IntAE *ae, size_t offset ); SEXP _new_INTEGER_from_IntAE(const IntAE *ae); IntAE *_new_IntAE_from_INTEGER(SEXP x); IntAE *_new_IntAE_from_CHARACTER( SEXP x, int keyshift ); size_t _IntAEAE_get_nelt(const IntAEAE *aeae); size_t _IntAEAE_set_nelt( IntAEAE *aeae, size_t nelt ); void _IntAEAE_extend( IntAEAE *aeae, size_t new_buflength ); void _IntAEAE_insert_at( IntAEAE *aeae, size_t at, IntAE *ae ); IntAEAE *_new_IntAEAE( size_t buflength, size_t nelt ); void _IntAEAE_pappend( const IntAEAE *aeae1, const IntAEAE *aeae2 ); void _IntAEAE_shift( const IntAEAE *aeae, int shift ); void _IntAEAE_sum_and_shift( const IntAEAE *aeae1, const IntAEAE *aeae2, int shift ); SEXP _new_LIST_from_IntAEAE( const IntAEAE *aeae, int mode ); IntAEAE *_new_IntAEAE_from_LIST(SEXP x); SEXP _IntAEAE_toEnvir( const IntAEAE *aeae, SEXP envir, int keyshift ); size_t _IntPairAE_get_nelt(const IntPairAE *ae); size_t _IntPairAE_set_nelt( IntPairAE *ae, size_t nelt ); void _IntPairAE_extend( IntPairAE *ae, size_t new_buflength ); void _IntPairAE_insert_at( IntPairAE *ae, size_t at, int a, int b ); IntPairAE *_new_IntPairAE( size_t buflength, size_t nelt ); size_t _IntPairAEAE_get_nelt(const IntPairAEAE *aeae); size_t _IntPairAEAE_set_nelt( IntPairAEAE *aeae, size_t nelt ); void _IntPairAEAE_extend( IntPairAEAE *aeae, size_t new_buflength ); void _IntPairAEAE_insert_at( IntPairAEAE *aeae, size_t at, IntPairAE *ae ); IntPairAEAE *_new_IntPairAEAE( size_t buflength, size_t nelt ); size_t _LLongAE_get_nelt(const LLongAE *ae); size_t _LLongAE_set_nelt( LLongAE *ae, size_t nelt ); void _LLongAE_set_val( const LLongAE *ae, long long val ); void _LLongAE_extend( LLongAE *ae, size_t new_buflength ); void _LLongAE_insert_at( LLongAE *ae, size_t at, long long val ); LLongAE *_new_LLongAE( size_t buflength, size_t nelt, long long val ); size_t _CharAE_get_nelt(const CharAE *ae); size_t _CharAE_set_nelt( CharAE *ae, size_t nelt ); void _CharAE_extend( CharAE *ae, size_t new_buflength ); void _CharAE_insert_at( CharAE *ae, size_t at, char c ); CharAE *_new_CharAE(size_t buflength); CharAE *_new_CharAE_from_string(const char *string); void _CharAE_append_string( CharAE *ae, const char *string ); void _CharAE_delete_at( CharAE *ae, size_t at, size_t nelt ); SEXP _new_CHARSXP_from_CharAE(const CharAE *ae); SEXP _new_RAW_from_CharAE(const CharAE *ae); SEXP _new_LOGICAL_from_CharAE(const CharAE *ae); size_t _CharAEAE_get_nelt(const CharAEAE *aeae); size_t _CharAEAE_set_nelt( CharAEAE *aeae, size_t nelt ); void _CharAEAE_extend( CharAEAE *aeae, size_t new_buflength ); void _CharAEAE_insert_at( CharAEAE *aeae, size_t at, CharAE *ae ); CharAEAE *_new_CharAEAE( size_t buflength, size_t nelt ); void _CharAEAE_append_string( CharAEAE *aeae, const char *string ); SEXP _new_CHARACTER_from_CharAEAE(const CharAEAE *aeae); SEXP AEbufs_free(); /* SEXP_utils.c */ const char *_get_classname(SEXP x); /* anyMissing.c */ SEXP anyMissing(SEXP x); /* LLint_class.c */ int _is_LLint(SEXP x); SEXP make_RAW_from_NA_LLINT(); R_xlen_t _get_LLint_length(SEXP x); long long int *_get_LLint_dataptr(SEXP x); SEXP _alloc_LLint(const char *classname, R_xlen_t length); SEXP new_LLint_from_LOGICAL(SEXP x); SEXP new_LLint_from_INTEGER(SEXP x); SEXP new_LLint_from_NUMERIC(SEXP x); SEXP new_LLint_from_CHARACTER(SEXP x); SEXP new_LOGICAL_from_LLint(SEXP x); SEXP new_INTEGER_from_LLint(SEXP x); SEXP new_NUMERIC_from_LLint(SEXP x); SEXP new_CHARACTER_from_LLint(SEXP x); /* subsetting_utils.c */ int _copy_vector_block( SEXP dest, int dest_offset, SEXP src, int src_offset, int block_width ); int _copy_vector_positions( SEXP dest, int dest_offset, SEXP src, const int *pos, int npos ); int _copy_vector_ranges( SEXP dest, int dest_offset, SEXP src, const int *start, const int *width, int nranges ); SEXP _subset_vector_OR_factor_by_positions( SEXP x, const int *pos, int npos ); SEXP _subset_vector_OR_factor_by_ranges( SEXP x, const int *start, const int *width, int nranges ); SEXP vector_OR_factor_extract_positions( SEXP x, SEXP pos ); SEXP vector_OR_factor_extract_ranges( SEXP x, SEXP start, SEXP width ); /* vector_utils.c */ int _vector_memcmp( SEXP x1, int x1_offset, SEXP x2, int x2_offset, int nelt ); SEXP sapply_NROW(SEXP x); SEXP _list_as_data_frame( SEXP x, int nrow ); /* logical_utils.c */ SEXP logical_sum( SEXP x, SEXP na_rm ); SEXP logical2_sum( SEXP x, SEXP na_rm ); /* integer_utils.c */ SEXP Integer_any_missing_or_outside(SEXP x, SEXP lower, SEXP upper); SEXP Integer_diff_with_0(SEXP x); SEXP Integer_diff_with_last(SEXP x, SEXP last); SEXP Integer_order( SEXP x, SEXP decreasing, SEXP use_radix ); int _check_integer_pairs( SEXP a, SEXP b, const int **a_p, const int **b_p, const char *a_argname, const char *b_argname ); SEXP Integer_pcompare2( SEXP a1, SEXP b1, SEXP a2, SEXP b2 ); SEXP Integer_sorted2( SEXP a, SEXP b, SEXP decreasing, SEXP strictly ); SEXP Integer_order2( SEXP a, SEXP b, SEXP decreasing, SEXP use_radix ); SEXP Integer_match2_quick( SEXP a1, SEXP b1, SEXP a2, SEXP b2, SEXP nomatch ); SEXP Integer_selfmatch2_quick( SEXP a, SEXP b ); SEXP Integer_match2_hash( SEXP a1, SEXP b1, SEXP a2, SEXP b2, SEXP nomatch ); SEXP Integer_selfmatch2_hash( SEXP a, SEXP b ); int _check_integer_quads( SEXP a, SEXP b, SEXP c, SEXP d, const int **a_p, const int **b_p, const int **c_p, const int **d_p, const char *a_argname, const char *b_argname, const char *c_argname, const char *d_argname ); SEXP Integer_sorted4( SEXP a, SEXP b, SEXP c, SEXP d, SEXP decreasing, SEXP strictly ); SEXP Integer_order4( SEXP a, SEXP b, SEXP c, SEXP d, SEXP decreasing, SEXP use_radix ); SEXP Integer_match4_quick( SEXP a1, SEXP b1, SEXP c1, SEXP d1, SEXP a2, SEXP b2, SEXP c2, SEXP d2, SEXP nomatch ); SEXP Integer_selfmatch4_quick( SEXP a, SEXP b, SEXP c, SEXP d ); SEXP Integer_match4_hash( SEXP a1, SEXP b1, SEXP c1, SEXP d1, SEXP a2, SEXP b2, SEXP c2, SEXP d2, SEXP nomatch ); SEXP Integer_selfmatch4_hash( SEXP a, SEXP b, SEXP c, SEXP d ); SEXP Integer_tabulate2( SEXP x, SEXP nbins, SEXP weight, SEXP strict ); SEXP Integer_explode_bits( SEXP x, SEXP bitpos ); SEXP Integer_sorted_merge( SEXP x, SEXP y ); SEXP Integer_mseq( SEXP from, SEXP to ); SEXP Integer_fancy_mseq( SEXP lengths, SEXP offset, SEXP rev ); SEXP _find_interv_and_start_from_width( const int *x, int x_len, const int *width, int width_len ); SEXP findIntervalAndStartFromWidth( SEXP x, SEXP vec ); /* character_utils.c */ SEXP unstrsplit_list(SEXP x, SEXP sep); SEXP safe_strexplode(SEXP s); SEXP strsplit_as_list_of_ints(SEXP x, SEXP sep); SEXP svn_time(); /* eval_utils.c */ SEXP top_prenv(SEXP nm, SEXP env); SEXP top_prenv_dots(SEXP env); /* map_ranges_to_runs.c */ const char *_simple_range_mapper( const int *run_lengths, int nrun, int range_start, int range_end, int *mapped_range_offset, int *mapped_range_span, int *mapped_range_Ltrim, int *mapped_range_Rtrim ); const char *_simple_position_mapper( const int *run_lengths, int nrun, int pos, int *mapped_pos ); const char *_ranges_mapper( const int *run_lengths, int nrun, const int *start, const int *width, int nranges, int *mapped_range_offset, int *mapped_range_span, int *mapped_range_Ltrim, int *mapped_range_Rtrim, int method ); const char *_positions_mapper( const int *run_lengths, int nrun, const int *pos, int npos, int *mapped_pos, int method ); SEXP map_ranges( SEXP run_lengths, SEXP start, SEXP width, SEXP method ); SEXP map_positions( SEXP run_lengths, SEXP pos, SEXP method ); /* Hits_class.c */ SEXP _new_Hits( int *from, const int *to, int nhit, int nLnode, int nRnode, int already_sorted ); SEXP Hits_new( SEXP Class, SEXP from, SEXP to, SEXP nLnode, SEXP nRnode, SEXP revmap_envir ); int _get_select_mode(SEXP select); SEXP select_hits( SEXP from, SEXP to, SEXP nLnode, SEXP select ); SEXP make_all_group_inner_hits( SEXP group_sizes, SEXP hit_type ); /* Rle_class.c */ SEXP Rle_length(SEXP x); SEXP Rle_valid(SEXP x); SEXP _construct_logical_Rle( R_xlen_t nrun_in, const int *values_in, const void *lengths_in, int lengths_in_is_L ); SEXP _construct_integer_Rle( R_xlen_t nrun_in, const int *values_in, const void *lengths_in, int lengths_in_is_L ); SEXP _construct_numeric_Rle( R_xlen_t nrun_in, const double *values_in, const void *lengths_in, int lengths_in_is_L ); SEXP _construct_complex_Rle( R_xlen_t nrun_in, const Rcomplex *values_in, const void *lengths_in, int lengths_in_is_L ); SEXP _construct_character_Rle( SEXP values_in, const void *lengths_in, int lengths_in_is_L ); SEXP _construct_raw_Rle( R_xlen_t nrun_in, const Rbyte *values_in, const void *lengths_in, int lengths_in_is_L ); SEXP _construct_Rle( SEXP values_in, const void *lengths_in, int lengths_in_is_L ); SEXP Rle_constructor( SEXP values_in, SEXP lengths_in ); SEXP Rle_start(SEXP x); SEXP Rle_end(SEXP x); SEXP _subset_Rle_by_ranges( SEXP x, const int *start, const int *width, int nranges, int method, int as_list ); SEXP _subset_Rle_by_positions( SEXP x, const int *pos, int npos, int method ); SEXP Rle_extract_range( SEXP x, SEXP start, SEXP end ); SEXP Rle_extract_ranges( SEXP x, SEXP start, SEXP width, SEXP method, SEXP as_list ); SEXP Rle_extract_positions( SEXP x, SEXP pos, SEXP method ); SEXP Rle_getStartEndRunAndOffset( SEXP x, SEXP start, SEXP end ); SEXP Rle_window_aslist( SEXP x, SEXP runStart, SEXP runEnd, SEXP offsetStart, SEXP offsetEnd ); /* Rle_utils.c */ SEXP Rle_runsum( SEXP x, SEXP k, SEXP na_rm ); SEXP Rle_runwtsum( SEXP x, SEXP k, SEXP wt, SEXP na_rm ); SEXP Rle_runq( SEXP x, SEXP k, SEXP which, SEXP na_rm ); /* List_class.c */ const char *_get_List_elementType(SEXP x); void _set_List_elementType( SEXP x, const char *type ); /* SimpleList_class.c */ SEXP _new_SimpleList( const char *classname, SEXP listData ); /* DataFrame_class.c */ SEXP _new_DataFrame( const char *classname, SEXP vars, SEXP rownames, SEXP nrows ); S4Vectors/src/SEXP_utils.c0000644000175400017540000000015413175736136016402 0ustar00biocbuildbiocbuild#include "S4Vectors.h" const char *_get_classname(SEXP x) { return CHAR(STRING_ELT(GET_CLASS(x), 0)); } S4Vectors/src/SimpleList_class.c0000644000175400017540000000124013175736136017652 0ustar00biocbuildbiocbuild/**************************************************************************** * Low-level manipulation of SimpleList objects ****************************************************************************/ #include "S4Vectors.h" static SEXP listData_symbol = NULL; static void set_SimpleList_listData(SEXP x, SEXP value) { INIT_STATIC_SYMBOL(listData) SET_SLOT(x, listData_symbol, value); return; } SEXP _new_SimpleList(const char *classname, SEXP listData) { SEXP classdef, ans; PROTECT(classdef = MAKE_CLASS(classname)); PROTECT(ans = NEW_OBJECT(classdef)); set_SimpleList_listData(ans, listData); UNPROTECT(2); return ans; } S4Vectors/src/anyMissing.c0000644000175400017540000000403413175736136016525 0ustar00biocbuildbiocbuild/*************************************************************************** Public methods: anyMissing(SEXP x) TO DO: Support list():s too. Copyright Henrik Bengtsson, 2007 **************************************************************************/ /* Include R packages */ #include SEXP anyMissing(SEXP x) { SEXP ans; int n, ii; PROTECT(ans = allocVector(LGLSXP, 1)); LOGICAL(ans)[0] = 0; n = length(x); /* anyMissing() on zero-length objects should always return FALSE, just like any(double(0)). */ if (n == 0) { UNPROTECT(1); return(ans); } switch (TYPEOF(x)) { case REALSXP: for (ii=0; ii < n; ii++) { if ISNAN(REAL(x)[ii]) { LOGICAL(ans)[0] = 1; break; } } break; case INTSXP: for (ii=0; ii < n; ii++) { if (INTEGER(x)[ii] == NA_INTEGER) { LOGICAL(ans)[0] = 1; break; } } break; case LGLSXP: for (ii=0; ii < n; ii++) { if (LOGICAL(x)[ii] == NA_LOGICAL) { LOGICAL(ans)[0] = 1; break; } } break; case CPLXSXP: for (ii=0; ii < n; ii++) { if (ISNAN(COMPLEX(x)[ii].r) || ISNAN(COMPLEX(x)[ii].i)) { LOGICAL(ans)[0] = 1; break; } } break; case STRSXP: for (ii=0; ii < n; ii++) { if (STRING_ELT(x, ii) == NA_STRING) { LOGICAL(ans)[0] = 1; break; } } break; case RAWSXP: /* no such thing as a raw NA */ break; default: break; /* warningcall(call, _("%s() applied to non-vector of type '%s'"), "anyMissing", type2char(TYPEOF(x))); */ } /* switch() */ UNPROTECT(1); /* ans */ return(ans); } // anyMissing() /*************************************************************************** HISTORY: 2007-08-14 [HB] o Created using do_isna() in src/main/coerce.c as a template. **************************************************************************/ S4Vectors/src/character_utils.c0000644000175400017540000001730213175736136017562 0ustar00biocbuildbiocbuild/* * Defining the _XOPEN_SOURCE feature test macro is required in order to obtain * declaration of tzset() and 'timezone' from (see man tzset). * However, it seems that Solaris wants it to have a value (as reported by * Brian D. Ripley to maintainer@bioconductor.org on 2015-29-08). */ #define _XOPEN_SOURCE 600 #include "S4Vectors.h" #include /* for UINT_MAX and UINT_MIN */ #include /* for isblank() and isdigit() */ #include /* for free() */ #include static char errmsg_buf[200]; /**************************************************************************** * unstrsplit_list() */ /* * Assumes 'x' is a character vector (this is NOT checked). * The destination string 'dest' must be large enough to receive the result. */ static void join_strings_in_buf(char *dest, SEXP x, const char *sep, int sep_len) { int x_len, i; SEXP x_elt; x_len = LENGTH(x); for (i = 0; i < x_len; i++) { if (i != 0) { memcpy(dest, sep, sep_len); dest += sep_len; } x_elt = STRING_ELT(x, i); memcpy(dest, CHAR(x_elt), LENGTH(x_elt)); dest += LENGTH(x_elt); } return; } /* * Returns a CHARSXP if success, or R_NilValue if failure. */ static SEXP join_strings(SEXP x, const char *sep, int sep_len) { SEXP ans; int x_len, bufsize, i; char *buf; if (!IS_CHARACTER(x)) { snprintf(errmsg_buf, sizeof(errmsg_buf), "join_strings() expects a character vector"); return R_NilValue; } x_len = LENGTH(x); /* 1st pass: Loop over 'x' to compute the size of the buffer. */ bufsize = 0; if (x_len != 0) { for (i = 0; i < x_len; i++) bufsize += LENGTH(STRING_ELT(x, i)); bufsize += (x_len - 1) * sep_len; } /* Allocate memory for the buffer. */ buf = (char *) malloc((size_t) bufsize); if (buf == NULL) { snprintf(errmsg_buf, sizeof(errmsg_buf), "malloc() failed"); return R_NilValue; } /* 2nd pass: Loop over 'x' again to fill 'buf'. */ join_strings_in_buf(buf, x, sep, sep_len); /* Turn 'buf' into a CHARSXP and return it. */ PROTECT(ans = mkCharLen(buf, bufsize)); free(buf); UNPROTECT(1); return ans; } /* --- .Call ENTRY POINT --- */ SEXP unstrsplit_list(SEXP x, SEXP sep) { SEXP ans, sep0, x_elt, ans_elt, ans_names; int x_len, sep0_len, i; if (!isVectorList(x)) error("'x' must be a list"); if (!(IS_CHARACTER(sep) && LENGTH(sep) == 1)) error("'sep' must be a single string"); x_len = LENGTH(x); sep0 = STRING_ELT(sep, 0); sep0_len = LENGTH(sep0); PROTECT(ans = NEW_CHARACTER(x_len)); for (i = 0; i < x_len; i++) { x_elt = VECTOR_ELT(x, i); if (x_elt == R_NilValue) continue; PROTECT(ans_elt = join_strings(x_elt, CHAR(sep0), sep0_len)); if (ans_elt == R_NilValue) { UNPROTECT(2); error("in list element %d: %s", i + 1, errmsg_buf); } SET_STRING_ELT(ans, i, ans_elt); UNPROTECT(1); } PROTECT(ans_names = duplicate(GET_NAMES(x))); SET_NAMES(ans, ans_names); UNPROTECT(2); return ans; } /**************************************************************************** * --- .Call ENTRY POINT --- * We cannot rely on the strsplit() R function to split a string into single * characters when the string contains junk. For example: * > r <- as.raw(c(10, 255)) * > s <- rawToChar(r) * > s * [1] "\n\xff" * > strsplit(s, NULL, fixed=TRUE)[[1]] * [1] NA * doesn't work! * The function below should be safe, whatever the content of 's' is! * The length of the returned string is the number of chars in single * string 's'. Not vectorized. */ SEXP safe_strexplode(SEXP s) { SEXP s0, ans; int s0_length, i; char buf[2] = "X"; /* we only care about having buf[1] == 0 */ s0 = STRING_ELT(s, 0); s0_length = LENGTH(s0); PROTECT(ans = NEW_CHARACTER(s0_length)); for (i = 0; i < s0_length; i++) { buf[0] = CHAR(s0)[i]; SET_STRING_ELT(ans, i, mkChar(buf)); } UNPROTECT(1); return ans; } /**************************************************************************** * strsplit_as_list_of_ints() */ static SEXP explode_string_as_integer_vector(SEXP s, char sep0, IntAE *tmp_buf) { const char *str; int offset, n, ret; long int val; str = CHAR(s); offset = _IntAE_set_nelt(tmp_buf, 0); while (str[offset]) { ret = sscanf(str + offset, "%ld%n", &val, &n); if (ret != 1) { snprintf(errmsg_buf, sizeof(errmsg_buf), "decimal integer expected at char %d", offset + 1); return R_NilValue; } offset += n; while (isblank(str[offset])) offset++; if (val < INT_MIN || val > INT_MAX) { UNPROTECT(1); snprintf(errmsg_buf, sizeof(errmsg_buf), "out of range integer at char %d", offset + 1); return R_NilValue; } _IntAE_insert_at(tmp_buf, _IntAE_get_nelt(tmp_buf), (int) val); if (str[offset] == '\0') break; if (str[offset] != sep0) { snprintf(errmsg_buf, sizeof(errmsg_buf), "separator expected at char %d", offset + 1); return R_NilValue; } offset++; } return _new_INTEGER_from_IntAE(tmp_buf); } /* --- .Call ENTRY POINT --- */ SEXP strsplit_as_list_of_ints(SEXP x, SEXP sep) { SEXP ans, x_elt, ans_elt; int ans_length, i; char sep0; IntAE *tmp_buf; ans_length = LENGTH(x); sep0 = CHAR(STRING_ELT(sep, 0))[0]; if (isdigit(sep0) || sep0 == '+' || sep0 == '-') error("'sep' cannot be a digit, \"+\" or \"-\""); tmp_buf = _new_IntAE(0, 0, 0); PROTECT(ans = NEW_LIST(ans_length)); for (i = 0; i < ans_length; i++) { x_elt = STRING_ELT(x, i); if (x_elt == NA_STRING) { UNPROTECT(1); error("'x' contains NAs"); } PROTECT(ans_elt = explode_string_as_integer_vector(x_elt, sep0, tmp_buf)); if (ans_elt == R_NilValue) { UNPROTECT(2); error("in list element %d: %s", i + 1, errmsg_buf); } SET_VECTOR_ELT(ans, i, ans_elt); UNPROTECT(1); } UNPROTECT(1); return ans; } /**************************************************************************** * svn_time() returns the time in Subversion format, e.g.: * "2007-12-07 10:03:15 -0800 (Fri, 07 Dec 2007)" * The -0800 part will be adjusted if daylight saving time is in effect. * * TODO: Find a better home for this function. */ /* * 'out_size' should be at least 45 (for year < 10000, 44 chars will be * printed to it + '\0'). */ static int get_svn_time(time_t t, char *out, size_t out_size) { //#if defined(__INTEL_COMPILER) // return -1; //#else /* defined(__INTEL_COMPILER) */ struct tm result; int utc_offset, n; static const char *wday2str[] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"}, *mon2str[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}, *svn_format = "%d-%02d-%02d %02d:%02d:%02d %+03d00 (%s, %02d %s %d)"; //localtime_r() not available on Windows+MinGW //localtime_r(&t, &result); result = *localtime(&t); #if defined(__APPLE__) || defined(__FreeBSD__) //'struct tm' has no member named 'tm_gmtoff' on Windows+MinGW utc_offset = result.tm_gmtoff / 3600; #else /* defined(__APPLE__) || defined(__FreeBSD__) */ tzset(); //timezone is not portable (is a function, not a long, on OS X Tiger) utc_offset = - (timezone / 3600); if (result.tm_isdst > 0) utc_offset++; #endif /* defined(__APPLE__) || defined(__FreeBSD__) */ n = snprintf(out, out_size, svn_format, result.tm_year + 1900, result.tm_mon + 1, result.tm_mday, result.tm_hour, result.tm_min, result.tm_sec, utc_offset, wday2str[result.tm_wday], result.tm_mday, mon2str[result.tm_mon], result.tm_year + 1900); return n >= out_size ? -1 : 0; //#endif /* defined(__INTEL_COMPILER) */ } /* --- .Call ENTRY POINT --- */ SEXP svn_time() { time_t t; char buf[45]; t = time(NULL); if (t == (time_t) -1) error("S4Vectors internal error in svn_time(): " "time(NULL) failed"); if (get_svn_time(t, buf, sizeof(buf)) != 0) error("S4Vectors internal error in svn_time(): " "get_svn_time() failed"); return mkString(buf); } S4Vectors/src/eval_utils.c0000644000175400017540000000150113175736136016547 0ustar00biocbuildbiocbuild#include "S4Vectors.h" static SEXP _top_prenv(SEXP promise, SEXP env) { while(TYPEOF(promise) == PROMSXP) { env = PRENV(promise); promise = PREXPR(promise); } return env; } /* * --- .Call ENTRY POINT --- * Gets the top environment associated with a (nested) promise. */ SEXP top_prenv(SEXP nm, SEXP env) { SEXP promise = findVar(nm, env); return _top_prenv(promise, env); } /* * --- .Call ENTRY POINT --- * Gets the top environment associated with each promise in '...' */ SEXP top_prenv_dots(SEXP env) { SEXP dots = findVar(R_DotsSymbol, env); if (dots == R_MissingArg) { return(allocVector(VECSXP, 0)); } SEXP ans = allocVector(VECSXP, length(dots)); int i = 0; for (SEXP p = dots; p != R_NilValue; p = CDR(p)) { SET_VECTOR_ELT(ans, i++, _top_prenv(CAR(p), env)); } return ans; } S4Vectors/src/hash_utils.c0000644000175400017540000000214213175736136016545 0ustar00biocbuildbiocbuild/**************************************************************************** * Hash table management * ****************************************************************************/ #include "S4Vectors.h" /* * Author: Martin Morgan * Modified from R_HOME/src/main/unique.c */ static void htab_init(struct htab *htab, int n) { int n2, i; /* max supported value for n is 2^29 */ if (n < 0 || n > 536870912) /* protect against overflow to -ve */ error("length %d is too large for hashing", n); n2 = 2 * n; htab->M = 2; htab->K = 1; while (htab->M < n2) { htab->M *= 2; htab->K += 1; } htab->Mminus1 = htab->M - 1; htab->buckets = (int *) R_alloc(sizeof(int), htab->M); for (i = 0; i < htab->M; i++) htab->buckets[i] = NA_INTEGER; return; } struct htab _new_htab(int n) { struct htab htab; htab_init(&htab, n); return htab; } int _get_hbucket_val(const struct htab *htab, int bucket_idx) { return htab->buckets[bucket_idx]; } void _set_hbucket_val(struct htab *htab, int bucket_idx, int val) { htab->buckets[bucket_idx] = val; return; } S4Vectors/src/integer_utils.c0000644000175400017540000006136613175736136017274 0ustar00biocbuildbiocbuild#include "S4Vectors.h" #include /* for malloc(), free() */ #include /* for INT_MAX */ static int get_bucket_idx_for_int_pair(const struct htab *htab, int a1, int b1, const int *a2, const int *b2) { unsigned int hval; int bucket_idx, i2; const int *buckets; /* use 2 consecutive prime numbers (seems to work well, no serious justification for it) */ hval = 3951551U * a1 + 3951553U * b1; bucket_idx = hval & htab->Mminus1; buckets = htab->buckets; while ((i2 = buckets[bucket_idx]) != NA_INTEGER) { if (a2[i2] == a1 && b2[i2] == b1) break; bucket_idx = (bucket_idx + 1) % htab->M; } return bucket_idx; } static int get_bucket_idx_for_int_quad(const struct htab *htab, int a1, int b1, int c1, int d1, const int *a2, const int *b2, const int *c2, const int *d2) { unsigned int hval; int bucket_idx, i2; const int *buckets; /* use 4 consecutive prime numbers (seems to work well, no serious justification for it) */ hval = 3951551U * a1 + 3951553U * b1 + 3951557U * c1 + 3951559U * d1; bucket_idx = hval & htab->Mminus1; buckets = htab->buckets; while ((i2 = buckets[bucket_idx]) != NA_INTEGER) { if (a2[i2] == a1 && b2[i2] == b1 && c2[i2] == c1 && d2[i2] == d1) break; bucket_idx = (bucket_idx + 1) % htab->M; } return bucket_idx; } /**************************************************************************** * --- .Call ENTRY POINT --- * any(is.na(x) | x < lower | x > upper) */ SEXP Integer_any_missing_or_outside(SEXP x, SEXP lower, SEXP upper) { int x_len, lower0, upper0, ans, i; const int *x_p; x_len = length(x); lower0 = INTEGER(lower)[0]; upper0 = INTEGER(upper)[0]; ans = 0; for (i = 0, x_p = INTEGER(x); i < x_len; i++, x_p++) { if (*x_p == NA_INTEGER || *x_p < lower0 || *x_p > upper0) { ans = 1; break; } } return ScalarLogical(ans); } /**************************************************************************** * --- .Call ENTRY POINT --- * diff(c(0L, x)) */ SEXP Integer_diff_with_0(SEXP x) { int i, len, *x_ptr1, *x_ptr2, *ans_ptr; SEXP ans; len = LENGTH(x); PROTECT(ans = NEW_INTEGER(len)); if (len > 0) { INTEGER(ans)[0] = INTEGER(x)[0]; if (len > 1) { for (i = 1, x_ptr1 = INTEGER(x), x_ptr2 = INTEGER(x) + 1, ans_ptr = INTEGER(ans) + 1; i < len; i++, x_ptr1++, x_ptr2++, ans_ptr++) { *ans_ptr = *x_ptr2 - *x_ptr1; } } } UNPROTECT(1); return ans; } /**************************************************************************** * --- .Call ENTRY POINT --- * diff(c(x, last)) */ SEXP Integer_diff_with_last(SEXP x, SEXP last) { int i, len, *x_ptr1, *x_ptr2, *ans_ptr; SEXP ans; len = LENGTH(x); PROTECT(ans = NEW_INTEGER(len)); if (len > 0) { for (i = 1, x_ptr1 = INTEGER(x), x_ptr2 = INTEGER(x) + 1, ans_ptr = INTEGER(ans); i < len; i++, x_ptr1++, x_ptr2++, ans_ptr++) { *ans_ptr = *x_ptr2 - *x_ptr1; } INTEGER(ans)[len - 1] = INTEGER(last)[0] - INTEGER(x)[len - 1]; } UNPROTECT(1); return ans; } /**************************************************************************** * Fast ordering of an integer vector. */ /* --- .Call ENTRY POINT --- */ SEXP Integer_order(SEXP x, SEXP decreasing, SEXP use_radix) { int ans_len, i, *ans_elt_p; SEXP ans; if (LENGTH(decreasing) != 1) error("S4Vectors internal error in Integer_order(): " "'decreasing' must be of length 1"); ans_len = LENGTH(x); PROTECT(ans = NEW_INTEGER(ans_len)); for (i = 1, ans_elt_p = INTEGER(ans); i <= ans_len; i++, ans_elt_p++) *ans_elt_p = i; i = _sort_ints(INTEGER(ans), ans_len, INTEGER(x) - 1, LOGICAL(decreasing)[0], LOGICAL(use_radix)[0], NULL, NULL); UNPROTECT(1); if (i != 0) error("S4Vectors internal error in Integer_order(): " "memory allocation failed"); return ans; } /**************************************************************************** * Fast ordering/comparing of integer pairs. * * The .Call entry points in this section are the workhorses behind * sortedIntegerPairs(), orderIntegerPairs(), matchIntegerPairs(), and * duplicatedIntegerPairs(). */ /* * Nothing deep, just checking that 'a' and 'b' are integer vectors of the * same length. We don't look at the individual elements in them, and, * in particular, we don't check for NAs. */ int _check_integer_pairs(SEXP a, SEXP b, const int **a_p, const int **b_p, const char *a_argname, const char *b_argname) { int len; if (!IS_INTEGER(a) || !IS_INTEGER(b)) error("'%s' and '%s' must be integer vectors", a_argname, b_argname); len = LENGTH(a); if (LENGTH(b) != len) error("'%s' and '%s' must have the same length", a_argname, b_argname); *a_p = INTEGER(a); *b_p = INTEGER(b); return len; } /* --- .Call ENTRY POINT --- * 'a1' and 'b1': integer vectors of the same length M. * 'a2' and 'b2': integer vectors of the same length N. * The 4 integer vectors are assumed to be NA free. For efficiency reason, this * is not checked. * If M != N then the shorter object is recycled to the length of the longer * object, except if M or N is 0 in which case the object with length != 0 is * truncated to length 0. */ SEXP Integer_pcompare2(SEXP a1, SEXP b1, SEXP a2, SEXP b2) { int npair1, npair2, ans_len; const int *a1_p, *b1_p, *a2_p, *b2_p; SEXP ans; npair1 = _check_integer_pairs(a1, b1, &a1_p, &b1_p, "a1", "b1"); npair2 = _check_integer_pairs(a2, b2, &a2_p, &b2_p, "a2", "b2"); if (npair1 == 0 || npair2 == 0) ans_len = 0; else ans_len = npair1 >= npair2 ? npair1 : npair2; PROTECT(ans = NEW_INTEGER(ans_len)); _pcompare_int_pairs(a1_p, b1_p, npair1, a2_p, b2_p, npair2, INTEGER(ans), ans_len, 1); UNPROTECT(1); return ans; } /* --- .Call ENTRY POINT --- */ SEXP Integer_sorted2(SEXP a, SEXP b, SEXP decreasing, SEXP strictly) { const int *a_p, *b_p; int npair, ans; npair = _check_integer_pairs(a, b, &a_p, &b_p, "a", "b"); ans = _int_pairs_are_sorted(a_p, b_p, npair, LOGICAL(decreasing)[0], LOGICAL(strictly)[0]); return ScalarLogical(ans); } /* --- .Call ENTRY POINT --- */ SEXP Integer_order2(SEXP a, SEXP b, SEXP decreasing, SEXP use_radix) { int ans_len, i, *ans_elt_p; const int *a_p, *b_p; SEXP ans; if (LENGTH(decreasing) != 2) error("S4Vectors internal error in Integer_order2(): " "'decreasing' must be of length 2"); ans_len = _check_integer_pairs(a, b, &a_p, &b_p, "a", "b"); PROTECT(ans = NEW_INTEGER(ans_len)); for (i = 1, ans_elt_p = INTEGER(ans); i <= ans_len; i++, ans_elt_p++) *ans_elt_p = i; i = _sort_int_pairs(INTEGER(ans), ans_len, a_p - 1, b_p - 1, LOGICAL(decreasing)[0], LOGICAL(decreasing)[1], LOGICAL(use_radix)[0], NULL, NULL); UNPROTECT(1); if (i != 0) error("S4Vectors internal error in Integer_order2(): " "memory allocation failed"); return ans; } /* --- .Call ENTRY POINT --- */ SEXP Integer_match2_quick(SEXP a1, SEXP b1, SEXP a2, SEXP b2, SEXP nomatch) { int len1, len2, nomatch0, *o1, *o2; const int *a1_p, *b1_p, *a2_p, *b2_p; SEXP ans; len1 = _check_integer_pairs(a1, b1, &a1_p, &b1_p, "a1", "b1"); len2 = _check_integer_pairs(a2, b2, &a2_p, &b2_p, "a2", "b2"); nomatch0 = INTEGER(nomatch)[0]; o1 = (int *) R_alloc(sizeof(int), len1); o2 = (int *) R_alloc(sizeof(int), len2); _get_order_of_int_pairs(a1_p, b1_p, len1, 0, 0, o1, 0); _get_order_of_int_pairs(a2_p, b2_p, len2, 0, 0, o2, 0); PROTECT(ans = NEW_INTEGER(len1)); _get_matches_of_ordered_int_pairs(a1_p, b1_p, o1, len1, a2_p, b2_p, o2, len2, nomatch0, INTEGER(ans), 1); UNPROTECT(1); return ans; } /* --- .Call ENTRY POINT --- */ SEXP Integer_selfmatch2_quick(SEXP a, SEXP b) { int len, *o1; const int *a_p, *b_p; SEXP ans; len = _check_integer_pairs(a, b, &a_p, &b_p, "a", "b"); o1 = (int *) R_alloc(sizeof(int), len); _get_order_of_int_pairs(a_p, b_p, len, 0, 0, o1, 0); PROTECT(ans = NEW_INTEGER(len)); _get_matches_of_ordered_int_pairs(a_p, b_p, o1, len, a_p, b_p, o1, len, -1, INTEGER(ans), 1); UNPROTECT(1); return ans; } /* --- .Call ENTRY POINT --- */ SEXP Integer_match2_hash(SEXP a1, SEXP b1, SEXP a2, SEXP b2, SEXP nomatch) { int len1, len2, nomatch0, *ans0, i, bucket_idx, i2; const int *a1_p, *b1_p, *a2_p, *b2_p; struct htab htab; SEXP ans; len1 = _check_integer_pairs(a1, b1, &a1_p, &b1_p, "a1", "b1"); len2 = _check_integer_pairs(a2, b2, &a2_p, &b2_p, "a2", "b2"); nomatch0 = INTEGER(nomatch)[0]; htab = _new_htab(len2); for (i = 0; i < len2; i++) { bucket_idx = get_bucket_idx_for_int_pair(&htab, a2_p[i], b2_p[i], a2_p, b2_p); if (_get_hbucket_val(&htab, bucket_idx) == NA_INTEGER) _set_hbucket_val(&htab, bucket_idx, i); } PROTECT(ans = NEW_INTEGER(len1)); ans0 = INTEGER(ans); for (i = 0; i < len1; i++) { bucket_idx = get_bucket_idx_for_int_pair(&htab, a1_p[i], b1_p[i], a2_p, b2_p); i2 = _get_hbucket_val(&htab, bucket_idx); if (i2 == NA_INTEGER) ans0[i] = nomatch0; else ans0[i] = i2 + 1; } UNPROTECT(1); return ans; } /* --- .Call ENTRY POINT --- */ SEXP Integer_selfmatch2_hash(SEXP a, SEXP b) { int ans_len, *ans0, i, bucket_idx, i2; const int *a_p, *b_p; struct htab htab; SEXP ans; ans_len = _check_integer_pairs(a, b, &a_p, &b_p, "a", "b"); htab = _new_htab(ans_len); PROTECT(ans = NEW_INTEGER(ans_len)); ans0 = INTEGER(ans); for (i = 0; i < ans_len; i++) { bucket_idx = get_bucket_idx_for_int_pair(&htab, a_p[i], b_p[i], a_p, b_p); i2 = _get_hbucket_val(&htab, bucket_idx); if (i2 == NA_INTEGER) { _set_hbucket_val(&htab, bucket_idx, i); ans0[i] = i + 1; } else { ans0[i] = i2 + 1; } } UNPROTECT(1); return ans; } /**************************************************************************** * Fast ordering/comparing of integer quadruplets. * * The .Call entry points in this section are the workhorses behind * sortedIntegerQuads(), orderIntegerQuads(), matchIntegerQuads(), and * duplicatedIntegerQuads(). */ /* * Nothing deep, just checking that 'a', 'b', 'c' and 'd' are integer vectors * of the same length. We don't look at the individual elements in them, and, * in particular, we don't check for NAs. */ int _check_integer_quads(SEXP a, SEXP b, SEXP c, SEXP d, const int **a_p, const int **b_p, const int **c_p, const int **d_p, const char *a_argname, const char *b_argname, const char *c_argname, const char *d_argname) { int len; if (!IS_INTEGER(a) || !IS_INTEGER(b) || !IS_INTEGER(c) || !IS_INTEGER(d)) error("'%s', '%s', '%s' and '%s' must be integer vectors", a_argname, b_argname, c_argname, d_argname); len = LENGTH(a); if (LENGTH(b) != len || LENGTH(c) != len || LENGTH(d) != len) error("'%s', '%s', '%s' and '%s' must have the same length", a_argname, b_argname, c_argname, d_argname); *a_p = INTEGER(a); *b_p = INTEGER(b); *c_p = INTEGER(c); *d_p = INTEGER(d); return len; } /* --- .Call ENTRY POINT --- */ SEXP Integer_sorted4(SEXP a, SEXP b, SEXP c, SEXP d, SEXP decreasing, SEXP strictly) { const int *a_p, *b_p, *c_p, *d_p; int nquad, ans; nquad = _check_integer_quads(a, b, c, d, &a_p, &b_p, &c_p, &d_p, "a", "b", "c", "d"); ans = _int_quads_are_sorted(a_p, b_p, c_p, d_p, nquad, LOGICAL(decreasing)[0], LOGICAL(strictly)[0]); return ScalarLogical(ans); } /* --- .Call ENTRY POINT --- */ SEXP Integer_order4(SEXP a, SEXP b, SEXP c, SEXP d, SEXP decreasing, SEXP use_radix) { int ans_len, i, *ans_elt_p; const int *a_p, *b_p, *c_p, *d_p; SEXP ans; if (LENGTH(decreasing) != 4) error("S4Vectors internal error in Integer_order4(): " "'decreasing' must be of length 4"); ans_len = _check_integer_quads(a, b, c, d, &a_p, &b_p, &c_p, &d_p, "a", "b", "c", "d"); PROTECT(ans = NEW_INTEGER(ans_len)); for (i = 1, ans_elt_p = INTEGER(ans); i <= ans_len; i++, ans_elt_p++) *ans_elt_p = i; i = _sort_int_quads(INTEGER(ans), ans_len, a_p - 1, b_p - 1, c_p - 1, d_p - 1, LOGICAL(decreasing)[0], LOGICAL(decreasing)[1], LOGICAL(decreasing)[2], LOGICAL(decreasing)[3], LOGICAL(use_radix)[0], NULL, NULL); UNPROTECT(1); if (i != 0) error("S4Vectors internal error in Integer_order4(): " "memory allocation failed"); return ans; } /* --- .Call ENTRY POINT --- */ SEXP Integer_match4_quick(SEXP a1, SEXP b1, SEXP c1, SEXP d1, SEXP a2, SEXP b2, SEXP c2, SEXP d2, SEXP nomatch) { int len1, len2, nomatch0, *o1, *o2; const int *a1_p, *b1_p, *c1_p, *d1_p, *a2_p, *b2_p, *c2_p, *d2_p; SEXP ans; len1 = _check_integer_quads(a1, b1, c1, d1, &a1_p, &b1_p, &c1_p, &d1_p, "a1", "b1", "c1", "d1"); len2 = _check_integer_quads(a2, b2, c2, d2, &a2_p, &b2_p, &c2_p, &d2_p, "a2", "b2", "c2", "d2"); nomatch0 = INTEGER(nomatch)[0]; o1 = (int *) R_alloc(sizeof(int), len1); o2 = (int *) R_alloc(sizeof(int), len2); _get_order_of_int_quads(a1_p, b1_p, c1_p, d1_p, len1, 0, 0, 0, 0, o1, 0); _get_order_of_int_quads(a2_p, b2_p, c2_p, d2_p, len2, 0, 0, 0, 0, o2, 0); PROTECT(ans = NEW_INTEGER(len1)); _get_matches_of_ordered_int_quads(a1_p, b1_p, c1_p, d1_p, o1, len1, a2_p, b2_p, c2_p, d2_p, o2, len2, nomatch0, INTEGER(ans), 1); UNPROTECT(1); return ans; } /* --- .Call ENTRY POINT --- */ SEXP Integer_selfmatch4_quick(SEXP a, SEXP b, SEXP c, SEXP d) { int len, *o1; const int *a_p, *b_p, *c_p, *d_p; SEXP ans; len = _check_integer_quads(a, b, c, d, &a_p, &b_p, &c_p, &d_p, "a", "b", "c", "d"); o1 = (int *) R_alloc(sizeof(int), len); _get_order_of_int_quads(a_p, b_p, c_p, d_p, len, 0, 0, 0, 0, o1, 0); PROTECT(ans = NEW_INTEGER(len)); _get_matches_of_ordered_int_quads(a_p, b_p, c_p, d_p, o1, len, a_p, b_p, c_p, d_p, o1, len, -1, INTEGER(ans), 1); UNPROTECT(1); return ans; } /* --- .Call ENTRY POINT --- */ SEXP Integer_match4_hash(SEXP a1, SEXP b1, SEXP c1, SEXP d1, SEXP a2, SEXP b2, SEXP c2, SEXP d2, SEXP nomatch) { int len1, len2, nomatch0, *ans0, i, bucket_idx, i2; const int *a1_p, *b1_p, *c1_p, *d1_p, *a2_p, *b2_p, *c2_p, *d2_p; struct htab htab; SEXP ans; len1 = _check_integer_quads(a1, b1, c1, d1, &a1_p, &b1_p, &c1_p, &d1_p, "a1", "b1", "c1", "d1"); len2 = _check_integer_quads(a2, b2, c2, d2, &a2_p, &b2_p, &c2_p, &d2_p, "a2", "b2", "c2", "d2"); nomatch0 = INTEGER(nomatch)[0]; htab = _new_htab(len2); for (i = 0; i < len2; i++) { bucket_idx = get_bucket_idx_for_int_quad(&htab, a2_p[i], b2_p[i], c2_p[i], d2_p[i], a2_p, b2_p, c2_p, d2_p); if (_get_hbucket_val(&htab, bucket_idx) == NA_INTEGER) _set_hbucket_val(&htab, bucket_idx, i); } PROTECT(ans = NEW_INTEGER(len1)); ans0 = INTEGER(ans); for (i = 0; i < len1; i++) { bucket_idx = get_bucket_idx_for_int_quad(&htab, a1_p[i], b1_p[i], c1_p[i], d1_p[i], a2_p, b2_p, c2_p, d2_p); i2 = _get_hbucket_val(&htab, bucket_idx); if (i2 == NA_INTEGER) ans0[i] = nomatch0; else ans0[i] = i2 + 1; } UNPROTECT(1); return ans; } /* --- .Call ENTRY POINT --- */ SEXP Integer_selfmatch4_hash(SEXP a, SEXP b, SEXP c, SEXP d) { int ans_len, *ans0, i, bucket_idx, i2; const int *a_p, *b_p, *c_p, *d_p; struct htab htab; SEXP ans; ans_len = _check_integer_quads(a, b, c, d, &a_p, &b_p, &c_p, &d_p, "a", "b", "c", "d"); htab = _new_htab(ans_len); PROTECT(ans = NEW_INTEGER(ans_len)); ans0 = INTEGER(ans); for (i = 0; i < ans_len; i++) { bucket_idx = get_bucket_idx_for_int_quad(&htab, a_p[i], b_p[i], c_p[i], d_p[i], a_p, b_p, c_p, d_p); i2 = _get_hbucket_val(&htab, bucket_idx); if (i2 == NA_INTEGER) { _set_hbucket_val(&htab, bucket_idx, i); ans0[i] = i + 1; } else { ans0[i] = i2 + 1; } } UNPROTECT(1); return ans; } /**************************************************************************** * An enhanced version of base::tabulate() that: (1) handles integer weights * (NA and negative weights are OK), and (2) throws an error if 'strict' is * TRUE and if 'x' contains NAs or values not in the [1, 'nbins'] interval. */ SEXP Integer_tabulate2(SEXP x, SEXP nbins, SEXP weight, SEXP strict) { SEXP ans; int x_len, nbins0, weight_len, strict0, *one_based_ans_p, i, j, x_elt, weight_elt; const int *x_p, *weight_p; x_len = LENGTH(x); nbins0 = INTEGER(nbins)[0]; weight_len = LENGTH(weight); weight_p = INTEGER(weight); strict0 = LOGICAL(strict)[0]; j = 0; PROTECT(ans = NEW_INTEGER(nbins0)); memset(INTEGER(ans), 0, nbins0 * sizeof(int)); one_based_ans_p = INTEGER(ans) - 1; // We do unsafe arithmetic, which is 40% faster than safe arithmetic. // For now, the only use case for tabulate2() is fast tabulation of // integer- and factor-Rle's (passing the run values and run lengths // to 'x' and 'weight', respectively), so we are safe (the cumulated // run lengths of an Rle must be < 2^31). //_reset_ovflow_flag(); for (i = j = 0, x_p = INTEGER(x); i < x_len; i++, j++, x_p++) { if (j >= weight_len) j = 0; /* recycle */ x_elt = *x_p; if (x_elt == NA_INTEGER || x_elt < 1 || x_elt > nbins0) { if (!strict0) continue; UNPROTECT(1); error("'x' contains NAs or values not in the " "[1, 'nbins'] interval"); } weight_elt = weight_p[j]; //ans_elt = one_based_ans_p[x_elt]; //one_based_ans_p[x_elt] = _safe_int_add(ans_elt, weight_elt); one_based_ans_p[x_elt] += weight_elt; } //if (_get_ovflow_flag()) // warning("NAs produced by integer overflow"); UNPROTECT(1); return ans; } /**************************************************************************** * Bitwise operations. */ SEXP Integer_explode_bits(SEXP x, SEXP bitpos) { SEXP ans; int ans_nrow, ans_ncol, i, j, *ans_elt, bitmask; const int *x_elt, *bitpos_elt; ans_nrow = LENGTH(x); ans_ncol = LENGTH(bitpos); PROTECT(ans = allocMatrix(INTSXP, ans_nrow, ans_ncol)); ans_elt = INTEGER(ans); for (j = 0, bitpos_elt = INTEGER(bitpos); j < ans_ncol; j++, bitpos_elt++) { if (*bitpos_elt == NA_INTEGER || *bitpos_elt < 1) error("'bitpos' must contain values >= 1"); bitmask = 1 << (*bitpos_elt - 1); for (i = 0, x_elt = INTEGER(x); i < ans_nrow; i++, x_elt++) *(ans_elt++) = (*x_elt & bitmask) != 0; } UNPROTECT(1); return ans; } /**************************************************************************** * --- .Call ENTRY POINT --- * Creates the (sorted) union of two sorted integer vectors */ SEXP Integer_sorted_merge(SEXP x, SEXP y) { int x_i, y_i, x_len, y_len, ans_len; const int *x_ptr, *y_ptr; int *ans_ptr; SEXP ans; x_len = LENGTH(x); y_len = LENGTH(y); x_i = 0; y_i = 0; x_ptr = INTEGER(x); y_ptr = INTEGER(y); ans_len = 0; while (x_i < x_len && y_i < y_len) { if (*x_ptr == *y_ptr) { x_ptr++; x_i++; y_ptr++; y_i++; } else if (*x_ptr < *y_ptr) { x_ptr++; x_i++; } else { y_ptr++; y_i++; } ans_len++; } if (x_i < x_len) { ans_len += x_len - x_i; } else if (y_i < y_len) { ans_len += y_len - y_i; } PROTECT(ans = NEW_INTEGER(ans_len)); x_i = 0; y_i = 0; x_ptr = INTEGER(x); y_ptr = INTEGER(y); ans_ptr = INTEGER(ans); while (x_i < x_len && y_i < y_len) { if (*x_ptr == *y_ptr) { *ans_ptr = *x_ptr; x_ptr++; x_i++; y_ptr++; y_i++; } else if (*x_ptr < *y_ptr) { *ans_ptr = *x_ptr; x_ptr++; x_i++; } else { *ans_ptr = *y_ptr; y_ptr++; y_i++; } ans_ptr++; } if (x_i < x_len) { memcpy(ans_ptr, x_ptr, (x_len - x_i) * sizeof(int)); } else if (y_i < y_len) { memcpy(ans_ptr, y_ptr, (y_len - y_i) * sizeof(int)); } UNPROTECT(1); return ans; } /**************************************************************************** * --- .Call ENTRY POINT --- */ SEXP Integer_mseq(SEXP from, SEXP to) { int i, j, n, ans_len, *from_elt, *to_elt, *ans_elt; SEXP ans; if (!IS_INTEGER(from) || !IS_INTEGER(to)) error("'from' and 'to' must be integer vectors"); n = LENGTH(from); if (n != LENGTH(to)) error("lengths of 'from' and 'to' must be equal"); ans_len = 0; for (i = 0, from_elt = INTEGER(from), to_elt = INTEGER(to); i < n; i++, from_elt++, to_elt++) { ans_len += (*from_elt <= *to_elt ? *to_elt - *from_elt : *from_elt - *to_elt) + 1; } PROTECT(ans = NEW_INTEGER(ans_len)); ans_elt = INTEGER(ans); for (i = 0, from_elt = INTEGER(from), to_elt = INTEGER(to); i < n; i++, from_elt++, to_elt++) { if (*from_elt == NA_INTEGER || *to_elt == NA_INTEGER) error("'from' and 'to' contain NAs"); if (*from_elt <= *to_elt) { for (j = *from_elt; j <= *to_elt; j++) { *ans_elt = j; ans_elt++; } } else { for (j = *from_elt; j >= *to_elt; j--) { *ans_elt = j; ans_elt++; } } } UNPROTECT(1); return ans; } SEXP Integer_fancy_mseq(SEXP lengths, SEXP offset, SEXP rev) { int lengths_len, offset_len, rev_len, ans_len, i, length, *ans_elt, i2, i3, offset_elt, rev_elt, j; const int *lengths_elt; SEXP ans; lengths_len = LENGTH(lengths); offset_len = LENGTH(offset); rev_len = LENGTH(rev); if (lengths_len != 0) { if (offset_len == 0) error("'offset' has length 0 but not 'lengths'"); if (rev_len == 0) error("'rev' has length 0 but not 'lengths'"); } ans_len = 0; for (i = 0, lengths_elt = INTEGER(lengths); i < lengths_len; i++, lengths_elt++) { length = *lengths_elt; if (length == NA_INTEGER) error("'lengths' contains NAs"); if (length < 0) length = -length; ans_len += length; } PROTECT(ans = NEW_INTEGER(ans_len)); ans_elt = INTEGER(ans); for (i = i2 = i3 = 0, lengths_elt = INTEGER(lengths); i < lengths_len; i++, i2++, i3++, lengths_elt++) { if (i2 >= offset_len) i2 = 0; /* recycle */ if (i3 >= rev_len) i3 = 0; /* recycle */ length = *lengths_elt; offset_elt = INTEGER(offset)[i2]; if (length != 0 && offset_elt == NA_INTEGER) { UNPROTECT(1); error("'offset' contains NAs"); } rev_elt = INTEGER(rev)[i3]; if (length >= 0) { if (length >= 2 && rev_elt == NA_LOGICAL) { UNPROTECT(1); error("'rev' contains NAs"); } if (rev_elt) { for (j = length; j >= 1; j--) *(ans_elt++) = j + offset_elt; } else { for (j = 1; j <= length; j++) *(ans_elt++) = j + offset_elt; } } else { if (length <= -2 && rev_elt == NA_LOGICAL) { UNPROTECT(1); error("'rev' contains NAs"); } if (rev_elt) { for (j = length; j <= -1; j++) *(ans_elt++) = j - offset_elt; } else { for (j = -1; j >= length; j--) *(ans_elt++) = j - offset_elt; } } } UNPROTECT(1); return ans; } /**************************************************************************** * findIntervalAndStartFromWidth() * * 'x' and 'width' are integer vectors */ SEXP _find_interv_and_start_from_width(const int *x, int x_len, const int *width, int width_len) { int i, interval, start; const int *x_elt, *width_elt; int *interval_elt, *start_elt, *x_order_elt; SEXP ans, ans_class, ans_names, ans_rownames, ans_interval, ans_start; SEXP x_order; for (i = 0, width_elt = width; i < width_len; i++, width_elt++) { if (*width_elt == NA_INTEGER) error("'width' cannot contain missing values"); else if (*width_elt < 0) error("'width' must contain non-negative values"); } width_elt = width; ans_rownames = R_NilValue; PROTECT(ans_interval = NEW_INTEGER(x_len)); PROTECT(ans_start = NEW_INTEGER(x_len)); if (x_len > 0 && width_len > 0) { start = 1; interval = 1; PROTECT(x_order = NEW_INTEGER(x_len)); _get_order_of_int_array(x, x_len, 0, INTEGER(x_order), 0); for (i = 0, x_order_elt = INTEGER(x_order); i < x_len; i++, x_order_elt++) { x_elt = x + *x_order_elt; interval_elt = INTEGER(ans_interval) + *x_order_elt; start_elt = INTEGER(ans_start) + *x_order_elt; if (*x_elt == 0) { *interval_elt = 0; *start_elt = NA_INTEGER; } else if (*x_elt < 0 || *x_elt == NA_INTEGER) { *interval_elt = NA_INTEGER; *start_elt = NA_INTEGER; } else { while (interval < width_len && *x_elt >= (start + *width_elt)) { interval++; start += *width_elt; width_elt++; } if (*x_elt > start + *width_elt) error("'x' values larger than vector length 'sum(width)'"); *interval_elt = interval; *start_elt = start; } } UNPROTECT(1); PROTECT(ans_rownames = NEW_INTEGER(2)); INTEGER(ans_rownames)[0] = NA_INTEGER; INTEGER(ans_rownames)[1] = -x_len; } else { PROTECT(ans_rownames = NEW_INTEGER(0)); } PROTECT(ans = NEW_LIST(2)); PROTECT(ans_class = NEW_CHARACTER(1)); PROTECT(ans_names = NEW_CHARACTER(2)); SET_STRING_ELT(ans_class, 0, mkChar("data.frame")); SET_STRING_ELT(ans_names, 0, mkChar("interval")); SET_STRING_ELT(ans_names, 1, mkChar("start")); SET_NAMES(ans, ans_names); SET_VECTOR_ELT(ans, 0, ans_interval); SET_VECTOR_ELT(ans, 1, ans_start); setAttrib(ans, install("row.names"), ans_rownames); SET_CLASS(ans, ans_class); UNPROTECT(6); return ans; } /* --- .Call ENTRY POINT --- */ SEXP findIntervalAndStartFromWidth(SEXP x, SEXP width) { if (!IS_INTEGER(x)) error("'x' must be an integer vector"); if (!IS_INTEGER(width)) error("'width' must be an integer vector"); return _find_interv_and_start_from_width(INTEGER(x), LENGTH(x), INTEGER(width), LENGTH(width)); } S4Vectors/src/logical_utils.c0000644000175400017540000001167213175736136017244 0ustar00biocbuildbiocbuild#include "S4Vectors.h" // R_XLEN_T_MAX is 2^52 // LLONG_MAX is 2^63-1 static SEXP sum_as_SEXP(R_xlen_t sum) { /* If 'sum' is <= INT_MAX, we return it as an integer vector of length 1. Otherwise, as a double vector of length 1. Since it's guaranteed to be <= R_XLEN_T_MAX, then it can always be exactly represented as a double. */ return sum <= INT_MAX ? ScalarInteger((int) sum) : ScalarReal((double) sum); } /* Unlike base::sum() which can overflow (and return NA_integer_) on a long logical vector, logical_sum() never overflows. It returns a double if the result cannot be represented as an int (which is what length() does). Note that logical_sum() is slightly faster than base::sum(): length(x) base::sum() logical_sum() speedup --------- ----------- ------------- ------- rhino3: 1e8 83 ms 74 ms 12% 1e9 0.84 s 0.75 s 12% 3e9 2.52 s 2.35 s 13% <-- long vector malbec1: 1e8 93 ms 74 ms 26% 1e9 0.92 s 0.75 s 23% veracruz1: 1e8 121 ms 93 ms 30% 1e9 1.27 s 1.01 s 26% - rhino3: Linux server, Intel(R) Xeon(R) CPU E5-2697 v3 @ 2.60GHz (56 cores), 384 GB of RAM, with Ubuntu Ubuntu 14.04.3 LTS, gcc 4.8.4, R 3.4.0 installed from source (default compiler options and flags). - malbec1: HP ProLiant DL360 Gen9 server, Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz (20 cores), 32 GB of RAM, with Ubuntu 16.04.2 LTS, gcc 5.4.0, R 3.4.0 installed from source (default compiler options and flags). - veracruz1: virtualized Mac Pro Server at Mac Stadium (https://www.macstadium.com), Quad-Core Intel Xeon E5 3.7 GHz, 32 GB of RAM, with El Capitan, clang 4.0.0, R 3.4.0 (CRAN binary). I did not time this on Windows. */ SEXP logical_sum(SEXP x, SEXP na_rm) { R_xlen_t x_len, sum, i; const int *x_dataptr; int na_rm0, x_elt; x_len = XLENGTH(x); x_dataptr = LOGICAL(x); na_rm0 = LOGICAL(na_rm)[0]; sum = 0; for (i = 0; i < x_len; i++) { x_elt = x_dataptr[i]; if (x_elt == NA_LOGICAL) { if (na_rm0) continue; return ScalarInteger(NA_INTEGER); } /* IIRC some comments in the R source code seem to suggest that TRUEs are not guaranteed to be represented by ones at the C level. */ if (x_elt) sum++; } return sum_as_SEXP(sum); } /* Playing around with logical vectors stored in char arrays. Storing logical vectors in int arrays like R does is such a waste of memory! By using chars instead of ints very common operations like sum(x < 0.9) (this is probably the primary use case for sum()!) would require 4x less memory. This is particularly relevant if 'x' is a long vector (e.g. length(x) = 3e9) where R currently spends a significant amount of time allocating memory (e.g. 12Gb) to store the temporary logical vector. Unfortunately walking on a char array is significantly faster than base::sum() on Linux but not on Mac where it's more than 3x slower: length(x) base::sum() logical2_sum() speedup --------- ----------- -------------- ------- rhino3: 1e8 83 ms 66 ms 25% 1e9 0.84 s 0.66 s 27% 3e9 2.52 s 1.93 s 30% <-- long vector malbec1: 1e8 93 ms 64 ms 45% 1e9 0.92 s 0.63 s 46% veracruz1: 1e8 121 ms 398 ms not so good! 1e9 1.27 s 4.05 s not so good! To compare base::sum() vs logical_sum() vs logical2_sum(): library(S4Vectors) sum1 <- function(x, na.rm=FALSE) .Call("logical_sum", x, na.rm, PACKAGE="S4Vectors") sum2 <- function(x, na.rm=FALSE) .Call("logical2_sum", x, na.rm, PACKAGE="S4Vectors") x <- as.logical(sample(2L, 1e8, replace=TRUE) - 1L) x2 <- as.raw(x) ## Correctness res0 <- sum(x, na.rm=FALSE) res1 <- sum1(x, na.rm=FALSE) res2 <- sum2(x2, na.rm=FALSE) stopifnot(identical(res0, res1)) stopifnot(identical(res0, res2)) ## Speed system.time(replicate(20, sum(x, na.rm=FALSE))) system.time(replicate(20, sum1(x, na.rm=FALSE))) system.time(replicate(20, sum2(x2, na.rm=FALSE))) */ #define NA_LOGICAL2 127 /* Arbitrary choice. Could be set to anything but 0 or 1. */ #define LOGICAL2(x) ((char *) RAW(x)) SEXP logical2_sum(SEXP x, SEXP na_rm) { R_xlen_t x_len, sum, i; const char *x_dataptr; int na_rm0; char x_elt; x_len = XLENGTH(x); x_dataptr = LOGICAL2(x); na_rm0 = LOGICAL(na_rm)[0]; sum = 0; for (i = 0; i < x_len; i++) { x_elt = x_dataptr[i]; if (x_elt == NA_LOGICAL2) { if (na_rm0) continue; return ScalarInteger(NA_INTEGER); } if (x_elt) sum++; } return sum_as_SEXP(sum); } S4Vectors/src/map_ranges_to_runs.c0000644000175400017540000004540013175736136020273 0ustar00biocbuildbiocbuild/**************************************************************************** * Map a set of ranges to a set of "runs" * * ("runs" are just non-empty adjacent ranges) * * Author: H. Pag\`es * ****************************************************************************/ #include "S4Vectors.h" #include /* for malloc, free */ #include /* for INT_MAX */ static char errmsg_buf[200]; /* Mapping ranges or positions to a set of run is used in the context of subsetting some Vector derivative (like Rle and GPos objects), so we try to display error messages that makes sense in that context. */ static char *VECTOR_TOO_LONG_errmsg() { snprintf(errmsg_buf, sizeof(errmsg_buf), "subsetting a Vector derivative of length " "2^31 or more is not suppported yet"); return errmsg_buf; } static char *NA_INDICES_errmsg() { snprintf(errmsg_buf, sizeof(errmsg_buf), "subscript contains NAs"); return errmsg_buf; } static char *OUTOFBOUND_INDICES_errmsg() { snprintf(errmsg_buf, sizeof(errmsg_buf), "subscript contains out-of-bounds indices"); return errmsg_buf; } static char *INVALID_RANGES_errmsg() { snprintf(errmsg_buf, sizeof(errmsg_buf), "subscript contains invalid ranges " "(in a valid range 'start'/'end'/'width'\n" " cannot be NA and 'width' must be >= 0)"); return errmsg_buf; } static char *OUTOFBOUND_RANGES_errmsg() { snprintf(errmsg_buf, sizeof(errmsg_buf), "subscript contains out-of-bounds ranges"); return errmsg_buf; } /**************************************************************************** * 1st mapping method * * Use a naive algo (inefficient if more than 1 range to map). * Advantage: simple, memory efficient (unlike the other methods, it doesn't * require allocating any temporary vector), and can be used as a reference * to validate the other slightly more complex methods. */ /* Low-level mapper that takes as input a single range only */ const char *_simple_range_mapper( const int *run_lengths, int nrun, int range_start, int range_end, int *mapped_range_offset, int *mapped_range_span, int *mapped_range_Ltrim, int *mapped_range_Rtrim) { unsigned int offset; int i, j; if (range_start == NA_INTEGER || range_end == NA_INTEGER || range_end < range_start - 1) return INVALID_RANGES_errmsg(); if (range_start < 1) return OUTOFBOUND_RANGES_errmsg(); offset = 0; if (range_end >= range_start) { for (i = 0; i < nrun; i++) { offset += run_lengths[i]; if (offset > INT_MAX) return VECTOR_TOO_LONG_errmsg(); if (offset >= range_start) break; } if (i < nrun) *mapped_range_Ltrim = range_start - offset + run_lengths[i] - 1; if (offset >= range_end) { j = i; } else { for (j = i + 1; j < nrun; j++) { offset += run_lengths[j]; if (offset > INT_MAX) return VECTOR_TOO_LONG_errmsg(); if (offset >= range_end) break; } } *mapped_range_Rtrim = offset - range_end; *mapped_range_span = j - i + 1; } else { /* Zero-width range. */ *mapped_range_span = 0; j = -1; while (offset < range_end) { j++; if (j >= nrun) break; offset += run_lengths[j]; if (offset > INT_MAX) return VECTOR_TOO_LONG_errmsg(); } if (offset == range_end) i = j + 1; else i = j; } if (range_end > offset) return OUTOFBOUND_RANGES_errmsg(); *mapped_range_offset = i; return NULL; } /* Low-level mapper that takes as input a single position only */ const char *_simple_position_mapper( const int *run_lengths, int nrun, int pos, int *mapped_pos) { unsigned int offset; int i; if (pos == NA_INTEGER) return NA_INDICES_errmsg(); if (pos < 1) return OUTOFBOUND_INDICES_errmsg(); offset = 0; for (i = 0; i < nrun; i++) { offset += run_lengths[i]; if (offset > INT_MAX) return VECTOR_TOO_LONG_errmsg(); if (offset >= pos) break; } if (pos > offset) return OUTOFBOUND_INDICES_errmsg(); *mapped_pos = i + 1; return NULL; } static const char *ranges_mapper1( const int *run_lengths, int nrun, const int *start, const int *width, int nranges, int *mapped_range_offset, int *mapped_range_span, int *mapped_range_Ltrim, int *mapped_range_Rtrim) { int i, start_i, end_i; const char *errmsg; errmsg = NULL; for (i = 0; i < nranges; i++) { start_i = start[i]; end_i = start_i - 1 + width[i]; errmsg = _simple_range_mapper( run_lengths, nrun, start_i, end_i, mapped_range_offset + i, mapped_range_span + i, mapped_range_Ltrim + i, mapped_range_Rtrim + i); if (errmsg != NULL) break; } return errmsg; } static const char *positions_mapper1( const int *run_lengths, int nrun, const int *pos, int npos, int *mapped_pos) { int i; const char *errmsg; errmsg = NULL; for (i = 0; i < npos; i++) { errmsg = _simple_position_mapper( run_lengths, nrun, pos[i], mapped_pos + i); if (errmsg != NULL) break; } return errmsg; } /**************************************************************************** * 2nd mapping method * * Use a binary search to map the ranges to the ending positions of the runs * (called "run breakpoints"). */ /* Binary search. */ static int int_bsearch(int x, const int *breakpoints, int nbreakpoints) { int n1, n2, n, bp; if (nbreakpoints == 0) return nbreakpoints; /* Check last element. */ n2 = nbreakpoints - 1; bp = breakpoints[n2]; if (x > bp) return nbreakpoints; if (x == bp) return n2; /* Check first element. */ n1 = 0; bp = breakpoints[n1]; if (x <= bp) return n1; /* Binary search. Seems that using >> 1 instead of / 2 is faster, even when compiling with 'gcc -O2' (one would hope that the optimizer is able to do that kind of optimization). */ while ((n = (n1 + n2) >> 1) != n1) { bp = breakpoints[n]; if (x == bp) return n; if (x > bp) n1 = n; else n2 = n; } return n2; } /* Low-level mapper that takes as input a single range only */ static const char *bsearch_range_mapper( const int *run_breakpoints, int nrun, int range_start, int range_end, int *mapped_range_offset, int *mapped_range_span, int *mapped_range_Ltrim, int *mapped_range_Rtrim) { int x_len, end_run; if (range_start == NA_INTEGER || range_end == NA_INTEGER || range_end < range_start - 1) return INVALID_RANGES_errmsg(); x_len = nrun == 0 ? 0 : run_breakpoints[nrun - 1]; if (range_start < 1 || range_end > x_len) return OUTOFBOUND_RANGES_errmsg(); *mapped_range_offset = int_bsearch(range_start, run_breakpoints, nrun); if (range_end >= range_start) { end_run = int_bsearch(range_end, run_breakpoints, nrun); *mapped_range_span = end_run - *mapped_range_offset + 1; *mapped_range_Ltrim = range_start - 1; if (*mapped_range_offset >= 1) *mapped_range_Ltrim -= run_breakpoints[*mapped_range_offset - 1]; *mapped_range_Rtrim = run_breakpoints[end_run] - range_end; } else { /* Zero-width range. */ *mapped_range_span = 0; } return NULL; } /* Low-level mapper that takes as input a single position only */ static const char *bsearch_position_mapper( const int *run_breakpoints, int nrun, int pos, int *mapped_pos) { int x_len; x_len = nrun == 0 ? 0 : run_breakpoints[nrun - 1]; if (pos == NA_INTEGER) return NA_INDICES_errmsg(); if (pos < 1 || pos > x_len) return OUTOFBOUND_INDICES_errmsg(); *mapped_pos = int_bsearch(pos, run_breakpoints, nrun) + 1; return NULL; } static int *alloc_and_compute_run_breakpoints(const int *run_lengths, int nrun) { int *run_breakpoints; unsigned int breakpoint; int i; run_breakpoints = (int *) malloc(sizeof(int) * nrun); if (run_breakpoints == NULL) { snprintf(errmsg_buf, sizeof(errmsg_buf), "failed to allocate temporary vector of breakpoints"); return NULL; } breakpoint = 0; for (i = 0; i < nrun; i++) { breakpoint += run_lengths[i]; if (breakpoint > INT_MAX) { free(run_breakpoints); VECTOR_TOO_LONG_errmsg(); return NULL; } run_breakpoints[i] = breakpoint; } return run_breakpoints; } static const char *ranges_mapper2( const int *run_lengths, int nrun, const int *start, const int *width, int nranges, int *mapped_range_offset, int *mapped_range_span, int *mapped_range_Ltrim, int *mapped_range_Rtrim) { int *run_breakpoints, i, start_i, end_i; const char *errmsg; run_breakpoints = alloc_and_compute_run_breakpoints(run_lengths, nrun); if (run_breakpoints == NULL) return errmsg_buf; errmsg = NULL; for (i = 0; i < nranges; i++) { start_i = start[i]; end_i = start_i - 1 + width[i]; errmsg = bsearch_range_mapper(run_breakpoints, nrun, start_i, end_i, mapped_range_offset + i, mapped_range_span + i, mapped_range_Ltrim + i, mapped_range_Rtrim + i); if (errmsg != NULL) break; } free(run_breakpoints); return errmsg; } static const char *positions_mapper2( const int *run_lengths, int nrun, const int *pos, int npos, int *mapped_pos) { int *run_breakpoints, i; const char *errmsg; run_breakpoints = alloc_and_compute_run_breakpoints(run_lengths, nrun); if (run_breakpoints == NULL) return errmsg_buf; errmsg = NULL; for (i = 0; i < npos; i++) { errmsg = bsearch_position_mapper(run_breakpoints, nrun, pos[i], mapped_pos + i); if (errmsg != NULL) break; } free(run_breakpoints); return errmsg; } /**************************************************************************** * 3rd mapping method * * Use a radix sort to sort the ranges or positions to map. */ /* Sort the starting and ending positions of the ranges in ascending order before mapping them to the runs. */ static const char *ranges_mapper3( const int *run_lengths, int nrun, const int *start, const int *width, int nranges, int *mapped_range_offset, int *mapped_range_span, int *mapped_range_Ltrim, int *mapped_range_Rtrim) { int SEbuf_len, *SEbuf, *SEorder, *SEbuf2, SE, i, j, k, SE_run; unsigned int breakpoint; SEbuf_len = 2 * nranges; SEbuf = (int *) malloc(sizeof(int) * SEbuf_len); SEorder = (int *) malloc(sizeof(int) * SEbuf_len); if (SEbuf == NULL || SEorder == NULL) { if (SEbuf != NULL) free(SEbuf); if (SEorder != NULL) free(SEorder); snprintf(errmsg_buf, sizeof(errmsg_buf), "ranges_mapper3: memory allocation failed"); return errmsg_buf; } memcpy(SEbuf, start, sizeof(int) * nranges); SEbuf2 = SEbuf + nranges; for (i = 0; i < nranges; i++) SEbuf2[i] = start[i] - 1 + width[i]; /* Use radix sort to find order of values in 'SEbuf'. */ for (i = 0; i < SEbuf_len; i++) SEorder[i] = i; _sort_ints(SEorder, SEbuf_len, SEbuf, 0, 1, NULL, NULL); breakpoint = j = 0; for (k = 0; k < SEbuf_len; k++) { i = SEorder[k]; SE = SEbuf[i]; while (breakpoint < SE && j < nrun) { breakpoint += run_lengths[j++]; if (breakpoint > INT_MAX) { free(SEbuf); free(SEorder); return VECTOR_TOO_LONG_errmsg(); } } if (i < nranges) { /* SE is a start. */ if (SE < 1) { free(SEbuf); free(SEorder); return OUTOFBOUND_RANGES_errmsg(); } mapped_range_Ltrim[i] = - breakpoint; if (SE > breakpoint) { SE_run = j; } else { SE_run = j - 1; mapped_range_Ltrim[i] += run_lengths[SE_run]; } mapped_range_offset[i] = SE_run; } else { /* SE is an end. */ if (SE > breakpoint) { free(SEbuf); free(SEorder); return OUTOFBOUND_RANGES_errmsg(); } i -= nranges; mapped_range_Rtrim[i] = breakpoint; SE_run = j - 1; mapped_range_span[i] = SE_run; } } for (i = 0; i < nranges; i++) { if (width[i] != 0) { mapped_range_span[i] -= mapped_range_offset[i] - 1; mapped_range_Ltrim[i] += start[i] - 1; mapped_range_Rtrim[i] -= SEbuf2[i]; } else { /* Zero-width range. */ mapped_range_span[i] = 0; } } free(SEbuf); free(SEorder); return NULL; } /* Sort the positions in ascending order before mapping them to the runs. */ static const char *positions_mapper3( const int *run_lengths, int nrun, const int *pos, int npos, int *mapped_pos) { int *POSorder, POS, i, j, k, POS_run; unsigned int breakpoint; POSorder = (int *) malloc(sizeof(int) * npos); if (POSorder == NULL) { snprintf(errmsg_buf, sizeof(errmsg_buf), "positions_mapper3: memory allocation failed"); return errmsg_buf; } /* Use radix sort to find order of values in 'pos'. */ for (i = 0; i < npos; i++) POSorder[i] = i; _sort_ints(POSorder, npos, pos, 0, 1, NULL, NULL); breakpoint = j = 0; for (k = 0; k < npos; k++) { i = POSorder[k]; POS = pos[i]; while (breakpoint < POS && j < nrun) { breakpoint += run_lengths[j++]; if (breakpoint > INT_MAX) { free(POSorder); return VECTOR_TOO_LONG_errmsg(); } } if (POS == NA_INTEGER) { free(POSorder); return NA_INDICES_errmsg(); } if (POS < 1 || POS > breakpoint) { free(POSorder); return OUTOFBOUND_INDICES_errmsg(); } if (POS > breakpoint) { POS_run = j + 1; } else { POS_run = j; } mapped_pos[i] = POS_run; } free(POSorder); return NULL; } /**************************************************************************** * _ranges_mapper() and _positions_mapper() * * If 'method' is 0, then the "best" method is automatically choosen. * If 'method' is not >= 0 and <= 3, then these functions do nothing (no-op). */ static int choose_best_method(int nranges, int nrun, double cutoff) { if (nranges == 0) return -1; /* will do nothing */ if (nranges == 1) return 1; return nranges <= cutoff * nrun ? 3 : 2; } const char *_ranges_mapper( const int *run_lengths, int nrun, const int *start, const int *width, int nranges, int *mapped_range_offset, int *mapped_range_span, int *mapped_range_Ltrim, int *mapped_range_Rtrim, int method) { const char *(*fun)( const int *run_lengths, int nrun, const int *start, const int *width, int nranges, int *mapped_range_offset, int *mapped_range_span, int *mapped_range_Ltrim, int *mapped_range_Rtrim); if (method == 0) { /* If nranges <= 0.25 * nrun then use algo based on radix sort (method 3), otherwise use algo based on binary search (method 2). This cutoff is totally empirical and is based on some very shallow testing and timings obtained in June 2017 on my laptop (Dell LATITUDE E6440 with 4Gb of RAM and running 64-bit Ubuntu 14.04.5 LTS). */ method = choose_best_method(nranges, nrun, 0.25); } switch (method) { case 1: fun = ranges_mapper1; break; case 2: fun = ranges_mapper2; break; case 3: fun = ranges_mapper3; break; default: return NULL; /* do nothing */ } return fun(run_lengths, nrun, start, width, nranges, mapped_range_offset, mapped_range_span, mapped_range_Ltrim, mapped_range_Rtrim); } const char *_positions_mapper( const int *run_lengths, int nrun, const int *pos, int npos, int *mapped_pos, int method) { const char *(*fun)( const int *run_lengths, int nrun, const int *pos, int npos, int *mapped_pos); if (method == 0) { /* If npos <= 0.75 * nrun then use algo based on radix sort (method 3), otherwise use algo based on binary search (method 2). This cutoff is totally empirical and is based on some very shallow testing and timings obtained in June 2017 on my laptop (Dell LATITUDE E6440 with 4Gb of RAM and running 64-bit Ubuntu 14.04.5 LTS). */ method = choose_best_method(npos, nrun, 0.75); } switch (method) { case 1: fun = positions_mapper1; break; case 2: fun = positions_mapper2; break; case 3: fun = positions_mapper3; break; default: return NULL; /* do nothing */ } return fun(run_lengths, nrun, pos, npos, mapped_pos); } /**************************************************************************** * map_ranges() and map_positions() * * Both functions assume that 'run_lengths' is an integer vector of positive * values with no NAs. For efficiency reasons this is trusted and the * functions don't check it. */ /* --- .Call ENTRY POINT --- * Return an *unnamed* list of 4 integer vectors. Each integer vector is * parallel to the input ranges (i.e. parallel to 'start' and 'width'). * The i-th element of each integer vector forms a quadruplet of integers * that represents the i-th "mapped range". The 4 integers in the quadruplet * are: * 1. The "mapped range offset": this is the first run spanned by the * mapped range (specified as a 0-based index). * 2. The "mapped range span": this is the nb of runs spanned by the * mapped range. * 3. The "mapped range Ltrim": this is the nb of unspanned positions in the * first spanned run. * 4. The "mapped range Rtrim": this is the nb of unspanned positions in the * last spanned run. * * Example: * - with 'run_lengths' set to c(9L, 15L, 17L, 11L) (i.e. 4 runs of lengths * 9, 15, 17, and 11, respectively). * - with 'start' and 'width' set to 21L and 30L, respectively (i.e. a * single range spanning positions 21 to 50). * * 1 2 3 4 5 * 1234567890123456789012345678901234567890123456789012 * * <-run 1-><----run 2----><-----run 3-----><--run 4--> * <--range to map to the runs--> * * Then the quadruplet of integers representing the "mapped range" is: * 1. mapped range offset: 1 * 2. mapped range span: 3 * 3. mapped range Ltrim: 11 * 4. mapped range Rtrim: 2 * So S4Vectors:::map_ranges_to_runs(c(9L, 15L, 17L, 11L), 21L, 30L) will * return list(1L, 3L, 11L, 2L). */ SEXP map_ranges(SEXP run_lengths, SEXP start, SEXP width, SEXP method) { SEXP mapped_range_offset, mapped_range_span, mapped_range_Ltrim, mapped_range_Rtrim, ans; int nrun, nranges; const int *start_p, *width_p; const char *errmsg; nrun = LENGTH(run_lengths); nranges = _check_integer_pairs(start, width, &start_p, &width_p, "start", "width"); PROTECT(mapped_range_offset = NEW_INTEGER(nranges)); PROTECT(mapped_range_span = NEW_INTEGER(nranges)); PROTECT(mapped_range_Ltrim = NEW_INTEGER(nranges)); PROTECT(mapped_range_Rtrim = NEW_INTEGER(nranges)); errmsg = _ranges_mapper(INTEGER(run_lengths), nrun, start_p, width_p, nranges, INTEGER(mapped_range_offset), INTEGER(mapped_range_span), INTEGER(mapped_range_Ltrim), INTEGER(mapped_range_Rtrim), INTEGER(method)[0]); if (errmsg != NULL) { UNPROTECT(4); error(errmsg); } PROTECT(ans = NEW_LIST(4)); SET_VECTOR_ELT(ans, 0, mapped_range_offset); SET_VECTOR_ELT(ans, 1, mapped_range_span); SET_VECTOR_ELT(ans, 2, mapped_range_Ltrim); SET_VECTOR_ELT(ans, 3, mapped_range_Rtrim); UNPROTECT(5); return ans; } /* --- .Call ENTRY POINT --- */ SEXP map_positions(SEXP run_lengths, SEXP pos, SEXP method) { SEXP mapped_pos; int nrun, npos; const char *errmsg; nrun = LENGTH(run_lengths); npos = LENGTH(pos); PROTECT(mapped_pos = NEW_INTEGER(npos)); errmsg = _positions_mapper(INTEGER(run_lengths), nrun, INTEGER(pos), npos, INTEGER(mapped_pos), INTEGER(method)[0]); if (errmsg != NULL) { UNPROTECT(1); error(errmsg); } UNPROTECT(1); return mapped_pos; } S4Vectors/src/safe_arithm.c0000644000175400017540000000657213175736136016677 0ustar00biocbuildbiocbuild/**************************************************************************** * Safe signed integer arithmetic * * ------------------------------ * * TODO: Extend to support safe double arithmetic when the need arises. * ****************************************************************************/ #include "S4Vectors.h" #include /* for INT_MAX, INT_MIN, LLONG_MAX, and LLONG_MIN */ static int ovflow_flag; void _reset_ovflow_flag() { ovflow_flag = 0; return; } int _get_ovflow_flag() { return ovflow_flag; } /**************************************************************************** * Safe arithmetic on int values * * Reference: * The CERT C Secure Coding Standard * Rule INT32-C. Ensure that operations on signed integers do not result * in overflow */ int _safe_int_add(int x, int y) { if (x == NA_INTEGER || y == NA_INTEGER) return NA_INTEGER; if ((y > 0 && x > INT_MAX - y) || (y < 0 && x < INT_MIN - y)) { ovflow_flag = 1; return NA_INTEGER; } return x + y; } int _safe_int_subtract(int x, int y) { if (x == NA_INTEGER || y == NA_INTEGER) return NA_INTEGER; if ((y < 0 && x > INT_MAX + y) || (y > 0 && x < INT_MIN + y)) { ovflow_flag = 1; return NA_INTEGER; } return x - y; } int _safe_int_mult(int x, int y) { if (x == NA_INTEGER || y == NA_INTEGER) return NA_INTEGER; if (x > 0) { /* x is positive */ if (y > 0) { /* x and y are positive */ if (x > (INT_MAX / y)) { ovflow_flag = 1; return NA_INTEGER; } } else { /* x is positive, y is non-positive */ if (y < (INT_MIN / x)) { ovflow_flag = 1; return NA_INTEGER; } } } else { /* x is non-positive */ if (y > 0) { /* x is non-positive, y is positive */ if (x < (INT_MIN / y)) { ovflow_flag = 1; return NA_INTEGER; } } else { /* x and y are non-positive */ if ((x != 0) && (y < (INT_MAX / x))) { ovflow_flag = 1; return NA_INTEGER; } } } return x * y; } /**************************************************************************** * Safe arithmetic on long long int values */ long long int _safe_llint_add(long long int x, long long int y) { if (x == NA_LLINT || y == NA_LLINT) return NA_LLINT; if ((y > 0LL && x > LLONG_MAX - y) || (y < 0LL && x < LLONG_MIN - y)) { ovflow_flag = 1; return NA_LLINT; } return x + y; } long long int _safe_llint_subtract(long long int x, long long int y) { if (x == NA_LLINT || y == NA_LLINT) return NA_LLINT; if ((y < 0LL && x > LLONG_MAX + y) || (y > 0LL && x < LLONG_MIN + y)) { ovflow_flag = 1; return NA_LLINT; } return x - y; } long long int _safe_llint_mult(long long int x, long long int y) { if (x == NA_LLINT || y == NA_LLINT) return NA_LLINT; if (x > 0LL) { /* x is positive */ if (y > 0LL) { /* x and y are positive */ if (x > (LLONG_MAX / y)) { ovflow_flag = 1; return NA_LLINT; } } else { /* x is positive, y is non-positive */ if (y < (LLONG_MIN / x)) { ovflow_flag = 1; return NA_LLINT; } } } else { /* x is non-positive */ if (y > 0LL) { /* x is non-positive, y is positive */ if (x < (LLONG_MIN / y)) { ovflow_flag = 1; return NA_LLINT; } } else { /* x and y are non-positive */ if ((x != 0LL) && (y < (LLONG_MAX / x))) { ovflow_flag = 1; return NA_LLINT; } } } return x * y; } S4Vectors/src/sort_utils.c0000644000175400017540000010122513175736136016613 0ustar00biocbuildbiocbuild/**************************************************************************** * Low-level sorting utilities * * --------------------------- * ****************************************************************************/ #include "S4Vectors.h" #include /* for qsort() */ #include /* for INT_MIN, INT_MAX, UCHAR_MAX, USHRT_MAX */ /**************************************************************************** * Low-level wrappers to qsort() */ static const int *aa, *bb, *cc, *dd; static int aa_desc, bb_desc, cc_desc, dd_desc; #define COMPARE_TARGET_INTS(target, i1, i2, desc) \ ((desc) ? (target)[(i2)] - (target)[(i1)] \ : (target)[(i1)] - (target)[(i2)]) static int compar1_stable(const void *p1, const void *p2) { int i1, i2, ret; i1 = *((const int *) p1); i2 = *((const int *) p2); ret = COMPARE_TARGET_INTS(aa, i1, i2, aa_desc); if (ret != 0) return ret; /* Break tie by position so the ordering is "stable". */ return i1 - i2; } static int compar2_stable(const void *p1, const void *p2) { int i1, i2, ret; i1 = *((const int *) p1); i2 = *((const int *) p2); ret = COMPARE_TARGET_INTS(aa, i1, i2, aa_desc); if (ret != 0) return ret; ret = COMPARE_TARGET_INTS(bb, i1, i2, bb_desc); if (ret != 0) return ret; /* Break tie by position so the ordering is "stable". */ return i1 - i2; } static int compar3_stable(const void *p1, const void *p2) { int i1, i2, ret; i1 = *((const int *) p1); i2 = *((const int *) p2); ret = COMPARE_TARGET_INTS(aa, i1, i2, aa_desc); if (ret != 0) return ret; ret = COMPARE_TARGET_INTS(bb, i1, i2, bb_desc); if (ret != 0) return ret; ret = COMPARE_TARGET_INTS(cc, i1, i2, cc_desc); if (ret != 0) return ret; /* Break tie by position so the ordering is "stable". */ return i1 - i2; } static int compar4_stable(const void *p1, const void *p2) { int i1, i2, ret; i1 = *((const int *) p1); i2 = *((const int *) p2); ret = COMPARE_TARGET_INTS(aa, i1, i2, aa_desc); if (ret != 0) return ret; ret = COMPARE_TARGET_INTS(bb, i1, i2, bb_desc); if (ret != 0) return ret; ret = COMPARE_TARGET_INTS(cc, i1, i2, cc_desc); if (ret != 0) return ret; ret = COMPARE_TARGET_INTS(dd, i1, i2, dd_desc); /* Break tie by position so the ordering is "stable". */ return i1 - i2; } static void qsort1(int *base, int base_len, const int *a, int a_desc) { aa = a; aa_desc = a_desc; qsort(base, base_len, sizeof(int), compar1_stable); } static void qsort2(int *base, int base_len, const int *a, const int *b, int a_desc, int b_desc) { aa = a; bb = b; aa_desc = a_desc; bb_desc = b_desc; qsort(base, base_len, sizeof(int), compar2_stable); } static void qsort3(int *base, int base_len, const int *a, const int *b, const int *c, int a_desc, int b_desc, int c_desc) { aa = a; bb = b; cc = c; aa_desc = a_desc; bb_desc = b_desc; cc_desc = c_desc; qsort(base, base_len, sizeof(int), compar3_stable); } static void qsort4(int *base, int base_len, const int *a, const int *b, const int *c, const int *d, int a_desc, int b_desc, int c_desc, int d_desc) { aa = a; bb = b; cc = c; dd = d; aa_desc = a_desc; bb_desc = b_desc; cc_desc = c_desc; dd_desc = d_desc; qsort(base, base_len, sizeof(int), compar4_stable); } /**************************************************************************** * sorted_targets() and qsort_targets() */ static int sorted_target(const int *base, int base_len, const int *target, int desc) { int prev_tval, tval, i; if (base_len == 0) return 1; prev_tval = target[base[0]]; if (desc) { for (i = 1; i < base_len; i++) { tval = target[base[i]]; if (tval > prev_tval) return 0; prev_tval = tval; } } else { for (i = 1; i < base_len; i++) { tval = target[base[i]]; if (tval < prev_tval) return 0; prev_tval = tval; } } return 1; } static int sorted_targets(const int *base, int base_len, const int **targets, const int *descs, int ntarget) { int i, j, desc, tval, prev_tval; const int *target; if (ntarget == 1) return sorted_target(base, base_len, targets[0], descs[0]); for (i = 1; i < base_len; i++) { for (j = 0; j < ntarget; j++) { target = targets[j]; desc = descs[j]; tval = target[base[i]]; prev_tval = target[base[i - 1]]; if (tval != prev_tval) { if (desc != (tval < prev_tval)) return 0; break; } } } return 1; } /* Pretty dummy and doesn't scale :-( Should be easy to change. */ static void qsort_targets(int *base, int base_len, const int **targets, const int *descs, int ntarget) { if (ntarget == 1) { qsort1(base, base_len, targets[0], descs[0]); return; } if (ntarget == 2) { qsort2(base, base_len, targets[0], targets[1], descs[0], descs[1]); return; } if (ntarget == 3) { qsort3(base, base_len, targets[0], targets[1], targets[2], descs[0], descs[1], descs[2]); return; } if (ntarget == 4) { qsort4(base, base_len, targets[0], targets[1], targets[2], targets[3], descs[0], descs[1], descs[2], descs[3]); return; } error("S4Vectors internal error in qsort_targets(): " "ntarget must be between >= 1 and <= 4"); return; } static int lucky_sort_targets(int *base, int base_len, const int **targets, const int *descs, int ntarget, int qsort_cutoff) { int tmp; /* Find out whether 'base' is already sorted with respect to all remaining targets (including current). */ if (sorted_targets(base, base_len, targets, descs, ntarget)) return 1; if (base_len == 2) { tmp = base[0]; base[0] = base[1]; base[1] = tmp; return 1; } if (base_len <= qsort_cutoff) { qsort_targets(base, base_len, targets, descs, ntarget); return 1; } return 0; } /**************************************************************************** * Sorting an array of *distinct* unsigned chars */ static int compar_uchars_for_asc_sort(const void *p1, const void *p2) { return ((int) *((const unsigned char *) p1)) - ((int) *((const unsigned char *) p2)); } static int compar_uchars_for_desc_sort(const void *p1, const void *p2) { return ((int) *((const unsigned char *) p2)) - ((int) *((const unsigned char *) p1)); } /* The qsort() solution doesn't take advantage of the fact that the values in 'x' are distinct. */ static void sort_uchar_array(unsigned char *x, int nelt, int desc) { int (*compar)(const void *, const void *); compar = desc ? compar_uchars_for_desc_sort : compar_uchars_for_asc_sort; qsort(x, nelt, sizeof(unsigned char), compar); return; } /**************************************************************************** * sorted_ushort_buf() */ /* Don't call on an empty buffer (i.e. when 'buf_len' is 0). */ static int sorted_ushort_buf(const unsigned short int *ushort_buf, int buf_len, int desc) { unsigned short int prev_uidx, uidx; int i; prev_uidx = ushort_buf[0]; if (desc) { for (i = 1; i < buf_len; i++) { uidx = ushort_buf[i]; if (uidx > prev_uidx) return 0; prev_uidx = uidx; } } else { for (i = 1; i < buf_len; i++) { uidx = ushort_buf[i]; if (uidx < prev_uidx) return 0; prev_uidx = uidx; } } return 1; } /**************************************************************************** * Mini radix: A simple radix-based sort of a single array of *distinct* * unsigned short ints * * WARNING: The values to sort are assumed to be distinct. This is not * checked! Behavior is undefined if they are not. * * Uses 8-bit bucket indices. */ #define MINIRX_NBUCKET (1 << CHAR_BIT) static int minirx_desc; static void minirx_sort_lsb(unsigned short int *base, int base_len, unsigned short int *out, int swapped) { static unsigned char bucket2base[MINIRX_NBUCKET]; int i, uidx, min_uidx, max_uidx; unsigned short int *out_p; if (base_len == 1) { if (swapped) *out = *base; return; } if (sorted_ushort_buf(base, base_len, minirx_desc)) { if (swapped) memcpy(out, base, sizeof(unsigned short int) * base_len); return; } out_p = out; if (base_len == MINIRX_NBUCKET) { for (i = 0; i < base_len; i++) { uidx = (unsigned char) base[i]; bucket2base[uidx] = i; } if (minirx_desc) { uidx = UCHAR_MAX; /* 0xff */ do { i = bucket2base[uidx]; *(out_p++) = base[i]; } while (uidx-- != 0x00); } else { uidx = 0x00; do { i = bucket2base[uidx]; *(out_p++) = base[i]; } while (uidx++ != UCHAR_MAX); } } else { min_uidx = UCHAR_MAX; /* 0xff */ max_uidx = 0x00; memset(bucket2base, UCHAR_MAX, sizeof(unsigned char) * MINIRX_NBUCKET); /* Use 8 less significant bits of the base values (unsigned short ints) to compute the bucket indices. */ for (i = 0; i < base_len; i++) { uidx = (unsigned char) base[i]; bucket2base[uidx] = i; if (uidx < min_uidx) min_uidx = uidx; if (uidx > max_uidx) max_uidx = uidx; } if (minirx_desc) { uidx = max_uidx; do { i = bucket2base[uidx]; if (i != UCHAR_MAX) *(out_p++) = base[i]; } while (uidx-- != min_uidx); } else { uidx = min_uidx; do { i = bucket2base[uidx]; if (i != UCHAR_MAX) *(out_p++) = base[i]; } while (uidx++ != max_uidx); } } if (!swapped) memcpy(base, out, sizeof(unsigned short int) * base_len); return; } #define MINIRX_BASE_MAXLENGTH (1 << (2 * CHAR_BIT)) static unsigned char minirx_base_uidx_buf[MINIRX_BASE_MAXLENGTH]; /* Populate 'bucket_counts_buf', 'bucket_used_buf', and 'minirx_base_uidx_buf'. */ static int minirx_compute_bucket_counts( const unsigned short int *base, int base_len, int *bucket_counts_buf, unsigned char *bucket_used_buf) { int nbucket, i, uidx; memset(bucket_counts_buf, 0, sizeof(int) * MINIRX_NBUCKET); nbucket = 0; /* Use 8 most significant bits of the base values (unsigned short ints) to compute the bucket indices. */ for (i = 0; i < base_len; i++) { uidx = (unsigned char) (base[i] >> CHAR_BIT); minirx_base_uidx_buf[i] = uidx; if (bucket_counts_buf[uidx]++ == 0) bucket_used_buf[nbucket++] = uidx; } return nbucket; } static int sorted_uchar_buf(const unsigned char *uchar_buf, int buf_len, int desc) { int i, prev_uidx, uidx; prev_uidx = uchar_buf[0]; if (desc) { for (i = 1; i < buf_len; i++) { uidx = uchar_buf[i]; if (uidx > prev_uidx) return 0; prev_uidx = uidx; } } else { for (i = 1; i < buf_len; i++) { uidx = uchar_buf[i]; if (uidx < prev_uidx) return 0; prev_uidx = uidx; } } return 1; } /* Walk only on buckets IN USE. */ static void minirx_compute_bucket_offsets_fast( const unsigned char *bucket_used_buf, int nbucket, const int *bucket_counts_buf, int *bucket_offsets_buf) { int offset, i, uidx; offset = 0; for (i = 0; i < nbucket; i++) { uidx = bucket_used_buf[i]; offset += bucket_counts_buf[uidx]; bucket_offsets_buf[uidx] = offset; } return; } static void compute_min_max_uchar_buf( const unsigned char *uchar_buf, int buf_len, int *min_uidx, int *max_uidx) { int i, min, max, uidx; min = UCHAR_MAX; /* 0xff */ max = 0x00; for (i = 0; i < buf_len; i++) { uidx = uchar_buf[i]; if (uidx < min) min = uidx; if (uidx > max) max = uidx; } *min_uidx = min; *max_uidx = max; return; } static void minirx_compute_bucket_offsets(int desc, int min_uidx, int max_uidx, const int *bucket_counts_buf, int *bucket_offsets_buf) { int offset, uidx; offset = 0; if (desc) { uidx = max_uidx; do { offset += bucket_counts_buf[uidx]; bucket_offsets_buf[uidx] = offset; } while (uidx-- != min_uidx); } else { uidx = min_uidx; do { offset += bucket_counts_buf[uidx]; bucket_offsets_buf[uidx] = offset; } while (uidx++ != max_uidx); } return; } static int minirx_sort_base_by_bucket(unsigned short int *base, int base_len, unsigned short int *out, const int *bucket_counts_buf, int *bucket_offsets_buf, unsigned char *bucket_used_buf, int nbucket, int desc) { int bucket_used_buf_is_sorted, min_uidx, max_uidx, i; /* Figure out if we need to sort 'bucket_used_buf'. */ bucket_used_buf_is_sorted = sorted_uchar_buf(bucket_used_buf, nbucket, desc); if (!bucket_used_buf_is_sorted) { //if (nbucket == 2) { // min_uidx = bucket_used_buf[0]; // bucket_used_buf[0] = bucket_used_buf[1]; // bucket_used_buf[1] = min_uidx; // bucket_used_buf_is_sorted = 1; //} else if (nbucket >= 0xe0) { /* 14/16 * 256 = 224 */ // /* Too expensive to find the real min/max uidx. */ // min_uidx = 0x00; // max_uidx = UCHAR_MAX; /* 0xff */ //} else { compute_min_max_uchar_buf( bucket_used_buf, nbucket, &min_uidx, &max_uidx); /* Don't bother sorting if that's going to cost more than just walking on the range of buckets. */ //if (nbucket <= 4) { // /* Cut-off value of 240 based on empirical // observation. */ // if ((int) max_uidx - min_uidx >= 240) { // sort_uchar_array(bucket_used_buf, // nbucket, // desc); // bucket_used_buf_is_sorted = 1; // } //} //} } /* Compute bucket offsets. */ if (bucket_used_buf_is_sorted) { minirx_compute_bucket_offsets_fast(bucket_used_buf, nbucket, bucket_counts_buf, bucket_offsets_buf); } else { minirx_compute_bucket_offsets(desc, min_uidx, max_uidx, bucket_counts_buf, bucket_offsets_buf); } /* Sort 'base' by bucket. */ for (i = base_len - 1; i >= 0; i--) out[--bucket_offsets_buf[minirx_base_uidx_buf[i]]] = base[i]; return bucket_used_buf_is_sorted; } static void minirx_sort(unsigned short int *base, int base_len, unsigned short int *out) { static int bucket_counts_buf[MINIRX_NBUCKET], bucket_offsets_buf[MINIRX_NBUCKET]; static unsigned char bucket_used_buf[MINIRX_NBUCKET]; static int base_uidx_buf_is_sorted, bucket_used_buf_is_sorted; static unsigned short int *tmp; int nbucket, swapped, i, uidx, offset; /* --- HANDLE THE EASY SITUATIONS --- */ if (base_len <= 1) return; /* --- COMPUTE BUCKET INDICES, BUCKET COUNTS, AND LIST OF USED BUCKETS --- */ nbucket = minirx_compute_bucket_counts(base, base_len, bucket_counts_buf, bucket_used_buf); /* --- SORT 'base' BY BUCKET --- */ base_uidx_buf_is_sorted = nbucket > 1 ? sorted_uchar_buf(minirx_base_uidx_buf, base_len, minirx_desc) : 1; if (base_uidx_buf_is_sorted) { bucket_used_buf_is_sorted = 1; swapped = 0; } else { bucket_used_buf_is_sorted = minirx_sort_base_by_bucket( base, base_len, out, bucket_counts_buf, bucket_offsets_buf, bucket_used_buf, nbucket, minirx_desc); /* Swap 'base' and 'out'. */ tmp = out; out = base; base = tmp; swapped = 1; } /* --- ORDER EACH BUCKET --- */ if (bucket_used_buf_is_sorted) { for (i = 0; i < nbucket; i++) { uidx = bucket_used_buf[i]; base_len = bucket_counts_buf[uidx]; minirx_sort_lsb(base, base_len, out, swapped); base += base_len; out += base_len; } } else { for (i = 0; i < nbucket; i++) { uidx = bucket_used_buf[i]; offset = bucket_offsets_buf[uidx]; base_len = bucket_counts_buf[uidx]; minirx_sort_lsb(base + offset, base_len, out + offset, swapped); } } return; } /* Sort an array of *distinct* unsigned short ints. The values in 'x' are assumed to be distinct. This is not checked! Behavior is undefined if they are not. Between 10x (for small 'nelt') and 25x (for big 'nelt') faster than using qsort(). */ static void sort_ushort_array(unsigned short int *x, int nelt, int desc) { static unsigned short int out[MINIRX_BASE_MAXLENGTH]; minirx_desc = desc; minirx_sort(x, nelt, out); return; } /* --- .Call ENTRY POINT --- */ SEXP test_sort_ushort_array(SEXP x, SEXP desc) { int x_len, i; unsigned short int *us; SEXP ans; x_len = LENGTH(x); us = (unsigned short int *) R_alloc(x_len, sizeof(unsigned short int)); for (i = 0; i < x_len; i++) us[i] = (unsigned short int) INTEGER(x)[i]; sort_ushort_array(us, x_len, LOGICAL(desc)[0]); PROTECT(ans = NEW_INTEGER(x_len)); for (i = 0; i < x_len; i++) INTEGER(ans)[i] = (int) us[i]; UNPROTECT(1); return ans; } /**************************************************************************** * RADIX SORT of arrays of integers * * Uses 16-bit bucket indices. * * The current implementation assumes that sizeof(int) is 4 and * sizeof(unsigned short int) is 2. */ static int can_use_rxsort() { return sizeof(int) == 4 && sizeof(unsigned short int) == 2; } /* Dummy qsort_targets() above would need to be modified if were to support more than 4 targets. */ #define MAX_RXTARGETS 4 static const int * rxtargets[MAX_RXTARGETS]; static int rxdescs[MAX_RXTARGETS]; static int last_rxlevel; static unsigned short int * base_uidx_buf; #define RXLEVELS_PER_RXTARGET 2 #define BITS_PER_RXLEVEL (sizeof(unsigned short int) * CHAR_BIT) #define MAX_RXLEVELS (MAX_RXTARGETS * RXLEVELS_PER_RXTARGET) #define RXNBUCKET (1 << BITS_PER_RXLEVEL) static int rxbucket_counts_bufs[RXNBUCKET * MAX_RXLEVELS], rxbucket_offsets_bufs[RXNBUCKET * MAX_RXLEVELS]; static unsigned short int rxbucket_used_bufs[RXNBUCKET * MAX_RXLEVELS]; /* Populate 'bucket_counts_buf', 'bucket_used_buf', and 'base_uidx_buf'. */ static int compute_bucket_counts( const int *base, int base_len, const int *target, int use_msb, int *bucket_counts_buf, unsigned short int *bucket_used_buf) { int nbucket, i, tval; unsigned short int uidx; memset(bucket_counts_buf, 0, sizeof(int) * RXNBUCKET); nbucket = 0; if (use_msb) { /* Use 16 most significant bits of the target values to compute the bucket indices. */ for (i = 0; i < base_len; i++) { tval = target[base[i]]; uidx = (unsigned short int) (tval >> BITS_PER_RXLEVEL); uidx += 0x8000; base_uidx_buf[i] = uidx; if (bucket_counts_buf[uidx]++ == 0) bucket_used_buf[nbucket++] = uidx; } } else { /* Use 16 less significant bits of the target values to compute the bucket indices. */ for (i = 0; i < base_len; i++) { tval = target[base[i]]; uidx = (unsigned short int) tval; base_uidx_buf[i] = uidx; if (bucket_counts_buf[uidx]++ == 0) bucket_used_buf[nbucket++] = uidx; } } return nbucket; } /* Walk only on buckets IN USE. */ static void compute_bucket_offsets_fast( const unsigned short int *bucket_used_buf, int nbucket, const int *bucket_counts_buf, int *bucket_offsets_buf) { int offset, i; unsigned short int uidx; offset = 0; for (i = 0; i < nbucket; i++) { uidx = bucket_used_buf[i]; offset += bucket_counts_buf[uidx]; bucket_offsets_buf[uidx] = offset; } return; } static void compute_min_max_ushort_buf( const unsigned short int *ushort_buf, int buf_len, unsigned short int *min_uidx, unsigned short int *max_uidx) { unsigned short int min, max, uidx; int i; min = USHRT_MAX; /* 0xffff */ max = 0x0000; for (i = 0; i < buf_len; i++) { uidx = ushort_buf[i]; if (uidx < min) min = uidx; if (uidx > max) max = uidx; } *min_uidx = min; *max_uidx = max; return; } static void compute_bucket_offsets(int desc, unsigned short int min_uidx, unsigned short int max_uidx, const int *bucket_counts_buf, int *bucket_offsets_buf) { int offset; unsigned short int uidx; offset = 0; if (desc) { uidx = max_uidx; do { offset += bucket_counts_buf[uidx]; bucket_offsets_buf[uidx] = offset; } while (uidx-- != min_uidx); } else { uidx = min_uidx; do { offset += bucket_counts_buf[uidx]; bucket_offsets_buf[uidx] = offset; } while (uidx++ != max_uidx); } return; } static int sort_base_by_bucket(int *base, int base_len, int *out, const int *bucket_counts_buf, int *bucket_offsets_buf, unsigned short int *bucket_used_buf, int nbucket, int desc) { int bucket_used_buf_is_sorted, i; unsigned short int min_uidx, max_uidx; /* Figure out if we need to sort 'bucket_used_buf'. */ bucket_used_buf_is_sorted = sorted_ushort_buf(bucket_used_buf, nbucket, desc); if (!bucket_used_buf_is_sorted) { //if (nbucket == 2) { // min_uidx = bucket_used_buf[0]; // bucket_used_buf[0] = bucket_used_buf[1]; // bucket_used_buf[1] = min_uidx; // bucket_used_buf_is_sorted = 1; //} else if (nbucket >= 0xe000) { /* 14/16 * 65536 = 57344 */ // /* Too expensive to find the real min/max uidx. */ // min_uidx = 0x0000; // max_uidx = USHRT_MAX; /* 0xffff */ //} else { compute_min_max_ushort_buf( bucket_used_buf, nbucket, &min_uidx, &max_uidx); /* Don't bother sorting if that's going to cost more than just walking on the range of buckets. */ if (nbucket < 4096) { /* Cut-off value of 4 based on empirical observation. */ if ((int) max_uidx - min_uidx >= 4 * nbucket) { sort_ushort_array(bucket_used_buf, nbucket, desc); bucket_used_buf_is_sorted = 1; } } //} } /* Compute bucket offsets. */ if (bucket_used_buf_is_sorted) { compute_bucket_offsets_fast(bucket_used_buf, nbucket, bucket_counts_buf, bucket_offsets_buf); } else { compute_bucket_offsets(desc, min_uidx, max_uidx, bucket_counts_buf, bucket_offsets_buf); } /* Sort 'base' by bucket. */ for (i = base_len - 1; i >= 0; i--) out[--bucket_offsets_buf[base_uidx_buf[i]]] = base[i]; return bucket_used_buf_is_sorted; } static void rxsort_rec(int *base, int base_len, int *out, int level, int swapped) { static int target_no, qsort_cutoff, desc, base_uidx_buf_is_sorted, bucket_used_buf_is_sorted, *tmp; static const int *target; int *bucket_counts_buf, *bucket_offsets_buf, nbucket, i, offset; unsigned short int *bucket_used_buf, uidx; /* --- HANDLE THE EASY SITUATIONS --- */ if (base_len == 0) return; if (base_len == 1) { if (swapped) *out = *base; return; } target_no = level / RXLEVELS_PER_RXTARGET; /* The formula for computing the qsort cut-off makes the bold assumption that the cost of qsort_targets() is linear with respect to the number of targets involved in the sort ('ntarget' argument). That tends to be the case when there is a high percentage of ties but the reality is more complex. The current formula leads to the following cut-off values: target_no | 0 | 1 | 2 | 3 -------------------------------------------------------------- with 1 target | 512 * 1/1 | | | with 2 targets | 512 * 1/2 | 512 * 2/2 | | with 4 targets | 512 * 1/4 | 512 * 2/4 | 512 * 3/4 | 512 * 4/4 The choice of 512 as max cut-off is based on empirical observation. TODO: All these things need more fine tuning... */ qsort_cutoff = 512 * (target_no + 1) / ((last_rxlevel + 1) / RXLEVELS_PER_RXTARGET); if (lucky_sort_targets(base, base_len, rxtargets + target_no, rxdescs + target_no, ((last_rxlevel - level) / RXLEVELS_PER_RXTARGET) + 1, qsort_cutoff)) { if (swapped) memcpy(out, base, sizeof(int) * base_len); return; } /* --- COMPUTE BUCKET INDICES, BUCKET COUNTS, AND LIST OF USED BUCKETS --- */ target = rxtargets[target_no]; desc = rxdescs[target_no]; bucket_counts_buf = rxbucket_counts_bufs + RXNBUCKET * level; bucket_used_buf = rxbucket_used_bufs + RXNBUCKET * level; nbucket = compute_bucket_counts(base, base_len, target, level % 2 == 0, bucket_counts_buf, bucket_used_buf); /* --- SORT 'base' BY BUCKET --- */ base_uidx_buf_is_sorted = nbucket > 1 ? sorted_ushort_buf(base_uidx_buf, base_len, desc) : 1; if (base_uidx_buf_is_sorted) { bucket_used_buf_is_sorted = 1; } else { bucket_offsets_buf = rxbucket_offsets_bufs + RXNBUCKET * level; bucket_used_buf_is_sorted = sort_base_by_bucket( base, base_len, out, bucket_counts_buf, bucket_offsets_buf, bucket_used_buf, nbucket, desc); /* Swap 'base' and 'out'. */ tmp = out; out = base; base = tmp; swapped = !swapped; } if (level == last_rxlevel) { if (swapped) memcpy(out, base, sizeof(int) * base_len); return; } /* --- ORDER EACH BUCKET --- */ level++; if (bucket_used_buf_is_sorted) { for (i = 0; i < nbucket; i++) { uidx = bucket_used_buf[i]; base_len = bucket_counts_buf[uidx]; rxsort_rec(base, base_len, out, level, swapped); base += base_len; out += base_len; } } else { for (i = 0; i < nbucket; i++) { uidx = bucket_used_buf[i]; offset = bucket_offsets_buf[uidx]; base_len = bucket_counts_buf[uidx]; rxsort_rec(base + offset, base_len, out + offset, level, swapped); } } return; } static unsigned short int *alloc_rxbuf1(int base_len) { return (unsigned short int *) malloc(sizeof(unsigned short int) * base_len); } static int *alloc_rxbuf2(int base_len, unsigned short int *rxbuf1, int auto_rxbuf1) { int *rxbuf2; rxbuf2 = (int *) malloc(sizeof(int) * base_len); if (rxbuf2 == NULL && auto_rxbuf1) free(rxbuf1); return rxbuf2; } /**************************************************************************** * Sorting or getting the order of an int array */ static int compar_ints_for_asc_sort(const void *p1, const void *p2) { return *((const int *) p1) - *((const int *) p2); } static int compar_ints_for_desc_sort(const void *p1, const void *p2) { return compar_ints_for_asc_sort(p2, p1); } /* If efficiency matters, use _sort_ints() in radix mode instead. */ void _sort_int_array(int *x, size_t nelt, int desc) { int (*compar)(const void *, const void *); compar = desc ? compar_ints_for_desc_sort : compar_ints_for_asc_sort; qsort(x, nelt, sizeof(int), compar); return; } /* If efficiency matters, use _sort_ints() in radix mode instead. */ void _get_order_of_int_array(const int *x, int nelt, int desc, int *out, int out_shift) { int i; for (i = 0; i < nelt; i++) out[i] = i + out_shift; qsort1(out, nelt, x - out_shift, desc); return; } /* base: 0-based indices into 'x'. rxbuf1, rxbuf2: NULL or user-allocated buffers of length 'base_len'. */ int _sort_ints(int *base, int base_len, const int *x, int desc, int use_radix, unsigned short int *rxbuf1, int *rxbuf2) { int qsort_cutoff, auto_rxbuf1, auto_rxbuf2; rxtargets[0] = x; rxdescs[0] = desc; qsort_cutoff = (use_radix && can_use_rxsort()) ? 1024 : base_len; if (lucky_sort_targets(base, base_len, rxtargets, rxdescs, 1, qsort_cutoff)) return 0; auto_rxbuf1 = rxbuf1 == NULL; if (auto_rxbuf1) { rxbuf1 = alloc_rxbuf1(base_len); if (rxbuf1 == NULL) return -1; } auto_rxbuf2 = rxbuf2 == NULL; if (auto_rxbuf2) { rxbuf2 = alloc_rxbuf2(base_len, rxbuf1, auto_rxbuf1); if (rxbuf2 == NULL) return -2; } last_rxlevel = 1; base_uidx_buf = rxbuf1; rxsort_rec(base, base_len, rxbuf2, 0, 0); if (auto_rxbuf2) free(rxbuf2); if (auto_rxbuf1) free(rxbuf1); return 0; } /**************************************************************************** * Getting the order of 2 int arrays of the same length * * The second array (b) is used to break ties in the first array (a). */ static int compar_int_pairs(int a1, int b1, int a2, int b2) { int ret; ret = a1 - a2; if (ret != 0) return ret; ret = b1 - b2; return ret; } /* Vectorized comparison of 2 vectors of integer pairs. */ void _pcompare_int_pairs(const int *a1, const int *b1, int nelt1, const int *a2, const int *b2, int nelt2, int *out, int out_len, int with_warning) { int i, j, k; for (i = j = k = 0; k < out_len; i++, j++, k++) { if (i >= nelt1) i = 0; /* recycle i */ if (j >= nelt2) j = 0; /* recycle j */ out[k] = compar_int_pairs(a1[i], b1[i], a2[j], b2[j]); } /* This warning message is meaningful only when 'out_len' is 'max(nelt1, nelt2)' and is consistent with the warning we get from binary arithmetic/comparison operations on numeric vectors. */ if (with_warning && out_len != 0 && (i != nelt1 || j != nelt2)) warning("longer object length is not a multiple " "of shorter object length"); return; } int _int_pairs_are_sorted(const int *a, const int *b, int nelt, int desc, int strict) { int a1, b1, a2, b2, i, ret; if (nelt == 0) return 1; a2 = a[0]; b2 = b[0]; for (i = 1; i < nelt; i++) { a1 = a2; b1 = b2; a2 = a[i]; b2 = b[i]; ret = compar_int_pairs(a1, b1, a2, b2); if (ret == 0) { if (strict) return 0; continue; } if (desc != (ret > 0)) return 0; } return 1; } /* If efficiency matters, use _sort_int_pairs() in radix mode instead. */ void _get_order_of_int_pairs(const int *a, const int *b, int nelt, int a_desc, int b_desc, int *out, int out_shift) { int i; for (i = 0; i < nelt; i++) out[i] = i + out_shift; qsort2(out, nelt, a - out_shift, b - out_shift, a_desc, b_desc); return; } /* base: 0-based indices into 'a' and 'b'. rxbuf1, rxbuf2: NULL or user-allocated buffers of length 'base_len'. */ int _sort_int_pairs(int *base, int base_len, const int *a, const int *b, int a_desc, int b_desc, int use_radix, unsigned short int *rxbuf1, int *rxbuf2) { int qsort_cutoff, auto_rxbuf1, auto_rxbuf2; rxtargets[0] = a; rxtargets[1] = b; rxdescs[0] = a_desc; rxdescs[1] = b_desc; qsort_cutoff = (use_radix && can_use_rxsort()) ? 512 : base_len; if (lucky_sort_targets(base, base_len, rxtargets, rxdescs, 2, qsort_cutoff)) return 0; auto_rxbuf1 = rxbuf1 == NULL; if (auto_rxbuf1) { rxbuf1 = alloc_rxbuf1(base_len); if (rxbuf1 == NULL) return -1; } auto_rxbuf2 = rxbuf2 == NULL; if (auto_rxbuf2) { rxbuf2 = alloc_rxbuf2(base_len, rxbuf1, auto_rxbuf1); if (rxbuf2 == NULL) return -2; } last_rxlevel = 3; base_uidx_buf = rxbuf1; rxsort_rec(base, base_len, rxbuf2, 0, 0); if (auto_rxbuf2) free(rxbuf2); if (auto_rxbuf1) free(rxbuf1); return 0; } void _get_matches_of_ordered_int_pairs( const int *a1, const int *b1, const int *o1, int nelt1, const int *a2, const int *b2, const int *o2, int nelt2, int nomatch, int *out, int out_shift) { int i1, i2, ret; i2 = 0; ret = 0; for (i1 = 0; i1 < nelt1; i1++, o1++) { while (i2 < nelt2) { ret = compar_int_pairs( a1[*o1], b1[*o1], a2[*o2], b2[*o2]); if (ret <= 0) break; i2++, o2++; } out[*o1] = ret == 0 ? *o2 + out_shift : nomatch; } return; } /**************************************************************************** * Getting the order of 4 int arrays of the same length * * 2nd, 3rd and 4th arrays are used to successively break ties. */ static int compar_int_quads(int a1, int b1, int c1, int d1, int a2, int b2, int c2, int d2) { int ret; ret = compar_int_pairs(a1, b1, a2, b2); if (ret != 0) return ret; ret = c1 - c2; if (ret != 0) return ret; ret = d1 - d2; return ret; } int _int_quads_are_sorted(const int *a, const int *b, const int *c, const int *d, int nelt, int desc, int strict) { int a1, b1, c1, d1, a2, b2, c2, d2, i, ret; if (nelt == 0) return 1; a2 = a[0]; b2 = b[0]; c2 = c[0]; d2 = d[0]; for (i = 1; i < nelt; i++) { a1 = a2; b1 = b2; c1 = c2; d1 = d2; a2 = a[i]; b2 = b[i]; c2 = c[i]; d2 = d[i]; ret = compar_int_quads(a1, b1, c1, d1, a2, b2, c2, d2); if (ret == 0) { if (strict) return 0; continue; } if (desc != (ret > 0)) return 0; } return 1; } void _get_order_of_int_quads(const int *a, const int *b, const int *c, const int *d, int nelt, int a_desc, int b_desc, int c_desc, int d_desc, int *out, int out_shift) { int i; for (i = 0; i < nelt; i++) out[i] = i + out_shift; qsort4(out, nelt, a - out_shift, b - out_shift, c - out_shift, d - out_shift, a_desc, b_desc, c_desc, d_desc); return; } /* base: 0-based indices into 'a' and 'b'. rxbuf1, rxbuf2: NULL or user-allocated buffers of length 'base_len'. */ int _sort_int_quads(int *base, int base_len, const int *a, const int *b, const int *c, const int *d, int a_desc, int b_desc, int c_desc, int d_desc, int use_radix, unsigned short int *rxbuf1, int *rxbuf2) { int qsort_cutoff, auto_rxbuf1, auto_rxbuf2; rxtargets[0] = a; rxtargets[1] = b; rxtargets[2] = c; rxtargets[3] = d; rxdescs[0] = a_desc; rxdescs[1] = b_desc; rxdescs[2] = c_desc; rxdescs[3] = d_desc; qsort_cutoff = (use_radix && can_use_rxsort()) ? 256 : base_len; if (lucky_sort_targets(base, base_len, rxtargets, rxdescs, 4, qsort_cutoff)) return 0; auto_rxbuf1 = rxbuf1 == NULL; if (auto_rxbuf1) { rxbuf1 = alloc_rxbuf1(base_len); if (rxbuf1 == NULL) return -1; } auto_rxbuf2 = rxbuf2 == NULL; if (auto_rxbuf2) { rxbuf2 = alloc_rxbuf2(base_len, rxbuf1, auto_rxbuf1); if (rxbuf2 == NULL) return -2; } last_rxlevel = 7; base_uidx_buf = rxbuf1; rxsort_rec(base, base_len, rxbuf2, 0, 0); if (auto_rxbuf2) free(rxbuf2); if (auto_rxbuf1) free(rxbuf1); return 0; } void _get_matches_of_ordered_int_quads( const int *a1, const int *b1, const int *c1, const int *d1, const int *o1, int nelt1, const int *a2, const int *b2, const int *c2, const int *d2, const int *o2, int nelt2, int nomatch, int *out, int out_shift) { int i1, i2, ret; i2 = 0; ret = 0; for (i1 = 0; i1 < nelt1; i1++, o1++) { while (i2 < nelt2) { ret = compar_int_quads( a1[*o1], b1[*o1], c1[*o1], d1[*o1], a2[*o2], b2[*o2], c2[*o2], d2[*o2]); if (ret <= 0) break; i2++, o2++; } out[*o1] = ret == 0 ? *o2 + out_shift : nomatch; } return; } S4Vectors/src/subsetting_utils.c0000644000175400017540000001515513175736136020021 0ustar00biocbuildbiocbuild/**************************************************************************** * Low-level subsetting utilities * ****************************************************************************/ #include "S4Vectors.h" /**************************************************************************** * memcpy()-based copy of data from a vector to a vector of the same type. */ /* Return new 'dest_offset'. */ int _copy_vector_block(SEXP dest, int dest_offset, SEXP src, int src_offset, int block_width) { int new_dest_offset, i; void *dest_p; const void *src_p; size_t elt_size; SEXP src_elt; // dest_elt; if (block_width < 0) error("negative widths are not allowed"); new_dest_offset = dest_offset + block_width; if (dest_offset < 0 || new_dest_offset > LENGTH(dest) || src_offset < 0 || src_offset + block_width > LENGTH(src)) error("subscript contains out-of-bounds indices"); switch (TYPEOF(dest)) { case RAWSXP: dest_p = (void *) (RAW(dest) + dest_offset); src_p = (const void *) (RAW(src) + src_offset); elt_size = sizeof(Rbyte); break; case LGLSXP: dest_p = (void *) (LOGICAL(dest) + dest_offset); src_p = (const void *) (LOGICAL(src) + src_offset); elt_size = sizeof(int); break; case INTSXP: dest_p = (void *) (INTEGER(dest) + dest_offset); src_p = (const void *) (INTEGER(src) + src_offset); elt_size = sizeof(int); break; case REALSXP: dest_p = (void *) (REAL(dest) + dest_offset); src_p = (const void *) (REAL(src) + src_offset); elt_size = sizeof(double); break; case CPLXSXP: dest_p = (void *) (COMPLEX(dest) + dest_offset); src_p = (const void *) (COMPLEX(src) + src_offset); elt_size = sizeof(Rcomplex); break; case STRSXP: for (i = 0; i < block_width; i++) { src_elt = STRING_ELT(src, src_offset + i); SET_STRING_ELT(dest, dest_offset + i, src_elt); //PROTECT(dest_elt = duplicate(src_elt)); //SET_STRING_ELT(dest, dest_offset + i, dest_elt); //UNPROTECT(1); } return new_dest_offset; case VECSXP: for (i = 0; i < block_width; i++) { src_elt = VECTOR_ELT(src, src_offset + i); SET_VECTOR_ELT(dest, dest_offset + i, src_elt); //PROTECT(dest_elt = duplicate(src_elt)); //SET_VECTOR_ELT(dest, dest_offset + i, dest_elt); //UNPROTECT(1); } return new_dest_offset; default: error("S4Vectors internal error in _copy_vector_block(): " "%s type not supported", CHAR(type2str(TYPEOF(dest)))); } memcpy(dest_p, src_p, elt_size * block_width); return new_dest_offset; } /* Return new 'dest_offset'. */ int _copy_vector_positions(SEXP dest, int dest_offset, SEXP src, const int *pos, int npos) { int i; for (i = 0; i < npos; i++) dest_offset = _copy_vector_block(dest, dest_offset, src, pos[i] - 1, 1); return dest_offset; } int _copy_vector_ranges(SEXP dest, int dest_offset, SEXP src, const int *start, const int *width, int nranges) { int i; for (i = 0; i < nranges; i++) dest_offset = _copy_vector_block(dest, dest_offset, src, start[i] - 1, width[i]); return dest_offset; } /**************************************************************************** * _subset_vector_OR_factor_by_positions() and * _subset_vector_OR_factor_by_ranges() */ SEXP _subset_vector_OR_factor_by_positions(SEXP x, const int *pos, int npos) { SEXP ans, x_names, ans_names, ans_class, ans_levels; PROTECT(ans = allocVector(TYPEOF(x), npos)); /* Extract the values from 'x'. */ _copy_vector_positions(ans, 0, x, pos, npos); /* Extract the names from 'x'. */ x_names = GET_NAMES(x); if (x_names != R_NilValue) { PROTECT(ans_names = NEW_CHARACTER(npos)); _copy_vector_positions(ans_names, 0, x_names, pos, npos); SET_NAMES(ans, ans_names); UNPROTECT(1); } /* 'x' could be a factor in which case we need to propagate its levels. */ if (isFactor(x)) { /* Levels must be set before class. */ PROTECT(ans_levels = duplicate(GET_LEVELS(x))); SET_LEVELS(ans, ans_levels); UNPROTECT(1); PROTECT(ans_class = duplicate(GET_CLASS(x))); SET_CLASS(ans, ans_class); UNPROTECT(1); } UNPROTECT(1); return ans; } SEXP _subset_vector_OR_factor_by_ranges(SEXP x, const int *start, const int *width, int nranges) { int x_len, i, ans_len, start_i, width_i, end_i; SEXP ans, x_names, ans_names, ans_class, ans_levels; x_len = LENGTH(x); _reset_ovflow_flag(); for (i = ans_len = 0; i < nranges; i++) { start_i = start[i]; if (start_i == NA_INTEGER || start_i < 1) error("'start' must be >= 1"); width_i = width[i]; if (width_i == NA_INTEGER || width_i < 0) error("'width' must be >= 0"); end_i = start_i - 1 + width_i; if (end_i > x_len) error("'end' must be <= 'length(x)'"); ans_len = _safe_int_add(ans_len, width_i); } if (_get_ovflow_flag()) error("subscript is too big"); PROTECT(ans = allocVector(TYPEOF(x), ans_len)); /* Extract the values from 'x'. */ _copy_vector_ranges(ans, 0, x, start, width, nranges); /* Extract the names from 'x'. */ x_names = GET_NAMES(x); if (x_names != R_NilValue) { PROTECT(ans_names = NEW_CHARACTER(ans_len)); _copy_vector_ranges(ans_names, 0, x_names, start, width, nranges); SET_NAMES(ans, ans_names); UNPROTECT(1); } /* 'x' could be a factor in which case we need to propagate its levels. */ if (isFactor(x)) { /* Levels must be set before class. */ PROTECT(ans_levels = duplicate(GET_LEVELS(x))); SET_LEVELS(ans, ans_levels); UNPROTECT(1); PROTECT(ans_class = duplicate(GET_CLASS(x))); SET_CLASS(ans, ans_class); UNPROTECT(1); } UNPROTECT(1); return ans; } /**************************************************************************** * vector_OR_factor_extract_positions() and vector_OR_factor_extract_ranges() */ /* --- .Call ENTRY POINT --- * Args: * x: An atomic vector, or factor, or list. * pos: Integer vector of positions to extract. * Return an object of the same type as 'x' (names and levels are propagated). */ SEXP vector_OR_factor_extract_positions(SEXP x, SEXP pos) { int npos; npos = LENGTH(pos); return _subset_vector_OR_factor_by_positions(x, INTEGER(pos), npos); } /* --- .Call ENTRY POINT --- * Args: * x: An atomic vector, or factor, or list. * start, width: Integer vectors of the same length defining the ranges to * extract. * Return an object of the same type as 'x' (names and levels are propagated). */ SEXP vector_OR_factor_extract_ranges(SEXP x, SEXP start, SEXP width) { int nranges; const int *start_p, *width_p; nranges = _check_integer_pairs(start, width, &start_p, &width_p, "start", "width"); return _subset_vector_OR_factor_by_ranges(x, start_p, width_p, nranges); } S4Vectors/src/vector_utils.c0000644000175400017540000000675413175736136017141 0ustar00biocbuildbiocbuild/**************************************************************************** * Low-level manipulation of ordinary R vectors * ****************************************************************************/ #include "S4Vectors.h" /**************************************************************************** * memcmp()-based comparison of 2 vectors of the same type. * NOTE: Doesn't support STRSXP and VECSXP. */ int _vector_memcmp(SEXP x1, int x1_offset, SEXP x2, int x2_offset, int nelt) { const void *s1 = NULL, *s2 = NULL; /* gcc -Wall */ size_t eltsize = 0; /* gcc -Wall */ if (x1_offset < 0 || x1_offset + nelt > LENGTH(x1) || x2_offset < 0 || x2_offset + nelt > LENGTH(x2)) error("S4Vectors internal error in _vector_memcmp(): " "elements to compare are out of vector bounds"); switch (TYPEOF(x1)) { case RAWSXP: s1 = (const void *) (RAW(x1) + x1_offset); s2 = (const void *) (RAW(x2) + x2_offset); eltsize = sizeof(Rbyte); break; case LGLSXP: case INTSXP: s1 = (const void *) (INTEGER(x1) + x1_offset); s2 = (const void *) (INTEGER(x2) + x2_offset); eltsize = sizeof(int); break; case REALSXP: s1 = (const void *) (REAL(x1) + x1_offset); s2 = (const void *) (REAL(x2) + x2_offset); eltsize = sizeof(double); break; case CPLXSXP: s1 = (const void *) (COMPLEX(x1) + x1_offset); s2 = (const void *) (COMPLEX(x2) + x2_offset); eltsize = sizeof(Rcomplex); break; default: error("S4Vectors internal error in _vector_memcmp(): " "%s type not supported", CHAR(type2str(TYPEOF(x1)))); } return s1 == s2 ? 0 : memcmp(s1, s2, nelt * eltsize); } /**************************************************************************** * sapply_NROW() */ static int get_NROW(SEXP x) { SEXP x_dim, x_rownames; if (x == R_NilValue) return 0; if (!IS_VECTOR(x)) error("get_NROW() defined only on a vector (or NULL)"); /* A data.frame doesn't have a "dim" attribute but the dimensions can be inferred from the "names" and "row.names" attributes. */ x_rownames = getAttrib(x, R_RowNamesSymbol); if (x_rownames != R_NilValue) return LENGTH(x_rownames); x_dim = GET_DIM(x); if (x_dim == R_NilValue || LENGTH(x_dim) == 0) return LENGTH(x); return INTEGER(x_dim)[0]; } /* * --- .Call ENTRY POINT --- * A C implementation of 'sapply(x, NROW)' that works only on a list of * vectors (or NULLs). */ SEXP sapply_NROW(SEXP x) { SEXP ans, x_elt; int x_len, i, *ans_elt; x_len = LENGTH(x); PROTECT(ans = NEW_INTEGER(x_len)); for (i = 0, ans_elt = INTEGER(ans); i < x_len; i++, ans_elt++) { x_elt = VECTOR_ELT(x, i); if (x_elt != R_NilValue && !IS_VECTOR(x_elt)) { UNPROTECT(1); error("element %d not a vector (or NULL)", i + 1); } *ans_elt = get_NROW(x_elt); } UNPROTECT(1); return ans; } /**************************************************************************** * _list_as_data_frame() */ /* Performs IN-PLACE coercion of list 'x' into a data frame! */ SEXP _list_as_data_frame(SEXP x, int nrow) { SEXP rownames, class; int i; if (!isVectorList(x) || GET_NAMES(x) == R_NilValue) error("S4Vectors internal error in _list_as_data_frame(): " "'x' must be a named list"); /* Set the "row.names" attribute. */ PROTECT(rownames = NEW_INTEGER(nrow)); for (i = 0; i < nrow; i++) INTEGER(rownames)[i] = i + 1; SET_ATTR(x, R_RowNamesSymbol, rownames); UNPROTECT(1); /* Set the "class" attribute. */ PROTECT(class = mkString("data.frame")); SET_CLASS(x, class); UNPROTECT(1); return x; } S4Vectors/tests/0000755000175400017540000000000013175714520014603 5ustar00biocbuildbiocbuildS4Vectors/tests/run_unitTests.R0000644000175400017540000000012513175714520017612 0ustar00biocbuildbiocbuildrequire("S4Vectors") || stop("unable to load S4Vectors package") S4Vectors:::.test() S4Vectors/vignettes/0000755000175400017540000000000013175736135015457 5ustar00biocbuildbiocbuildS4Vectors/vignettes/RleTricks.Rnw0000644000175400017540000000365013175714520020047 0ustar00biocbuildbiocbuild\documentclass{article} % \VignetteIndexEntry{Rle Tips and Tricks} % \VignetteDepends{} % \VignetteKeywords{Rle} % \VignettePackage{S4Vectors} \usepackage{times} \usepackage{hyperref} \textwidth=6.5in \textheight=8.5in % \parskip=.3cm \oddsidemargin=-.1in \evensidemargin=-.1in \headheight=-.3in \newcommand{\Rfunction}[1]{{\texttt{#1}}} \newcommand{\Robject}[1]{{\texttt{#1}}} \newcommand{\Rpackage}[1]{{\textit{#1}}} \newcommand{\Rmethod}[1]{{\texttt{#1}}} \newcommand{\Rfunarg}[1]{{\texttt{#1}}} \newcommand{\Rclass}[1]{{\textit{#1}}} \newcommand{\Rcode}[1]{{\texttt{#1}}} \newcommand{\software}[1]{\textsf{#1}} \newcommand{\R}{\software{R}} \title{Rle Tips and Tricks} \author{Patrick Aboyoun} \date{\today} \begin{document} \maketitle <>= options(width=60) @ <>= rollmeanRle <- function (x, k) { n <- length(x) cumsum(c(Rle(sum(window(x, 1, k))), window(x, k + 1, n) - window(x, 1, n - k))) / k } @ <>= rollvarRle <- function(x, k) { n <- length(x) means <- rollmeanRle(x, k) nextMean <- window(means, 2, n - k + 1) cumsum(c(Rle(sum((window(x, 1, k) - means[1])^2)), k * diff(means)^2 - (window(x, 1, n - k) - nextMean)^2 + (window(x, k + 1, n) - nextMean)^2)) / (k - 1) } @ <>= rollcovRle <- function(x, y, k) { n <- length(x) meanX <- rollmeanRle(x, k) meanY <- rollmeanRle(y, k) nextMeanX <- window(meanX, 2, n - k + 1) nextMeanY <- window(meanY, 2, n - k + 1) cumsum(c(Rle(sum((window(x, 1, k) - meanX[1]) * (window(y, 1, k) - meanY[1]))), k * diff(meanX) * diff(meanY) - (window(x, 1, n - k) - nextMeanX) * (window(y, 1, n - k) - nextMeanY) + (window(x, k + 1, n) - nextMeanX) * (window(y, k + 1, n) - nextMeanY))) / (k - 1) } @ <>= rollsdRle <- function(x, k) { sqrt(rollvarRle(x, k)) } @ <>= rollcorRle <- function(x, y, k) { rollcovRle(x, y, k) / (rollsdRle(x, k) * rollsdRle(y, k)) } @ \end{document} S4Vectors/vignettes/S4QuickOverview.Rnw0000644000175400017540000004024113175714520021154 0ustar00biocbuildbiocbuild%\VignetteIndexEntry{A quick overview of the S4 class system} %\VignetteDepends{methods,Matrix,IRanges,ShortRead,graph} \SweaveOpts{keep.source=TRUE, eps=FALSE, width=9, height=3} \documentclass[9pt]{beamer} \usepackage{slides} \AtBeginSection[] { \begin{frame}{Outline} \tableofcontents[currentsection,currentsubsection] \end{frame} } \title{A quick overview of the S4 class system} \author{Herv\'e Pag\`es\\ \href{mailto:hpages@fredhutch.org}{hpages@fredhutch.org}} %\institute[FHCRC]{Fred Hutchinson Cancer Research Center\\ % Seattle, WA} \date{June 2016} \begin{document} <>= options(width=60) library(Matrix) library(IRanges) library(ShortRead) library(graph) @ \maketitle \frame{\tableofcontents} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{What is S4?} \begin{frame}[fragile] \frametitle{The S4 class system} \begin{block}{} \begin{itemize} \item The \textit{S4 class system} is a set of facilities provided in R for OO programming. \item Implemented in the \Rpackage{methods} package. \item On a fresh \R{} session: \begin{Schunk} \begin{Sinput} > sessionInfo() \end{Sinput} \begin{Soutput} ... attached base packages: [1] stats graphics grDevices utils datasets [6] methods base \end{Soutput} \end{Schunk} \item R also supports an older class system: the \textit{S3 class system}. \end{itemize} \end{block} \end{frame} \begin{frame}[fragile] \frametitle{A different world} \begin{block}{The syntax} \begin{Schunk} \begin{Sinput} > foo(x, ...) \end{Sinput} \end{Schunk} not: \begin{Schunk} \begin{Sinput} > x.foo(...) \end{Sinput} \end{Schunk} like in other OO programming languages. \end{block} \begin{block}{The central concepts} \begin{itemize} \item The core components: \emph{classes}\footnote{also called \emph{formal classes}, to distinguish them from the S3 classes aka \emph{old style classes}}, \emph{generic functions} and \emph{methods} \item The glue: \emph{method dispatch} (supports \emph{simple} and \emph{multiple} dispatch) \end{itemize} \end{block} \end{frame} \begin{frame}[fragile] \frametitle{} \begin{block}{The result} \begin{Schunk} \begin{Sinput} > ls('package:methods') \end{Sinput} \begin{Soutput} [1] "addNextMethod" "allGenerics" [3] "allNames" "Arith" [5] "as" "as<-" [7] "asMethodDefinition" "assignClassDef" ... [211] "testVirtual" "traceOff" [213] "traceOn" "tryNew" [215] "unRematchDefinition" "validObject" [217] "validSlotNames" \end{Soutput} \end{Schunk} \begin{itemize} \item Rich, complex, can be intimidating \item The classes and methods we implement in our packages can be hard to document, especially when the class hierarchy is complicated and multiple dispatch is used \end{itemize} \end{block} \end{frame} \begin{frame}[fragile] \frametitle{S4 in Bioconductor} \begin{block}{} \begin{itemize} \item Heavily used. In BioC 3.3: 3158 classes and 22511 methods defined in 609 packages! (out of 1211 software packages) \item Top 10: 128 classes in \Rpackage{ChemmineOB}, 98 in \Rpackage{flowCore}, 79 in \Rpackage{IRanges}, 68 in \Rpackage{rsbml}, 61 in \Rpackage{ShortRead}, 58 in \Rpackage{Biostrings}, 51 in \Rpackage{rtracklayer}, 50 in \Rpackage{oligoClasses}, 45 in \Rpackage{flowUtils}, and 40 in \Rpackage{BaseSpaceR}. \item For the end-user: it's mostly transparent. But when something goes wrong, error messages issued by the S4 class system can be hard to understand. Also it can be hard to find the documentation for a specific method. \item Most Bioconductor packages use only a small subset of the S4 capabilities (covers 99.99\% of our needs) \end{itemize} \end{block} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{S4 from an end-user point of view} \begin{frame}[fragile] \frametitle{Where do S4 objects come from?} \begin{block}{From a dataset} <>= library(graph) data(apopGraph) apopGraph @ \end{block} \begin{block}{From using an object constructor function} <>= library(IRanges) IRanges(start=c(101, 25), end=c(110, 80)) @ \end{block} \end{frame} \begin{frame}[fragile] \frametitle{} \begin{block}{From a coercion} <>= library(Matrix) m <- matrix(3:-4, nrow=2) as(m, "Matrix") @ \end{block} \begin{block}{From using a specialized high-level constructor} \begin{Schunk} \begin{Sinput} > library(GenomicFeatures) > makeTxDbFromUCSC("sacCer2", tablename="ensGene") \end{Sinput} \begin{Soutput} TxDb object: # Db type: TxDb # Supporting package: GenomicFeatures # Data source: UCSC # Genome: sacCer2 # Organism: Saccharomyces cerevisiae # Taxonomy ID: 4932 # UCSC Table: ensGene # UCSC Track: Ensembl Genes ... \end{Soutput} \end{Schunk} \end{block} \end{frame} \begin{frame}[fragile] \frametitle{} \begin{block}{From using a high-level I/O function} <>= library(ShortRead) path_to_my_data <- system.file( package="ShortRead", "extdata", "Data", "C1-36Firecrest", "Bustard", "GERALD") lane1 <- readFastq(path_to_my_data, pattern="s_1_sequence.txt") lane1 @ \end{block} \begin{block}{Inside another object} <>= sread(lane1) @ \end{block} \end{frame} \begin{frame}[fragile] \frametitle{How to manipulate S4 objects?} \begin{block}{Low-level: getters and setters} <>= ir <- IRanges(start=c(101, 25), end=c(110, 80)) width(ir) width(ir) <- width(ir) - 5 ir @ \end{block} \begin{block}{High-level: plenty of specialized methods} <>= qa1 <- qa(lane1, lane="lane1") class(qa1) @ \end{block} \end{frame} \begin{frame}[fragile] \frametitle{How to find the right man page?} \begin{itemize} \item \Rcode{class?graphNEL} or equivalently \Rcode{?\`{}graphNEL-class\`} for accessing the man page of a class \item \Rcode{?qa} for accessing the man page of a generic function \item The man page for a generic might also document some or all of the methods for this generic. The \textit{See Also:} section might give a clue. Also using \Rcode{showMethods()} can be useful: <>= showMethods("qa") @ \item \Rcode{?\`{}qa,ShortReadQ-method\`} to access the man page for a particular method (might be the same man page as for the generic) \item In doubt: \Rcode{??qa} will search the man pages of all the installed packages and return the list of man pages that contain the string \Rcode{qa} \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{Inspecting objects and discovering methods} \begin{itemize} \item \Rcode{class()} and \Rcode{showClass()} {\footnotesize <>= class(lane1) showClass("ShortReadQ") @ } \item \Rcode{str()} for compact display of the content of an object \item \Rcode{showMethods()} to discover methods \item \Rcode{selectMethod()} to see the code \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Implementing an S4 class (in 4 slides)} \begin{frame}[fragile] \frametitle{Class definition and constructor} \begin{block}{Class definition} {\footnotesize <>= setClass("SNPLocations", slots=c( genome="character", # a single string snpid="character", # a character vector of length N chrom="character", # a character vector of length N pos="integer" # an integer vector of length N ) ) @ } \end{block} \begin{block}{Constructor} {\footnotesize <>= SNPLocations <- function(genome, snpid, chrom, pos) new("SNPLocations", genome=genome, snpid=snpid, chrom=chrom, pos=pos) @ <>= snplocs <- SNPLocations("hg19", c("rs0001", "rs0002"), c("chr1", "chrX"), c(224033L, 1266886L)) @ } \end{block} \end{frame} \begin{frame}[fragile] \frametitle{Getters} \begin{block}{Defining the \Rfunction{length} method} {\footnotesize <>= setMethod("length", "SNPLocations", function(x) length(x@snpid)) @ <>= length(snplocs) # just testing @ } \end{block} \begin{block}{Defining the slot getters} {\footnotesize <>= setGeneric("genome", function(x) standardGeneric("genome")) setMethod("genome", "SNPLocations", function(x) x@genome) @ <>= setGeneric("snpid", function(x) standardGeneric("snpid")) setMethod("snpid", "SNPLocations", function(x) x@snpid) @ <>= setGeneric("chrom", function(x) standardGeneric("chrom")) setMethod("chrom", "SNPLocations", function(x) x@chrom) @ <>= setGeneric("pos", function(x) standardGeneric("pos")) setMethod("pos", "SNPLocations", function(x) x@pos) @ <>= genome(snplocs) # just testing snpid(snplocs) # just testing @ } \end{block} \end{frame} \begin{frame}[fragile] \frametitle{} \begin{block}{Defining the \Rfunction{show} method} {\footnotesize <>= setMethod("show", "SNPLocations", function(object) cat(class(object), "instance with", length(object), "SNPs on genome", genome(object), "\n") ) @ <<>>= snplocs # just testing @ } \end{block} \begin{block}{Defining the \textit{validity method}} {\footnotesize <>= setValidity("SNPLocations", function(object) { if (!is.character(genome(object)) || length(genome(object)) != 1 || is.na(genome(object))) return("'genome' slot must be a single string") slot_lengths <- c(length(snpid(object)), length(chrom(object)), length(pos(object))) if (length(unique(slot_lengths)) != 1) return("lengths of slots 'snpid', 'chrom' and 'pos' differ") TRUE } ) @ \begin{Schunk} \begin{Sinput} > snplocs@chrom <- LETTERS[1:3] # a very bad idea! > validObject(snplocs) \end{Sinput} \begin{Soutput} Error in validObject(snplocs) : invalid class "SNPLocations" object: lengths of slots 'snpid', 'chrom' and 'pos' differ \end{Soutput} \end{Schunk} } \end{block} \end{frame} \begin{frame}[fragile] \frametitle{} \begin{block}{Defining slot setters} {\footnotesize <>= setGeneric("chrom<-", function(x, value) standardGeneric("chrom<-")) setReplaceMethod("chrom", "SNPLocations", function(x, value) {x@chrom <- value; validObject(x); x}) @ <>= chrom(snplocs) <- LETTERS[1:2] # repair currently broken object @ \begin{Schunk} \begin{Sinput} > chrom(snplocs) <- LETTERS[1:3] # try to break it again \end{Sinput} \begin{Soutput} Error in validObject(x) : invalid class "SNPLocations" object: lengths of slots 'snpid', 'chrom' and 'pos' differ \end{Soutput} \end{Schunk} } \end{block} \begin{block}{Defining a coercion method} {\footnotesize <>= setAs("SNPLocations", "data.frame", function(from) data.frame(snpid=snpid(from), chrom=chrom(from), pos=pos(from)) ) @ <>= as(snplocs, "data.frame") # testing @ } \end{block} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Extending an existing class} \begin{frame}[fragile] \frametitle{Slot inheritance} \begin{itemize} \item Most of the time (but not always), the child class will have additional slots: {\footnotesize <>= setClass("AnnotatedSNPs", contains="SNPLocations", slots=c( geneid="character" # a character vector of length N ) ) @ } \item The slots from the parent class are inherited: {\footnotesize <>= showClass("AnnotatedSNPs") @ } \item Constructor: {\footnotesize <>= AnnotatedSNPs <- function(genome, snpid, chrom, pos, geneid) { new("AnnotatedSNPs", SNPLocations(genome, snpid, chrom, pos), geneid=geneid) } @ } \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{Method inheritance} \begin{itemize} \item Let's create an AnnotatedSNPs object: {\footnotesize <>= snps <- AnnotatedSNPs("hg19", c("rs0001", "rs0002"), c("chr1", "chrX"), c(224033L, 1266886L), c("AAU1", "SXW-23")) @ } \item All the methods defined for SNPLocations objects work out-of-the-box: {\footnotesize <>= snps @ } \item But sometimes they don't do the right thing: {\footnotesize <>= as(snps, "data.frame") # the 'geneid' slot is ignored @ } \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{} \begin{itemize} \item Being a SNPLocations \emph{object} vs being a SNPLocations \emph{instance}: {\footnotesize <<>>= is(snps, "AnnotatedSNPs") # 'snps' is an AnnotatedSNPs object is(snps, "SNPLocations") # and is also a SNPLocations object class(snps) # but is *not* a SNPLocations *instance* @ } \item Method overriding: for example we could define a \Rfunction{show} method for AnnotatedSNPs objects. \Rfunction{callNextMethod} can be used in that context to call the method defined for the parent class from within the method for the child class. \item Automatic coercion method: {\footnotesize <>= as(snps, "SNPLocations") @ } \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{Incremental validity method} \begin{itemize} \item The \textit{validity method} for AnnotatedSNPs objects only needs to validate what's not already validated by the \textit{validity method} for SNPLocations objects: {\footnotesize <>= setValidity("AnnotatedSNPs", function(object) { if (length(object@geneid) != length(object)) return("'geneid' slot must have the length of the object") TRUE } ) @ } \item In other words: before an AnnotatedSNPs object can be considered valid, it must first be a valid SNPLocations object. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{What else?} \begin{frame}[fragile] \frametitle{} \begin{block}{Other important S4 features} \begin{itemize} \item \textit{Virtual} classes: equivalent to \textit{abstract} classes in Java \item Class unions (see \Rcode{?setClassUnion}) \item Multiple inheritance: a powerful feature that should be used with caution. If used inappropriately, can lead to a class hierarchy that is very hard to maintain \end{itemize} \end{block} \begin{block}{Resources} \begin{itemize} \item Man pages in the \Rpackage{methods} package: \Rcode{?setClass}, \Rcode{?showMethods}, \Rcode{?selectMethod}, \Rcode{?getMethod}, \Rcode{?is}, \Rcode{?setValidity}, \Rcode{?as} \item The \textit{Extending RangedSummarizedExperiment} section of the \textit{SummarizedExperiment} vignette in the \Rpackage{SummarizedExperiment} package. \item Note: S4 is \emph{not} covered in the \textit{An Introduction to R} or \textit{The R language definition} manuals\footnote{http://cran.fhcrc.org/manuals.html} \item The \emph{Writing R Extensions} manual for details about integrating S4 classes to a package \item The \textit{R Programming for Bioinformatics} book by Robert Gentleman\footnote{http://bioconductor.org/help/publications/books/r-programming-for-bioinformatics/} \end{itemize} \end{block} \end{frame} \end{document} S4Vectors/vignettes/slides.sty0000644000175400017540000000211313175714520017472 0ustar00biocbuildbiocbuild\usepackage{Sweave} \usepackage{color, graphics} \usepackage{latexsym, amsmath, amssymb} %% simple macros \newcommand{\software}[1]{\textsl{#1}} \newcommand\R{\textsl{R}} \newcommand\Bioconductor{\textsl{Bioconductor}} \newcommand\Rpackage[1]{{\textsl{#1}\index{#1 (package)}}} \newcommand\Biocpkg[1]{% {\href{http://bioconductor.org/packages/release/bioc/html/#1.html}% {\textsl{#1}}}% \index{#1 (package)}} \newcommand\Rpkg[1]{% {\href{http://cran.fhcrc.org/web/packages/#1/index.html}% {\textsl{#1}}}% \index{#1 (package)}} \newcommand\Biocdatapkg[1]{% {\href{http://bioconductor.org/packages/release/data/experiment/html/#1.html}% {\textsl{#1}}}% \index{#1 (package)}} \newcommand\Robject[1]{{\small\texttt{#1}}} \newcommand\Rclass[1]{{\textit{#1}\index{#1 (class)}}} \newcommand\Rfunction[1]{{{\small\texttt{#1}}\index{#1 (function)}}} \newcommand\Rmethod[1]{{\texttt{#1}}} \newcommand\Rfunarg[1]{{\small\texttt{#1}}} \newcommand\Rcode[1]{{\small\texttt{#1}}} %% \AtBeginSection[] %% { %% \begin{frame}{Outline} %% \tableofcontents %% \end{frame} %% }