stringr/0000755000176000001440000000000012060045717011774 5ustar ripleyusersstringr/MD50000644000176000001440000000577612060045717012323 0ustar ripleyusersc14761e3000b5eaae6ff0aa5262a6630 *DESCRIPTION 0cca89f86586b39903872782ad0c2d92 *NAMESPACE 84346581b10f2d04598438e428488268 *NEWS 3d81c2717c2e42df6b55d2b264502c0f *R/c.r 1711405698cf3015be163e208f1eff40 *R/checks.r e55ebb86acca2cb4ee3026c7f2694457 *R/count.r 729c91c257c5b4642c03a4db03f5f96a *R/detect.r 93397237016f3ceaedbe6d2a4b1548b4 *R/dup.r 2293db9f73f80f83f15286b5531c9766 *R/extract.r 2b8caff655dbdccedc752c496c1f1041 *R/length.r 95d57be6ed4e6e8786d39a0797098d72 *R/locate.r 37072fa39b5a3a41f0cf0b54b6341a2e *R/match.r b650885277dba55b82664f2edf42aeec *R/modifiers.r 27735fdcdf9c7bb201bf148cef58e7a5 *R/pad-trim.r 44165abbb4926ee6bee46f3b94f425ef *R/replace.r f84ed242cc4fbc0b817e6fb9178388dd *R/split.r 6f82f94cbc0d13367b53828923db57a2 *R/sub.r 524e1f157dac5ce09334df6c6ae0c774 *R/utils.r b5557d805bac52a3e4c8ce27e5a02c35 *R/vectorise.r 59b72fb1d808b48dc72ee1d22bb07f03 *R/word.r 83d187f96099d2fb3e0e74fde6c5f180 *R/wrap.r 1234794765eabc33a0e77211b8681688 *README.md f24da3c0d81e9d8fb6dc9eab10f303dd *inst/tests/test-check.r 61f9d77768cf9ff813d382f9337178fb *inst/tests/test-count.r d2a6a58e44de1968cf46bd3e8c2d0e26 *inst/tests/test-detect.r 065f752787f210c753d5bb5feea7f7a5 *inst/tests/test-dup.r bcdf3dd9ddd2d00d189d43eac13347a8 *inst/tests/test-extract.r 8f03149944d3937c9b5a30d686c6e492 *inst/tests/test-join.r 922366c3451f88871b9ce063529edb7a *inst/tests/test-length.r 76249df3c11c62fb11aef63899029790 *inst/tests/test-locate.r d723b2fc4e6682042b9ac6339f2b4bdf *inst/tests/test-match.r 3cfc28d6785f4a8c0796a7980c9aac90 *inst/tests/test-pad.r f339473f66b14267ec4b86db14b97820 *inst/tests/test-split.r c95563eafc4fad4c60504ae59225b9d0 *inst/tests/test-sub.r 7dc6b256c7c2d3af1483b84698494819 *inst/tests/test-trim.r fad8767b5232cda34cea611d7e461796 *man/fixed.Rd 828127f0f7f43842bd27ebe9e366a737 *man/ignore.case.Rd 
8aca10a194e602482768ad9540784f18 *man/invert_match.Rd 9c4e9545741520f8728598ddd46acc1d *man/perl.Rd e7299ef80cd457c767f4c4701ac7ff1c *man/str_c.Rd acb3d1faa6c4d880146abd057dda2a13 *man/str_count.Rd 3d113fd04cb4e133aa847b9919b26e94 *man/str_detect.Rd 3da6dedbe73f2cd8ab0b220fddb05265 *man/str_dup.Rd 4b7252acee53920f9489554eba32ff47 *man/str_extract.Rd 421188a3864ff18626b8cb6b602ed257 *man/str_extract_all.Rd 80947dce676306790aec00a00bc71de5 *man/str_length.Rd a5719d08876a471dbc136e9c69a80b17 *man/str_locate.Rd 4fb3fa0632efe8eb22aa9aec33d27c41 *man/str_locate_all.Rd 044068e21a9d3883c9568dbe81c2aefb *man/str_match.Rd 28e18ed2325d9fbf4bf1cb9a343c269f *man/str_match_all.Rd 499efab9a76d60d78c58ba641c9dd761 *man/str_pad.Rd ea6ff066d63136cacb172a2b0c4cf5c2 *man/str_replace.Rd 7fac10186ba22f54a80021913ffd878a *man/str_replace_all.Rd 93e6eea98bd572829b8e164384673a2a *man/str_split.Rd 5031e079d68b1e5ff4dd034e0dfe4cac *man/str_split_fixed.Rd c397afe69fc8dc833a0ed0990a9abce5 *man/str_sub.Rd bf353bfff3f33db800910f7cf498c6c4 *man/str_sub_replace.Rd 2ac1d755e7a56c11ab8d08fb8b692e33 *man/str_trim.Rd ae0c6fcf7ea0086ab5e87b6d25d23c8b *man/str_wrap.Rd 62d4953c6ee32543df92481fe26e9377 *man/word.Rd 37129f1e586caa1da9010b485199f41b *tests/test-all.R stringr/tests/0000755000176000001440000000000011434472436013144 5ustar ripleyusersstringr/tests/test-all.R0000644000176000001440000000007412043564425015012 0ustar ripleyuserslibrary(testthat) library(stringr) test_package("stringr") stringr/README.md0000644000176000001440000000213512043564425013257 0ustar ripleyusers# stringr Strings are not glamorous, high-profile components of R, but they do play a big role in many data cleaning and preparations tasks. R provides a solid set of string operations, but because they have grown organically over time, they can be inconsistent and a little hard to learn. 
Additionally, they lag behind the string operations in other programming languages, so that some things that are easy to do in languages like Ruby or Python are rather hard to do in R. The `stringr` package aims to remedy these problems by providing a clean, modern interface to common string operations. More concretely, `stringr`: * Processes factors and characters in the same way. * Gives functions consistent names and arguments. * Simplifies string operations by eliminating options that you don't need 95% of the time. * Produces outputs than can easily be used as inputs. This includes ensuring that missing inputs result in missing outputs, and zero length inputs result in zero length outputs. * Completes R's string handling functions with useful functions from other programming languages. stringr/R/0000755000176000001440000000000012043565031012172 5ustar ripleyusersstringr/R/wrap.r0000644000176000001440000000231612057461575013345 0ustar ripleyusers#' Wrap strings into nicely formatted paragraphs. #' #' This is currently implemented as thin wrapper over \code{\link{strwrap}}, #' but is vectorised over \code{stringr}, and collapses output into single #' strings. See \code{\link{strwrap}} for more details. #' #' @param string character vector of strings to reformat. #' @param width positive integer giving target line width in characters. #' @param indent non-negative integer giving indentation of first line in #' each paragraph #' @param exdent non-negative integer giving indentation of following lines in #' each paragraph #' @return a character vector of reformatted strings. 
#' @export #' @examples #' thanks_path <- file.path(R.home("doc"), "THANKS") #' thanks <- str_c(readLines(thanks_path), collapse = "\n") #' thanks <- word(thanks, 1, 3, fixed("\n\n")) #' cat(str_wrap(thanks), "\n") #' cat(str_wrap(thanks, width = 40), "\n") #' cat(str_wrap(thanks, width = 60, indent = 2), "\n") #' cat(str_wrap(thanks, width = 60, exdent = 2), "\n") str_wrap <- function(string, width = 80, indent = 0, exdent = 0) { string <- check_string(string) pieces <- strwrap(string, width, indent, exdent, simplify = FALSE) unlist(lapply(pieces, str_c, collapse = "\n")) } stringr/R/word.r0000644000176000001440000000341012043564425013334 0ustar ripleyusers#' Extract words from a sentence. #' #' @param string input character vector. #' @param start integer vector giving position of first word to extract. #' Defaults to first word. If negative, counts backwards from last #' character. #' @param end integer vector giving position of last word to extract. #' Defaults to first word. If negative, counts backwards from last #' character. #' @param sep separator between words. Defaults to single space. #' @return character vector of words from \code{start} to \code{end} #' (inclusive). Will be length of longest input argument. 
#' @export #' @examples #' sentences <- c("Jane saw a cat", "Jane sat down") #' word(sentences, 1) #' word(sentences, 2) #' word(sentences, -1) #' word(sentences, 2, -1) #' #' # Also vectorised over start and end #' word(sentences[1], 1:3, -1) #' word(sentences[1], 1, 1:4) #' #' # Can define words by other separators #' str <- 'abc.def..123.4568.999' #' word(str, 1, sep = fixed('..')) #' word(str, 2, sep = fixed('..')) word <- function(string, start = 1L, end = start, sep = fixed(" ")) { n <- max(length(string), length(start), length(end)) string <- rep(string, length = n) start <- rep(start, length = n) end <- rep(end, length = n) breaks <- str_locate_all(string, sep) words <- lapply(breaks, invert_match) # Convert negative values into actual positions len <- vapply(words, nrow, integer(1)) neg_start <- !is.na(start) & start < 0L start[neg_start] <- start[neg_start] + len[neg_start] + 1L neg_end <- !is.na(end) & end < 0L end[neg_end] <- end[neg_end] + len[neg_end] + 1L # Extract locations starts <- mapply(function(word, loc) word[loc, "start"], words, start) ends <- mapply(function(word, loc) word[loc, "end"], words, end) str_sub(string, starts, ends) } stringr/R/vectorise.r0000644000176000001440000000217012043564425014366 0ustar ripleyusers# General wrapper around sub, gsub, regexpr, gregexpr, grepl. # Vectorises with pattern and replacement, and uses fixed and ignored.case # attributes. 
re_call <- function(f, string, pattern, replacement = NULL) { args <- list(pattern, replacement, string, fixed = is.fixed(pattern), ignore.case = case.ignored(pattern), perl = is.perl(pattern)) if (!("perl" %in% names(formals(f)))) { if (args$perl) message("Perl regexps not supported by ", f) args$perl <- NULL } do.call(f, compact(args)) } re_mapply <- function(f, string, pattern, replacement = NULL) { args <- list( FUN = f, SIMPLIFY = FALSE, USE.NAMES = FALSE, pattern, replacement, string, MoreArgs = list( fixed = is.fixed(pattern), ignore.case = case.ignored(pattern)) ) do.call("mapply", compact(args)) } # Check if a set of vectors is recyclable. # Ignores zero length vectors. Trivially TRUE if all inputs are zero length. recyclable <- function(...) { lengths <- vapply(list(...), length, integer(1)) lengths <- lengths[lengths != 0] if (length(lengths) == 0) return(TRUE) all(max(lengths) %% lengths == 0) } stringr/R/utils.r0000644000176000001440000000006212043565031013513 0ustar ripleyuserscompact <- function(l) Filter(Negate(is.null), l) stringr/R/sub.r0000644000176000001440000000631112043564425013155 0ustar ripleyusers#' Extract substrings from a character vector. #' #' \code{str_sub} will recycle all arguments to be the same length as the #' longest argument. If any arguments are of length 0, the output will be #' a zero length character vector. #' #' Substrings are inclusive - they include the characters at both start and #' end positions. \code{str_sub(string, 1, -1)} will return the complete #' substring, from the first character to the last. #' #' @param string input character vector. #' @param start integer vector giving position of first charater in substring, #' defaults to first character. If negative, counts backwards from last #' character. #' @param end integer vector giving position of last character in substring, #' defaults to last character. If negative, counts backwards from last #' character. 
#' @return character vector of substring from \code{start} to \code{end} #' (inclusive). Will be length of longest input argument. #' @keywords character #' @seealso \code{\link{substring}} which this function wraps, and #' \code{link{str_sub_replace}} for the replacement version #' @export #' @examples #' hw <- "Hadley Wickham" #' #' str_sub(hw, 1, 6) #' str_sub(hw, end = 6) #' str_sub(hw, 8, 14) #' str_sub(hw, 8) #' str_sub(hw, c(1, 8), c(6, 14)) #' #' str_sub(hw, -1) #' str_sub(hw, -7) #' str_sub(hw, end = -7) #' #' str_sub(hw, seq_len(str_length(hw))) #' str_sub(hw, end = seq_len(str_length(hw))) str_sub <- function(string, start = 1L, end = -1L) { if (length(string) == 0L || length(start) == 0L || length(end) == 0L) { return(vector("character", 0L)) } string <- check_string(string) n <- max(length(string), length(start), length(end)) string <- rep(string, length = n) start <- rep(start, length = n) end <- rep(end, length = n) # Convert negative values into actual positions len <- str_length(string) neg_start <- !is.na(start) & start < 0L start[neg_start] <- start[neg_start] + len[neg_start] + 1L neg_end <- !is.na(end) & end < 0L end[neg_end] <- end[neg_end] + len[neg_end] + 1L substring(string, start, end) } #' Replace substrings in a character vector. # #' \code{str_sub<-} will recycle all arguments to be the same length as the #' longest argument. #' #' @param string input character vector. #' @param start integer vector giving position of first charater in substring, #' defaults to first character. If negative, counts backwards from last #' character. #' @param end integer vector giving position of last character in substring, #' defaults to last character. If negative, counts backwards from last #' character. #' @param value replacement string #' @return character vector of substring from \code{start} to \code{end} #' (inclusive). Will be length of longest input argument. 
#' @name str_sub_replace #' @aliases str_sub<- str_sub_replace #' @usage str_sub(string, start = 1L, end = -1L) <- value #' @export "str_sub<-" #' @examples #' x <- "BBCDEF" #' str_sub(x, 1, 1) <- "A"; x #' str_sub(x, -1, -1) <- "K"; x #' str_sub(x, -2, -2) <- "GHIJ"; x #' str_sub(x, 2, -2) <- ""; x "str_sub<-" <- function(string, start = 1L, end = -1L, value) { str_c( str_sub(string, end = start - 1L), value, ifelse(end == -1L, "", str_sub(string, start = end + 1L))) } stringr/R/split.r0000644000176000001440000000710012043565031013506 0ustar ripleyusers#' Split up a string into a fixed number of pieces. #' #' Vectorised over \code{string}. \code{pattern} should be a single pattern, #' i.e. a character vector of length one. #' #' @param string input character vector #' @param pattern pattern to split up by, as defined by a POSIX regular #' expression. See the ``Extended Regular Expressions'' section of #' \code{\link{regex}} for details. If \code{NA}, returns original string. #' If \code{""} splits into individual characters. #' @param n number of pieces to return. Default (Inf) uses all #' possible split positions. If n is greater than the number of pieces, #' the result will be padded with empty strings. #' @return character matrix with \code{n} columns. #' @keywords character #' @seealso \code{\link{str_split}} for variable number of splits #' @export #' @examples #' fruits <- c( #' "apples and oranges and pears and bananas", #' "pineapples and mangos and guavas" #' ) #' str_split_fixed(fruits, " and ", 3) #' str_split_fixed(fruits, " and ", 4) str_split_fixed <- function(string, pattern, n) { if (length(string) == 0) { return(matrix(character(), nrow = 0, ncol = n)) } string <- check_string(string) pattern <- check_pattern(pattern, string) if (!is.numeric(n) || length(n) != 1) { stop("n should be a numeric vector of length 1") } if (n == Inf) { stop("n must be finite", call. 
= FALSE) } else if (n == 1) { matrix(string, ncol = 1) } else { locations <- str_locate_all(string, pattern) do.call("rbind", lapply(seq_along(locations), function(i) { location <- locations[[i]] string <- string[i] pieces <- min(n - 1, nrow(location)) cut <- location[seq_len(pieces), , drop = FALSE] keep <- invert_match(cut) padding <- rep("", n - pieces - 1) c(str_sub(string, keep[, 1], keep[, 2]), padding) })) } } #' Split up a string into a variable number of pieces. #' #' Vectorised over \code{string}. \code{pattern} should be a single pattern, #' i.e. a character vector of length one. #' #' @param string input character vector #' @param pattern pattern to split up by, as defined by a POSIX regular #' expression. See the ``Extended Regular Expressions'' section of #' \code{\link{regex}} for details. If \code{NA}, returns original string. #' If \code{""} splits into individual characters. #' @param n maximum number of pieces to return. Default (Inf) uses all #' possible split positions. #' @return a list of character vectors. 
#' @keywords character #' @export #' @seealso \code{\link{str_split_fixed}} for fixed number of splits #' @examples #' fruits <- c( #' "apples and oranges and pears and bananas", #' "pineapples and mangos and guavas" #' ) #' str_split(fruits, " and ") #' #' # Specify n to restrict the number of possible matches #' str_split(fruits, " and ", n = 3) #' str_split(fruits, " and ", n = 2) #' # If n greater than number of pieces, no padding occurs #' str_split(fruits, " and ", n = 5) str_split <- function(string, pattern, n = Inf) { if (length(string) == 0) return(list()) string <- check_string(string) pattern <- check_pattern(pattern, string) if (!is.numeric(n) || length(n) != 1) { stop("n should be a numeric vector of length 1") } if (n == 1) { as.list(string) } else { locations <- str_locate_all(string, pattern) pieces <- function(mat, string) { cut <- mat[seq_len(min(n - 1, nrow(mat))), , drop = FALSE] keep <- invert_match(cut) str_sub(string, keep[, 1], keep[, 2]) } mapply(pieces, locations, string, SIMPLIFY = FALSE, USE.NAMES = FALSE) } } stringr/R/replace.r0000644000176000001440000000504012043564425013775 0ustar ripleyusers#' Replace first occurrence of a matched pattern in a string. #' #' Vectorised over \code{string}, \code{pattern} and \code{replacement}. #' Shorter arguments will be expanded to length of longest. #' #' @inheritParams str_detect #' @param replacement replacement string. References of the form \code{\1}, #' \code{\2} will be replaced with the contents of the respective matched #' group (created by \code{()}) within the pattern. #' @return character vector. 
#' @keywords character #' @seealso \code{\link{sub}} which this function wraps, #' \code{\link{str_replace_all}} to replace all matches #' @export #' @examples #' fruits <- c("one apple", "two pears", "three bananas") #' str_replace(fruits, "[aeiou]", "-") #' str_replace_all(fruits, "[aeiou]", "-") #' #' str_replace(fruits, "([aeiou])", "") #' str_replace(fruits, "([aeiou])", "\\1\\1") #' str_replace(fruits, "[aeiou]", c("1", "2", "3")) #' str_replace(fruits, c("a", "e", "i"), "-") str_replace <- function(string, pattern, replacement) { string <- check_string(string) pattern <- check_pattern(pattern, string, replacement) if (length(pattern) == 1 && length(replacement) == 1) { re_call("sub", string, pattern, replacement) } else { unlist(re_mapply("sub", string, pattern, replacement)) } } #' Replace all occurrences of a matched pattern in a string. #' #' Vectorised over \code{string}, \code{pattern} and \code{replacement}. #' Shorter arguments will be expanded to length of longest. #' #' @inheritParams str_detect #' @param replacement replacement string. References of the form \code{\1}, #' \code{\2} will be replaced with the contents of the respective matched #' group (created by \code{()}) within the pattern. #' @return character vector. 
#' @keywords character #' @seealso \code{\link{gsub}} which this function wraps, #' \code{\link{str_replace}} to replace a single match #' @export #' @examples #' fruits <- c("one apple", "two pears", "three bananas") #' str_replace(fruits, "[aeiou]", "-") #' str_replace_all(fruits, "[aeiou]", "-") #' #' str_replace_all(fruits, "([aeiou])", "") #' str_replace_all(fruits, "([aeiou])", "\\1\\1") #' str_replace_all(fruits, "[aeiou]", c("1", "2", "3")) #' str_replace_all(fruits, c("a", "e", "i"), "-") str_replace_all <- function(string, pattern, replacement) { string <- check_string(string) pattern <- check_pattern(pattern, string, replacement) if (length(pattern) == 1 && length(replacement) == 1) { re_call("gsub", string, pattern, replacement) } else { unlist(re_mapply("gsub", string, pattern, replacement)) } } stringr/R/pad-trim.r0000644000176000001440000000416512043564425014106 0ustar ripleyusers#' Pad a string. #' #' Vectorised over \code{string}. All other inputs should be of length 1. 
#' #' @param string input character vector #' @param width pad strings to this minimum width #' @param side side on which padding character is added (left, right or both) #' @param pad single padding character (default is a space) #' @return character vector #' @seealso \code{\link{str_trim}} to remove whitespace #' @keywords character #' @export #' @examples #' rbind( #' str_pad("hadley", 30, "left"), #' str_pad("hadley", 30, "right"), #' str_pad("hadley", 30, "both") #' ) #' # Longer strings are returned unchanged #' str_pad("hadley", 3) str_pad <- function(string, width, side = "left", pad = " ") { string <- check_string(string) stopifnot(length(width) == 1) stopifnot(length(side) == 1) stopifnot(length(pad) == 1) if (str_length(pad) != 1) { stop("pad must be single character single") } side <- match.arg(side, c("left", "right", "both")) needed <- pmax(0, width - str_length(string)) left <- switch(side, left = needed, right = 0, both = floor(needed / 2)) right <- switch(side, left = 0, right = needed, both = ceiling(needed / 2)) # String duplication is slow, so only do the absolute necessary lengths <- unique(c(left, right)) padding <- str_dup(pad, lengths) str_c(padding[match(left, lengths)], string, padding[match(right, lengths)]) } #' Trim whitespace from start and end of string. 
#' #' @param string input character vector #' @param side side on which whitespace is removed (left, right or both) #' @return character vector with leading and trailing whitespace removed #' @keywords character #' @export #' @seealso \code{\link{str_pad}} to add whitespace #' @examples #' str_trim(" String with trailing and leading white space\t") #' str_trim("\n\nString with trailing and leading white space\n\n") str_trim <- function(string, side = "both") { string <- check_string(string) stopifnot(length(side) == 1) side <- match.arg(side, c("left", "right", "both")) pattern <- switch(side, left = "^\\s+", right = "\\s+$", both = "^\\s+|\\s+$") str_replace_all(string, pattern, "") } stringr/R/modifiers.r0000644000176000001440000000365512043564425014355 0ustar ripleyusers#' Match fixed characters, not regular expression. #' #' This function specifies that a pattern is a fixed string, rather #' than a regular expression. This can yield substantial speed ups, if #' regular expression matching is not needed. #' #' @param string string to match exactly as is #' @family modifiers #' @keywords character #' @export #' @examples #' pattern <- "a.b" #' strings <- c("abb", "a.b") #' str_detect(strings, pattern) #' str_detect(strings, fixed(pattern)) fixed <- function(string) { if (is.perl(string)) message("Overriding Perl regexp matching") structure(string, fixed = TRUE) } is.fixed <- function(string) { fixed <- attr(string, "fixed") if (is.null(fixed)) FALSE else fixed } #' Ignore case of match. #' #' This function specifies that a pattern should ignore the case of matches. 
#' #' @param string pattern for which to ignore case #' @keywords character #' @family modifiers #' @export #' @examples #' pattern <- "a.b" #' strings <- c("ABB", "aaB", "aab") #' str_detect(strings, pattern) #' str_detect(strings, ignore.case(pattern)) ignore.case <- function(string) { structure(string, ignore.case = TRUE) } case.ignored <- function(string) { ignore.case <- attr(string, "ignore.case") if (is.null(ignore.case)) FALSE else ignore.case } #' Use perl regular expressions. #' #' This function specifies that a pattern should use the Perl regular #' expression egine, rather than the default POSIX 1003.2 extended #' regular expressions #' #' @param string pattern to match with Perl regexps #' @family modifiers #' @keywords character #' @export #' @examples #' pattern <- "(?x)a.b" #' strings <- c("abb", "a.b") #' \dontrun{str_detect(strings, pattern)} #' str_detect(strings, perl(pattern)) perl <- function(string) { if (is.fixed(string)) message("Overriding fixed matching") structure(string, perl = TRUE) } is.perl <- function(string) { perl <- attr(string, "perl") if (is.null(perl)) FALSE else perl } stringr/R/match.r0000644000176000001440000000472212043565031013456 0ustar ripleyusers#' Extract first matched group from a string. #' #' Vectorised over \code{string}. \code{pattern} should be a single pattern, #' i.e. a character vector of length one. #' #' @inheritParams str_detect #' @param pattern pattern to look for, as defined by a POSIX regular #' expression. Pattern should contain groups, defined by (). See the #' ``Extended Regular Expressions'' section of \code{\link{regex}} for #' details. #' @return character matrix. 
First column is the complete match, followed by #' one for each capture group #' @keywords character #' @export #' @examples #' strings <- c(" 219 733 8965", "329-293-8753 ", "banana", "595 794 7569", #' "387 287 6718", "apple", "233.398.9187 ", "482 952 3315", #' "239 923 8115", "842 566 4692", "Work: 579-499-7527", "$1000", #' "Home: 543.355.3679") #' phone <- "([2-9][0-9]{2})[- .]([0-9]{3})[- .]([0-9]{4})" #' #' str_extract(strings, phone) #' str_match(strings, phone) str_match <- function(string, pattern) { string <- check_string(string) pattern <- check_pattern(pattern, string) if (length(string) == 0) return(character()) matcher <- re_call("regexec", string, pattern) matches <- regmatches(string, matcher) # Figure out how many groups there are and coerce into a matrix with # nmatches + 1 columns tmp <- str_replace_all(pattern, "\\\\\\(", "") n <- str_length(str_replace_all(tmp, "[^(]", "")) + 1 len <- vapply(matches, length, integer(1)) matches[len == 0] <- rep(list(rep(NA_character_, n)), sum(len == 0)) do.call("rbind", matches) } #' Extract all matched groups from a string. #' #' Vectorised over \code{string}. \code{pattern} should be a single pattern, #' i.e. a character vector of length one. #' #' @inheritParams str_detect #' @param pattern pattern to look for, as defined by a POSIX regular #' expression. Pattern should contain groups, defined by (). See the #' ``Extended Regular Expressions'' section of \code{\link{regex}} for #' details. #' @return list of character matrices, as given by \code{\link{str_match}} #' @keywords character #' @export #' @examples #' strings <- c("Home: 219 733 8965. 
Work: 229-293-8753 ", #' "banana pear apple", "595 794 7569 / 387 287 6718") #' phone <- "([2-9][0-9]{2})[- .]([0-9]{3})[- .]([0-9]{4})" #' #' str_extract_all(strings, phone) #' str_match_all(strings, phone) str_match_all <- function(string, pattern) { matches <- str_extract_all(string, pattern) lapply(matches, function(match) { str_match(match, pattern) }) } stringr/R/locate.r0000644000176000001440000000664712043565031013641 0ustar ripleyusers#' Locate the position of the first occurence of a pattern in a string. #' #' Vectorised over \code{string} and \code{pattern}, shorter is recycled to #' same length as longest. #' #' @inheritParams str_detect #' @return integer matrix. First column gives start postion of match, and #' second column gives end position. #' @keywords character #' @seealso #' \code{\link{regexpr}} which this function wraps #' #' \code{\link{str_extract}} for a convenient way of extracting matches # #' \code{\link{str_locate_all}} to locate position of all matches #' #' @export #' @examples #' fruit <- c("apple", "banana", "pear", "pinapple") #' str_locate(fruit, "a") #' str_locate(fruit, "e") #' str_locate(fruit, c("a", "b", "p", "p")) str_locate <- function(string, pattern) { string <- check_string(string) pattern <- check_pattern(pattern, string) if (length(pattern) == 1) { results <- re_call("regexpr", string, pattern) match_to_matrix(results) } else { results <- re_mapply("regexpr", string, pattern) out <- t(vapply(results, match_to_matrix, integer(2))) colnames(out) <- c("start", "end") out } } #' Locate the position of all occurences of a pattern in a string. #' #' Vectorised over \code{string} and \code{pattern}, shorter is recycled to #' same length as longest. #' #' If the match is of length 0, (e.g. from a special match like \code{$}) #' end will be one character less than start. #' #' @inheritParams str_detect #' @keywords character #' @return list of integer matrices. 
First column gives start postion of #' match, and second column gives end position. #' @seealso #' \code{\link{regexpr}} which this function wraps #' #' \code{\link{str_extract}} for a convenient way of extracting matches #' #' \code{\link{str_locate}} to locate position of first match #' #' @export #' @examples #' fruit <- c("apple", "banana", "pear", "pineapple") #' str_locate_all(fruit, "a") #' str_locate_all(fruit, "e") #' str_locate_all(fruit, c("a", "b", "p", "p")) str_locate_all <- function(string, pattern) { string <- check_string(string) pattern <- check_pattern(pattern, string) if (length(pattern) == 1) { matches <- re_call("gregexpr", string, pattern) } else { matches <- unlist(re_mapply("gregexpr", string, pattern), recursive = FALSE) } lapply(matches, match_to_matrix, global = TRUE) } # Convert annoying regexpr format to something more useful match_to_matrix <- function(match, global = FALSE) { if (global && length(match) == 1 && (is.na(match) || match == -1)) { null <- matrix(0, nrow = 0, ncol = 2) colnames(null) <- c("start", "end") return(null) } start <- as.vector(match) start[start == -1] <- NA end <- start + attr(match, "match.length") - 1L cbind(start = start, end = end) } #' Switch location of matches to location of non-matches. #' #' Invert a matrix of match locations to match the opposite of what was #' previously matched. 
#' #' @param loc matrix of match locations, as from \code{\link{str_locate_all}} #' @return numeric match giving locations of non-matches #' @export #' @examples #' numbers <- "1 and 2 and 4 and 456" #' num_loc <- str_locate_all(numbers, "[0-9]+")[[1]] #' str_sub(numbers, num_loc[, "start"], num_loc[, "end"]) #' #' text_loc <- invert_match(num_loc) #' str_sub(numbers, text_loc[, "start"], text_loc[, "end"]) invert_match <- function(loc) { cbind( start = c(0L, loc[, "end"] + 1L), end = c(loc[, "start"] - 1L, -1L) ) } stringr/R/length.r0000644000176000001440000000102212043564425013637 0ustar ripleyusers#' The length of a string (in characters). #' #' @inheritParams str_detect #' @return numeric vector giving number of characters in each element of the #' character vector. Missing string have missing length. #' @keywords character #' @seealso \code{\link{nchar}} which this function wraps #' @export #' @examples #' str_length(letters) #' str_length(c("i", "like", "programming", NA)) str_length <- function(string) { string <- check_string(string) nc <- nchar(string, allowNA = TRUE) is.na(nc) <- is.na(string) nc } stringr/R/extract.r0000644000176000001440000000325212043565031014031 0ustar ripleyusers#' Extract first piece of a string that matches a pattern. #' #' Vectorised over \code{string}. \code{pattern} should be a single pattern, #' i.e. a character vector of length one. #' #' @inheritParams str_detect #' @return character vector. 
#' @keywords character #' @seealso \code{\link{str_extract_all}} to extract all matches #' @export #' @examples #' shopping_list <- c("apples x4", "flour", "sugar", "milk x2") #' str_extract(shopping_list, "\\d") #' str_extract(shopping_list, "[a-z]+") #' str_extract(shopping_list, "[a-z]{1,4}") #' str_extract(shopping_list, "\\b[a-z]{1,4}\\b") str_extract <- function(string, pattern) { string <- check_string(string) pattern <- check_pattern(pattern, string) positions <- str_locate(string, pattern) str_sub(string, positions[, "start"], positions[, "end"]) } #' Extract all pieces of a string that match a pattern. #' #' Vectorised over \code{string}. \code{pattern} should be a single pattern, #' i.e. a character vector of length one. #' #' @inheritParams str_detect #' @return list of character vectors. #' @keywords character #' @seealso \code{\link{str_extract}} to extract the first match #' @export #' @examples #' shopping_list <- c("apples x4", "bag of flour", "bag of sugar", "milk x2") #' str_extract_all(shopping_list, "[a-z]+") #' str_extract_all(shopping_list, "\\b[a-z]+\\b") #' str_extract_all(shopping_list, "\\d") str_extract_all <- function(string, pattern) { string <- check_string(string) pattern <- check_pattern(pattern, string) positions <- str_locate_all(string, pattern) lapply(seq_along(string), function(i) { position <- positions[[i]] str_sub(string[i], position[, "start"], position[, "end"]) }) } stringr/R/dup.r0000644000176000001440000000142012043565031013142 0ustar ripleyusers#' Duplicate and concatenate strings within a character vector. #' #' Vectorised over \code{string} and \code{times}. 
#' #' @param string input character vector #' @param times number of times to duplicate each string #' @return character vector #' @keywords character #' @export #' @examples #' fruit <- c("apple", "pear", "banana") #' str_dup(fruit, 2) #' str_dup(fruit, 1:3) #' str_c("ba", str_dup("na", 0:5)) str_dup <- function(string, times) { string <- check_string(string) # Use data frame to do recycling data <- data.frame(string, times) n <- nrow(data) string <- data$string times <- data$times output <- vapply(seq_len(n), function(i) { paste(rep.int(string[i], times[i]), collapse = "") }, character(1)) names(output) <- names(string) output } stringr/R/detect.r0000644000176000001440000000240612043564425013635 0ustar ripleyusers#' Detect the presence or absence of a pattern in a string. #' #' Vectorised over \code{string} and \code{pattern}. #' #' @param string input vector. This must be an atomic vector, and will be #' coerced to a character vector #' @param pattern pattern to look for, as defined by a POSIX regular #' expression. See the ``Extended Regular Expressions'' section of #' \code{\link{regex}} for details. See \code{\link{fixed}}, #' \code{\link{ignore.case}} and \code{\link{perl}} for how to use other #' types of matching: fixed, case insensitive and perl-compatible. 
#' @return boolean vector
#' @seealso \code{\link{grepl}} which this function wraps
#' @keywords character
#' @export
#' @examples
#' fruit <- c("apple", "banana", "pear", "pinapple")
#' str_detect(fruit, "a")
#' str_detect(fruit, "^a")
#' str_detect(fruit, "a$")
#' str_detect(fruit, "b")
#' str_detect(fruit, "[aeiou]")
#'
#' # Also vectorised over pattern
#' str_detect("aecfg", letters)
str_detect <- function(string, pattern) {
  string <- check_string(string)
  pattern <- check_pattern(pattern, string)

  if (length(pattern) == 1) {
    results <- re_call("grepl", string, pattern)
  } else {
    results <- unlist(re_mapply("grepl", string, pattern))
  }
  # Missing strings give a missing result rather than FALSE
  is.na(results) <- is.na(string)

  results
}
stringr/R/count.r0000644000176000001440000000213112043564425013510 0ustar ripleyusers
#' Count the number of matches in a string.
#'
#' Vectorised over \code{string} and \code{pattern}, shorter is recycled to
#' same length as longest.
#'
#' @inheritParams str_detect
#' @keywords character
#' @return integer vector
#' @seealso
#'  \code{\link{regexpr}} which this function wraps
#'
#'  \code{\link{str_locate}}/\code{\link{str_locate_all}} to locate position
#'  of matches
#'
#' @export
#' @examples
#' fruit <- c("apple", "banana", "pear", "pineapple")
#' str_count(fruit, "a")
#' str_count(fruit, "p")
#' str_count(fruit, "e")
#' str_count(fruit, c("a", "b", "p", "p"))
str_count <- function(string, pattern) {
  # Counts are integers, so empty input must give an empty integer vector
  # (the same type the vapply() below produces for non-empty input).
  if (length(string) == 0) return(integer())
  string <- check_string(string)
  pattern <- check_pattern(pattern, string)

  if (length(pattern) == 1) {
    matches <- re_call("gregexpr", string, pattern)
  } else {
    matches <- unlist(re_mapply("gregexpr", string, pattern),
      recursive = FALSE)
  }

  # Turn one gregexpr result into a count: several positions means several
  # matches; a single element is either the "no match" marker -1L or one
  # real match.
  match_length <- function(x) {
    len <- length(x)
    if (len > 1) return(len)
    if (identical(c(x), -1L)) 0L else 1L
  }
  vapply(matches, match_length, integer(1))
}
stringr/R/checks.r0000644000176000001440000000114412043564425013623 0ustar ripleyusers
# Check that string is of the correct type for stringr functions.
# Errors on non-atomic input; coerces any other non-character input to
# character and returns it.
check_string <- function(string) {
  if (!is.atomic(string))
    stop("String must be an atomic vector", call. = FALSE)
  if (!is.character(string))
    string <- as.character(string)
  string
}

# Check that pattern is of the correct type for stringr functions.
# Errors if pattern is not character, or if string, pattern (and the
# optional replacement) cannot be recycled together; returns pattern
# unchanged otherwise.
check_pattern <- function(pattern, string, replacement = NULL) {
  if (!is.character(pattern))
    stop("Pattern must be a character vector", call. = FALSE)

  if (!recyclable(string, pattern, replacement)) {
    # call. = FALSE for consistency with the other checks in this file
    stop("Lengths of string and pattern not compatible", call. = FALSE)
  }

  pattern
}
stringr/R/c.r0000644000176000001440000000322612043565031012602 0ustar ripleyusers
#' Join multiple strings into a single string.
#'
#' To understand how \code{str_c} works, you need to imagine that you are
#' building up a matrix of strings. Each input argument forms a column, and
#' is expanded to the length of the longest argument, using the usual
#' recycling rules. The \code{sep} string is inserted between each column. If
#' collapse is \code{NULL} each row is collapsed into a single string. If
#' non-\code{NULL} that string is inserted at the end of each row, and
#' the entire matrix collapsed to a single string.
#'
#' @param ... one or more character vectors. Zero length arguments
#'   are removed
#' @param sep string to insert between input vectors
#' @param collapse optional string used to combine input vectors into single
#'   string
#' @return If \code{collapse = NULL} (the default) a character vector with
#'   length equal to the longest input string. If \code{collapse} is
#'   non-NULL, a character vector of length 1.
#' @keywords character
#' @seealso \code{\link{paste}} which this function wraps
#' @aliases str_c str_join
#' @export str_c str_join
#' @examples
#' str_c("Letter: ", letters)
#' str_c("Letter", letters, sep = ": ")
#' str_c(letters, " is for", "...")
#' str_c(letters[-26], " comes before ", letters[-1])
#'
#' str_c(letters, collapse = "")
#' str_c(letters, collapse = ", ")
str_c <- str_join <- function(..., sep = "", collapse = NULL) {
  # Zero-length inputs are dropped before pasting.
  inputs <- list(...)
  non_empty <- vapply(inputs, function(x) length(x) > 0, logical(1))
  inputs <- inputs[non_empty]

  # Everything that is left must be an atomic vector.
  all_atomic <- all(vapply(inputs, is.atomic, logical(1)))
  if (!all_atomic) {
    stop("Input to str_c should be atomic vectors", call. = FALSE)
  }

  paste_args <- c(inputs, list(sep = sep, collapse = collapse))
  do.call("paste", paste_args)
}
stringr/NEWS0000644000176000001440000000527512057703460012506 0ustar ripleyusers
stringr 0.6.2 ================ * fixed path in `str_wrap` example so works for more R installations. * remove dependency on plyr stringr 0.6.1 ============= * Zero input to `str_split_fixed` returns 0 row matrix with `n` columns * Export `str_join` stringr 0.6 =========== * new modifier `perl` that switches to Perl regular expressions * `str_match` now uses new base function `regmatches` to extract matches - this should hopefully be faster than my previous pure R algorithm stringr 0.5 =========== * new `str_wrap` function which gives `strwrap` output in a more convenient format * new `word` function extract words from a string given user defined separator (thanks to suggestion by David Cooper) * `str_locate` now returns consistent type when matching empty string (thanks to Stavros Macrakis) * new `str_count` counts number of matches in a string.
* `str_pad` and `str_trim` receive performance tweaks - for large vectors this should give at least a two order of magnitude speed up * str_length returns NA for invalid multibyte strings * fix small bug in internal `recyclable` function stringr 0.4 =========== * all functions now vectorised with respect to string, pattern (and where appropriate) replacement parameters * fixed() function now tells stringr functions to use fixed matching, rather than escaping the regular expression. Should improve performance for large vectors. * new ignore.case() modifier tells stringr functions to ignore case of pattern. * str_replace renamed to str_replace_all and new str_replace function added. This makes str_replace consistent with all functions. * new str_sub<- function (analogous to substring<-) for substring replacement * str_sub now understands negative positions as a position from the end of the string. -1 replaces Inf as indicator for string end. * str_pad side argument can be left, right, or both (instead of center) * str_trim gains side argument to better match str_pad * stringr now has a namespace and imports plyr (rather than requiring it) stringr 0.3 =========== * fixed() now also escapes | * str_join() renamed to str_c() * all functions more carefully check input and return informative error messages if not as expected. * add invert_match() function to convert a matrix of location of matches to locations of non-matches * add fixed() function to allow matching of fixed strings. 
stringr 0.2 =========== * str_length now returns correct results when used with factors * str_sub now correctly replaces Inf in end argument with length of string * new function str_split_fixed returns fixed number of splits in a character matrix * str_split no longer uses strsplit to preserve trailing breaks stringr/NAMESPACE0000644000176000001440000000074212043565031013213 0ustar ripleyusersexport("str_sub<-") export(fixed) export(ignore.case) export(invert_match) export(perl) export(str_c) export(str_count) export(str_detect) export(str_dup) export(str_extract) export(str_extract_all) export(str_join) export(str_length) export(str_locate) export(str_locate_all) export(str_match) export(str_match_all) export(str_pad) export(str_replace) export(str_replace_all) export(str_split) export(str_split_fixed) export(str_sub) export(str_trim) export(str_wrap) export(word) stringr/man/0000755000176000001440000000000012057740140012545 5ustar ripleyusersstringr/man/word.Rd0000644000176000001440000000212612057740140014010 0ustar ripleyusers\name{word} \alias{word} \title{Extract words from a sentence.} \usage{ word(string, start = 1L, end = start, sep = fixed(" ")) } \arguments{ \item{string}{input character vector.} \item{start}{integer vector giving position of first word to extract. Defaults to first word. If negative, counts backwards from last character.} \item{end}{integer vector giving position of last word to extract. Defaults to first word. If negative, counts backwards from last character.} \item{sep}{separator between words. Defaults to single space.} } \value{ character vector of words from \code{start} to \code{end} (inclusive). Will be length of longest input argument. } \description{ Extract words from a sentence. 
} \examples{ sentences <- c("Jane saw a cat", "Jane sat down") word(sentences, 1) word(sentences, 2) word(sentences, -1) word(sentences, 2, -1) # Also vectorised over start and end word(sentences[1], 1:3, -1) word(sentences[1], 1, 1:4) # Can define words by other separators str <- 'abc.def..123.4568.999' word(str, 1, sep = fixed('..')) word(str, 2, sep = fixed('..')) } stringr/man/str_wrap.Rd0000644000176000001440000000213012057740140014671 0ustar ripleyusers\name{str_wrap} \alias{str_wrap} \title{Wrap strings into nicely formatted paragraphs.} \usage{ str_wrap(string, width = 80, indent = 0, exdent = 0) } \arguments{ \item{string}{character vector of strings to reformat.} \item{width}{positive integer giving target line width in characters.} \item{indent}{non-negative integer giving indentation of first line in each paragraph} \item{exdent}{non-negative integer giving indentation of following lines in each paragraph} } \value{ a character vector of reformatted strings. } \description{ This is currently implemented as a thin wrapper over \code{\link{strwrap}}, but is vectorised over \code{string}, and collapses output into single strings. See \code{\link{strwrap}} for more details.
} \examples{ thanks_path <- file.path(R.home("doc"), "THANKS") thanks <- str_c(readLines(thanks_path), collapse = "\\n") thanks <- word(thanks, 1, 3, fixed("\\n\\n")) cat(str_wrap(thanks), "\\n") cat(str_wrap(thanks, width = 40), "\\n") cat(str_wrap(thanks, width = 60, indent = 2), "\\n") cat(str_wrap(thanks, width = 60, exdent = 2), "\\n") } stringr/man/str_trim.Rd0000644000176000001440000000115412057740140014700 0ustar ripleyusers\name{str_trim} \alias{str_trim} \title{Trim whitespace from start and end of string.} \usage{ str_trim(string, side = "both") } \arguments{ \item{string}{input character vector} \item{side}{side on which whitespace is removed (left, right or both)} } \value{ character vector with leading and trailing whitespace removed } \description{ Trim whitespace from start and end of string. } \examples{ str_trim(" String with trailing and leading white space\\t") str_trim("\\n\\nString with trailing and leading white space\\n\\n") } \seealso{ \code{\link{str_pad}} to add whitespace } \keyword{character} stringr/man/str_sub_replace.Rd0000644000176000001440000000213112057740140016205 0ustar ripleyusers\name{str_sub_replace} \alias{str_sub<-} \alias{str_sub_replace} \title{Replace substrings in a character vector. \code{str_sub<-} will recycle all arguments to be the same length as the longest argument.} \usage{ str_sub(string, start = 1L, end = -1L) <- value } \arguments{ \item{string}{input character vector.} \item{start}{integer vector giving position of first charater in substring, defaults to first character. If negative, counts backwards from last character.} \item{end}{integer vector giving position of last character in substring, defaults to last character. If negative, counts backwards from last character.} \item{value}{replacement string} } \value{ character vector of substring from \code{start} to \code{end} (inclusive). Will be length of longest input argument. } \description{ Replace substrings in a character vector. 
\code{str_sub<-} will recycle all arguments to be the same length as the longest argument. } \examples{ x <- "BBCDEF" str_sub(x, 1, 1) <- "A"; x str_sub(x, -1, -1) <- "K"; x str_sub(x, -2, -2) <- "GHIJ"; x str_sub(x, 2, -2) <- ""; x } stringr/man/str_sub.Rd0000644000176000001440000000271312057740140014520 0ustar ripleyusers\name{str_sub} \alias{str_sub} \title{Extract substrings from a character vector.} \usage{ str_sub(string, start = 1L, end = -1L) } \arguments{ \item{string}{input character vector.} \item{start}{integer vector giving position of first charater in substring, defaults to first character. If negative, counts backwards from last character.} \item{end}{integer vector giving position of last character in substring, defaults to last character. If negative, counts backwards from last character.} } \value{ character vector of substring from \code{start} to \code{end} (inclusive). Will be length of longest input argument. } \description{ \code{str_sub} will recycle all arguments to be the same length as the longest argument. If any arguments are of length 0, the output will be a zero length character vector. } \details{ Substrings are inclusive - they include the characters at both start and end positions. \code{str_sub(string, 1, -1)} will return the complete substring, from the first character to the last. 
} \examples{ hw <- "Hadley Wickham" str_sub(hw, 1, 6) str_sub(hw, end = 6) str_sub(hw, 8, 14) str_sub(hw, 8) str_sub(hw, c(1, 8), c(6, 14)) str_sub(hw, -1) str_sub(hw, -7) str_sub(hw, end = -7) str_sub(hw, seq_len(str_length(hw))) str_sub(hw, end = seq_len(str_length(hw))) } \seealso{ \code{\link{substring}} which this function wraps, and \code{\link{str_sub_replace}} for the replacement version } \keyword{character} stringr/man/str_split_fixed.Rd0000644000176000001440000000213612057740140016240 0ustar ripleyusers\name{str_split_fixed} \alias{str_split_fixed} \title{Split up a string into a fixed number of pieces.} \usage{ str_split_fixed(string, pattern, n) } \arguments{ \item{string}{input character vector} \item{pattern}{pattern to split up by, as defined by a POSIX regular expression. See the ``Extended Regular Expressions'' section of \code{\link{regex}} for details. If \code{NA}, returns original string. If \code{""} splits into individual characters.} \item{n}{number of pieces to return. Default (Inf) uses all possible split positions. If n is greater than the number of pieces, the result will be padded with empty strings.} } \value{ character matrix with \code{n} columns. } \description{ Vectorised over \code{string}. \code{pattern} should be a single pattern, i.e. a character vector of length one. } \examples{ fruits <- c( "apples and oranges and pears and bananas", "pineapples and mangos and guavas" ) str_split_fixed(fruits, " and ", 3) str_split_fixed(fruits, " and ", 4) } \seealso{ \code{\link{str_split}} for variable number of splits } \keyword{character} stringr/man/str_split.Rd0000644000176000001440000000224012057740140015055 0ustar ripleyusers\name{str_split} \alias{str_split} \title{Split up a string into a variable number of pieces.} \usage{ str_split(string, pattern, n = Inf) } \arguments{ \item{string}{input character vector} \item{pattern}{pattern to split up by, as defined by a POSIX regular expression.
See the ``Extended Regular Expressions'' section of \code{\link{regex}} for details. If \code{NA}, returns original string. If \code{""} splits into individual characters.} \item{n}{maximum number of pieces to return. Default (Inf) uses all possible split positions.} } \value{ a list of character vectors. } \description{ Vectorised over \code{string}. \code{pattern} should be a single pattern, i.e. a character vector of length one. } \examples{ fruits <- c( "apples and oranges and pears and bananas", "pineapples and mangos and guavas" ) str_split(fruits, " and ") # Specify n to restrict the number of possible matches str_split(fruits, " and ", n = 3) str_split(fruits, " and ", n = 2) # If n greater than number of pieces, no padding occurs str_split(fruits, " and ", n = 5) } \seealso{ \code{\link{str_split_fixed}} for fixed number of splits } \keyword{character} stringr/man/str_replace_all.Rd0000644000176000001440000000271312057740140016172 0ustar ripleyusers\name{str_replace_all} \alias{str_replace_all} \title{Replace all occurrences of a matched pattern in a string.} \usage{ str_replace_all(string, pattern, replacement) } \arguments{ \item{replacement}{replacement string. References of the form \code{\1}, \code{\2} will be replaced with the contents of the respective matched group (created by \code{()}) within the pattern.} \item{string}{input vector. This must be an atomic vector, and will be coerced to a character vector} \item{pattern}{pattern to look for, as defined by a POSIX regular expression. See the ``Extended Regular Expressions'' section of \code{\link{regex}} for details. See \code{\link{fixed}}, \code{\link{ignore.case}} and \code{\link{perl}} for how to use other types of matching: fixed, case insensitive and perl-compatible.} } \value{ character vector. } \description{ Vectorised over \code{string}, \code{pattern} and \code{replacement}. Shorter arguments will be expanded to length of longest. 
} \examples{ fruits <- c("one apple", "two pears", "three bananas") str_replace(fruits, "[aeiou]", "-") str_replace_all(fruits, "[aeiou]", "-") str_replace_all(fruits, "([aeiou])", "") str_replace_all(fruits, "([aeiou])", "\\\\1\\\\1") str_replace_all(fruits, "[aeiou]", c("1", "2", "3")) str_replace_all(fruits, c("a", "e", "i"), "-") } \seealso{ \code{\link{gsub}} which this function wraps, \code{\link{str_replace}} to replace a single match } \keyword{character} stringr/man/str_replace.Rd0000644000176000001440000000266012057740140015343 0ustar ripleyusers\name{str_replace} \alias{str_replace} \title{Replace first occurrence of a matched pattern in a string.} \usage{ str_replace(string, pattern, replacement) } \arguments{ \item{replacement}{replacement string. References of the form \code{\1}, \code{\2} will be replaced with the contents of the respective matched group (created by \code{()}) within the pattern.} \item{string}{input vector. This must be an atomic vector, and will be coerced to a character vector} \item{pattern}{pattern to look for, as defined by a POSIX regular expression. See the ``Extended Regular Expressions'' section of \code{\link{regex}} for details. See \code{\link{fixed}}, \code{\link{ignore.case}} and \code{\link{perl}} for how to use other types of matching: fixed, case insensitive and perl-compatible.} } \value{ character vector. } \description{ Vectorised over \code{string}, \code{pattern} and \code{replacement}. Shorter arguments will be expanded to length of longest. 
} \examples{ fruits <- c("one apple", "two pears", "three bananas") str_replace(fruits, "[aeiou]", "-") str_replace_all(fruits, "[aeiou]", "-") str_replace(fruits, "([aeiou])", "") str_replace(fruits, "([aeiou])", "\\\\1\\\\1") str_replace(fruits, "[aeiou]", c("1", "2", "3")) str_replace(fruits, c("a", "e", "i"), "-") } \seealso{ \code{\link{sub}} which this function wraps, \code{\link{str_replace_all}} to replace all matches } \keyword{character} stringr/man/str_pad.Rd0000644000176000001440000000134712057740140014475 0ustar ripleyusers\name{str_pad} \alias{str_pad} \title{Pad a string.} \usage{ str_pad(string, width, side = "left", pad = " ") } \arguments{ \item{string}{input character vector} \item{width}{pad strings to this minimum width} \item{side}{side on which padding character is added (left, right or both)} \item{pad}{single padding character (default is a space)} } \value{ character vector } \description{ Vectorised over \code{string}. All other inputs should be of length 1. } \examples{ rbind( str_pad("hadley", 30, "left"), str_pad("hadley", 30, "right"), str_pad("hadley", 30, "both") ) # Longer strings are returned unchanged str_pad("hadley", 3) } \seealso{ \code{\link{str_trim}} to remove whitespace } \keyword{character} stringr/man/str_match_all.Rd0000644000176000001440000000167212057740137015664 0ustar ripleyusers\name{str_match_all} \alias{str_match_all} \title{Extract all matched groups from a string.} \usage{ str_match_all(string, pattern) } \arguments{ \item{pattern}{pattern to look for, as defined by a POSIX regular expression. Pattern should contain groups, defined by (). See the ``Extended Regular Expressions'' section of \code{\link{regex}} for details.} \item{string}{input vector. This must be an atomic vector, and will be coerced to a character vector} } \value{ list of character matrices, as given by \code{\link{str_match}} } \description{ Vectorised over \code{string}. \code{pattern} should be a single pattern, i.e. 
a character vector of length one. } \examples{ strings <- c("Home: 219 733 8965. Work: 229-293-8753 ", "banana pear apple", "595 794 7569 / 387 287 6718") phone <- "([2-9][0-9]{2})[- .]([0-9]{3})[- .]([0-9]{4})" str_extract_all(strings, phone) str_match_all(strings, phone) } \keyword{character} stringr/man/str_match.Rd0000644000176000001440000000206412057740137015030 0ustar ripleyusers\name{str_match} \alias{str_match} \title{Extract first matched group from a string.} \usage{ str_match(string, pattern) } \arguments{ \item{pattern}{pattern to look for, as defined by a POSIX regular expression. Pattern should contain groups, defined by (). See the ``Extended Regular Expressions'' section of \code{\link{regex}} for details.} \item{string}{input vector. This must be an atomic vector, and will be coerced to a character vector} } \value{ character matrix. First column is the complete match, followed by one for each capture group } \description{ Vectorised over \code{string}. \code{pattern} should be a single pattern, i.e. a character vector of length one. } \examples{ strings <- c(" 219 733 8965", "329-293-8753 ", "banana", "595 794 7569", "387 287 6718", "apple", "233.398.9187 ", "482 952 3315", "239 923 8115", "842 566 4692", "Work: 579-499-7527", "$1000", "Home: 543.355.3679") phone <- "([2-9][0-9]{2})[- .]([0-9]{3})[- .]([0-9]{4})" str_extract(strings, phone) str_match(strings, phone) } \keyword{character} stringr/man/str_locate_all.Rd0000644000176000001440000000253312057740137016034 0ustar ripleyusers\name{str_locate_all} \alias{str_locate_all} \title{Locate the position of all occurrences of a pattern in a string.} \usage{ str_locate_all(string, pattern) } \arguments{ \item{string}{input vector. This must be an atomic vector, and will be coerced to a character vector} \item{pattern}{pattern to look for, as defined by a POSIX regular expression. See the ``Extended Regular Expressions'' section of \code{\link{regex}} for details.
See \code{\link{fixed}}, \code{\link{ignore.case}} and \code{\link{perl}} for how to use other types of matching: fixed, case insensitive and perl-compatible.} } \value{ list of integer matrices. First column gives start position of match, and second column gives end position. } \description{ Vectorised over \code{string} and \code{pattern}, shorter is recycled to same length as longest. } \details{ If the match is of length 0, (e.g. from a special match like \code{$}) end will be one character less than start. } \examples{ fruit <- c("apple", "banana", "pear", "pineapple") str_locate_all(fruit, "a") str_locate_all(fruit, "e") str_locate_all(fruit, c("a", "b", "p", "p")) } \seealso{ \code{\link{regexpr}} which this function wraps \code{\link{str_extract}} for a convenient way of extracting matches \code{\link{str_locate}} to locate position of first match } \keyword{character} stringr/man/str_locate.Rd0000644000176000001440000000227412057740137015206 0ustar ripleyusers\name{str_locate} \alias{str_locate} \title{Locate the position of the first occurrence of a pattern in a string.} \usage{ str_locate(string, pattern) } \arguments{ \item{string}{input vector. This must be an atomic vector, and will be coerced to a character vector} \item{pattern}{pattern to look for, as defined by a POSIX regular expression. See the ``Extended Regular Expressions'' section of \code{\link{regex}} for details. See \code{\link{fixed}}, \code{\link{ignore.case}} and \code{\link{perl}} for how to use other types of matching: fixed, case insensitive and perl-compatible.} } \value{ integer matrix. First column gives start position of match, and second column gives end position. } \description{ Vectorised over \code{string} and \code{pattern}, shorter is recycled to same length as longest.
} \examples{ fruit <- c("apple", "banana", "pear", "pinapple") str_locate(fruit, "a") str_locate(fruit, "e") str_locate(fruit, c("a", "b", "p", "p")) } \seealso{ \code{\link{regexpr}} which this function wraps \code{\link{str_extract}} for a convenient way of extracting matches \code{\link{str_locate_all}} to locate position of all matches } \keyword{character} stringr/man/str_length.Rd0000644000176000001440000000111712057740137015213 0ustar ripleyusers\name{str_length} \alias{str_length} \title{The length of a string (in characters).} \usage{ str_length(string) } \arguments{ \item{string}{input vector. This must be an atomic vector, and will be coerced to a character vector} } \value{ numeric vector giving number of characters in each element of the character vector. Missing string have missing length. } \description{ The length of a string (in characters). } \examples{ str_length(letters) str_length(c("i", "like", "programming", NA)) } \seealso{ \code{\link{nchar}} which this function wraps } \keyword{character} stringr/man/str_extract_all.Rd0000644000176000001440000000210312057740137016230 0ustar ripleyusers\name{str_extract_all} \alias{str_extract_all} \title{Extract all pieces of a string that match a pattern.} \usage{ str_extract_all(string, pattern) } \arguments{ \item{string}{input vector. This must be an atomic vector, and will be coerced to a character vector} \item{pattern}{pattern to look for, as defined by a POSIX regular expression. See the ``Extended Regular Expressions'' section of \code{\link{regex}} for details. See \code{\link{fixed}}, \code{\link{ignore.case}} and \code{\link{perl}} for how to use other types of matching: fixed, case insensitive and perl-compatible.} } \value{ list of character vectors. } \description{ Vectorised over \code{string}. \code{pattern} should be a single pattern, i.e. a character vector of length one. 
} \examples{ shopping_list <- c("apples x4", "bag of flour", "bag of sugar", "milk x2") str_extract_all(shopping_list, "[a-z]+") str_extract_all(shopping_list, "\\\\b[a-z]+\\\\b") str_extract_all(shopping_list, "\\\\d") } \seealso{ \code{\link{str_extract}} to extract the first match } \keyword{character} stringr/man/str_extract.Rd0000644000176000001440000000210412057740137015401 0ustar ripleyusers\name{str_extract} \alias{str_extract} \title{Extract first piece of a string that matches a pattern.} \usage{ str_extract(string, pattern) } \arguments{ \item{string}{input vector. This must be an atomic vector, and will be coerced to a character vector} \item{pattern}{pattern to look for, as defined by a POSIX regular expression. See the ``Extended Regular Expressions'' section of \code{\link{regex}} for details. See \code{\link{fixed}}, \code{\link{ignore.case}} and \code{\link{perl}} for how to use other types of matching: fixed, case insensitive and perl-compatible.} } \value{ character vector. } \description{ Vectorised over \code{string}. \code{pattern} should be a single pattern, i.e. a character vector of length one. } \examples{ shopping_list <- c("apples x4", "flour", "sugar", "milk x2") str_extract(shopping_list, "\\\\d") str_extract(shopping_list, "[a-z]+") str_extract(shopping_list, "[a-z]{1,4}") str_extract(shopping_list, "\\\\b[a-z]{1,4}\\\\b") } \seealso{ \code{\link{str_extract_all}} to extract all matches } \keyword{character} stringr/man/str_dup.Rd0000644000176000001440000000074412057740137014527 0ustar ripleyusers\name{str_dup} \alias{str_dup} \title{Duplicate and concatenate strings within a character vector.} \usage{ str_dup(string, times) } \arguments{ \item{string}{input character vector} \item{times}{number of times to duplicate each string} } \value{ character vector } \description{ Vectorised over \code{string} and \code{times}. 
} \examples{ fruit <- c("apple", "pear", "banana") str_dup(fruit, 2) str_dup(fruit, 1:3) str_c("ba", str_dup("na", 0:5)) } \keyword{character} stringr/man/str_detect.Rd0000644000176000001440000000177612057740137015215 0ustar ripleyusers\name{str_detect} \alias{str_detect} \title{Detect the presence or absence of a pattern in a string.} \usage{ str_detect(string, pattern) } \arguments{ \item{string}{input vector. This must be an atomic vector, and will be coerced to a character vector} \item{pattern}{pattern to look for, as defined by a POSIX regular expression. See the ``Extended Regular Expressions'' section of \code{\link{regex}} for details. See \code{\link{fixed}}, \code{\link{ignore.case}} and \code{\link{perl}} for how to use other types of matching: fixed, case insensitive and perl-compatible.} } \value{ boolean vector } \description{ Vectorised over \code{string} and \code{pattern}. } \examples{ fruit <- c("apple", "banana", "pear", "pinapple") str_detect(fruit, "a") str_detect(fruit, "^a") str_detect(fruit, "a$") str_detect(fruit, "b") str_detect(fruit, "[aeiou]") # Also vectorised over pattern str_detect("aecfg", letters) } \seealso{ \code{\link{grepl}} which this function wraps } \keyword{character} stringr/man/str_count.Rd0000644000176000001440000000205212057740137015061 0ustar ripleyusers\name{str_count} \alias{str_count} \title{Count the number of matches in a string.} \usage{ str_count(string, pattern) } \arguments{ \item{string}{input vector. This must be an atomic vector, and will be coerced to a character vector} \item{pattern}{pattern to look for, as defined by a POSIX regular expression. See the ``Extended Regular Expressions'' section of \code{\link{regex}} for details. 
See \code{\link{fixed}}, \code{\link{ignore.case}} and \code{\link{perl}} for how to use other types of matching: fixed, case insensitive and perl-compatible.} } \value{ integer vector } \description{ Vectorised over \code{string} and \code{pattern}, shorter is recycled to same length as longest. } \examples{ fruit <- c("apple", "banana", "pear", "pineapple") str_count(fruit, "a") str_count(fruit, "p") str_count(fruit, "e") str_count(fruit, c("a", "b", "p", "p")) } \seealso{ \code{\link{regexpr}} which this function wraps \code{\link{str_locate}}/\code{\link{str_locate_all}} to locate position of matches } \keyword{character} stringr/man/str_c.Rd0000644000176000001440000000255212057740137014160 0ustar ripleyusers\name{str_c} \alias{str_c} \alias{str_join} \title{Join multiple strings into a single string.} \usage{ str_c(..., sep = "", collapse = NULL) } \arguments{ \item{...}{one or more character vectors. Zero length arguments are removed} \item{sep}{string to insert between input vectors} \item{collapse}{optional string used to combine input vectors into single string} } \value{ If \code{collapse = NULL} (the default) a character vector with length equal to the longest input string. If \code{collapse} is non-NULL, a character vector of length 1. } \description{ To understand how \code{str_c} works, you need to imagine that you are building up a matrix of strings. Each input argument forms a column, and is expanded to the length of the longest argument, using the usual recycling rules. The \code{sep} string is inserted between each column. If collapse is \code{NULL} each row is collapsed into a single string. If non-\code{NULL} that string is inserted at the end of each row, and the entire matrix collapsed to a single string.
} \examples{ str_c("Letter: ", letters) str_c("Letter", letters, sep = ": ") str_c(letters, " is for", "...") str_c(letters[-26], " comes before ", letters[-1]) str_c(letters, collapse = "") str_c(letters, collapse = ", ") } \seealso{ \code{\link{paste}} which this function wraps } \keyword{character} stringr/man/perl.Rd0000644000176000001440000000106312057740140013776 0ustar ripleyusers\name{perl} \alias{perl} \title{Use perl regular expressions.} \usage{ perl(string) } \arguments{ \item{string}{pattern to match with Perl regexps} } \description{ This function specifies that a pattern should use the Perl regular expression engine, rather than the default POSIX 1003.2 extended regular expressions } \examples{ pattern <- "(?x)a.b" strings <- c("abb", "a.b") \dontrun{str_detect(strings, pattern)} str_detect(strings, perl(pattern)) } \seealso{ Other modifiers: \code{\link{fixed}}, \code{\link{ignore.case}} } \keyword{character} stringr/man/invert_match.Rd0000644000176000001440000000120112057740137015519 0ustar ripleyusers\name{invert_match} \alias{invert_match} \title{Switch location of matches to location of non-matches.} \usage{ invert_match(loc) } \arguments{ \item{loc}{matrix of match locations, as from \code{\link{str_locate_all}}} } \value{ numeric matrix giving locations of non-matches } \description{ Invert a matrix of match locations to match the opposite of what was previously matched.
} \examples{ numbers <- "1 and 2 and 4 and 456" num_loc <- str_locate_all(numbers, "[0-9]+")[[1]] str_sub(numbers, num_loc[, "start"], num_loc[, "end"]) text_loc <- invert_match(num_loc) str_sub(numbers, text_loc[, "start"], text_loc[, "end"]) } stringr/man/ignore.case.Rd0000644000176000001440000000074612057740140015240 0ustar ripleyusers\name{ignore.case} \alias{ignore.case} \title{Ignore case of match.} \usage{ ignore.case(string) } \arguments{ \item{string}{pattern for which to ignore case} } \description{ This function specifies that a pattern should ignore the case of matches. } \examples{ pattern <- "a.b" strings <- c("ABB", "aaB", "aab") str_detect(strings, pattern) str_detect(strings, ignore.case(pattern)) } \seealso{ Other modifiers: \code{\link{fixed}}, \code{\link{perl}} } \keyword{character} stringr/man/fixed.Rd0000644000176000001440000000112112057740137014134 0ustar ripleyusers\name{fixed} \alias{fixed} \title{Match fixed characters, not regular expression.} \usage{ fixed(string) } \arguments{ \item{string}{string to match exactly as is} } \description{ This function specifies that a pattern is a fixed string, rather than a regular expression. This can yield substantial speed ups, if regular expression matching is not needed. 
} \examples{ pattern <- "a.b" strings <- c("abb", "a.b") str_detect(strings, pattern) str_detect(strings, fixed(pattern)) } \seealso{ Other modifiers: \code{\link{ignore.case}}, \code{\link{perl}} } \keyword{character} stringr/inst/0000755000176000001440000000000012043564407012754 5ustar ripleyusersstringr/inst/tests/0000755000176000001440000000000012043565031014110 5ustar ripleyusersstringr/inst/tests/test-trim.r0000644000176000001440000000114712043564425016234 0ustar ripleyuserscontext("Trimming strings") test_that("trimming removes spaces", { is_trimmed <- equals("abc") expect_that(str_trim("abc "), is_trimmed) expect_that(str_trim(" abc"), is_trimmed) expect_that(str_trim(" abc "), is_trimmed) }) test_that("trimming removes tabs", { is_trimmed <- equals("abc") expect_that(str_trim("abc\t"), is_trimmed) expect_that(str_trim("\tabc"), is_trimmed) expect_that(str_trim("\tabc\t"), is_trimmed) }) test_that("side argument restricts trimming", { expect_that(str_trim(" abc ", "left"), equals("abc ")) expect_that(str_trim(" abc ", "right"), equals(" abc")) }) stringr/inst/tests/test-sub.r0000644000176000001440000000340012043564425016044 0ustar ripleyuserscontext("Extracting substrings") alphabet <- str_c(letters, collapse = "") test_that("correct substring extracted", { expect_that(str_sub(alphabet, 1, 3), equals("abc")) expect_that(str_sub(alphabet, 24, 26), equals("xyz")) }) test_that("arguments expanded to longest", { alphabet <- str_c(letters, collapse = "") expect_that( str_sub(alphabet, c(1, 24), c(3, 26)), equals(c("abc", "xyz"))) expect_that( str_sub(c("abc", "xyz"), 2, 2), equals(c("b", "y"))) }) test_that("specifying only end subsets from start", { expect_that(str_sub(alphabet, end = 3), equals(c("abc"))) }) test_that("specifying only start subsets to end", { expect_that(str_sub(alphabet, 24), equals(c("xyz"))) }) test_that("specifying -1 as end selects entire string", { expect_that( str_sub("ABCDEF", c(4, 5), c(5, -1)), equals(c("DE", "EF")) ) expect_that( 
str_sub("ABCDEF", c(4, 5), c(-1, -1)), equals(c("DEF", "EF")) ) }) test_that("negative values select from end", { expect_that(str_sub("ABCDEF", 1, -4), equals("ABC")) expect_that(str_sub("ABCDEF", -3), equals("DEF")) }) test_that("missing arguments give missing results", { expect_that(str_sub(NA), equals(NA_character_)) expect_that(str_sub(NA, 1, 3), equals(NA_character_)) expect_that(str_sub(c(NA, "NA"), 1, 3), equals(c(NA, "NA"))) expect_that(str_sub("test", NA, NA), equals(NA_character_)) expect_that(str_sub(c(NA, "test"), NA, NA), equals(rep(NA_character_, 2))) }) test_that("replacement works", { x <- "BBCDEF" str_sub(x, 1, 1) <- "A" expect_that(x, equals("ABCDEF")) str_sub(x, -1, -1) <- "K" expect_that(x, equals("ABCDEK")) str_sub(x, -2, -1) <- "EFGH" expect_that(x, equals("ABCDEFGH")) str_sub(x, 2, -2) <- "" expect_that(x, equals("AH")) }) stringr/inst/tests/test-split.r0000644000176000001440000000432212043565031016404 0ustar ripleyuserscontext("Splitting strings") test_that("special cases are correct", { expect_that(str_split(NA, "")[[1]], equals(NA_character_)) expect_that(str_split(character(), ""), equals(list())) }) test_that("str_split functions as expected", { test <- c("bab", "cac", "dadad") result <- str_split(test, "a") expect_that(result, is_a("list")) expect_that(length(result), equals(3)) lengths <- vapply(result, length, integer(1)) expect_that(lengths, equals(c(2, 2, 3))) expect_that(result, equals( list(c("b", "b"), c("c", "c"), c("d", "d", "d")))) }) test_that("vectors give correct results dealt with correctly", { test <- c("bab", "cac", "dadad", "eae") result <- str_split_fixed(test, "a", 3) expect_that(result, is_a("matrix")) expect_that(nrow(result), equals(4)) expect_that(ncol(result), equals(3)) expect_that(result[1, ], equals(c("b", "b", ""))) expect_that(result[3, ], equals(c("d", "d", "d"))) expect_that(result[, 1], equals(c("b", "c", "d", "e"))) }) test_that("n sets maximum number of splits in str_split", { test <- "Subject: Roger: 
his drinking problems" expect_that(length(str_split(test, ": ")[[1]]), equals(3)) expect_that(length(str_split(test, ": ", 4)[[1]]), equals(3)) expect_that(length(str_split(test, ": ", 3)[[1]]), equals(3)) expect_that(length(str_split(test, ": ", 2)[[1]]), equals(2)) expect_that(length(str_split(test, ": ", 1)[[1]]), equals(1)) expect_that( str_split(test, ": ", 3)[[1]], equals(c("Subject", "Roger", "his drinking problems"))) expect_that( str_split(test, ": ", 2)[[1]], equals(c("Subject", "Roger: his drinking problems"))) }) test_that("n sets exact number of splits in str_split_fixed", { test <- "Subject: Roger: his drinking problems" expect_that(ncol(str_split_fixed(test, ": ", 4)), equals(4)) expect_that(ncol(str_split_fixed(test, ": ", 3)), equals(3)) expect_that(ncol(str_split_fixed(test, ": ", 2)), equals(2)) expect_that(ncol(str_split_fixed(test, ": ", 1)), equals(1)) expect_that( str_split_fixed(test, ": ", 3)[1, ], equals(c("Subject", "Roger", "his drinking problems"))) expect_that( str_split_fixed(test, ": ", 2)[1, ], equals(c("Subject", "Roger: his drinking problems"))) }) stringr/inst/tests/test-pad.r0000644000176000001440000000112512043564425016021 0ustar ripleyuserscontext("Test padding") test_that("long strings are unchanged", { lengths <- sample(40:100, 10) strings <- vapply(lengths, function(x) str_c(letters[sample(26, x, rep = T)], collapse = ""), character(1)) padded <- str_pad(strings, width = 30) expect_that(str_length(padded), equals(str_length(padded))) }) test_that("directions work for simple case", { pad <- function(direction) str_pad("had", direction, width = 10) expect_that(pad("right"), equals("had ")) expect_that(pad("left"), equals(" had")) expect_that(pad("both"), equals(" had ")) }) stringr/inst/tests/test-match.r0000644000176000001440000000355012043565031016347 0ustar ripleyuserscontext("Matching groups") set.seed(1410) num <- matrix(sample(9, 10 * 10, rep = T), ncol = 10) num_flat <- apply(num, 1, str_c, collapse = "") phones <- 
str_c( "(", num[, 1], num[ ,2], num[, 3], ") ", num[, 4], num[, 5], num[, 6], " ", num[, 7], num[, 8], num[, 9], num[, 10]) test_that("special case are correct", { # These tests really should compare to character matrices, but str_match # returns matrices with dimnames set it's real pain expect_that(c(str_match(NA, "(a)")), equals(c(NA_character_, NA_character_))) expect_that(c(str_match(character(), "(a)")), equals(character())) }) test_that("no matching cases returns 1 column matrix", { res <- str_match(c("a", "b"), ".") expect_that(nrow(res), equals(2)) expect_that(ncol(res), equals(1)) expect_that(res[, 1], equals(c("a", "b"))) }) test_that("single match works when all match", { matches <- str_match(phones, "\\(([0-9]{3})\\) ([0-9]{3}) ([0-9]{4})") expect_that(nrow(matches), equals(length(phones))) expect_that(ncol(matches), equals(4)) expect_that(matches[, 1], equals(phones)) matches_flat <- apply(matches[, -1], 1, str_c, collapse = "") expect_that(matches_flat, equals(num_flat)) }) test_that("single match works when some don't match", { matches <- str_match(c(phones, "blah", NA), "\\(([0-9]{3})\\) ([0-9]{3}) ([0-9]{4})") expect_that(nrow(matches), equals(length(phones) + 2)) expect_that(ncol(matches), equals(4)) expect_that(matches[11, ], equals(rep(NA_character_, 4))) expect_that(matches[12, ], equals(rep(NA_character_, 4))) }) test_that("multiple match works", { phones_one <- str_c(phones, collapse = " ") multi_match <- str_match_all(phones_one, "\\(([0-9]{3})\\) ([0-9]{3}) ([0-9]{4})") single_matches <- str_match(phones, "\\(([0-9]{3})\\) ([0-9]{3}) ([0-9]{4})") expect_that(multi_match[[1]], equals(single_matches)) }) stringr/inst/tests/test-locate.r0000644000176000001440000000222512043564425016526 0ustar ripleyuserscontext("Locations") test_that("basic location matching works", { expect_that(str_locate("abc", "a")[1, ], equals(c(1, 1), check.attributes = F)) expect_that(str_locate("abc", "b")[1, ], equals(c(2, 2), check.attributes = F)) 
expect_that(str_locate("abc", "c")[1, ], equals(c(3, 3), check.attributes = F)) expect_that(str_locate("abc", ".+")[1, ], equals(c(1, 3), check.attributes = F)) }) test_that("locations are integers", { strings <- c("a b c", "d e f") expect_that(is.integer(str_locate(strings, "[a-z]")), is_true()) res <- str_locate_all(strings, "[a-z]")[[1]] expect_that(is.integer(res), is_true()) expect_that(is.integer(invert_match(res)), is_true()) }) test_that("both string and patterns are vectorised", { strings <- c("abc", "def") locs <- str_locate(strings, "a") expect_that(locs[, "start"], equals(c(1, NA))) locs <- str_locate(strings, c("a", "d")) expect_that(locs[, "start"], equals(c(1, 1))) expect_that(locs[, "end"], equals(c(1, 1))) locs <- str_locate_all(c("abab"), c("a", "b")) expect_that(locs[[1]][, "start"], equals(c(1, 3))) expect_that(locs[[2]][, "start"], equals(c(2, 4))) }) stringr/inst/tests/test-length.r0000644000176000001440000000116312043564425016540 0ustar ripleyuserscontext("String length") test_that("str_length is number of characters", { expect_that(str_length("a"), equals(1)) expect_that(str_length("ab"), equals(2)) expect_that(str_length("abc"), equals(3)) }) test_that("str_length of missing string is missing", { expect_that(str_length(NA), equals(NA_integer_)) expect_that(str_length(c(NA, 1)), equals(c(NA, 1))) expect_that(str_length("NA"), equals(2)) }) test_that("str_length of factor is length of level", { expect_that(str_length(factor("a")), equals(1)) expect_that(str_length(factor("ab")), equals(2)) expect_that(str_length(factor("abc")), equals(3)) }) stringr/inst/tests/test-join.r0000644000176000001440000000076012043564425016220 0ustar ripleyuserscontext("Joining strings") test_that("basic case works", { test <- c("a", "b", "c") expect_that(str_c(test), equals(test)) expect_that(str_c(test, sep = " "), equals(test)) expect_that(str_c(test, collapse = ""), equals("abc")) }) test_that("zero length vectors dropped", { test <- letters[1:3] 
expect_that(str_c(test, c()), equals(test)) expect_that(str_c(test, NULL), equals(test)) expect_that( str_c(test, NULL, "a", sep = " "), equals(c("a a", "b a", "c a"))) }) stringr/inst/tests/test-extract.r0000644000176000001440000000053512043564425016733 0ustar ripleyuserscontext("Extract patterns") test_that("single pattern extracted correctly", { test <- c("one two three", "a b c") expect_that( str_extract_all(test, "[a-z]+"), equals(list(c("one", "two", "three"), c("a", "b", "c")))) expect_that( str_extract_all(test, "[a-z]{3,}"), equals(list(c("one", "two", "three"), character()))) }) stringr/inst/tests/test-dup.r0000644000176000001440000000071012043564425016044 0ustar ripleyuserscontext("Duplicating strings") test_that("basic duplication works", { expect_that(str_dup("a", 3), equals("aaa")) expect_that(str_dup("abc", 2), equals("abcabc")) expect_that(str_dup(c("a", "b"), 2), equals(c("aa", "bb"))) expect_that(str_dup(c("a", "b"), c(2, 3)), equals(c("aa", "bbb"))) }) test_that("0 duplicates equals empty string", { expect_that(str_dup("a", 0), equals("")) expect_that(str_dup(c("a", "b"), 0), equals(rep("", 2))) }) stringr/inst/tests/test-detect.r0000644000176000001440000000140612043564425016527 0ustar ripleyuserscontext("Detecting patterns") test_that("special cases are correct", { expect_that(str_detect(NA, ""), equals(NA)) expect_that(str_detect(character(), ""), equals(logical())) }) test_that("vectorised patterns work", { expect_that(str_detect("ab", c("a", "b", "c")), equals(c(T, T, F))) expect_that(str_detect(c("ca", "ab"), c("a", "c")), equals(c(T, F))) }) test_that("modifiers work", { expect_that(str_detect("ab", "AB"), equals(FALSE)) expect_that(str_detect("ab", ignore.case("AB")), equals(TRUE)) expect_that(str_detect("abc", "ab[c]"), equals(TRUE)) expect_that(str_detect("abc", fixed("ab[c]")), equals(FALSE)) expect_that(str_detect("ab[c]", fixed("ab[c]")), equals(TRUE)) expect_that(str_detect("abc", perl("(?x)a b c")), equals(TRUE)) }) 
stringr/inst/tests/test-count.r0000644000176000001440000000054312043564425016410 0ustar ripleyuserscontext("Counting matches") test_that("counts are as expected", { fruit <- c("apple", "banana", "pear", "pineapple") expect_equal(str_count(fruit, "a"), c(1, 3, 1, 1)) expect_equal(str_count(fruit, "p"), c(2, 0, 1, 3)) expect_equal(str_count(fruit, "e"), c(1, 0, 1, 2)) expect_equal(str_count(fruit, c("a", "b", "p", "n")), c(1, 1, 1, 1)) }) stringr/inst/tests/test-check.r0000644000176000001440000000120712043564425016333 0ustar ripleyuserscontext("String and pattern checks") test_that("string is atomic", { expect_that(check_string(list()), throws_error("must be an atomic")) }) test_that("pattern is a string", { expect_that(check_pattern(1), throws_error("must be a character vector")) }) test_that("error when string and pattern lengths incompatible", { expect_that(check_pattern(letters, "a"), equals(letters)) expect_that(check_pattern("a", letters), equals("a")) expect_that(check_pattern(c("a", "b", "c"), c("a", "b")), throws_error("not compatible")) expect_that(check_pattern(c("a", "b"), c("a", "b", "c")), throws_error("not compatible")) }) stringr/DESCRIPTION0000644000176000001440000000175512060045717013512 0ustar ripleyusersPackage: stringr Maintainer: Hadley Wickham License: GPL-2 Title: Make it easier to work with strings. Type: Package Author: Hadley Wickham Description: stringr is a set of simple wrappers that make R's string functions more consistent, simpler and easier to use. It does this by ensuring that: function and argument names (and positions) are consistent, all functions deal with NA's and zero length character appropriately, and the output data structures from each function matches the input data structures of other functions. 
Version: 0.6.2 Depends: R (>= 2.14) Suggests: testthat (>= 0.3) Collate: 'c.r' 'checks.r' 'count.r' 'detect.r' 'dup.r' 'extract.r' 'length.r' 'locate.r' 'match.r' 'modifiers.r' 'pad-trim.r' 'replace.r' 'split.r' 'sub.r' 'vectorise.r' 'word.r' 'wrap.r' 'utils.r' Packaged: 2012-12-05 21:47:03 UTC; hadley Repository: CRAN Date/Publication: 2012-12-06 08:39:59