rematch/0000755000175100001440000000000012706070476011724 5ustar hornikusersrematch/inst/0000755000175100001440000000000012705756757012715 5ustar hornikusersrematch/inst/README.Rmd0000644000175100001440000000274112705676720014311 0ustar hornikusers ```{r, setup, echo = FALSE, message = FALSE} knitr::opts_chunk$set( comment = "#>", tidy = FALSE, error = FALSE, fig.width = 8, fig.height = 8) ``` # rematch > Match Regular Expressions with a Nicer 'API' [![Linux Build Status](https://travis-ci.org/MangoTheCat/rematch.svg?branch=master)](https://travis-ci.org/MangoTheCat/rematch) [![Windows Build status](https://ci.appveyor.com/api/projects/status/github/MangoTheCat/rematch?svg=true)](https://ci.appveyor.com/project/gaborcsardi/rematch) [![](http://www.r-pkg.org/badges/version/rematch)](http://www.r-pkg.org/pkg/rematch) [![CRAN RStudio mirror downloads](http://cranlogs.r-pkg.org/badges/rematch)](http://www.r-pkg.org/pkg/rematch) [![Coverage Status](https://img.shields.io/codecov/c/github/MangoTheCat/rematch/master.svg)](https://codecov.io/github/MangoTheCat/rematch?branch=master) A small wrapper on 'regexpr' to extract the matches and captured groups from the match of a regular expression to a character vector. ## Installation ```{r eval = FALSE} source("https://install-github.me/MangoTheCat/rematch") ``` ## Usage ```{r} library(rematch) ``` ```{r} dates <- c("2016-04-20", "1977-08-08", "not a date", "2016", "76-03-02", "2012-06-30", "2015-01-21 19:58") isodate <- "([0-9]{4})-([0-1][0-9])-([0-3][0-9])" re_match(text = dates, pattern = isodate) ``` ```{r} isodaten <- "(?<year>[0-9]{4})-(?<month>[0-1][0-9])-(?<day>[0-3][0-9])" re_match(text = dates, pattern = isodaten) ``` ## License MIT © Mango Solutions rematch/inst/NEWS.md0000644000175100001440000000024112705756757014010 0ustar hornikusers # 1.0.1 * Make `R CMD check` work when `testthat` is not available. * Fixed a bug with group capture when `text` is a scalar. # 1.0.0 First public release. 
rematch/inst/README.md0000644000175100001440000000365512705676722014176 0ustar hornikusers # rematch > Match Regular Expressions with a Nicer 'API' [![Linux Build Status](https://travis-ci.org/MangoTheCat/rematch.svg?branch=master)](https://travis-ci.org/MangoTheCat/rematch) [![Windows Build status](https://ci.appveyor.com/api/projects/status/github/MangoTheCat/rematch?svg=true)](https://ci.appveyor.com/project/gaborcsardi/rematch) [![](http://www.r-pkg.org/badges/version/rematch)](http://www.r-pkg.org/pkg/rematch) [![CRAN RStudio mirror downloads](http://cranlogs.r-pkg.org/badges/rematch)](http://www.r-pkg.org/pkg/rematch) [![Coverage Status](https://img.shields.io/codecov/c/github/MangoTheCat/rematch/master.svg)](https://codecov.io/github/MangoTheCat/rematch?branch=master) A small wrapper on 'regexpr' to extract the matches and captured groups from the match of a regular expression to a character vector. ## Installation ```r source("https://install-github.me/MangoTheCat/rematch") ``` ## Usage ```r library(rematch) ``` ```r dates <- c("2016-04-20", "1977-08-08", "not a date", "2016", "76-03-02", "2012-06-30", "2015-01-21 19:58") isodate <- "([0-9]{4})-([0-1][0-9])-([0-3][0-9])" re_match(text = dates, pattern = isodate) ``` ``` #> .match #> [1,] "2016-04-20" "2016" "04" "20" #> [2,] "1977-08-08" "1977" "08" "08" #> [3,] NA NA NA NA #> [4,] NA NA NA NA #> [5,] NA NA NA NA #> [6,] "2012-06-30" "2012" "06" "30" #> [7,] "2015-01-21" "2015" "01" "21" ``` ```r isodaten <- "(?<year>[0-9]{4})-(?<month>[0-1][0-9])-(?<day>[0-3][0-9])" re_match(text = dates, pattern = isodaten) ``` ``` #> .match year month day #> [1,] "2016-04-20" "2016" "04" "20" #> [2,] "1977-08-08" "1977" "08" "08" #> [3,] NA NA NA NA #> [4,] NA NA NA NA #> [5,] NA NA NA NA #> [6,] "2012-06-30" "2012" "06" "30" #> [7,] "2015-01-21" "2015" "01" "21" ``` ## License MIT © Mango Solutions rematch/tests/0000755000175100001440000000000012705756321013065 5ustar 
# hornikusersrematch/tests/testthat.R0000644000175100001440000000010712705756321015046 0ustar hornikusers

# Run the test suite only when the optional 'testthat' package is installed,
# so that `R CMD check` still works without it. requireNamespace() tests for
# availability without attaching anything; the packages are attached
# explicitly once we know the tests can run.
if (requireNamespace("testthat", quietly = TRUE)) {
  library(testthat)
  library(rematch)
  test_check("rematch")
}

# rematch/tests/testthat/0000755000175100001440000000000012706070476014726 5ustar hornikusersrematch/tests/testthat/test.R0000644000175100001440000000404012705757162016031 0ustar hornikusers

context("rematch")

# Note: `pattern` is the first argument of re_match(), `text` the second.

test_that("corner cases", {

  # The empty pattern matches the empty string in every element
  res <- re_match("", c("foo", "bar"))
  expect_equal(res, cbind(.match = c("", "")))

  res <- re_match("", c("foo", "", "bar"))
  expect_equal(res, cbind(.match = c("", "", "")))

  # Zero-length text gives a zero-row matrix, with or without groups
  res <- re_match("", character())
  expect_equal(res, cbind(.match = character()))

  res <- re_match("foo", character())
  expect_equal(res, cbind(.match = character()))

  res <- re_match("foo (g1) (g2)", character())
  expect_equal(res, cbind(.match = character(), character(), character()))

  # Mixed unnamed and named groups: only the named one gets a column name
  res <- re_match("foo (g1) (?<name>g2)", character())
  expect_equal(
    res,
    cbind(.match = character(), character(), name = character())
  )

  # No match gives NA
  res <- re_match("foo", "not")
  expect_equal(res, cbind(.match = NA_character_))
})

test_that("not so corner cases", {

  dates <- c("2016-04-20", "1977-08-08", "not a date", "2016",
             "76-03-02", "2012-06-30", "2015-01-21 19:58")

  # Unnamed capture groups
  isodate <- "([0-9]{4})-([0-1][0-9])-([0-3][0-9])"
  expect_equal(
    re_match(text = dates, pattern = isodate),
    cbind(
      .match = c(dates[1:2], NA, NA, NA, "2012-06-30", "2015-01-21"),
      c("2016", "1977", NA, NA, NA, "2012", "2015"),
      c("04", "08", NA, NA, NA, "06", "01"),
      c("20", "08", NA, NA, NA, "30", "21")
    )
  )

  # Named capture groups become column names
  isodaten <- "(?<year>[0-9]{4})-(?<month>[0-1][0-9])-(?<day>[0-3][0-9])"
  expect_equal(
    re_match(text = dates, pattern = isodaten),
    cbind(
      .match = c(dates[1:2], NA, NA, NA, "2012-06-30", "2015-01-21"),
      year = c("2016", "1977", NA, NA, NA, "2012", "2015"),
      month = c("04", "08", NA, NA, NA, "06", "01"),
      day = c("20", "08", NA, NA, NA, "30", "21")
    )
  )
})

test_that("UTF8", {

  res <- re_match("Gábor", c("Gábor Csárdi"))
  expect_equal(res, cbind(.match = "Gábor"))
})

test_that("text is scalar & capture groups", {

  # Scalar text must still give a 1-row matrix with one column per group
  res <- re_match("(\\w+) (\\w+)", "foo bar")
  expect_equal(res, cbind(.match = "foo bar", "foo", "bar"))

  res <- re_match("(?<g1>\\w+) (?<g2>\\w+)", "foo bar")
  expect_equal(res, cbind(.match = "foo bar", g1 = "foo", g2 = "bar"))
})

# rematch/NAMESPACE0000644000175100001440000000007712705662723013150 0ustar hornikusers
# Generated by roxygen2: do not edit by hand

export(re_match)

# rematch/R/0000755000175100001440000000000012705756705012132 5ustar hornikusersrematch/R/package.R0000644000175100001440000000462512705756705013657 0ustar hornikusers

#' Match Regular Expressions with a Nicer 'API'
#'
#' A small wrapper on 'regexpr' to extract the matches and captured
#' groups from the match of a regular expression to a character vector.
#' See \code{\link{re_match}}.
#'
#' @docType package
#' @name rematch
NULL

#' Match a regular expression to a character vector
#'
#' This function is a small wrapper on the \code{\link[base]{regexpr}}
#' base R function, to provide an API that is easier to use.
#'
#' Currently only the first occurrence of the pattern is used.
#'
#' @param pattern Regular expression, defaults to be a PCRE
#'   expression. See \code{\link[base]{regex}} for more about
#'   regular expressions.
#' @param text Character vector.
#' @param perl Logical, should Perl-compatible regular expressions
#'   be used?
#' @param ... Additional arguments to pass to
#'   \code{\link[base]{regexpr}}.
#' @return A character matrix of the matched (sub)strings.
#'   The first column is always the full match. This column is
#'   named \code{.match}. The rest of the columns are capture groups,
#'   with appropriate column names, if the groups are named.
#'
#' @export
#' @examples
#' dates <- c("2016-04-20", "1977-08-08", "not a date", "2016",
#'   "76-03-02", "2012-06-30", "2015-01-21 19:58")
#' isodate <- "([0-9]{4})-([0-1][0-9])-([0-3][0-9])"
#' re_match(text = dates, pattern = isodate)
#'
#' # The same with named groups
#' isodaten <- "(?<year>[0-9]{4})-(?<month>[0-1][0-9])-(?<day>[0-3][0-9])"
#' re_match(text = dates, pattern = isodaten)
re_match <- function(pattern, text, perl = TRUE, ...) {

  stopifnot(is.character(pattern), length(pattern) == 1, !is.na(pattern))
  text <- as.character(text)

  # regexpr() reports only the first match within each element of `text`.
  match <- regexpr(pattern, text, perl = perl, ...)

  ## Full matches: a start position of -1 means "no match" and becomes NA.
  ## Elements whose position is NA come out of substr() as NA already;
  ## which() keeps those NAs out of the subscript.
  full <- substr(text, match, match + attr(match, "match.length") - 1L)
  full[which(match == -1L)] <- NA_character_
  res <- cbind(full, deparse.level = 0)

  ## Capture groups: only when regexpr() returned capture information.
  ## The attributes are looked up once, not once per group.
  starts <- attr(match, "capture.start")
  if (!is.null(starts)) {
    lens <- attr(match, "capture.length")
    n_groups <- NCOL(starts)

    groups <- vapply(
      seq_len(n_groups),
      function(i) {
        start <- starts[, i]
        group <- substr(text, start, start + lens[, i] - 1L)
        group[start == -1L] <- NA_character_
        group
      },
      character(length(match))
    )

    # vapply() returns a plain vector when `text` is a scalar, so reshape
    # explicitly to one row per element of `text` and one column per group.
    res <- cbind(
      res,
      matrix(groups, nrow = length(match), ncol = n_groups)
    )
  }

  # Unnamed groups have "" as their capture name.
  colnames(res) <- c(".match", attr(match, "capture.names"))
  res
}

# rematch/MD50000644000175100001440000000102212706070476012227 0ustar hornikusers86fed7d3b1f4a29ffc9ebe16b54659fc *DESCRIPTION ddfa93fe5d0827d872d34b7eef574617 *LICENSE 66d6c0cadabbb19765b6d2b06a501098 *NAMESPACE 491554e0d61b8dea6bad628da41b51dc *R/package.R 7e4ca3f7d415ec56887db89e15f8543a *inst/NEWS.md 103bfb2264d313302cd456814cdbecaa *inst/README.Rmd 30e86bc337e361e92ad4ef0661f98535 *inst/README.md 02c44945b414d30330c875f4d47354d0 *man/re_match.Rd a8d0ce22292bf9718bcce898db9c5480 *man/rematch.Rd c99ec8380983645be33bdffcc35830d5 *tests/testthat.R 0a9a58b76071b82c6132742ebf4afe6f *tests/testthat/test.R rematch/DESCRIPTION0000644000175100001440000000121012706070476013424 0ustar hornikusersPackage: rematch Title: Match Regular Expressions with a Nicer 'API' Version: 1.0.1 Author: Gabor Csardi Maintainer: Gabor Csardi Description: A small wrapper on 
'regexpr' to extract the matches and captured groups from the match of a regular expression to a character vector. License: MIT + file LICENSE LazyData: true URL: https://github.com/MangoTheCat/rematch BugReports: https://github.com/MangoTheCat/rematch/issues RoxygenNote: 5.0.1.9000 Suggests: covr, testthat Encoding: UTF-8 NeedsCompilation: no Packaged: 2016-04-20 19:54:37 UTC; gaborcsardi Repository: CRAN Date/Publication: 2016-04-21 08:20:46 rematch/man/0000755000175100001440000000000012705662723012500 5ustar hornikusersrematch/man/rematch.Rd0000644000175100001440000000061112705662723014410 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/package.R \docType{package} \name{rematch} \alias{rematch} \alias{rematch-package} \title{Match Regular Expressions with a Nicer 'API'} \description{ A small wrapper on 'regexpr' to extract the matches and captured groups from the match of a regular expression to a character vector. See \code{\link{re_match}}. } rematch/man/re_match.Rd0000644000175100001440000000254212705676663014564 0ustar hornikusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/package.R \name{re_match} \alias{re_match} \title{Match a regular expression to a character vector} \usage{ re_match(pattern, text, perl = TRUE, ...) } \arguments{ \item{pattern}{Regular expression, defaults to be a PCRE expression. See \code{\link[base]{regex}} for more about regular expressions.} \item{text}{Character vector.} \item{perl}{Logical, should Perl-compatible regular expressions be used?} \item{...}{Additional arguments to pass to \code{\link[base]{regexpr}}.} } \value{ A character matrix of the matched (sub)strings. The first column is always the full match. This column is named \code{.match}. The rest of the columns are capture groups, with appropriate column names, if the groups are named. 
} \description{ This function is a small wrapper on the \code{\link[base]{regexpr}} base R function, to provide an API that is easier to use. } \details{ Currently only the first occurrence of the pattern is used. } \examples{ dates <- c("2016-04-20", "1977-08-08", "not a date", "2016", "76-03-02", "2012-06-30", "2015-01-21 19:58") isodate <- "([0-9]{4})-([0-1][0-9])-([0-3][0-9])" re_match(text = dates, pattern = isodate) # The same with named groups isodaten <- "(?<year>[0-9]{4})-(?<month>[0-1][0-9])-(?<day>[0-3][0-9])" re_match(text = dates, pattern = isodaten) } rematch/LICENSE0000644000175100001440000000005512705666635012740 0ustar hornikusersYEAR: 2016 COPYRIGHT HOLDER: Mango Solutions