re2-1.2.0/0000755000004100000410000000000013656356061012245 5ustar www-datawww-datare2-1.2.0/re2.gemspec0000644000004100000410000000406313656356061014305 0ustar www-datawww-data######################################################### # This file has been automatically generated by gem2tgz # ######################################################### # -*- encoding: utf-8 -*- # stub: re2 1.2.0 ruby lib # stub: ext/re2/extconf.rb Gem::Specification.new do |s| s.name = "re2".freeze s.version = "1.2.0" s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version= s.require_paths = ["lib".freeze] s.authors = ["Paul Mucur".freeze] s.date = "2020-04-18" s.description = "Ruby bindings to re2, \"an efficient, principled regular expression library\".".freeze s.extensions = ["ext/re2/extconf.rb".freeze] s.files = ["LICENSE.txt".freeze, "README.md".freeze, "Rakefile".freeze, "ext/re2/extconf.rb".freeze, "ext/re2/re2.cc".freeze, "lib/re2.rb".freeze, "lib/re2/scanner.rb".freeze, "lib/re2/string.rb".freeze, "spec/kernel_spec.rb".freeze, "spec/re2/match_data_spec.rb".freeze, "spec/re2/regexp_spec.rb".freeze, "spec/re2/scanner_spec.rb".freeze, "spec/re2/string_spec.rb".freeze, "spec/re2_spec.rb".freeze, "spec/spec_helper.rb".freeze] s.homepage = "https://github.com/mudge/re2".freeze s.licenses = ["BSD-3-Clause".freeze] s.rubygems_version = "2.5.2.1".freeze s.summary = "Ruby bindings to re2.".freeze s.test_files = ["spec/kernel_spec.rb".freeze, "spec/re2/match_data_spec.rb".freeze, "spec/re2/regexp_spec.rb".freeze, "spec/re2/scanner_spec.rb".freeze, "spec/re2/string_spec.rb".freeze, "spec/re2_spec.rb".freeze, "spec/spec_helper.rb".freeze] if s.respond_to? 
:specification_version then s.specification_version = 4 if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then s.add_development_dependency(%q.freeze, ["~> 0.9"]) s.add_development_dependency(%q.freeze, ["~> 3.2"]) else s.add_dependency(%q.freeze, ["~> 0.9"]) s.add_dependency(%q.freeze, ["~> 3.2"]) end else s.add_dependency(%q.freeze, ["~> 0.9"]) s.add_dependency(%q.freeze, ["~> 3.2"]) end end re2-1.2.0/README.md0000644000004100000410000001341713656356061013532 0ustar www-datawww-datare2 [![Build Status](https://travis-ci.org/mudge/re2.svg?branch=master)](http://travis-ci.org/mudge/re2) === A Ruby binding to [re2][], an "efficient, principled regular expression library". **Current version:** 1.2.0 **Supported Ruby versions:** 1.8.7, 1.9.2, 1.9.3, 2.0.0, 2.1.0, 2.2, 2.3, Rubinius 3.8 Installation ------------ You will need [re2][] installed as well as a C++ compiler such as [gcc][] (on Debian and Ubuntu, this is provided by the [build-essential][] package). If you are using Mac OS X, I recommend installing re2 with [Homebrew][] by running the following: $ brew install re2 If you are using Debian, you can install the [libre2-dev][] package like so: $ sudo apt-get install libre2-dev Recent versions of re2 require a compiler with C++11 support such as [clang](http://clang.llvm.org/) 3.4 or [gcc](https://gcc.gnu.org/) 4.8. If you are using a packaged Ruby distribution, make sure you also have the Ruby header files installed such as those provided by the [ruby-dev][] package on Debian and Ubuntu. You can then install the library via RubyGems with `gem install re2` or `gem install re2 -- --with-re2-dir=/opt/local/re2` if re2 is not installed in the default location of `/usr/local/`. Documentation ------------- Full documentation automatically generated from the latest version is available at . Note that re2's regular expression syntax differs from PCRE and Ruby's built-in [`Regexp`][Regexp] library, see the [official syntax page][] for more details. 
Usage ----- While re2 uses the same naming scheme as Ruby's built-in regular expression library (with [`Regexp`](http://mudge.name/re2/RE2/Regexp.html) and [`MatchData`](http://mudge.name/re2/RE2/MatchData.html)), its API is slightly different: ```console $ irb -rubygems > require 're2' > r = RE2::Regexp.new('w(\d)(\d+)') => # > m = r.match("w1234") => # > m[1] => "1" > m.string => "w1234" > m.begin(1) => 1 > m.end(1) => 2 > r =~ "w1234" => true > r !~ "bob" => true > r.match("bob") => nil ``` As [`RE2::Regexp.new`](http://mudge.name/re2/RE2/Regexp.html#initialize-instance_method) (or `RE2::Regexp.compile`) can be quite verbose, a helper method has been defined against `Kernel` so you can use a shorter version to create regular expressions: ```console > RE2('(\d+)') => # ``` Note the use of *single quotes* as double quotes will interpret `\d` as `d` as in the following example: ```console > RE2("(\d+)") => # ``` As of 0.3.0, you can use named groups: ```console > r = RE2::Regexp.new('(?P\w+) (?P\d+)') => #\w+) (?P\d+)/> > m = r.match("Bob 40") => # > m[:name] => "Bob" > m["age"] => "40" ``` As of 0.6.0, you can use `RE2::Regexp#scan` to incrementally scan text for matches (similar in purpose to Ruby's [`String#scan`](http://ruby-doc.org/core-2.0.0/String.html#method-i-scan)). Calling `scan` will return an `RE2::Scanner` which is [enumerable](http://ruby-doc.org/core-2.0.0/Enumerable.html) meaning you can use `each` to iterate through the matches (and even use [`Enumerator::Lazy`](http://ruby-doc.org/core-2.0/Enumerator/Lazy.html)): ```ruby re = RE2('(\w+)') scanner = re.scan("It is a truth universally acknowledged") scanner.each do |match| puts match end scanner.rewind enum = scanner.to_enum enum.next #=> ["It"] enum.next #=> ["is"] ``` Features -------- * Pre-compiling regular expressions with [`RE2::Regexp.new(re)`](https://github.com/google/re2/blob/2016-02-01/re2/re2.h#L100), `RE2::Regexp.compile(re)` or `RE2(re)` (including specifying options, e.g. 
`RE2::Regexp.new("pattern", :case_sensitive => false)` * Extracting matches with `re2.match(text)` (and an exact number of matches with `re2.match(text, number_of_matches)` such as `re2.match("123-234", 2)`) * Extracting matches by name (both with strings and symbols) * Checking for matches with `re2 =~ text`, `re2 === text` (for use in `case` statements) and `re2 !~ text` * Incrementally scanning text with `re2.scan(text)` * Checking regular expression compilation with `re2.ok?`, `re2.error` and `re2.error_arg` * Checking regular expression "cost" with `re2.program_size` * Checking the options for an expression with `re2.options` or individually with `re2.case_sensitive?` * Performing a single string replacement with `pattern.replace(replacement, original)` * Performing a global string replacement with `pattern.replace_all(replacement, original)` * Escaping regular expressions with [`RE2.escape(unquoted)`](https://github.com/google/re2/blob/2016-02-01/re2/re2.h#L418) and `RE2.quote(unquoted)` Contributions ------------- * Thanks to [Jason Woods](https://github.com/driskell) who contributed the original implementations of `RE2::MatchData#begin` and `RE2::MatchData#end`; * Thanks to [Stefano Rivera](https://github.com/stefanor) who first contributed C++11 support; * Thanks to [Stan Hu](https://github.com/stanhu) for reporting a bug with empty patterns and `RE2::Regexp#scan`; * Thanks to [Sebastian Reitenbach](https://github.com/buzzdeee) for reporting the deprecation and removal of the `utf8` encoding option in re2. Contact ------- All issues and suggestions should go to [GitHub Issues](https://github.com/mudge/re2/issues). 
[re2]: https://github.com/google/re2 [gcc]: http://gcc.gnu.org/ [ruby-dev]: http://packages.debian.org/ruby-dev [build-essential]: http://packages.debian.org/build-essential [Regexp]: http://ruby-doc.org/core/classes/Regexp.html [MatchData]: http://ruby-doc.org/core/classes/MatchData.html [Homebrew]: http://mxcl.github.com/homebrew [libre2-dev]: http://packages.debian.org/search?keywords=libre2-dev [official syntax page]: https://github.com/google/re2/wiki/Syntax re2-1.2.0/spec/0000755000004100000410000000000013656356061013177 5ustar www-datawww-datare2-1.2.0/spec/re2_spec.rb0000644000004100000410000000454413656356061015235 0ustar www-datawww-dataRSpec.describe RE2 do describe "#Replace" do it "only replaces the first occurrence of the pattern" do expect(RE2.Replace("woo", "o", "a")).to eq("wao") end it "performs replacement based on regular expressions" do expect(RE2.Replace("woo", "o+", "e")).to eq("we") end it "supports flags in patterns" do expect(RE2.Replace("Good morning", "(?i)gOOD MORNING", "hi")).to eq("hi") end it "does not perform replacements in-place" do name = "Robert" replacement = RE2.Replace(name, "R", "Cr") expect(name).to_not equal(replacement) end it "supports passing an RE2::Regexp as the pattern" do re = RE2::Regexp.new('wo{2}') expect(RE2.Replace("woo", re, "miaow")).to eq("miaow") end it "respects any passed RE2::Regexp's flags" do re = RE2::Regexp.new('gOOD MORNING', :case_sensitive => false) expect(RE2.Replace("Good morning", re, "hi")).to eq("hi") end if String.method_defined?(:encoding) it "preserves the original string's encoding" do original = "Foo" replacement = RE2.Replace(original, "oo", "ah") expect(original.encoding).to eq(replacement.encoding) end end end describe "#GlobalReplace" do it "replaces every occurrence of a pattern" do expect(RE2.GlobalReplace("woo", "o", "a")).to eq("waa") end it "performs replacement based on regular expressions" do expect(RE2.GlobalReplace("woohoo", "o+", "e")).to eq("wehe") end it "supports flags 
in patterns" do expect(RE2.GlobalReplace("Robert", "(?i)r", "w")).to eq("wobewt") end it "does not perform replacement in-place" do name = "Robert" replacement = RE2.GlobalReplace(name, "(?i)R", "w") expect(name).to_not equal(replacement) end it "supports passing an RE2::Regexp as the pattern" do re = RE2::Regexp.new('wo{2,}') expect(RE2.GlobalReplace("woowooo", re, "miaow")).to eq("miaowmiaow") end it "respects any passed RE2::Regexp's flags" do re = RE2::Regexp.new('gOOD MORNING', :case_sensitive => false) expect(RE2.GlobalReplace("Good morning Good morning", re, "hi")).to eq("hi hi") end end describe "#QuoteMeta" do it "escapes a string so it can be used as a regular expression" do expect(RE2.QuoteMeta("1.5-2.0?")).to eq('1\.5\-2\.0\?') end end end re2-1.2.0/spec/kernel_spec.rb0000644000004100000410000000060313656356061016015 0ustar www-datawww-dataRSpec.describe Kernel do describe "#RE2" do it "returns an RE2::Regexp instance given a pattern" do expect(RE2('w(o)(o)')).to be_a(RE2::Regexp) end it "returns an RE2::Regexp instance given a pattern and options" do re = RE2('w(o)(o)', :case_sensitive => false) expect(re).to be_a(RE2::Regexp) expect(re).to_not be_case_sensitive end end end re2-1.2.0/spec/re2/0000755000004100000410000000000013656356061013667 5ustar www-datawww-datare2-1.2.0/spec/re2/string_spec.rb0000644000004100000410000000322513656356061016536 0ustar www-datawww-datarequire "re2/string" class String include RE2::String end RSpec.describe RE2::String do describe "#re2_sub" do it "delegates to RE2.Replace to perform replacement" do expect("My name is Robert Paulson".re2_sub('Robert', 'Crobert')).to eq("My name is Crobert Paulson") end it "doesn't perform an in-place replacement" do string = "My name is Robert Paulson" expect(string.re2_sub('Robert', 'Crobert')).to_not equal(string) end end describe "#re2_gsub" do it "delegates to RE2.GlobalReplace to perform replacement" do expect("My name is Robert Paulson".re2_gsub('a', 'e')).to eq("My neme is Robert 
Peulson") end it "doesn't perform an in-place replacement" do string = "My name is Robert Paulson" expect(string.re2_gsub('a', 'e')).to_not equal(string) end end describe "#re2_match" do it "delegates to RE2::Regexp#match to perform matches" do md = "My name is Robert Paulson".re2_match('My name is (\S+) (\S+)') expect(md).to be_a(RE2::MatchData) expect(md[0]).to eq("My name is Robert Paulson") expect(md[1]).to eq("Robert") expect(md[2]).to eq("Paulson") end it "supports limiting the number of matches" do md = "My name is Robert Paulson".re2_match('My name is (\S+) (\S+)', 0) expect(md).to eq(true) end end describe "#re2_escape" do it "escapes the string for use in regular expressions" do expect("1.5-2.0?".re2_escape).to eq('1\.5\-2\.0\?') end end describe "#re2_quote" do it "escapes the string for use in regular expressions" do expect("1.5-2.0?".re2_quote).to eq('1\.5\-2\.0\?') end end end re2-1.2.0/spec/re2/match_data_spec.rb0000644000004100000410000001743413656356061017324 0ustar www-datawww-data# encoding: utf-8 RSpec.describe RE2::MatchData do describe "#to_a" do it "is populated with the match and capturing groups" do a = RE2::Regexp.new('w(o)(o)').match('woo').to_a expect(a).to eq(["woo", "o", "o"]) end it "populates optional capturing groups with nil if they are missing" do a = RE2::Regexp.new('(\d?)(a)(b)').match('ab').to_a expect(a).to eq(["ab", nil, "a", "b"]) end end describe "#[]" do it "accesses capturing groups by numerical index" do md = RE2::Regexp.new('(\d)(\d{2})').match("123") expect(md[1]).to eq("1") expect(md[2]).to eq("23") end it "has the whole match as the 0th item" do md = RE2::Regexp.new('(\d)(\d{2})').match("123") expect(md[0]).to eq("123") end it "supports access by numerical ranges" do md = RE2::Regexp.new('(\d+) (\d+) (\d+)').match("123 456 789") expect(md[1..3]).to eq(["123", "456", "789"]) expect(md[1...3]).to eq(["123", "456"]) end it "supports slicing" do md = RE2::Regexp.new('(\d+) (\d+) (\d+)').match("123 456 789") expect(md[1, 
3]).to eq(["123", "456", "789"]) expect(md[1, 2]).to eq(["123", "456"]) end it "returns nil if attempting to access non-existent capturing groups by index" do md = RE2::Regexp.new('(\d+)').match('bob 123') expect(md[2]).to be_nil expect(md[3]).to be_nil end it "allows access by string names when there are named groups" do md = RE2::Regexp.new('(?P\d+)').match('bob 123') expect(md["numbers"]).to eq("123") end it "allows access by symbol names when there are named groups" do md = RE2::Regexp.new('(?P\d+)').match('bob 123') expect(md[:numbers]).to eq("123") end it "allows access by names and indices with mixed groups" do md = RE2::Regexp.new('(?P\w+)(\s*)(?P\d+)').match("bob 123") expect(md["name"]).to eq("bob") expect(md[:name]).to eq("bob") expect(md[2]).to eq(" ") expect(md["numbers"]).to eq("123") expect(md[:numbers]).to eq("123") end it "returns nil if no such named group exists" do md = RE2::Regexp.new('(\d+)').match("bob 123") expect(md["missing"]).to be_nil expect(md[:missing]).to be_nil end it "raises an error if given an inappropriate index" do md = RE2::Regexp.new('(\d+)').match("bob 123") expect { md[nil] }.to raise_error(TypeError) end if String.method_defined?(:encoding) it "returns UTF-8 encoded strings by default" do md = RE2::Regexp.new('(?P\S+)').match("bob") expect(md[0].encoding.name).to eq("UTF-8") expect(md["name"].encoding.name).to eq("UTF-8") expect(md[:name].encoding.name).to eq("UTF-8") end it "returns Latin 1 strings encoding when utf-8 is false" do md = RE2::Regexp.new('(?P\S+)', :utf8 => false).match('bob') expect(md[0].encoding.name).to eq("ISO-8859-1") expect(md["name"].encoding.name).to eq("ISO-8859-1") expect(md[:name].encoding.name).to eq("ISO-8859-1") end end end describe "#string" do it "returns the original string to match against" do re = RE2::Regexp.new('(\D+)').match("bob") expect(re.string).to eq("bob") end it "returns a copy, not the actual original" do string = "bob" re = RE2::Regexp.new('(\D+)').match(string) 
expect(re.string).to_not equal(string) end it "returns a frozen string" do re = RE2::Regexp.new('(\D+)').match("bob") expect(re.string).to be_frozen end end describe "#size" do it "returns the number of capturing groups plus the matching string" do md = RE2::Regexp.new('(\d+) (\d+)').match("1234 56") expect(md.size).to eq(3) end end describe "#length" do it "returns the number of capturing groups plus the matching string" do md = RE2::Regexp.new('(\d+) (\d+)').match("1234 56") expect(md.length).to eq(3) end end describe "#regexp" do it "returns the original RE2::Regexp used" do re = RE2::Regexp.new('(\d+)') md = re.match("123") expect(md.regexp).to equal(re) end end describe "#inspect" do it "returns a text representation of the object and indices" do md = RE2::Regexp.new('(\d+) (\d+)').match("1234 56") expect(md.inspect).to eq('#') end it "represents missing matches as nil" do md = RE2::Regexp.new('(\d+) (\d+)?').match("1234 ") expect(md.inspect).to eq('#') end end describe "#to_s" do it "returns the matching part of the original string" do md = RE2::Regexp.new('(\d{2,5})').match("one two 23456") expect(md.to_s).to eq("23456") end end describe "#to_ary" do it "allows the object to be expanded with an asterisk" do md = RE2::Regexp.new('(\d+) (\d+)').match("1234 56") m1, m2, m3 = *md expect(m1).to eq("1234 56") expect(m2).to eq("1234") expect(m3).to eq("56") end end describe "#begin" do it "returns the offset of the start of a match by index" do md = RE2::Regexp.new('(wo{2})').match('a woohoo') expect(md.string[md.begin(0)..-1]).to eq('woohoo') end it "returns the offset of the start of a match by string name" do md = RE2::Regexp.new('(?Pfo{2})').match('a foobar') expect(md.string[md.begin('foo')..-1]).to eq('foobar') end it "returns the offset of the start of a match by symbol name" do md = RE2::Regexp.new('(?Pfo{2})').match('a foobar') expect(md.string[md.begin(:foo)..-1]).to eq('foobar') end it "returns the offset despite multibyte characters" do md = 
RE2::Regexp.new('(Ruby)').match('I ♥ Ruby') expect(md.string[md.begin(0)..-1]).to eq('Ruby') end it "returns nil for non-existent numerical matches" do md = RE2::Regexp.new('(\d)').match('123') expect(md.begin(10)).to be_nil end it "returns nil for negative numerical matches" do md = RE2::Regexp.new('(\d)').match('123') expect(md.begin(-4)).to be_nil end it "returns nil for non-existent named matches" do md = RE2::Regexp.new('(\d)').match('123') expect(md.begin('foo')).to be_nil end it "returns nil for non-existent symbol named matches" do md = RE2::Regexp.new('(\d)').match('123') expect(md.begin(:foo)).to be_nil end end describe "#end" do it "returns the offset of the character following the end of a match" do md = RE2::Regexp.new('(wo{2})').match('a woohoo') expect(md.string[0...md.end(0)]).to eq('a woo') end it "returns the offset of a match by string name" do md = RE2::Regexp.new('(?Pfo{2})').match('a foobar') expect(md.string[0...md.end('foo')]).to eq('a foo') end it "returns the offset of a match by symbol name" do md = RE2::Regexp.new('(?Pfo{2})').match('a foobar') expect(md.string[0...md.end(:foo)]).to eq('a foo') end it "returns the offset despite multibyte characters" do md = RE2::Regexp.new('(Ruby)').match('I ♥ Ruby') expect(md.string[0...md.end(0)]).to eq('I ♥ Ruby') end it "returns nil for non-existent numerical matches" do md = RE2::Regexp.new('(\d)').match('123') expect(md.end(10)).to be_nil end it "returns nil for negative numerical matches" do md = RE2::Regexp.new('(\d)').match('123') expect(md.end(-4)).to be_nil end it "returns nil for non-existent named matches" do md = RE2::Regexp.new('(\d)').match('123') expect(md.end('foo')).to be_nil end it "returns nil for non-existent symbol named matches" do md = RE2::Regexp.new('(\d)').match('123') expect(md.end(:foo)).to be_nil end end end re2-1.2.0/spec/re2/scanner_spec.rb0000644000004100000410000001267713656356061016674 0ustar www-datawww-data# encoding: utf-8 RSpec.describe RE2::Scanner do describe 
"#regexp" do it "returns the original pattern for the scanner" do re = RE2::Regexp.new('(\w+)') scanner = re.scan("It is a truth") expect(scanner.regexp).to equal(re) end end describe "#string" do it "returns the original text for the scanner" do re = RE2::Regexp.new('(\w+)') text = "It is a truth" scanner = re.scan(text) expect(scanner.string).to equal(text) end end describe "#scan" do it "returns the next array of matches" do r = RE2::Regexp.new('(\w+)') scanner = r.scan("It is a truth universally acknowledged") expect(scanner.scan).to eq(["It"]) expect(scanner.scan).to eq(["is"]) expect(scanner.scan).to eq(["a"]) expect(scanner.scan).to eq(["truth"]) expect(scanner.scan).to eq(["universally"]) expect(scanner.scan).to eq(["acknowledged"]) expect(scanner.scan).to be_nil end it "returns an empty array if there are no capturing groups" do r = RE2::Regexp.new('\w+') scanner = r.scan("Foo bar") expect(scanner.scan).to eq([]) end it "returns nil if there is no match" do r = RE2::Regexp.new('\d+') scanner = r.scan("Foo bar") expect(scanner.scan).to be_nil end it "returns an empty array if the input is empty" do r = RE2::Regexp.new("") scanner = r.scan("") expect(scanner.scan).to eq([]) expect(scanner.scan).to be_nil end it "returns an array of nil with an empty input and capture" do r = RE2::Regexp.new("()") scanner = r.scan("") expect(scanner.scan).to eq([nil]) expect(scanner.scan).to be_nil end it "returns an empty array for every match if the pattern is empty" do r = RE2::Regexp.new("") scanner = r.scan("Foo") expect(scanner.scan).to eq([]) expect(scanner.scan).to eq([]) expect(scanner.scan).to eq([]) expect(scanner.scan).to eq([]) expect(scanner.scan).to be_nil end it "returns an array of nil if the pattern is an empty capturing group" do r = RE2::Regexp.new("()") scanner = r.scan("Foo") expect(scanner.scan).to eq([nil]) expect(scanner.scan).to eq([nil]) expect(scanner.scan).to eq([nil]) expect(scanner.scan).to eq([nil]) expect(scanner.scan).to be_nil end it 
"returns array of nils with multiple empty capturing groups" do r = RE2::Regexp.new("()()()") scanner = r.scan("Foo") expect(scanner.scan).to eq([nil, nil, nil]) expect(scanner.scan).to eq([nil, nil, nil]) expect(scanner.scan).to eq([nil, nil, nil]) expect(scanner.scan).to eq([nil, nil, nil]) expect(scanner.scan).to be_nil end it "supports empty groups with multibyte characters" do r = RE2::Regexp.new("()€") scanner = r.scan("€") expect(scanner.scan).to eq([nil]) expect(scanner.scan).to be_nil end end it "is enumerable" do r = RE2::Regexp.new('(\d)') scanner = r.scan("There are 1 some 2 numbers 3") expect(scanner).to be_a(Enumerable) end describe "#each" do it "yields each match" do r = RE2::Regexp.new('(\d)') scanner = r.scan("There are 1 some 2 numbers 3") matches = [] scanner.each do |match| matches << match end expect(matches).to eq([["1"], ["2"], ["3"]]) end it "returns an enumerator when not given a block" do r = RE2::Regexp.new('(\d)') scanner = r.scan("There are 1 some 2 numbers 3") # Prior to Ruby 1.9, Enumerator was within Enumerable. if defined?(Enumerator) expect(scanner.each).to be_a(Enumerator) elsif defined?(Enumerable::Enumerator) expect(scanner.each).to be_a(Enumerable::Enumerator) end end end describe "#rewind" do it "resets any consumption" do r = RE2::Regexp.new('(\d)') scanner = r.scan("There are 1 some 2 numbers 3") expect(scanner.to_enum.first).to eq(["1"]) expect(scanner.to_enum.first).to eq(["2"]) scanner.rewind expect(scanner.to_enum.first).to eq(["1"]) end it "resets the eof? check" do r = RE2::Regexp.new('(\d)') scanner = r.scan("1") scanner.scan expect(scanner.eof?).to be_truthy scanner.rewind expect(scanner.eof?).to be_falsey end end describe "#eof?" 
do it "returns false if the input has not been consumed" do r = RE2::Regexp.new('(\d)') scanner = r.scan("1 2 3") expect(scanner.eof?).to be_falsey end it "returns true if the input has been consumed" do r = RE2::Regexp.new('(\d)') scanner = r.scan("1") scanner.scan expect(scanner.eof?).to be_truthy end it "returns false if no match is made" do r = RE2::Regexp.new('(\d)') scanner = r.scan("a") scanner.scan expect(scanner.eof?).to be_falsey end it "returns false with an empty input that has not been scanned" do r = RE2::Regexp.new("") scanner = r.scan("") expect(scanner.eof?).to be_falsey end it "returns false with an empty input that has not been matched" do r = RE2::Regexp.new('(\d)') scanner = r.scan("") scanner.scan expect(scanner.eof?).to be_falsey end it "returns true with an empty input that has been matched" do r = RE2::Regexp.new("") scanner = r.scan("") scanner.scan expect(scanner.eof?).to be_truthy end end end re2-1.2.0/spec/re2/regexp_spec.rb0000644000004100000410000002770613656356061016534 0ustar www-datawww-dataRSpec.describe RE2::Regexp do describe "#initialize" do it "returns an instance given only a pattern" do re = RE2::Regexp.new('woo') expect(re).to be_a(RE2::Regexp) end it "returns an instance given a pattern and options" do re = RE2::Regexp.new('woo', :case_sensitive => false) expect(re).to be_a(RE2::Regexp) end it "raises an error if given an inappropriate type" do expect { RE2::Regexp.new(nil) }.to raise_error(TypeError) end end describe "#compile" do it "returns an instance given only a pattern" do re = RE2::Regexp.compile('woo') expect(re).to be_a(RE2::Regexp) end it "returns an instance given a pattern and options" do re = RE2::Regexp.compile('woo', :case_sensitive => false) expect(re).to be_a(RE2::Regexp) end end describe "#options" do it "returns a hash of options" do options = RE2::Regexp.new('woo').options expect(options).to be_a(Hash) end it "is populated with default options when nothing has been set" do options = 
RE2::Regexp.new('woo').options expect(options).to include(:utf8 => true, :posix_syntax => false, :longest_match => false, :log_errors => true, :literal => false, :never_nl => false, :case_sensitive => true, :perl_classes => false, :word_boundary => false, :one_line => false) end it "is populated with overridden options when specified" do options = RE2::Regexp.new('woo', :case_sensitive => false).options expect(options[:case_sensitive]).to eq(false) end end describe "#error" do it "returns nil if there is no error" do error = RE2::Regexp.new('woo').error expect(error).to be_nil end # Use log_errors => false to suppress RE2's logging to STDERR. it "contains the error string if there is an error" do error = RE2::Regexp.new('wo(o', :log_errors => false).error expect(error).to eq("missing ): wo(o") end end describe "#error_arg" do it "returns nil if there is no error" do error_arg = RE2::Regexp.new('woo').error_arg expect(error_arg).to be_nil end it "returns the offending portin of the regexp if there is an error" do error_arg = RE2::Regexp.new('wo(o', :log_errors => false).error_arg expect(error_arg).to eq("wo(o") end end describe "#program_size" do it "returns a numeric value" do program_size = RE2::Regexp.new('w(o)(o)').program_size expect(program_size).to be_a(Fixnum) end end describe "#to_str" do it "returns the original pattern" do string = RE2::Regexp.new('w(o)(o)').to_str expect(string).to eq("w(o)(o)") end end describe "#pattern" do it "returns the original pattern" do pattern = RE2::Regexp.new('w(o)(o)').pattern expect(pattern).to eq("w(o)(o)") end end describe "#inspect" do it "shows the class name and original pattern" do string = RE2::Regexp.new('w(o)(o)').inspect expect(string).to eq("#") end end describe "#utf8?" do it "returns true by default" do expect(RE2::Regexp.new('woo')).to be_utf8 end it "can be overridden on initialization" do re = RE2::Regexp.new('woo', :utf8 => false) expect(re).to_not be_utf8 end end describe "#posix_syntax?" 
do it "returns false by default" do expect(RE2::Regexp.new('woo')).to_not be_posix_syntax end it "can be overridden on initialization" do re = RE2::Regexp.new('woo', :posix_syntax => true) expect(re).to be_posix_syntax end end describe "#literal?" do it "returns false by default" do expect(RE2::Regexp.new('woo')).to_not be_literal end it "can be overridden on initialization" do re = RE2::Regexp.new('woo', :literal => true) expect(re).to be_literal end end describe "#never_nl?" do it "returns false by default" do expect(RE2::Regexp.new('woo')).to_not be_never_nl end it "can be overridden on initialization" do re = RE2::Regexp.new('woo', :never_nl => true) expect(re).to be_never_nl end end describe "#case_sensitive?" do it "returns true by default" do expect(RE2::Regexp.new('woo')).to be_case_sensitive end it "can be overridden on initialization" do re = RE2::Regexp.new('woo', :case_sensitive => false) expect(re).to_not be_case_sensitive end end describe "#case_insensitive?" do it "returns false by default" do expect(RE2::Regexp.new('woo')).to_not be_case_insensitive end it "can be overridden on initialization" do re = RE2::Regexp.new('woo', :case_sensitive => false) expect(re).to be_case_insensitive end end describe "#casefold?" do it "returns true by default" do expect(RE2::Regexp.new('woo')).to_not be_casefold end it "can be overridden on initialization" do re = RE2::Regexp.new('woo', :case_sensitive => false) expect(re).to be_casefold end end describe "#longest_match?" do it "returns false by default" do expect(RE2::Regexp.new('woo')).to_not be_casefold end it "can be overridden on initialization" do re = RE2::Regexp.new('woo', :longest_match => true) expect(re).to be_longest_match end end describe "#log_errors?" do it "returns true by default" do expect(RE2::Regexp.new('woo')).to be_log_errors end it "can be overridden on initialization" do re = RE2::Regexp.new('woo', :log_errors => false) expect(re).to_not be_log_errors end end describe "#perl_classes?" 
do it "returns false by default" do expect(RE2::Regexp.new('woo')).to_not be_perl_classes end it "can be overridden on initialization" do re = RE2::Regexp.new('woo', :perl_classes => true) expect(re).to be_perl_classes end end describe "#word_boundary?" do it "returns false by default" do expect(RE2::Regexp.new('woo')).to_not be_word_boundary end it "can be overridden on initialization" do re = RE2::Regexp.new('woo', :word_boundary => true) expect(re).to be_word_boundary end end describe "#one_line?" do it "returns false by default" do expect(RE2::Regexp.new('woo')).to_not be_one_line end it "can be overridden on initialization" do re = RE2::Regexp.new('woo', :one_line => true) expect(re).to be_one_line end end describe "#max_mem" do it "returns the default max memory" do expect(RE2::Regexp.new('woo').max_mem).to eq(8388608) end it "can be overridden on initialization" do re = RE2::Regexp.new('woo', :max_mem => 1024) expect(re.max_mem).to eq(1024) end end describe "#match" do let(:re) { RE2::Regexp.new('My name is (\S+) (\S+)') } it "returns match data given only text" do md = re.match("My name is Robert Paulson") expect(md).to be_a(RE2::MatchData) end it "returns nil if there is no match for the given text" do expect(re.match("My age is 99")).to be_nil end it "returns only true or false if no matches are requested" do expect(re.match("My name is Robert Paulson", 0)).to eq(true) expect(re.match("My age is 99", 0)).to eq(false) end it "raises an exception when given nil" do expect { re.match(nil) }.to raise_error(TypeError) end it "raises an exception when given an inappropriate number of matches" do expect { re.match("My name is Robert Paulson", {}) }.to raise_error(TypeError) end describe "with a specific number of matches under the total in the pattern" do subject { re.match("My name is Robert Paulson", 1) } it "returns a match data object" do expect(subject).to be_a(RE2::MatchData) end it "has the whole match and only the specified number of matches" do 
expect(subject.size).to eq(2) end it "populates any specified matches" do expect(subject[1]).to eq("Robert") end it "does not populate any matches that weren't included" do expect(subject[2]).to be_nil end end describe "with a number of matches over the total in the pattern" do subject { re.match("My name is Robert Paulson", 5) } it "returns a match data object" do expect(subject).to be_a(RE2::MatchData) end it "has the whole match the specified number of matches" do expect(subject.size).to eq(6) end it "populates any specified matches" do expect(subject[1]).to eq("Robert") expect(subject[2]).to eq("Paulson") end it "pads the remaining matches with nil" do expect(subject[3]).to be_nil expect(subject[4]).to be_nil expect(subject[5]).to be_nil expect(subject[6]).to be_nil end end end describe "#match?" do it "returns only true or false if no matches are requested" do re = RE2::Regexp.new('My name is (\S+) (\S+)') expect(re.match?("My name is Robert Paulson")).to eq(true) expect(re.match?("My age is 99")).to eq(false) end end describe "#=~" do it "returns only true or false if no matches are requested" do re = RE2::Regexp.new('My name is (\S+) (\S+)') expect(re =~ "My name is Robert Paulson").to eq(true) expect(re =~ "My age is 99").to eq(false) end end describe "#!~" do it "returns only true or false if no matches are requested" do re = RE2::Regexp.new('My name is (\S+) (\S+)') expect(re !~ "My name is Robert Paulson").to eq(false) expect(re !~ "My age is 99").to eq(true) end end describe "#===" do it "returns only true or false if no matches are requested" do re = RE2::Regexp.new('My name is (\S+) (\S+)') expect(re === "My name is Robert Paulson").to eq(true) expect(re === "My age is 99").to eq(false) end end describe "#ok?" 
# Compilation status: invalid patterns still build an object, but it is not ok.
describe "#ok?" do
  it "returns true for valid regexps" do
    expect(RE2::Regexp.new('woo')).to be_ok
    expect(RE2::Regexp.new('wo(o)')).to be_ok
    expect(RE2::Regexp.new('((\d)\w+){3,}')).to be_ok
  end

  it "returns false for invalid regexps" do
    # :log_errors => false keeps the expected compile failures off stderr.
    expect(RE2::Regexp.new('wo(o', :log_errors => false)).to_not be_ok
    expect(RE2::Regexp.new('wo[o', :log_errors => false)).to_not be_ok
    expect(RE2::Regexp.new('*', :log_errors => false)).to_not be_ok
  end
end

describe "#escape" do
  it "transforms a string into a regexp" do
    expect(RE2::Regexp.escape("1.5-2.0?")).to eq('1\.5\-2\.0\?')
  end
end

describe "#quote" do
  it "transforms a string into a regexp" do
    expect(RE2::Regexp.quote("1.5-2.0?")).to eq('1\.5\-2\.0\?')
  end
end

describe "#number_of_capturing_groups" do
  it "returns the number of groups in a regexp" do
    expect(RE2::Regexp.new('(a)(b)(c)').number_of_capturing_groups).to eq(3)
    expect(RE2::Regexp.new('abc').number_of_capturing_groups).to eq(0)
    expect(RE2::Regexp.new('a((b)c)').number_of_capturing_groups).to eq(2)
  end
end

# The group names below had been stripped from the patterns (leaving the
# invalid "(?Pa)"); without them the "bob"/"rob" lookups cannot succeed.
describe "#named_capturing_groups" do
  it "returns a hash of names to indices" do
    expect(RE2::Regexp.new('(?P<bob>a)').named_capturing_groups).to be_a(Hash)
  end

  it "maps names to indices with only one group" do
    groups = RE2::Regexp.new('(?P<bob>a)').named_capturing_groups
    expect(groups["bob"]).to eq(1)
  end

  it "maps names to indices with several groups" do
    # The unnamed (o) group takes index 2, so "rob" is index 3.
    groups = RE2::Regexp.new('(?P<bob>a)(o)(?P<rob>e)').named_capturing_groups
    expect(groups["bob"]).to eq(1)
    expect(groups["rob"]).to eq(3)
  end
end

describe "#scan" do
  it "returns a scanner" do
    r = RE2::Regexp.new('(\w+)')
    scanner = r.scan("It is a truth universally acknowledged")

    expect(scanner).to be_a(RE2::Scanner)
  end
end
module RE2
  # Convenience methods intended to be mixed into strings so that the
  # receiver is used as the text being searched or rewritten.
  module String
    # Replaces the first occurrence of +pattern+ with +rewrite+ and returns a
    # new string. The arguments are forwarded unchanged to +RE2.Replace+.
    #
    # @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
    # @param [String] rewrite the string to replace with
    # @example
    #   "hello there".re2_sub("hello", "howdy") #=> "howdy there"
    #   re2 = RE2::Regexp.new("hel+o")
    #   "hello there".re2_sub(re2, "yo")        #=> "yo there"
    #   text = "Good morning"
    #   text.re2_sub("morn", "even")            #=> "Good evening"
    #   text                                    #=> "Good morning"
    def re2_sub(*args)
      RE2.Replace(self, *args)
    end

    # Replaces every occurrence of +pattern+ with +rewrite+ and returns a new
    # string. The arguments are forwarded unchanged to +RE2.GlobalReplace+.
    #
    # @param [String, RE2::Regexp] pattern a regexp matching text to be replaced
    # @param [String] rewrite the string to replace with
    # @example
    #   "hello there".re2_gsub("e", "i")   #=> "hillo thiri"
    #   re2 = RE2::Regexp.new("oo?")
    #   "whoops-doops".re2_gsub(re2, "e")  #=> "wheps-deps"
    #   text = "Good morning"
    #   text.re2_gsub("o", "ee")           #=> "Geeeed meerning"
    #   text                               #=> "Good morning"
    def re2_gsub(*args)
      RE2.GlobalReplace(self, *args)
    end

    # Matches +pattern+ against this string and returns whatever
    # {RE2::Regexp#match} returns for the given arguments.
    #
    # An already-compiled {RE2::Regexp} is used as-is, so the options it was
    # compiled with are honoured and the pattern is not compiled a second
    # time; anything else is compiled with the default options.
    #
    # @return [Boolean, RE2::MatchData]
    #
    # @overload re2_match(pattern)
    #   Returns an {RE2::MatchData} containing the matching pattern and all
    #   subpatterns resulting from looking for +pattern+.
    #
    #   @param [String, RE2::Regexp] pattern the regular expression to match
    #   @return [RE2::MatchData] the matches
    #   @raise [NoMemoryError] if there was not enough memory to allocate the matches
    #   @example
    #     r = RE2::Regexp.new('w(o)(o)')
    #     "woo".re2_match(r)
    #
    # @overload re2_match(pattern, 0)
    #   Returns either true or false indicating whether a successful match
    #   was made.
    #
    #   @param [String, RE2::Regexp] pattern the regular expression to match
    #   @return [Boolean] whether the match was successful
    #   @raise [NoMemoryError] if there was not enough memory to allocate the matches
    #   @example
    #     r = RE2::Regexp.new('w(o)(o)')
    #     "woo".re2_match(r, 0) #=> true
    #     "bob".re2_match(r, 0) #=> false
    #
    # @overload re2_match(pattern, number_of_matches)
    #   See +re2_match(pattern)+ but with a specific number of matches
    #   returned (padded with nils if necessary).
    #
    #   @param [String, RE2::Regexp] pattern the regular expression to match
    #   @param [Fixnum] number_of_matches the number of matches to return
    #   @return [RE2::MatchData] the matches
    #   @raise [NoMemoryError] if there was not enough memory to allocate the matches
    #   @example
    #     r = RE2::Regexp.new('w(o)(o)')
    #     "woo".re2_match(r, 1)
    #     "woo".re2_match(r, 3)
    def re2_match(pattern, *args)
      regexp = pattern.is_a?(RE2::Regexp) ? pattern : RE2::Regexp.new(pattern)
      regexp.match(self, *args)
    end

    # Escapes all potentially meaningful regexp characters by delegating to
    # +RE2.QuoteMeta+. The returned string, used as a regular expression,
    # will exactly match the original string.
    #
    # @return [String] the escaped string
    # @example
    #   "1.5-2.0?".re2_escape #=> "1\.5\-2\.0\?"
    def re2_escape
      RE2.QuoteMeta(self)
    end

    alias_method :re2_quote, :re2_escape
  end
end
module RE2
  # Ruby-side half of the scanner: layers Enumerable on top of #scan,
  # which is not defined in this file.
  class Scanner
    include Enumerable

    # Yields each successive result of #scan until #scan returns a falsy
    # value. Without a block, returns an enumerator over the same results.
    #
    # @yield [matches] the value returned by one call to #scan
    # @return [Enumerator, nil] an enumerator when no block is given
    def each
      return to_enum(:each) unless block_given?

      while (found = scan)
        yield found
      end
    end
  end
end
match(pattern, text, startpos, endpos, anchor, match, nmatch) \
        (pattern->Match(text, startpos, endpos, anchor, match, nmatch))
#else
/* Without HAVE_ENDPOS_ARGUMENT the endpos argument is dropped from the call. */
#define match(pattern, text, startpos, endpos, anchor, match, nmatch) \
        (pattern->Match(text, startpos, anchor, match, nmatch))
#endif

/* Data wrapped by RE2::Regexp: the compiled pattern, owned by the wrapper. */
typedef struct {
  RE2 *pattern;
} re2_pattern;

/*
 * Data wrapped by RE2::MatchData: an array of match pieces (released with
 * delete[] in re2_matchdata_free), its length, and the Ruby regexp and text
 * objects, which are kept alive by re2_matchdata_mark.
 */
typedef struct {
  re2::StringPiece *matches;
  int number_of_matches;
  VALUE regexp, text;
} re2_matchdata;

/*
 * Data wrapped by RE2::Scanner: the input piece still to be scanned
 * (released with delete in re2_scanner_free), the number of capturing
 * groups, an end-of-input flag, and the Ruby regexp and text objects.
 */
typedef struct {
  re2::StringPiece *input;
  int number_of_capturing_groups;
  bool eof;
  VALUE regexp, text;
} re2_scanner;

/* The RE2 module and its classes. */
VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cScanner;

/* Symbols used in RE2 options. */
static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors,
          id_max_mem, id_literal, id_never_nl, id_case_sensitive,
          id_perl_classes, id_word_boundary, id_one_line;

/* GC mark callback: keeps the regexp and text referenced by a match alive. */
void re2_matchdata_mark(re2_matchdata* self) {
  rb_gc_mark(self->regexp);
  rb_gc_mark(self->text);
}

/* GC free callback: releases the matches array, then the struct itself. */
void re2_matchdata_free(re2_matchdata* self) {
  if (self->matches) {
    delete[] self->matches;
  }
  free(self);
}

/* GC mark callback: keeps the regexp and text referenced by a scanner alive. */
void re2_scanner_mark(re2_scanner* self) {
  rb_gc_mark(self->regexp);
  rb_gc_mark(self->text);
}

/* GC free callback: releases the input piece, then the struct itself. */
void re2_scanner_free(re2_scanner* self) {
  if (self->input) {
    delete self->input;
  }
  free(self);
}

/* GC free callback: releases the compiled pattern, then the struct itself. */
void re2_regexp_free(re2_pattern* self) {
  if (self->pattern) {
    delete self->pattern;
  }
  free(self);
}

/* Allocator for RE2::MatchData: wraps a new re2_matchdata struct. */
static VALUE re2_matchdata_allocate(VALUE klass) {
  re2_matchdata *m;
  return Data_Make_Struct(klass, re2_matchdata, re2_matchdata_mark,
      re2_matchdata_free, m);
}

/* Allocator for RE2::Scanner: wraps a new re2_scanner struct. */
static VALUE re2_scanner_allocate(VALUE klass) {
  re2_scanner *c;
  return Data_Make_Struct(klass, re2_scanner, re2_scanner_mark,
      re2_scanner_free, c);
}

/*
 * Returns the text object stored on the match data.
 * NOTE(review): the original comment calls this a frozen copy of the string
 * passed into +match+; the copy/freeze happens where the match is built,
 * which is not visible here -- confirm.
 *
 * @return [String] the string stored on the match.
 * @example
 *   m = RE2::Regexp.new('(\d+)').match("bob 123")
 *   m.string  #=> "bob 123"
 */
static VALUE re2_matchdata_string(VALUE self) {
  re2_matchdata *m;
  Data_Get_Struct(self, re2_matchdata, m);

  return m->text;
}
* * @return [String] the original string. * @example * c = RE2::Regexp.new('(\d+)').scan("foo") * c.string #=> "foo" */ static VALUE re2_scanner_string(VALUE self) { re2_scanner *c; Data_Get_Struct(self, re2_scanner, c); return c->text; } /* * Returns whether the scanner has consumed all input or not. * * @return [Boolean] whether the scanner has consumed all input or not * @example * c = RE2::Regexp.new('(\d+)').scan("foo") * c.eof? #=> true */ static VALUE re2_scanner_eof(VALUE self) { re2_scanner *c; Data_Get_Struct(self, re2_scanner, c); return BOOL2RUBY(c->eof); } /* * Rewind the scanner to the start of the string. * * @example * s = RE2::Regexp.new('(\d+)').scan("1 2 3") * e = s.to_enum * e.scan #=> ["1"] * e.scan #=> ["2"] * s.rewind * e.scan #=> ["1"] */ static VALUE re2_scanner_rewind(VALUE self) { re2_scanner *c; Data_Get_Struct(self, re2_scanner, c); c->input = new(nothrow) re2::StringPiece(StringValuePtr(c->text)); c->eof = false; return self; } /* * Scan the given text incrementally for matches, returning an array of * matches on each subsequent call. Returns nil if no matches are found. * * @return [Array] the matches. 
* @example * s = RE2::Regexp.new('(\w+)').scan("Foo bar baz") * s.scan #=> ["Foo"] * s.scan #=> ["bar"] */ static VALUE re2_scanner_scan(VALUE self) { int i; size_t original_input_size, new_input_size; bool input_advanced; re2_pattern *p; re2_scanner *c; VALUE result; Data_Get_Struct(self, re2_scanner, c); Data_Get_Struct(c->regexp, re2_pattern, p); vector argv(c->number_of_capturing_groups); vector args(c->number_of_capturing_groups); vector matches(c->number_of_capturing_groups); if (c->eof) { return Qnil; } original_input_size = c->input->size(); for (i = 0; i < c->number_of_capturing_groups; i++) { matches[i] = ""; argv[i] = &matches[i]; args[i] = &argv[i]; } if (RE2::FindAndConsumeN(c->input, *p->pattern, &args[0], c->number_of_capturing_groups)) { result = rb_ary_new2(c->number_of_capturing_groups); new_input_size = c->input->size(); input_advanced = new_input_size < original_input_size; for (i = 0; i < c->number_of_capturing_groups; i++) { if (matches[i].empty()) { rb_ary_push(result, Qnil); } else { rb_ary_push(result, ENCODED_STR_NEW(matches[i].data(), matches[i].size(), p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1")); } } /* Check whether we've exhausted the input yet. */ c->eof = new_input_size == 0; /* If the match didn't advance the input, we need to do this ourselves. */ if (!input_advanced && new_input_size > 0) { c->input->remove_prefix(1); } } else { result = Qnil; } return result; } /* * Retrieve a matchdata by index or name. 
*/ re2::StringPiece *re2_matchdata_find_match(VALUE idx, VALUE self) { int id; re2_matchdata *m; re2_pattern *p; map groups; string name; re2::StringPiece *match; Data_Get_Struct(self, re2_matchdata, m); Data_Get_Struct(m->regexp, re2_pattern, p); if (FIXNUM_P(idx)) { id = FIX2INT(idx); } else { if (SYMBOL_P(idx)) { name = rb_id2name(SYM2ID(idx)); } else { name = StringValuePtr(idx); } groups = p->pattern->NamedCapturingGroups(); if (groups.count(name) == 1) { id = groups[name]; } else { return NULL; } } if (id >= 0 && id < m->number_of_matches) { match = &m->matches[id]; if (!match->empty()) { return match; } } return NULL; } /* * Returns the number of elements in the match array (including nils). * * @return [Fixnum] the number of elements * @example * m = RE2::Regexp.new('(\d+)').match("bob 123") * m.size #=> 2 * m.length #=> 2 */ static VALUE re2_matchdata_size(VALUE self) { re2_matchdata *m; Data_Get_Struct(self, re2_matchdata, m); return INT2FIX(m->number_of_matches); } /* * Returns the offset of the start of the nth element of the matchdata. * * @param [Fixnum, String, Symbol] n the name or number of the match * @return [Fixnum] the offset of the start of the match * @example * m = RE2::Regexp.new('ob (\d+)').match("bob 123") * m.begin(0) #=> 1 * m.begin(1) #=> 4 */ static VALUE re2_matchdata_begin(VALUE self, VALUE n) { re2_matchdata *m; re2_pattern *p; re2::StringPiece *match; long offset; Data_Get_Struct(self, re2_matchdata, m); Data_Get_Struct(m->regexp, re2_pattern, p); match = re2_matchdata_find_match(n, self); if (match == NULL) { return Qnil; } else { offset = reinterpret_cast(match->data()) - reinterpret_cast(StringValuePtr(m->text)); return ENCODED_STR_SUBLEN(StringValue(m->text), offset, p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"); } } /* * Returns the offset of the character following the end of the nth element of the matchdata. 
* * @param [Fixnum, String, Symbol] n the name or number of the match * @return [Fixnum] the offset of the character following the end of the match * @example * m = RE2::Regexp.new('ob (\d+) b').match("bob 123 bob") * m.end(0) #=> 9 * m.end(1) #=> 7 */ static VALUE re2_matchdata_end(VALUE self, VALUE n) { re2_matchdata *m; re2_pattern *p; re2::StringPiece *match; long offset; Data_Get_Struct(self, re2_matchdata, m); Data_Get_Struct(m->regexp, re2_pattern, p); match = re2_matchdata_find_match(n, self); if (match == NULL) { return Qnil; } else { offset = reinterpret_cast(match->data()) - reinterpret_cast(StringValuePtr(m->text)) + match->size(); return ENCODED_STR_SUBLEN(StringValue(m->text), offset, p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"); } } /* * Returns the {RE2::Regexp} used in the match. * * @return [RE2::Regexp] the regexp used in the match * @example * m = RE2::Regexp.new('(\d+)').match("bob 123") * m.regexp #=> # */ static VALUE re2_matchdata_regexp(VALUE self) { re2_matchdata *m; Data_Get_Struct(self, re2_matchdata, m); return m->regexp; } /* * Returns the {RE2::Regexp} used in the scanner. * * @return [RE2::Regexp] the regexp used in the scanner * @example * c = RE2::Regexp.new('(\d+)').scan("bob 123") * c.regexp #=> # */ static VALUE re2_scanner_regexp(VALUE self) { re2_scanner *c; Data_Get_Struct(self, re2_scanner, c); return c->regexp; } static VALUE re2_regexp_allocate(VALUE klass) { re2_pattern *p; return Data_Make_Struct(klass, re2_pattern, 0, re2_regexp_free, p); } /* * Returns the array of matches. 
* * @return [Array] the array of matches * @example * m = RE2::Regexp.new('(\d+)').match("bob 123") * m.to_a #=> ["123", "123"] */ static VALUE re2_matchdata_to_a(VALUE self) { int i; re2_matchdata *m; re2_pattern *p; re2::StringPiece *match; VALUE array; Data_Get_Struct(self, re2_matchdata, m); Data_Get_Struct(m->regexp, re2_pattern, p); array = rb_ary_new2(m->number_of_matches); for (i = 0; i < m->number_of_matches; i++) { match = &m->matches[i]; if (match->empty()) { rb_ary_push(array, Qnil); } else { rb_ary_push(array, ENCODED_STR_NEW(match->data(), match->size(), p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1")); } } return array; } static VALUE re2_matchdata_nth_match(int nth, VALUE self) { re2_matchdata *m; re2_pattern *p; re2::StringPiece *match; Data_Get_Struct(self, re2_matchdata, m); Data_Get_Struct(m->regexp, re2_pattern, p); if (nth < 0 || nth >= m->number_of_matches) { return Qnil; } else { match = &m->matches[nth]; if (match->empty()) { return Qnil; } else { return ENCODED_STR_NEW(match->data(), match->size(), p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"); } } } static VALUE re2_matchdata_named_match(const char* name, VALUE self) { int idx; re2_matchdata *m; re2_pattern *p; map groups; string name_as_string(name); Data_Get_Struct(self, re2_matchdata, m); Data_Get_Struct(m->regexp, re2_pattern, p); groups = p->pattern->NamedCapturingGroups(); if (groups.count(name_as_string) == 1) { idx = groups[name_as_string]; return re2_matchdata_nth_match(idx, self); } else { return Qnil; } } /* * Retrieve zero, one or more matches by index or name. * * @return [Array, String, Boolean] * * @overload [](index) * Access a particular match by index. 
* * @param [Fixnum] index the index of the match to fetch * @return [String, nil] the specified match * @example * m = RE2::Regexp.new('(\d+)').match("bob 123") * m[0] #=> "123" * * @overload [](start, length) * Access a range of matches by starting index and length. * * @param [Fixnum] start the index from which to start * @param [Fixnum] length the number of elements to fetch * @return [Array] the specified matches * @example * m = RE2::Regexp.new('(\d+)').match("bob 123") * m[0, 1] #=> ["123"] * * @overload [](range) * Access a range of matches by index. * * @param [Range] range the range of match indexes to fetch * @return [Array] the specified matches * @example * m = RE2::Regexp.new('(\d+)').match("bob 123") * m[0..1] #=> "[123", "123"] * * @overload [](name) * Access a particular match by name. * * @param [String, Symbol] name the name of the match to fetch * @return [String, nil] the specific match * @example * m = RE2::Regexp.new('(?P\d+)').match("bob 123") * m["number"] #=> "123" * m[:number] #=> "123" */ static VALUE re2_matchdata_aref(int argc, VALUE *argv, VALUE self) { VALUE idx, rest; rb_scan_args(argc, argv, "11", &idx, &rest); if (TYPE(idx) == T_STRING) { return re2_matchdata_named_match(StringValuePtr(idx), self); } else if (SYMBOL_P(idx)) { return re2_matchdata_named_match(rb_id2name(SYM2ID(idx)), self); } else if (!NIL_P(rest) || !FIXNUM_P(idx) || FIX2INT(idx) < 0) { return rb_ary_aref(argc, argv, re2_matchdata_to_a(self)); } else { return re2_matchdata_nth_match(FIX2INT(idx), self); } } /* * Returns the entire matched string. * * @return [String] the entire matched string */ static VALUE re2_matchdata_to_s(VALUE self) { return re2_matchdata_nth_match(0, self); } /* * Returns a printable version of the match. 
* * @return [String] a printable version of the match * @example * m = RE2::Regexp.new('(\d+)').match("bob 123") * m.inspect #=> "#" */ static VALUE re2_matchdata_inspect(VALUE self) { int i; re2_matchdata *m; re2_pattern *p; VALUE match, result; ostringstream output; Data_Get_Struct(self, re2_matchdata, m); Data_Get_Struct(m->regexp, re2_pattern, p); output << "#number_of_matches; i++) { output << " "; if (i > 0) { output << i << ":"; } match = re2_matchdata_nth_match(i, self); if (match == Qnil) { output << "nil"; } else { output << "\"" << StringValuePtr(match) << "\""; } } output << ">"; result = ENCODED_STR_NEW(output.str().data(), output.str().length(), p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"); return result; } /* * Returns a new RE2 object with a compiled version of * +pattern+ stored inside. Equivalent to +RE2.new+. * * @return [RE2::Regexp] an RE2::Regexp with the specified pattern and options * @param [String] pattern the pattern to compile * @param [Hash] options the options to compile a regexp with * @see RE2::Regexp.new * */ static VALUE re2_re2(int argc, VALUE *argv, VALUE self) { UNUSED(self); return rb_class_new_instance(argc, argv, re2_cRegexp); } /* * Returns a new {RE2::Regexp} object with a compiled version of * +pattern+ stored inside. * * @return [RE2::Regexp] * * @overload initialize(pattern) * Returns a new {RE2::Regexp} object with a compiled version of * +pattern+ stored inside with the default options. * * @param [String] pattern the pattern to compile * @return [RE2::Regexp] an RE2::Regexp with the specified pattern * @raise [NoMemoryError] if memory could not be allocated for the compiled * pattern * * @overload initialize(pattern, options) * Returns a new {RE2::Regexp} object with a compiled version of * +pattern+ stored inside with the specified options. 
* * @param [String] pattern the pattern to compile * @param [Hash] options the options with which to compile the pattern * @option options [Boolean] :utf8 (true) text and pattern are UTF-8; otherwise Latin-1 * @option options [Boolean] :posix_syntax (false) restrict regexps to POSIX egrep syntax * @option options [Boolean] :longest_match (false) search for longest match, not first match * @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR * @option options [Fixnum] :max_mem approx. max memory footprint of RE2 * @option options [Boolean] :literal (false) interpret string as literal, not regexp * @option options [Boolean] :never_nl (false) never match \n, even if it is in regexp * @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode) * @option options [Boolean] :perl_classes (false) allow Perl's \d \s \w \D \S \W when in posix_syntax mode * @option options [Boolean] :word_boundary (false) allow \b \B (word boundary and not) when in posix_syntax mode * @option options [Boolean] :one_line (false) ^ and $ only match beginning and end of text when in posix_syntax mode * @return [RE2::Regexp] an RE2::Regexp with the specified pattern and options * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern */ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) { VALUE pattern, options, utf8, posix_syntax, longest_match, log_errors, max_mem, literal, never_nl, case_sensitive, perl_classes, word_boundary, one_line; re2_pattern *p; rb_scan_args(argc, argv, "11", &pattern, &options); Data_Get_Struct(self, re2_pattern, p); if (RTEST(options)) { if (TYPE(options) != T_HASH) { rb_raise(rb_eArgError, "options should be a hash"); } RE2::Options re2_options; utf8 = rb_hash_aref(options, ID2SYM(id_utf8)); if (!NIL_P(utf8)) { re2_options.set_encoding(RTEST(utf8) ? 
RE2::Options::EncodingUTF8 : RE2::Options::EncodingLatin1); } posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax)); if (!NIL_P(posix_syntax)) { re2_options.set_posix_syntax(RTEST(posix_syntax)); } longest_match = rb_hash_aref(options, ID2SYM(id_longest_match)); if (!NIL_P(longest_match)) { re2_options.set_longest_match(RTEST(longest_match)); } log_errors = rb_hash_aref(options, ID2SYM(id_log_errors)); if (!NIL_P(log_errors)) { re2_options.set_log_errors(RTEST(log_errors)); } max_mem = rb_hash_aref(options, ID2SYM(id_max_mem)); if (!NIL_P(max_mem)) { re2_options.set_max_mem(NUM2INT(max_mem)); } literal = rb_hash_aref(options, ID2SYM(id_literal)); if (!NIL_P(literal)) { re2_options.set_literal(RTEST(literal)); } never_nl = rb_hash_aref(options, ID2SYM(id_never_nl)); if (!NIL_P(never_nl)) { re2_options.set_never_nl(RTEST(never_nl)); } case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive)); if (!NIL_P(case_sensitive)) { re2_options.set_case_sensitive(RTEST(case_sensitive)); } perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes)); if (!NIL_P(perl_classes)) { re2_options.set_perl_classes(RTEST(perl_classes)); } word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary)); if (!NIL_P(word_boundary)) { re2_options.set_word_boundary(RTEST(word_boundary)); } one_line = rb_hash_aref(options, ID2SYM(id_one_line)); if (!NIL_P(one_line)) { re2_options.set_one_line(RTEST(one_line)); } p->pattern = new(nothrow) RE2(StringValuePtr(pattern), re2_options); } else { p->pattern = new(nothrow) RE2(StringValuePtr(pattern)); } if (p->pattern == 0) { rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object"); } return self; } /* * Returns a printable version of the regular expression +re2+. 
* * @return [String] a printable version of the regular expression * @example * re2 = RE2::Regexp.new("woo?") * re2.inspect #=> "#" */ static VALUE re2_regexp_inspect(VALUE self) { re2_pattern *p; VALUE result; ostringstream output; Data_Get_Struct(self, re2_pattern, p); output << "#pattern->pattern() << "/>"; result = ENCODED_STR_NEW(output.str().data(), output.str().length(), p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"); return result; } /* * Returns a string version of the regular expression +re2+. * * @return [String] a string version of the regular expression * @example * re2 = RE2::Regexp.new("woo?") * re2.to_s #=> "woo?" */ static VALUE re2_regexp_to_s(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return ENCODED_STR_NEW(p->pattern->pattern().data(), p->pattern->pattern().size(), p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"); } /* * Returns whether or not the regular expression +re2+ * was compiled successfully or not. * * @return [Boolean] whether or not compilation was successful * @example * re2 = RE2::Regexp.new("woo?") * re2.ok? #=> true */ static VALUE re2_regexp_ok(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->ok()); } /* * Returns whether or not the regular expression +re2+ * was compiled with the utf8 option set to true. * * @return [Boolean] the utf8 option * @example * re2 = RE2::Regexp.new("woo?", :utf8 => true) * re2.utf8? #=> true */ static VALUE re2_regexp_utf8(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().encoding() == RE2::Options::EncodingUTF8); } /* * Returns whether or not the regular expression +re2+ * was compiled with the posix_syntax option set to true. * * @return [Boolean] the posix_syntax option * @example * re2 = RE2::Regexp.new("woo?", :posix_syntax => true) * re2.posix_syntax? 
#=> true */ static VALUE re2_regexp_posix_syntax(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().posix_syntax()); } /* * Returns whether or not the regular expression +re2+ * was compiled with the longest_match option set to true. * * @return [Boolean] the longest_match option * @example * re2 = RE2::Regexp.new("woo?", :longest_match => true) * re2.longest_match? #=> true */ static VALUE re2_regexp_longest_match(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().longest_match()); } /* * Returns whether or not the regular expression +re2+ * was compiled with the log_errors option set to true. * * @return [Boolean] the log_errors option * @example * re2 = RE2::Regexp.new("woo?", :log_errors => true) * re2.log_errors? #=> true */ static VALUE re2_regexp_log_errors(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().log_errors()); } /* * Returns the max_mem setting for the regular expression * +re2+. * * @return [Fixnum] the max_mem option * @example * re2 = RE2::Regexp.new("woo?", :max_mem => 1024) * re2.max_mem #=> 1024 */ static VALUE re2_regexp_max_mem(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return INT2FIX(p->pattern->options().max_mem()); } /* * Returns whether or not the regular expression +re2+ * was compiled with the literal option set to true. * * @return [Boolean] the literal option * @example * re2 = RE2::Regexp.new("woo?", :literal => true) * re2.literal? #=> true */ static VALUE re2_regexp_literal(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().literal()); } /* * Returns whether or not the regular expression +re2+ * was compiled with the never_nl option set to true. * * @return [Boolean] the never_nl option * @example * re2 = RE2::Regexp.new("woo?", :never_nl => true) * re2.never_nl? 
#=> true */ static VALUE re2_regexp_never_nl(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().never_nl()); } /* * Returns whether or not the regular expression +re2+ * was compiled with the case_sensitive option set to true. * * @return [Boolean] the case_sensitive option * @example * re2 = RE2::Regexp.new("woo?", :case_sensitive => true) * re2.case_sensitive? #=> true */ static VALUE re2_regexp_case_sensitive(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().case_sensitive()); } /* * Returns whether or not the regular expression +re2+ * was compiled with the case_sensitive option set to false. * * @return [Boolean] the inverse of the case_sensitive option * @example * re2 = RE2::Regexp.new("woo?", :case_sensitive => true) * re2.case_insensitive? #=> false * re2.casefold? #=> false */ static VALUE re2_regexp_case_insensitive(VALUE self) { return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue); } /* * Returns whether or not the regular expression +re2+ * was compiled with the perl_classes option set to true. * * @return [Boolean] the perl_classes option * @example * re2 = RE2::Regexp.new("woo?", :perl_classes => true) * re2.perl_classes? #=> true */ static VALUE re2_regexp_perl_classes(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().perl_classes()); } /* * Returns whether or not the regular expression +re2+ * was compiled with the word_boundary option set to true. * * @return [Boolean] the word_boundary option * @example * re2 = RE2::Regexp.new("woo?", :word_boundary => true) * re2.word_boundary? #=> true */ static VALUE re2_regexp_word_boundary(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().word_boundary()); } /* * Returns whether or not the regular expression +re2+ * was compiled with the one_line option set to true. 
* * @return [Boolean] the one_line option * @example * re2 = RE2::Regexp.new("woo?", :one_line => true) * re2.one_line? #=> true */ static VALUE re2_regexp_one_line(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().one_line()); } /* * If the RE2 could not be created properly, returns an * error string otherwise returns nil. * * @return [String, nil] the error string or nil */ static VALUE re2_regexp_error(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); if (p->pattern->ok()) { return Qnil; } else { return rb_str_new(p->pattern->error().data(), p->pattern->error().size()); } } /* * If the RE2 could not be created properly, returns * the offending portion of the regexp otherwise returns nil. * * @return [String, nil] the offending portion of the regexp or nil */ static VALUE re2_regexp_error_arg(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); if (p->pattern->ok()) { return Qnil; } else { return ENCODED_STR_NEW(p->pattern->error_arg().data(), p->pattern->error_arg().size(), p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"); } } /* * Returns the program size, a very approximate measure * of a regexp's "cost". Larger numbers are more expensive * than smaller numbers. * * @return [Fixnum] the regexp "cost" */ static VALUE re2_regexp_program_size(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return INT2FIX(p->pattern->ProgramSize()); } /* * Returns a hash of the options currently set for * +re2+. 
*
 * @return [Hash] the options
 */
static VALUE re2_regexp_options(VALUE self) {
  re2_pattern *wrapper;
  Data_Get_Struct(self, re2_pattern, wrapper);

  /* Every entry below reads from the same compiled option set. */
  const RE2::Options &opts = wrapper->pattern->options();
  VALUE result = rb_hash_new();

  rb_hash_aset(result, ID2SYM(id_utf8),
      BOOL2RUBY(opts.encoding() == RE2::Options::EncodingUTF8));
  rb_hash_aset(result, ID2SYM(id_posix_syntax),
      BOOL2RUBY(opts.posix_syntax()));
  rb_hash_aset(result, ID2SYM(id_longest_match),
      BOOL2RUBY(opts.longest_match()));
  rb_hash_aset(result, ID2SYM(id_log_errors),
      BOOL2RUBY(opts.log_errors()));
  rb_hash_aset(result, ID2SYM(id_max_mem),
      INT2FIX(opts.max_mem()));
  rb_hash_aset(result, ID2SYM(id_literal),
      BOOL2RUBY(opts.literal()));
  rb_hash_aset(result, ID2SYM(id_never_nl),
      BOOL2RUBY(opts.never_nl()));
  rb_hash_aset(result, ID2SYM(id_case_sensitive),
      BOOL2RUBY(opts.case_sensitive()));
  rb_hash_aset(result, ID2SYM(id_perl_classes),
      BOOL2RUBY(opts.perl_classes()));
  rb_hash_aset(result, ID2SYM(id_word_boundary),
      BOOL2RUBY(opts.word_boundary()));
  rb_hash_aset(result, ID2SYM(id_one_line),
      BOOL2RUBY(opts.one_line()));

  /* The hash is a snapshot, so hand it back frozen. */
  rb_obj_freeze(result);

  return result;
}

/*
 * Returns how many capturing subpatterns the regexp has, or -1 when
 * the regexp wasn't valid on construction. The overall match ($0) is
 * not counted: for the regexp "(a)(b)" this returns 2.
 *
 * @return [Fixnum] the number of capturing subpatterns
 */
static VALUE re2_regexp_number_of_capturing_groups(VALUE self) {
  re2_pattern *wrapper;
  Data_Get_Struct(self, re2_pattern, wrapper);

  int group_count = wrapper->pattern->NumberOfCapturingGroups();

  return INT2FIX(group_count);
}

/*
 * Returns a hash of names to capturing indices of groups.
*
 * @return [Hash] a hash of names to capturing indices
 */
static VALUE re2_regexp_named_capturing_groups(VALUE self) {
  re2_pattern *p;
  Data_Get_Struct(self, re2_pattern, p);

  /* Bind by const reference: the map is only read here, so there is no
   * need to copy it. */
  const std::map<std::string, int> &groups = p->pattern->NamedCapturingGroups();
  VALUE capturing_groups = rb_hash_new();

  for (std::map<std::string, int>::const_iterator iterator = groups.begin();
       iterator != groups.end(); ++iterator) {
    /* Keys are tagged with the encoding the pattern was compiled with. */
    rb_hash_aset(capturing_groups,
        ENCODED_STR_NEW(iterator->first.data(), iterator->first.size(),
          p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"),
        INT2FIX(iterator->second));
  }

  return capturing_groups;
}

/*
 * Match the pattern against the given +text+ and return either
 * a boolean (if no submatches are required) or a {RE2::MatchData}
 * instance.
 *
 * @return [Boolean, RE2::MatchData]
 *
 * @overload match(text)
 *   Returns an {RE2::MatchData} containing the matching
 *   pattern and all subpatterns resulting from looking for
 *   the regexp in +text+.
 *
 *   @param [String] text the text to search
 *   @return [RE2::MatchData] the matches
 *   @raise [NoMemoryError] if there was not enough memory to allocate the matches
 *   @example
 *     r = RE2::Regexp.new('w(o)(o)')
 *     r.match('woo') #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
 *
 * @overload match(text, 0)
 *   Returns either true or false indicating whether a
 *   successful match was made.
 *
 *   @param [String] text the text to search
 *   @return [Boolean] whether the match was successful
 *   @raise [NoMemoryError] if there was not enough memory to allocate the matches
 *   @example
 *     r = RE2::Regexp.new('w(o)(o)')
 *     r.match('woo', 0) #=> true
 *     r.match('bob', 0) #=> false
 *
 * @overload match(text, number_of_matches)
 *   See +match(text)+ but with a specific number of
 *   matches returned (padded with nils if necessary).
* * @param [String] text the text to search * @param [Fixnum] number_of_matches the number of matches to return * @return [RE2::MatchData] the matches * @raise [NoMemoryError] if there was not enough memory to allocate the matches * @example * r = RE2::Regexp.new('w(o)(o)') * r.match('woo', 1) #=> # * r.match('woo', 3) #=> # */ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) { int n; bool matched; re2_pattern *p; re2_matchdata *m; VALUE text, number_of_matches, matchdata; rb_scan_args(argc, argv, "11", &text, &number_of_matches); /* Ensure text is a string. */ text = StringValue(text); Data_Get_Struct(self, re2_pattern, p); if (RTEST(number_of_matches)) { n = NUM2INT(number_of_matches); } else { n = p->pattern->NumberOfCapturingGroups(); } if (n == 0) { matched = match(p->pattern, StringValuePtr(text), 0, static_cast(RSTRING_LEN(text)), RE2::UNANCHORED, 0, 0); return BOOL2RUBY(matched); } else { /* Because match returns the whole match as well. */ n += 1; matchdata = rb_class_new_instance(0, 0, re2_cMatchData); Data_Get_Struct(matchdata, re2_matchdata, m); m->matches = new(nothrow) re2::StringPiece[n]; m->regexp = self; m->text = rb_str_dup(text); rb_str_freeze(m->text); if (m->matches == 0) { rb_raise(rb_eNoMemError, "not enough memory to allocate StringPieces for matches"); } m->number_of_matches = n; matched = match(p->pattern, StringValuePtr(m->text), 0, static_cast(RSTRING_LEN(m->text)), RE2::UNANCHORED, m->matches, n); if (matched) { return matchdata; } else { return Qnil; } } } /* * Returns true or false to indicate a successful match. * Equivalent to +re2.match(text, 0)+. * * @return [Boolean] whether the match was successful */ static VALUE re2_regexp_match_query(VALUE self, VALUE text) { VALUE argv[2]; argv[0] = text; argv[1] = INT2FIX(0); return re2_regexp_match(2, argv, self); } /* * Returns a {RE2::Scanner} for scanning the given text incrementally. 
* * @example * c = RE2::Regexp.new('(\w+)').scan("Foo bar baz") */ static VALUE re2_regexp_scan(VALUE self, VALUE text) { re2_pattern *p; re2_scanner *c; VALUE scanner; Data_Get_Struct(self, re2_pattern, p); scanner = rb_class_new_instance(0, 0, re2_cScanner); Data_Get_Struct(scanner, re2_scanner, c); c->input = new(nothrow) re2::StringPiece(StringValuePtr(text)); c->regexp = self; c->text = text; c->number_of_capturing_groups = p->pattern->NumberOfCapturingGroups(); c->eof = false; return scanner; } /* * Returns a copy of +str+ with the first occurrence +pattern+ * replaced with +rewrite+. * * @param [String] str the string to modify * @param [String, RE2::Regexp] pattern a regexp matching text to be replaced * @param [String] rewrite the string to replace with * @return [String] the resulting string * @example * RE2.Replace("hello there", "hello", "howdy") #=> "howdy there" * re2 = RE2.new("hel+o") * RE2.Replace("hello there", re2, "yo") #=> "yo there" */ static VALUE re2_Replace(VALUE self, VALUE str, VALUE pattern, VALUE rewrite) { UNUSED(self); re2_pattern *p; /* Convert all the inputs to be pumped into RE2::Replace. */ string str_as_string(StringValuePtr(str)); /* Do the replacement. */ if (rb_obj_is_kind_of(pattern, re2_cRegexp)) { Data_Get_Struct(pattern, re2_pattern, p); RE2::Replace(&str_as_string, *p->pattern, StringValuePtr(rewrite)); return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(), p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"); } else { RE2::Replace(&str_as_string, StringValuePtr(pattern), StringValuePtr(rewrite)); return ENCODED_STR_NEW2(str_as_string.data(), str_as_string.size(), pattern); } } /* * Return a copy of +str+ with +pattern+ replaced by +rewrite+. 
* * @param [String] str the string to modify * @param [String, RE2::Regexp] pattern a regexp matching text to be replaced * @param [String] rewrite the string to replace with * @return [String] the resulting string * @example * re2 = RE2.new("oo?") * RE2.GlobalReplace("whoops-doops", re2, "e") #=> "wheps-deps" * RE2.GlobalReplace("hello there", "e", "i") #=> "hillo thiri" */ static VALUE re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern, VALUE rewrite) { UNUSED(self); /* Convert all the inputs to be pumped into RE2::GlobalReplace. */ re2_pattern *p; string str_as_string(StringValuePtr(str)); /* Do the replacement. */ if (rb_obj_is_kind_of(pattern, re2_cRegexp)) { Data_Get_Struct(pattern, re2_pattern, p); RE2::GlobalReplace(&str_as_string, *p->pattern, StringValuePtr(rewrite)); return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(), p->pattern->options().encoding() == RE2::Options::EncodingUTF8 ? "UTF-8" : "ISO-8859-1"); } else { RE2::GlobalReplace(&str_as_string, StringValuePtr(pattern), StringValuePtr(rewrite)); return ENCODED_STR_NEW2(str_as_string.data(), str_as_string.size(), pattern); } } /* * Returns a version of str with all potentially meaningful regexp * characters escaped. The returned string, used as a regular * expression, will exactly match the original string. * * @param [String] unquoted the unquoted string * @return [String] the escaped string * @example * RE2::Regexp.escape("1.5-2.0?") #=> "1\.5\-2\.0\?" */ static VALUE re2_QuoteMeta(VALUE self, VALUE unquoted) { UNUSED(self); string quoted_string = RE2::QuoteMeta(StringValuePtr(unquoted)); return rb_str_new(quoted_string.data(), quoted_string.size()); } /* Forward declare Init_re2 to be called by C code but define it separately so * that YARD can parse it. 
*/ extern "C" void Init_re2(void); void Init_re2(void) { re2_mRE2 = rb_define_module("RE2"); re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject); re2_cMatchData = rb_define_class_under(re2_mRE2, "MatchData", rb_cObject); re2_cScanner = rb_define_class_under(re2_mRE2, "Scanner", rb_cObject); rb_define_alloc_func(re2_cRegexp, (VALUE (*)(VALUE))re2_regexp_allocate); rb_define_alloc_func(re2_cMatchData, (VALUE (*)(VALUE))re2_matchdata_allocate); rb_define_alloc_func(re2_cScanner, (VALUE (*)(VALUE))re2_scanner_allocate); rb_define_method(re2_cMatchData, "string", RUBY_METHOD_FUNC(re2_matchdata_string), 0); rb_define_method(re2_cMatchData, "regexp", RUBY_METHOD_FUNC(re2_matchdata_regexp), 0); rb_define_method(re2_cMatchData, "to_a", RUBY_METHOD_FUNC(re2_matchdata_to_a), 0); rb_define_method(re2_cMatchData, "size", RUBY_METHOD_FUNC(re2_matchdata_size), 0); rb_define_method(re2_cMatchData, "length", RUBY_METHOD_FUNC(re2_matchdata_size), 0); rb_define_method(re2_cMatchData, "begin", RUBY_METHOD_FUNC(re2_matchdata_begin), 1); rb_define_method(re2_cMatchData, "end", RUBY_METHOD_FUNC(re2_matchdata_end), 1); rb_define_method(re2_cMatchData, "[]", RUBY_METHOD_FUNC(re2_matchdata_aref), -1); rb_define_method(re2_cMatchData, "to_s", RUBY_METHOD_FUNC(re2_matchdata_to_s), 0); rb_define_method(re2_cMatchData, "inspect", RUBY_METHOD_FUNC(re2_matchdata_inspect), 0); rb_define_method(re2_cScanner, "string", RUBY_METHOD_FUNC(re2_scanner_string), 0); rb_define_method(re2_cScanner, "eof?", RUBY_METHOD_FUNC(re2_scanner_eof), 0); rb_define_method(re2_cScanner, "regexp", RUBY_METHOD_FUNC(re2_scanner_regexp), 0); rb_define_method(re2_cScanner, "scan", RUBY_METHOD_FUNC(re2_scanner_scan), 0); rb_define_method(re2_cScanner, "rewind", RUBY_METHOD_FUNC(re2_scanner_rewind), 0); rb_define_method(re2_cRegexp, "initialize", RUBY_METHOD_FUNC(re2_regexp_initialize), -1); rb_define_method(re2_cRegexp, "ok?", RUBY_METHOD_FUNC(re2_regexp_ok), 0); rb_define_method(re2_cRegexp, "error", 
RUBY_METHOD_FUNC(re2_regexp_error), 0); rb_define_method(re2_cRegexp, "error_arg", RUBY_METHOD_FUNC(re2_regexp_error_arg), 0); rb_define_method(re2_cRegexp, "program_size", RUBY_METHOD_FUNC(re2_regexp_program_size), 0); rb_define_method(re2_cRegexp, "options", RUBY_METHOD_FUNC(re2_regexp_options), 0); rb_define_method(re2_cRegexp, "number_of_capturing_groups", RUBY_METHOD_FUNC(re2_regexp_number_of_capturing_groups), 0); rb_define_method(re2_cRegexp, "named_capturing_groups", RUBY_METHOD_FUNC(re2_regexp_named_capturing_groups), 0); rb_define_method(re2_cRegexp, "match", RUBY_METHOD_FUNC(re2_regexp_match), -1); rb_define_method(re2_cRegexp, "match?", RUBY_METHOD_FUNC(re2_regexp_match_query), 1); rb_define_method(re2_cRegexp, "=~", RUBY_METHOD_FUNC(re2_regexp_match_query), 1); rb_define_method(re2_cRegexp, "===", RUBY_METHOD_FUNC(re2_regexp_match_query), 1); rb_define_method(re2_cRegexp, "scan", RUBY_METHOD_FUNC(re2_regexp_scan), 1); rb_define_method(re2_cRegexp, "to_s", RUBY_METHOD_FUNC(re2_regexp_to_s), 0); rb_define_method(re2_cRegexp, "to_str", RUBY_METHOD_FUNC(re2_regexp_to_s), 0); rb_define_method(re2_cRegexp, "pattern", RUBY_METHOD_FUNC(re2_regexp_to_s), 0); rb_define_method(re2_cRegexp, "source", RUBY_METHOD_FUNC(re2_regexp_to_s), 0); rb_define_method(re2_cRegexp, "inspect", RUBY_METHOD_FUNC(re2_regexp_inspect), 0); rb_define_method(re2_cRegexp, "utf8?", RUBY_METHOD_FUNC(re2_regexp_utf8), 0); rb_define_method(re2_cRegexp, "posix_syntax?", RUBY_METHOD_FUNC(re2_regexp_posix_syntax), 0); rb_define_method(re2_cRegexp, "longest_match?", RUBY_METHOD_FUNC(re2_regexp_longest_match), 0); rb_define_method(re2_cRegexp, "log_errors?", RUBY_METHOD_FUNC(re2_regexp_log_errors), 0); rb_define_method(re2_cRegexp, "max_mem", RUBY_METHOD_FUNC(re2_regexp_max_mem), 0); rb_define_method(re2_cRegexp, "literal?", RUBY_METHOD_FUNC(re2_regexp_literal), 0); rb_define_method(re2_cRegexp, "never_nl?", RUBY_METHOD_FUNC(re2_regexp_never_nl), 0); rb_define_method(re2_cRegexp, 
"case_sensitive?", RUBY_METHOD_FUNC(re2_regexp_case_sensitive), 0); rb_define_method(re2_cRegexp, "case_insensitive?", RUBY_METHOD_FUNC(re2_regexp_case_insensitive), 0); rb_define_method(re2_cRegexp, "casefold?", RUBY_METHOD_FUNC(re2_regexp_case_insensitive), 0); rb_define_method(re2_cRegexp, "perl_classes?", RUBY_METHOD_FUNC(re2_regexp_perl_classes), 0); rb_define_method(re2_cRegexp, "word_boundary?", RUBY_METHOD_FUNC(re2_regexp_word_boundary), 0); rb_define_method(re2_cRegexp, "one_line?", RUBY_METHOD_FUNC(re2_regexp_one_line), 0); rb_define_module_function(re2_mRE2, "Replace", RUBY_METHOD_FUNC(re2_Replace), 3); rb_define_module_function(re2_mRE2, "GlobalReplace", RUBY_METHOD_FUNC(re2_GlobalReplace), 3); rb_define_module_function(re2_mRE2, "QuoteMeta", RUBY_METHOD_FUNC(re2_QuoteMeta), 1); rb_define_singleton_method(re2_cRegexp, "escape", RUBY_METHOD_FUNC(re2_QuoteMeta), 1); rb_define_singleton_method(re2_cRegexp, "quote", RUBY_METHOD_FUNC(re2_QuoteMeta), 1); rb_define_singleton_method(re2_cRegexp, "compile", RUBY_METHOD_FUNC(rb_class_new_instance), -1); rb_define_global_function("RE2", RUBY_METHOD_FUNC(re2_re2), -1); /* Create the symbols used in options. */ id_utf8 = rb_intern("utf8"); id_posix_syntax = rb_intern("posix_syntax"); id_longest_match = rb_intern("longest_match"); id_log_errors = rb_intern("log_errors"); id_max_mem = rb_intern("max_mem"); id_literal = rb_intern("literal"); id_never_nl = rb_intern("never_nl"); id_case_sensitive = rb_intern("case_sensitive"); id_perl_classes = rb_intern("perl_classes"); id_word_boundary = rb_intern("word_boundary"); id_one_line = rb_intern("one_line"); #if 0 /* Fake so YARD generates the file. 
*/ rb_mKernel = rb_define_module("Kernel"); #endif } re2-1.2.0/ext/re2/extconf.rb0000644000004100000410000000427713656356061015542 0ustar www-datawww-data# re2 (http://github.com/mudge/re2) # Ruby bindings to re2, an "efficient, principled regular expression library" # # Copyright (c) 2010-2012, Paul Mucur (http://mudge.name) # Released under the BSD Licence, please see LICENSE.txt require 'mkmf' if ENV["CC"] RbConfig::MAKEFILE_CONFIG["CC"] = ENV["CC"] RbConfig::CONFIG["CC"] = ENV["CC"] end if ENV["CXX"] RbConfig::MAKEFILE_CONFIG["CXX"] = ENV["CXX"] RbConfig::CONFIG["CXX"] = ENV["CXX"] end incl, lib = dir_config("re2", "/usr/local/include", "/usr/local/lib") $CFLAGS << " -Wall -Wextra -funroll-loops" # Pass -x c++ to force gcc to compile the test program # as C++ (as it will end in .c by default). compile_options = "-x c++" have_library("stdc++") have_header("stdint.h") have_func("rb_str_sublen") unless have_library("re2") abort "You must have re2 installed and specified with --with-re2-dir, please see https://github.com/google/re2/wiki/Install" end # Recent versions of re2 now require a compiler with C++11 support checking_for("re2 requires C++11 compiler") do minimal_program = < int main() { return 0; } SRC unless try_compile(minimal_program, compile_options) if try_compile(minimal_program, compile_options + " -std=c++11") compile_options << " -std=c++11" $CPPFLAGS << " -std=c++11" elsif try_compile(minimal_program, compile_options + " -std=c++0x") compile_options << " -std=c++0x" $CPPFLAGS << " -std=c++0x" else abort "Cannot compile re2 with your compiler: recent versions require C++11 support." end end end # Determine which version of re2 the user has installed. # Revision d9f8806c004d added an `endpos` argument to the # generic Match() function. # # To test for this, try to compile a simple program that uses # the newer form of Match() and set a flag if it is successful. 
checking_for("RE2::Match() with endpos argument") do test_re2_match_signature = < int main() { RE2 pattern("test"); re2::StringPiece *match; pattern.Match("test", 0, 0, RE2::UNANCHORED, match, 0); return 0; } SRC if try_compile(test_re2_match_signature, compile_options) $defs.push("-DHAVE_ENDPOS_ARGUMENT") end end create_makefile("re2") re2-1.2.0/LICENSE.txt0000644000004100000410000000273613656356061014100 0ustar www-datawww-dataCopyright (c) 2010-2014, Paul Mucur. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Paul Mucur, nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.