re2-0.7.0/0000755000175000017500000000000012522466615011217 5ustar lucaslucasre2-0.7.0/LICENSE.txt0000644000175000017500000000273612522466615013052 0ustar lucaslucasCopyright (c) 2010-2014, Paul Mucur. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Paul Mucur, nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. re2-0.7.0/lib/0000755000175000017500000000000012522466615011765 5ustar lucaslucasre2-0.7.0/lib/re2/0000755000175000017500000000000012522466615012455 5ustar lucaslucasre2-0.7.0/lib/re2/scanner.rb0000644000175000017500000000032512522466615014433 0ustar lucaslucasmodule RE2 class Scanner include Enumerable def each if block_given? 
while matches = scan yield matches end else to_enum(:each) end end end end re2-0.7.0/lib/re2/string.rb0000644000175000017500000000743212522466615014316 0ustar lucaslucas# re2 (http://github.com/mudge/re2) # Ruby bindings to re2, an "efficient, principled regular expression library" # # Copyright (c) 2010-2014, Paul Mucur (http://mudge.name) # Released under the BSD Licence, please see LICENSE.txt require "re2" module RE2 module String # Replaces the first occurrence +pattern+ with +rewrite+ and returns a new # string. # # @param [String, RE2::Regexp] pattern a regexp matching text to be replaced # @param [String] rewrite the string to replace with # @example # "hello there".re2_sub("hello", "howdy") #=> "howdy there" # re2 = RE2.new("hel+o") # "hello there".re2_sub(re2, "yo") #=> "yo there" # text = "Good morning" # text.re2_sub("morn", "even") #=> "Good evening" # text #=> "Good morning" def re2_sub(*args) RE2.Replace(self, *args) end # Replaces every occurrence of +pattern+ with +rewrite+ and return a new string. # # @param [String, RE2::Regexp] pattern a regexp matching text to be replaced # @param [String] rewrite the string to replace with # @example # "hello there".re2_gsub("e", "i") #=> "hillo thiri" # re2 = RE2.new("oo?") # "whoops-doops".re2_gsub(re2, "e") #=> "wheps-deps" # text = "Good morning" # text.re2_gsub("o", "ee") #=> "Geeeed meerning" # text #=> "Good morning" def re2_gsub(*args) RE2.GlobalReplace(self, *args) end # Match the pattern and return either a boolean (if no submatches are required) # or a {RE2::MatchData} instance. # # @return [Boolean, RE2::MatchData] # # @overload match(pattern) # Returns an {RE2::MatchData} containing the matching # pattern and all subpatterns resulting from looking for # +pattern+. 
# # @param [String, RE2::Regexp] pattern the regular expression to match # @return [RE2::MatchData] the matches # @raise [NoMemoryError] if there was not enough memory to allocate the matches # @example # r = RE2::Regexp.new('w(o)(o)') # "woo".re2_match(r) #=> # # # @overload match(pattern, 0) # Returns either true or false indicating whether a # successful match was made. # # @param [String, RE2::Regexp] pattern the regular expression to match # @return [Boolean] whether the match was successful # @raise [NoMemoryError] if there was not enough memory to allocate the matches # @example # r = RE2::Regexp.new('w(o)(o)') # "woo".re2_match(0) #=> true # "bob".re2_match(0) #=> false # # @overload match(pattern, number_of_matches) # See +match(pattern)+ but with a specific number of # matches returned (padded with nils if necessary). # # @param [String, RE2::Regexp] pattern the regular expression to match # @param [Fixnum] number_of_matches the number of matches to return # @return [RE2::MatchData] the matches # @raise [NoMemoryError] if there was not enough memory to allocate the matches # @example # r = RE2::Regexp.new('w(o)(o)') # "woo".re2_match(r, 1) #=> # # "woo".re2_match(r, 3) #=> # def re2_match(pattern, *args) RE2::Regexp.new(pattern).match(self, *args) end # Escapes all potentially meaningful regexp characters. # The returned string, used as a regular expression, will exactly match the # original string. # # @return [String] the escaped string # @example # "1.5-2.0?".escape #=> "1\.5\-2\.0\?" 
def re2_escape RE2.QuoteMeta(self) end alias_method :re2_quote, :re2_escape end end re2-0.7.0/lib/re2.rb0000644000175000017500000000041612522466615013003 0ustar lucaslucas# re2 (http://github.com/mudge/re2) # Ruby bindings to re2, an "efficient, principled regular expression library" # # Copyright (c) 2010-2014, Paul Mucur (http://mudge.name) # Released under the BSD Licence, please see LICENSE.txt require "re2.so" require "re2/scanner" re2-0.7.0/spec/0000755000175000017500000000000012522466615012151 5ustar lucaslucasre2-0.7.0/spec/re2/0000755000175000017500000000000012522466615012641 5ustar lucaslucasre2-0.7.0/spec/re2/string_spec.rb0000644000175000017500000000320712522466615015510 0ustar lucaslucasrequire "spec_helper" require "re2/string" class String include RE2::String end describe RE2::String do describe "#re2_sub" do it "delegates to RE2.Replace to perform replacement" do "My name is Robert Paulson".re2_sub('Robert', 'Crobert').must_equal("My name is Crobert Paulson") end it "doesn't perform an in-place replacement" do string = "My name is Robert Paulson" string.re2_sub('Robert', 'Crobert').wont_be_same_as(string) end end describe "#re2_gsub" do it "delegates to RE2.GlobalReplace to perform replacement" do "My name is Robert Paulson".re2_gsub('a', 'e').must_equal("My neme is Robert Peulson") end it "doesn't perform an in-place replacement" do string = "My name is Robert Paulson" string.re2_gsub('a', 'e').wont_be_same_as(string) end end describe "#re2_match" do it "delegates to RE2::Regexp#match to perform matches" do md = "My name is Robert Paulson".re2_match('My name is (\S+) (\S+)') md.must_be_instance_of(RE2::MatchData) md[0].must_equal("My name is Robert Paulson") md[1].must_equal("Robert") md[2].must_equal("Paulson") end it "supports limiting the number of matches" do md = "My name is Robert Paulson".re2_match('My name is (\S+) (\S+)', 0) md.must_equal(true) end end describe "#re2_escape" do it "escapes the string for use in regular expressions" do 
"1.5-2.0?".re2_escape.must_equal('1\.5\-2\.0\?') end end describe "#re2_quote" do it "escapes the string for use in regular expressions" do "1.5-2.0?".re2_quote.must_equal('1\.5\-2\.0\?') end end end re2-0.7.0/spec/re2/scanner_spec.rb0000644000175000017500000000464612522466615015643 0ustar lucaslucasrequire "spec_helper" describe RE2::Scanner do describe "#regexp" do it "returns the original pattern for the scanner" do re = RE2::Regexp.new('(\w+)') scanner = re.scan("It is a truth") scanner.regexp.must_be_same_as(re) end end describe "#string" do it "returns the original text for the scanner" do re = RE2::Regexp.new('(\w+)') text = "It is a truth" scanner = re.scan(text) scanner.string.must_be_same_as(text) end end describe "#scan" do it "returns the next array of matches" do r = RE2::Regexp.new('(\w+)') scanner = r.scan("It is a truth universally acknowledged") scanner.scan.must_equal(["It"]) scanner.scan.must_equal(["is"]) scanner.scan.must_equal(["a"]) scanner.scan.must_equal(["truth"]) scanner.scan.must_equal(["universally"]) scanner.scan.must_equal(["acknowledged"]) scanner.scan.must_be_nil end it "returns an empty array if there are no capturing groups" do r = RE2::Regexp.new('\w+') scanner = r.scan("Foo bar") scanner.scan.must_equal([]) end it "returns nil if there is no match" do r = RE2::Regexp.new('\d+') scanner = r.scan("Foo bar") scanner.scan.must_be_nil end end it "is enumerable" do r = RE2::Regexp.new('(\d)') scanner = r.scan("There are 1 some 2 numbers 3") scanner.must_be_kind_of(Enumerable) end describe "#each" do it "yields each match" do r = RE2::Regexp.new('(\d)') scanner = r.scan("There are 1 some 2 numbers 3") matches = [] scanner.each do |match| matches << match end matches.must_equal([["1"], ["2"], ["3"]]) end it "returns an enumerator when not given a block" do r = RE2::Regexp.new('(\d)') scanner = r.scan("There are 1 some 2 numbers 3") # Prior to Ruby 1.9, Enumerator was within Enumerable. 
if defined?(Enumerator) scanner.each.must_be_kind_of(Enumerator) elsif defined?(Enumerable::Enumerator) scanner.each.must_be_kind_of(Enumerable::Enumerator) end end end describe "#rewind" do it "resets any consumption" do r = RE2::Regexp.new('(\d)') scanner = r.scan("There are 1 some 2 numbers 3") scanner.to_enum.first.must_equal(["1"]) scanner.to_enum.first.must_equal(["2"]) scanner.rewind scanner.to_enum.first.must_equal(["1"]) end end end re2-0.7.0/spec/re2/match_data_spec.rb0000644000175000017500000001472512522466615016276 0ustar lucaslucas# encoding: utf-8 require "spec_helper" describe RE2::MatchData do describe "#to_a" do it "is populated with the match and capturing groups" do a = RE2::Regexp.new('w(o)(o)').match('woo').to_a a.must_equal(["woo", "o", "o"]) end it "populates optional capturing groups with nil if they are missing" do a = RE2::Regexp.new('(\d?)(a)(b)').match('ab').to_a a.must_equal(["ab", nil, "a", "b"]) end end describe "#[]" do it "accesses capturing groups by numerical index" do md = RE2::Regexp.new('(\d)(\d{2})').match("123") md[1].must_equal("1") md[2].must_equal("23") end it "has the whole match as the 0th item" do md = RE2::Regexp.new('(\d)(\d{2})').match("123") md[0].must_equal("123") end it "supports access by numerical ranges" do md = RE2::Regexp.new('(\d+) (\d+) (\d+)').match("123 456 789") md[1..3].must_equal(["123", "456", "789"]) md[1...3].must_equal(["123", "456"]) end it "supports slicing" do md = RE2::Regexp.new('(\d+) (\d+) (\d+)').match("123 456 789") md[1, 3].must_equal(["123", "456", "789"]) md[1, 2].must_equal(["123", "456"]) end it "returns nil if attempting to access non-existent capturing groups by index" do md = RE2::Regexp.new('(\d+)').match('bob 123') md[2].must_be_nil md[3].must_be_nil end it "allows access by string names when there are named groups" do md = RE2::Regexp.new('(?P\d+)').match('bob 123') md["numbers"].must_equal("123") end it "allows access by symbol names when there are named groups" do md = 
RE2::Regexp.new('(?P\d+)').match('bob 123') md[:numbers].must_equal("123") end it "allows access by names and indices with mixed groups" do md = RE2::Regexp.new('(?P\w+)(\s*)(?P\d+)').match("bob 123") md["name"].must_equal("bob") md[:name].must_equal("bob") md[2].must_equal(" ") md["numbers"].must_equal("123") md[:numbers].must_equal("123") end it "returns nil if no such named group exists" do md = RE2::Regexp.new('(\d+)').match("bob 123") md["missing"].must_be_nil md[:missing].must_be_nil end it "raises an error if given an inappropriate index" do md = RE2::Regexp.new('(\d+)').match("bob 123") lambda { md[nil] }.must_raise(TypeError) end if String.method_defined?(:encoding) it "returns UTF-8 encoded strings by default" do md = RE2::Regexp.new('(?P\S+)').match("bob") md[0].encoding.name.must_equal("UTF-8") md["name"].encoding.name.must_equal("UTF-8") md[:name].encoding.name.must_equal("UTF-8") end it "returns Latin 1 strings encoding when utf-8 is false" do md = RE2::Regexp.new('(?P\S+)', :utf8 => false).match('bob') md[0].encoding.name.must_equal("ISO-8859-1") md["name"].encoding.name.must_equal("ISO-8859-1") md[:name].encoding.name.must_equal("ISO-8859-1") end end end describe "#string" do it "returns the original string to match against" do re = RE2::Regexp.new('(\D+)').match("bob") re.string.must_equal("bob") end it "returns a copy, not the actual original" do string = "bob" re = RE2::Regexp.new('(\D+)').match(string) re.string.wont_be_same_as(string) end it "returns a frozen string" do re = RE2::Regexp.new('(\D+)').match("bob") re.string.must_be(:frozen?) 
end end describe "#size" do it "returns the number of capturing groups plus the matching string" do md = RE2::Regexp.new('(\d+) (\d+)').match("1234 56") md.size.must_equal(3) end end describe "#length" do it "returns the number of capturing groups plus the matching string" do md = RE2::Regexp.new('(\d+) (\d+)').match("1234 56") md.length.must_equal(3) end end describe "#regexp" do it "returns the original RE2::Regexp used" do re = RE2::Regexp.new('(\d+)') md = re.match("123") md.regexp.must_be_same_as(re) end end describe "#inspect" do it "returns a text representation of the object and indices" do md = RE2::Regexp.new('(\d+) (\d+)').match("1234 56") md.inspect.must_equal('#') end it "represents missing matches as nil" do md = RE2::Regexp.new('(\d+) (\d+)?').match("1234 ") md.inspect.must_equal('#') end end describe "#to_s" do it "returns the matching part of the original string" do md = RE2::Regexp.new('(\d{2,5})').match("one two 23456") md.to_s.must_equal("23456") end end describe "#to_ary" do it "allows the object to be expanded with an asterisk" do md = RE2::Regexp.new('(\d+) (\d+)').match("1234 56") m1, m2, m3 = *md m1.must_equal("1234 56") m2.must_equal("1234") m3.must_equal("56") end end describe "#begin" do it "returns the offset of the start of a match by index" do md = RE2::Regexp.new('(wo{2})').match('a woohoo') md.string[md.begin(0)..-1].must_equal('woohoo') end it "returns the offset of the start of a match by string name" do md = RE2::Regexp.new('(?Pfo{2})').match('a foobar') md.string[md.begin('foo')..-1].must_equal('foobar') end it "returns the offset of the start of a match by symbol name" do md = RE2::Regexp.new('(?Pfo{2})').match('a foobar') md.string[md.begin(:foo)..-1].must_equal('foobar') end it "returns the offset despite multibyte characters" do md = RE2::Regexp.new('(Ruby)').match('I ♥ Ruby') md.string[md.begin(0)..-1].must_equal('Ruby') end end describe "#end" do it "returns the offset of the character following the end of a match" do md = 
RE2::Regexp.new('(wo{2})').match('a woohoo') md.string[0...md.end(0)].must_equal('a woo') end it "returns the offset of a match by string name" do md = RE2::Regexp.new('(?Pfo{2})').match('a foobar') md.string[0...md.end('foo')].must_equal('a foo') end it "returns the offset of a match by symbol name" do md = RE2::Regexp.new('(?Pfo{2})').match('a foobar') md.string[0...md.end(:foo)].must_equal('a foo') end it "returns the offset despite multibyte characters" do md = RE2::Regexp.new('(Ruby)').match('I ♥ Ruby') md.string[0...md.end(0)].must_equal('I ♥ Ruby') end end end re2-0.7.0/spec/re2/regexp_spec.rb0000644000175000017500000002677712522466615015515 0ustar lucaslucasrequire "spec_helper" describe RE2::Regexp do describe "#initialize" do it "returns an instance given only a pattern" do re = RE2::Regexp.new('woo') re.must_be_instance_of(RE2::Regexp) end it "returns an instance given a pattern and options" do re = RE2::Regexp.new('woo', :case_sensitive => false) re.must_be_instance_of(RE2::Regexp) end it "raises an error if given an inappropriate type" do lambda { RE2::Regexp.new(nil) }.must_raise(TypeError) end end describe "#compile" do it "returns an instance given only a pattern" do re = RE2::Regexp.compile('woo') re.must_be_instance_of(RE2::Regexp) end it "returns an instance given a pattern and options" do re = RE2::Regexp.compile('woo', :case_sensitive => false) re.must_be_instance_of(RE2::Regexp) end end describe "#options" do it "returns a hash of options" do options = RE2::Regexp.new('woo').options options.must_be_instance_of(Hash) end it "is populated with default options when nothing has been set" do options = RE2::Regexp.new('woo').options assert options[:utf8] refute options[:posix_syntax] refute options[:longest_match] assert [:log_errors] refute options[:literal] refute options[:never_nl] assert options[:case_sensitive] refute options[:perl_classes] refute options[:word_boundary] refute options[:one_line] end it "is populated with overridden options 
when specified" do options = RE2::Regexp.new('woo', :case_sensitive => false).options refute options[:case_sensitive] end end describe "#error" do it "returns nil if there is no error" do error = RE2::Regexp.new('woo').error error.must_be_nil end # Use log_errors => false to suppress RE2's logging to STDERR. it "contains the error string if there is an error" do error = RE2::Regexp.new('wo(o', :log_errors => false).error error.must_equal("missing ): wo(o") end end describe "#error_arg" do it "returns nil if there is no error" do error_arg = RE2::Regexp.new('woo').error_arg error_arg.must_be_nil end it "returns the offending portin of the regexp if there is an error" do error_arg = RE2::Regexp.new('wo(o', :log_errors => false).error_arg error_arg.must_equal("wo(o") end end describe "#program_size" do it "returns a numeric value" do program_size = RE2::Regexp.new('w(o)(o)').program_size program_size.must_be_instance_of(Fixnum) end end describe "#to_str" do it "returns the original pattern" do string = RE2::Regexp.new('w(o)(o)').to_str string.must_equal("w(o)(o)") end end describe "#pattern" do it "returns the original pattern" do pattern = RE2::Regexp.new('w(o)(o)').pattern pattern.must_equal("w(o)(o)") end end describe "#inspect" do it "shows the class name and original pattern" do string = RE2::Regexp.new('w(o)(o)').inspect string.must_equal("#") end end describe "#utf8?" do it "returns true by default" do RE2::Regexp.new('woo').must_be(:utf8?) end it "can be overridden on initialization" do re = RE2::Regexp.new('woo', :utf8 => false) re.wont_be(:utf8?) end end describe "#posix_syntax?" do it "returns false by default" do RE2::Regexp.new('woo').wont_be(:posix_syntax?) end it "can be overridden on initialization" do re = RE2::Regexp.new('woo', :posix_syntax => true) re.must_be(:posix_syntax?) end end describe "#literal?" do it "returns false by default" do RE2::Regexp.new('woo').wont_be(:literal?) 
end it "can be overridden on initialization" do re = RE2::Regexp.new('woo', :literal => true) re.must_be(:literal?) end end describe "#never_nl?" do it "returns false by default" do RE2::Regexp.new('woo').wont_be(:never_nl?) end it "can be overridden on initialization" do re = RE2::Regexp.new('woo', :never_nl => true) re.must_be(:never_nl?) end end describe "#case_sensitive?" do it "returns true by default" do RE2::Regexp.new('woo').must_be(:case_sensitive?) end it "can be overridden on initialization" do re = RE2::Regexp.new('woo', :case_sensitive => false) re.wont_be(:case_sensitive?) end end describe "#case_insensitive?" do it "returns false by default" do RE2::Regexp.new('woo').wont_be(:case_insensitive?) end it "can be overridden on initialization" do re = RE2::Regexp.new('woo', :case_sensitive => false) re.must_be(:case_insensitive?) end end describe "#casefold?" do it "returns true by default" do RE2::Regexp.new('woo').wont_be(:casefold?) end it "can be overridden on initialization" do re = RE2::Regexp.new('woo', :case_sensitive => false) re.must_be(:casefold?) end end describe "#longest_match?" do it "returns false by default" do RE2::Regexp.new('woo').wont_be(:casefold?) end it "can be overridden on initialization" do re = RE2::Regexp.new('woo', :longest_match => true) re.must_be(:longest_match?) end end describe "#log_errors?" do it "returns true by default" do RE2::Regexp.new('woo').must_be(:log_errors?) end it "can be overridden on initialization" do re = RE2::Regexp.new('woo', :log_errors => false) re.wont_be(:log_errors?) end end describe "#perl_classes?" do it "returns false by default" do RE2::Regexp.new('woo').wont_be(:perl_classes?) end it "can be overridden on initialization" do re = RE2::Regexp.new('woo', :perl_classes => true) re.must_be(:perl_classes?) end end describe "#word_boundary?" do it "returns false by default" do RE2::Regexp.new('woo').wont_be(:word_boundary?) 
end it "can be overridden on initialization" do re = RE2::Regexp.new('woo', :word_boundary => true) re.must_be(:word_boundary?) end end describe "#one_line?" do it "returns false by default" do RE2::Regexp.new('woo').wont_be(:one_line?) end it "can be overridden on initialization" do re = RE2::Regexp.new('woo', :one_line => true) re.must_be(:one_line?) end end describe "#max_mem" do it "returns the default max memory" do RE2::Regexp.new('woo').max_mem.must_equal(8388608) end it "can be overridden on initialization" do re = RE2::Regexp.new('woo', :max_mem => 1024) re.max_mem.must_equal(1024) end end describe "#match" do let(:re) { RE2::Regexp.new('My name is (\S+) (\S+)') } it "returns match data given only text" do md = re.match("My name is Robert Paulson") md.must_be_instance_of(RE2::MatchData) end it "returns nil if there is no match for the given text" do re.match("My age is 99").must_be_nil end it "returns only true or false if no matches are requested" do re.match("My name is Robert Paulson", 0).must_equal(true) re.match("My age is 99", 0).must_equal(false) end it "raises an exception when given nil" do lambda { re.match(nil) }.must_raise(TypeError) end it "raises an exception when given an inappropriate number of matches" do lambda { re.match("My name is Robert Paulson", {}) }.must_raise(TypeError) end describe "with a specific number of matches under the total in the pattern" do subject { re.match("My name is Robert Paulson", 1) } it "returns a match data object" do subject.must_be_instance_of(RE2::MatchData) end it "has the whole match and only the specified number of matches" do subject.size.must_equal(2) end it "populates any specified matches" do subject[1].must_equal("Robert") end it "does not populate any matches that weren't included" do subject[2].must_be_nil end end describe "with a number of matches over the total in the pattern" do subject { re.match("My name is Robert Paulson", 5) } it "returns a match data object" do 
subject.must_be_instance_of(RE2::MatchData) end it "has the whole match the specified number of matches" do subject.size.must_equal(6) end it "populates any specified matches" do subject[1].must_equal("Robert") subject[2].must_equal("Paulson") end it "pads the remaining matches with nil" do subject[3].must_be_nil subject[4].must_be_nil subject[5].must_be_nil subject[6].must_be_nil end end end describe "#match?" do it "returns only true or false if no matches are requested" do re = RE2::Regexp.new('My name is (\S+) (\S+)') re.match?("My name is Robert Paulson").must_equal(true) re.match?("My age is 99").must_equal(false) end end describe "#=~" do it "returns only true or false if no matches are requested" do re = RE2::Regexp.new('My name is (\S+) (\S+)') (re =~ "My name is Robert Paulson").must_equal(true) (re =~ "My age is 99").must_equal(false) end end describe "#!~" do it "returns only true or false if no matches are requested" do re = RE2::Regexp.new('My name is (\S+) (\S+)') (re !~ "My name is Robert Paulson").must_equal(false) (re !~ "My age is 99").must_equal(true) end end describe "#===" do it "returns only true or false if no matches are requested" do re = RE2::Regexp.new('My name is (\S+) (\S+)') (re === "My name is Robert Paulson").must_equal(true) (re === "My age is 99").must_equal(false) end end describe "#ok?" do it "returns true for valid regexps" do RE2::Regexp.new('woo').must_be(:ok?) RE2::Regexp.new('wo(o)').must_be(:ok?) RE2::Regexp.new('((\d)\w+){3,}').must_be(:ok?) end it "returns false for invalid regexps" do RE2::Regexp.new('wo(o', :log_errors => false).wont_be(:ok?) RE2::Regexp.new('wo[o', :log_errors => false).wont_be(:ok?) RE2::Regexp.new('*', :log_errors => false).wont_be(:ok?) 
end end describe "#escape" do it "transforms a string into a regexp" do RE2::Regexp.escape("1.5-2.0?").must_equal('1\.5\-2\.0\?') end end describe "#quote" do it "transforms a string into a regexp" do RE2::Regexp.quote("1.5-2.0?").must_equal('1\.5\-2\.0\?') end end describe "#number_of_capturing_groups" do it "returns the number of groups in a regexp" do RE2::Regexp.new('(a)(b)(c)').number_of_capturing_groups.must_equal(3) RE2::Regexp.new('abc').number_of_capturing_groups.must_equal(0) RE2::Regexp.new('a((b)c)').number_of_capturing_groups.must_equal(2) end end describe "#named_capturing_groups" do it "returns a hash of names to indices" do RE2::Regexp.new('(?Pa)').named_capturing_groups.must_be_instance_of(Hash) end it "maps names to indices with only one group" do groups = RE2::Regexp.new('(?Pa)').named_capturing_groups groups["bob"].must_equal(1) end it "maps names to indices with several groups" do groups = RE2::Regexp.new('(?Pa)(o)(?Pe)').named_capturing_groups groups["bob"].must_equal(1) groups["rob"].must_equal(3) end end describe "#scan" do it "returns a scanner" do r = RE2::Regexp.new('(\w+)') scanner = r.scan("It is a truth universally acknowledged") scanner.must_be_instance_of(RE2::Scanner) end end end re2-0.7.0/spec/spec_helper.rb0000644000175000017500000000014412522466615014766 0ustar lucaslucas$:.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) require "re2" require "minitest/autorun" re2-0.7.0/spec/kernel_spec.rb0000644000175000017500000000062512522466615014773 0ustar lucaslucasrequire "spec_helper" describe Kernel do describe "#RE2" do it "returns an RE2::Regexp instance given a pattern" do RE2('w(o)(o)').must_be_instance_of(RE2::Regexp) end it "returns an RE2::Regexp instance given a pattern and options" do re = RE2('w(o)(o)', :case_sensitive => false) re.must_be_instance_of(RE2::Regexp) re.wont_be(:case_sensitive?) 
end end end re2-0.7.0/spec/re2_spec.rb0000644000175000017500000000462612522466615014210 0ustar lucaslucasrequire "spec_helper" describe RE2 do describe "#Replace" do it "only replaces the first occurrence of the pattern" do RE2.Replace("woo", "o", "a").must_equal("wao") end it "performs replacement based on regular expressions" do RE2.Replace("woo", "o+", "e").must_equal("we") end it "supports flags in patterns" do RE2.Replace("Good morning", "(?i)gOOD MORNING", "hi").must_equal("hi") end it "does not perform replacements in-place" do name = "Robert" replacement = RE2.Replace(name, "R", "Cr") replacement.must_equal("Crobert") name.wont_be_same_as(replacement) end it "supports passing an RE2::Regexp as the pattern" do re = RE2::Regexp.new('wo{2}') RE2.Replace("woo", re, "miaow").must_equal("miaow") end it "respects any passed RE2::Regexp's flags" do re = RE2::Regexp.new('gOOD MORNING', :case_sensitive => false) RE2.Replace("Good morning", re, "hi").must_equal("hi") end if String.method_defined?(:encoding) it "preserves the original string's encoding" do original = "Foo" replacement = RE2.Replace(original, "oo", "ah") original.encoding.must_equal(replacement.encoding) end end end describe "#GlobalReplace" do it "replaces every occurrence of a pattern" do RE2.GlobalReplace("woo", "o", "a").must_equal("waa") end it "performs replacement based on regular expressions" do RE2.GlobalReplace("woohoo", "o+", "e").must_equal("wehe") end it "supports flags in patterns" do RE2.GlobalReplace("Robert", "(?i)r", "w").must_equal("wobewt") end it "does not perform replacement in-place" do name = "Robert" replacement = RE2.GlobalReplace(name, "(?i)R", "w") replacement.must_equal("wobewt") name.wont_be_same_as(replacement) end it "supports passing an RE2::Regexp as the pattern" do re = RE2::Regexp.new('wo{2,}') RE2.GlobalReplace("woowooo", re, "miaow").must_equal("miaowmiaow") end it "respects any passed RE2::Regexp's flags" do re = RE2::Regexp.new('gOOD MORNING', :case_sensitive => 
false) RE2.GlobalReplace("Good morning Good morning", re, "hi").must_equal("hi hi") end end describe "#QuoteMeta" do it "escapes a string so it can be used as a regular expression" do RE2.QuoteMeta("1.5-2.0?").must_equal('1\.5\-2\.0\?') end end end re2-0.7.0/metadata.yml0000644000175000017500000000420312522466615013521 0ustar lucaslucas--- !ruby/object:Gem::Specification name: re2 version: !ruby/object:Gem::Version version: 0.7.0 platform: ruby authors: - Paul Mucur autorequire: bindir: bin cert_chain: [] date: 2015-01-25 00:00:00.000000000 Z dependencies: - !ruby/object:Gem::Dependency name: rake-compiler requirement: !ruby/object:Gem::Requirement requirements: - - "~>" - !ruby/object:Gem::Version version: '0.9' type: :development prerelease: false version_requirements: !ruby/object:Gem::Requirement requirements: - - "~>" - !ruby/object:Gem::Version version: '0.9' - !ruby/object:Gem::Dependency name: minitest requirement: !ruby/object:Gem::Requirement requirements: - - "~>" - !ruby/object:Gem::Version version: '5.4' type: :development prerelease: false version_requirements: !ruby/object:Gem::Requirement requirements: - - "~>" - !ruby/object:Gem::Version version: '5.4' description: Ruby bindings to re2, "an efficient, principled regular expression library". 
email: ruby.re2@librelist.com executables: [] extensions: - ext/re2/extconf.rb extra_rdoc_files: [] files: - LICENSE.txt - README.md - Rakefile - ext/re2/extconf.rb - ext/re2/re2.cc - lib/re2.rb - lib/re2/scanner.rb - lib/re2/string.rb - spec/kernel_spec.rb - spec/re2/match_data_spec.rb - spec/re2/regexp_spec.rb - spec/re2/scanner_spec.rb - spec/re2/string_spec.rb - spec/re2_spec.rb - spec/spec_helper.rb homepage: http://github.com/mudge/re2 licenses: - BSD metadata: {} post_install_message: rdoc_options: [] require_paths: - lib required_ruby_version: !ruby/object:Gem::Requirement requirements: - - ">=" - !ruby/object:Gem::Version version: '0' required_rubygems_version: !ruby/object:Gem::Requirement requirements: - - ">=" - !ruby/object:Gem::Version version: '0' requirements: [] rubyforge_project: rubygems_version: 2.2.2 signing_key: specification_version: 4 summary: Ruby bindings to re2. test_files: - spec/spec_helper.rb - spec/re2_spec.rb - spec/kernel_spec.rb - spec/re2/regexp_spec.rb - spec/re2/match_data_spec.rb - spec/re2/string_spec.rb - spec/re2/scanner_spec.rb has_rdoc: re2-0.7.0/ext/0000755000175000017500000000000012522466615012017 5ustar lucaslucasre2-0.7.0/ext/re2/0000755000175000017500000000000012522466615012507 5ustar lucaslucasre2-0.7.0/ext/re2/re2.cc0000644000175000017500000012477112522466615013522 0ustar lucaslucas/* * re2 (http://github.com/mudge/re2) * Ruby bindings to re2, an "efficient, principled regular expression library" * * Copyright (c) 2010-2014, Paul Mucur (http://mudge.name) * Released under the BSD Licence, please see LICENSE.txt */ #include #include #include #include #include #include using std::string; using std::ostringstream; using std::nothrow; using std::map; using std::vector; #define BOOL2RUBY(v) (v ? 
Qtrue : Qfalse) #define UNUSED(x) ((void)x) #ifndef RSTRING_LEN #define RSTRING_LEN(x) (RSTRING(x)->len) #endif #ifndef RSTRING_PTR #define RSTRING_PTR(x) (RSTRING(x)->ptr) #endif #ifdef HAVE_RUBY_ENCODING_H #include #define ENCODED_STR_NEW(str, length, encoding) \ ({ \ VALUE _string = rb_str_new(str, length); \ int _enc = rb_enc_find_index(encoding); \ rb_enc_associate_index(_string, _enc); \ _string; \ }) #define ENCODED_STR_NEW2(str, length, str2) \ ({ \ VALUE _string = rb_str_new(str, length); \ int _enc = rb_enc_get_index(str2); \ rb_enc_associate_index(_string, _enc); \ _string; \ }) #else #define ENCODED_STR_NEW(str, length, encoding) \ rb_str_new((const char *)str, (long)length) #define ENCODED_STR_NEW2(str, length, str2) \ rb_str_new((const char *)str, (long)length) #endif #ifdef HAVE_RB_STR_SUBLEN #define ENCODED_STR_SUBLEN(str, offset, encoding) \ LONG2NUM(rb_str_sublen(str, offset)) #else #ifdef HAVE_RUBY_ENCODING_H #define ENCODED_STR_SUBLEN(str, offset, encoding) \ ({ \ VALUE _string = ENCODED_STR_NEW(RSTRING_PTR(str), offset, encoding); \ rb_str_length(_string); \ }) #else #define ENCODED_STR_SUBLEN(str, offset, encoding) \ LONG2NUM(offset) #endif #endif #ifdef HAVE_ENDPOS_ARGUMENT #define match(pattern, text, startpos, endpos, anchor, match, nmatch) \ (pattern->Match(text, startpos, endpos, anchor, match, nmatch)) #else #define match(pattern, text, startpos, endpos, anchor, match, nmatch) \ (pattern->Match(text, startpos, anchor, match, nmatch)) #endif typedef struct { RE2 *pattern; } re2_pattern; typedef struct { re2::StringPiece *matches; int number_of_matches; VALUE regexp, text; } re2_matchdata; typedef struct { re2::StringPiece *input; int number_of_capturing_groups; VALUE regexp, text; } re2_scanner; VALUE re2_mRE2, re2_cRegexp, re2_cMatchData, re2_cScanner; /* Symbols used in RE2 options. 
*/ static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors, id_max_mem, id_literal, id_never_nl, id_case_sensitive, id_perl_classes, id_word_boundary, id_one_line; void re2_matchdata_mark(re2_matchdata* self) { rb_gc_mark(self->regexp); rb_gc_mark(self->text); } void re2_matchdata_free(re2_matchdata* self) { if (self->matches) { delete[] self->matches; } free(self); } void re2_scanner_mark(re2_scanner* self) { rb_gc_mark(self->regexp); rb_gc_mark(self->text); } void re2_scanner_free(re2_scanner* self) { if (self->input) { delete self->input; } free(self); } void re2_regexp_free(re2_pattern* self) { if (self->pattern) { delete self->pattern; } free(self); } static VALUE re2_matchdata_allocate(VALUE klass) { re2_matchdata *m; return Data_Make_Struct(klass, re2_matchdata, re2_matchdata_mark, re2_matchdata_free, m); } static VALUE re2_scanner_allocate(VALUE klass) { re2_scanner *c; return Data_Make_Struct(klass, re2_scanner, re2_scanner_mark, re2_scanner_free, c); } /* * Returns a frozen copy of the string passed into +match+. * * @return [String] a frozen copy of the passed string. * @example * m = RE2::Regexp.new('(\d+)').match("bob 123") * m.string #=> "bob 123" */ static VALUE re2_matchdata_string(VALUE self) { re2_matchdata *m; Data_Get_Struct(self, re2_matchdata, m); return m->text; } /* * Returns the string passed into the scanner. * * @return [String] the original string. * @example * c = RE2::Regexp.new('(\d+)').scan("foo") * c.string #=> "foo" */ static VALUE re2_scanner_string(VALUE self) { re2_scanner *c; Data_Get_Struct(self, re2_scanner, c); return c->text; } /* * Rewind the scanner to the start of the string. 
* * @example * s = RE2::Regexp.new('(\d+)').scan("1 2 3") * e = s.to_enum * e.scan #=> ["1"] * e.scan #=> ["2"] * s.rewind * e.scan #=> ["1"] */ static VALUE re2_scanner_rewind(VALUE self) { re2_scanner *c; Data_Get_Struct(self, re2_scanner, c); c->input = new(nothrow) re2::StringPiece(StringValuePtr(c->text)); return self; } /* * Scan the given text incrementally for matches, returning an array of * matches on each subsequent call. Returns nil if no matches are found. * * @return [Array] the matches. * @example * s = RE2::Regexp.new('(\w+)').scan("Foo bar baz") * s.scan #=> ["Foo"] * s.scan #=> ["bar"] */ static VALUE re2_scanner_scan(VALUE self) { int i; re2_pattern *p; re2_scanner *c; VALUE result; Data_Get_Struct(self, re2_scanner, c); Data_Get_Struct(c->regexp, re2_pattern, p); vector argv(c->number_of_capturing_groups); vector args(c->number_of_capturing_groups); vector matches(c->number_of_capturing_groups); for (i = 0; i < c->number_of_capturing_groups; i++) { matches[i] = ""; argv[i] = &matches[i]; args[i] = &argv[i]; } if (RE2::FindAndConsumeN(c->input, *p->pattern, &args[0], c->number_of_capturing_groups)) { result = rb_ary_new2(c->number_of_capturing_groups); for (i = 0; i < c->number_of_capturing_groups; i++) { if (matches[i].empty()) { rb_ary_push(result, Qnil); } else { rb_ary_push(result, ENCODED_STR_NEW(matches[i].data(), matches[i].size(), p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1")); } } } else { result = Qnil; } return result; } /* * Retrieve a matchdata by index or name. 
*/
/*
 * idx may be a Fixnum index, or a Symbol/String naming a capturing group.
 * Returns NULL when the name is unknown, the index is out of range, or the
 * selected match is empty.
 */
re2::StringPiece *re2_matchdata_find_match(VALUE idx, VALUE self) {
  int id;
  re2_matchdata *m;
  re2_pattern *p;
  map<string, int> groups;
  string name;
  re2::StringPiece *match;

  Data_Get_Struct(self, re2_matchdata, m);
  Data_Get_Struct(m->regexp, re2_pattern, p);

  if (FIXNUM_P(idx)) {
    id = FIX2INT(idx);
  } else {
    if (SYMBOL_P(idx)) {
      name = rb_id2name(SYM2ID(idx));
    } else {
      name = StringValuePtr(idx);
    }

    /* Translate the group name into its numeric index. */
    groups = p->pattern->NamedCapturingGroups();

    if (groups.count(name) == 1) {
      id = groups[name];
    } else {
      return NULL;
    }
  }

  if (id >= 0 && id < m->number_of_matches) {
    match = &m->matches[id];

    if (!match->empty()) {
      return match;
    }
  }

  return NULL;
}

/*
 * Returns the number of elements in the match array (including nils).
 *
 * @return [Fixnum] the number of elements
 * @example
 *   m = RE2::Regexp.new('(\d+)').match("bob 123")
 *   m.size      #=> 2
 *   m.length    #=> 2
 */
static VALUE re2_matchdata_size(VALUE self) {
  re2_matchdata *m;
  Data_Get_Struct(self, re2_matchdata, m);

  return INT2FIX(m->number_of_matches);
}

/*
 * Returns the offset of the start of the nth element of the matchdata.
 *
 * @param [Fixnum, String, Symbol] n the name or number of the match
 * @return [Fixnum, nil] the offset of the start of the match, or nil when
 *   there is no such match
 * @example
 *   m = RE2::Regexp.new('ob (\d+)').match("bob 123")
 *   m.begin(0)  #=> 1
 *   m.begin(1)  #=> 4
 */
static VALUE re2_matchdata_begin(VALUE self, VALUE n) {
  re2_matchdata *m;
  re2_pattern *p;
  re2::StringPiece *match;
  long offset;

  Data_Get_Struct(self, re2_matchdata, m);
  Data_Get_Struct(m->regexp, re2_pattern, p);

  match = re2_matchdata_find_match(n, self);
  if (match == NULL) {
    return Qnil;
  } else {
    /* Byte distance from the start of the searched text to the match. */
    offset = match->data() - StringValuePtr(m->text);

    return ENCODED_STR_SUBLEN(StringValue(m->text), offset,
           p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1");
  }
}

/*
 * Returns the offset of the character following the end of the nth element of the matchdata.
* * @param [Fixnum, String, Symbol] n the name or number of the match * @return [Fixnum] the offset of the character following the end of the match * @example * m = RE2::Regexp.new('ob (\d+) b').match("bob 123 bob") * m.end(0) #=> 9 * m.end(1) #=> 7 */ static VALUE re2_matchdata_end(VALUE self, VALUE n) { re2_matchdata *m; re2_pattern *p; re2::StringPiece *match; long offset; Data_Get_Struct(self, re2_matchdata, m); Data_Get_Struct(m->regexp, re2_pattern, p); match = re2_matchdata_find_match(n, self); if (match == NULL) { return Qnil; } else { offset = reinterpret_cast(match->data()) - reinterpret_cast(StringValuePtr(m->text)) + match->size(); return ENCODED_STR_SUBLEN(StringValue(m->text), offset, p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"); } } /* * Returns the {RE2::Regexp} used in the match. * * @return [RE2::Regexp] the regexp used in the match * @example * m = RE2::Regexp.new('(\d+)').match("bob 123") * m.regexp #=> # */ static VALUE re2_matchdata_regexp(VALUE self) { re2_matchdata *m; Data_Get_Struct(self, re2_matchdata, m); return m->regexp; } /* * Returns the {RE2::Regexp} used in the scanner. * * @return [RE2::Regexp] the regexp used in the scanner * @example * c = RE2::Regexp.new('(\d+)').scan("bob 123") * c.regexp #=> # */ static VALUE re2_scanner_regexp(VALUE self) { re2_scanner *c; Data_Get_Struct(self, re2_scanner, c); return c->regexp; } static VALUE re2_regexp_allocate(VALUE klass) { re2_pattern *p; return Data_Make_Struct(klass, re2_pattern, 0, re2_regexp_free, p); } /* * Returns the array of matches. 
* * @return [Array] the array of matches * @example * m = RE2::Regexp.new('(\d+)').match("bob 123") * m.to_a #=> ["123", "123"] */ static VALUE re2_matchdata_to_a(VALUE self) { int i; re2_matchdata *m; re2_pattern *p; re2::StringPiece *match; VALUE array; Data_Get_Struct(self, re2_matchdata, m); Data_Get_Struct(m->regexp, re2_pattern, p); array = rb_ary_new2(m->number_of_matches); for (i = 0; i < m->number_of_matches; i++) { match = &m->matches[i]; if (match->empty()) { rb_ary_push(array, Qnil); } else { rb_ary_push(array, ENCODED_STR_NEW(match->data(), match->size(), p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1")); } } return array; } static VALUE re2_matchdata_nth_match(int nth, VALUE self) { re2_matchdata *m; re2_pattern *p; re2::StringPiece *match; Data_Get_Struct(self, re2_matchdata, m); Data_Get_Struct(m->regexp, re2_pattern, p); if (nth < 0 || nth >= m->number_of_matches) { return Qnil; } else { match = &m->matches[nth]; if (match->empty()) { return Qnil; } else { return ENCODED_STR_NEW(match->data(), match->size(), p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"); } } } static VALUE re2_matchdata_named_match(const char* name, VALUE self) { int idx; re2_matchdata *m; re2_pattern *p; map groups; string name_as_string(name); Data_Get_Struct(self, re2_matchdata, m); Data_Get_Struct(m->regexp, re2_pattern, p); groups = p->pattern->NamedCapturingGroups(); if (groups.count(name_as_string) == 1) { idx = groups[name_as_string]; return re2_matchdata_nth_match(idx, self); } else { return Qnil; } } /* * Retrieve zero, one or more matches by index or name. * * @return [Array, String, Boolean] * * @overload [](index) * Access a particular match by index. * * @param [Fixnum] index the index of the match to fetch * @return [String, nil] the specified match * @example * m = RE2::Regexp.new('(\d+)').match("bob 123") * m[0] #=> "123" * * @overload [](start, length) * Access a range of matches by starting index and length. 
* * @param [Fixnum] start the index from which to start * @param [Fixnum] length the number of elements to fetch * @return [Array] the specified matches * @example * m = RE2::Regexp.new('(\d+)').match("bob 123") * m[0, 1] #=> ["123"] * * @overload [](range) * Access a range of matches by index. * * @param [Range] range the range of match indexes to fetch * @return [Array] the specified matches * @example * m = RE2::Regexp.new('(\d+)').match("bob 123") * m[0..1] #=> "[123", "123"] * * @overload [](name) * Access a particular match by name. * * @param [String, Symbol] name the name of the match to fetch * @return [String, nil] the specific match * @example * m = RE2::Regexp.new('(?P\d+)').match("bob 123") * m["number"] #=> "123" * m[:number] #=> "123" */ static VALUE re2_matchdata_aref(int argc, VALUE *argv, VALUE self) { VALUE idx, rest; rb_scan_args(argc, argv, "11", &idx, &rest); if (TYPE(idx) == T_STRING) { return re2_matchdata_named_match(StringValuePtr(idx), self); } else if (SYMBOL_P(idx)) { return re2_matchdata_named_match(rb_id2name(SYM2ID(idx)), self); } else if (!NIL_P(rest) || !FIXNUM_P(idx) || FIX2INT(idx) < 0) { return rb_ary_aref(argc, argv, re2_matchdata_to_a(self)); } else { return re2_matchdata_nth_match(FIX2INT(idx), self); } } /* * Returns the entire matched string. * * @return [String] the entire matched string */ static VALUE re2_matchdata_to_s(VALUE self) { return re2_matchdata_nth_match(0, self); } /* * Returns a printable version of the match. 
* * @return [String] a printable version of the match * @example * m = RE2::Regexp.new('(\d+)').match("bob 123") * m.inspect #=> "#" */ static VALUE re2_matchdata_inspect(VALUE self) { int i; re2_matchdata *m; re2_pattern *p; VALUE match, result; ostringstream output; Data_Get_Struct(self, re2_matchdata, m); Data_Get_Struct(m->regexp, re2_pattern, p); output << "#number_of_matches; i++) { output << " "; if (i > 0) { output << i << ":"; } match = re2_matchdata_nth_match(i, self); if (match == Qnil) { output << "nil"; } else { output << "\"" << StringValuePtr(match) << "\""; } } output << ">"; result = ENCODED_STR_NEW(output.str().data(), output.str().length(), p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"); return result; } /* * Returns a new RE2 object with a compiled version of * +pattern+ stored inside. Equivalent to +RE2.new+. * * @return [RE2::Regexp] an RE2::Regexp with the specified pattern and options * @param [String] pattern the pattern to compile * @param [Hash] options the options to compile a regexp with * @see RE2::Regexp.new * */ static VALUE re2_re2(int argc, VALUE *argv, VALUE self) { UNUSED(self); return rb_class_new_instance(argc, argv, re2_cRegexp); } /* * Returns a new {RE2::Regexp} object with a compiled version of * +pattern+ stored inside. * * @return [RE2::Regexp] * * @overload initialize(pattern) * Returns a new {RE2::Regexp} object with a compiled version of * +pattern+ stored inside with the default options. * * @param [String] pattern the pattern to compile * @return [RE2::Regexp] an RE2::Regexp with the specified pattern * @raise [NoMemoryError] if memory could not be allocated for the compiled * pattern * * @overload initialize(pattern, options) * Returns a new {RE2::Regexp} object with a compiled version of * +pattern+ stored inside with the specified options. 
* * @param [String] pattern the pattern to compile * @param [Hash] options the options with which to compile the pattern * @option options [Boolean] :utf8 (true) text and pattern are UTF-8; otherwise Latin-1 * @option options [Boolean] :posix_syntax (false) restrict regexps to POSIX egrep syntax * @option options [Boolean] :longest_match (false) search for longest match, not first match * @option options [Boolean] :log_errors (true) log syntax and execution errors to ERROR * @option options [Fixnum] :max_mem approx. max memory footprint of RE2 * @option options [Boolean] :literal (false) interpret string as literal, not regexp * @option options [Boolean] :never_nl (false) never match \n, even if it is in regexp * @option options [Boolean] :case_sensitive (true) match is case-sensitive (regexp can override with (?i) unless in posix_syntax mode) * @option options [Boolean] :perl_classes (false) allow Perl's \d \s \w \D \S \W when in posix_syntax mode * @option options [Boolean] :word_boundary (false) allow \b \B (word boundary and not) when in posix_syntax mode * @option options [Boolean] :one_line (false) ^ and $ only match beginning and end of text when in posix_syntax mode * @return [RE2::Regexp] an RE2::Regexp with the specified pattern and options * @raise [NoMemoryError] if memory could not be allocated for the compiled pattern */ static VALUE re2_regexp_initialize(int argc, VALUE *argv, VALUE self) { VALUE pattern, options, utf8, posix_syntax, longest_match, log_errors, max_mem, literal, never_nl, case_sensitive, perl_classes, word_boundary, one_line; re2_pattern *p; rb_scan_args(argc, argv, "11", &pattern, &options); Data_Get_Struct(self, re2_pattern, p); if (RTEST(options)) { if (TYPE(options) != T_HASH) { rb_raise(rb_eArgError, "options should be a hash"); } RE2::Options re2_options; utf8 = rb_hash_aref(options, ID2SYM(id_utf8)); if (!NIL_P(utf8)) { re2_options.set_utf8(RTEST(utf8)); } posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax)); if 
(!NIL_P(posix_syntax)) { re2_options.set_posix_syntax(RTEST(posix_syntax)); } longest_match = rb_hash_aref(options, ID2SYM(id_longest_match)); if (!NIL_P(longest_match)) { re2_options.set_longest_match(RTEST(longest_match)); } log_errors = rb_hash_aref(options, ID2SYM(id_log_errors)); if (!NIL_P(log_errors)) { re2_options.set_log_errors(RTEST(log_errors)); } max_mem = rb_hash_aref(options, ID2SYM(id_max_mem)); if (!NIL_P(max_mem)) { re2_options.set_max_mem(NUM2INT(max_mem)); } literal = rb_hash_aref(options, ID2SYM(id_literal)); if (!NIL_P(literal)) { re2_options.set_literal(RTEST(literal)); } never_nl = rb_hash_aref(options, ID2SYM(id_never_nl)); if (!NIL_P(never_nl)) { re2_options.set_never_nl(RTEST(never_nl)); } case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive)); if (!NIL_P(case_sensitive)) { re2_options.set_case_sensitive(RTEST(case_sensitive)); } perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes)); if (!NIL_P(perl_classes)) { re2_options.set_perl_classes(RTEST(perl_classes)); } word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary)); if (!NIL_P(word_boundary)) { re2_options.set_word_boundary(RTEST(word_boundary)); } one_line = rb_hash_aref(options, ID2SYM(id_one_line)); if (!NIL_P(one_line)) { re2_options.set_one_line(RTEST(one_line)); } p->pattern = new(nothrow) RE2(StringValuePtr(pattern), re2_options); } else { p->pattern = new(nothrow) RE2(StringValuePtr(pattern)); } if (p->pattern == 0) { rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object"); } return self; } /* * Returns a printable version of the regular expression +re2+. 
* * @return [String] a printable version of the regular expression * @example * re2 = RE2::Regexp.new("woo?") * re2.inspect #=> "#" */ static VALUE re2_regexp_inspect(VALUE self) { re2_pattern *p; VALUE result; ostringstream output; Data_Get_Struct(self, re2_pattern, p); output << "#pattern->pattern() << "/>"; result = ENCODED_STR_NEW(output.str().data(), output.str().length(), p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"); return result; } /* * Returns a string version of the regular expression +re2+. * * @return [String] a string version of the regular expression * @example * re2 = RE2::Regexp.new("woo?") * re2.to_s #=> "woo?" */ static VALUE re2_regexp_to_s(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return ENCODED_STR_NEW(p->pattern->pattern().data(), p->pattern->pattern().size(), p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"); } /* * Returns whether or not the regular expression +re2+ * was compiled successfully or not. * * @return [Boolean] whether or not compilation was successful * @example * re2 = RE2::Regexp.new("woo?") * re2.ok? #=> true */ static VALUE re2_regexp_ok(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->ok()); } /* * Returns whether or not the regular expression +re2+ * was compiled with the utf8 option set to true. * * @return [Boolean] the utf8 option * @example * re2 = RE2::Regexp.new("woo?", :utf8 => true) * re2.utf8? #=> true */ static VALUE re2_regexp_utf8(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().utf8()); } /* * Returns whether or not the regular expression +re2+ * was compiled with the posix_syntax option set to true. * * @return [Boolean] the posix_syntax option * @example * re2 = RE2::Regexp.new("woo?", :posix_syntax => true) * re2.posix_syntax? 
#=> true */ static VALUE re2_regexp_posix_syntax(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().posix_syntax()); } /* * Returns whether or not the regular expression +re2+ * was compiled with the longest_match option set to true. * * @return [Boolean] the longest_match option * @example * re2 = RE2::Regexp.new("woo?", :longest_match => true) * re2.longest_match? #=> true */ static VALUE re2_regexp_longest_match(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().longest_match()); } /* * Returns whether or not the regular expression +re2+ * was compiled with the log_errors option set to true. * * @return [Boolean] the log_errors option * @example * re2 = RE2::Regexp.new("woo?", :log_errors => true) * re2.log_errors? #=> true */ static VALUE re2_regexp_log_errors(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().log_errors()); } /* * Returns the max_mem setting for the regular expression * +re2+. * * @return [Fixnum] the max_mem option * @example * re2 = RE2::Regexp.new("woo?", :max_mem => 1024) * re2.max_mem #=> 1024 */ static VALUE re2_regexp_max_mem(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return INT2FIX(p->pattern->options().max_mem()); } /* * Returns whether or not the regular expression +re2+ * was compiled with the literal option set to true. * * @return [Boolean] the literal option * @example * re2 = RE2::Regexp.new("woo?", :literal => true) * re2.literal? #=> true */ static VALUE re2_regexp_literal(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().literal()); } /* * Returns whether or not the regular expression +re2+ * was compiled with the never_nl option set to true. * * @return [Boolean] the never_nl option * @example * re2 = RE2::Regexp.new("woo?", :never_nl => true) * re2.never_nl? 
#=> true */ static VALUE re2_regexp_never_nl(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().never_nl()); } /* * Returns whether or not the regular expression +re2+ * was compiled with the case_sensitive option set to true. * * @return [Boolean] the case_sensitive option * @example * re2 = RE2::Regexp.new("woo?", :case_sensitive => true) * re2.case_sensitive? #=> true */ static VALUE re2_regexp_case_sensitive(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().case_sensitive()); } /* * Returns whether or not the regular expression +re2+ * was compiled with the case_sensitive option set to false. * * @return [Boolean] the inverse of the case_sensitive option * @example * re2 = RE2::Regexp.new("woo?", :case_sensitive => true) * re2.case_insensitive? #=> false * re2.casefold? #=> false */ static VALUE re2_regexp_case_insensitive(VALUE self) { return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue); } /* * Returns whether or not the regular expression +re2+ * was compiled with the perl_classes option set to true. * * @return [Boolean] the perl_classes option * @example * re2 = RE2::Regexp.new("woo?", :perl_classes => true) * re2.perl_classes? #=> true */ static VALUE re2_regexp_perl_classes(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().perl_classes()); } /* * Returns whether or not the regular expression +re2+ * was compiled with the word_boundary option set to true. * * @return [Boolean] the word_boundary option * @example * re2 = RE2::Regexp.new("woo?", :word_boundary => true) * re2.word_boundary? #=> true */ static VALUE re2_regexp_word_boundary(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().word_boundary()); } /* * Returns whether or not the regular expression +re2+ * was compiled with the one_line option set to true. 
* * @return [Boolean] the one_line option * @example * re2 = RE2::Regexp.new("woo?", :one_line => true) * re2.one_line? #=> true */ static VALUE re2_regexp_one_line(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().one_line()); } /* * If the RE2 could not be created properly, returns an * error string otherwise returns nil. * * @return [String, nil] the error string or nil */ static VALUE re2_regexp_error(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); if (p->pattern->ok()) { return Qnil; } else { return rb_str_new(p->pattern->error().data(), p->pattern->error().size()); } } /* * If the RE2 could not be created properly, returns * the offending portion of the regexp otherwise returns nil. * * @return [String, nil] the offending portion of the regexp or nil */ static VALUE re2_regexp_error_arg(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); if (p->pattern->ok()) { return Qnil; } else { return ENCODED_STR_NEW(p->pattern->error_arg().data(), p->pattern->error_arg().size(), p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"); } } /* * Returns the program size, a very approximate measure * of a regexp's "cost". Larger numbers are more expensive * than smaller numbers. * * @return [Fixnum] the regexp "cost" */ static VALUE re2_regexp_program_size(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return INT2FIX(p->pattern->ProgramSize()); } /* * Returns a hash of the options currently set for * +re2+. 
*
 * @return [Hash] the options
 */
static VALUE re2_regexp_options(VALUE self) {
  re2_pattern *re;
  Data_Get_Struct(self, re2_pattern, re);

  const RE2::Options &current = re->pattern->options();
  VALUE hash = rb_hash_new();

  rb_hash_aset(hash, ID2SYM(id_utf8), BOOL2RUBY(current.utf8()));
  rb_hash_aset(hash, ID2SYM(id_posix_syntax), BOOL2RUBY(current.posix_syntax()));
  rb_hash_aset(hash, ID2SYM(id_longest_match), BOOL2RUBY(current.longest_match()));
  rb_hash_aset(hash, ID2SYM(id_log_errors), BOOL2RUBY(current.log_errors()));
  rb_hash_aset(hash, ID2SYM(id_max_mem), INT2FIX(current.max_mem()));
  rb_hash_aset(hash, ID2SYM(id_literal), BOOL2RUBY(current.literal()));
  rb_hash_aset(hash, ID2SYM(id_never_nl), BOOL2RUBY(current.never_nl()));
  rb_hash_aset(hash, ID2SYM(id_case_sensitive), BOOL2RUBY(current.case_sensitive()));
  rb_hash_aset(hash, ID2SYM(id_perl_classes), BOOL2RUBY(current.perl_classes()));
  rb_hash_aset(hash, ID2SYM(id_word_boundary), BOOL2RUBY(current.word_boundary()));
  rb_hash_aset(hash, ID2SYM(id_one_line), BOOL2RUBY(current.one_line()));

  /* The hash is only a snapshot, so hand it back frozen. */
  rb_obj_freeze(hash);

  return hash;
}

/*
 * Gives the number of capturing subpatterns, or -1 if the regexp
 * wasn't valid on construction. The overall match ($0) does not
 * count: if the regexp is "(a)(b)", returns 2.
 *
 * @return [Fixnum] the number of capturing subpatterns
 */
static VALUE re2_regexp_number_of_capturing_groups(VALUE self) {
  re2_pattern *re;
  Data_Get_Struct(self, re2_pattern, re);

  return INT2FIX(re->pattern->NumberOfCapturingGroups());
}

/*
 * Returns a hash of names to capturing indices of groups.
*
 * @return [Hash] a hash of names to capturing indices
 */
static VALUE re2_regexp_named_capturing_groups(VALUE self) {
  VALUE capturing_groups;
  re2_pattern *p;
  map<string, int> groups;
  map<string, int>::iterator group;

  Data_Get_Struct(self, re2_pattern, p);
  groups = p->pattern->NamedCapturingGroups();
  capturing_groups = rb_hash_new();

  /* Keys are the group names (encoded like the pattern), values are the
   * group indices. */
  for (group = groups.begin(); group != groups.end(); ++group) {
    rb_hash_aset(capturing_groups,
        ENCODED_STR_NEW(group->first.data(), group->first.size(),
          p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"),
        INT2FIX(group->second));
  }

  return capturing_groups;
}

/*
 * Match the pattern against the given +text+ and return either
 * a boolean (if no submatches are required) or a {RE2::MatchData}
 * instance.
 *
 * @return [Boolean, RE2::MatchData]
 *
 * @overload match(text)
 *   Returns an {RE2::MatchData} containing the matching
 *   pattern and all subpatterns resulting from looking for
 *   the regexp in +text+.
 *
 *   @param [String] text the text to search
 *   @return [RE2::MatchData] the matches
 *   @raise [NoMemoryError] if there was not enough memory to allocate the matches
 *   @example
 *     r = RE2::Regexp.new('w(o)(o)')
 *     r.match('woo')    #=> #<RE2::MatchData "woo" 1:"o" 2:"o">
 *
 * @overload match(text, 0)
 *   Returns either true or false indicating whether a
 *   successful match was made.
 *
 *   @param [String] text the text to search
 *   @return [Boolean] whether the match was successful
 *   @raise [NoMemoryError] if there was not enough memory to allocate the matches
 *   @example
 *     r = RE2::Regexp.new('w(o)(o)')
 *     r.match('woo', 0) #=> true
 *     r.match('bob', 0) #=> false
 *
 * @overload match(text, number_of_matches)
 *   See +match(text)+ but with a specific number of
 *   matches returned (padded with nils if necessary).
* * @param [String] text the text to search * @param [Fixnum] number_of_matches the number of matches to return * @return [RE2::MatchData] the matches * @raise [NoMemoryError] if there was not enough memory to allocate the matches * @example * r = RE2::Regexp.new('w(o)(o)') * r.match('woo', 1) #=> # * r.match('woo', 3) #=> # */ static VALUE re2_regexp_match(int argc, VALUE *argv, VALUE self) { int n; bool matched; re2_pattern *p; re2_matchdata *m; VALUE text, number_of_matches, matchdata; rb_scan_args(argc, argv, "11", &text, &number_of_matches); /* Ensure text is a string. */ text = StringValue(text); Data_Get_Struct(self, re2_pattern, p); if (RTEST(number_of_matches)) { n = NUM2INT(number_of_matches); } else { n = p->pattern->NumberOfCapturingGroups(); } if (n == 0) { matched = match(p->pattern, StringValuePtr(text), 0, static_cast(RSTRING_LEN(text)), RE2::UNANCHORED, 0, 0); return BOOL2RUBY(matched); } else { /* Because match returns the whole match as well. */ n += 1; matchdata = rb_class_new_instance(0, 0, re2_cMatchData); Data_Get_Struct(matchdata, re2_matchdata, m); m->matches = new(nothrow) re2::StringPiece[n]; m->regexp = self; m->text = rb_str_dup(text); rb_str_freeze(m->text); if (m->matches == 0) { rb_raise(rb_eNoMemError, "not enough memory to allocate StringPieces for matches"); } m->number_of_matches = n; matched = match(p->pattern, StringValuePtr(m->text), 0, static_cast(RSTRING_LEN(m->text)), RE2::UNANCHORED, m->matches, n); if (matched) { return matchdata; } else { return Qnil; } } } /* * Returns true or false to indicate a successful match. * Equivalent to +re2.match(text, 0)+. * * @return [Boolean] whether the match was successful */ static VALUE re2_regexp_match_query(VALUE self, VALUE text) { VALUE argv[2]; argv[0] = text; argv[1] = INT2FIX(0); return re2_regexp_match(2, argv, self); } /* * Returns a {RE2::Scanner} for scanning the given text incrementally. 
* * @example * c = RE2::Regexp.new('(\w+)').scan("Foo bar baz") */ static VALUE re2_regexp_scan(VALUE self, VALUE text) { re2_pattern *p; re2_scanner *c; VALUE scanner; Data_Get_Struct(self, re2_pattern, p); scanner = rb_class_new_instance(0, 0, re2_cScanner); Data_Get_Struct(scanner, re2_scanner, c); c->input = new(nothrow) re2::StringPiece(StringValuePtr(text)); c->regexp = self; c->text = text; c->number_of_capturing_groups = p->pattern->NumberOfCapturingGroups(); return scanner; } /* * Returns a copy of +str+ with the first occurrence +pattern+ * replaced with +rewrite+. * * @param [String] str the string to modify * @param [String, RE2::Regexp] pattern a regexp matching text to be replaced * @param [String] rewrite the string to replace with * @return [String] the resulting string * @example * RE2.Replace("hello there", "hello", "howdy") #=> "howdy there" * re2 = RE2.new("hel+o") * RE2.Replace("hello there", re2, "yo") #=> "yo there" */ static VALUE re2_Replace(VALUE self, VALUE str, VALUE pattern, VALUE rewrite) { UNUSED(self); re2_pattern *p; /* Convert all the inputs to be pumped into RE2::Replace. */ string str_as_string(StringValuePtr(str)); /* Do the replacement. */ if (rb_obj_is_kind_of(pattern, re2_cRegexp)) { Data_Get_Struct(pattern, re2_pattern, p); RE2::Replace(&str_as_string, *p->pattern, StringValuePtr(rewrite)); return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(), p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"); } else { RE2::Replace(&str_as_string, StringValuePtr(pattern), StringValuePtr(rewrite)); return ENCODED_STR_NEW2(str_as_string.data(), str_as_string.size(), pattern); } } /* * Return a copy of +str+ with +pattern+ replaced by +rewrite+. 
* * @param [String] str the string to modify * @param [String, RE2::Regexp] pattern a regexp matching text to be replaced * @param [String] rewrite the string to replace with * @return [String] the resulting string * @example * re2 = RE2.new("oo?") * RE2.GlobalReplace("whoops-doops", re2, "e") #=> "wheps-deps" * RE2.GlobalReplace("hello there", "e", "i") #=> "hillo thiri" */ static VALUE re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern, VALUE rewrite) { UNUSED(self); /* Convert all the inputs to be pumped into RE2::GlobalReplace. */ re2_pattern *p; string str_as_string(StringValuePtr(str)); /* Do the replacement. */ if (rb_obj_is_kind_of(pattern, re2_cRegexp)) { Data_Get_Struct(pattern, re2_pattern, p); RE2::GlobalReplace(&str_as_string, *p->pattern, StringValuePtr(rewrite)); return ENCODED_STR_NEW(str_as_string.data(), str_as_string.size(), p->pattern->options().utf8() ? "UTF-8" : "ISO-8859-1"); } else { RE2::GlobalReplace(&str_as_string, StringValuePtr(pattern), StringValuePtr(rewrite)); return ENCODED_STR_NEW2(str_as_string.data(), str_as_string.size(), pattern); } } /* * Returns a version of str with all potentially meaningful regexp * characters escaped. The returned string, used as a regular * expression, will exactly match the original string. * * @param [String] unquoted the unquoted string * @return [String] the escaped string * @example * RE2::Regexp.escape("1.5-2.0?") #=> "1\.5\-2\.0\?" */ static VALUE re2_QuoteMeta(VALUE self, VALUE unquoted) { UNUSED(self); string quoted_string = RE2::QuoteMeta(StringValuePtr(unquoted)); return rb_str_new(quoted_string.data(), quoted_string.size()); } /* Forward declare Init_re2 to be called by C code but define it separately so * that YARD can parse it. 
*/
extern "C" void Init_re2(void);

/*
 * Extension entry point: defines the RE2 module, its Regexp, MatchData and
 * Scanner classes, every method on them, the RE2() global helper, and the
 * IDs used as option keys.
 */
void Init_re2(void) {
  /* Module and classes; all three classes live under RE2 and inherit from
   * Object.
   */
  re2_mRE2 = rb_define_module("RE2");
  re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject);
  re2_cMatchData = rb_define_class_under(re2_mRE2, "MatchData", rb_cObject);
  re2_cScanner = rb_define_class_under(re2_mRE2, "Scanner", rb_cObject);

  /* Allocators for the structs wrapped by each class; the casts adapt the
   * allocator functions to the signature rb_define_alloc_func expects.
   */
  rb_define_alloc_func(re2_cRegexp, (VALUE (*)(VALUE))re2_regexp_allocate);
  rb_define_alloc_func(re2_cMatchData,
      (VALUE (*)(VALUE))re2_matchdata_allocate);
  rb_define_alloc_func(re2_cScanner,
      (VALUE (*)(VALUE))re2_scanner_allocate);

  /* RE2::MatchData instance methods ("length" is an alias of "size").
   * In the Ruby C API an arity of -1 means the function receives its
   * arguments as (argc, argv, self).
   */
  rb_define_method(re2_cMatchData, "string",
      RUBY_METHOD_FUNC(re2_matchdata_string), 0);
  rb_define_method(re2_cMatchData, "regexp",
      RUBY_METHOD_FUNC(re2_matchdata_regexp), 0);
  rb_define_method(re2_cMatchData, "to_a",
      RUBY_METHOD_FUNC(re2_matchdata_to_a), 0);
  rb_define_method(re2_cMatchData, "size",
      RUBY_METHOD_FUNC(re2_matchdata_size), 0);
  rb_define_method(re2_cMatchData, "length",
      RUBY_METHOD_FUNC(re2_matchdata_size), 0);
  rb_define_method(re2_cMatchData, "begin",
      RUBY_METHOD_FUNC(re2_matchdata_begin), 1);
  rb_define_method(re2_cMatchData, "end",
      RUBY_METHOD_FUNC(re2_matchdata_end), 1);
  rb_define_method(re2_cMatchData, "[]", RUBY_METHOD_FUNC(re2_matchdata_aref),
      -1);
  rb_define_method(re2_cMatchData, "to_s",
      RUBY_METHOD_FUNC(re2_matchdata_to_s), 0);
  rb_define_method(re2_cMatchData, "inspect",
      RUBY_METHOD_FUNC(re2_matchdata_inspect), 0);

  /* RE2::Scanner instance methods. */
  rb_define_method(re2_cScanner, "string",
      RUBY_METHOD_FUNC(re2_scanner_string), 0);
  rb_define_method(re2_cScanner, "regexp",
      RUBY_METHOD_FUNC(re2_scanner_regexp), 0);
  rb_define_method(re2_cScanner, "scan",
      RUBY_METHOD_FUNC(re2_scanner_scan), 0);
  rb_define_method(re2_cScanner, "rewind",
      RUBY_METHOD_FUNC(re2_scanner_rewind), 0);

  /* RE2::Regexp instance methods: construction, compile status and
   * introspection.
   */
  rb_define_method(re2_cRegexp, "initialize",
      RUBY_METHOD_FUNC(re2_regexp_initialize), -1);
  rb_define_method(re2_cRegexp, "ok?", RUBY_METHOD_FUNC(re2_regexp_ok), 0);
  rb_define_method(re2_cRegexp, "error", RUBY_METHOD_FUNC(re2_regexp_error),
      0);
  rb_define_method(re2_cRegexp, "error_arg",
      RUBY_METHOD_FUNC(re2_regexp_error_arg), 0);
  rb_define_method(re2_cRegexp, "program_size",
      RUBY_METHOD_FUNC(re2_regexp_program_size), 0);
  rb_define_method(re2_cRegexp, "options",
      RUBY_METHOD_FUNC(re2_regexp_options), 0);
  rb_define_method(re2_cRegexp, "number_of_capturing_groups",
      RUBY_METHOD_FUNC(re2_regexp_number_of_capturing_groups), 0);
  rb_define_method(re2_cRegexp, "named_capturing_groups",
      RUBY_METHOD_FUNC(re2_regexp_named_capturing_groups), 0);

  /* Matching: "match?", "=~" and "===" all share one implementation. */
  rb_define_method(re2_cRegexp, "match", RUBY_METHOD_FUNC(re2_regexp_match),
      -1);
  rb_define_method(re2_cRegexp, "match?",
      RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
  rb_define_method(re2_cRegexp, "=~",
      RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
  rb_define_method(re2_cRegexp, "===",
      RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
  rb_define_method(re2_cRegexp, "scan",
      RUBY_METHOD_FUNC(re2_regexp_scan), 1);

  /* "to_s", "to_str", "pattern" and "source" all share one implementation. */
  rb_define_method(re2_cRegexp, "to_s", RUBY_METHOD_FUNC(re2_regexp_to_s), 0);
  rb_define_method(re2_cRegexp, "to_str", RUBY_METHOD_FUNC(re2_regexp_to_s),
      0);
  rb_define_method(re2_cRegexp, "pattern", RUBY_METHOD_FUNC(re2_regexp_to_s),
      0);
  rb_define_method(re2_cRegexp, "source", RUBY_METHOD_FUNC(re2_regexp_to_s),
      0);
  rb_define_method(re2_cRegexp, "inspect",
      RUBY_METHOD_FUNC(re2_regexp_inspect), 0);

  /* Option readers ("casefold?" is an alias of "case_insensitive?"). */
  rb_define_method(re2_cRegexp, "utf8?", RUBY_METHOD_FUNC(re2_regexp_utf8),
      0);
  rb_define_method(re2_cRegexp, "posix_syntax?",
      RUBY_METHOD_FUNC(re2_regexp_posix_syntax), 0);
  rb_define_method(re2_cRegexp, "longest_match?",
      RUBY_METHOD_FUNC(re2_regexp_longest_match), 0);
  rb_define_method(re2_cRegexp, "log_errors?",
      RUBY_METHOD_FUNC(re2_regexp_log_errors), 0);
  rb_define_method(re2_cRegexp, "max_mem",
      RUBY_METHOD_FUNC(re2_regexp_max_mem), 0);
  rb_define_method(re2_cRegexp, "literal?",
      RUBY_METHOD_FUNC(re2_regexp_literal), 0);
  rb_define_method(re2_cRegexp, "never_nl?",
      RUBY_METHOD_FUNC(re2_regexp_never_nl), 0);
  rb_define_method(re2_cRegexp, "case_sensitive?",
      RUBY_METHOD_FUNC(re2_regexp_case_sensitive), 0);
  rb_define_method(re2_cRegexp, "case_insensitive?",
      RUBY_METHOD_FUNC(re2_regexp_case_insensitive), 0);
  rb_define_method(re2_cRegexp, "casefold?",
      RUBY_METHOD_FUNC(re2_regexp_case_insensitive), 0);
  rb_define_method(re2_cRegexp, "perl_classes?",
      RUBY_METHOD_FUNC(re2_regexp_perl_classes), 0);
  rb_define_method(re2_cRegexp, "word_boundary?",
      RUBY_METHOD_FUNC(re2_regexp_word_boundary), 0);
  rb_define_method(re2_cRegexp, "one_line?",
      RUBY_METHOD_FUNC(re2_regexp_one_line), 0);

  /* Module-level functions: RE2.Replace, RE2.GlobalReplace, RE2.QuoteMeta. */
  rb_define_module_function(re2_mRE2, "Replace",
      RUBY_METHOD_FUNC(re2_Replace), 3);
  rb_define_module_function(re2_mRE2, "GlobalReplace",
      RUBY_METHOD_FUNC(re2_GlobalReplace), 3);
  rb_define_module_function(re2_mRE2, "QuoteMeta",
      RUBY_METHOD_FUNC(re2_QuoteMeta), 1);

  /* Class-level helpers on RE2::Regexp: "escape" and "quote" reuse the
   * QuoteMeta implementation; "compile" creates a new instance.
   */
  rb_define_singleton_method(re2_cRegexp, "escape",
      RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
  rb_define_singleton_method(re2_cRegexp, "quote",
      RUBY_METHOD_FUNC(re2_QuoteMeta), 1);
  rb_define_singleton_method(re2_cRegexp, "compile",
      RUBY_METHOD_FUNC(rb_class_new_instance), -1);

  /* The global RE2(...) helper function. */
  rb_define_global_function("RE2", RUBY_METHOD_FUNC(re2_re2), -1);

  /* Create the symbols used in options. */
  id_utf8 = rb_intern("utf8");
  id_posix_syntax = rb_intern("posix_syntax");
  id_longest_match = rb_intern("longest_match");
  id_log_errors = rb_intern("log_errors");
  id_max_mem = rb_intern("max_mem");
  id_literal = rb_intern("literal");
  id_never_nl = rb_intern("never_nl");
  id_case_sensitive = rb_intern("case_sensitive");
  id_perl_classes = rb_intern("perl_classes");
  id_word_boundary = rb_intern("word_boundary");
  id_one_line = rb_intern("one_line");

#if 0
  /* Fake so YARD generates the file.
*/ rb_mKernel = rb_define_module("Kernel"); #endif } re2-0.7.0/ext/re2/extconf.rb0000644000175000017500000000254412522466615014507 0ustar lucaslucas# re2 (http://github.com/mudge/re2) # Ruby bindings to re2, an "efficient, principled regular expression library" # # Copyright (c) 2010-2012, Paul Mucur (http://mudge.name) # Released under the BSD Licence, please see LICENSE.txt require 'mkmf' incl, lib = dir_config("re2", "/usr/local/include", "/usr/local/lib") $CFLAGS << " -Wall -Wextra -funroll-loops" have_library("stdc++") have_header("stdint.h") have_func("rb_str_sublen") if have_library("re2") # Determine which version of re2 the user has installed. # Revision d9f8806c004d added an `endpos` argument to the # generic Match() function. # # To test for this, try to compile a simple program that uses # the newer form of Match() and set a flag if it is successful. checking_for("RE2::Match() with endpos argument") do test_re2_match_signature = < int main() { RE2 pattern("test"); re2::StringPiece *match; pattern.Match("test", 0, 0, RE2::UNANCHORED, match, 0); return 0; } SRC # Pass -x c++ to force gcc to compile the test program # as C++ (as it will end in .c by default). 
if try_compile(test_re2_match_signature, "-x c++")
      $defs.push("-DHAVE_ENDPOS_ARGUMENT")
    end
  end

  create_makefile("re2")
else
  abort "You must have re2 installed and specified with --with-re2-dir, please see http://code.google.com/p/re2/wiki/Install"
end
re2-0.7.0/Rakefile0000644000175000017500000000042312522466615012663 0ustar  lucaslucas
require 'rake/extensiontask'
require 'rake/testtask'

Rake::ExtensionTask.new('re2')

Rake::TestTask.new do |t|
  t.libs << "spec"
  t.test_files = FileList["spec/**/*_spec.rb"]
  t.verbose = true
end

task :test    => :compile
task :spec    => :test
task :default => :test
re2-0.7.0/README.md0000644000175000017500000001200612522466615012475 0ustar  lucaslucas
re2 [![Build Status](https://travis-ci.org/mudge/re2.svg?branch=master)](http://travis-ci.org/mudge/re2)
===

A Ruby binding to [re2][], an "efficient, principled regular expression
library".

**Current version:** 0.7.0  
**Supported Ruby versions:** 1.8.7, 1.9.2, 1.9.3, 2.0.0, 2.1.0, Rubinius 2.2

Installation
------------

You will need [re2][] installed as well as a C++ compiler such as [gcc][] (on
Debian and Ubuntu, this is provided by the [build-essential][] package). If
you are using Mac OS X, I recommend installing re2 with [Homebrew][] by
running the following:

    $ brew install re2

If you are using Debian, you can install the [libre2-dev][] package like so:

    $ sudo apt-get install libre2-dev

If you are using a packaged Ruby distribution, make sure you also have the
Ruby header files installed such as those provided by the [ruby-dev][] package
on Debian and Ubuntu.

You can then install the library via RubyGems with `gem install re2` or `gem
install re2 -- --with-re2-dir=/opt/local/re2` if re2 is not installed in the
default location of `/usr/local/`.

Documentation
-------------

Full documentation automatically generated from the latest version is
available at <http://mudge.name/re2/>.

Bear in mind that re2's regular expression syntax differs from PCRE, see the
[official syntax page][] for more details.
Usage
-----

You can use re2 as a mostly drop-in replacement for Ruby's own [Regexp][] and
[MatchData][] classes:

```console
$ irb -rubygems
> require 're2'
> r = RE2::Regexp.new('w(\d)(\d+)')
=> #<RE2::Regexp /w(\d)(\d+)/>
> m = r.match("w1234")
=> #<RE2::MatchData "w1234" 1:"1" 2:"234">
> m[1]
=> "1"
> m.string
=> "w1234"
> m.begin(1)
=> 1
> m.end(1)
=> 2
> r =~ "w1234"
=> true
> r !~ "bob"
=> true
> r.match("bob")
=> nil
```

As `RE2::Regexp.new` (or `RE2::Regexp.compile`) can be quite verbose, a helper
method has been defined against `Kernel` so you can use a shorter version to
create regular expressions:

```console
> RE2('(\d+)')
=> #<RE2::Regexp /(\d+)/>
```

Note the use of *single quotes* as double quotes will interpret `\d` as `d` as
in the following example:

```console
> RE2("(\d+)")
=> #<RE2::Regexp /(d+)/>
```

As of 0.3.0, you can use named groups:

```console
> r = RE2::Regexp.new('(?P<name>\w+) (?P<age>\d+)')
=> #<RE2::Regexp /(?P<name>\w+) (?P<age>\d+)/>
> m = r.match("Bob 40")
=> #<RE2::MatchData "Bob 40" 1:"Bob" 2:"40">
> m[:name]
=> "Bob"
> m["age"]
=> "40"
```

As of 0.6.0, you can use `RE2::Regexp#scan` to incrementally scan text for
matches (similar in purpose to Ruby's
[`String#scan`](http://ruby-doc.org/core-2.0.0/String.html#method-i-scan)).
Calling `scan` will return an `RE2::Scanner` which is
[enumerable](http://ruby-doc.org/core-2.0.0/Enumerable.html) meaning you can
use `each` to iterate through the matches (and even use
[`Enumerator::Lazy`](http://ruby-doc.org/core-2.0/Enumerator/Lazy.html)):

```ruby
re = RE2('(\w+)')
scanner = re.scan("It is a truth universally acknowledged")
scanner.each do |match|
  puts match
end

scanner.rewind

enum = scanner.to_enum
enum.next #=> ["It"]
enum.next #=> ["is"]
```

Features
--------

* Pre-compiling regular expressions with
  [`RE2::Regexp.new(re)`](http://code.google.com/p/re2/source/browse/re2/re2.h#96),
  `RE2::Regexp.compile(re)` or `RE2(re)` (including specifying options, e.g.
`RE2::Regexp.new("pattern", :case_sensitive => false)`)

* Extracting matches with `re2.match(text)` (and an exact number of matches
  with `re2.match(text, number_of_matches)` such as `re2.match("123-234", 2)`)

* Extracting matches by name (both with strings and symbols)

* Checking for matches with `re2 =~ text`, `re2 === text` (for use in `case`
  statements) and `re2 !~ text`

* Incrementally scanning text with `re2.scan(text)`

* Checking regular expression compilation with `re2.ok?`, `re2.error` and
  `re2.error_arg`

* Checking regular expression "cost" with `re2.program_size`

* Checking the options for an expression with `re2.options` or individually
  with `re2.case_sensitive?`

* Performing a single string replacement with
  `RE2.Replace(original, pattern, replacement)`

* Performing a global string replacement with
  `RE2.GlobalReplace(original, pattern, replacement)`

* Escaping regular expressions with
  [`RE2::Regexp.escape(unquoted)`](http://code.google.com/p/re2/source/browse/re2/re2.h#377)
  and `RE2::Regexp.quote(unquoted)`

Contributions
-------------

Thanks to [Jason Woods](https://github.com/driskell) who contributed the
original implementations of `RE2::MatchData#begin` and `RE2::MatchData#end`.

Contact
-------

All feedback should go to the mailing list: <mailto:ruby.re2@librelist.com>

  [re2]: http://code.google.com/p/re2/
  [gcc]: http://gcc.gnu.org/
  [ruby-dev]: http://packages.debian.org/ruby-dev
  [build-essential]: http://packages.debian.org/build-essential
  [Regexp]: http://ruby-doc.org/core/classes/Regexp.html
  [MatchData]: http://ruby-doc.org/core/classes/MatchData.html
  [Homebrew]: http://mxcl.github.com/homebrew
  [libre2-dev]: http://packages.debian.org/search?keywords=libre2-dev
  [official syntax page]: http://code.google.com/p/re2/wiki/Syntax