js_regex-3.8.0/0000755000004100000410000000000014346302434013363 5ustar www-datawww-datajs_regex-3.8.0/lib/0000755000004100000410000000000014346302434014131 5ustar www-datawww-datajs_regex-3.8.0/lib/js_regex.rb0000644000004100000410000000207414346302434016267 0ustar www-datawww-data# JsRegex converts ::Regexp instances to JavaScript. # # Usage: # # js_regex = JsRegex.new(my_ruby_regex) # js_regex.to_h # for use in 'new RegExp()' # js_regex.to_s # for direct injection into JavaScript # class JsRegex require_relative File.join('js_regex', 'conversion') require_relative File.join('js_regex', 'error') require_relative File.join('js_regex', 'version') require 'json' attr_reader :source, :options, :warnings, :target def initialize(ruby_regex, **kwargs) @source, @options, @warnings, @target = Conversion.of(ruby_regex, **kwargs) end def to_h { source: source, options: options } end def to_json(options = {}) to_h.to_json(options) end def to_s "/#{source.empty? ? '(?:)' : source}/#{options}" end def self.new!(ruby_regex, **kwargs) js_regex = new(ruby_regex, **kwargs) if js_regex.warnings.any? raise StandardError.new( "Could not fully convert the given regex #{ruby_regex.inspect}:\n" + js_regex.warnings.join("\n") ).extend(JsRegex::Error) end js_regex end end js_regex-3.8.0/lib/js_regex/0000755000004100000410000000000014346302434015737 5ustar www-datawww-datajs_regex-3.8.0/lib/js_regex/second_pass.rb0000644000004100000410000001137714346302434020576 0ustar www-datawww-dataclass JsRegex # # After conversion of a full Regexp::Expression tree, this # checks for Node instances that need further processing. 
# module SecondPass class << self def call(tree) substitute_root_level_keep_mark(tree) alternate_conditional_permutations(tree) tree end private def substitute_root_level_keep_mark(tree) keep_mark_index = nil tree.children.each.with_index do |child, i| break keep_mark_index = i if child.type == :keep_mark end return unless keep_mark_index pre = tree.children[0...keep_mark_index] post = tree.children[(keep_mark_index + 1)..-1] lookbehind = Node.new('(?<=', *pre, ')') tree.update(children: [lookbehind, *post]) end def alternate_conditional_permutations(tree) permutations = conditional_tree_permutations(tree) return if permutations.empty? alternatives = permutations.map.with_index do |variant, i| Node.new((i.zero? ? '(?:' : '|(?:'), variant, ')') end tree.update(children: alternatives) end def conditional_tree_permutations(tree) conds = conditions(tree) return [] if conds.empty? caps_per_branch = captured_group_count(tree) condition_permutations(conds).map.with_index do |truthy_conds, i| tree_permutation = tree.clone # find referenced groups and conditionals and make one-sided crawl(tree_permutation) do |node| build_permutation(node, conds, truthy_conds, caps_per_branch, i) end end end def crawl(node, &block) return if node.instance_of?(String) yield(node) node.children.each { |child| crawl(child, &block) } end def conditions(tree) conditions = [] crawl(tree) do |node| conditions << node.reference if node.type.equal?(:conditional) end conditions end def captured_group_count(tree) count = 0 crawl(tree) { |node| count += 1 if node.type.equal?(:captured_group) } count end def condition_permutations(conditions) (0..(conditions.length)).inject([]) do |arr, n| arr + conditions.combination(n).to_a end end def build_permutation(node, conds, truthy_conds, caps_per_branch, i) truthy = truthy_conds.include?(node.reference) case node.type when :backref # We cannot use named groups or backrefs in the conditional expansion, # their repetition would cause a "Duplicate capture group 
name" error in JS. node.update(children: [ node.children.first.sub(/k<.*>/, node.reference.to_s) ]) # backref numbers need to be incremented for subsequent "branches" adapt_backref_to_permutation(node, caps_per_branch, i) when :captured_group # Remove name, c.f. :backref handling. node.update(children: [ node.children.first.sub(/\?<.*>/, ''), *node.children[1..-1] ]) # if the group is referenced by any condition, modulate its quantity if conds.include?(node.reference) adapt_referenced_group_to_permutation(node, truthy) end when :conditional adapt_conditional_to_permutation(node, truthy) end end def adapt_referenced_group_to_permutation(group_node, truthy) truthy ? min_quantify(group_node) : null_quantify(group_node) end def adapt_conditional_to_permutation(conditional_node, truthy) branches = conditional_node.children[1...-1] if branches.count == 1 truthy || null_quantify(branches.first) else null_quantify(truthy ? branches.last : branches.first) end conditional_node.update(type: :plain) end def adapt_backref_to_permutation(backref_node, caps_per_branch, i) new_num = backref_node.reference + caps_per_branch * i backref_node.update(children: ["\\#{new_num}"]) end def min_quantify(node) return if guarantees_at_least_one_match?(qtf = node.quantifier) if qtf.max.equal?(1) # any zero_or_one quantifier (?, ??, ?+) node.update(quantifier: nil) else min_quantifier = qtf.dup min_quantifier.text = "{1,#{qtf.max}}#{'?' if qtf.reluctant?}" node.update(quantifier: min_quantifier) end end def guarantees_at_least_one_match?(quantifier) quantifier.nil? 
|| quantifier.min > 0 end def null_quantify(node) null_quantifier = Regexp::Expression::Quantifier.construct(text: '{0}') node.update(quantifier: null_quantifier) end end end end js_regex-3.8.0/lib/js_regex/version.rb0000644000004100000410000000004614346302434017751 0ustar www-datawww-dataclass JsRegex VERSION = '3.8.0' end js_regex-3.8.0/lib/js_regex/conversion.rb0000644000004100000410000000301514346302434020450 0ustar www-datawww-dataclass JsRegex # # This class acts as a facade, passing a Regexp to the Converters. # # ::of returns a source String, options String, warnings Array, target String. # class Conversion require 'regexp_parser' require_relative 'converter' require_relative 'error' require_relative 'node' require_relative 'second_pass' require_relative 'target' class << self def of(input, options: nil, target: Target::ES2009) target = Target.cast(target) source, warnings, extra_opts = convert_source(input, target) options_string = convert_options(input, options, extra_opts) [source, options_string, warnings, target] end private def convert_source(input, target) tree = Regexp::Parser.parse(input) context = Converter::Context.new( case_insensitive_root: tree.i?, target: target, ) converted_tree = Converter.convert(tree, context) final_tree = SecondPass.call(converted_tree) [final_tree.to_s, context.warnings, context.required_options] rescue Regexp::Parser::Error => e raise e.extend(JsRegex::Error) end def convert_options(input, custom_options, required_options) options = custom_options.to_s.scan(/[gimsuy]/) + required_options if input.is_a?(Regexp) && (input.options & Regexp::IGNORECASE).nonzero? 
options << 'i' end options.uniq.sort.join end end end end js_regex-3.8.0/lib/js_regex/converter/0000755000004100000410000000000014346302434017746 5ustar www-datawww-datajs_regex-3.8.0/lib/js_regex/converter/unsupported_token_converter.rb0000644000004100000410000000042014346302434026146 0ustar www-datawww-datarequire_relative 'base' class JsRegex module Converter # # Template class implementation. # class UnsupportedTokenConverter < JsRegex::Converter::Base private def convert_data warn_of_unsupported_feature end end end end js_regex-3.8.0/lib/js_regex/converter/escape_converter.rb0000644000004100000410000000337414346302434023631 0ustar www-datawww-datarequire_relative 'base' require_relative 'literal_converter' class JsRegex module Converter # # Template class implementation. # class EscapeConverter < JsRegex::Converter::Base ESCAPES_SHARED_BY_RUBY_AND_JS = %i[ alternation backslash backspace bol carriage codepoint dot eol form_feed group_close group_open hex interval_close interval_open newline one_or_more set_close set_open tab vertical_tab zero_or_more zero_or_one ].freeze private def convert_data case subtype when :codepoint_list convert_codepoint_list when :control, :meta_sequence unicode_escape_codepoint when :literal LiteralConverter.convert_data(expression.char, context) when *ESCAPES_SHARED_BY_RUBY_AND_JS pass_through when :bell, :escape, :octal hex_escape_codepoint else warn_of_unsupported_feature end end def convert_codepoint_list if context.enable_u_option split_codepoint_list else expression.chars.each_with_object(Node.new) do |char, node| node << LiteralConverter.convert_data(Regexp.escape(char), context) end end end def split_codepoint_list expression.codepoints.map { |cp| "\\u{#{cp.to_s(16).upcase}}" }.join end def unicode_escape_codepoint "\\u#{expression.codepoint.to_s(16).upcase.rjust(4, '0')}" end def hex_escape_codepoint "\\x#{expression.codepoint.to_s(16).upcase.rjust(2, '0')}" end end end end 
js_regex-3.8.0/lib/js_regex/converter/property_converter.rb0000644000004100000410000000241714346302434024252 0ustar www-datawww-datarequire_relative 'base' require 'character_set' class JsRegex module Converter # # Template class implementation. # # Uses the `character_set` and `regexp_property_values` gems to get the # codepoints matched by the property and build a set string from them. # class PropertyConverter < JsRegex::Converter::Base # A map of normalized Ruby property names to names supported by ES2018+. def self.map @map ||= File.read("#{__dir__}/property_map.csv").scan(/(.+),(.+)/).to_h end private def convert_data if context.es_2018_or_higher? && (prop_name_in_js = self.class.map[subtype.to_s.tr('_', '')]) context.enable_u_option "\\#{expression.negative? ? 'P' : 'p'}{#{prop_name_in_js}}" else build_character_set end end def build_character_set content = CharacterSet.of_expression(expression) if expression.case_insensitive? && !context.case_insensitive_root content = content.case_insensitive elsif !expression.case_insensitive? && context.case_insensitive_root warn_of_unsupported_feature('nested case-sensitive property') end content.to_s_with_surrogate_ranges end end end end js_regex-3.8.0/lib/js_regex/converter/keep_converter.rb0000644000004100000410000000106614346302434023311 0ustar www-datawww-datarequire_relative 'base' class JsRegex module Converter # # Template class implementation. # class KeepConverter < JsRegex::Converter::Base private def convert_data if context.es_2018_or_higher? if expression.level.zero? Node.new(type: :keep_mark) # mark for conversion in SecondPass else warn_of_unsupported_feature('nested keep mark') end else warn_of_unsupported_feature('keep mark', min_target: Target::ES2018) end end end end end js_regex-3.8.0/lib/js_regex/converter/group_converter.rb0000644000004100000410000000514714346302434023525 0ustar www-datawww-datarequire_relative 'base' class JsRegex module Converter # # Template class implementation. 
# class GroupConverter < JsRegex::Converter::Base private def convert_data case subtype when :capture then build_group when :named then build_named_group when :atomic then emulate_atomic_group when :comment then drop_without_warning when :options, :options_switch then build_options_group when :passive then build_passive_group when :absence then build_absence_group_if_simple else warn_of_unsupported_feature end end def build_named_group if context.es_2018_or_higher? # ES 2018+ supports named groups, but only the angled-bracket syntax build_group(head: "(?<#{expression.name}>") else build_group end end def emulate_atomic_group if context.in_atomic_group warn_of_unsupported_feature('nested atomic group') build_passive_group else context.start_atomic_group result = wrap_in_backrefed_lookahead(convert_subexpressions) context.end_atomic_group result end end def build_options_group if subtype.equal?(:options_switch) # can be ignored since #options on subsequent Expressions are correct drop_without_warning else build_passive_group end end def build_passive_group build_group(head: '(?:', capturing: false) end def build_absence_group_if_simple if unmatchable_absence_group? unmatchable_substitution elsif expression.inner_match_length.fixed? build_absence_group else warn_of_unsupported_feature('variable-length absence group content') end end def unmatchable_absence_group? expression.empty? end def unmatchable_substitution '(?!)' end def build_absence_group head = "(?:(?:.|\\n){,#{expression.inner_match_length.min - 1}}|(?:(?!" 
tail = ')(?:.|\n))*)' build_group(head: head, tail: tail, capturing: false) end def build_group(opts = {}) head = opts[:head] || '(' tail = opts[:tail] || ')' return Node.new(*wrap(head, tail)) if opts[:capturing].equal?(false) context.capture_group ref = expression.number Node.new(*wrap(head, tail), reference: ref, type: :captured_group) end def wrap(head, tail) [head, convert_subexpressions, tail] end end end end js_regex-3.8.0/lib/js_regex/converter/assertion_converter.rb0000644000004100000410000000164014346302434024372 0ustar www-datawww-datarequire_relative 'base' require_relative 'group_converter' class JsRegex module Converter # # Template class implementation. # # Note the inheritance from GroupConverter. # class AssertionConverter < JsRegex::Converter::GroupConverter private def convert_data case subtype when :lookahead, :nlookahead keep_as_is when :lookbehind return keep_as_is if context.es_2018_or_higher? warn_of_unsupported_feature('lookbehind', min_target: Target::ES2018) build_passive_group when :nlookbehind return keep_as_is if context.es_2018_or_higher? warn_of_unsupported_feature('negative lookbehind', min_target: Target::ES2018) else warn_of_unsupported_feature end end def keep_as_is build_group(head: pass_through, capturing: false) end end end end js_regex-3.8.0/lib/js_regex/converter/meta_converter.rb0000644000004100000410000000321614346302434023312 0ustar www-datawww-datarequire_relative 'base' class JsRegex module Converter # # Template class implementation. # class MetaConverter < JsRegex::Converter::Base DOT_EXPANSION = '(?:[\uD800-\uDBFF][\uDC00-\uDFFF]|[^\n\uD800-\uDFFF])' ML_DOT_EXPANSION = '(?:[\uD800-\uDBFF][\uDC00-\uDFFF]|[^\uD800-\uDFFF])' # Possible improvements for dot conversion: # # In ES2015, the 'u' flag allows dots to match astral chars. Unfortunately # the dot keeps matching lone surrogates even with this flag, so the use # of an expansion is still necessary to get the same behavior as in Ruby. 
# # ES2018 has the dotall flag 's', but it is tricky to use in conversions. # 's' activates matching of BOTH astral chars and "\n", whereas the dot in # Ruby doesn't match "\n" by default, and even with the 'm' flag set on # the root, subexps might still exclude "\n" like so: /.(?-m:.)./m private def convert_data case subtype when :alternation convert_alternatives when :dot expression.multiline? ? ML_DOT_EXPANSION : DOT_EXPANSION else warn_of_unsupported_feature end end def convert_alternatives kept_any_previous_branch = nil convert_subexpressions.transform do |node| unless dropped_branch?(node) node.children.unshift('|') if kept_any_previous_branch kept_any_previous_branch = true end node end end def dropped_branch?(branch_node) branch_node.children.any? && branch_node.children.all?(&:dropped?) end end end end js_regex-3.8.0/lib/js_regex/converter/conditional_converter.rb0000644000004100000410000000126514346302434024671 0ustar www-datawww-datarequire_relative 'base' class JsRegex module Converter # # Template class implementation. # class ConditionalConverter < JsRegex::Converter::Base private def convert_data case subtype when :open then mark_conditional_for_second_pass else warn_of_unsupported_feature end end def mark_conditional_for_second_pass reference = expression.referenced_expression.number node = Node.new('(?:', reference: reference, type: :conditional) expression.branches.each do |branch| node << Node.new('(?:', convert_expression(branch), ')') end node << ')' end end end end js_regex-3.8.0/lib/js_regex/converter/base.rb0000644000004100000410000000450214346302434021206 0ustar www-datawww-dataclass JsRegex module Converter # # Template class. Implement #convert_data in subclasses and return # instance of String or Node from it. # class Base # returns instance of Node with #quantifier attached. 
def convert(expression, context) self.context = context self.expression = expression node = convert_data node = Node.new(node) if node.instance_of?(String) apply_quantifier(node) end private attr_accessor :context, :expression def subtype expression.token end def data expression.text end alias pass_through data def apply_quantifier(node) return node if node.dropped? || (qtf = expression.quantifier).nil? if qtf.possessive? node.update(quantifier: qtf.dup.tap { |q| q.text = q.text[0..-2] }) return wrap_in_backrefed_lookahead(node) elsif qtf.token == :interval && qtf.text[0..1] == "{," node.update(quantifier: qtf.dup.tap { |q| q.text = "{0,#{q.max}}" }) else node.update(quantifier: qtf) end node end def convert_subexpressions Node.new(*expression.map { |subexp| convert_expression(subexp) }) end def convert_expression(expression) Converter.convert(expression, context) end def warn_of_unsupported_feature(description = nil, min_target: nil) description ||= "#{subtype} #{expression.type}".tr('_', ' ') full_text = "Dropped unsupported #{description} '#{expression}' "\ "at index #{expression.ts}" if min_target full_text += " (requires at least `target: '#{min_target}'`)" end warn_of(full_text) drop end def warn_of(text) context.warnings << text end def drop Node.new(type: :dropped) end alias drop_without_warning drop def wrap_in_backrefed_lookahead(content) number = context.capturing_group_count + 1 backref_node = Node.new("\\#{number}", reference: number, type: :backref) context.increment_local_capturing_group_count # an empty passive group (?:) is appended as literal digits may follow Node.new('(?=(', *content, '))', backref_node, '(?:)') end end end end js_regex-3.8.0/lib/js_regex/converter/subexpression_converter.rb0000644000004100000410000000041014346302434025266 0ustar www-datawww-datarequire_relative 'base' class JsRegex module Converter # # Template class implementation. 
# class SubexpressionConverter < JsRegex::Converter::Base private def convert_data convert_subexpressions end end end end js_regex-3.8.0/lib/js_regex/converter/context.rb0000644000004100000410000000465514346302434021771 0ustar www-datawww-dataclass JsRegex module Converter # # Passed among Converters to globalize basic status data. # # The Converters themselves are stateless. # class Context attr_reader :capturing_group_count, :case_insensitive_root, :in_atomic_group, :warnings def initialize(case_insensitive_root: false, target: nil) self.added_capturing_groups_after_group = Hash.new(0) self.capturing_group_count = 0 self.warnings = [] self.required_options_hash = {} self.case_insensitive_root = case_insensitive_root self.target = target end # target context def es_2015_or_higher? target >= Target::ES2015 end def es_2018_or_higher? target >= Target::ES2018 end # these methods allow appending options to the final Conversion output def enable_u_option return false unless es_2015_or_higher? required_options_hash['u'] = true end def required_options required_options_hash.keys end # group context def capture_group self.capturing_group_count = capturing_group_count + 1 end def start_atomic_group self.in_atomic_group = true end def end_atomic_group self.in_atomic_group = false end def increment_local_capturing_group_count added_capturing_groups_after_group[original_capturing_group_count] += 1 capture_group end # takes and returns 1-indexed group positions. # new is different from old if capturing groups were added in between. 
def new_capturing_group_position(old_position) increment = 0 added_capturing_groups_after_group.each do |after_n_groups, count| increment += count if after_n_groups < old_position end old_position + increment end def original_capturing_group_count capturing_group_count - total_added_capturing_groups end private attr_accessor :added_capturing_groups_after_group, :required_options_hash, :target attr_writer :capturing_group_count, :case_insensitive_root, :in_atomic_group, :warnings def total_added_capturing_groups added_capturing_groups_after_group.values.inject(0, &:+) end end end end js_regex-3.8.0/lib/js_regex/converter/anchor_converter.rb0000644000004100000410000000271114346302434023635 0ustar www-datawww-datarequire_relative 'base' class JsRegex module Converter # # Template class implementation. # class AnchorConverter < JsRegex::Converter::Base private def convert_data case subtype when :bol, :bos then '^' when :eol, :eos then '$' when :eos_ob_eol then '(?=\n?$)' when :word_boundary then convert_boundary when :nonword_boundary then convert_nonboundary else warn_of_unsupported_feature end end def convert_boundary if context.es_2018_or_higher? && context.enable_u_option BOUNDARY_EXPANSION else pass_boundary_with_warning end end def convert_nonboundary if context.es_2018_or_higher? && context.enable_u_option NONBOUNDARY_EXPANSION else pass_boundary_with_warning end end # This is an approximation to the word boundary behavior in Ruby, c.f. 
# https://github.com/ruby/ruby/blob/08476c45/tool/enc-unicode.rb#L130 W = '\d\p{L}\p{M}\p{Pc}' BOUNDARY_EXPANSION = "(?:(?<=[#{W}])(?=[^#{W}]|$)|(?<=[^#{W}]|^)(?=[#{W}]))" NONBOUNDARY_EXPANSION = "(?<=[#{W}])(?=[#{W}])" def pass_boundary_with_warning warn_of("The anchor '#{data}' at index #{expression.ts} only works "\ 'at ASCII word boundaries with targets below ES2018".') pass_through end end end end js_regex-3.8.0/lib/js_regex/converter/freespace_converter.rb0000644000004100000410000000040214346302434024313 0ustar www-datawww-datarequire_relative 'base' class JsRegex module Converter # # Template class implementation. # class FreespaceConverter < JsRegex::Converter::Base private def convert_data drop_without_warning end end end end js_regex-3.8.0/lib/js_regex/converter/type_converter.rb0000644000004100000410000000376014346302434023351 0ustar www-datawww-datarequire_relative 'base' class JsRegex module Converter # # Template class implementation. # class TypeConverter < JsRegex::Converter::Base HEX_EXPANSION = '[0-9A-Fa-f]' NONHEX_EXPANSION = '[^0-9A-Fa-f]' ES2018_HEX_EXPANSION = '\p{AHex}' ES2018_NONHEX_EXPANSION = '\P{AHex}' LINEBREAK_EXPANSION = '(?:\r\n|[\n\v\f\r\u0085\u2028\u2029])' def self.directly_compatible?(expression) case expression.token when :space, :nonspace !expression.ascii_classes? when :digit, :nondigit, :word, :nonword !expression.unicode_classes? end end private def convert_data case subtype when :hex then hex_expansion when :nonhex then nonhex_expansion when :linebreak then LINEBREAK_EXPANSION when :digit, :space, :word return pass_through if self.class.directly_compatible?(expression) set_substitution when :nondigit, :nonspace, :nonword return pass_through if self.class.directly_compatible?(expression) negative_set_substitution else warn_of_unsupported_feature end end def hex_expansion if context.es_2018_or_higher? && context.enable_u_option ES2018_HEX_EXPANSION else HEX_EXPANSION end end def nonhex_expansion if context.es_2018_or_higher? 
&& context.enable_u_option ES2018_NONHEX_EXPANSION else NONHEX_EXPANSION end end def negative_set_substitution # ::of_expression returns an inverted set for negative expressions, # so we need to un-invert before wrapping in [^ and ]. Kinda lame. "[^#{character_set.inversion.bmp_part}]" end def set_substitution character_set.bmp_part.to_s(in_brackets: true) end def character_set CharacterSet.of_expression(expression) end end end end js_regex-3.8.0/lib/js_regex/converter/backreference_converter.rb0000644000004100000410000000323214346302434025141 0ustar www-datawww-datarequire_relative 'base' class JsRegex module Converter # # Template class implementation. # class BackreferenceConverter < JsRegex::Converter::Base private def convert_data case subtype when :name_ref then convert_name_ref when :number, :number_ref, :number_rel_ref then convert_to_plain_num_ref when :name_call, :number_call, :number_rel_call then convert_call else # name_recursion_ref, number_recursion_ref, ... warn_of_unsupported_feature end end def convert_name_ref if context.es_2018_or_higher? 
# ES 2018+ supports named backrefs, but only the angled-bracket syntax Node.new("\\k<#{expression.name}>", reference: new_position, type: :backref) else convert_to_plain_num_ref end end def convert_to_plain_num_ref position = new_position Node.new("\\#{position}", reference: position, type: :backref) end def new_position context.new_capturing_group_position(target_position) end def target_position expression.referenced_expression.number end def convert_call if expression.respond_to?(:number) && expression.number.equal?(0) return warn_of_unsupported_feature('whole-pattern recursion') end context.increment_local_capturing_group_count target_copy = expression.referenced_expression.unquantified_clone # avoid "Duplicate capture group name" error in JS target_copy.token = :capture if target_copy.is?(:named, :group) convert_expression(target_copy) end end end end js_regex-3.8.0/lib/js_regex/converter/literal_converter.rb0000644000004100000410000000403614346302434024021 0ustar www-datawww-datarequire_relative 'base' class JsRegex module Converter # # Template class implementation. 
# class LiteralConverter < JsRegex::Converter::Base class << self ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{10FFFF}]/ def convert_data(data, context) if data =~ ASTRAL_PLANE_CODEPOINT_PATTERN if context.enable_u_option escape_incompatible_bmp_literals(data) else convert_astral_data(data) end else escape_incompatible_bmp_literals(data) end end def convert_astral_data(data) data.each_char.each_with_object(Node.new) do |char, node| if char =~ ASTRAL_PLANE_CODEPOINT_PATTERN node << surrogate_substitution_for(char) else node << escape_incompatible_bmp_literals(char) end end end def escape_incompatible_bmp_literals(data) data.gsub('/', '\\/').gsub(/[\f\n\r\t]/) { |lit| Regexp.escape(lit) } end private def surrogate_substitution_for(char) CharacterSet::Writer.write_surrogate_ranges([], [char.codepoints]) end end private def convert_data result = self.class.convert_data(data, context) if context.case_insensitive_root && !expression.case_insensitive? warn_of_unsupported_feature('nested case-sensitive literal') elsif !context.case_insensitive_root && expression.case_insensitive? return handle_locally_case_insensitive_literal(result) end result end HAS_CASE_PATTERN = /[\p{lower}\p{upper}]/ def handle_locally_case_insensitive_literal(literal) literal =~ HAS_CASE_PATTERN ? case_insensitivize(literal) : literal end def case_insensitivize(literal) literal.each_char.each_with_object(Node.new) do |chr, node| node << (chr =~ HAS_CASE_PATTERN ? "[#{chr}#{chr.swapcase}]" : chr) end end end end end js_regex-3.8.0/lib/js_regex/converter/set_converter.rb0000644000004100000410000000441514346302434023161 0ustar www-datawww-datarequire_relative 'base' require_relative 'escape_converter' require_relative 'type_converter' require 'character_set' class JsRegex module Converter # # Template class implementation. # # Unlike other converters, this one does not recurse on subexpressions, # since many are unsupported by JavaScript. 
If it detects incompatible # children, it uses the `character_set` gem to establish the codepoints # matched by the whole set and build a completely new set string. # class SetConverter < JsRegex::Converter::Base private def convert_data return pass_through_with_escaping if directly_compatible? content = CharacterSet.of_expression(expression) if expression.case_insensitive? && !context.case_insensitive_root content = content.case_insensitive elsif !expression.case_insensitive? && context.case_insensitive_root warn_of_unsupported_feature('nested case-sensitive set') end if context.es_2015_or_higher? context.enable_u_option if content.astral_part? content.to_s(format: 'es6', in_brackets: true) else content.to_s_with_surrogate_ranges end end def directly_compatible? all_children_directly_compatible? && !casefolding_needed? end def all_children_directly_compatible? # note that #each_expression is recursive expression.each_expression.all? { |ch| child_directly_compatible?(ch) } end def child_directly_compatible?(exp) case exp.type when :literal # surrogate pair substitution needed on ES2009 if astral exp.text.ord <= 0xFFFF || context.enable_u_option when :set # conversion needed for nested sets, intersections exp.token.equal?(:range) when :type TypeConverter.directly_compatible?(exp) when :escape EscapeConverter::ESCAPES_SHARED_BY_RUBY_AND_JS.include?(exp.token) end end def casefolding_needed? expression.case_insensitive? 
^ context.case_insensitive_root end def pass_through_with_escaping string = expression.to_s(:base) LiteralConverter.escape_incompatible_bmp_literals(string) end end end end js_regex-3.8.0/lib/js_regex/converter/property_map.csv0000644000004100000410000001075314346302434023212 0ustar www-datawww-data# THIS FILE IS GENERATED BY $ rake build_prop_map - DO NOT EDIT ascii,ASCII asciihexdigit,ASCII_Hex_Digit adlam,Script=Adlam anatolianhieroglyphs,Script=Anatolian_Hieroglyphs armenian,Script=Armenian avestan,Script=Avestan bamum,Script=Bamum bassavah,Script=Bassa_Vah batak,Script=Batak bengali,Script=Bengali bhaiksuki,Script=Bhaiksuki bidicontrol,Bidi_Control bopomofo,Script=Bopomofo braille,Script=Braille buginese,Script=Buginese buhid,Script=Buhid carian,Script=Carian caucasianalbanian,Script=Caucasian_Albanian chakma,Script=Chakma cham,Script=Cham cherokee,Script=Cherokee chorasmian,Script=Chorasmian connectorpunctuation,Connector_Punctuation control,Control coptic,Script=Coptic cuneiform,Script=Cuneiform cypriot,Script=Cypriot cyrillic,Script=Cyrillic deprecated,Deprecated deseret,Script=Deseret devanagari,Script=Devanagari divesakuru,Script=Dives_Akuru dogra,Script=Dogra duployan,Script=Duployan egyptianhieroglyphs,Script=Egyptian_Hieroglyphs elbasan,Script=Elbasan elymaic,Script=Elymaic emojicomponent,Emoji_Component emojimodifier,Emoji_Modifier enclosingmark,Enclosing_Mark finalpunctuation,Final_Punctuation georgian,Script=Georgian gothic,Script=Gothic grantha,Script=Grantha greek,Script=Greek gujarati,Script=Gujarati gunjalagondi,Script=Gunjala_Gondi gurmukhi,Script=Gurmukhi hangul,Script=Hangul hanifirohingya,Script=Hanifi_Rohingya hanunoo,Script=Hanunoo hatran,Script=Hatran hebrew,Script=Hebrew hexdigit,Hex_Digit idsbinaryoperator,IDS_Binary_Operator idstrinaryoperator,IDS_Trinary_Operator imperialaramaic,Script=Imperial_Aramaic initialpunctuation,Initial_Punctuation inscriptionalpahlavi,Script=Inscriptional_Pahlavi 
inscriptionalparthian,Script=Inscriptional_Parthian javanese,Script=Javanese joincontrol,Join_Control kayahli,Script=Kayah_Li kharoshthi,Script=Kharoshthi khitansmallscript,Script=Khitan_Small_Script khmer,Script=Khmer khojki,Script=Khojki khudawadi,Script=Khudawadi lao,Script=Lao lepcha,Script=Lepcha letternumber,Letter_Number limbu,Script=Limbu lineseparator,Line_Separator lineara,Script=Linear_A linearb,Script=Linear_B lisu,Script=Lisu logicalorderexception,Logical_Order_Exception lycian,Script=Lycian lydian,Script=Lydian mahajani,Script=Mahajani makasar,Script=Makasar malayalam,Script=Malayalam mandaic,Script=Mandaic manichaean,Script=Manichaean marchen,Script=Marchen masaramgondi,Script=Masaram_Gondi math,Math mathsymbol,Math_Symbol medefaidrin,Script=Medefaidrin meeteimayek,Script=Meetei_Mayek mendekikakui,Script=Mende_Kikakui meroiticcursive,Script=Meroitic_Cursive meroitichieroglyphs,Script=Meroitic_Hieroglyphs miao,Script=Miao modi,Script=Modi mro,Script=Mro multani,Script=Multani myanmar,Script=Myanmar nabataean,Script=Nabataean nandinagari,Script=Nandinagari newtailue,Script=New_Tai_Lue newa,Script=Newa nko,Script=Nko noncharactercodepoint,Noncharacter_Code_Point nushu,Script=Nushu nyiakengpuachuehmong,Script=Nyiakeng_Puachue_Hmong ogham,Script=Ogham olchiki,Script=Ol_Chiki oldhungarian,Script=Old_Hungarian olditalic,Script=Old_Italic oldnortharabian,Script=Old_North_Arabian oldpermic,Script=Old_Permic oldpersian,Script=Old_Persian oldsogdian,Script=Old_Sogdian oldsoutharabian,Script=Old_South_Arabian oldturkic,Script=Old_Turkic oriya,Script=Oriya osage,Script=Osage osmanya,Script=Osmanya othernumber,Other_Number pahawhhmong,Script=Pahawh_Hmong palmyrene,Script=Palmyrene paragraphseparator,Paragraph_Separator patternsyntax,Pattern_Syntax patternwhitespace,Pattern_White_Space paucinhau,Script=Pau_Cin_Hau phagspa,Script=Phags_Pa phoenician,Script=Phoenician privateuse,Private_Use psalterpahlavi,Script=Psalter_Pahlavi quotationmark,Quotation_Mark 
class JsRegex
  #
  # Converter#convert result. Represents a branch or leaf node with an optional
  # quantifier as well as type and reference annotations for SecondPass.
  #
  class Node
    require_relative 'error'

    # Raised when an attribute is assigned an unsupported value, or when a
    # node of a not-yet-substituted type is stringified. Shadows ::TypeError
    # within Node.
    #
    # JsRegex::Error is *included* (not extended onto the class object) so
    # that every raised instance is_a?(JsRegex::Error) and is therefore
    # matched by `rescue JsRegex::Error`.
    TypeError = Class.new(::TypeError) { include JsRegex::Error }

    attr_reader :children, :quantifier, :reference, :type

    # All values accepted by #type= (besides nil).
    TYPES = %i[
      backref
      captured_group
      conditional
      dropped
      keep_mark
      plain
    ].freeze

    # @param children [Array] child Nodes and/or Strings, in output order
    # @param reference [Numeric, nil] reference annotation read by SecondPass
    # @param type [Symbol, nil] one of TYPES
    # @raise [TypeError] if reference or type has an unsupported value
    def initialize(*children, reference: nil, type: :plain)
      self.children = children
      self.reference = reference
      self.type = type
    end

    # Called by #clone / #dup. Clones every child as well, so that the copy
    # does not share child objects with the original.
    def initialize_copy(*)
      self.children = children.map(&:clone)
    end

    # Replaces each child, in place, with the result of the given block.
    #
    # @return [Node] self
    def transform(&block)
      children.map!(&block)
      self
    end

    # Appends a child.
    #
    # @return [Node] self, which allows chaining
    def <<(node)
      children << node
      self
    end

    # @return [Boolean] whether this node has the type :dropped
    def dropped?
      # keep everything else, including empty or depleted capturing groups
      # so as not to mess with reference numbers (e.g. backrefs)
      type.equal?(:dropped)
    end

    # Builds the output string from all children plus the quantifier.
    #
    # @return [String] an empty String for dropped nodes
    # @raise [TypeError] for types other than :dropped, :backref,
    #   :captured_group and :plain, which must be substituted beforehand
    def to_s
      case type
      when :dropped
        ''
      when :backref, :captured_group, :plain
        children.join << quantifier.to_s
      else
        raise TypeError, "#{type} must be substituted before stringification"
      end
    end

    # Assigns any of :children, :quantifier and :type that are present in
    # attrs. Other keys are ignored.
    #
    # @param attrs [Hash]
    # @return [Node] self
    # @raise [TypeError] if a given value is unsupported
    def update(attrs)
      self.children   = attrs.fetch(:children)   if attrs.key?(:children)
      self.quantifier = attrs.fetch(:quantifier) if attrs.key?(:quantifier)
      self.type       = attrs.fetch(:type)       if attrs.key?(:type)
      self
    end

    private

    # Accepts nil or any member of TYPES.
    def type=(arg)
      arg.nil? || TYPES.include?(arg) ||
        raise(TypeError, "unsupported type #{arg.class} for #{__method__}")
      @type = arg
    end

    # Accepts only a plain Array (subclasses of Array are rejected).
    def children=(arg)
      arg.class == Array ||
        raise(TypeError, "unsupported type #{arg.class} for #{__method__}")
      @children = arg
    end

    # Accepts nil or a Regexp::Expression::Quantifier (exact class).
    def quantifier=(arg)
      arg.nil? || arg.class == Regexp::Expression::Quantifier ||
        raise(TypeError, "unsupported type #{arg.class} for #{__method__}")
      @quantifier = arg
    end

    # Accepts nil or any Numeric.
    def reference=(arg)
      arg.nil? || arg.is_a?(Numeric) ||
        raise(TypeError, "unsupported type #{arg.class} for #{__method__}")
      @reference = arg
    end
  end
end
class JsRegex
  # Maps expression types to converter classes and dispatches conversion
  # to an instance of the matching class.
  module Converter
    # Load all converter implementations from the sibling `converter`
    # directory, in sorted path order.
    Dir.glob(File.join(__dir__, 'converter', '*.rb')).sort.each do |path|
      require path
    end

    # Expression type => converter class. Types that are not listed
    # fall back to UnsupportedTokenConverter via the Hash default.
    MAP = Hash.new(UnsupportedTokenConverter).update(
      anchor:      AnchorConverter,
      assertion:   AssertionConverter,
      backref:     BackreferenceConverter,
      conditional: ConditionalConverter,
      escape:      EscapeConverter,
      expression:  SubexpressionConverter,
      free_space:  FreespaceConverter,
      group:       GroupConverter,
      keep:        KeepConverter,
      literal:     LiteralConverter,
      meta:        MetaConverter,
      nonproperty: PropertyConverter,
      property:    PropertyConverter,
      set:         SetConverter,
      type:        TypeConverter
    ).freeze

    # Converts the given expression with a converter chosen by its type.
    # A new Context is created when none is passed.
    def self.convert(exp, context = nil)
      converter = self.for(exp)
      converter.convert(exp, context || Context.new)
    end

    # Returns a new instance of the converter class registered in MAP
    # for the type of the given expression.
    def self.for(expression)
      converter_class = MAP[expression.type]
      converter_class.new
    end

    # Legacy method. Remove in v4.0.0.
    def self.surrogate_pair_limit=(_arg)
      warn '#surrogate_pair_limit= is deprecated and has no effect anymore.'
    end
  end
end
:required_rubygems_version= s.require_paths = ["lib".freeze] s.authors = ["Janosch M\u00FCller".freeze] s.date = "2022-09-25" s.description = "JsRegex converts Ruby's native regular expressions for JavaScript, taking care of various incompatibilities and returning warnings for unsolvable differences.".freeze s.email = ["janosch84@gmail.com".freeze] s.files = ["lib/js_regex.rb".freeze, "lib/js_regex/conversion.rb".freeze, "lib/js_regex/converter.rb".freeze, "lib/js_regex/converter/anchor_converter.rb".freeze, "lib/js_regex/converter/assertion_converter.rb".freeze, "lib/js_regex/converter/backreference_converter.rb".freeze, "lib/js_regex/converter/base.rb".freeze, "lib/js_regex/converter/conditional_converter.rb".freeze, "lib/js_regex/converter/context.rb".freeze, "lib/js_regex/converter/escape_converter.rb".freeze, "lib/js_regex/converter/freespace_converter.rb".freeze, "lib/js_regex/converter/group_converter.rb".freeze, "lib/js_regex/converter/keep_converter.rb".freeze, "lib/js_regex/converter/literal_converter.rb".freeze, "lib/js_regex/converter/meta_converter.rb".freeze, "lib/js_regex/converter/property_converter.rb".freeze, "lib/js_regex/converter/property_map.csv".freeze, "lib/js_regex/converter/set_converter.rb".freeze, "lib/js_regex/converter/subexpression_converter.rb".freeze, "lib/js_regex/converter/type_converter.rb".freeze, "lib/js_regex/converter/unsupported_token_converter.rb".freeze, "lib/js_regex/error.rb".freeze, "lib/js_regex/node.rb".freeze, "lib/js_regex/second_pass.rb".freeze, "lib/js_regex/target.rb".freeze, "lib/js_regex/version.rb".freeze] s.homepage = "https://github.com/jaynetics/js_regex".freeze s.licenses = ["MIT".freeze] s.required_ruby_version = Gem::Requirement.new(">= 2.1.0".freeze) s.rubygems_version = "3.2.5".freeze s.summary = "Converts Ruby regexes to JavaScript regexes.".freeze if s.respond_to? :specification_version then s.specification_version = 4 end if s.respond_to? 
:add_runtime_dependency then
    s.add_runtime_dependency("character_set".freeze, ["~> 1.4"])
    s.add_runtime_dependency("regexp_parser".freeze, ["~> 2.5"])
    s.add_runtime_dependency("regexp_property_values".freeze, ["~> 1.0"])
  else
    s.add_dependency("character_set".freeze, ["~> 1.4"])
    s.add_dependency("regexp_parser".freeze, ["~> 2.5"])
    s.add_dependency("regexp_property_values".freeze, ["~> 1.0"])
  end
end