# ---- archive member: js_regex-3.14.0/lib/js_regex/error.rb ----
# frozen_string_literal: true

class JsRegex
  # This is mixed into errors, e.g. those thrown by the parser,
  # allowing to `rescue JsRegex::Error`.
  module Error; end
end

# ---- archive member: js_regex-3.14.0/lib/js_regex/node.rb ----
# frozen_string_literal: true

class JsRegex
  #
  # Converter#convert result. Represents a branch or leaf node with an optional
  # quantifier as well as type and reference annotations for SecondPass.
  #
  class Node
    require_relative 'error'

    attr_reader :children, :quantifier, :reference, :type

    # Every value accepted by #type= (besides nil).
    TYPES = %i[
      backref
      branch
      captured_group
      conditional
      dropped
      keep_mark
      plain
    ].freeze

    # @param children [Array] child Nodes and/or Strings, in output order
    # @param reference [Numeric, nil] group number this node is or refers to
    # @param type [Symbol] one of TYPES
    # @raise [Node::TypeError] if an argument fails the setter checks below
    def initialize(*children, reference: nil, type: :plain)
      self.children = children
      self.reference = reference
      self.type = type
    end

    # Makes #clone / #dup deep with regard to children, so that a copied tree
    # can be mutated without touching the original.
    def initialize_copy(*)
      self.children = children.map(&:clone)
    end

    # Replaces each child in place with the block's result. Returns self.
    def transform(&block)
      children.map!(&block)
      self
    end

    # Appends a child. Returns self, so calls can be chained.
    def <<(node)
      children << node
      self
    end

    def dropped?
      # keep everything else, including empty or depleted capturing groups
      # so as not to mess with reference numbers (e.g. backrefs)
      type.equal?(:dropped)
    end

    # Serializes the node and its children, followed by the quantifier.
    #
    # @raise [Node::TypeError] for types that have no string form here
    #   (:conditional, :keep_mark)
    def to_s
      case type
      when :dropped
        ''
      when :backref, :branch, :captured_group, :plain
        children.join << quantifier.to_s
      else
        # Node::TypeError includes JsRegex::Error, so no per-instance
        # extension is needed for `rescue JsRegex::Error` to match.
        raise TypeError, "#{type} must be substituted before stringification"
      end
    end

    # Assigns any of :children, :quantifier, :type found in attrs, going
    # through the validating setters. Returns self.
    def update(attrs)
      self.children = attrs.fetch(:children) if attrs.key?(:children)
      self.quantifier = attrs.fetch(:quantifier) if attrs.key?(:quantifier)
      self.type = attrs.fetch(:type) if attrs.key?(:type)
      self
    end

    # Truthy if a quantifier is attached and allows zero repetitions.
    def optional?
      quantifier && quantifier.min == 0
    end

    private

    # The module must be *included* so that raised instances match
    # `rescue JsRegex::Error`. Extending the class object (as was done before)
    # only tags the class itself, and the setter errors slipped past such a
    # rescue clause.
    TypeError = Class.new(::TypeError) { include JsRegex::Error }

    def type=(arg)
      arg.nil? || TYPES.include?(arg) ||
        raise(TypeError, "unsupported type #{arg.class} for #{__method__}")
      @type = arg
    end

    def children=(arg)
      arg.class == Array ||
        raise(TypeError, "unsupported type #{arg.class} for #{__method__}")
      @children = arg
    end

    def quantifier=(arg)
      arg.nil? || arg.class == Regexp::Expression::Quantifier ||
        raise(TypeError, "unsupported type #{arg.class} for #{__method__}")
      @quantifier = arg
    end

    def reference=(arg)
      arg.nil? || arg.is_a?(Numeric) ||
        raise(TypeError, "unsupported type #{arg.class} for #{__method__}")
      @reference = arg
    end
  end
end

# ---- archive member: js_regex-3.14.0/lib/js_regex/target.rb ----
# frozen_string_literal: true

class JsRegex
  module Target
    ES2009 = 'ES2009'
    ES2015 = 'ES2015'
    ES2018 = 'ES2018'
    SUPPORTED = [ES2009, ES2015, ES2018].freeze

    # Normalizes a user-supplied target to one of SUPPORTED.
    #
    # nil yields ES2009. Otherwise the argument is stringified, upcased, and an
    # optional leading ECMASCRIPT / ES / JAVASCRIPT / JS (plus one optional
    # space) is replaced by "ES" before the lookup.
    #
    # @raise [ArgumentError] (extended with JsRegex::Error) for unknown targets
    def self.cast(arg)
      return ES2009 if arg.nil?

      normalized_arg = arg.to_s.upcase.sub(/^(ECMASCRIPT|ES|JAVASCRIPT|JS)? ?/, 'ES')
      return normalized_arg if SUPPORTED.include?(normalized_arg)

      raise ArgumentError.new(
        "Unknown target: #{arg.inspect}. Try one of #{SUPPORTED}."
      ).extend(JsRegex::Error)
    end
  end
end
# ---- archive member: js_regex-3.14.0/lib/js_regex/second_pass.rb ----
# frozen_string_literal: true

class JsRegex
  #
  # After conversion of a full Regexp::Expression tree, this
  # checks for Node instances that need further processing.
  #
  module SecondPass
    class << self
      # Runs the three fix-up steps on the converted tree, mutating it in
      # place, and returns the same tree.
      def call(tree)
        substitute_root_level_keep_mark(tree)
        alternate_conditional_permutations(tree)
        handle_non_participating_backrefs(tree)
        tree
      end

      private

      # Finds the first direct child of type :keep_mark and wraps everything
      # before it in a `(?<=...)` lookbehind; the mark itself is discarded.
      def substitute_root_level_keep_mark(tree)
        keep_mark_index = nil
        tree.children.each.with_index do |child, i|
          break keep_mark_index = i if child.type == :keep_mark
        end
        return unless keep_mark_index

        pre = tree.children[0...keep_mark_index]
        post = tree.children[(keep_mark_index + 1)..-1]
        lookbehind = Node.new('(?<=', *pre, ')')
        tree.update(children: [lookbehind, *post])
      end

      # Replaces backrefs with the unmatchable `(?!)` when they refer to a
      # group that has not been completed yet at that point of the walk, or to
      # a non-optional group recorded under a different :branch node.
      def handle_non_participating_backrefs(tree)
        level = 0
        completed_group_numbers = {}
        group_branches = {}
        branch_stack = []

        crawl(tree, true) do |node, event|
          case [node.type, event]
          when [:branch, :enter]
            branch_stack.push(node)
          when [:branch, :exit]
            branch_stack.pop
          when [:captured_group, :enter]
            level += 1
          when [:captured_group, :exit]
            unless node.optional? # ignore optional groups
              group_branches[node.reference] = branch_stack.last
            end
            # mark the lowest not-yet-completed number, starting at the
            # current nesting level
            number = level
            number += 1 while completed_group_numbers[number]
            completed_group_numbers[number] = true
            level -= 1
          when [:backref, :exit]
            ref_branch = group_branches[node.reference]
            current_branch = branch_stack.last
            # make bad backrefs non-matchable
            references_other_branch = ref_branch && current_branch && ref_branch != current_branch
            forward_reference = !completed_group_numbers[node.reference]
            if references_other_branch || forward_reference
              node.update(type: :plain, children: ['(?!)'])
            end
          end
        end
      end

      # If the tree contains conditionals, replaces its children with an
      # alternation of one-sided copies of the whole tree, one per permutation.
      def alternate_conditional_permutations(tree)
        permutations = conditional_tree_permutations(tree)
        return if permutations.empty?

        alternatives = permutations.map.with_index do |variant, i|
          Node.new((i.zero? ? '(?:' : '|(?:'), variant, ')', type: :branch)
        end
        tree.update(children: alternatives)
      end

      # Returns one adapted clone of the tree per combination of "truthy"
      # condition references, or [] if there are no conditionals.
      def conditional_tree_permutations(tree)
        conds = conditions(tree)
        return [] if conds.empty?

        caps_per_branch = captured_group_count(tree)

        condition_permutations(conds).map.with_index do |truthy_conds, i|
          tree_permutation = tree.clone
          # find referenced groups and conditionals and make one-sided
          crawl(tree_permutation) do |node|
            build_permutation(node, conds, truthy_conds, caps_per_branch, i)
          end
          tree_permutation
        end
      end

      # Depth-first walk over Nodes; String children are skipped.
      # With trace, the block is called twice per node, with :enter and :exit.
      def crawl(node, trace = false, &block)
        return if node.instance_of?(String)

        trace ? yield(node, :enter) : yield(node)
        node.children.each { |child| crawl(child, trace, &block) }
        trace && yield(node, :exit)
      end

      # References of all :conditional nodes, in walk order.
      def conditions(tree)
        conditions = []
        crawl(tree) do |node|
          conditions << node.reference if node.type.equal?(:conditional)
        end
        conditions
      end

      def captured_group_count(tree)
        count = 0
        crawl(tree) { |node| count += 1 if node.type.equal?(:captured_group) }
        count
      end

      # All combinations of the given conditions for every size from 0 to
      # conditions.length, smallest first.
      def condition_permutations(conditions)
        (0..(conditions.length)).inject([]) do |arr, n|
          arr + conditions.combination(n).to_a
        end
      end

      # Adapts a single node of the i-th tree copy.
      def build_permutation(node, conds, truthy_conds, caps_per_branch, i)
        truthy = truthy_conds.include?(node.reference)

        case node.type
        when :backref
          # We cannot use named groups or backrefs in the conditional expansion,
          # their repetition would cause a "Duplicate capture group name" error in JS.
          node.update(children: [
            node.children.first.sub(/k<.*>/, node.reference.to_s)
          ])
          # backref numbers need to be incremented for subsequent "branches"
          adapt_backref_to_permutation(node, caps_per_branch, i)
        when :captured_group
          # if the group is referenced by any condition, modulate its quantity
          if conds.include?(node.reference)
            adapt_referenced_group_to_permutation(node, truthy)
          end
        when :conditional
          adapt_conditional_to_permutation(node, truthy)
        end
      end

      def adapt_referenced_group_to_permutation(group_node, truthy)
        truthy ? min_quantify(group_node) : null_quantify(group_node)
      end

      # Disables the branch that cannot apply in this permutation via `{0}`
      # and turns the conditional into a :plain node.
      def adapt_conditional_to_permutation(conditional_node, truthy)
        # first and last child are the wrapping strings added by the converter
        branches = conditional_node.children[1...-1]
        if branches.count == 1
          truthy || null_quantify(branches.first)
        else
          null_quantify(truthy ? branches.last : branches.first)
        end
        conditional_node.update(type: :plain)
      end

      # Shifts the backref number by the number of groups in all preceding
      # tree copies.
      def adapt_backref_to_permutation(backref_node, caps_per_branch, i)
        new_num = backref_node.reference + caps_per_branch * i
        backref_node.update(children: ["\\#{new_num}"])
      end

      # Raises the minimum of an optional node's quantifier to 1.
      def min_quantify(node)
        return unless node.optional?

        qtf = node.quantifier
        if qtf.max.equal?(1) # any zero_or_one quantifier (?, ??, ?+)
          node.update(quantifier: nil)
        else
          min_quantifier = qtf.dup
          min_quantifier.text = "{1,#{qtf.max}}#{'?' if qtf.reluctant?}"
          node.update(quantifier: min_quantifier)
        end
      end

      # Attaches a `{0}` quantifier, replacing any existing one.
      def null_quantify(node)
        null_quantifier = Regexp::Expression::Quantifier.construct(text: '{0}')
        node.update(quantifier: null_quantifier)
      end
    end
  end
end
# ---- archive member: js_regex-3.14.0/lib/js_regex/conversion.rb ----
# frozen_string_literal: true

class JsRegex
  #
  # This class acts as a facade, passing a Regexp to the Converters.
  #
  # ::of returns a source String, options String, warnings Array, target String.
  #
  class Conversion
    require 'regexp_parser'
    require_relative 'converter'
    require_relative 'error'
    require_relative 'node'
    require_relative 'second_pass'
    require_relative 'target'

    class << self
      # @param input whatever Regexp::Parser.parse accepts
      # @param options [#to_s, nil] extra JS flags; only d g i m s u v y are kept
      # @param target see Target.cast
      # @param fail_fast [Boolean] passed on to Converter::Context
      # @return [Array] [source, options_string, warnings, target]
      def of(input, options: nil, target: Target::ES2009, fail_fast: false)
        target = Target.cast(target)
        source, warnings, extra_opts = convert_source(input, target, fail_fast)
        options_string = convert_options(input, options, extra_opts)
        [source, options_string, warnings, target]
      end

      private

      # Parses, converts and post-processes the input. Parser errors are
      # re-raised after being tagged with JsRegex::Error.
      def convert_source(input, target, fail_fast)
        tree = Regexp::Parser.parse(input)
        context = Converter::Context.new(
          case_insensitive_root: tree.i?,
          target: target,
          fail_fast: fail_fast,
        )
        converted_tree = Converter.convert(tree, context)
        final_tree = SecondPass.call(converted_tree)
        [final_tree.to_s, context.warnings, context.required_options]
      rescue Regexp::Parser::Error => e
        raise e.extend(JsRegex::Error)
      end

      # Merges custom flags, flags required by the conversion, and 'i' for a
      # case-insensitive Regexp input into one sorted, duplicate-free String.
      def convert_options(input, custom_options, required_options)
        options = custom_options.to_s.scan(/[dgimsuvy]/) + required_options
        if input.is_a?(Regexp) && (input.options & Regexp::IGNORECASE).nonzero?
          options << 'i'
        end
        options.uniq.sort.join
      end
    end
  end
end

# ---- archive member: js_regex-3.14.0/lib/js_regex/converter/freespace_converter.rb ----
# frozen_string_literal: true

require_relative 'base'

class JsRegex
  module Converter
    #
    # Template class implementation.
    #
    class FreespaceConverter < JsRegex::Converter::Base
      private

      # Always yields a :dropped node and records no warning.
      def convert_data
        drop_without_warning
      end
    end
  end
end

# ---- archive member: js_regex-3.14.0/lib/js_regex/converter/keep_converter.rb ----
# frozen_string_literal: true

require_relative 'base'

class JsRegex
  module Converter
    #
    # Template class implementation.
    #
    class KeepConverter < JsRegex::Converter::Base
      private

      # Keep marks are only kept for ES2018+ targets and only at nesting
      # level zero; every other case is dropped with a warning.
      def convert_data
        if context.es_2018_or_higher?
          if expression.level.zero?
            Node.new(type: :keep_mark) # mark for conversion in SecondPass
          else
            warn_of_unsupported_feature('nested keep mark')
          end
        else
          warn_of_unsupported_feature('keep mark', min_target: Target::ES2018)
        end
      end
    end
  end
end

# ---- archive member: js_regex-3.14.0/lib/js_regex/converter/set_converter.rb ----
# frozen_string_literal: true

require_relative 'base'
require_relative 'escape_converter'
require_relative 'type_converter'
require 'character_set'

class JsRegex
  module Converter
    #
    # Template class implementation.
    #
    # Unlike other converters, this one does not recurse on subexpressions,
    # since many are unsupported by JavaScript. If it detects incompatible
    # children, it uses the `character_set` gem to establish the codepoints
    # matched by the whole set and build a completely new set string.
    #
    class SetConverter < JsRegex::Converter::Base
      private

      def convert_data
        simple_conversion || full_recalculation
      end

      # Builds the set member by member. Returns false as soon as any member
      # cannot be converted directly, or if the set's case-sensitivity differs
      # from that of the root.
      def simple_conversion
        return false if casefolding_needed?

        result = "[#{'^' if expression.negative?}".dup

        expression.expressions.each do |subexp|
          return false unless (child_res = simple_convert_child(subexp))

          result << child_res.to_s
        end

        result << ']'
      end

      def casefolding_needed?
        expression.case_insensitive? ^ context.case_insensitive_root
      end

      # Returns the converted member, or a falsy value if it cannot be
      # converted directly.
      def simple_convert_child(exp)
        case exp.type
        when :literal
          simple_convert_literal_child(exp)
        when :set
          # full conversion is needed for nested sets and intersections
          exp.token.equal?(:range) && exp.expressions.map do |op|
            simple_convert_child(op) or return false
          end.join('-')
        when :type
          TypeConverter.directly_compatible?(exp, context) &&
            exp.text
        when :escape
          return exp.text if SET_SPECIFIC_ESCAPES_PATTERN.match?(exp.text)

          case exp.token
          when *CONVERTIBLE_ESCAPE_TOKENS
            EscapeConverter.new.convert(exp, context)
          when :literal
            exp.char.ord <= 0xFFFF &&
              LiteralConverter.escape_incompatible_bmp_literals(exp.char)
          end
        end
      end

      def simple_convert_literal_child(exp)
        # astral literals need the u flag; note that the last condition
        # enables it as a side effect where the target allows that
        if !context.u? &&
           exp.text =~ LiteralConverter::ASTRAL_PLANE_CODEPOINT_PATTERN &&
           !context.enable_u_option
          false
        elsif SET_LITERALS_REQUIRING_ESCAPE_PATTERN.match?(exp.text)
          "\\#{exp.text}"
        else
          LiteralConverter.escape_incompatible_bmp_literals(exp.text)
        end
      end

      SET_LITERALS_REQUIRING_ESCAPE_PATTERN = Regexp.union(%w<( ) [ ] { } / - |>)
      SET_SPECIFIC_ESCAPES_PATTERN = /[\^\-]/.freeze
      CONVERTIBLE_ESCAPE_TOKENS = %i[control meta_sequence bell escape octal] +
        EscapeConverter::ESCAPES_SHARED_BY_RUBY_AND_JS

      def full_recalculation
        # Fetch codepoints as if the set was case-sensitive, then re-add
        # case-insensitivity if needed.
        # This way we preserve the casing of the original set in cases where the
        # whole regexp is case-insensitive, e.g. /[ABc]/i => /[ABc]/i.
        content = original_case_character_set
        if expression.case_insensitive? && !context.case_insensitive_root
          content = content.case_insensitive
        elsif !expression.case_insensitive? && context.case_insensitive_root
          warn_of_unsupported_feature('nested case-sensitive set')
        end
        if context.es_2015_or_higher?
          context.enable_u_option if content.astral_part?
          content.to_s(format: 'es6', in_brackets: true)
        else
          content.to_s_with_surrogate_ranges
        end
      end

      # CharacterSet of a copy of the expression with the i option switched
      # off on every subexpression.
      def original_case_character_set
        neutral_set = expression.dup
        neutral_set.each_expression(true) { |exp| exp.options[:i] = false }
        CharacterSet.of_expression(neutral_set)
      end
    end
  end
end

# ---- archive member: js_regex-3.14.0/lib/js_regex/converter/property_converter.rb ----
# frozen_string_literal: true

require_relative 'base'
require 'character_set'

class JsRegex
  module Converter
    #
    # Template class implementation.
    #
    # Uses the `character_set` and `regexp_property_values` gems to get the
    # codepoints matched by the property and build a set string from them.
    #
    class PropertyConverter < JsRegex::Converter::Base
      # A map of normalized Ruby property names to names supported by ES2018+.
      # Read once from property_map.csv next to this file, then memoized.
      def self.map
        @map ||= File.read("#{__dir__}/property_map.csv").scan(/(.+),(.+)/).to_h
      end

      private

      # Emits \p{...} / \P{...} (and requires the u flag) if the target is
      # ES2018+ and the property is in the map; otherwise expands the property
      # to an explicit character set.
      def convert_data
        if context.es_2018_or_higher? &&
           (prop_name_in_js = self.class.map[subtype.to_s.tr('_', '')])
          context.enable_u_option
          "\\#{expression.negative? ? 'P' : 'p'}{#{prop_name_in_js}}"
        else
          build_character_set
        end
      end

      def build_character_set
        content = CharacterSet.of_expression(expression)
        if expression.case_insensitive? && !context.case_insensitive_root
          content = content.case_insensitive
        elsif !expression.case_insensitive? && context.case_insensitive_root
          warn_of_unsupported_feature('nested case-sensitive property')
        end
        content.to_s_with_surrogate_ranges
      end
    end
  end
end
# ---- archive member: js_regex-3.14.0/lib/js_regex/converter/group_converter.rb ----
# frozen_string_literal: true

require_relative 'base'

class JsRegex
  module Converter
    #
    # Template class implementation.
    #
    class GroupConverter < JsRegex::Converter::Base
      private

      # Dispatches on the group token; unknown kinds are dropped with a warning.
      def convert_data
        case subtype
        when :capture then build_group
        when :named then build_named_group
        when :atomic then emulate_atomic_group
        when :comment then drop_without_warning
        when :options, :options_switch then build_options_group
        when :passive then build_passive_group
        when :absence then build_absence_group_if_simple
        else warn_of_unsupported_feature
        end
      end

      def build_named_group
        # Always convert named groups to numbered groups. ES2018+ supports named
        # groups, but can not handle repeated names in multiplexing or conditional
        # expansion scenarios.
        build_group
      end

      # Emulates an atomic group with a backrefed lookahead. Atomic groups
      # nested inside another one are downgraded to passive groups (with a
      # warning).
      def emulate_atomic_group
        if context.in_atomic_group
          warn_of_unsupported_feature('nested atomic group')
          build_passive_group
        else
          context.start_atomic_group
          result = wrap_in_backrefed_lookahead(convert_subexpressions)
          context.end_atomic_group
          result
        end
      end

      def build_options_group
        if subtype.equal?(:options_switch)
          # can be ignored since #options on subsequent Expressions are correct
          drop_without_warning
        else
          build_passive_group
        end
      end

      def build_passive_group
        build_group(head: '(?:', capturing: false)
      end

      # Empty absence groups become unmatchable, fixed-length ones are
      # emulated, anything else is dropped with a warning.
      def build_absence_group_if_simple
        if unmatchable_absence_group?
          unmatchable_substitution
        elsif expression.inner_match_length.fixed?
          build_absence_group
        else
          warn_of_unsupported_feature('variable-length absence group content')
        end
      end

      def unmatchable_absence_group?
        expression.empty?
      end

      def build_absence_group
        # The lower bound must be spelled out: JavaScript has no `{,n}`
        # quantifier (Base#apply_quantifier rewrites such intervals to `{0,n}`
        # for the same reason), so `{,n}` would not act as a repetition.
        head = "(?:(?:.|\\n){0,#{expression.inner_match_length.min - 1}}|(?:(?!"
        tail = ')(?:.|\n))*)'
        build_group(head: head, tail: tail, capturing: false)
      end

      # Wraps the converted subexpressions in head and tail.
      #
      # @param opts [Hash] :head (default '('), :tail (default ')'),
      #   :capturing (anything but false means capturing)
      # @return [Node] :plain for non-capturing groups, otherwise a
      #   :captured_group carrying the expression's number as reference
      def build_group(opts = {})
        head = opts[:head] || '('
        tail = opts[:tail] || ')'
        return Node.new(*wrap(head, tail)) if opts[:capturing].equal?(false)

        context.capture_group unless context.in_subexp_recursion
        ref = expression.number
        Node.new(*wrap(head, tail), reference: ref, type: :captured_group)
      end

      def wrap(head, tail)
        [head, convert_subexpressions, tail]
      end
    end
  end
end

# ---- archive member: js_regex-3.14.0/lib/js_regex/converter/base.rb ----
# frozen_string_literal: true

class JsRegex
  module Converter
    #
    # Template class. Implement #convert_data in subclasses and return
    # instance of String or Node from it.
    #
    class Base
      # returns instance of Node with #quantifier attached.
      def convert(expression, context)
        self.context = context
        self.expression = expression

        node = convert_data
        node = Node.new(node) if node.instance_of?(String)
        apply_quantifier(node)
      end

      private

      attr_accessor :context, :expression

      def subtype
        expression.token
      end

      def data
        expression.text
      end
      alias pass_through data

      # Attaches the expression's quantifier to the node, unless the node was
      # dropped or there is no quantifier. Possessive quantifiers lose their
      # trailing `+` and the node is wrapped in a backrefed lookahead;
      # `{,n}` intervals are rewritten to `{0,n}`.
      def apply_quantifier(node)
        return node if node.dropped? || (qtf = expression.quantifier).nil?

        if qtf.possessive?
          node.update(quantifier: qtf.dup.tap { |q| q.text = q.text[0..-2] })
          return wrap_in_backrefed_lookahead(node)
        elsif qtf.token == :interval && qtf.text[0..1] == "{,"
          node.update(quantifier: qtf.dup.tap { |q| q.text = "{0,#{q.max}}" })
        else
          node.update(quantifier: qtf)
        end

        node
      end

      def convert_subexpressions
        # mark alternation and conditional branches for processing in second pass
        type = expression.is?(:sequence) ? :branch : :plain
        Node.new(*expression.map { |subexp| convert_expression(subexp) }, type: type)
      end

      def convert_expression(expression)
        Converter.convert(expression, context)
      end

      # Records a "Dropped unsupported ..." warning (or raises, see #warn_of)
      # and returns a :dropped node.
      def warn_of_unsupported_feature(description = nil, min_target: nil)
        description ||= "#{subtype} #{expression.type}".tr('_', ' ')
        full_text = "Dropped unsupported #{description} '#{expression}' "\
                    "at index #{expression.ts}"
        if min_target
          full_text += " (requires at least `target: '#{min_target}'`)"
        end
        warn_of(full_text)
        drop
      end

      # Appends to context.warnings, or raises ConversionError in fail_fast mode.
      def warn_of(text)
        if context.fail_fast
          raise ConversionError, text.sub(/^Dropped /, '')
        else
          context.warnings << text
        end
      end

      def drop
        Node.new(type: :dropped)
      end
      alias drop_without_warning drop

      # Builds `(?:(?=(content))\N)`, where N is the number of the capturing
      # group added here, and registers that extra group with the context.
      def wrap_in_backrefed_lookahead(content)
        number = context.capturing_group_count + 1
        backref_node = Node.new("\\#{number}", reference: number, type: :backref)
        backrefed_group = Node.new('(', *content, ')', reference: number, type: :captured_group)
        context.increment_local_capturing_group_count
        # The surrounding group is added so that quantifiers apply to the whole.
        # Without it, `(?:)` would need to be appended as literal digits may follow.
        Node.new('(?:(?=', backrefed_group, ')', backref_node, ')')
      end

      def unmatchable_substitution
        '(?!)'
      end
    end
  end
end
# ---- archive member: js_regex-3.14.0/lib/js_regex/converter/type_converter.rb ----
# frozen_string_literal: true

require_relative 'base'

class JsRegex
  module Converter
    #
    # Template class implementation.
    #
    class TypeConverter < JsRegex::Converter::Base
      HEX_EXPANSION           = '[0-9A-Fa-f]'
      NONHEX_EXPANSION        = '[^0-9A-Fa-f]'
      I_MODE_HEX_EXPANSION    = '[0-9A-F]'
      I_MODE_NONHEX_EXPANSION = '[^0-9A-F]'
      LINEBREAK_EXPANSION     = '(?:\r\n|[\n\v\f\r\u0085\u2028\u2029])'
      ES2018_HEX_EXPANSION    = '\p{AHex}'
      ES2018_NONHEX_EXPANSION = '\P{AHex}'
      # partially taken from https://unicode.org/reports/tr51/#EBNF_and_Regex
      # All whitespace is stripped from the heredoc, so its layout is free.
      # NOTE(review): `[\P{M}\P{Lm}]` is a union of two complements and so
      # looks like it admits nearly every character - confirm this is intended.
      ES2018_XGRAPHEME_EXPANSION = <<-'REGEXP'.gsub(/\s+/, '')
        (?:
          \r\n
          |
          \p{RI}\p{RI}
          |
          \p{Emoji}
          (?:
            \p{EMod}
            |
            \uFE0F\u20E3?
            |
            [\u{E0020}-\u{E007E}]+\u{E007F}
          )?
          (?:
            \u200D
            (?:
              \p{RI}\p{RI}
              |
              \p{Emoji}(?:\p{EMod}|\uFE0F\u20E3?|[\u{E0020}-\u{E007E}]+\u{E007F})?
            )
          )*
          |
          [\P{M}\P{Lm}](?:\u200d|\p{M}|\p{Lm}|\p{Emoji_Modifier})*
        )
      REGEXP

      # Whether the type can be passed through to JS unchanged.
      # Returns nil for tokens other than the six handled here.
      def self.directly_compatible?(expression, _context = nil)
        case expression.token
        when :space, :nonspace
          !expression.ascii_classes?
        when :digit, :nondigit, :word, :nonword
          !expression.unicode_classes?
        end
      end

      private

      def convert_data
        case subtype
        when :hex then hex_expansion
        when :nonhex then nonhex_expansion
        when :linebreak then linebreak_expansion
        when :xgrapheme then xgrapheme
        when :digit, :space, :word
          return pass_through if self.class.directly_compatible?(expression)

          set_substitution
        when :nondigit, :nonspace, :nonword
          return pass_through if self.class.directly_compatible?(expression)

          negative_set_substitution
        else
          warn_of_unsupported_feature
        end
      end

      # Note: the first condition enables the u flag as a side effect.
      def hex_expansion
        if context.es_2018_or_higher? && context.enable_u_option
          ES2018_HEX_EXPANSION
        elsif context.case_insensitive_root
          I_MODE_HEX_EXPANSION
        else
          HEX_EXPANSION
        end
      end

      # Note: the first condition enables the u flag as a side effect.
      def nonhex_expansion
        if context.es_2018_or_higher? && context.enable_u_option
          ES2018_NONHEX_EXPANSION
        elsif context.case_insensitive_root
          I_MODE_NONHEX_EXPANSION
        else
          NONHEX_EXPANSION
        end
      end

      def linebreak_expansion
        wrap_in_backrefed_lookahead(LINEBREAK_EXPANSION)
      end

      def negative_set_substitution
        # ::of_expression returns an inverted set for negative expressions,
        # so we need to un-invert before wrapping in [^ and ]. Kinda lame.
        "[^#{character_set.inversion.bmp_part}]"
      end

      def set_substitution
        character_set.bmp_part.to_s(in_brackets: true)
      end

      def character_set
        CharacterSet.of_expression(expression)
      end

      # Only available for ES2018+ (requires the u flag); otherwise dropped
      # with a warning.
      def xgrapheme
        if context.es_2018_or_higher? && context.enable_u_option
          wrap_in_backrefed_lookahead(ES2018_XGRAPHEME_EXPANSION)
        else
          warn_of_unsupported_feature
        end
      end
    end
  end
end

# ---- archive member: js_regex-3.14.0/lib/js_regex/converter/meta_converter.rb ----
# frozen_string_literal: true

require_relative 'base'

class JsRegex
  module Converter
    #
    # Template class implementation.
    #
    class MetaConverter < JsRegex::Converter::Base
      DOT_EXPANSION    = '(?:[\uD800-\uDBFF][\uDC00-\uDFFF]|[^\n\uD800-\uDFFF])'
      ML_DOT_EXPANSION = '(?:[\uD800-\uDBFF][\uDC00-\uDFFF]|[^\uD800-\uDFFF])'
      # Possible improvements for dot conversion:
      #
      # In ES2015, the 'u' flag allows dots to match astral chars. Unfortunately
      # the dot keeps matching lone surrogates even with this flag, so the use
      # of an expansion is still necessary to get the same behavior as in Ruby.
      #
      # ES2018 has the dotall flag 's', but it is tricky to use in conversions.
      # 's' activates matching of BOTH astral chars and "\n", whereas the dot in
      # Ruby doesn't match "\n" by default, and even with the 'm' flag set on
      # the root, subexps might still exclude "\n" like so: /.(?-m:.)./m

      private

      def convert_data
        case subtype
        when :alternation
          convert_alternatives
        when :dot
          expression.multiline? ? ML_DOT_EXPANSION : DOT_EXPANSION
        else
          warn_of_unsupported_feature
        end
      end

      # Prefixes every kept branch except the first kept one with '|'.
      # Branches consisting only of dropped nodes get no separator.
      def convert_alternatives
        kept_any_previous_branch = nil

        convert_subexpressions.transform do |node|
          unless dropped_branch?(node)
            node.children.unshift('|') if kept_any_previous_branch
            kept_any_previous_branch = true
          end
          node
        end
      end

      # True if the branch has children and all of them were dropped.
      def dropped_branch?(branch_node)
        branch_node.children.any? && branch_node.children.all?(&:dropped?)
      end
    end
  end
end

# ---- archive member: js_regex-3.14.0/lib/js_regex/converter/escape_converter.rb ----
# frozen_string_literal: true

require_relative 'base'
require_relative 'literal_converter'

class JsRegex
  module Converter
    #
    # Template class implementation.
    #
    class EscapeConverter < JsRegex::Converter::Base
      # Escape tokens that are passed through unchanged.
      ESCAPES_SHARED_BY_RUBY_AND_JS = %i[
        alternation
        backslash
        backspace
        bol
        carriage
        codepoint
        dot
        eol
        form_feed
        group_close
        group_open
        hex
        interval_close
        interval_open
        newline
        one_or_more
        set_close
        set_open
        tab
        vertical_tab
        zero_or_more
        zero_or_one
      ].freeze

      private

      def convert_data
        case subtype
        when :codepoint_list
          convert_codepoint_list
        when :control, :meta_sequence, :utf8_hex
          unicode_escape_codepoint
        when :literal
          LiteralConverter.convert_data(expression.char, context)
        when :bell, :escape, :hex, :octal
          # :hex is matched here, before the shared list below is consulted
          hex_escape_codepoint
        when *ESCAPES_SHARED_BY_RUBY_AND_JS
          pass_through
        else
          warn_of_unsupported_feature
        end
      end

      # Uses \u{...} escapes if the u flag can be enabled; otherwise converts
      # each char like a literal.
      def convert_codepoint_list
        if context.enable_u_option
          split_codepoint_list
        else
          expression.chars.each_with_object(Node.new) do |char, node|
            node << LiteralConverter.convert_data(Regexp.escape(char), context)
          end
        end
      end

      def split_codepoint_list
        expression.codepoints.map { |cp| "\\u{#{cp.to_s(16).upcase}}" }.join
      end

      # \uXXXX with at least four uppercase hex digits.
      def unicode_escape_codepoint
        "\\u#{expression.codepoint.to_s(16).upcase.rjust(4, '0')}"
      end

      # \xXX with at least two uppercase hex digits.
      def hex_escape_codepoint
        "\\x#{expression.codepoint.to_s(16).upcase.rjust(2, '0')}"
      end
    end
  end
end
# ---- archive member: js_regex-3.14.0/lib/js_regex/converter/anchor_converter.rb ----
# frozen_string_literal: true

require_relative 'base'

class JsRegex
  module Converter
    #
    # Template class implementation.
    #
    class AnchorConverter < JsRegex::Converter::Base
      private

      # Maps each Ruby anchor to its JS form; unknown anchors are dropped
      # with a warning.
      def convert_data
        case subtype
        when :bol then convert_bol
        when :bos then '^'
        when :eol then '(?=$|\n)'
        when :eos then '$'
        when :eos_ob_eol then '(?=\n?$)'
        when :word_boundary then convert_boundary
        when :nonword_boundary then convert_nonboundary
        else
          warn_of_unsupported_feature
        end
      end

      def convert_bol
        if context.es_2018_or_higher?
          '(?<=^|\n(?!$))'
        else
          # TODO: warn in v4.0.0, or drop ES2009 & ES2015 support
          '^'
        end
      end

      # Note: the condition enables the u flag as a side effect.
      def convert_boundary
        if context.es_2018_or_higher? && context.enable_u_option
          BOUNDARY_EXPANSION
        else
          pass_boundary_with_warning
        end
      end

      # Note: the condition enables the u flag as a side effect.
      def convert_nonboundary
        if context.es_2018_or_higher? && context.enable_u_option
          NONBOUNDARY_EXPANSION
        else
          pass_boundary_with_warning
        end
      end

      # This is an approximation to the word boundary behavior in Ruby, c.f.
      # https://github.com/ruby/ruby/blob/08476c45/tool/enc-unicode.rb#L130
      W = '\d\p{L}\p{M}\p{Pc}'
      BOUNDARY_EXPANSION = "(?:(?<=[#{W}])(?=[^#{W}]|$)|(?<=[^#{W}]|^)(?=[#{W}]))"
      NONBOUNDARY_EXPANSION = "(?<=[#{W}])(?=[#{W}])"

      # Keeps the anchor as is, but warns (or raises in fail_fast mode) about
      # its ASCII-only behavior. The message used to end in a stray `"`.
      def pass_boundary_with_warning
        warn_of("The anchor '#{data}' at index #{expression.ts} only works "\
                'at ASCII word boundaries with targets below ES2018.')
        pass_through
      end
    end
  end
end
# ---- archive member: js_regex-3.14.0/lib/js_regex/converter/context.rb ----
# frozen_string_literal: true

class JsRegex
  module Converter
    #
    # Passed among Converters to globalize basic status data.
    #
    # The Converters themselves are stateless.
    #
    class Context
      attr_reader :capturing_group_count,
                  :case_insensitive_root,
                  :fail_fast,
                  :in_atomic_group,
                  :in_subexp_recursion,
                  :warnings

      # @param case_insensitive_root [Boolean] whether the root has the i flag
      # @param fail_fast [Boolean] stored for the converters to consult
      # @param target [String, nil] one of the Target constants
      def initialize(case_insensitive_root: false, fail_fast: false, target: nil)
        self.added_capturing_groups_after_group = Hash.new(0)
        self.capturing_group_count = 0
        self.fail_fast = fail_fast
        self.recursions_per_expression = {}
        self.recursion_stack = []
        self.required_options_hash = {}
        self.warnings = []
        self.recursive_group_map = {}
        self.case_insensitive_root = case_insensitive_root
        self.target = target
      end

      # target context

      def es_2015_or_higher?
        target >= Target::ES2015
      end

      def es_2018_or_higher?
        target >= Target::ES2018
      end

      # these methods allow appending options to the final Conversion output

      # Requires the u flag. Returns false (and changes nothing) for targets
      # below ES2015, true otherwise.
      def enable_u_option
        return false unless es_2015_or_higher?

        required_options_hash['u'] = true
      end

      def u?
        required_options_hash['u']
      end

      def required_options
        required_options_hash.keys
      end

      # group context

      def capture_group
        self.capturing_group_count = capturing_group_count + 1
      end

      def start_atomic_group
        self.in_atomic_group = true
      end

      def end_atomic_group
        self.in_atomic_group = false
      end

      # Registers a capturing group that has no counterpart in the Ruby
      # source, keyed by the number of original groups seen so far.
      def increment_local_capturing_group_count
        added_capturing_groups_after_group[original_capturing_group_count] += 1
        capture_group
      end

      def recursions(exp)
        # Count recursions in the current stack path only
        recursion_stack.count { |e| recursion_id(e) == recursion_id(exp) }
      end

      def count_recursion(exp)
        recursion_stack.push(exp)
      end

      def recursion_id(exp)
        [exp.class, exp.starts_at]
      end

      def start_subexp_recursion
        self.in_subexp_recursion = true
        self.recursion_start_group_count = capturing_group_count
      end

      def end_subexp_recursion
        self.in_subexp_recursion = false
        # Pop the last recursion from stack when exiting
        recursion_stack.pop if recursion_stack.any?
      end

      # Get the number of groups at the start of the current recursion
      #
      # Reads the instance variable directly: only a writer is generated for
      # this attribute, and going through `self.recursion_start_group_count`
      # here would call this very method again, recursing until
      # SystemStackError.
      #
      # @return [Integer] 0 if no recursion has been started yet
      def recursion_start_group_count
        @recursion_start_group_count || 0
      end

      # takes and returns 1-indexed group positions.
      # new is different from old if capturing groups were added in between.
      def new_capturing_group_position(old_position)
        increment = 0
        added_capturing_groups_after_group.each do |after_n_groups, count|
          increment += count if after_n_groups < old_position
        end
        old_position + increment
      end

      def original_capturing_group_count
        capturing_group_count - total_added_capturing_groups
      end

      # Track that a group was created by a recursive call
      def track_recursive_group_call(original_group_num, new_group_num)
        recursive_group_map[original_group_num] = new_group_num
      end

      # Get the group number created by a recursive call
      def get_recursive_group_position(original_group_num)
        recursive_group_map[original_group_num]
      end

      private

      attr_accessor :added_capturing_groups_after_group,
                    :recursions_per_expression,
                    :recursion_stack,
                    :required_options_hash,
                    :recursive_group_map,
                    :target

      attr_writer :capturing_group_count,
                  :case_insensitive_root,
                  :fail_fast,
                  :in_atomic_group,
                  :in_subexp_recursion,
                  :recursion_start_group_count,
                  :warnings

      def total_added_capturing_groups
        added_capturing_groups_after_group.values.inject(0, &:+)
      end
    end
  end
end
# class ConditionalConverter < JsRegex::Converter::Base private def convert_data case subtype when :open then mark_conditional_for_second_pass else warn_of_unsupported_feature end end def mark_conditional_for_second_pass reference = expression.referenced_expression.number node = Node.new('(?:', reference: reference, type: :conditional) expression.branches.each do |branch| node << Node.new('(?:', convert_expression(branch), ')') end node << ')' end end end end js_regex-3.14.0/lib/js_regex/converter/unsupported_token_converter.rb0000644000004100000410000000045715045024063026231 0ustar www-datawww-data# frozen_string_literal: true require_relative 'base' class JsRegex module Converter # # Template class implementation. # class UnsupportedTokenConverter < JsRegex::Converter::Base private def convert_data warn_of_unsupported_feature end end end end js_regex-3.14.0/lib/js_regex/converter/subexpression_converter.rb0000644000004100000410000000044715045024063025351 0ustar www-datawww-data# frozen_string_literal: true require_relative 'base' class JsRegex module Converter # # Template class implementation. # class SubexpressionConverter < JsRegex::Converter::Base private def convert_data convert_subexpressions end end end end js_regex-3.14.0/lib/js_regex/converter/literal_converter.rb0000644000004100000410000000444615045024063024077 0ustar www-datawww-data# frozen_string_literal: true require_relative 'base' class JsRegex module Converter # # Template class implementation. # class LiteralConverter < JsRegex::Converter::Base ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{10FFFF}]/.freeze LITERAL_REQUIRING_ESCAPE_PATTERN = /[\/\f\n\r\t\v]/.freeze class << self def convert_data(data, context) if !context.u? 
# Makes a converted literal case-insensitive on its own, for literals that
# are case-insensitive in the source while the root regexp is not.
#
# The converted literal is not always a String: convert_astral_data builds
# a Node. Object#=~ was removed in Ruby 3.2, so pattern-matching such a
# value raises NoMethodError there. Non-String input is therefore returned
# untouched, which is also what older Rubies effectively did (Object#=~
# returned nil).
#
# @param literal [String, Object] result of the literal conversion
# @return [String, Node, Object] expanded literal, or the input unchanged
#   when it is not a String or contains no cased characters
def handle_locally_case_insensitive_literal(literal)
  return literal unless literal.is_a?(String)

  HAS_CASE_PATTERN.match?(literal) ? case_insensitivize(literal) : literal
end
# Converts an assertion group according to its subtype:
# - lookaheads (positive and negative) are kept as they are;
# - lookbehinds are kept as they are when the context reports ES2018 or
#   higher; otherwise a warning naming ES2018 as minimum target is issued,
#   and a positive lookbehind falls back to a passive group, while for a
#   negative one the result of the warning call is returned;
# - any other subtype is reported as unsupported.
def convert_data
  kind = subtype
  return keep_as_is if %i[lookahead nlookahead].include?(kind)
  return warn_of_unsupported_feature unless %i[lookbehind nlookbehind].include?(kind)
  return keep_as_is if context.es_2018_or_higher?

  if kind == :lookbehind
    warn_of_unsupported_feature('lookbehind', min_target: Target::ES2018)
    build_passive_group
  else
    warn_of_unsupported_feature('negative lookbehind', min_target: Target::ES2018)
  end
end
hangul,Script=Hangul hanifirohingya,Script=Hanifi_Rohingya hanunoo,Script=Hanunoo hatran,Script=Hatran hebrew,Script=Hebrew hexdigit,Hex_Digit idsbinaryoperator,IDS_Binary_Operator idstrinaryoperator,IDS_Trinary_Operator imperialaramaic,Script=Imperial_Aramaic initialpunctuation,Initial_Punctuation inscriptionalpahlavi,Script=Inscriptional_Pahlavi inscriptionalparthian,Script=Inscriptional_Parthian javanese,Script=Javanese joincontrol,Join_Control kayahli,Script=Kayah_Li kharoshthi,Script=Kharoshthi khitansmallscript,Script=Khitan_Small_Script khmer,Script=Khmer khojki,Script=Khojki khudawadi,Script=Khudawadi lao,Script=Lao lepcha,Script=Lepcha letternumber,Letter_Number limbu,Script=Limbu lineseparator,Line_Separator lineara,Script=Linear_A linearb,Script=Linear_B lisu,Script=Lisu logicalorderexception,Logical_Order_Exception lycian,Script=Lycian lydian,Script=Lydian mahajani,Script=Mahajani makasar,Script=Makasar malayalam,Script=Malayalam mandaic,Script=Mandaic manichaean,Script=Manichaean marchen,Script=Marchen masaramgondi,Script=Masaram_Gondi math,Math mathsymbol,Math_Symbol medefaidrin,Script=Medefaidrin meeteimayek,Script=Meetei_Mayek mendekikakui,Script=Mende_Kikakui meroiticcursive,Script=Meroitic_Cursive meroitichieroglyphs,Script=Meroitic_Hieroglyphs miao,Script=Miao modi,Script=Modi mro,Script=Mro multani,Script=Multani myanmar,Script=Myanmar nabataean,Script=Nabataean nandinagari,Script=Nandinagari newtailue,Script=New_Tai_Lue newa,Script=Newa nko,Script=Nko noncharactercodepoint,Noncharacter_Code_Point nushu,Script=Nushu nyiakengpuachuehmong,Script=Nyiakeng_Puachue_Hmong ogham,Script=Ogham olchiki,Script=Ol_Chiki oldhungarian,Script=Old_Hungarian olditalic,Script=Old_Italic oldnortharabian,Script=Old_North_Arabian oldpermic,Script=Old_Permic oldpersian,Script=Old_Persian oldsogdian,Script=Old_Sogdian oldsoutharabian,Script=Old_South_Arabian oldturkic,Script=Old_Turkic oriya,Script=Oriya osage,Script=Osage osmanya,Script=Osmanya 
othernumber,Other_Number pahawhhmong,Script=Pahawh_Hmong palmyrene,Script=Palmyrene paragraphseparator,Paragraph_Separator patternsyntax,Pattern_Syntax patternwhitespace,Pattern_White_Space paucinhau,Script=Pau_Cin_Hau phagspa,Script=Phags_Pa phoenician,Script=Phoenician privateuse,Private_Use psalterpahlavi,Script=Psalter_Pahlavi quotationmark,Quotation_Mark radical,Radical regionalindicator,Regional_Indicator rejang,Script=Rejang runic,Script=Runic samaritan,Script=Samaritan saurashtra,Script=Saurashtra separator,Separator sharada,Script=Sharada shavian,Script=Shavian siddham,Script=Siddham signwriting,Script=SignWriting sinhala,Script=Sinhala sogdian,Script=Sogdian sorasompeng,Script=Sora_Sompeng soyombo,Script=Soyombo spaceseparator,Space_Separator sundanese,Script=Sundanese sylotinagri,Script=Syloti_Nagri syriac,Script=Syriac tagbanwa,Script=Tagbanwa taile,Script=Tai_Le taitham,Script=Tai_Tham taiviet,Script=Tai_Viet tamil,Script=Tamil tangut,Script=Tangut thaana,Script=Thaana thai,Script=Thai tibetan,Script=Tibetan tifinagh,Script=Tifinagh tirhuta,Script=Tirhuta titlecaseletter,Titlecase_Letter ugaritic,Script=Ugaritic vai,Script=Vai wancho,Script=Wancho warangciti,Script=Warang_Citi whitespace,White_Space yezidi,Script=Yezidi yi,Script=Yi zanabazarsquare,Script=Zanabazar_Square js_regex-3.14.0/lib/js_regex/converter/backreference_converter.rb0000644000004100000410000001014615045024063025214 0ustar www-datawww-data# frozen_string_literal: true require_relative 'base' class JsRegex module Converter # # Template class implementation. # class BackreferenceConverter < JsRegex::Converter::Base private def convert_data case subtype when :name_ref then convert_name_ref when :number, :number_ref, :number_rel_ref then convert_to_plain_num_ref when :name_call, :number_call, :number_rel_call then convert_call else # name_recursion_ref, number_recursion_ref, ... 
# Emits a numeric backreference Node (type :backref) for the referenced
# group. If the context has a recursive-call position recorded for the
# referenced group, that position is used; otherwise the group's regular
# new position is used. When the expression has the x option, an empty
# passive group is appended after the number.
#
# @return [Node] backref node whose reference is the emitted group number
def convert_to_plain_num_ref
  regular_position = new_position
  remapped_position = context.get_recursive_group_position(target_position)
  position = remapped_position || regular_position

  suffix = expression.x? ? '(?:)' : ''
  Node.new("\\#{position}#{suffix}", reference: position, type: :backref)
end
# Lists the numbers of all capturing groups found in exp, including exp
# itself if it is capturing, in depth-first order. Each group is listed
# exactly once.
#
# Only direct children are recursed into. each_expression already visits
# every descendant, so pairing it with explicit recursion listed groups
# nested two or more levels deep more than once, which shifted every
# index-based mapping derived from this list.
#
# @param exp [Regexp::Expression::Base] expression to inspect
# @return [Array<Integer>] capturing group numbers, without duplicates
def collect_group_numbers(exp)
  return [] if exp.terminal?

  own = exp.capturing? ? [exp.number] : []
  own + exp.expressions.flat_map { |child| collect_group_numbers(child) }
end
# Tells whether ruby_regex can be converted without a ConversionError.
# The check runs the strict constructor new! with the same arguments;
# any other exception raised by it is not rescued here.
#
# @param ruby_regex [Regexp] the regexp to check
# @param kwargs [Hash] options forwarded to new!
# @return [Boolean] false if new! raised ConversionError, true otherwise
def self.compatible?(ruby_regex, **kwargs)
  begin
    new!(ruby_regex, **kwargs)
  rescue ConversionError
    return false
  end
  true
end
:required_rubygems_version= s.require_paths = ["lib".freeze] s.authors = ["Janosch M\u00FCller".freeze] s.date = "1980-01-02" s.description = "JsRegex converts Ruby's native regular expressions for JavaScript, taking care of various incompatibilities and returning warnings for unsolvable differences.".freeze s.email = ["janosch84@gmail.com".freeze] s.files = ["lib/js_regex.rb".freeze, "lib/js_regex/conversion.rb".freeze, "lib/js_regex/converter.rb".freeze, "lib/js_regex/converter/anchor_converter.rb".freeze, "lib/js_regex/converter/assertion_converter.rb".freeze, "lib/js_regex/converter/backreference_converter.rb".freeze, "lib/js_regex/converter/base.rb".freeze, "lib/js_regex/converter/conditional_converter.rb".freeze, "lib/js_regex/converter/context.rb".freeze, "lib/js_regex/converter/escape_converter.rb".freeze, "lib/js_regex/converter/freespace_converter.rb".freeze, "lib/js_regex/converter/group_converter.rb".freeze, "lib/js_regex/converter/keep_converter.rb".freeze, "lib/js_regex/converter/literal_converter.rb".freeze, "lib/js_regex/converter/meta_converter.rb".freeze, "lib/js_regex/converter/property_converter.rb".freeze, "lib/js_regex/converter/property_map.csv".freeze, "lib/js_regex/converter/set_converter.rb".freeze, "lib/js_regex/converter/subexpression_converter.rb".freeze, "lib/js_regex/converter/type_converter.rb".freeze, "lib/js_regex/converter/unsupported_token_converter.rb".freeze, "lib/js_regex/error.rb".freeze, "lib/js_regex/node.rb".freeze, "lib/js_regex/second_pass.rb".freeze, "lib/js_regex/target.rb".freeze, "lib/js_regex/version.rb".freeze] s.homepage = "https://github.com/jaynetics/js_regex".freeze s.licenses = ["MIT".freeze] s.required_ruby_version = Gem::Requirement.new(">= 2.1.0".freeze) s.rubygems_version = "3.3.15".freeze s.summary = "Converts Ruby regexes to JavaScript regexes.".freeze if s.respond_to? :specification_version then s.specification_version = 4 end if s.respond_to? 
:add_runtime_dependency then s.add_runtime_dependency("character_set".freeze, ["~> 1.4"]) s.add_runtime_dependency("regexp_parser".freeze, ["~> 2.11"]) s.add_runtime_dependency("regexp_property_values".freeze, ["~> 1.0"]) else s.add_dependency("character_set".freeze, ["~> 1.4"]) s.add_dependency("regexp_parser".freeze, ["~> 2.11"]) s.add_dependency("regexp_property_values".freeze, ["~> 1.0"]) end end