rsec-0.4.2/0000755000175100017500000000000013176057521011503 5ustar mmollmmollrsec-0.4.2/rsec.gemspec0000644000175100017500000000356413176057521014014 0ustar mmollmmoll######################################################### # This file has been automatically generated by gem2tgz # ######################################################### # -*- encoding: utf-8 -*- # stub: rsec 0.4.2 ruby lib Gem::Specification.new do |s| s.name = "rsec".freeze s.version = "0.4.2" s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version= s.require_paths = ["lib".freeze] s.authors = ["NS".freeze] s.date = "2016-09-25" s.description = "Easy and extreme fast dynamic PEG parser combinator.".freeze s.extra_rdoc_files = ["readme.rdoc".freeze] s.files = ["bench/bench.rb".freeze, "bench/little.rb".freeze, "bench/profile.rb".freeze, "examples/arithmetic.rb".freeze, "examples/bnf.rb".freeze, "examples/c_minus.rb".freeze, "examples/hello.scm".freeze, "examples/little_markdown.rb".freeze, "examples/nasm_manual.rb".freeze, "examples/s_exp.rb".freeze, "examples/scheme.rb".freeze, "examples/slow_json.rb".freeze, "lib/rsec.rb".freeze, "lib/rsec/helpers.rb".freeze, "lib/rsec/parser.rb".freeze, "lib/rsec/parsers/join.rb".freeze, "lib/rsec/parsers/misc.rb".freeze, "lib/rsec/parsers/prim.rb".freeze, "lib/rsec/parsers/repeat.rb".freeze, "lib/rsec/parsers/seq.rb".freeze, "lib/rsec/utils.rb".freeze, "license.txt".freeze, "readme.rdoc".freeze, "test/helpers.rb".freeze, "test/test_branch.rb".freeze, "test/test_examples.rb".freeze, "test/test_join.rb".freeze, "test/test_lookahead.rb".freeze, "test/test_misc.rb".freeze, "test/test_one_of.rb".freeze, "test/test_pattern.rb".freeze, "test/test_prim.rb".freeze, "test/test_repeat.rb".freeze, "test/test_rsec.rb".freeze, "test/test_seq.rb".freeze] s.homepage = "http://rsec.herokuapp.com".freeze s.required_ruby_version = Gem::Requirement.new(">= 1.9.1".freeze) s.rubygems_version = "2.5.2.1".freeze s.summary = 
"Extreme Fast Parser Combinator for Ruby".freeze end rsec-0.4.2/test/0000755000175100017500000000000013176057521012462 5ustar mmollmmollrsec-0.4.2/test/test_seq.rb0000644000175100017500000000220513176057521014635 0ustar mmollmmollrequire "#{File.dirname(__FILE__)}/helpers.rb" class TestSeq < TC def test_seq p = seq('a', 'b', 'c') ase ['a','b','c'], p.parse('abc') ase INVALID, p.parse('a') ase INVALID, p.parse('b') ase INVALID, p.parse('c') ase INVALID, p.parse('bc') ase INVALID, p.parse('ab') end def test_seq_ p = seq_('abc', 'ef', 'vv') ase %w[abc ef vv], p.parse("abc ef vv") p = seq_('abc', 'ef', 'vv', skip: /\s+/) ase %w[abc ef vv], p.parse("abc ef vv") ase INVALID, p.parse("abcef vv") end def test_seq_mix p = seq('e', seq_('a','b','c'), 'd') ase ['e', ['a','b','c'], 'd'], p.parse('eabcd') end def test_seq_one p = seq('a', 'b', 'c')[1] ase 'b', p.parse('abc') p = seq('abc', /\s*/, 'd')[2] ase 'd', p.parse('abc d') end def test_seq_one_ p = seq_('a', 'b', 'c')[1] ase 'b', p.parse('a bc') p = seq_('abc', /\s*/, 'd')[2] ase 'd', p.parse('abc d') end def test_fall p = 'a'.r >> 'b' ase 'b', p.parse!('ab') p = p << 'c' ase 'b', p.parse!('abc') p = p._? ase ['b'], p.eof.parse!('abc') ase [], p.eof.parse!('') end end rsec-0.4.2/test/test_rsec.rb0000644000175100017500000000042113176057521014777 0ustar mmollmmollrequire "#{File.dirname(__FILE__)}/helpers.rb" class TestRsec < TC def test_try_skip_pattern p = Rsec.try_skip_pattern 'abc'.r ase Rsec::SkipPattern, p.class p = Rsec.try_skip_pattern 'abc'.r.until ase Rsec::SkipUntilPattern, p.class end end rsec-0.4.2/test/test_repeat.rb0000644000175100017500000000221613176057521015327 0ustar mmollmmollrequire "#{File.dirname(__FILE__)}/helpers.rb" class TestRepeat < TC def test_maybe [:_?, :maybe].each do |m| p = seq('v', 'q').send m ase [], p.parse('') ase INVALID, p.eof.parse('v') ase [['v', 'q']], p.parse('vq') # with map block p = seq('v', 'q').maybe {|x| x.empty? ? 
'bad' : 'good' } ase 'good', p.parse('vq') end end def test_multiply p = ('ce'.r * 3).eof ase ['ce','ce','ce'], (p.parse 'cecece') ase INVALID, (p.parse 'cece') ase INVALID, (p.parse 'cececece') p = ('ce'.r * 0).eof ase [], (p.parse '') ase INVALID, (p.parse 'ce') end def test_range p = ('ce'.r * (2..3)).eof ase INVALID, (p.parse 'ce') ase ['ce','ce'], (p.parse 'cece') ase INVALID, (p.parse 'cececece') end def test_inf p = ('ce'.r * (3..-1)).eof ase INVALID, (p.parse 'cece') ase ['ce','ce','ce'], (p.parse 'cecece') ase ['ce','ce','ce','ce','ce'], (p.parse 'cecececece') end def test_star p = '*'.r.star ase [], p.parse('') ase %w[* * *], p.parse('***') end end rsec-0.4.2/test/test_prim.rb0000644000175100017500000000316313176057521015020 0ustar mmollmmollrequire "#{File.dirname(__FILE__)}/helpers.rb" class TestPrim < TC def test_floating [:double].each do |ty| p = prim ty ase INVALID, p.parse('d') ase 3.2e5.round, p.parse('+3.2e5').round ase INVALID, p.parse(' 4.8') p = prim ty, allowed_sign: '' ase 1.5e-3.round(4), p.parse('1.5E-3').round(4) ase INVALID, p.parse('+3.0') p = prim ty, allowed_sign: '-' ase (-5.0).round, p.parse('-5').round ase INVALID, p.parse('-') ase INVALID, p.parse('+5') ase 5.0.round, p.parse('5').round # with map block p = prim(ty){|x| x * 2 } ase 100.0.round, p.parse('50').round end end def test_hex_floating return # NOTE Ruby 1.9.3 removed Float() from hex values [:hex_double].each do |ty| p = prim ty ase Float('0x3.2').round(4), p.parse('0x3.2').round(4) # with map block p = prim(ty){|x| x - 0.1 } ase (Float('0x3.2') - 0.1).round(4), p.parse('0x3.2').round(4) end end def test_integer [:int32, :unsigned_int32, :int64, :unsigned_int64].each do |ty| p = prim ty ase 432, p.parse('432') p = prim ty, base: 4 ase '120'.to_i(4), p.parse('120') p = prim ty, base: 35 ase '1ax'.to_i(35), p.parse('1ax') end p = prim :int32, allowed_signs: '-' ase INVALID, p.parse('+12') ase INVALID, p.parse('123333333333333333333333333333333333') ase INVALID, p.parse('-') 
ase -49, p.parse('-49') assert_raise RuntimeError do prim :unsigned_int32, allowed_signs: '+-' end end end rsec-0.4.2/test/test_pattern.rb0000644000175100017500000000173713176057521015533 0ustar mmollmmollrequire "#{File.dirname(__FILE__)}/helpers.rb" class TestPattern < TC def test_create p1 = 'x'.r asp 'x', p1 p1 = 'abc'.r asp 'abc', p1 asr do p1.eof.parse! 'abcd' end ase INVALID, p1.eof.parse('abcd') asr do p1.eof.parse! 'xabc' end ase INVALID, p1.eof.parse('xabc') # with map block p = 'x'.r{ 'y' } ase INVALID, p.parse('y') ase 'y', p.parse('x') end def test_until p = 'ef'.r.until asp 'xef', p asp "x\nef", p p = 'e'.r.until asp 'xe', p asp "x\ne", p # with map block p = 'e'.r.until{|s| s*2} ase 'xexe', p.parse('xe') end def test_word p = word('abc') ase INVALID, p.parse('abcd') ase INVALID, seq_(p, 'd').parse('abcd') ase 'abc', p.parse('abc') ase ['abc', 'd'], seq_(p, 'd').parse('abc d') end def test_symbol p = symbol('*') ase '*', p.parse(' * ') end end rsec-0.4.2/test/test_one_of.rb0000644000175100017500000000141613176057521015315 0ustar mmollmmollrequire "#{File.dirname(__FILE__)}/helpers.rb" class TestOneOf < TC def test_one_of p = one_of('abcd') ase 'c', p.parse('c') ase INVALID, p.parse('e') p = one_of('+=') ase '=', p.parse('=') begin p = one_of('') assert false, "should raise exception for empty string" rescue end # with map block p = one_of('x+'){|v| v * 2} ase '++', p.parse('+') end def test_one_of_ p = one_of_('abcd') ase 'a', p.parse('a') ase INVALID, p.parse('e') ase 'd', p.parse(' d ') ase 'a', p.parse(' a') ase 'c', p.parse('c ') assert_raise(ArgumentError) { p = one_of_('') } # with map block p = one_of_('w'){'v'} ase 'v', p.parse('w') end end rsec-0.4.2/test/test_misc.rb0000644000175100017500000000225513176057521015005 0ustar mmollmmollrequire "#{File.dirname(__FILE__)}/helpers.rb" class TestMisc < TC def test_lazy p1 = nil p2 = lazy{p1} p1 = '3'.r asp '3', p2 p1 = '4'.r asp '3', p2 p2 = lazy{p7} # don't have to define p7 before lazy p7 = '5'.r 
asp '5',p2 end def test_eof p = ''.r.eof asp '', p ase INVALID, p.parse('a') p = seq('a', 'b').eof ase INVALID, p.parse('abc') ase ['a', 'b'], p.parse('ab') end def test_cache p1 = seq('a', seq('b', 'c')) p = seq(p1.cached, 'd') ase [['a',['b','c']],'d'], p.parse('abcd') # with map block p = seq(p1.cached{ 'mapped' }, 'd') ase ['mapped', 'd'], p.parse('abcd') end def test_map p = /\w/.r.map{|n| n*2} ase 'bb', p.parse('b') ase INVALID, p.parse('.') end def test_fail p = 'v'.r.fail 'omg!' p.eof.parse! 'u' assert false, "should raise syntax error" rescue Rsec::SyntaxError => e assert e.to_s.index 'omg!' end def test_fail_with_block p = 'v'.r.fail('omg!'){ 'should fail' } p.eof.parse! 'u' assert false, "should raise syntax error" rescue Rsec::SyntaxError => e assert e.to_s.index 'omg!' end end rsec-0.4.2/test/test_lookahead.rb0000644000175100017500000000070413176057521015776 0ustar mmollmmollrequire "#{File.dirname(__FILE__)}/helpers.rb" class TestLookAhead < TC def test_lookahead p1 = 'a'.r & 'b' p2 = /\w/.r p = seq(p1, p2) ase ['a', 'b'], p.parse('ab') ase INVALID, p.parse('ac') p1 = 'a'.r ^ 'b' p = seq(p1, p2) ase ['a', 'c'], p.parse('ac') ase INVALID, p.parse('ab') end def test_negative_lookahead p = 'a'.r ^ 'b' ase 'a', p.parse('ac') ase INVALID, p.parse('ab') end end rsec-0.4.2/test/test_join.rb0000644000175100017500000000261313176057521015007 0ustar mmollmmollrequire "#{File.dirname(__FILE__)}/helpers.rb" class TestJoin < TC def test_join p0 = /\w{1,3}/.r.join '+' ase ['abc'], p0.eof.parse('abc') ase ['a','+','bc','+','d'], p0.parse('a+bc+d') ase INVALID, p0.eof.parse('a+ bc+d') ase INVALID, p0.eof.parse('a+b+') p1 = seq(/[a-z]{1,3}/, '3')[0].join seq(/\s/.r, '+', /\s/)[1] ase ['abc'], p1.eof.parse('abc3') ase %w[a + bc + d], p1.parse('a3 + bc3 + d3') ase INVALID, p1.eof.parse('a+b+') end def test_nest_join p = 'a'.r.join(/\s*\*\s*/.r).join(/\s*\+\s*/.r) ase [['a'], ' + ', ['a', ' * ', 'a'], ' +', ['a']], p.parse('a + a * a +a') end def 
test_join_with_mapping_block p = 'a'.r.join('+'){|res| res.grep /\+/ } ase ['+', '+'], p.parse('a+a+a') ase [], p.parse('a') end def test_join_even p = 'a'.r.join('+').even ase %w[a a a], p.parse('a+a+a') ase %w[a], p.parse('a') ase INVALID, p.eof.parse('a+') ase INVALID, p.parse('b') ase INVALID, p.parse('') end def test_join_odd p = 'a'.r.join('+').odd ase %w[+ +], p.parse('a+a+a') ase [], p.parse('a') ase INVALID, p.parse('') ase INVALID, p.parse('+') ase INVALID, p.parse('b') end def test_nest_join_even_odd p = 'a'.r.join('+').odd.join('*') ase [['+'], '*', []], p.parse('a+a*a') p = 'a'.r.join('+').even.join('*') ase [['a','a'], '*', ['a']], p.parse('a+a*a') end end rsec-0.4.2/test/test_examples.rb0000644000175100017500000000151613176057521015667 0ustar mmollmmollrequire "#{File.dirname(__FILE__)}/helpers.rb" $:.unshift "#{File.dirname __FILE__}/../examples" require "arithmetic" require "s_exp" class TestExamples < TC def initialize *xs super(*xs) @a = arithmetic() @s_exp = s_exp() end def test_arithmetic # step by step s = '1' ase eval(s), @a.parse(s) s = '3+ 2' ase eval(s), @a.parse(s) s = '5-2*1' ase eval(s), @a.parse(s) s = '(2)' ase eval(s), @a.parse(s) s = '1+(2- (3+ 4))/5 * 2*4 +1' ase eval(s), @a.parse(s) end def test_s_exp res = @s_exp.parse! '(a 3 4.3 (add 1 3) (minus (multi 4 5)))' expected = ['a', 3.0, 4.3, ['add', 1, 3], ['minus', ['multi', 4, 5]]] ase expected, res res = @s_exp.parse! 
'(a (3) ce2 (add 1 3))' expected = ['a', 3.0, 'ce2', ['add', 1, 3]] ase expected, res end end rsec-0.4.2/test/test_branch.rb0000644000175100017500000000045513176057521015307 0ustar mmollmmollrequire "#{File.dirname(__FILE__)}/helpers.rb" class TestBranch < TC def test_branch p = 'a'.r | /\d+/ | seq('c', 'd') ase ['c','d'], p.parse('cd') ase '3', p.parse('3') ase INVALID, p.parse('c') p = 'x'.r | 'y' ase INVALID, p.parse('') ase 'y', p.parse('y') end end rsec-0.4.2/test/helpers.rb0000644000175100017500000000063413176057521014454 0ustar mmollmmoll# coding: utf-8 $:.unshift "#{File.dirname(__FILE__)}/../lib" require "rsec" include Rsec::Helpers require "test/unit" TC = Test::Unit::TestCase class TC INVALID = Rsec::INVALID end module Test::Unit::Assertions alias ase assert_equal def asr assert_raise(Rsec::SyntaxError) { yield } end # assert parse returns s def asp s, p assert_equal(s, p.parse(s)) end end rsec-0.4.2/readme.rdoc0000644000175100017500000000054313176057521013613 0ustar mmollmmoll== Parser / Regexp Combinator for Ruby. Easier and faster than treetop / rex+racc. It's ruby1.9 only. 
== License As Ruby's == Install The pure Ruby gem is fast enough (about 10+x faster than treetop generated code): gem in rsec == Doc http://rsec.herokuapp.com == Code http://github.com/luikore/rsec/tree/master rsec-0.4.2/license.txt0000644000175100017500000000001113176057521013656 0ustar mmollmmollAs Ruby'srsec-0.4.2/lib/0000755000175100017500000000000013176057521012251 5ustar mmollmmollrsec-0.4.2/lib/rsec/0000755000175100017500000000000013176057521013205 5ustar mmollmmollrsec-0.4.2/lib/rsec/utils.rb0000644000175100017500000000537613176057521014705 0ustar mmollmmoll# coding: utf-8 module Rsec #:nodoc: # error class for rescue class SyntaxError < StandardError attr_reader :msg, :line_text, :line, :col # constructor def initialize msg, line_text, line, col @msg, @line_text, @line, @col = msg, line_text, line, col end # info with source position def to_s %Q<#@msg\n#@line_text\n#{' ' * @col}^> end end # parse context inherits from StringScanner
#
# attributes:
#
  #   [R]  string: string to parse
  #   [RW] pos: current position
  #   [R]  source: source file name
  #   line_text(pos): text of the line containing pos (about 80 characters at most)
  #   [R]  cache: for memoization
  # 
class ParseContext < StringScanner attr_reader :source, :cache, :last_fail_pos attr_accessor :attr_names def initialize str, source super(str) @source = source @cache = {} @last_fail_pos = 0 @last_fail_mask = 0 end # clear packrat parser cache def clear_cache @cache.clear end # add fail message def on_fail mask if pos > @last_fail_pos @last_fail_pos = pos @last_fail_mask = mask elsif pos == @last_fail_pos @last_fail_mask |= mask end end # generate parse error def generate_error source if self.pos <= @last_fail_pos line = line @last_fail_pos col = col @last_fail_pos line_text = line_text @last_fail_pos expect_tokens = Fail.get_tokens @last_fail_mask expects = ", expect token [ #{expect_tokens.join ' | '} ]" else line = line pos col = col pos line_text = line_text pos expects = nil end msg = "\nin #{source}:#{line} at #{col}#{expects}" SyntaxError.new msg, line_text, line, col end # get line number def line pos string[0...pos].count("\n") + 1 end # get column number: position in line def col pos return 1 if pos == 0 newline_pos = string.rindex "\n", pos - 1 if newline_pos pos - newline_pos else pos + 1 end end # get line text containing pos # the text is 80 at most def line_text pos from = string.rindex "\n", pos (from = string.rindex "\n", pos - 1) if from == pos from = from ? from + 1 : 0 from = pos - 40 if (from < pos - 40) to = string.index("\n", pos) to = to ? to - 1 : string.size to = pos + 40 if (to > pos + 40) string[from..to] end end # the invalid token INVALID = Object.new class << INVALID def to_str 'INVALID_TOKEN' end alias :[] :== alias inspect to_str end end rsec-0.4.2/lib/rsec/parsers/0000755000175100017500000000000013176057521014664 5ustar mmollmmollrsec-0.4.2/lib/rsec/parsers/seq.rb0000644000175100017500000000373413176057521016010 0ustar mmollmmollmodule Rsec # sequence combinator
# result in an array
class Seq < Unary
  # Runs each member parser in order against ctx and gathers their results.
  # Returns INVALID as soon as any member yields INVALID; otherwise returns
  # a new array holding one result per member.
  def _parse(ctx)
    results = []
    some.each do |member|
      item = member._parse(ctx)
      return INVALID if INVALID[item]
      results << item
    end
    results
  end
end
# sequence combinator
# the result is the result of the parser at idx class SeqOne < Struct.new(:parsers, :idx) include Parser def _parse ctx ret = INVALID parsers.each_with_index do |p, i| res = p._parse ctx return INVALID if INVALID[res] ret = res if i == idx end ret end end # skips skipper between tokens class Seq_ < Struct.new(:first, :rest, :skipper) include Parser def _parse ctx res = first._parse ctx return INVALID if INVALID[res] ret = [res] rest.each do |e| return INVALID if INVALID[skipper._parse ctx] res = e._parse ctx return INVALID if INVALID[res] ret << res end ret end end # skips skipper between tokens class SeqOne_ < Struct.new(:first, :rest, :skipper, :idx) include Parser def _parse ctx ret = INVALID res = first._parse ctx return INVALID if INVALID[res] ret = res if 0 == idx check = idx - 1 rest.each_with_index do |p, i| return INVALID if INVALID[skipper._parse ctx] res = p._parse ctx return INVALID if INVALID[res] ret = res if i == check end ret end end # unbox result size # only work for seq and join and maybe'ed seq and join class Unbox < Unary def _parse ctx res = some._parse ctx return INVALID if INVALID[res] res.size == 1 ? res.first : res end end # inner # only work for seq class Inner < Unary def _parse ctx res = some._parse ctx return INVALID if INVALID[res] res.shift res.pop res end end end rsec-0.4.2/lib/rsec/parsers/repeat.rb0000644000175100017500000000365113176057521016476 0ustar mmollmmollmodule Rsec # the content appears 1 or 0 time class Maybe < Unary def _parse ctx save = ctx.pos res = some._parse ctx if INVALID[res] ctx.pos = save [] else [res] end end end # repeat from range.begin.abs to range.end.abs
# note: range's max should always be > 0
# see also helpers
class RepeatRange
  include Parser

  # Struct-like constructor, so callers can write RepeatRange[base, range].
  def self.[](base, range)
    new(base, range)
  end

  # base  - the parser to repeat
  # range - Range of allowed repetition counts. The upper end may be
  #         Float::INFINITY, meaning "no upper limit".
  def initialize(base, range)
    @base = base
    # mandatory repetitions
    @at_least = range.min.abs
    # extra repetitions allowed on top of the mandatory ones
    @optional = range.max - @at_least
    # Float::INFINITY has no #times, so the unbounded case is tracked explicitly
    @unbounded = (@optional == Float::INFINITY)
  end

  # Parses @at_least mandatory repetitions (INVALID if any of them fails),
  # then up to @optional further repetitions, stopping quietly at the first
  # failure. Returns the array of collected results.
  def _parse(ctx)
    rp_node = []
    @at_least.times do
      res = @base._parse(ctx)
      return INVALID if INVALID[res]
      rp_node.push(res)
    end

    taken = 0
    while taken < @optional
      save = ctx.pos
      res = @base._parse(ctx)
      # With no upper limit, a match that consumes nothing would repeat
      # forever, so it ends the repetition just like a failure does.
      if INVALID[res] || (@unbounded && ctx.pos == save)
        ctx.pos = save
        break
      end
      rp_node.push(res)
      taken += 1
    end
    rp_node
  end
end
# matches exactly n.abs times repeat
class RepeatN < Struct.new(:base, :n)
  include Parser

  # Applies base exactly n times, collecting each result in order.
  # Returns INVALID as soon as one application fails; returns an empty
  # array when n is 0.
  def _parse(ctx)
    collected = []
    n.times do
      item = base._parse(ctx)
      return INVALID if INVALID[item]
      collected.push(item)
    end
    collected
  end
end
# repeat at least n.abs times <- [n, inf)
class RepeatAtLeastN < Struct.new(:base, :n) include Parser def _parse ctx rp_node = [] n.times do res = base._parse(ctx) return INVALID if INVALID[res] rp_node.push res end # note this may be an infinite action # returns if the pos didn't change loop do save = ctx.pos res = base._parse ctx if (INVALID[res] or ctx.pos == save) ctx.pos = save break end rp_node.push res end rp_node end end end rsec-0.4.2/lib/rsec/parsers/prim.rb0000644000175100017500000000431413176057521016162 0ustar mmollmmollmodule Rsec # primitive base module Prim def sign_strategy_to_pattern sign_strategy case sign_strategy when 3; '[\+\-]?' when 2; '\+?' when 1; '\-?' when 0; '' end end end # double precision float parser class PDouble < Binary include Prim def float_pattern sign_strategy, is_hex sign = sign_strategy_to_pattern sign_strategy if is_hex /#{sign}0x[\da-f]+(\.[\da-f]+)?/i else /#{sign}\d+(\.\d+)?(e[\+\-]?\d+)?/i end end def initialize sign_strategy, is_hex self.left = float_pattern sign_strategy, is_hex end def _parse ctx if (d = ctx.scan left) d = Float(d) return d if d.finite? 
end INVALID end end # primitive int parser commons class PInt < Binary include Prim def int_pattern sign_strategy, base sign = sign_strategy_to_pattern sign_strategy if base > 10 d_hi = 9 char_range = "a-#{('a'.ord + base - 11).chr}" else d_hi = base - 1 char_range = '' end /#{sign}[0-#{d_hi}#{char_range}]+/i end def _parse ctx if (d = ctx.scan left) d = d.to_i @base return d if right.include?(d) end INVALID end end # 32-bit int parser class PInt32 < PInt def initialize sign_strategy, base @base = base self.left = int_pattern sign_strategy, base self.right = (-(1<<31))..((1<<31)-1) end end # unsigned 32 bit int parser class PUnsignedInt32 < PInt def initialize sign_strategy, base @base = base self.left = int_pattern sign_strategy, base self.right = 0...(1<<32) end end # 64-bit int parser class PInt64 < PInt def initialize sign_strategy, base @base = base self.left = int_pattern sign_strategy, base self.right = (-(1<<63))..((1<<63)-1) end end # unsigned 64-bit int parser class PUnsignedInt64 < PInt def initialize sign_strategy, base @base = base self.left = int_pattern sign_strategy, base self.right = 0...(1<<64) end end end rsec-0.4.2/lib/rsec/parsers/misc.rb0000644000175100017500000001030513176057521016143 0ustar mmollmmollmodule Rsec #:nodoc # transform parse result class Map < Binary def _parse ctx res = left()._parse ctx return INVALID if INVALID[res] right()[res] end end # set expect tokens for parsing error in ctx
# if left failed, the error would be registered
class Fail < Binary
  # Builds a Fail parser around left and reserves one bit of the failure
  # mask for its expected tokens. The bit (1 << @mask_bit) is stored as the
  # parser's right member and indexes @token_table.
  # Raises RuntimeError once more than 1000 bits have been handed out.
  def self.[](left, tokens)
    # TODO mutex
    if @mask_bit > 1000
      raise "You've created too many fail parsers, If it is your intention, call Rsec::Fail.reset when previous expect settings can be thrown away."
    end
    parser = super(left, (1 << @mask_bit))
    @token_table[@mask_bit] = tokens
    @mask_bit += 1
    parser
  end

  # Forgets every registered token list and starts handing out bits from 0.
  def self.reset
    @mask_bit = 0
    @token_table = []
  end
  Fail.reset

  # Returns the unique expected tokens of every Fail parser whose bit is
  # set in mask.
  def self.get_tokens(mask)
    res = []
    @token_table.each_with_index do |tokens, idx|
      next unless tokens
      # bit idx of the mask belongs to the parser registered at idx
      res += tokens if (mask & (1 << idx)) > 0
    end
    res.uniq!
    res
  end

  # Parses with left; on failure records this parser's mask bit in ctx so
  # the error message can list the expected tokens. Returns left's result.
  def _parse(ctx)
    res = left._parse(ctx)
    ctx.on_fail(right) if INVALID[res]
    res
  end
end

# look ahead
class LookAhead < Binary
  # Parses left, then requires right to match at the following position.
  # The input consumed by right is given back before returning left's result.
  def _parse(ctx)
    res = left._parse(ctx)
    pos = ctx.pos
    return INVALID if INVALID[right._parse(ctx)]
    ctx.pos = pos
    res
  end
end

# negative look ahead
class NegativeLookAhead < Binary
  # Parses left, then requires right NOT to match at the following position.
  # The position reached by left is restored before returning left's result.
  def _parse(ctx)
    res = left._parse(ctx)
    pos = ctx.pos
    return INVALID unless INVALID[right._parse(ctx)]
    ctx.pos = pos
    res
  end
end
# branch combinator
# result in one of the members, or INVALID
class Branch < Unary
  # Tries each alternative in order from the same starting position and
  # returns the first result that is not INVALID. The position is rewound
  # after every failed alternative; INVALID is returned when all fail.
  def _parse(ctx)
    start = ctx.pos
    some.each do |alternative|
      outcome = alternative._parse(ctx)
      if INVALID[outcome]
        ctx.pos = start
      else
        return outcome
      end
    end
    INVALID
  end
end

# matches a pattern
class Pattern < Unary
  # Scans the wrapped pattern at the current position; returns the matched
  # text, or INVALID when the scan yields nil.
  def _parse(ctx)
    matched = ctx.scan(some)
    matched ? matched : INVALID
  end
end
# scan until the pattern
# for optimizing
class UntilPattern < Unary
  # Returns the result of scan_until for the wrapped pattern, or INVALID
  # when it yields nil.
  def _parse(ctx)
    ctx.scan_until(some) || INVALID
  end
end

# for optimization, not exposed to users
class SkipPattern < Unary
  # Returns the result of skip for the wrapped pattern, or INVALID when it
  # yields nil.
  def _parse(ctx)
    ctx.skip(some) || INVALID
  end
end

# for optimization, not exposed to users
class SkipUntilPattern < Unary
  # Returns the result of skip_until for the wrapped pattern, or INVALID
  # when it yields nil.
  def _parse(ctx)
    ctx.skip_until(some) || INVALID
  end
end

# should be end-of-file after parsing
# FIXME seems parser keeps a state when using parse!, see nasm manual parse
class Eof < Unary
  # Runs the wrapped parser and keeps its result only if the whole input
  # has been consumed; otherwise returns INVALID.
  def _parse(ctx)
    ret = some._parse(ctx)
    ctx.eos? ? ret : INVALID
  end
end

# one of char in string
class OneOf < Unary
  # Reads one character and returns it when it occurs in the wrapped string.
  # Otherwise rewinds to where the read started and returns INVALID.
  def _parse(ctx)
    return INVALID if ctx.eos?
    # pos counts bytes while getch reads a whole character, so remember the
    # start rather than stepping back by one byte
    start = ctx.pos
    chr = ctx.getch
    return chr if some.index(chr)
    ctx.pos = start
    INVALID
  end
end

# one of char in string
class OneOf_ < Unary
  # Like OneOf, but skips whitespace before and after the character.
  # On failure the position is not rewound here.
  def _parse(ctx)
    ctx.skip(/\s*/)
    return INVALID if ctx.eos?
    chr = ctx.getch
    return INVALID unless some.index(chr)
    ctx.skip(/\s*/)
    chr
  end
end
# sometimes a variable is not defined yet
# lazy is used to capture it later # NOTE the value is captured the first time it is called class Lazy < Unary def _parse ctx @some ||= \ begin some()[] rescue NameError => ex some().binding.eval ex.name.to_s end @some._parse ctx end end # parse result is cached in ctx. # may improve performance class Cached include Parser def self.[] parser self.new parser end def initialize parser @parser = parser @salt = object_id() << 32 end def _parse ctx key = ctx.pos | @salt cache = ctx.cache # result maybe nil, so don't use ||= if cache.has_key? key ret, pos = cache[key] ctx.pos = pos ret else ret = @parser._parse ctx pos = ctx.pos cache[key] = [ret, pos] ret end end end end rsec-0.4.2/lib/rsec/parsers/join.rb0000644000175100017500000000342113176057521016150 0ustar mmollmmollmodule Rsec # Join base class Join < Binary def _parse ctx e = left._parse ctx return INVALID if INVALID[e] ret = [e] loop do save_point = ctx.pos i = right._parse ctx if INVALID[i] ctx.pos = save_point break end t = left._parse ctx if INVALID[t] ctx.pos = save_point break end break if save_point == ctx.pos # stop if no advance, prevent infinite loop ret << i ret << t end # loop ret end end # keep only tokens class JoinEven < Binary def _parse ctx e = left._parse ctx return INVALID if INVALID[e] ret = [e] loop do save_point = ctx.pos i = right._parse ctx if INVALID[i] ctx.pos = save_point break end t = left._parse ctx if INVALID[t] ctx.pos = save_point break end break if save_point == ctx.pos # stop if no advance, prevent infinite loop ret << t end # loop ret end end # keep only inters # NOTE if only 1 token matches, return empty array class JoinOdd < Binary def _parse ctx e = left._parse ctx return INVALID if INVALID[e] ret = [] loop do save_point = ctx.pos i = right._parse ctx if INVALID[i] ctx.pos = save_point break end t = left._parse ctx if INVALID[t] ctx.pos = save_point break end break if save_point == ctx.pos # stop if no advance, prevent infinite loop ret << i end # loop ret end end end 
rsec-0.4.2/lib/rsec/parser.rb0000644000175100017500000000257613176057521015040 0ustar mmollmmoll# coding: utf-8 module Rsec #:nodoc: # parser base module Parser # parses string
# returns nil if unparsed def parse str, source_name='source' ctx = ParseContext.new str, source_name _parse ctx end # almost the same as parse
# but raises SyntaxError def parse! str, source_name='source' ctx = ParseContext.new str, source_name ret = _parse ctx if INVALID[ret] raise ctx.generate_error source_name end ret end attr_accessor :name def inspect # TODO move @name ||= self.class.to_s[/\w+$/] case self when Lazy "<#{name}>" when Binary "<#{name} #{left.inspect} #{right.inspect}>" when Seq, Seq_, Branch # don't use redefined map! res = [] each{|e| res << e.inspect} "<#{name} #{res.join ' '}>" when Unary "<#{name} #{some.inspect}>" else "<#{name}>" end end end # parsers with 2 params base Binary = Struct.new :left, :right class Binary include Parser end # parsers with 1 param base Unary = Struct.new :some class Unary include Parser end end require "rsec/parsers/misc" require "rsec/parsers/seq" require "rsec/parsers/prim" require "rsec/parsers/join" require "rsec/parsers/repeat" rsec-0.4.2/lib/rsec/helpers.rb0000644000175100017500000003150113176057521015174 0ustar mmollmmoll# coding: utf-8 # ------------------------------------------------------------------------------ # Helpers(combinators) to construct parser module Rsec #:nodoc: # ------------------------------------------------------------------------------ # these are not callable from a parser module Helpers # @ desc.helper # Lazy parser is constructed when parsing starts. It is useful to reference a parser not defined yet # @ example # parser = lazy{future} # future = 'jim'.r # assert_equal 'jim', parser.parse '12323' def lazy &p raise ArgumentError, 'lazy() requires a block' unless p Lazy[p] end # @ desc.helper # Parses one of chars in str # @ example # multiplicative = one_of '*/%' # assert_equal '/', multiplicative.parse '/' # assert_equal Rsec::INVALID, actualmultiplicative.parse '+' def one_of str, &p Rsec.assert_type str, String raise ArgumentError, 'str len should > 0' if str.empty? 
one_of_klass = if (str.bytesize == str.size) and Rsec.const_defined?(:OneOfByte) # for C-ext OneOfByte else OneOf end one_of_klass[str.dup.freeze].map p end # @ desc.helper # See also #one_of#, with leading and trailing optional breakable spaces # @ example # additive = one_of_('+-') # assert_equal '+', additive.parse(' +') def one_of_ str, &p Rsec.assert_type str, String raise ArgumentError, 'str len should > 0' if str.empty? raise ArgumentError, 'str should be ascii' unless str.bytesize == str.size raise ArgumentError, 'str should not contain space' if str =~ /\s/ spaced_one_of_klass = if (str.bytesize == str.size) and Rsec.const_defined?(:OneOfByte_) # for C-ext OneOfByte_ else OneOf_ end spaced_one_of_klass[str.dup.freeze].map p end # @ desc.helper # Primitive parser, returns nil if overflow or underflow. # There can be an optional '+' or '-' at the beginning of string except unsinged_int32 | unsinged_int64. # type = # :double | # :hex_double | # :int32 | # :int64 | # :unsigned_int32 | # :unsigned_int64 # options: # :allowed_sign => '+' | '-' | '' | '+-' (default '+-') # :allowed_signs => (same as :allowed_sign) # :base => integer only (default 10) # @ example # p = prim :double # assert_equal 1.23, p.parse('1.23') # p = prim :double, allowed_sign: '-' # assert_equal 1.23, p.parse('1.23') # assert_equal -1.23, p.parse('-1.23') # assert_equal Rsec::INVALID, p.parse('+1.23') # p = prim :int32, base: 36 # assert_equal 49713, p.parse('12cx') def prim type, options={}, &p base = options[:base] if [:double, :hex_double].index base raise 'Floating points does not allow :base' end base ||= 10 Rsec.assert_type base, Fixnum unless (2..36).include? 
base raise RangeError, ":base should be in 2..36, but got #{base}" end sign_strategy = \ case (options[:allowed_sign] or options[:allowed_signs]) when nil, '+-', '-+'; 3 when '+'; 2 when '-'; 1 when ''; 0 else raise "allowed_sign should be one of nil, '', '+', '-', '+-', '-+'" end parser = \ case type when :double; PDouble.new sign_strategy, false # decimal when :hex_double; raise "Removed because Ruby 1.9.3 removed float from hex" # PDouble.new sign_strategy, true # hex when :int32; PInt32.new sign_strategy, base when :int64; PInt64.new sign_strategy, base when :unsigned_int32; raise 'unsigned int not allow - sign' if options[:allowed_signs] =~ /-/ PUnsignedInt32.new sign_strategy, base when :unsigned_int64; raise 'unsigned int not allow - sign' if options[:allowed_signs] =~ /-/ PUnsignedInt64.new sign_strategy, base else raise "Invalid primitive type #{type}" end parser.map p end # @ desc.helper # Sequence parser # @ example # assert_equal ['a', 'b', 'c'], actualseq('a', 'b', 'c').parse('abc') def seq *xs, &p xs.map! {|x| Rsec.make_parser x } Seq[xs].map p end # @ desc.helper # Sequence parser with skippable pattern(or parser) # option # :skip default= /\s*/ # @ example # assert_equal ['a', 'b', 'c'], actualseq_('a', 'b', 'c', skip: ',').parse('a,b,c') def seq_ *xs, &p skipper = if (xs.last.is_a? Hash) xs.pop[:skip] end skipper = skipper ? Rsec.make_parser(skipper) : /\s*/.r xs.map! 
{|x| Rsec.make_parser x } first, *rest = xs raise 'sequence should not be empty' unless first Seq_[first, rest, skipper].map p end # @ desc.helper # A symbol is something wrapped with optional space def symbol pattern, skip=/\s*/, &p pattern = Rsec.make_parser pattern skip = Rsec.try_skip_pattern Rsec.make_parser skip SeqOne[[skip, pattern, skip], 1].map p end # @ desc.helper # A word is wrapped with word boundaries # @ example # assert_equal ['yes', '3'], seq('yes', '3').parse('yes3') # assert_equal INVALID, seq(word('yes'), '3').parse('yes3') def word pattern, &p parser = Rsec.make_parser pattern # TODO check pattern type Pattern[/\b#{parser.some}\b/].map p end end # helpers # robust Helper = Helpers # ------------------------------------------------------------------------------ # combinators attached to parsers module Parser #:nodoc: # @ desc # Transform result # @ example # parser = /\w+/.r.map{|word| word * 2} # assert_equal 'hellohello', parser.parse!('hello') def map lambda_p=nil, &p return self if (lambda_p.nil? and p.nil?) p = lambda_p || p raise TypeError, 'should give a proc or lambda' unless (p.is_a? Proc) Map[self, p] end # @ desc # "p.join('+')" parses strings like "p+p+p+p+p". # Note that at least 1 of p appears in the string. # Sometimes it is useful to reverse the joining: # /\s*/.r.join('p').odd parses string like " p p p " def join inter, &p inter = Rsec.make_parser inter Join[self, inter].map p end # @ desc # Branch parser, note that rsec is a PEG parser generator, # beware of the difference between PEG and CFG. def | y, &p y = Rsec.make_parser y arr = if (is_a?(Branch) and !p) [*some, y] else [self, y] end Branch[arr].map p end # @ desc # Repeat n or in a range. 
# If range.end < 0, repeat at least range.begin # (Infinity and -Infinity are considered) def * n, &p # FIXME if self is an epsilon parser, will cause infinite loop parser = if n.is_a?(Range) raise "invalid n: #{n}" if n.begin < 0 Rsec.assert_type n.begin, Integer end_inf = (n.end.infinite? rescue false) (Rsec.assert_type n.end, Integer) unless end_inf if n.end > 0 RepeatRange[self, n] else RepeatAtLeastN[self, n.begin] end else Rsec.assert_type n, Integer raise "invalid n: #{n}" if n < 0 RepeatN[self, n] end parser.map p end # @ desc # Appears 0 or 1 times, result is wrapped in an array # @ example # parser = 'a'.r.maybe # assert_equal ['a'], parser.parse('a') # assert_equal [], parser.parse('') def maybe &p Maybe[self].map &p end alias _? maybe # @ desc # Kleen star, 0 or more any times def star &p self.* (0..-1), &p end # @ desc # Lookahead predicate, note that other can be a very complex parser def & other, &p other = Rsec.make_parser other LookAhead[self, other].map p end # @ desc # Negative lookahead predicate def ^ other, &p other = Rsec.make_parser other NegativeLookAhead[self, other].map p end # @ desc # When parsing failed, show "expect tokens" error def fail *tokens, &p return self if tokens.empty? 
Fail[self, tokens].map p end alias expect fail # @ desc # Short for seq_(parser, other)[1] def >> other, &p other = Rsec.make_parser other left = Rsec.try_skip_pattern self SeqOne_[left, [other], SkipPattern[/\s*/], 1].map p end # @ desc # Short for seq_(parser, other)[0] def << other, &p other = Rsec.make_parser other right = Rsec.try_skip_pattern other SeqOne_[self, [right], SkipPattern[/\s*/], 0].map p end # @ desc # Should be end of input after parse def eof &p Eof[self].map p end # @ desc # Packrat parser combinator, returns a parser that caches parse result, may optimize performance def cached &p Cached[self].map p end end # ------------------------------------------------------------------------------ # additional helper methods for special classes class Seq # @ desc.seq, seq_ # Returns the parse result at idx, shorter and faster than map{|array| array[idx]} # @ example # assert_equal 'b', seq('a', 'b', 'c')[1].parse('abc') def [] idx, &p raise 'index out of range' if (idx >= some().size or idx < 0) # optimize parsers = some().map.with_index do |p, i| i == idx ? p : Rsec.try_skip_pattern(p) end SeqOne[parsers, idx].map p end # @ desc.seq, seq_, join, join.even, join.odd # If parse result contains only 1 element, return the element instead of the array def unbox &p Unbox[self].map p end # @ desc # Think about "innerHTML"! # @ example # parser = seq('<b>', /[\w\s]+/, '</b>').inner # parser.parse('<b>the inside</b>') def inner &p Inner[self].map p end end class Seq_ def [] idx, &p raise 'index out of range' if idx > rest.size or idx < 0 # optimize parsers, use skip if possible new_first = (0 == idx ? first : Rsec.try_skip_pattern(first)) new_rest = rest().map.with_index do |p, i| # NOTE rest start with 1 (i+1) == idx ? 
p : Rsec.try_skip_pattern(p) end SeqOne_[new_first, new_rest, skipper, idx].map p end def unbox &p Unbox[self].map p end def inner &p Inner[self].map p end end class Join def unbox &p Unbox[self].map p end # @ desc.join # Only keep the even(left, token) parts def even &p JoinEven[left, Rsec.try_skip_pattern(right)].map p end # @ desc.join # Only keep the odd(right, inter) parts def odd &p JoinOdd[Rsec.try_skip_pattern(left), right].map p end end class JoinEven def unbox &p Unbox[self].map p end end class JoinOdd def unbox &p Unbox[self].map p end end class Pattern # @ desc.r # Scan until the pattern happens def until &p UntilPattern[some()].map p end end # ------------------------------------------------------------------------------ # helper methods for parser generation # ensure x is a parser def Rsec.make_parser x return x if x.is_a?(Parser) x = x.send(TO_PARSER_METHOD) if x.respond_to?(TO_PARSER_METHOD) Rsec.assert_type x, Parser x end # type assertion def Rsec.assert_type obj, type (raise TypeError, "#{obj} should be a #{type}") unless (obj.is_a? 
type) end # try to convert Pattern -> SkipPattern def Rsec.try_skip_pattern p # for C-ext if Rsec.const_defined?(:FixString) and p.is_a?(FixString) return SkipPattern[/#{Regexp.escape p.some}/] end case p when Pattern SkipPattern[p.some] when UntilPattern SkipUntilPattern[p.some] else p end end end class String #:nodoc: # String#r: convert self to parser # convienient string-to-parser transformer define_method ::Rsec::TO_PARSER_METHOD, ->(*expects, &p){ ::Rsec::Pattern[/#{Regexp.escape self}/].fail(*expects).map p } end class Regexp #:nodoc: # Regexp#r: convert self to parser # convienient regexp-to-parser transformer define_method ::Rsec::TO_PARSER_METHOD, ->(*expects, &p){ ::Rsec::Pattern[self].fail(*expects).map p } end rsec-0.4.2/lib/rsec.rb0000644000175100017500000000053413176057521013534 0ustar mmollmmoll# coding: utf-8 # load the gem # All code is under this module module Rsec # preload configs # config method name # default is :r unless Rsec.const_defined?(:TO_PARSER_METHOD) TO_PARSER_METHOD = :r end VERSION = '0.4.2' end require "strscan" require "rsec/utils" require "rsec/parser" require "rsec/helpers" rsec-0.4.2/examples/0000755000175100017500000000000013176057521013321 5ustar mmollmmollrsec-0.4.2/examples/slow_json.rb0000644000175100017500000000405313176057521015665 0ustar mmollmmoll# coding: utf-8 # grammar from # http://www.json.org/ require "rsec" class SlowJSON include Rsec::Helper def initialize generate_parser @parser = seq(/\s*/, @value, /\s*/)[1].eof end def parse s @parser.parse! s end private # term (, term)* def elem_parser term term.join(/\s*,\s*/.r).even end def chars_parser unicode_bytes = /[0-9a-f]{4}/i.r{|bytes| [bytes].pack('H*').force_encoding('utf-16be').encode!('utf-8') } escape_char = '"'.r | "\\" | '/' | 'b'.r{"\b"} | 'f'.r{"\f"} | 'n'.r{"\n"} | 'r'.r{"\r"} | 't'.r{"\t"} | seq('u'.r, unicode_bytes)[1] /[^"\\]+/.r | seq('\\', escape_char)[1] end def generate_parser string = '"'.r >> chars_parser.star.map(&:join) << '"' # -? 
int frac? exp? number = prim(:double, allowed_sign: '-') @value = string | number | lazy{@object} | lazy{@array} | 'true'.r{true} | 'false'.r{false} | 'null'.r{nil} pair = seq(string, /\s*:\s*/.r, @value){|k, _, v| [k, v]} @array = /\[\s*\]/.r{[]} | '['.r >> elem_parser(@value) << ']' @object = /\{\s*\}/.r{{}} | ('{'.r >> elem_parser(pair) << '}').map{|arr|Hash[arr]} end end if __FILE__ == $PROGRAM_NAME j = SlowJSON.new p j.parse '""' p j.parse '123.4e5' p j.parse 'null' p j.parse '[]' p j.parse '{}' p j.parse '{"no": [3, 4]}' p j.parse '[{}]' p j.parse '[{"S":321061,"T":"GetAttributeResp"},{"ERROR":null,"TS":0,"VAL":{"SqlList":[{"BatchSizeMax":0,"BatchSizeTotal":0,"ConcurrentMax":1,"DataSource":"jdbc:wrap-jdbc:filters=default,encoding:name=ds-offer:jdbc:mysql://100.10.10.10:8066/xxxx","EffectedRowCount":0,"ErrorCount":0,"ExecuteCount":5,"FetchRowCount":5,"File":null,"ID":2001,"LastError":null,"LastTime":1292742908178,"MaxTimespan":16,"MaxTimespanOccurTime":1292742668191,"Name":null,"RunningCount":0,"SQL":"SELECT @@SQL_MODE","TotalTime":83}]}}]' end rsec-0.4.2/examples/scheme.rb0000644000175100017500000000402413176057521015112 0ustar mmollmmoll# A simple-as-shit scheme interpreter. Usage: ruby scheme.rb hello.scm require "rsec" class Scheme include Rsec::Helpers Value = Struct.new :val class Bind < Hash def initialize parent = {} @parent = parent end def define id, &p # define lambda self[id] = -> bind, xs { p[* xs.map{|x| bind.eval x }] } end def eval node case node when Value; node.val when String; self[node] when Array head, *tail = node case head when String pr = self[head] pr.is_a?(Proc) ? pr[self, tail] : pr # invoke lambda when Array node.map{|n| self.eval n }.last # sequence execution end end end def [] key super(key) || @parent[key] end end def initialize boolean = /\#[tf]/. 
r {|n| Value[n=='#t'] } integer = /0|[1-9]\d*/.r {|n| Value[n.to_i] } id = /[^\s\(\)\[\]]+/.r atom = boolean | integer | id cell = atom | lazy{list} cells = /\s*/.r.join(cell).odd list = '('.r >> cells << ')' @parser = cells.eof @vm = Bind.new @vm['define'] = -> bind, (param, body) { if param.is_a?(String) @vm[param] = bind.eval body else func, *xs = param @vm[func] = @vm['lambda'][bind, [xs, body]] end } # declare: (lambda (xs[0] xs[1]) body) @vm['lambda'] = -> bind_def, (xs, body) { xs = [xs] if xs.is_a?(String) # calling: (some vs[0] vs[1]) -> bind_call, vs { vs = vs.map{|v| bind_call.eval v } new_bind = Bind.new bind_def xs.zip(vs){|x, v| new_bind[x] = v } new_bind.eval body } } @vm['if'] = -> bind, (p, left, right) { bind.eval(bind.eval(p) ? left : right) } %w|+ - * / ** % > <|.each{|s| @vm.define s, &s.to_sym } @vm.define '=', &:== @vm.define('display'){|x| puts x} end def run source @vm.eval @parser.parse! source end end ARGV[0] ? Scheme.new.run(File.read ARGV[0]) : puts('need a scheme file name') rsec-0.4.2/examples/s_exp.rb0000644000175100017500000000061713176057521014770 0ustar mmollmmoll# s-expression parser require "rsec" include Rsec::Helpers def s_exp id = /[a-zA-Z][\w\-]*/.r.fail 'id' num = prim(:double).fail 'num' naked_unit = id | num | seq_('(', lazy{exp}, ')')[1] unit = naked_unit | seq_('(', lazy{unit}, ')')[1] units = unit.join(/\s+/).even._? exp = seq_(id, units) {|(id, (units))| [id, *units]} seq_('(', exp, ')')[1].eof end rsec-0.4.2/examples/nasm_manual.rb0000644000175100017500000000765013176057521016151 0ustar mmollmmoll# Parse NASM manual [nasm.txt] and generate a list of opcodes. # Results are saved in [nasm_codes.txt], undocumented codes are printed. # Further: extend the parser to generate an X86 assembler. 
require "rsec"

module NASMManualParser

  include Rsec::Helper
  extend self

  Instructions = {}

  class UnSupportedError < RuntimeError
  end

  class Instruction < Struct.new(:nemonic, :operands, :code, :archs)
  end

  # When $debug is set, run the parser against each sample string
  # (parse! is used, so a sample that does not parse raises).
  # Always returns the parser itself.
  def debug parser, *strs
    return parser unless $debug
    strs.each do |str|
      parser.eof.parse! str
    end
    parser
  end

  def reg_parser
    gp_reg  = /E?[ABCD]X|E?(SP|BP|SI|DI)/
    gp_reg8 = /[ABCD][HL]/
    seg_reg = /ES|CS|SS|DS|FS|GS/
    fpu_reg = /ST[0-7]/
    mmx_reg = /MM[0-7]/
    xr_reg  = /CR[0234]|DR[012367]|TR[34567]/
    reg = gp_reg.r | gp_reg8 | seg_reg | fpu_reg | mmx_reg | xr_reg
    debug reg, 'AX'
  end

  def operands_parser
    imm_class = /imm:imm(32|16)|imm(32|16|8)?/
    mem_class = /mem(80|64|32|16|8)?/
    # beware of the order
    reg_class = /reg(32|16|8)|(fpu|mmx|seg)reg/
    memoffs_class = /memoffs(32|16|8)/
    tr_class = 'TR3/4/5/6/7'
    classes = (imm_class.r | memoffs_class | mem_class | reg_class | tr_class).fail 'operand class'
    reg = reg_parser.fail 'register'
    num = /\d/.r(&:to_i).fail 'num'
    # memoffs should be left of mem
    operand = classes | reg | num
    operands = operand.join('/').even.join(',').even
    debug operands, 'reg32', 'AX,memoffs16'
  end

  def code_parser
    plus_cc = /[0-9A-F][0-9A-F]\+cc/
    plus_r  = /[0-9A-F][0-9A-F]\+r/
    hex = /[0-9A-F][0-9A-F]/.r {|s| s.to_i 16}
    slash = /\/[\dr]/
    imm_code = /i[bwd]/
    reg_code = /rw\/rd|r[bwd]/
    ref_code = /ow\/od|o[wd]/
    prefix_code = /[oa](32|16)/
    code =
      (plus_cc.r | plus_r | hex | slash | imm_code | reg_code | ref_code | prefix_code).join(/\s+/).even
    debug code, 'o32 0F C8+r', 'o32 6B /r ib', 'o16 A1 ow/od'
  end

  def archs_parser
    arch = symbol(/8086|186|286|386|486|PENT|P6|CYRIX|FPU|MMX|PRIV|UNDOC/)
    # block parameters are named differently from the locals `arch` / `archs`
    # so that nothing is shadowed
    archs = ('['.r >> arch.join(',').even << ']').map do |names|
      # map to set
      names.inject({}) {|set, name|
        raise UnSupportedError, 'not implemented' if name == 'UNDOC'
        set[name] = true
        set
      }
    end
    debug archs, '[386,FPU]'
  end

  def instruction_parser
    nemonic = /[A-Z]\w+|xxSAR/
    operands = operands_parser._?
    code = ';'.r >> code_parser
    archs = archs_parser
    # `(ops)` unwraps the array produced by the optional operands parser
    instruction = seq_ nemonic, operands, code, archs do |name, (ops), opcode, arch_set|
      Instruction.new name, ops, opcode, arch_set
    end
    debug instruction, 'FISUBR mem32 ; DA /5 [8086,FPU]', 'BSWAP reg32 ; o32 0F C8+r [486]'
  end

  # Rewrite the manual's shorthands into forms the instruction parser accepts.
  # Returns a new string; the argument is not modified.
  def desugar line
    # r/m short hands
    line = line.gsub(/r\/m(32|16|8)/, 'reg\1/mem\1')
    line.gsub!('r/m64', 'mmxreg/mem64')
    # compress space
    line.sub!(/\s(TO|NEAR|FAR|SHORT)/, '_\1')
    line
  end

  # Returns the parse result, or nil when the line has a syntax error or
  # uses an unsupported (UNDOC) architecture.
  def parse_line parser, line
    parser.parse! desugar line
  rescue Rsec::SyntaxError
  rescue UnSupportedError
  end

  # Read the manual at filename and return the concatenation of the raw lines
  # that parsed as instructions; lines that look like instructions but do not
  # parse are printed.
  def parse filename
    parsed = ''
    parser = instruction_parser.eof
    src = File.read filename
    # each_line (not lines): without a block it returns an Enumerator on every
    # Ruby version, whereas String#lines returns an Array since Ruby 2.0 and
    # Array has no #with_index.
    src.each_line.with_index do |raw_line, idx|
      line = raw_line.strip
      # this shape shows that the line defines a mnemonic
      if line =~ /^\w+\s+[^;\[]+;\ [^;\[]+\[.+\]$/
        if (parse_line parser, line)
          parsed << raw_line
        else
          puts "unparsed:#{idx}\t#{line}"
        end
      end
    end
    parsed
  end

end

if __FILE__ == $PROGRAM_NAME
  $debug = true
  manual = "#{File.dirname __FILE__}/nasm_manual.txt"
  codes = "#{File.dirname __FILE__}/nasm_codes.txt"
  File.open codes, 'w' do |file|
    file.<< NASMManualParser.parse manual
  end
  puts '-' * 80
  puts "X86 asm codes are saved to #{codes}"
end

# [archive entry] rsec-0.4.2/examples/little_markdown.rb0000644000175100017500000001165613176057521017056 0ustar mmollmmoll
# a markdown translator
#
# The differences between this and original markdown:
# - markdown in inline tags are not processed
# - every line-break in non-tag parts is translated into
# - nested list elements are not supported require "rsec" class LittleMarkdown include Rsec::Helper def initialize @markdown_line_translator = make_markdown_line_translator @parser = (make_xml_tag_parser | make_char_parser).star.eof end def translate src @stack = [] @charsbuf = '' @out = '' @parser.parse! src flush_chars @out end def flush_chars @out.<< translate_markdown @charsbuf @charsbuf = '' end def make_char_parser # care stringscanner's bug, see issues (/./.r | /\n/).fail('char'){|c| @charsbuf << c} end # make a single-line markdown parser def make_markdown_line_translator line_text = lazy{line}.map{|tokens| tokens.empty? ? Rsec::INVALID : tokens.join # filter out empty } title = /"[^"]*"|'[^']*'/.r._?{|(s)| s ? "title=#{s}" : '' } img = seq('!['.r >> /[^\]]+/ << '](', /[^\)"']+/, title, ')'){|(txt, path, title)| "#{txt}" } link = seq(('['.r >> /[^\]]+/ << ']('), /[^\)"']+/, title, ')'){|(txt, path, title)| "#{txt}" } # NOTE strong should be left of em strong = ('**'.r >> line_text << '**').map{|s| "#{s}" } em = ('*'.r >> line_text << '*').map{|s| "#{s}" } code = ('`'.r >> /[^`]+/ << '`').map{|s| "#{s}" } escape = '<'.r{'<'} | '&'.r{'&'} | /\\[\!\`\*\[\]]/.r{|s|s[1]} text = /[^\!\`\*\[\]]+/ id = seq_(('['.r >> /[^\]]+/ << ']:'), text){|(id, text)| "#{text}" } line = (img | link | strong | em | code | escape | id | text).star line.eof.map &:join end # pseudo xml tag parser, except
and
and