parslet-1.5.0/0000755000175000017500000000000012270776314011666 5ustar meromeroparslet-1.5.0/metadata.yml0000644000175000017500000001554412270776314014202 0ustar meromero--- !ruby/object:Gem::Specification name: parslet version: !ruby/object:Gem::Version version: 1.5.0 prerelease: platform: ruby authors: - Kaspar Schiess autorequire: bindir: bin cert_chain: [] date: 2012-12-27 00:00:00.000000000 Z dependencies: - !ruby/object:Gem::Dependency name: blankslate prerelease: false requirement: !ruby/object:Gem::Requirement requirements: - - ~> - !ruby/object:Gem::Version version: '2.0' none: false type: :runtime version_requirements: !ruby/object:Gem::Requirement requirements: - - ~> - !ruby/object:Gem::Version version: '2.0' none: false - !ruby/object:Gem::Dependency name: rspec prerelease: false requirement: !ruby/object:Gem::Requirement requirements: - - ! '>=' - !ruby/object:Gem::Version version: '0' none: false type: :development version_requirements: !ruby/object:Gem::Requirement requirements: - - ! '>=' - !ruby/object:Gem::Version version: '0' none: false - !ruby/object:Gem::Dependency name: flexmock prerelease: false requirement: !ruby/object:Gem::Requirement requirements: - - ! '>=' - !ruby/object:Gem::Version version: '0' none: false type: :development version_requirements: !ruby/object:Gem::Requirement requirements: - - ! '>=' - !ruby/object:Gem::Version version: '0' none: false - !ruby/object:Gem::Dependency name: rdoc prerelease: false requirement: !ruby/object:Gem::Requirement requirements: - - ! '>=' - !ruby/object:Gem::Version version: '0' none: false type: :development version_requirements: !ruby/object:Gem::Requirement requirements: - - ! '>=' - !ruby/object:Gem::Version version: '0' none: false - !ruby/object:Gem::Dependency name: sdoc prerelease: false requirement: !ruby/object:Gem::Requirement requirements: - - ! 
'>=' - !ruby/object:Gem::Version version: '0' none: false type: :development version_requirements: !ruby/object:Gem::Requirement requirements: - - ! '>=' - !ruby/object:Gem::Version version: '0' none: false - !ruby/object:Gem::Dependency name: guard prerelease: false requirement: !ruby/object:Gem::Requirement requirements: - - ! '>=' - !ruby/object:Gem::Version version: '0' none: false type: :development version_requirements: !ruby/object:Gem::Requirement requirements: - - ! '>=' - !ruby/object:Gem::Version version: '0' none: false - !ruby/object:Gem::Dependency name: guard-rspec prerelease: false requirement: !ruby/object:Gem::Requirement requirements: - - ! '>=' - !ruby/object:Gem::Version version: '0' none: false type: :development version_requirements: !ruby/object:Gem::Requirement requirements: - - ! '>=' - !ruby/object:Gem::Version version: '0' none: false - !ruby/object:Gem::Dependency name: rb-fsevent prerelease: false requirement: !ruby/object:Gem::Requirement requirements: - - ! '>=' - !ruby/object:Gem::Version version: '0' none: false type: :development version_requirements: !ruby/object:Gem::Requirement requirements: - - ! '>=' - !ruby/object:Gem::Version version: '0' none: false - !ruby/object:Gem::Dependency name: growl prerelease: false requirement: !ruby/object:Gem::Requirement requirements: - - ! '>=' - !ruby/object:Gem::Version version: '0' none: false type: :development version_requirements: !ruby/object:Gem::Requirement requirements: - - ! 
'>=' - !ruby/object:Gem::Version version: '0' none: false description: email: kaspar.schiess@absurd.li executables: [] extensions: [] extra_rdoc_files: - README files: - HISTORY.txt - LICENSE - Rakefile - README - lib/parslet/atoms/alternative.rb - lib/parslet/atoms/base.rb - lib/parslet/atoms/can_flatten.rb - lib/parslet/atoms/capture.rb - lib/parslet/atoms/context.rb - lib/parslet/atoms/dsl.rb - lib/parslet/atoms/dynamic.rb - lib/parslet/atoms/entity.rb - lib/parslet/atoms/lookahead.rb - lib/parslet/atoms/named.rb - lib/parslet/atoms/re.rb - lib/parslet/atoms/repetition.rb - lib/parslet/atoms/scope.rb - lib/parslet/atoms/sequence.rb - lib/parslet/atoms/str.rb - lib/parslet/atoms/visitor.rb - lib/parslet/atoms.rb - lib/parslet/cause.rb - lib/parslet/convenience.rb - lib/parslet/error_reporter/deepest.rb - lib/parslet/error_reporter/tree.rb - lib/parslet/error_reporter.rb - lib/parslet/export.rb - lib/parslet/expression/treetop.rb - lib/parslet/expression.rb - lib/parslet/parser.rb - lib/parslet/pattern/binding.rb - lib/parslet/pattern.rb - lib/parslet/rig/rspec.rb - lib/parslet/scope.rb - lib/parslet/slice.rb - lib/parslet/source/line_cache.rb - lib/parslet/source.rb - lib/parslet/transform/context.rb - lib/parslet/transform.rb - lib/parslet.rb - example/boolean_algebra.rb - example/calc.rb - example/capture.rb - example/comments.rb - example/deepest_errors.rb - example/documentation.rb - example/email_parser.rb - example/empty.rb - example/erb.rb - example/ignore.rb - example/ip_address.rb - example/json.rb - example/local.rb - example/mathn.rb - example/minilisp.rb - example/modularity.rb - example/nested_errors.rb - example/output/boolean_algebra.out - example/output/calc.out - example/output/capture.out - example/output/comments.out - example/output/deepest_errors.out - example/output/documentation.err - example/output/documentation.out - example/output/email_parser.out - example/output/empty.err - example/output/erb.out - example/output/ignore.out - 
example/output/ignore_whitespace.out - example/output/ip_address.out - example/output/json.out - example/output/local.out - example/output/mathn.out - example/output/minilisp.out - example/output/modularity.out - example/output/nested_errors.out - example/output/parens.out - example/output/readme.out - example/output/scopes.out - example/output/seasons.out - example/output/sentence.out - example/output/simple_xml.out - example/output/string_parser.out - example/parens.rb - example/readme.rb - example/scopes.rb - example/seasons.rb - example/sentence.rb - example/simple.lit - example/simple_xml.rb - example/string_parser.rb - example/test.lit homepage: http://kschiess.github.com/parslet licenses: [] post_install_message: rdoc_options: - --main - README require_paths: - lib required_ruby_version: !ruby/object:Gem::Requirement requirements: - - ! '>=' - !ruby/object:Gem::Version version: '0' none: false required_rubygems_version: !ruby/object:Gem::Requirement requirements: - - ! '>=' - !ruby/object:Gem::Version version: '0' none: false requirements: [] rubyforge_project: rubygems_version: 1.8.24 signing_key: specification_version: 3 summary: Parser construction library with great error reporting in Ruby. test_files: [] has_rdoc: parslet-1.5.0/example/0000755000175000017500000000000012270776314013321 5ustar meromeroparslet-1.5.0/example/test.lit0000644000175000017500000000021012270776314015003 0ustar meromero"THis is a string" "This is another string" "This string is escaped \"embedded quoted stuff \" " 12 // an integer literal and a comment parslet-1.5.0/example/string_parser.rb0000644000175000017500000000260112270776314016527 0ustar meromero# A more complex parser that illustrates how a compiler might be constructed. # The parser recognizes strings and integer literals and constructs almost a # useful AST from the file contents. 
require 'pp' $:.unshift File.dirname(__FILE__) + "/../lib" require 'parslet' include Parslet class LiteralsParser < Parslet::Parser rule :space do (match '[ ]').repeat(1) end rule :literals do (literal >> eol).repeat end rule :literal do (integer | string).as(:literal) >> space.maybe end rule :string do str('"') >> ( (str('\\') >> any) | (str('"').absent? >> any) ).repeat.as(:string) >> str('"') end rule :integer do match('[0-9]').repeat(1).as(:integer) end rule :eol do line_end.repeat(1) end rule :line_end do crlf >> space.maybe end rule :crlf do match('[\r\n]').repeat(1) end root :literals end input_name = File.join(File.dirname(__FILE__), 'simple.lit') file = File.read(input_name) parsetree = LiteralsParser.new.parse(file) class Lit < Struct.new(:text) def to_s text.inspect end end class StringLit < Lit end class IntLit < Lit def to_s text end end transform = Parslet::Transform.new do rule(:literal => {:integer => simple(:x)}) { IntLit.new(x) } rule(:literal => {:string => simple(:s)}) { StringLit.new(s) } end ast = transform.apply(parsetree) pp ast parslet-1.5.0/example/simple_xml.rb0000644000175000017500000000234712270776314016025 0ustar meromero# A simple xml parser. It is simple in the respect as that it doesn't address # any of the complexities of XML. This is ruby 1.9. $:.unshift File.dirname(__FILE__) + "/../lib" require 'pp' require 'parslet' class XML < Parslet::Parser root :document rule(:document) { tag(close: false).as(:o) >> document.as(:i) >> tag(close: true).as(:c) | text } # Perhaps we could have some syntax sugar to make this more easy? # def tag(opts={}) close = opts[:close] || false parslet = str('<') parslet = parslet >> str('/') if close parslet = parslet >> (str('>').absent? >> match("[a-zA-Z]")).repeat(1).as(:name) parslet = parslet >> str('>') parslet end rule(:text) { match('[^<>]').repeat(0) } end def check(xml) r = XML.new.parse(xml) # We'll validate the tree by reducing valid pairs of tags into simply the # string "verified". 
If the transformation ends on a string, then the # document was 'valid'. # t = Parslet::Transform.new do rule( o: {name: simple(:tag)}, c: {name: simple(:tag)}, i: simple(:t) ) { 'verified' } end t.apply(r) end pp check("some text in the tags") pp check("some text in the tags") parslet-1.5.0/example/simple.lit0000644000175000017500000000005212270776314015321 0ustar meromero123 12345 " Some String with \"escapes\"" parslet-1.5.0/example/sentence.rb0000644000175000017500000000277512270776314015465 0ustar meromero# encoding: UTF-8 # A small example contributed by John Mettraux (jmettraux) that demonstrates # working with Unicode. This only works on Ruby 1.9. $:.unshift File.dirname(__FILE__) + "/../lib" require 'parslet' class Parser < Parslet::Parser rule(:sentence) { (match('[^。]').repeat(1) >> str("。")).as(:sentence) } rule(:sentences) { sentence.repeat } root(:sentences) end class Transformer < Parslet::Transform rule(:sentence => simple(:sen)) { sen.to_s } end string = "RubyKaigi2009のテーマは、「変わる/変える」です。 前回の" + "RubyKaigi2008のテーマであった「多様性」の言葉の通り、 " + "2008年はRubyそのものに関しても、またRubyの活躍する舞台に関しても、 " + "ますます多様化が進みつつあります。RubyKaigi2008は、そのような " + "Rubyの生態系をあらためて認識する場となりました。 しかし、" + "こうした多様化が進む中、異なる者同士が単純に距離を 置いたままでは、" + "その違いを認識したところであまり意味がありません。 異なる実装、" + "異なる思想、異なる背景といった、様々な多様性を理解しつつ、 " + "すり合わせるべきものをすり合わせ、変えていくべきところを " + "変えていくことが、豊かな未来へとつながる道に違いありません。" parser = Parser.new transformer = Transformer.new tree = parser.parse(string) p transformer.apply(tree) parslet-1.5.0/example/seasons.rb0000644000175000017500000000167412270776314015331 0ustar meromero$:.unshift File.dirname(__FILE__) + "/../lib" require 'parslet' require 'pp' tree = {:bud => {:stem => []}} class Spring < Parslet::Transform rule(:stem => sequence(:branches)) { {:stem => (branches + [{:branch => :leaf}])} } end class Summer < Parslet::Transform rule(:stem => subtree(:branches)) { new_branches = branches.map { |b| {:branch => [:leaf, :flower]} } {:stem => new_branches} } end class Fall < Parslet::Transform 
rule(:branch => sequence(:x)) { x.each { |e| puts "Fruit!" if e==:flower } x.each { |e| puts "Falling Leaves!" if e==:leaf } {:branch => []} } end class Winter < Parslet::Transform rule(:stem => subtree(:x)) { {:stem => []} } end def do_seasons(tree) [Spring, Summer, Fall, Winter].each do |season| p "And when #{season} comes" tree = season.new.apply(tree) pp tree puts end tree end # What marvel of life! tree = do_seasons(tree) tree = do_seasons(tree) parslet-1.5.0/example/scopes.rb0000644000175000017500000000042212270776314015140 0ustar meromero $:.unshift File.dirname(__FILE__) + "/../lib" require 'parslet' include Parslet parser = str('a').capture(:a) >> scope { str('b').capture(:a) } >> dynamic { |s,c| str(c.captures[:a]) } begin parser.parse('aba') puts "parses 'aba'" rescue puts "exception!" endparslet-1.5.0/example/readme.rb0000644000175000017500000000162712270776314015111 0ustar meromero# The example from the readme. With this, I am making sure that the readme # 'works'. Is this too messy? $:.unshift File.dirname(__FILE__) + "/../lib" # cut here ------------------------------------------------------------------- require 'parslet' include Parslet # Constructs a parser using a Parser Expression Grammar like DSL: parser = str('"') >> ( str('\\') >> any | str('"').absent? >> any ).repeat.as(:string) >> str('"') # Parse the string and capture parts of the interpretation (:string above) tree = parser.parse('"This is a \\"String\\" in which you can escape stuff"') tree # => {:string=>"This is a \\\"String\\\" in which you can escape stuff"} # Here's how you can grab results from that tree: transform = Parslet::Transform.new do rule(:string => simple(:x)) { puts "String contents: #{x}" } end transform.apply(tree) parslet-1.5.0/example/parens.rb0000644000175000017500000000166112270776314015142 0ustar meromero# A small example that demonstrates the power of tree pattern matching. Also # uses '.as(:name)' to construct a tree that can reliably be matched # afterwards. 
$:.unshift File.dirname(__FILE__) + "/../lib" require 'pp' require 'parslet' module LISP # as in 'lots of insipid and stupid parenthesis' class Parser < Parslet::Parser rule(:balanced) { str('(').as(:l) >> balanced.maybe.as(:m) >> str(')').as(:r) } root(:balanced) end class Transform < Parslet::Transform rule(:l => '(', :m => simple(:x), :r => ')') { # innermost :m will contain nil x.nil? ? 1 : x+1 } end end parser = LISP::Parser.new transform = LISP::Transform.new %w! () (()) ((((())))) ((()) !.each do |pexp| begin result = parser.parse(pexp) puts "#{"%20s"%pexp}: #{result.inspect} (#{transform.apply(result)} parens)" rescue Parslet::ParseFailed => m puts "#{"%20s"%pexp}: #{m}" end puts end parslet-1.5.0/example/output/0000755000175000017500000000000012270776314014661 5ustar meromeroparslet-1.5.0/example/output/string_parser.out0000644000175000017500000000020412270776314020270 0ustar meromero[#, #, #] parslet-1.5.0/example/output/simple_xml.out0000644000175000017500000000010512270776314017557 0ustar meromero"verified" {:o=>{:name=>"b"@1}, :i=>"verified", :c=>{:name=>"a"@33}} parslet-1.5.0/example/output/sentence.out0000644000175000017500000000157412270776314017225 0ustar meromero["RubyKaigi2009のテーマは、「変わる/変える」です。", " 前回のRubyKaigi2008のテーマであった「多様性」の言葉の通り、 2008年はRubyそのものに関しても、またRubyの活躍する舞台に関しても、 ますます多様化が進みつつあります。", "RubyKaigi2008は、そのような Rubyの生態系をあらためて認識する場となりました。", " しかし、こうした多様化が進む中、異なる者同士が単純に距離を 置いたままでは、その違いを認識したところであまり意味がありません。", " 異なる実装、異なる思想、異なる背景といった、様々な多様性を理解しつつ、 すり合わせるべきものをすり合わせ、変えていくべきところを 変えていくことが、豊かな未来へとつながる道に違いありません。"] parslet-1.5.0/example/output/seasons.out0000644000175000017500000000100212270776314017056 0ustar meromero"And when Spring comes" {:bud=>{:stem=>[{:branch=>:leaf}]}} "And when Summer comes" {:bud=>{:stem=>[{:branch=>[:leaf, :flower]}]}} "And when Fall comes" Fruit! Falling Leaves! 
{:bud=>{:stem=>[{:branch=>[]}]}} "And when Winter comes" {:bud=>{:stem=>[]}} "And when Spring comes" {:bud=>{:stem=>[{:branch=>:leaf}]}} "And when Summer comes" {:bud=>{:stem=>[{:branch=>[:leaf, :flower]}]}} "And when Fall comes" Fruit! Falling Leaves! {:bud=>{:stem=>[{:branch=>[]}]}} "And when Winter comes" {:bud=>{:stem=>[]}} parslet-1.5.0/example/output/scopes.out0000644000175000017500000000001512270776314016702 0ustar meromeroparses 'aba' parslet-1.5.0/example/output/readme.out0000644000175000017500000000010412270776314016642 0ustar meromeroString contents: This is a \"String\" in which you can escape stuff parslet-1.5.0/example/output/parens.out0000644000175000017500000000066012270776314016704 0ustar meromero (): {:l=>"("@0, :m=>nil, :r=>")"@1} (1 parens) (()): {:l=>"("@0, :m=>{:l=>"("@1, :m=>nil, :r=>")"@2}, :r=>")"@3} (2 parens) ((((())))): {:l=>"("@0, :m=>{:l=>"("@1, :m=>{:l=>"("@2, :m=>{:l=>"("@3, :m=>{:l=>"("@4, :m=>nil, :r=>")"@5}, :r=>")"@6}, :r=>")"@7}, :r=>")"@8}, :r=>")"@9} (5 parens) ((()): Failed to match sequence (l:'(' m:(BALANCED?) r:')') at line 1 char 6. parslet-1.5.0/example/output/nested_errors.out0000644000175000017500000000751712270776314020302 0ustar meromero-------------------------------------------------------------------------------- . 10 . 20 01 02 define f() 03 @res.name 04 end 05 Failed to match sequence (LINE_SEPARATOR? BLOCK LINE_SEPARATOR?) at line 2 char 5. `- Expected one of [DEFINE_BLOCK, BEGIN_BLOCK] at line 2 char 5. |- Failed to match sequence (define:'define' SPACE name:IDENTIFIER '()' BODY 'end') at line 2 char 15. | `- Failed to match sequence (body:((LINE_SEPARATOR (BLOCK / EXPRESSION)){1, }) LINE_SEPARATOR) at line 3 char 11. | `- Expected at least 1 of SPACE? (COMMENT? NEWLINE / ';') SPACE? at line 3 char 11. | `- Failed to match sequence (SPACE? (COMMENT? NEWLINE / ';') SPACE?) at line 3 char 11. | `- Expected one of [COMMENT? NEWLINE, ';'] at line 3 char 11. | |- Failed to match sequence (COMMENT? 
NEWLINE) at line 3 char 11. | | `- Failed to match [\\r\\n] at line 3 char 11. | `- Expected ";", but got "." at line 3 char 11. `- Failed to match sequence (pre:((type:'concurrent' SPACE)?) begin:'begin' BODY 'end') at line 2 char 5. `- Expected "begin", but got "defin" at line 2 char 5. -------------------------------------------------------------------------------- . 10 . 20 01 02 define f() 03 begin 04 @res.name 05 end 06 end 07 Failed to match sequence (LINE_SEPARATOR? BLOCK LINE_SEPARATOR?) at line 2 char 5. `- Expected one of [DEFINE_BLOCK, BEGIN_BLOCK] at line 2 char 5. |- Failed to match sequence (define:'define' SPACE name:IDENTIFIER '()' BODY 'end') at line 2 char 15. | `- Failed to match sequence (body:((LINE_SEPARATOR (BLOCK / EXPRESSION)){1, }) LINE_SEPARATOR) at line 2 char 15. | `- Expected at least 1 of LINE_SEPARATOR (BLOCK / EXPRESSION) at line 2 char 15. | `- Failed to match sequence (LINE_SEPARATOR (BLOCK / EXPRESSION)) at line 3 char 7. | `- Expected one of [BLOCK, EXPRESSION] at line 3 char 7. | |- Expected one of [DEFINE_BLOCK, BEGIN_BLOCK] at line 3 char 7. | | |- Failed to match sequence (define:'define' SPACE name:IDENTIFIER '()' BODY 'end') at line 3 char 7. | | | `- Expected "define", but got "begin\n" at line 3 char 7. | | `- Failed to match sequence (pre:((type:'concurrent' SPACE)?) begin:'begin' BODY 'end') at line 3 char 12. | | `- Failed to match sequence (body:((LINE_SEPARATOR (BLOCK / EXPRESSION)){1, }) LINE_SEPARATOR) at line 4 char 13. | | `- Expected at least 1 of SPACE? (COMMENT? NEWLINE / ';') SPACE? at line 4 char 13. | | `- Failed to match sequence (SPACE? (COMMENT? NEWLINE / ';') SPACE?) at line 4 char 13. | | `- Expected one of [COMMENT? NEWLINE, ';'] at line 4 char 13. | | |- Failed to match sequence (COMMENT? NEWLINE) at line 4 char 13. | | | `- Failed to match [\\r\\n] at line 4 char 13. | | `- Expected ";", but got "." at line 4 char 13. 
| `- Failed to match sequence (RES_ACTIONS res_field:((':' name:IDENTIFIER)?)) at line 3 char 7. | `- Failed to match sequence (resources:REFERENCE res_actions:(res_action:RES_ACTION_OR_LINK{0, })) at line 3 char 7. | `- Failed to match sequence ('@'{1, 2} IDENTIFIER) at line 3 char 7. | `- Expected at least 1 of '@' at line 3 char 7. | `- Expected "@", but got "b" at line 3 char 7. `- Failed to match sequence (pre:((type:'concurrent' SPACE)?) begin:'begin' BODY 'end') at line 2 char 5. `- Expected "begin", but got "defin" at line 2 char 5. -------------------------------------------------------------------------------- parslet-1.5.0/example/output/modularity.out0000644000175000017500000000000012270776314017571 0ustar meromeroparslet-1.5.0/example/output/minilisp.out0000644000175000017500000000015512270776314017237 0ustar meromero[:define, :test, [:lambda, [], [:begin, [:display, "something"@54], [:display, 1], [:display, 3.08]]]] parslet-1.5.0/example/output/mathn.out0000644000175000017500000000015512270776314016522 0ustar meromeroit terminates before we require mathn requiring mathn now and trying again (will hang without the fix) okay! parslet-1.5.0/example/output/local.out0000644000175000017500000000024012270776314016500 0ustar meromero{:e=>"a"@0, :rec=>{:e=>"a"@1, :rec=>{:e=>"a"@2, :rec=>{:e=>"a"@3, :rec=>nil}}}} e2:'aa' !. 
/ e1:'a' rec:B {:e1=>"a"@0, :rec=>{:e1=>"a"@1, :rec=>{:e2=>"aa"@2}}} parslet-1.5.0/example/output/json.out0000644000175000017500000000102712270776314016363 0ustar meromero {:array=>[{:number=>"1"@5}, {:number=>"2"@8}, {:number=>"3"@11}, {:null=>"null"@14}, {:string=>"asdfasdf asdfds"@25}, {:object=>{:entry=>{:key=>{:string=>"a"@46}, :val=>{:number=>"-1.2"@50}}}}, {:object=>[{:entry=>{:key=>{:string=>"b"@61}, :val=>{:true=>"true"@65}}}, {:entry=>{:key=>{:string=>"c"@72}, :val=>{:false=>"false"@76}}}]}, {:number=>"0.1e24"@89}, {:true=>"true"@97}, {:false=>"false"@103}, {:array=>{:number=>"1"@112}}]} [1, 2, 3, nil, "asdfasdf asdfds", {"a"=>-1.2}, {"b"=>true, "c"=>false}, 1.0e+23, true, false, [1]] parslet-1.5.0/example/output/ip_address.out0000644000175000017500000000111612270776314017526 0ustar meromero 0.0.0.0 -> {:ipv4=>"0.0.0.0"@0} 255.255.255.255 -> {:ipv4=>"255.255.255.255"@0} 255.255.255 -> Failed: Expected one of [IPV4, IPV6] at line 1 char 1. 1:2:3:4:5:6:7:8 -> {:ipv6=>"1:2:3:4:5:6:7:8"@0} 12AD:34FC:A453:1922:: -> {:ipv6=>"12AD:34FC:A453:1922::"@0} 12AD::34FC -> {:ipv6=>"12AD::34FC"@0} 12AD:: -> {:ipv6=>"12AD::"@0} :: -> {:ipv6=>"::"@0} 1:2 -> Failed: Expected one of [IPV4, IPV6] at line 1 char 1. parslet-1.5.0/example/output/ignore_whitespace.out0000644000175000017500000000006512270776314021112 0ustar meromero[{:a=>"a"@0}, {:a=>"a"@1}, {:a=>"a"@5}, {:a=>"a"@7}] parslet-1.5.0/example/output/ignore.out0000644000175000017500000000000712270776314016672 0ustar meromero"ac"@0 parslet-1.5.0/example/output/erb.out0000644000175000017500000000053712270776314016167 0ustar meromero{:text=>[{:text=>"The value of x is "@0}, {:expression=>{:ruby=>" x "@21}}, {:text=>"."@26}]} {:text=>[{:code=>{:ruby=>" 1 + 2 "@2}}]} {:text=>[{:comment=>{:ruby=>" commented "@3}}]} The not printed result of "a = 2". The not printed non-evaluated comment "a = 1", see the value of a below. The nicely printed result. The value of a is 2, and b is 3. 
parslet-1.5.0/example/output/empty.err0000644000175000017500000000020312270776314016524 0ustar meromeroexample/empty.rb:13:in `
': rule(:empty) { ... } returns nil. Still not implemented, but already used? (NotImplementedError) parslet-1.5.0/example/output/email_parser.out0000644000175000017500000000016312270776314020055 0ustar meromerosince you haven't specified any EMAIL_ADDR, for testing purposes we're using a.b.c.d@gmail.com "a.b.c.d@gmail.com" parslet-1.5.0/example/output/documentation.out0000644000175000017500000000001112270776314020253 0ustar meromero"aaaa"@0 parslet-1.5.0/example/output/documentation.err0000644000175000017500000000051112270776314020241 0ustar meromero/Users/kaspar/git_work/own/parslet/lib/parslet/atoms/base.rb:326:in `parse_failed': Don't know what to do with bbbb at line 1 char 1. (Parslet::ParseFailed) from /Users/kaspar/git_work/own/parslet/lib/parslet/atoms/base.rb:55:in `parse' from example/documentation.rb:13:in `parse' from example/documentation.rb:18:in `
' parslet-1.5.0/example/output/deepest_errors.out0000644000175000017500000000753112270776314020445 0ustar meromero-------------------------------------------------------------------------------- . 10 . 20 01 02 define f() 03 @res.name 04 end 05 Failed to match sequence (LINE_SEPARATOR? BLOCK LINE_SEPARATOR?) at line 2 char 5. `- Expected one of [DEFINE_BLOCK, BEGIN_BLOCK] at line 2 char 5. |- Failed to match sequence (define:'define' SPACE name:IDENTIFIER '()' BODY 'end') at line 2 char 15. | `- Failed to match sequence (body:((LINE_SEPARATOR (BLOCK / EXPRESSION)){1, }) LINE_SEPARATOR) at line 3 char 11. | `- Expected at least 1 of SPACE? (COMMENT? NEWLINE / ';') SPACE? at line 3 char 11. | `- Failed to match sequence (SPACE? (COMMENT? NEWLINE / ';') SPACE?) at line 3 char 11. | `- Expected one of [COMMENT? NEWLINE, ';'] at line 3 char 11. | |- Failed to match sequence (COMMENT? NEWLINE) at line 3 char 11. | | `- Expected "()", but got "\n " at line 3 char 16. | `- Expected "()", but got "\n " at line 3 char 16. `- Failed to match sequence (pre:((type:'concurrent' SPACE)?) begin:'begin' BODY 'end') at line 2 char 5. `- Expected "()", but got "\n " at line 3 char 16. -------------------------------------------------------------------------------- . 10 . 20 01 02 define f() 03 begin 04 @res.name 05 end 06 end 07 Failed to match sequence (LINE_SEPARATOR? BLOCK LINE_SEPARATOR?) at line 2 char 5. `- Expected one of [DEFINE_BLOCK, BEGIN_BLOCK] at line 2 char 5. |- Failed to match sequence (define:'define' SPACE name:IDENTIFIER '()' BODY 'end') at line 2 char 15. | `- Failed to match sequence (body:((LINE_SEPARATOR (BLOCK / EXPRESSION)){1, }) LINE_SEPARATOR) at line 2 char 15. | `- Expected at least 1 of LINE_SEPARATOR (BLOCK / EXPRESSION) at line 2 char 15. | `- Failed to match sequence (LINE_SEPARATOR (BLOCK / EXPRESSION)) at line 3 char 7. | `- Expected one of [BLOCK, EXPRESSION] at line 3 char 7. | |- Expected one of [DEFINE_BLOCK, BEGIN_BLOCK] at line 3 char 7. 
| | |- Failed to match sequence (define:'define' SPACE name:IDENTIFIER '()' BODY 'end') at line 3 char 7. | | | `- Expected "define", but got "begin\n" at line 3 char 7. | | `- Failed to match sequence (pre:((type:'concurrent' SPACE)?) begin:'begin' BODY 'end') at line 3 char 12. | | `- Failed to match sequence (body:((LINE_SEPARATOR (BLOCK / EXPRESSION)){1, }) LINE_SEPARATOR) at line 4 char 13. | | `- Expected at least 1 of SPACE? (COMMENT? NEWLINE / ';') SPACE? at line 4 char 13. | | `- Failed to match sequence (SPACE? (COMMENT? NEWLINE / ';') SPACE?) at line 4 char 13. | | `- Expected one of [COMMENT? NEWLINE, ';'] at line 4 char 13. | | |- Failed to match sequence (COMMENT? NEWLINE) at line 4 char 13. | | | `- Expected "()", but got "\n " at line 4 char 18. | | `- Expected "()", but got "\n " at line 4 char 18. | `- Failed to match sequence (RES_ACTIONS res_field:((':' name:IDENTIFIER)?)) at line 3 char 7. | `- Failed to match sequence (resources:REFERENCE res_actions:(res_action:RES_ACTION_OR_LINK{0, })) at line 3 char 7. | `- Failed to match sequence ('@'{1, 2} IDENTIFIER) at line 3 char 7. | `- Expected at least 1 of '@' at line 3 char 7. | `- Expected "()", but got "\n " at line 4 char 18. `- Failed to match sequence (pre:((type:'concurrent' SPACE)?) begin:'begin' BODY 'end') at line 2 char 5. `- Expected "()", but got "\n " at line 4 char 18. 
-------------------------------------------------------------------------------- parslet-1.5.0/example/output/comments.out0000644000175000017500000000043412270776314017240 0ustar meromero[{:exp=>{:a=>"a"@3}}, {:line=>"// line comment"@7}, {:exp=>{:a=>"a"@25}}, {:exp=>{:a=>"a"@27}}, {:exp=>[{:a=>"a"@29}, {:line=>"// line comment"@31}]}, {:exp=>[{:a=>"a"@49}, {:multi=>"/* inline comment */"@51}]}, {:exp=>{:a=>"a"@72}}, {:multi=>"/* multiline\n comment */"@77}] parslet-1.5.0/example/output/capture.out0000644000175000017500000000015212270776314017053 0ustar meromero[{:line=>"Text1\n"@9}, {:doc=>[{:line=>"Text3\n"@23}, {:line=>"Text4\n"@29}]}, {:line=>"\nText2\n"@41}] parslet-1.5.0/example/output/calc.out0000644000175000017500000000003512270776314016312 0ustar meromero123*2 (command line): -> 246 parslet-1.5.0/example/output/boolean_algebra.out0000644000175000017500000000017612270776314020512 0ustar meromero{:and=> {:left=>{:var=>"1"@3}, :right=>{:or=>{:left=>{:var=>"2"@13}, :right=>{:var=>"3"@21}}}}} [["1", "2"], ["1", "3"]] parslet-1.5.0/example/nested_errors.rb0000644000175000017500000000434012270776314016525 0ustar meromero$:.unshift File.dirname(__FILE__) + "/../lib" require 'parslet' require 'parslet/convenience' # This example demonstrates tree error reporting in a real life example. # The parser code has been contributed by John Mettraux. def prettify(str) puts " "*3 + " "*4 + "." + " "*4 + "10" + " "*3 + "." + " "*4 + "20" str.lines.each_with_index do |line, index| printf "%02d %s\n", index+1, line.chomp end end class Parser < Parslet::Parser # commons rule(:space) { match('[ \t]').repeat(1) } rule(:space?) { space.maybe } rule(:newline) { match('[\r\n]') } rule(:comment) { str('#') >> match('[^\r\n]').repeat } rule(:line_separator) { (space? >> ((comment.maybe >> newline) | str(';')) >> space?).repeat(1) } rule(:blank) { line_separator | space } rule(:blank?) 
{ blank.maybe } rule(:identifier) { match('[a-zA-Z0-9_]').repeat(1) } # res_statement rule(:reference) { (str('@').repeat(1,2) >> identifier).as(:reference) } rule(:res_action_or_link) { str('.').as(:dot) >> (identifier >> str('?').maybe ).as(:name) >> str('()') } rule(:res_actions) { ( reference ).as(:resources) >> ( res_action_or_link.as(:res_action) ).repeat(0).as(:res_actions) } rule(:res_statement) { res_actions >> (str(':') >> identifier.as(:name)).maybe.as(:res_field) } # expression rule(:expression) { res_statement } # body rule(:body) { (line_separator >> (block | expression)).repeat(1).as(:body) >> line_separator } # blocks rule(:begin_block) { (str('concurrent').as(:type) >> space).maybe.as(:pre) >> str('begin').as(:begin) >> body >> str('end') } rule(:define_block) { str('define').as(:define) >> space >> identifier.as(:name) >> str('()') >> body >> str('end') } rule(:block) { define_block | begin_block } # root rule(:radix) { line_separator.maybe >> block >> line_separator.maybe } root(:radix) end ds = [ %{ define f() @res.name end }, %{ define f() begin @res.name end end } ] ds.each do |d| puts '-' * 80 prettify(d) parser = Parser.new begin parser.parse_with_debug(d) end end puts '-' * 80parslet-1.5.0/example/modularity.rb0000644000175000017500000000216112270776314016037 0ustar meromero$:.unshift File.dirname(__FILE__) + "/../lib" require 'pp' require "parslet" # Demonstrates modular parsers, split out over many classes. Please look at # ip_address.rb as well. module ALanguage include Parslet # Parslet rules are really a special kind of method. Mix them into your # classes! rule(:a_language) { str('aaa') } end # Parslet parsers are parslet atoms as well. Create an instance and chain them # to your other rules. # class BLanguage < Parslet::Parser root :blang rule(:blang) { str('bbb') } end # Parslet atoms are really Ruby values, pass them around. 
c_language = Parslet.str('ccc') class Language < Parslet::Parser def initialize(c_language) @c_language = c_language super() end root :root include ALanguage rule(:root) { str('a(') >> a_language >> str(')') >> space | str('b(') >> BLanguage.new >> str(')') >> space | str('c(') >> @c_language >> str(')') >> space } rule(:space) { str(' ').maybe } end Language.new(c_language).parse('a(aaa)') Language.new(c_language).parse('b(bbb)') Language.new(c_language).parse('c(ccc)')parslet-1.5.0/example/minilisp.rb0000644000175000017500000000420512270776314015473 0ustar meromero# Reproduces [1] using parslet. # [1] http://thingsaaronmade.com/blog/a-quick-intro-to-writing-a-parser-using-treetop.html $:.unshift File.dirname(__FILE__) + "/../lib" require 'pp' require 'parslet' require 'parslet/convenience' module MiniLisp class Parser < Parslet::Parser root :expression rule(:expression) { space? >> str('(') >> space? >> body >> str(')') >> space? } rule(:body) { (expression | identifier | float | integer | string).repeat.as(:exp) } rule(:space) { match('\s').repeat(1) } rule(:space?) { space.maybe } rule(:identifier) { (match('[a-zA-Z=*]') >> match('[a-zA-Z=*_]').repeat).as(:identifier) >> space? } rule(:float) { ( integer >> ( str('.') >> match('[0-9]').repeat(1) | str('e') >> match('[0-9]').repeat(1) ).as(:e) ).as(:float) >> space? } rule(:integer) { ((str('+') | str('-')).maybe >> match("[0-9]").repeat(1)).as(:integer) >> space? } rule(:string) { str('"') >> ( str('\\') >> any | str('"').absent? >> any ).repeat.as(:string) >> str('"') >> space? } end class Transform include Parslet attr_reader :t def initialize @t = Parslet::Transform.new # To understand these, take a look at what comes out of the parser. 
t.rule(:identifier => simple(:ident)) { ident.to_sym } t.rule(:string => simple(:str)) { str } t.rule(:integer => simple(:int)) { Integer(int) } t.rule(:float=>{:integer=> simple(:a), :e=> simple(:b)}) { Float(a + b) } t.rule(:exp => subtree(:exp)) { exp } end def do(tree) t.apply(tree) end end end parser = MiniLisp::Parser.new transform = MiniLisp::Transform.new result = parser.parse_with_debug %Q{ (define test (lambda () (begin (display "something") (display 1) (display 3.08)))) } # Transform the result pp transform.do(result) if result # Thereby reducing it to the earlier problem: # http://github.com/kschiess/toylisp parslet-1.5.0/example/mathn.rb0000644000175000017500000000212312270776314014753 0ustar meromero# Demonstrates that we have a compatibility fix to mathn's weird idea of # integer mathematics. # This was contributed by Jonathan Hinkle (https://github.com/hynkle). Thanks! $:.unshift File.dirname(__FILE__) + "/../lib" require 'parslet' require 'parslet/convenience' include Parslet def attempt_parse possible_whitespace = match['\s'].repeat cephalopod = str('octopus') | str('squid') parenthesized_cephalopod = str('(') >> possible_whitespace >> cephalopod >> possible_whitespace >> str(')') parser = possible_whitespace >> parenthesized_cephalopod >> possible_whitespace # This parse fails, but that is not the point. When mathn is in the current # ruby environment, it modifies integer division in a way that makes # parslet loop indefinitely. parser.parse %{(\nsqeed)\n} rescue Parslet::ParseFailed end attempt_parse puts 'it terminates before we require mathn' puts "requiring mathn now" require 'mathn' puts "and trying again (will hang without the fix)" attempt_parse # but it doesn't terminate after requiring mathn puts "okay!"parslet-1.5.0/example/local.rb0000644000175000017500000000165612270776314014750 0ustar meromero # An exploration of two ideas: # a) Constructing a whole parser inline, without the artificial class around # it. 
# and: # b) Constructing non-greedy or non-blind parsers by transforming the # grammar. $:.unshift File.dirname(__FILE__) + "/../lib" require 'parslet' include Parslet a = str('a').repeat >> str('aa') # E1% E2 # # S = E2 | E1 S def this(name, &block); return Parslet::Atoms::Entity.new(name, &block) end def epsilon; any.absent? end # Traditional repetition will try as long as the pattern can be matched and # then give up. This is greedy and blind. a = str('a').as(:e) >> this('a') { a }.as(:rec) | epsilon # Here's a pattern match that is greedy and non-blind. The first pattern # 'a'* will be tried as many times as possible, while still matching the # end pattern 'aa'. b = str('aa').as(:e2) >> epsilon | str('a').as(:e1) >> this('b') { b }.as(:rec) p a.parse('aaaa') p b p b.parse('aaaa') parslet-1.5.0/example/json.rb0000644000175000017500000000520712270776314014623 0ustar meromero$:.unshift File.dirname(__FILE__) + "/../lib" # # MIT License - (c) 2011 John Mettraux # require 'rubygems' require 'parslet' # gem install parslet module MyJson class Parser < Parslet::Parser rule(:spaces) { match('\s').repeat(1) } rule(:spaces?) { spaces.maybe } rule(:comma) { spaces? >> str(',') >> spaces? } rule(:digit) { match('[0-9]') } rule(:number) { ( str('-').maybe >> ( str('0') | (match('[1-9]') >> digit.repeat) ) >> ( str('.') >> digit.repeat(1) ).maybe >> ( match('[eE]') >> (str('+') | str('-')).maybe >> digit.repeat(1) ).maybe ).as(:number) } rule(:string) { str('"') >> ( str('\\') >> any | str('"').absent? >> any ).repeat.as(:string) >> str('"') } rule(:array) { str('[') >> spaces? >> (value >> (comma >> value).repeat).maybe.as(:array) >> spaces? >> str(']') } rule(:object) { str('{') >> spaces? >> (entry >> (comma >> entry).repeat).maybe.as(:object) >> spaces? >> str('}') } rule(:value) { string | number | object | array | str('true').as(:true) | str('false').as(:false) | str('null').as(:null) } rule(:entry) { ( string.as(:key) >> spaces? >> str(':') >> spaces? 
>> value.as(:val) ).as(:entry) } rule(:attribute) { (entry | value).as(:attribute) } rule(:top) { spaces? >> value >> spaces? } root(:top) end class Transformer < Parslet::Transform class Entry < Struct.new(:key, :val); end rule(:array => subtree(:ar)) { ar.is_a?(Array) ? ar : [ ar ] } rule(:object => subtree(:ob)) { (ob.is_a?(Array) ? ob : [ ob ]).inject({}) { |h, e| h[e.key] = e.val; h } } rule(:entry => { :key => simple(:ke), :val => simple(:va) }) { Entry.new(ke, va) } rule(:string => simple(:st)) { st.to_s } rule(:number => simple(:nb)) { nb.match(/[eE\.]/) ? Float(nb) : Integer(nb) } rule(:null => simple(:nu)) { nil } rule(:true => simple(:tr)) { true } rule(:false => simple(:fa)) { false } end def self.parse(s) parser = Parser.new transformer = Transformer.new tree = parser.parse(s) puts; p tree; puts out = transformer.apply(tree) out end end s = %{ [ 1, 2, 3, null, "asdfasdf asdfds", { "a": -1.2 }, { "b": true, "c": false }, 0.1e24, true, false, [ 1 ] ] } out = MyJson.parse(s) p out; puts out == [ 1, 2, 3, nil, "asdfasdf asdfds", { "a" => -1.2 }, { "b" => true, "c" => false }, 0.1e24, true, false, [ 1 ] ] || raise("MyJson is a failure") parslet-1.5.0/example/ip_address.rb0000644000175000017500000000614412270776314015770 0ustar meromero# This example is heavily inspired by citrus' ip.citrus. Have a look at both # of these to get some choice! # The grammars in this file conform to the ABNF given in Appendix A of RFC 3986 # Uniform Resource Identifier (URI): Generic Syntax. # # See http://tools.ietf.org/html/rfc3986#appendix-A for more information. $:.unshift File.dirname(__FILE__) + "/../lib" require 'pp' require 'parslet' module IPv4 include Parslet # A host identified by an IPv4 literal address is represented in # dotted-decimal notation (a sequence of four decimal numbers in the range 0 # to 255, separated by "."), as described in [RFC1123] by reference to # [RFC0952]. 
Note that other forms of dotted notation may be interpreted on # some platforms, as described in Section 7.4, but only the dotted-decimal # form of four octets is allowed by this grammar. rule(:ipv4) { (dec_octet >> str('.') >> dec_octet >> str('.') >> dec_octet >> str('.') >> dec_octet).as(:ipv4) } rule(:dec_octet) { str('25') >> match("[0-5]") | str('2') >> match("[0-4]") >> digit | str('1') >> digit >> digit | match('[1-9]') >> digit | digit } rule(:digit) { match('[0-9]') } end # Must be used in concert with IPv4 module IPv6 include Parslet rule(:colon) { str(':') } rule(:dcolon) { colon >> colon } # h16 : def h16r(times) (h16 >> colon).repeat(times, times) end # : h16 def h16l(times) (colon >> h16).repeat(0,times) end # A 128-bit IPv6 address is divided into eight 16-bit pieces. Each piece is # represented numerically in case-insensitive hexadecimal, using one to four # hexadecimal digits (leading zeroes are permitted). The eight encoded # pieces are given most-significant first, separated by colon characters. # Optionally, the least-significant two pieces may instead be represented in # IPv4 address textual format. A sequence of one or more consecutive # zero-valued 16-bit pieces within the address may be elided, omitting all # their digits and leaving exactly two consecutive colons in their place to # mark the elision. 
rule(:ipv6) { ( ( h16r(6) | dcolon >> h16r(5) | h16.maybe >> dcolon >> h16r(4) | (h16 >> h16l(1)).maybe >> dcolon >> h16r(3) | (h16 >> h16l(2)).maybe >> dcolon >> h16r(2) | (h16 >> h16l(3)).maybe >> dcolon >> h16r(1) | (h16 >> h16l(4)).maybe >> dcolon ) >> ls32 | (h16 >> h16l(5)).maybe >> dcolon >> h16 | (h16 >> h16l(6)).maybe >> dcolon ).as(:ipv6) } rule(:h16) { hexdigit.repeat(1,4) } rule(:ls32) { (h16 >> colon >> h16) | ipv4 } rule(:hexdigit) { digit | match("[a-fA-F]") } end class Parser include IPv4 include IPv6 def parse(str) (ipv4 | ipv6).parse(str) end end %W( 0.0.0.0 255.255.255.255 255.255.255 1:2:3:4:5:6:7:8 12AD:34FC:A453:1922:: 12AD::34FC 12AD:: :: 1:2 ).each do |address| parser = Parser.new printf "%30s -> ", address begin result = parser.parse(address) puts result.inspect rescue Parslet::ParseFailed => m puts "Failed: #{m}" end end parslet-1.5.0/example/ignore.rb0000644000175000017500000000123412270776314015131 0ustar meromero# A small example on how to make parslet ignore parts of the parse tree. $:.unshift File.dirname(__FILE__) + "/../lib" require 'parslet' class IgnoreParslet < Parslet::Atoms::Base def initialize(parslet) @parslet = parslet end def to_s_inner(prec) @parslet.to_s(prec) end def try(source, context, consume_all) success, value = result = @parslet.try(source, context, consume_all) return succ(nil) if success return result end end module IgnoreDSL def ignore IgnoreParslet.new(self) end end class Parslet::Atoms::Base include IgnoreDSL end include Parslet p (str('a') >> str('b').ignore >> str('c')). parse('abc')parslet-1.5.0/example/erb.rb0000644000175000017500000000264612270776314014426 0ustar meromero# Example that demonstrates how a simple erb-like parser could be constructed. $:.unshift File.dirname(__FILE__) + "/../lib" require 'parslet' class ErbParser < Parslet::Parser rule(:ruby) { (str('%>').absent? 
>> any).repeat.as(:ruby) } rule(:expression) { (str('=') >> ruby).as(:expression) } rule(:comment) { (str('#') >> ruby).as(:comment) } rule(:code) { ruby.as(:code) } rule(:erb) { expression | comment | code } rule(:erb_with_tags) { str('<%') >> erb >> str('%>') } rule(:text) { (str('<%').absent? >> any).repeat(1) } rule(:text_with_ruby) { (text.as(:text) | erb_with_tags).repeat.as(:text) } root(:text_with_ruby) end parser = ErbParser.new p parser.parse "The value of x is <%= x %>." p parser.parse "<% 1 + 2 %>" p parser.parse "<%# commented %>" evaluator = Parslet::Transform.new do erb_binding = binding rule(:code => { :ruby => simple(:ruby) }) { eval(ruby, erb_binding); '' } rule(:expression => { :ruby => simple(:ruby) }) { eval(ruby, erb_binding) } rule(:comment => { :ruby => simple(:ruby) }) { '' } rule(:text => simple(:text)) { text } rule(:text => sequence(:texts)) { texts.join } end puts evaluator.apply(parser.parse(<<-ERB The <% a = 2 %>not printed result of "a = 2". The <%# a = 1 %>not printed non-evaluated comment "a = 1", see the value of a below. The <%= 'nicely' %> printed result. The <% b = 3 %>value of a is <%= a %>, and b is <%= b %>. ERB )) parslet-1.5.0/example/empty.rb0000644000175000017500000000044412270776314015006 0ustar meromero# Basically just demonstrates that you can leave rules empty and get a nice # NotImplementedError. A way to quickly spec out your parser rules? $:.unshift File.dirname(__FILE__) + "/../lib" require 'parslet' class Parser < Parslet::Parser rule(:empty) { } end Parser.new.empty.parslet parslet-1.5.0/example/email_parser.rb0000644000175000017500000000261512270776314016315 0ustar meromero#!/usr/bin/env ruby # Example contributed by Hal Brodigan (postmodern). Thanks! $:.unshift File.dirname(__FILE__) + "/../lib" require 'parslet' require 'parslet/convenience' class EmailParser < Parslet::Parser rule(:space) { match('\s').repeat(1) } rule(:space?) { space.maybe } rule(:dash?) { match['_-'].maybe } rule(:at) { str('@') | (dash? 
>> (str('at') | str('AT')) >> dash?) } rule(:dot) { str('.') | (dash? >> (str('dot') | str('DOT')) >> dash?) } rule(:word) { match('[a-z0-9]').repeat(1).as(:word) >> space? } rule(:separator) { dot.as(:dot) >> space? | space } rule(:words) { word >> (separator >> word).repeat } rule(:email) { (words.as(:username) >> space? >> at >> space? >> words).as(:email) } root(:email) end class EmailSanitizer < Parslet::Transform rule(:dot => simple(:dot), :word => simple(:word)) { ".#{word}" } rule(:word => simple(:word)) { word } rule(:username => sequence(:username)) { username.join + "@" } rule(:username => simple(:username)) { username.to_s + "@" } rule(:email => sequence(:email)) { email.join } end parser = EmailParser.new sanitizer = EmailSanitizer.new unless ARGV[0] STDERR.puts "usage: #{$0} \"EMAIL_ADDR\"" STDOUT.puts "since you haven't specified any EMAIL_ADDR, for testing purposes we're using a.b.c.d@gmail.com" end p sanitizer.apply(parser.parse_with_debug(ARGV[0] || 'a.b.c.d@gmail.com')) parslet-1.5.0/example/documentation.rb0000644000175000017500000000052512270776314016521 0ustar meromero# A small example that shows a really small parser and what happens on parser # errors. $:.unshift File.dirname(__FILE__) + "/../lib" require 'pp' require 'parslet' class MyParser < Parslet::Parser rule(:a) { str('a').repeat } def parse(str) a.parse(str) end end pp MyParser.new.parse('aaaa') pp MyParser.new.parse('bbbb') parslet-1.5.0/example/deepest_errors.rb0000644000175000017500000000440012270776314016671 0ustar meromero$:.unshift File.dirname(__FILE__) + "/../lib" # This example demonstrates how to do deepest error reporting, as invented # by John Mettraux (issue #64). require 'parslet' require 'parslet/convenience' def prettify(str) puts " "*3 + " "*4 + "." + " "*4 + "10" + " "*3 + "." 
+ " "*4 + "20" str.lines.each_with_index do |line, index| printf "%02d %s\n", index+1, line.chomp end end class Parser < Parslet::Parser # commons rule(:space) { match('[ \t]').repeat(1) } rule(:space?) { space.maybe } rule(:newline) { match('[\r\n]') } rule(:comment) { str('#') >> match('[^\r\n]').repeat } rule(:line_separator) { (space? >> ((comment.maybe >> newline) | str(';')) >> space?).repeat(1) } rule(:blank) { line_separator | space } rule(:blank?) { blank.maybe } rule(:identifier) { match('[a-zA-Z0-9_]').repeat(1) } # res_statement rule(:reference) { (str('@').repeat(1,2) >> identifier).as(:reference) } rule(:res_action_or_link) { str('.').as(:dot) >> (identifier >> str('?').maybe ).as(:name) >> str('()') } rule(:res_actions) { ( reference ).as(:resources) >> ( res_action_or_link.as(:res_action) ).repeat(0).as(:res_actions) } rule(:res_statement) { res_actions >> (str(':') >> identifier.as(:name)).maybe.as(:res_field) } # expression rule(:expression) { res_statement } # body rule(:body) { (line_separator >> (block | expression)).repeat(1).as(:body) >> line_separator } # blocks rule(:begin_block) { (str('concurrent').as(:type) >> space).maybe.as(:pre) >> str('begin').as(:begin) >> body >> str('end') } rule(:define_block) { str('define').as(:define) >> space >> identifier.as(:name) >> str('()') >> body >> str('end') } rule(:block) { define_block | begin_block } # root rule(:radix) { line_separator.maybe >> block >> line_separator.maybe } root(:radix) end ds = [ %{ define f() @res.name end }, %{ define f() begin @res.name end end } ] ds.each do |d| puts '-' * 80 prettify(d) parser = Parser.new begin parser.parse_with_debug(d, :reporter => Parslet::ErrorReporter::Deepest.new) end end puts '-' * 80parslet-1.5.0/example/comments.rb0000644000175000017500000000165212270776314015477 0ustar meromero# A small example on how to parse common types of comments. The example # started out with parser code from Stephen Waits. 
$:.unshift File.dirname(__FILE__) + "/../lib" require 'pp' require 'parslet' require 'parslet/convenience' class ALanguage < Parslet::Parser root(:lines) rule(:lines) { line.repeat } rule(:line) { spaces >> expression.repeat >> newline } rule(:newline) { str("\n") >> str("\r").maybe } rule(:expression) { (str('a').as(:a) >> spaces).as(:exp) } rule(:spaces) { space.repeat } rule(:space) { multiline_comment | line_comment | str(' ') } rule(:line_comment) { (str('//') >> (newline.absent? >> any).repeat).as(:line) } rule(:multiline_comment) { (str('/*') >> (str('*/').absent? >> any).repeat >> str('*/')).as(:multi) } end code = %q( a // line comment a a a // line comment a /* inline comment */ a /* multiline comment */ ) pp ALanguage.new.parse_with_debug(code) parslet-1.5.0/example/capture.rb0000644000175000017500000000262412270776314015315 0ustar meromero # This example demonstrates how pieces of input can be captured and matched # against later on. Without this, you cannot match here-documents and other # self-dependent grammars. $:.unshift File.dirname(__FILE__) + "/../lib" require 'parslet' require 'parslet/convenience' require 'pp' class CapturingParser < Parslet::Parser root :document # Introduce a scope for each document. This ensures that documents can be # nested. rule(:document) { scope { doc_start >> text >> doc_end } } # Start of a document is a heredoc marker. This is captured in :marker rule(:doc_start) { str('<') >> marker >> newline } rule(:marker) { match['A-Z'].repeat(1).capture(:marker) } # The content of a document can be either lines of text or another # document, introduced by > any >> (newline.absent? >> any).repeat >> newline } # The end of the document is marked by the marker that was at the beginning # of the document, by itself on a line. 
rule(:doc_end) { captured_marker } rule(:captured_marker) { dynamic { |source, context| str(context.captures[:marker]) } } rule(:newline) { match["\n"] } end parser = CapturingParser.new pp parser.parse_with_debug %Q(> (add_op >> multiplication.as(:r)).repeat(1) | multiplication } rule(:multiplication) { integer.as(:l) >> (mult_op >> integer.as(:r)).repeat(1) | integer } rule(:integer) { digit.repeat(1).as(:i) >> space? } rule(:mult_op) { match['*/'].as(:o) >> space? } rule(:add_op) { match['+-'].as(:o) >> space? } rule(:digit) { match['0-9'] } rule(:space?) { match['\s'].repeat } end # Classes for the abstract syntax tree. Int = Struct.new(:int) { def eval; self end def op(operation, other) left = int right = other.int Int.new( case operation when '+' left + right when '-' left - right when '*' left * right when '/' left / right end) end def to_i int end } Seq = Struct.new(:sequence) { def eval sequence.reduce { |accum, operation| operation.call(accum) } end } LeftOp = Struct.new(:operation, :right) { def call(left) left = left.eval right = self.right.eval left.op(operation, right) end } # Transforming intermediary syntax tree into a real AST. class CalcTransform < Parslet::Transform rule(i: simple(:i)) { Int.new(Integer(i)) } rule(o: simple(:o), r: simple(:i)) { LeftOp.new(o, i) } rule(l: simple(:i)) { i } rule(sequence(:seq)) { Seq.new(seq) } end # And this calls everything in the right order. 
def calculate(str) intermediary_tree = CalcParser.new.parse(str) abstract_tree = CalcTransform.new.apply(intermediary_tree) result = abstract_tree.eval result.to_i end # A test suite for the above parser describe CalcParser do let(:p) { described_class.new } describe '#integer' do let(:i) { p.integer } it "parses integers" do i.should parse('1') i.should parse('123') end it "consumes trailing white space" do i.should parse('123 ') end it "doesn't parse floats" do i.should_not parse('1.3') end end describe '#multiplication' do let(:m) { p.multiplication } it "parses simple multiplication" do m.should parse('1*2') end it "parses division" do m.should parse('1/2') end end describe '#addition' do let(:a) { p.addition } it "parses simple addition" do a.should parse('1+2') a.should parse('1+2+3-4') end end end describe CalcTransform do def t(obj) described_class.new.apply(obj) end it "transforms integers" do t(i: '1').should == Int.new(1) end it "unwraps left operand" do t(l: :obj).should == :obj end end describe 'whole computation specs' do def self.result_of(str, int) it(str) { calculate(str).should == int } end result_of '1+1', 2 result_of '1-1-1', -1 result_of '1+1+3*5/2', 9 result_of '123*2', 246 end # Enable these if you want to change the code. # RSpec::Core::Runner.run([], $stderr, $stdout) str = ARGV.join str = '123*2' if str.match(/^\s*$/) print "#{str} (command line): -> " puts calculate(str) parslet-1.5.0/example/boolean_algebra.rb0000644000175000017500000000357612270776314016755 0ustar meromero$:.unshift File.dirname(__FILE__) + "/../lib" require "parslet" require "pp" # Parses strings like "var1 and (var2 or var3)" respecting operator precedence # and parentheses. After that transforms the parse tree into an array of # arrays like this: # # [["1", "2"], ["1", "3"]] # # The array represents a DNF (disjunctive normal form). Elements of outer # array are connected with "or" operator, while elements of inner arrays are # joined with "and". 
# class Parser < Parslet::Parser rule(:space) { match[" "].repeat(1) } rule(:space?) { space.maybe } rule(:lparen) { str("(") >> space? } rule(:rparen) { str(")") >> space? } rule(:and_operator) { str("and") >> space? } rule(:or_operator) { str("or") >> space? } rule(:var) { str("var") >> match["0-9"].repeat(1).as(:var) >> space? } # The primary rule deals with parentheses. rule(:primary) { lparen >> or_operation >> rparen | var } # Note that following rules are both right-recursive. rule(:and_operation) { (primary.as(:left) >> and_operator >> and_operation.as(:right)).as(:and) | primary } rule(:or_operation) { (and_operation.as(:left) >> or_operator >> or_operation.as(:right)).as(:or) | and_operation } # We start at the lowest precedence rule. root(:or_operation) end class Transformer < Parslet::Transform rule(:var => simple(:var)) { [[String(var)]] } rule(:or => { :left => subtree(:left), :right => subtree(:right) }) do (left + right) end rule(:and => { :left => subtree(:left), :right => subtree(:right) }) do res = [] left.each do |l| right.each do |r| res << (l + r) end end res end end pp tree = Parser.new.parse("var1 and (var2 or var3)") # {:and=> # {:left=>{:var=>"1"@3}, # :right=>{:or=>{:left=>{:var=>"2"@13}, :right=>{:var=>"3"@21}}}}} pp Transformer.new.apply(tree) # [["1", "2"], ["1", "3"]] parslet-1.5.0/lib/0000755000175000017500000000000012270776314012434 5ustar meromeroparslet-1.5.0/lib/parslet.rb0000644000175000017500000002025112270776314014433 0ustar meromero# A simple parser generator library. Typical usage would look like this: # # require 'parslet' # # class MyParser < Parslet::Parser # rule(:a) { str('a').repeat } # root(:a) # end # # pp MyParser.new.parse('aaaa') # => 'aaaa'@0 # pp MyParser.new.parse('bbbb') # => Parslet::Atoms::ParseFailed: # # Don't know what to do with bbbb at line 1 char 1. # # The simple DSL allows you to define grammars in PEG-style. 
This kind of # grammar construction does away with the ambiguities that usually comes with # parsers; instead, it allows you to construct grammars that are easier to # debug, since less magic is involved. # # Parslet is typically used in stages: # # # * Parsing the input string; this yields an intermediary tree, see # Parslet.any, Parslet.match, Parslet.str, Parslet::ClassMethods#rule and # Parslet::ClassMethods#root. # * Transformation of the tree into something useful to you, see # Parslet::Transform, Parslet.simple, Parslet.sequence and Parslet.subtree. # # The first stage is traditionally intermingled with the second stage; output # from the second stage is usually called the 'Abstract Syntax Tree' or AST. # # The stages are completely decoupled; You can change your grammar around and # use the second stage to isolate the rest of your code from the changes # you've effected. # # == Further reading # # All parslet atoms are subclasses of {Parslet::Atoms::Base}. You might want to # look at all of those: {Parslet::Atoms::Re}, {Parslet::Atoms::Str}, # {Parslet::Atoms::Repetition}, {Parslet::Atoms::Sequence}, # {Parslet::Atoms::Alternative}. # # == When things go wrong # # A parse that fails will raise {Parslet::ParseFailed}. This exception contains # all the details of what went wrong, including a detailed error trace that # can be printed out as an ascii tree. ({Parslet::Cause}) # module Parslet # Extends classes that include Parslet with the module # {Parslet::ClassMethods}. # def self.included(base) base.extend(ClassMethods) end # Raised when the parse failed to match. It contains the message that should # be presented to the user. More details can be extracted from the # exceptions #cause member: It contains an instance of {Parslet::Cause} that # stores all the details of your failed parse in a tree structure. 
# # begin # parslet.parse(str) # rescue Parslet::ParseFailed => failure # puts failure.cause.ascii_tree # end # # Alternatively, you can just require 'parslet/convenience' and call the # method #parse_with_debug instead of #parse. This method will never raise # and print error trees to stdout. # # require 'parslet/convenience' # parslet.parse_with_debug(str) # class ParseFailed < StandardError def initialize(message, cause=nil) super(message) @cause = cause end # Why the parse failed. # # @return [Parslet::Cause] attr_reader :cause end module ClassMethods # Define an entity for the parser. This generates a method of the same # name that can be used as part of other patterns. Those methods can be # freely mixed in your parser class with real ruby methods. # # class MyParser # include Parslet # # rule(:bar) { str('bar') } # rule(:twobar) do # bar >> bar # end # # root :twobar # end # def rule(name, &definition) define_method(name) do @rules ||= {} # memoization return @rules[name] if @rules.has_key?(name) # Capture the self of the parser class along with the definition. definition_closure = proc { self.instance_eval(&definition) } @rules[name] = Atoms::Entity.new(name, &definition_closure) end end end # Allows for delayed construction of #match. See also Parslet.match. # # @api private class DelayedMatchConstructor def [](str) Atoms::Re.new("[" + str + "]") end end # Returns an atom matching a character class. All regular expressions can be # used, as long as they match only a single character at a time. 
# # match('[ab]') # will match either 'a' or 'b' # match('[\n\s]') # will match newlines and spaces # # There is also another (convenience) form of this method: # # match['a-z'] # synonymous to match('[a-z]') # match['\n'] # synonymous to match('[\n]') # # @overload match(str) # @param str [String] character class to match (regexp syntax) # @return [Parslet::Atoms::Re] a parslet atom # def match(str=nil) return DelayedMatchConstructor.new unless str return Atoms::Re.new(str) end module_function :match # Returns an atom matching the +str+ given: # # str('class') # will match 'class' # # @param str [String] string to match verbatim # @return [Parslet::Atoms::Str] a parslet atom # def str(str) Atoms::Str.new(str) end module_function :str # Returns an atom matching any character. It acts like the '.' (dot) # character in regular expressions. # # any.parse('a') # => 'a' # # @return [Parslet::Atoms::Re] a parslet atom # def any Atoms::Re.new('.') end module_function :any # Introduces a new capture scope. This means that all old captures stay # accessible, but new values stored will only be available during the block # given and the old values will be restored after the block. # # Example: # # :a will be available until the end of the block. Afterwards, # # :a from the outer scope will be available again, if such a thing # # exists. # scope { str('a').capture(:a) } # def scope(&block) Parslet::Atoms::Scope.new(block) end module_function :scope # Designates a piece of the parser as being dynamic. Dynamic parsers can # either return a parser at runtime, which will be applied on the input, or # return a result from a parse. # # Dynamic parse pieces are never cached and can introduce performance # abnormalitites - use sparingly where other constructs fail. # # Example: # # Parses either 'a' or 'b', depending on the weather # dynamic { rand() < 0.5 ? 
str('a') : str('b') } # def dynamic(&block) Parslet::Atoms::Dynamic.new(block) end module_function :dynamic # A special kind of atom that allows embedding whole treetop expressions # into parslet construction. # # # the same as str('a') >> str('b').maybe # exp(%Q("a" "b"?)) # # @param str [String] a treetop expression # @return [Parslet::Atoms::Base] the corresponding parslet parser # def exp(str) Parslet::Expression.new(str).to_parslet end module_function :exp # Returns a placeholder for a tree transformation that will only match a # sequence of elements. The +symbol+ you specify will be the key for the # matched sequence in the returned dictionary. # # # This would match a body element that contains several declarations. # { :body => sequence(:declarations) } # # The above example would match :body => ['a', 'b'], but not # :body => 'a'. # # see {Parslet::Transform} # def sequence(symbol) Pattern::SequenceBind.new(symbol) end module_function :sequence # Returns a placeholder for a tree transformation that will only match # simple elements. This matches everything that #sequence # doesn't match. # # # Matches a single header. # { :header => simple(:header) } # # see {Parslet::Transform} # def simple(symbol) Pattern::SimpleBind.new(symbol) end module_function :simple # Returns a placeholder for tree transformation patterns that will match # any kind of subtree. 
# # { :expression => subtree(:exp) } # def subtree(symbol) Pattern::SubtreeBind.new(symbol) end module_function :subtree autoload :Expression, 'parslet/expression' end require 'parslet/slice' require 'parslet/cause' require 'parslet/source' require 'parslet/atoms' require 'parslet/pattern' require 'parslet/pattern/binding' require 'parslet/transform' require 'parslet/parser' require 'parslet/error_reporter' require 'parslet/scope'parslet-1.5.0/lib/parslet/0000755000175000017500000000000012270776314014106 5ustar meromeroparslet-1.5.0/lib/parslet/transform.rb0000644000175000017500000001522612270776314016454 0ustar meromero require 'parslet/pattern' # Transforms an expression tree into something else. The transformation # performs a depth-first, post-order traversal of the expression tree. During # that traversal, each time a rule matches a node, the node is replaced by the # result of the block associated to the rule. Otherwise the node is accepted # as is into the result tree. # # This is almost what you would generally do with a tree visitor, except that # you can match several levels of the tree at once. # # As a consequence of this, the resulting tree will contain pieces of the # original tree and new pieces. Most likely, you will want to transform the # original tree wholly, so this isn't a problem. # # You will not be able to create a loop, given that each node will be replaced # only once and then left alone. This means that the results of a replacement # will not be acted upon. # # Example: # # class Example < Parslet::Transform # rule(:string => simple(:x)) { # (1) # StringLiteral.new(x) # } # end # # A tree transform (Parslet::Transform) is defined by a set of rules. Each # rule can be defined by calling #rule with the pattern as argument. The block # given will be called every time the rule matches somewhere in the tree given # to #apply. It is passed a Hash containing all the variable bindings of this # pattern match. 
# # In the above example, (1) illustrates a simple matching rule. # # Let's say you want to parse matching parentheses and distill a maximum nest # depth. You would probably write a parser like the one in example/parens.rb; # here's the relevant part: # # rule(:balanced) { # str('(').as(:l) >> balanced.maybe.as(:m) >> str(')').as(:r) # } # # If you now apply this to a string like '(())', you get a intermediate parse # tree that looks like this: # # { # l: '(', # m: { # l: '(', # m: nil, # r: ')' # }, # r: ')' # } # # This parse tree is good for debugging, but what we would really like to have # is just the nesting depth. This transformation rule will produce that: # # rule(:l => '(', :m => simple(:x), :r => ')') { # # innermost :m will contain nil # x.nil? ? 1 : x+1 # } # # = Usage patterns # # There are four ways of using this class. The first one is very much # recommended, followed by the second one for generality. The other ones are # omitted here. # # Recommended usage is as follows: # # class MyTransformator < Parslet::Transform # rule(...) { ... } # rule(...) { ... } # # ... # end # MyTransformator.new.apply(tree) # # Alternatively, you can use the Transform class as follows: # # transform = Parslet::Transform.new do # rule(...) { ... } # end # transform.apply(tree) # # = Execution context # # The execution context of action blocks differs depending on the arity of # said blocks. This can be confusing. It is however somewhat intentional. You # should not create fat Transform descendants containing a lot of helper methods, # instead keep your AST class construction in global scope or make it available # through a factory. The following piece of code illustrates usage of global # scope: # # transform = Parslet::Transform.new do # rule(...) { AstNode.new(a_variable) } # rule(...) 
{ Ast.node(a_variable) } # modules are nice # end # transform.apply(tree) # # And here's how you would use a class builder (a factory): # # transform = Parslet::Transform.new do # rule(...) { builder.add_node(a_variable) } # rule(...) { |d| d[:builder].add_node(d[:a_variable]) } # end # transform.apply(tree, :builder => Builder.new) # # As you can see, Transform allows you to inject local context for your rule # action blocks to use. # class Parslet::Transform # FIXME: Maybe only part of it? Or maybe only include into constructor # context? include Parslet autoload :Context, 'parslet/transform/context' class << self # FIXME: Only do this for subclasses? include Parslet # Define a rule for the transform subclass. # def rule(expression, &block) @__transform_rules ||= [] @__transform_rules << [Parslet::Pattern.new(expression), block] end # Allows accessing the class' rules # def rules @__transform_rules || [] end end def initialize(&block) @rules = [] if block instance_eval(&block) end end # Defines a rule to be applied whenever apply is called on a tree. A rule # is composed of two parts: # # * an *expression pattern* # * a *transformation block* # def rule(expression, &block) @rules << [ Parslet::Pattern.new(expression), block ] end # Applies the transformation to a tree that is generated by Parslet::Parser # or a simple parslet. Transformation will proceed down the tree, replacing # parts/all of it with new objects. The resulting object will be returned. # def apply(obj, context=nil) transform_elt( case obj when Hash recurse_hash(obj, context) when Array recurse_array(obj, context) else obj end, context ) end # Executes the block on the bindings obtained by Pattern#match, if such a match # can be made. Depending on the arity of the given block, it is called in # one of two environments: the current one or a clean toplevel environment. # # If you would like the current environment preserved, please use the # arity 1 variant of the block. 
Alternatively, you can inject a context object # and call methods on it (think :ctx => self). # # # the local variable a is simulated # t.call_on_match(:a => :b) { a } # # no change of environment here # t.call_on_match(:a => :b) { |d| d[:a] } # def call_on_match(bindings, block) if block if block.arity == 1 return block.call(bindings) else context = Context.new(bindings) return context.instance_eval(&block) end end end # Allow easy access to all rules, the ones defined in the instance and the # ones predefined in a subclass definition. # def rules self.class.rules + @rules end # @api private # def transform_elt(elt, context) rules.each do |pattern, block| if bindings=pattern.match(elt, context) # Produces transformed value return call_on_match(bindings, block) end end # No rule matched - element is not transformed return elt end # @api private # def recurse_hash(hsh, ctx) hsh.inject({}) do |new_hsh, (k,v)| new_hsh[k] = apply(v, ctx) new_hsh end end # @api private # def recurse_array(ary, ctx) ary.map { |elt| apply(elt, ctx) } end endparslet-1.5.0/lib/parslet/transform/0000755000175000017500000000000012270776314016121 5ustar meromeroparslet-1.5.0/lib/parslet/transform/context.rb0000644000175000017500000000130712270776314020133 0ustar meromerorequire 'blankslate' # Provides a context for tree transformations to run in. The context allows # accessing each of the bindings in the bindings hash as local method. # # Example: # # ctx = Context.new(:a => :b) # ctx.instance_eval do # a # => :b # end # class Parslet::Transform::Context < BlankSlate reveal :methods reveal :respond_to? reveal :inspect reveal :to_s reveal :instance_variable_set def meta_def(name, &body) metaclass = class < tuple for the given position. If no position is # given, line/column information is returned for the current position # given by #pos. 
# def line_and_column(position=nil) @line_cache.line_and_column(position || self.pos) end end endparslet-1.5.0/lib/parslet/source/0000755000175000017500000000000012270776314015406 5ustar meromeroparslet-1.5.0/lib/parslet/source/line_cache.rb0000644000175000017500000000527012270776314020011 0ustar meromero class Parslet::Source # A cache for line start positions. # class LineCache def initialize # Stores line endings as a simple position number. The first line always # starts at 0; numbers beyond the biggest entry are on any line > size, # but probably make a scan to that position neccessary. @line_ends = [] @line_ends.extend RangeSearch end # Returns a tuple for the given input position. # def line_and_column(pos) eol_idx = @line_ends.lbound(pos) if eol_idx # eol_idx points to the offset that ends the current line. # Let's try to find the offset that starts it: offset = eol_idx>0 && @line_ends[eol_idx-1] || 0 return [eol_idx+1, pos-offset+1] else # eol_idx is nil, that means that we're beyond the last line end that # we know about. Pretend for now that we're just on the last line. offset = @line_ends.last || 0 return [@line_ends.size+1, pos-offset+1] end end def scan_for_line_endings(start_pos, buf) return unless buf return unless buf.index("\n") cur = -1 # If we have already read part or all of buf, we already know about # line ends in that portion. remove it and correct cur (search index) if @last_line_end && start_pos < @last_line_end # Let's not search the range from start_pos to last_line_end again. cur = @last_line_end - start_pos -1 end # Scan the string for line endings; store the positions of all endings # in @line_ends. while buf && cur = buf.index("\n", cur+1) @last_line_end = (start_pos + cur+1) @line_ends << @last_line_end end end end # Mixin for arrays that implicitly give a number of ranges, where one range # begins where the other one ends. 
# # Example: # # [10, 20, 30] # # would describe [0, 10], (10, 20], (20, 30] # module RangeSearch def find_mid(left, right) # NOTE: Jonathan Hinkle reported that when mathn is required, just # dividing and relying on the integer truncation is not enough. left + ((right - left) / 2).floor end # Scans the array for the first number that is > than bound. Returns the # index of that number. # def lbound(bound) return nil if empty? return nil unless last > bound left = 0 right = size - 1 loop do mid = find_mid(left, right) if self[mid] > bound right = mid else # assert: self[mid] <= bound left = mid+1 end if right <= left return right end end end end end parslet-1.5.0/lib/parslet/slice.rb0000644000175000017500000000461112270776314015534 0ustar meromero # A slice is a small part from the parse input. A slice mainly behaves like # any other string, except that it remembers where it came from (offset in # original input). # # == Extracting line and column # # Using the #line_and_column method, you can extract the line and column in # the original input where this slice starts. # # Example: # slice.line_and_column # => [1, 13] # slice.offset # => 12 # # == Likeness to strings # # Parslet::Slice behaves in many ways like a Ruby String. This likeness # however is not complete - many of the myriad of operations String supports # are not yet in Slice. You can always extract the internal string instance by # calling #to_s. # # These omissions are somewhat intentional. Rather than maintaining a full # delegation, we opt for a partial emulation that gets the job done. # class Parslet::Slice attr_reader :str, :offset attr_reader :line_cache # Construct a slice using a string, an offset and an optional line cache. # The line cache should be able to answer to the #line_and_column message. # def initialize(string, offset, line_cache=nil) @str, @offset = string, offset @line_cache = line_cache end # Compares slices to other slices or strings. 
# def == other str == other end # Match regular expressions. # def match(regexp) str.match(regexp) end # Returns the slices size in characters. # def size str.size end # Concatenate two slices; it is assumed that the second slice begins # where the first one ends. The offset of the resulting slice is the same # as the one of this slice. # def +(other) self.class.new(str + other.to_s, offset, line_cache) end # Returns a tuple referring to the original input. # def line_and_column raise ArgumentError, "No line cache was given, cannot infer line and column." \ unless line_cache line_cache.line_and_column(self.offset) end # Conversion operators ----------------------------------------------------- def to_str str end alias to_s to_str def to_slice self end def to_sym str.to_sym end def to_int Integer(str) end def to_i str.to_i end def to_f str.to_f end # Inspection & Debugging --------------------------------------------------- # Prints the slice as "string"@offset. def inspect str.inspect << "@#{offset}" end endparslet-1.5.0/lib/parslet/scope.rb0000644000175000017500000000124612270776314015547 0ustar meromeroclass Parslet::Scope # Raised when the accessed slot has never been assigned a value. 
# class NotFound < StandardError end class Binding attr_reader :parent def initialize(parent=nil) @parent = parent @hash = Hash.new end def [](k) @hash.has_key?(k) && @hash[k] || parent && parent[k] or raise NotFound end def []=(k,v) @hash.store(k,v) end end def [](k) @current[k] end def []=(k,v) @current[k] = v end def initialize @current = Binding.new end def push @current = Binding.new(@current) end def pop @current = @current.parent end endparslet-1.5.0/lib/parslet/rig/0000755000175000017500000000000012270776314014667 5ustar meromeroparslet-1.5.0/lib/parslet/rig/rspec.rb0000644000175000017500000000267712270776314016344 0ustar meromeroRSpec::Matchers.define(:parse) do |input, opts| as = block = nil result = trace = nil match do |parser| begin result = parser.parse(input) block ? block.call(result) : (as == result || as.nil?) rescue Parslet::ParseFailed => ex trace = ex.cause.ascii_tree if opts && opts[:trace] false end end failure_message_for_should do |is| if block "expected output of parsing #{input.inspect}" << " with #{is.inspect} to meet block conditions, but it didn't" else "expected " << (as ? "output of parsing #{input.inspect}"<< " with #{is.inspect} to equal #{as.inspect}, but was #{result.inspect}" : "#{is.inspect} to be able to parse #{input.inspect}") << (trace ? "\n"+trace : '') end end failure_message_for_should_not do |is| if block "expected output of parsing #{input.inspect} with #{is.inspect} not to meet block conditions, but it did" else "expected " << (as ? "output of parsing #{input.inspect}"<< " with #{is.inspect} not to equal #{as.inspect}" : "#{is.inspect} to not parse #{input.inspect}, but it did") end end # NOTE: This has a nodoc tag since the rdoc parser puts this into # Object, a thing I would never allow. chain :as do |expected_output, &block| as = expected_output block = block end end parslet-1.5.0/lib/parslet/pattern.rb0000644000175000017500000000676512270776314016126 0ustar meromero# Matches trees against expressions. 
Trees are formed by arrays and hashes # for expressing membership and sequence. The leafs of the tree are other # classes. # # A tree issued by the parslet library might look like this: # # { # :function_call => { # :name => 'foobar', # :args => [1, 2, 3] # } # } # # A pattern that would match against this tree would be: # # { :function_call => { :name => simple(:name), :args => sequence(:args) }} # # Note that Parslet::Pattern only matches at a given subtree; it wont try # to match recursively. To do that, please use Parslet::Transform. # class Parslet::Pattern def initialize(pattern) @pattern = pattern end # Decides if the given subtree matches this pattern. Returns the bindings # made on a successful match or nil if the match fails. If you specify # bindings to be a hash, the mappings in it will be treated like bindings # made during an attempted match. # # Pattern.new('a').match('a', :foo => 'bar') # => { :foo => 'bar' } # # @param subtree [String, Hash, Array] poro subtree returned by a parse # @param bindings [Hash] variable bindings to be verified # @return [Hash, nil] On success: variable bindings that allow a match. On # failure: nil # def match(subtree, bindings=nil) bindings = bindings && bindings.dup || Hash.new return bindings if element_match(subtree, @pattern, bindings) end # Returns true if the tree element given by +tree+ matches the expression # given by +exp+. This match must respect bindings already made in # +bindings+. Note that bindings is carried along and modified. # # @api private # def element_match(tree, exp, bindings) # p [:elm, tree, exp] case [tree, exp].map { |e| e.class } when [Hash,Hash] return element_match_hash(tree, exp, bindings) when [Array,Array] return element_match_ary_single(tree, exp, bindings) else # If elements match exactly, then that is good enough in all cases return true if tree == exp # If exp is a bind variable: Check if the binding matches if exp.respond_to?(:can_bind?) 
&& exp.can_bind?(tree) return element_match_binding(tree, exp, bindings) end # Otherwise: No match (we don't know anything about the element # combination) return false end end # @api private # def element_match_binding(tree, exp, bindings) var_name = exp.variable_name # TODO test for the hidden :_ feature. if var_name && bound_value = bindings[var_name] return bound_value == tree end # New binding: bindings.store var_name, tree return true end # @api private # def element_match_ary_single(sequence, exp, bindings) return false if sequence.size != exp.size return sequence.zip(exp).all? { |elt, subexp| element_match(elt, subexp, bindings) } end # @api private # def element_match_hash(tree, exp, bindings) # Early failure when one hash is bigger than the other return false unless exp.size == tree.size # We iterate over expected pattern, since we demand that the keys that # are there should be in tree as well. exp.each do |expected_key, expected_value| return false unless tree.has_key? expected_key # Recurse into the value and stop early on failure value = tree[expected_key] return false unless element_match(value, expected_value, bindings) end return true end endparslet-1.5.0/lib/parslet/pattern/0000755000175000017500000000000012270776314015563 5ustar meromeroparslet-1.5.0/lib/parslet/pattern/binding.rb0000644000175000017500000000235212270776314017524 0ustar meromero # Used internally for representing a bind placeholder in a Parslet::Transform # pattern. This is the superclass for all bindings. # # It defines the most permissive kind of bind, the one that matches any subtree # whatever it looks like. # class Parslet::Pattern::SubtreeBind < Struct.new(:symbol) def variable_name symbol end def inspect "#{bind_type_name}(#{symbol.inspect})" end def can_bind?(subtree) true end private def bind_type_name if md=self.class.name.match(/(\w+)Bind/) md.captures.first.downcase else # This path should never be used, but since this is for inspection only, # let's not raise. 
'unknown_bind' end end end # Binds a symbol to a simple subtree, one that is not either a sequence of # elements or a collection of attributes. # class Parslet::Pattern::SimpleBind < Parslet::Pattern::SubtreeBind def can_bind?(subtree) not [Hash, Array].include?(subtree.class) end end # Binds a symbol to a sequence of simple leafs ([element1, element2, ...]) # class Parslet::Pattern::SequenceBind < Parslet::Pattern::SubtreeBind def can_bind?(subtree) subtree.kind_of?(Array) && (not subtree.any? { |el| [Hash, Array].include?(el.class) }) end endparslet-1.5.0/lib/parslet/parser.rb0000644000175000017500000000346012270776314015732 0ustar meromero # The base class for all your parsers. Use as follows: # # require 'parslet' # # class MyParser < Parslet::Parser # rule(:a) { str('a').repeat } # root(:a) # end # # pp MyParser.new.parse('aaaa') # => 'aaaa' # pp MyParser.new.parse('bbbb') # => Parslet::Atoms::ParseFailed: # # Don't know what to do with bbbb at line 1 char 1. # # Parslet::Parser is also a grammar atom. This means that you can mix full # fledged parsers freely with small parts of a different parser. # # Example: # class ParserA < Parslet::Parser # root :aaa # rule(:aaa) { str('a').repeat(3,3) } # end # class ParserB < Parslet::Parser # root :expression # rule(:expression) { str('b') >> ParserA.new >> str('b') } # end # # In the above example, ParserB would parse something like 'baaab'. 
# class Parslet::Parser < Parslet::Atoms::Base include Parslet class <> (spaced('/') >> simple).repeat).as(:alt) } # sequence by simple concatenation 'a' 'b' rule(:simple) { occurrence.repeat(1).as(:seq) } # occurrence modifiers rule(:occurrence) { atom.as(:repetition) >> spaced('*').as(:sign) | atom.as(:repetition) >> spaced('+').as(:sign) | atom.as(:repetition) >> repetition_spec | atom.as(:maybe) >> spaced('?') | atom } rule(:atom) { spaced('(') >> expression.as(:unwrap) >> spaced(')') | dot | string | char_class } # a character class rule(:char_class) { (str('[') >> (str('\\') >> any | str(']').absent? >> any).repeat(1) >> str(']')).as(:match) >> space? } # anything at all rule(:dot) { spaced('.').as(:any) } # recognizing strings rule(:string) { str('\'') >> ( (str('\\') >> any) | (str("'").absent? >> any) ).repeat.as(:string) >> str('\'') >> space? } # repetition specification like {1, 2} rule(:repetition_spec) { spaced('{') >> integer.maybe.as(:min) >> spaced(',') >> integer.maybe.as(:max) >> spaced('}') } rule(:integer) { match['0-9'].repeat(1) } # whitespace handling rule(:space) { match("\s").repeat(1) } rule(:space?) { space.maybe } def spaced(str) str(str) >> space? end end class Transform < Parslet::Transform rule(:repetition => simple(:rep), :sign => simple(:sign)) { min = sign=='+' ? 
1 : 0 Parslet::Atoms::Repetition.new(rep, min, nil) } rule(:repetition => simple(:rep), :min => simple(:min), :max => simple(:max)) { Parslet::Atoms::Repetition.new(rep, Integer(min || 0), max && Integer(max) || nil) } rule(:alt => subtree(:alt)) { Parslet::Atoms::Alternative.new(*alt) } rule(:seq => sequence(:s)) { Parslet::Atoms::Sequence.new(*s) } rule(:unwrap => simple(:u)) { u } rule(:maybe => simple(:m)) { |d| d[:m].maybe } rule(:string => simple(:s)) { Parslet::Atoms::Str.new(s) } rule(:match => simple(:m)) { Parslet::Atoms::Re.new(m) } rule(:any => simple(:a)) { Parslet::Atoms::Re.new('.') } end end parslet-1.5.0/lib/parslet/export.rb0000644000175000017500000000764712270776314015772 0ustar meromero# Allows exporting parslet grammars to other lingos. require 'set' require 'parslet/atoms/visitor' class Parslet::Parser module Visitors class Citrus attr_reader :context, :output def initialize(context) @context = context end def visit_str(str) "\"#{str.inspect[1..-2]}\"" end def visit_re(match) match.to_s end def visit_entity(name, block) context.deferred(name, block) "(#{context.mangle_name(name)})" end def visit_named(name, parslet) parslet.accept(self) end def visit_sequence(parslets) '(' << parslets. map { |el| el.accept(self) }. join(' ') << ')' end def visit_repetition(tag, min, max, parslet) parslet.accept(self) << "#{min}*#{max}" end def visit_alternative(alternatives) '(' << alternatives. map { |el| el.accept(self) }. join(' | ') << ')' end def visit_lookahead(positive, bound_parslet) (positive ? '&' : '!') << bound_parslet.accept(self) end end class Treetop < Citrus def visit_repetition(tag, min, max, parslet) parslet.accept(self) << "#{min}..#{max}" end def visit_alternative(alternatives) '(' << alternatives. map { |el| el.accept(self) }. join(' / ') << ')' end end end # A helper class that formats Citrus and Treetop grammars as a string. 
# class PrettyPrinter attr_reader :visitor def initialize(visitor_klass) @visitor = visitor_klass.new(self) end # Pretty prints the given parslet using the visitor that has been # configured in initialize. Returns the string representation of the # Citrus or Treetop grammar. # def pretty_print(name, parslet) output = "grammar #{name}\n" output << rule('root', parslet) seen = Set.new loop do # @todo is constantly filled by the visitor (see #deferred). We # keep going until it is empty. break if @todo.empty? name, block = @todo.shift # Track what rules we've already seen. This breaks loops. next if seen.include?(name) seen << name output << rule(name, block.call) end output << "end\n" end # Formats a rule in either dialect. # def rule(name, parslet) " rule #{mangle_name name}\n" << " " << parslet.accept(visitor) << "\n" << " end\n" end # Whenever the visitor encounters an rule in a parslet, it defers the # pretty printing of the rule by calling this method. # def deferred(name, content) @todo ||= [] @todo << [name, content] end # Mangles names so that Citrus and Treetop can live with it. This mostly # transforms some of the things that Ruby allows into other patterns. If # there is collision, we will not detect it for now. # def mangle_name(str) str.to_s.sub(/\?$/, '_p') end end # Exports the current parser instance as a string in the Citrus dialect. # # Example: # # require 'parslet/export' # class MyParser < Parslet::Parser # root(:expression) # rule(:expression) { str('foo') } # end # # MyParser.new.to_citrus # => a citrus grammar as a string # def to_citrus PrettyPrinter.new(Visitors::Citrus). pretty_print(self.class.name, root) end # Exports the current parser instance as a string in the Treetop dialect. # # Example: # # require 'parslet/export' # class MyParser < Parslet::Parser # root(:expression) # rule(:expression) { str('foo') } # end # # MyParser.new.to_treetop # => a treetop grammar as a string # def to_treetop PrettyPrinter.new(Visitors::Treetop). 
pretty_print(self.class.name, root) end end parslet-1.5.0/lib/parslet/error_reporter.rb0000644000175000017500000000023212270776314017503 0ustar meromero# A namespace for all error reporters. # module Parslet::ErrorReporter end require 'parslet/error_reporter/tree' require 'parslet/error_reporter/deepest'parslet-1.5.0/lib/parslet/error_reporter/0000755000175000017500000000000012270776314017161 5ustar meromeroparslet-1.5.0/lib/parslet/error_reporter/tree.rb0000644000175000017500000000521012270776314020443 0ustar meromeromodule Parslet module ErrorReporter # An error reporter has two central methods, one for reporting errors at # the current parse position (#err) and one for reporting errors at a # given parse position (#err_at). The reporter can return an object (a # 'cause') that will be returned to the caller along with the information # that the parse failed. # # When reporting errors on the outer levels of your parser, these methods # get passed a list of error objects ('causes') from the inner levels. In # this default implementation, the inner levels are considered error # subtrees and are appended to the generated tree node at each level, # thereby constructing an error tree. # # This error tree will report in parallel with the grammar structure that # failed. A one-to-one correspondence exists between each error in the # tree and the parslet atom that produced that error. # # The implementor is really free to use these return values as he sees # fit. One example would be to return an error state object from these # methods that is then updated as errors cascade up the parse derivation # tree. # class Tree # Produces an error cause that combines the message at the current level # with the errors that happened at a level below (children). # # @param atom [Parslet::Atoms::Base] parslet that failed # @param source [Source] Source that we're using for this parse. (line # number information...) # @param message [String, Array] Error message at this level. 
# @param children [Array] A list of errors from a deeper level (or nil). # @return [Cause] An error tree combining children with message. # def err(atom, source, message, children=nil) position = source.pos Cause.format(source, position, message, children) end # Produces an error cause that combines the message at the current level # with the errors that happened at a level below (children). # # @param atom [Parslet::Atoms::Base] parslet that failed # @param source [Source] Source that we're using for this parse. (line # number information...) # @param message [String, Array] Error message at this level. # @param pos [Fixnum] The real position of the error. # @param children [Array] A list of errors from a deeper level (or nil). # @return [Cause] An error tree combining children with message. # def err_at(atom, source, message, pos, children=nil) position = pos Cause.format(source, position, message, children) end end end endparslet-1.5.0/lib/parslet/error_reporter/deepest.rb0000644000175000017500000000702712270776314021145 0ustar meromeromodule Parslet module ErrorReporter # Instead of reporting the latest error that happens like {Tree} does, # this class reports the deepest error. Depth is defined here as how # advanced into the input an error happens. The errors close to the # greatest depth tend to be more relevant to the end user, since they # specify what could be done to make them go away. # # More specifically, errors produced by this reporter won't be related to # the structure of the grammar at all. The positions of the errors will # be advanced and convey at every grammar level what the deepest rule # was to fail. # class Deepest def initialize @deepest_cause = nil end # Produces an error cause that combines the message at the current level # with the errors that happened at a level below (children). # # @param atom [Parslet::Atoms::Base] parslet that failed # @param source [Source] Source that we're using for this parse. (line # number information...) 
# @param message [String, Array] Error message at this level. # @param children [Array] A list of errors from a deeper level (or nil). # @return [Cause] An error tree combining children with message. # def err(atom, source, message, children=nil) position = source.pos cause = Cause.format(source, position, message, children) return deepest(cause) end # Produces an error cause that combines the message at the current level # with the errors that happened at a level below (children). # # @param atom [Parslet::Atoms::Base] parslet that failed # @param source [Source] Source that we're using for this parse. (line # number information...) # @param message [String, Array] Error message at this level. # @param pos [Fixnum] The real position of the error. # @param children [Array] A list of errors from a deeper level (or nil). # @return [Cause] An error tree combining children with message. # def err_at(atom, source, message, pos, children=nil) position = pos cause = Cause.format(source, position, message, children) return deepest(cause) end # Returns the cause that is currently deepest. Mainly for specs. # attr_reader :deepest_cause # Checks to see if the lineage of the cause given includes a cause with # an error position deeper than the current deepest cause stored. If # yes, it passes the cause through to the caller. If no, it returns the # current deepest error that was saved as a reference. # def deepest(cause) rank, leaf = deepest_child(cause) if !deepest_cause || leaf.pos >= deepest_cause.pos # This error reaches deeper into the input, save it as reference. @deepest_cause = leaf return cause end return deepest_cause end private # Returns the leaf from a given error tree with the biggest rank. # def deepest_child(cause, rank=0) max_child = cause max_rank = rank if cause.children && !cause.children.empty? 
cause.children.each do |child| c_rank, c_cause = deepest_child(child, rank+1) if c_rank > max_rank max_rank = c_rank max_child = c_cause end end end return max_rank, max_child end end end endparslet-1.5.0/lib/parslet/convenience.rb0000644000175000017500000000124212270776314016726 0ustar meromeroclass Parslet::Atoms::Base # Packages the common idiom # # begin # tree = parser.parse('something') # rescue Parslet::ParseFailed => error # puts parser.cause.ascii_tree # end # # into a convenient method. # # Usage: # # require 'parslet' # require 'parslet/convenience' # # class FooParser < Parslet::Parser # rule(:foo) { str('foo') } # root(:foo) # end # # FooParser.new.parse_with_debug('bar') # # @see Parslet::Atoms::Base#parse # def parse_with_debug str, opts={} parse str, opts rescue Parslet::ParseFailed => error puts error.cause.ascii_tree end endparslet-1.5.0/lib/parslet/cause.rb0000644000175000017500000000602612270776314015537 0ustar meromeromodule Parslet # Represents a cause why a parse did fail. A lot of these objects are # constructed - not all of the causes turn out to be failures for the whole # parse. # class Cause def initialize(message, source, pos, children) @message, @source, @pos, @children = message, source, pos, children end # @return [String, Array] A string or an array of message pieces that # provide failure information. Use #to_s to get a formatted string. attr_reader :message # @return [Parslet::Source] Source that was parsed when this error # happend. Mainly used for line number information. attr_reader :source # Location of the error. # # @return [Fixnum] Position where the error happened. (character offset) attr_reader :pos # When this cause is part of a tree of error causes: child nodes for this # node. Very often carries the reasons for this cause. # # @return [Array] A list of reasons for this cause. def children @children ||= [] end # Appends 'at line LINE char CHAR' to the string given. Use +pos+ to # override the position of the +source+. 
This method returns an object # that can be turned into a string using #to_s. # # @param source [Parslet::Source] source that was parsed when this error # happened # @param pos [Fixnum] position of error # @param str [String, Array] message parts # @param children [Array] child nodes for this error tree # @return [Parslet::Cause] a new instance of {Parslet::Cause} # def self.format(source, pos, str, children=[]) self.new(str, source, pos, children) end def to_s line, column = source.line_and_column(pos) # Allow message to be a list of objects. Join them here, since we now # really need it. Array(message).map { |o| o.respond_to?(:to_slice) ? o.str.inspect : o.to_s }.join + " at line #{line} char #{column}." end # Signals to the outside that the parse has failed. Use this in # conjunction with .format for nice error messages. # def raise(exception_klass=Parslet::ParseFailed) exception = exception_klass.new(self.to_s, self) Kernel.raise exception end # Returns an ascii tree representation of the causes of this node and its # children. # def ascii_tree StringIO.new.tap { |io| recursive_ascii_tree(self, io, [true]) }. string end private def recursive_ascii_tree(node, stream, curved) append_prefix(stream, curved) stream.puts node.to_s node.children.each do |child| last_child = (node.children.last == child) recursive_ascii_tree(child, stream, curved + [last_child]) end end def append_prefix(stream, curved) return if curved.size < 2 curved[1..-2].each do |c| stream.print c ? " " : "| " end stream.print curved.last ? "`- " : "|- " end end endparslet-1.5.0/lib/parslet/atoms.rb0000644000175000017500000000204212270776314015554 0ustar meromero # This is where parslets name comes from: Small parser atoms. # module Parslet::Atoms # The precedence module controls parenthesis during the #inspect printing # of parslets. It is not relevant to other aspects of the parsing. 
# module Precedence prec = 0 BASE = (prec+=1) # everything else LOOKAHEAD = (prec+=1) # &SOMETHING REPETITION = (prec+=1) # 'a'+, 'a'? SEQUENCE = (prec+=1) # 'a' 'b' ALTERNATE = (prec+=1) # 'a' | 'b' OUTER = (prec+=1) # printing is done here. end require 'parslet/atoms/can_flatten' require 'parslet/atoms/context' require 'parslet/atoms/dsl' require 'parslet/atoms/base' require 'parslet/atoms/named' require 'parslet/atoms/lookahead' require 'parslet/atoms/alternative' require 'parslet/atoms/sequence' require 'parslet/atoms/repetition' require 'parslet/atoms/re' require 'parslet/atoms/str' require 'parslet/atoms/entity' require 'parslet/atoms/capture' require 'parslet/atoms/dynamic' require 'parslet/atoms/scope' end parslet-1.5.0/lib/parslet/atoms/0000755000175000017500000000000012270776314015231 5ustar meromeroparslet-1.5.0/lib/parslet/atoms/visitor.rb0000644000175000017500000000361412270776314017261 0ustar meromero# Augments all parslet atoms with an accept method that will call back # to the visitor given. # module Parslet::Atoms class Base def accept(visitor) raise NotImplementedError, "No #accept method on #{self.class.name}." end end class Str # Call back visitors #visit_str method. See parslet/export for an example. # def accept(visitor) visitor.visit_str(str) end end class Entity # Call back visitors #visit_entity method. See parslet/export for an # example. # def accept(visitor) visitor.visit_entity(name, block) end end class Named # Call back visitors #visit_named method. See parslet/export for an # example. # def accept(visitor) visitor.visit_named(name, parslet) end end class Sequence # Call back visitors #visit_sequence method. See parslet/export for an # example. # def accept(visitor) visitor.visit_sequence(parslets) end end class Repetition # Call back visitors #visit_repetition method. See parslet/export for an # example. 
# def accept(visitor) visitor.visit_repetition(@tag, min, max, parslet) end end class Alternative # Call back visitors #visit_alternative method. See parslet/export for an # example. # def accept(visitor) visitor.visit_alternative(alternatives) end end class Lookahead # Call back visitors #visit_lookahead method. See parslet/export for an # example. # def accept(visitor) visitor.visit_lookahead(positive, bound_parslet) end end class Re # Call back visitors #visit_re method. See parslet/export for an example. # def accept(visitor) visitor.visit_re(match) end end end class Parslet::Parser # Call back visitors #visit_parser method. # def accept(visitor) visitor.visit_parser(root) end end parslet-1.5.0/lib/parslet/atoms/str.rb0000644000175000017500000000153512270776314016372 0ustar meromero# Matches a string of characters. # # Example: # # str('foo') # matches 'foo' # class Parslet::Atoms::Str < Parslet::Atoms::Base attr_reader :str def initialize(str) super() @str = str.to_s @len = str.size @error_msgs = { :premature => "Premature end of input", :failed => "Expected #{str.inspect}, but got " } end def try(source, context, consume_all) return succ(source.consume(@len)) if source.matches?(str) # Input ending early: return context.err(self, source, @error_msgs[:premature]) \ if source.chars_left<@len # Expected something, but got something else instead: error_pos = source.pos return context.err_at( self, source, [@error_msgs[:failed], source.consume(@len)], error_pos) end def to_s_inner(prec) "'#{str}'" end end parslet-1.5.0/lib/parslet/atoms/sequence.rb0000644000175000017500000000203512270776314017366 0ustar meromero# A sequence of parslets, matched from left to right. 
Denoted by '>>' # # Example: # # str('a') >> str('b') # matches 'a', then 'b' # class Parslet::Atoms::Sequence < Parslet::Atoms::Base attr_reader :parslets def initialize(*parslets) super() @parslets = parslets @error_msgs = { :failed => "Failed to match sequence (#{self.inspect})" } end def >>(parslet) self.class.new(* @parslets+[parslet]) end def try(source, context, consume_all) # Presize an array result = Array.new(parslets.size + 1) result[0] = :sequence parslets.each_with_index do |p, idx| child_consume_all = consume_all && (idx == parslets.size-1) success, value = p.apply(source, context, child_consume_all) unless success return context.err(self, source, @error_msgs[:failed], [value]) end result[idx+1] = value end return succ(result) end precedence SEQUENCE def to_s_inner(prec) parslets.map { |p| p.to_s(prec) }.join(' ') end end parslet-1.5.0/lib/parslet/atoms/scope.rb0000644000175000017500000000076012270776314016672 0ustar meromero# Starts a new scope in the parsing process. Please also see the #captures # method. # class Parslet::Atoms::Scope < Parslet::Atoms::Base attr_reader :block def initialize(block) super() @block = block end def cached? false end def apply(source, context, consume_all) context.scope do parslet = block.call return parslet.apply(source, context, consume_all) end end def to_s_inner(prec) "scope { #{block.call.to_s(prec)} }" end end parslet-1.5.0/lib/parslet/atoms/repetition.rb0000644000175000017500000000417112270776314017743 0ustar meromero # Matches a parslet repeatedly. 
# # Example: # # str('a').repeat(1,3) # matches 'a' at least once, but at most three times # str('a').maybe # matches 'a' if it is present in the input (repeat(0,1)) # class Parslet::Atoms::Repetition < Parslet::Atoms::Base attr_reader :min, :max, :parslet def initialize(parslet, min, max, tag=:repetition) super() @parslet = parslet @min, @max = min, max @tag = tag @error_msgs = { :minrep => "Expected at least #{min} of #{parslet.inspect}", :unconsumed => "Extra input after last repetition" } end def try(source, context, consume_all) occ = 0 accum = [@tag] # initialize the result array with the tag (for flattening) start_pos = source.pos break_on = nil loop do success, value = parslet.apply(source, context, false) break_on = value break unless success occ += 1 accum << value # If we're not greedy (max is defined), check if that has been reached. return succ(accum) if max && occ>=max end # Last attempt to match parslet was a failure, failure reason in break_on. # Greedy matcher has produced a failure. Check if occ (which will # contain the number of sucesses) is >= min. return context.err_at( self, source, @error_msgs[:minrep], start_pos, [break_on]) if occ < min # consume_all is true, that means that we're inside the part of the parser # that should consume the input completely. Repetition failing here means # probably that we didn't. # # We have a special clause to create an error here because otherwise # break_on would get thrown away. It turns out, that contains very # interesting information in a lot of cases. # return context.err( self, source, @error_msgs[:unconsumed], [break_on]) if consume_all && source.chars_left>0 return succ(accum) end precedence REPETITION def to_s_inner(prec) minmax = "{#{min}, #{max}}" minmax = '?' 
if min == 0 && max == 1 parslet.to_s(prec) + minmax end end parslet-1.5.0/lib/parslet/atoms/re.rb0000644000175000017500000000176512270776314016175 0ustar meromero# Matches a special kind of regular expression that only ever matches one # character at a time. Useful members of this family are: character # ranges, \\w, \\d, \\r, \\n, ... # # Example: # # match('[a-z]') # matches a-z # match('\s') # like regexps: matches space characters # class Parslet::Atoms::Re < Parslet::Atoms::Base attr_reader :match, :re def initialize(match) super() @match = match.to_s @re = Regexp.new(self.match, Regexp::MULTILINE) @error_msgs = { :premature => "Premature end of input", :failed => "Failed to match #{match.inspect[1..-2]}" } end def try(source, context, consume_all) return succ(source.consume(1)) if source.matches?(re) # No string could be read return context.err(self, source, @error_msgs[:premature]) \ if source.chars_left < 1 # No match return context.err(self, source, @error_msgs[:failed]) end def to_s_inner(prec) match.inspect[1..-2] end end parslet-1.5.0/lib/parslet/atoms/named.rb0000644000175000017500000000127712270776314016651 0ustar meromero# Names a match to influence tree construction. # # Example: # # str('foo') # will return 'foo', # str('foo').as(:foo) # will return :foo => 'foo' # class Parslet::Atoms::Named < Parslet::Atoms::Base attr_reader :parslet, :name def initialize(parslet, name) super() @parslet, @name = parslet, name end def apply(source, context, consume_all) success, value = result = parslet.apply(source, context, consume_all) return result unless success succ( produce_return_value( value)) end def to_s_inner(prec) "#{name}:#{parslet.to_s(prec)}" end private def produce_return_value(val) { name => flatten(val, true) } end end parslet-1.5.0/lib/parslet/atoms/lookahead.rb0000644000175000017500000000250112270776314017503 0ustar meromero# Either positive or negative lookahead, doesn't consume its input. # # Example: # # str('foo').present? 
# matches when the input contains 'foo', but leaves it # class Parslet::Atoms::Lookahead < Parslet::Atoms::Base attr_reader :positive attr_reader :bound_parslet def initialize(bound_parslet, positive=true) super() # Model positive and negative lookahead by testing this flag. @positive = positive @bound_parslet = bound_parslet @error_msgs = { :positive => ["Input should start with ", bound_parslet], :negative => ["Input should not start with ", bound_parslet] } end def try(source, context, consume_all) pos = source.pos success, value = bound_parslet.apply(source, context, consume_all) if positive return succ(nil) if success return context.err_at(self, source, @error_msgs[:positive], pos) else return succ(nil) unless success return context.err_at(self, source, @error_msgs[:negative], pos) end # This is probably the only parslet that rewinds its input in #try. # Lookaheads NEVER consume their input, even on success, that's why. ensure source.pos = pos end precedence LOOKAHEAD def to_s_inner(prec) char = positive ? '&' : '!' "#{char}#{bound_parslet.to_s(prec)}" end end parslet-1.5.0/lib/parslet/atoms/entity.rb0000644000175000017500000000227412270776314017077 0ustar meromero# This wraps pieces of parslet definition and gives them a name. The wrapped # piece is lazily evaluated and cached. This has two purposes: # # * Avoid infinite recursion during evaluation of the definition # * Be able to print things by their name, not by their sometimes # complicated content. # # You don't normally use this directly, instead you should generated it by # using the structuring method Parslet.rule. 
# class Parslet::Atoms::Entity < Parslet::Atoms::Base attr_reader :name, :block def initialize(name, &block) super() @name = name @block = block end def try(source, context, consume_all) parslet.apply(source, context, consume_all) end def parslet @parslet ||= @block.call.tap { |p| raise_not_implemented unless p } end def to_s_inner(prec) name.to_s.upcase end private def raise_not_implemented trace = caller.reject {|l| l =~ %r{#{Regexp.escape(__FILE__)}}} # blatantly stolen from dependencies.rb in activesupport exception = NotImplementedError.new("rule(#{name.inspect}) { ... } returns nil. Still not implemented, but already used?") exception.set_backtrace(trace) raise exception end end parslet-1.5.0/lib/parslet/atoms/dynamic.rb0000644000175000017500000000130412270776314017200 0ustar meromero# Evaluates a block at parse time. The result from the block must be a parser # (something which implements #apply). In the first case, the parser will then # be applied to the input, creating the result. # # Dynamic parses are never cached. # # Example: # dynamic { rand < 0.5 ? str('a') : str('b') } # class Parslet::Atoms::Dynamic < Parslet::Atoms::Base attr_reader :block def initialize(block) @block = block end def cached? false end def try(source, context, consume_all) result = block.call(source, context) # Result is a parslet atom. return result.apply(source, context, consume_all) end def to_s_inner(prec) "dynamic { ... }" end end parslet-1.5.0/lib/parslet/atoms/dsl.rb0000644000175000017500000000563612270776314016352 0ustar meromero # A mixin module that defines operations that can be called on any subclass # of Parslet::Atoms::Base. These operations make parslets atoms chainable and # allow combination of parslet atoms to form bigger parsers. # # Example: # # str('foo') >> str('bar') # str('f').repeat # any.absent? # also called The Epsilon # module Parslet::Atoms::DSL # Construct a new atom that repeats the current atom min times at least and # at most max times. 
max can be nil to indicate that no maximum is present. # # Example: # # match any number of 'a's # str('a').repeat # # # match between 1 and 3 'a's # str('a').repeat(1,3) # def repeat(min=0, max=nil) Parslet::Atoms::Repetition.new(self, min, max) end # Returns a new parslet atom that is only maybe present in the input. This # is synonymous to calling #repeat(0,1). Generated tree value will be # either nil (if atom is not present in the input) or the matched subtree. # # Example: # str('foo').maybe # def maybe Parslet::Atoms::Repetition.new(self, 0, 1, :maybe) end # Chains two parslet atoms together as a sequence. # # Example: # str('a') >> str('b') # def >>(parslet) Parslet::Atoms::Sequence.new(self, parslet) end # Chains two parslet atoms together to express alternation. A match will # always be attempted with the parslet on the left side first. If it doesn't # match, the right side will be tried. # # Example: # # matches either 'a' OR 'b' # str('a') | str('b') # def |(parslet) Parslet::Atoms::Alternative.new(self, parslet) end # Tests for absence of a parslet atom in the input stream without consuming # it. # # Example: # # Only proceed the parse if 'a' is absent. # str('a').absent? # def absent? Parslet::Atoms::Lookahead.new(self, false) end # Tests for presence of a parslet atom in the input stream without consuming # it. # # Example: # # Only proceed the parse if 'a' is present. # str('a').present? # def present? Parslet::Atoms::Lookahead.new(self, true) end # Alias for present? that will disappear in 2.0 (deprecated) # alias prsnt? present? # Alias for absent? that will disappear in 2.0 (deprecated) # alias absnt? absent? # Marks a parslet atom as important for the tree output. This must be used # to achieve meaningful output from the #parse method. # # Example: # str('a').as(:b) # will produce {:b => 'a'} # def as(name) Parslet::Atoms::Named.new(self, name) end # Captures a part of the input and stores it under the name given. 
This # is very useful to create self-referential parses. A capture stores # the result of its parse (may be complex) on a successful parse action. # # Example: # str('a').capture(:b) # will store captures[:b] == 'a' # def capture(name) Parslet::Atoms::Capture.new(self, name) end endparslet-1.5.0/lib/parslet/atoms/context.rb0000644000175000017500000000512212270776314017242 0ustar meromeromodule Parslet::Atoms # Helper class that implements a transient cache that maps position and # parslet object to results. This is used for memoization in the packrat # style. # # Also, error reporter is stored here and error reporting happens through # this class. This makes the reporting pluggable. # class Context # @param reporter [#err, #err_at] Error reporter (leave empty for default # reporter) def initialize(reporter=Parslet::ErrorReporter::Tree.new) @cache = Hash.new { |h, k| h[k] = {} } @reporter = reporter @captures = Parslet::Scope.new end # Caches a parse answer for obj at source.pos. Applying the same parslet # at one position of input always yields the same result, unless the input # has changed. # # We need the entire source here so we can ask for how many characters # were consumed by a successful parse. Imitation of such a parse must # advance the input pos by the same amount of bytes. # def try_with_cache(obj, source, consume_all) beg = source.pos # Not in cache yet? Return early. unless entry = lookup(obj, beg) result = obj.try(source, self, consume_all) if obj.cached? set obj, beg, [result, source.pos-beg] end return result end # the condition in unless has returned true, so entry is not nil. result, advance = entry # The data we're skipping here has been read before. (since it is in # the cache) PLUS the actual contents are not interesting anymore since # we know obj matches at beg. So skip reading. source.pos = beg + advance return result end # Report an error at a given position. 
# @see ErrorReporter # def err_at(*args) return [false, @reporter.err_at(*args)] if @reporter return [false, nil] end # Report an error. # @see ErrorReporter # def err(*args) return [false, @reporter.err(*args)] if @reporter return [false, nil] end # Returns the current captures made on the input (see # Parslet::Atoms::Base#capture). Use as follows: # # context.captures[:foobar] # => returns capture :foobar # attr_reader :captures # Starts a new scope. Use the #scope method of Parslet::Atoms::DSL # to call this. # def scope captures.push yield ensure captures.pop end private def lookup(obj, pos) @cache[pos][obj] end def set(obj, pos, val) @cache[pos][obj] = val end end endparslet-1.5.0/lib/parslet/atoms/capture.rb0000644000175000017500000000170012270776314017217 0ustar meromero # Stores the result of matching an atom against input in the #captures in # parse context. Doing so will allow you to pull parts of the ongoing parse # out later and use them to match other pieces of input. # # Example: # # After this, context.captures[:an_a] returns 'a' # str('a').capture(:an_a) # # # Capture and use of the capture: (matches either 'aa' or 'bb') # match['ab'].capture(:first) >> # dynamic { |src, ctx| str(ctx.captures[:first]) } # class Parslet::Atoms::Capture < Parslet::Atoms::Base attr_reader :parslet, :name def initialize(parslet, name) super() @parslet, @name = parslet, name end def apply(source, context, consume_all) success, value = result = parslet.apply(source, context, consume_all) if success context.captures[name.to_sym] = flatten(value) end return result end def to_s_inner(prec) "(#{name.inspect} = #{parslet.to_s(prec)})" end end parslet-1.5.0/lib/parslet/atoms/can_flatten.rb0000644000175000017500000001101312270776314020030 0ustar meromero module Parslet::Atoms # A series of helper functions that have the common topic of flattening # result values into the intermediary tree that consists of Ruby Hashes and # Arrays. 
# # This module has one main function, #flatten, that takes an annotated # structure as input and returns the reduced form that users expect from # Atom#parse. # # NOTE: Since all of these functions are just that, functions without # side effects, they are in a module and not in a class. Its hard to draw # the line sometimes, but this is beyond. # module CanFlatten # Takes a mixed value coming out of a parslet and converts it to a return # value for the user by dropping things and merging hashes. # # Named is set to true if this result will be embedded in a Hash result from # naming something using .as(...). It changes the folding # semantics of repetition. # def flatten(value, named=false) # Passes through everything that isn't an array of things return value unless value.instance_of? Array # Extracts the s-expression tag tag, *tail = value # Merges arrays: result = tail. map { |e| flatten(e) } # first flatten each element case tag when :sequence return flatten_sequence(result) when :maybe return named ? result.first : result.first || '' when :repetition return flatten_repetition(result, named) end fail "BUG: Unknown tag #{tag.inspect}." end # Lisp style fold left where the first element builds the basis for # an inject. # def foldl(list, &block) return '' if list.empty? list[1..-1].inject(list.first, &block) end # Flatten results from a sequence of parslets. # # @api private # def flatten_sequence(list) foldl(list.compact) { |r, e| # and then merge flat elements merge_fold(r, e) } end # @api private def merge_fold(l, r) # equal pairs: merge. ---------------------------------------------------- if l.class == r.class if l.is_a?(Hash) warn_about_duplicate_keys(l, r) return l.merge(r) else return l + r end end # unequal pairs: hoist to same level. ------------------------------------ # Maybe classes are not equal, but both are stringlike? if l.respond_to?(:to_str) && r.respond_to?(:to_str) # if we're merging a String with a Slice, the slice wins. 
return r if r.respond_to? :to_slice return l if l.respond_to? :to_slice fail "NOTREACHED: What other stringlike classes are there?" end # special case: If one of them is a string/slice, the other is more important return l if r.respond_to? :to_str return r if l.respond_to? :to_str # otherwise just create an array for one of them to live in return l + [r] if r.class == Hash return [l] + r if l.class == Hash fail "Unhandled case when foldr'ing sequence." end # Flatten results from a repetition of a single parslet. named indicates # whether the user has named the result or not. If the user has named # the results, we want to leave an empty list alone - otherwise it is # turned into an empty string. # # @api private # def flatten_repetition(list, named) if list.any? { |e| e.instance_of?(Hash) } # If keyed subtrees are in the array, we'll want to discard all # strings inbetween. To keep them, name them. return list.select { |e| e.instance_of?(Hash) } end if list.any? { |e| e.instance_of?(Array) } # If any arrays are nested in this array, flatten all arrays to this # level. return list. select { |e| e.instance_of?(Array) }. flatten(1) end # Consistent handling of empty lists, when we act on a named result return [] if named && list.empty? # If there are only strings, concatenate them and return that. foldl(list) { |s,e| s+e } end # That annoying warning 'Duplicate subtrees while merging result' comes # from here. You should add more '.as(...)' names to your intermediary tree. # def warn_about_duplicate_keys(h1, h2) d = h1.keys & h2.keys unless d.empty? warn "Duplicate subtrees while merging result of \n #{self.inspect}\nonly the values"+ " of the latter will be kept. (keys: #{d.inspect})" end end end endparslet-1.5.0/lib/parslet/atoms/base.rb0000644000175000017500000001150412270776314016471 0ustar meromero# Base class for all parslets, handles orchestration of calls and implements # a lot of the operator and chaining methods. 
# # Also see Parslet::Atoms::DSL chaining parslet atoms together. # class Parslet::Atoms::Base include Parslet::Atoms::Precedence include Parslet::Atoms::DSL include Parslet::Atoms::CanFlatten # Given a string or an IO object, this will attempt a parse of its contents # and return a result. If the parse fails, a Parslet::ParseFailed exception # will be thrown. # # @param io [String, Source] input for the parse process # @option options [Parslet::ErrorReporter] :reporter error reporter to use, # defaults to Parslet::ErrorReporter::Tree # @option options [Boolean] :prefix Should a prefix match be accepted? # (default: false) # @return [Hash, Array, Parslet::Slice] PORO (Plain old Ruby object) result # tree # def parse(io, options={}) source = io.respond_to?(:line_and_column) ? io : Parslet::Source.new(io) # Try to cheat. Assuming that we'll be able to parse the input, don't # run error reporting code. success, value = setup_and_apply(source, nil, !options[:prefix]) # If we didn't succeed the parse, raise an exception for the user. # Stack trace will be off, but the error tree should explain the reason # it failed. unless success # Cheating has not paid off. Now pay the cost: Rerun the parse, # gathering error information in the process. reporter = options[:reporter] || Parslet::ErrorReporter::Tree.new source.pos = 0 success, value = setup_and_apply(source, reporter, !options[:prefix]) fail "Assertion failed: success was true when parsing with reporter" \ if success # Value is a Parslet::Cause, which can be turned into an exception: value.raise fail "NEVER REACHED" end # assert: success is true # Extra input is now handled inline with the rest of the parsing. If # really we have success == true, prefix: false and still some input # is left dangling, that is a BUG. if !options[:prefix] && source.chars_left > 0 fail "BUG: New error strategy should not reach this point." 
end return flatten(value) end # Creates a context for parsing and applies the current atom to the input. # Returns the parse result. # # @return [] Result of the parse. If the first member is # true, the parse has succeeded. def setup_and_apply(source, error_reporter, consume_all) context = Parslet::Atoms::Context.new(error_reporter) apply(source, context, consume_all) end # Calls the #try method of this parslet. Success consumes input, error will # rewind the input. # # @param source [Parslet::Source] source to read input from # @param context [Parslet::Atoms::Context] context to use for the parsing # @param consume_all [Boolean] true if the current parse must consume # all input by itself. def apply(source, context, consume_all=false) old_pos = source.pos success, value = result = context.try_with_cache(self, source, consume_all) if success # If a consume_all parse was made and doesn't result in the consumption # of all the input, that is considered an error. if consume_all && source.chars_left>0 # Read 10 characters ahead. Why ten? I don't know. offending_pos = source.pos offending_input = source.consume(10) # Rewind input (as happens always in error case) source.pos = old_pos return context.err_at( self, source, "Don't know what to do with #{offending_input.to_s.inspect}", offending_pos ) end # Looks like the parse was successful after all. Don't rewind the input. return result end # We only reach this point if the parse has failed. Rewind the input. source.pos = old_pos return result end # Override this in your Atoms::Base subclasses to implement parsing # behaviour. # def try(source, context, consume_all) raise NotImplementedError, \ "Atoms::Base doesn't have behaviour, please implement #try(source, context)." end # Returns true if this atom can be cached in the packrat cache. Most parslet # atoms are cached, so this always returns true, unless overridden. # def cached? true end # Debug printing - in Treetop syntax. 
# def self.precedence(prec) define_method(:precedence) { prec } end precedence BASE def to_s(outer_prec=OUTER) if outer_prec < precedence "("+to_s_inner(precedence)+")" else to_s_inner(precedence) end end def inspect to_s(OUTER) end private # Produces an instance of Success and returns it. # def succ(result) [true, result] end end parslet-1.5.0/lib/parslet/atoms/alternative.rb0000644000175000017500000000245712270776314020104 0ustar meromero # Alternative during matching. Contains a list of parslets that is tried each # one in turn. Only fails if all alternatives fail. # # Example: # # str('a') | str('b') # matches either 'a' or 'b' # class Parslet::Atoms::Alternative < Parslet::Atoms::Base attr_reader :alternatives # Constructs an Alternative instance using all given parslets in the order # given. This is what happens if you call '|' on existing parslets, like # this: # # str('a') | str('b') # def initialize(*alternatives) super() @alternatives = alternatives @error_msg = "Expected one of #{alternatives.inspect}" end #--- # Don't construct a hanging tree of Alternative parslets, instead store them # all here. This reduces the number of objects created. #+++ def |(parslet) self.class.new(*@alternatives + [parslet]) end def try(source, context, consume_all) errors = alternatives.map { |a| success, value = result = a.apply(source, context, consume_all) return result if success # Aggregate all errors value } # If we reach this point, all alternatives have failed. context.err(self, source, @error_msg, errors) end precedence ALTERNATE def to_s_inner(prec) alternatives.map { |a| a.to_s(prec) }.join(' / ') end end parslet-1.5.0/README0000644000175000017500000000343512270776314012553 0ustar meromeroINTRODUCTION Parslet makes developing complex parsers easy. It does so by * providing the best error reporting possible * not generating reams of code for you to debug Parslet takes the long way around to make your job easier. It allows for incremental language construction. 
Often, you start out small, implementing the atoms of your language first; _parslet_ takes pride in making this possible. Eager to try this out? Please see the associated web site: http://kschiess.github.com/parslet SYNOPSIS require 'parslet' include Parslet # parslet parses strings str('foo'). parse('foo') # => "foo"@0 # it matches character sets match['abc'].parse('a') # => "a"@0 match['abc'].parse('b') # => "b"@0 match['abc'].parse('c') # => "c"@0 # and it annotates its output str('foo').as(:important_bit). parse('foo') # => {:important_bit=>"foo"@0} # you can construct parsers with just a few lines quote = str('"') simple_string = quote >> (quote.absent? >> any).repeat >> quote simple_string. parse('"Simple Simple Simple"') # => "\"Simple Simple Simple\""@0 # or by making a fuss about it class Smalltalk < Parslet::Parser root :smalltalk rule(:smalltalk) { statements } rule(:statements) { # insert smalltalk parser here (outside of the scope of this readme) } end # and then Smalltalk.new.parse('smalltalk') COMPATIBILITY This library should work with most rubies. I've tested it with MRI 1.8 (except 1.8.6), 1.9, rbx-head, jruby. Please report as a bug if you encounter issues. Note that due to Ruby 1.8 internals, Unicode parsing is not supported on that version. On Mac OS X Lion, ruby-1.8.7-p352 has been known to segfault. Use ruby-1.8.7-p334 for better results. STATUS Production worthy. (c) 2010, 2011, 2012 Kaspar Schiessparslet-1.5.0/Rakefile0000644000175000017500000000121112270776314013326 0ustar meromerorequire 'rdoc/task' require 'sdoc' require 'rspec/core/rake_task' require "rubygems/package_task" desc "Run all tests: Exhaustive." RSpec::Core::RakeTask.new namespace :spec do desc "Only run unit tests: Fast. " RSpec::Core::RakeTask.new(:unit) do |task| task.pattern = "spec/parslet/**/*_spec.rb" end end task :default => :spec # This task actually builds the gem. 
task :gem => :spec spec = eval(File.read('parslet.gemspec')) desc "Prints LOC stats" task :stat do %w(lib spec example).each do |dir| loc = %x(find #{dir} -name "*.rb" | xargs wc -l | grep 'total').split.first.to_i printf("%20s %d\n", dir, loc) end end parslet-1.5.0/LICENSE0000644000175000017500000000206612270776314012677 0ustar meromero Copyright (c) 2010 Kaspar Schiess Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. parslet-1.5.0/HISTORY.txt0000644000175000017500000001727512270776314013604 0ustar meromero= 2.0 / ?? (future release changes, like a reminder to self) - prsnt? and absnt? are now finally banned into oblivion. Wasting vocals for the win. = 1.5 / ?? + Handles unconsumed input at end of parse completely differently. Instead of generating a toplevel error, it now raises an error in every branch of the parse. More information in the resulting exception ensues! Thanks again to John Mettraux for inspiration & acceptance specs. 
NOTE that this means that the UnconsumedInput exception is gone, since the unconsumed input case is nothing special anymore. * This history now finally reads like the Changelog of the linux kernel. Meaning that probably no one ever reads this. + Captures and parsing subsequent input based on captured values. This has been long overdue - finally you can parse HEREdocs with parslet! = 1.4.0 / 25May2012 + Revised documentation. A few new API features have finally made it into the documentation. Examples in the documentation are now curated and run against the current code so that they really really work. Also, the website generation tools have been replaced with 2012-style tools. Much less pain to update now. + Parslet::Source now doesn't hold a StringIO, it directly holds the buffer to be parsed. The api of Source has changed a tiny bit. This change has been made for speed optimisation reasons. + :reporter argument to parse, allowing to customize error reporting within wide boundaries. See issue #64 for a discussion. Included are two error reporters, one (default) with the existing error tree functionality, one reporting deepest errors as defined by the above ticket. + Optimistic parse: Parsing is two phase, with the first phase assuming there will be no errors. This yields ~ 20% speed improvement in the case where the parse succeeds. Also, internal error handling is now using tuples. This and other optimizations have yielded ~ 30% overall improvement. ! #error_tree and #cause removed from all of parslet. The Parslet::ParseFailed exception now contains a #cause field that can be asked for an #ascii_tree as before. Cleaner internal error handling, not stateful in atoms anymore. Some parsers will see correct error reporting for the first time. (issue #65) + Made it possible to pass a custom Parslet::Source implementor to #parse. (see #63) + #parse has now a second argument that is an options hash. See Parslet::Atoms::Base#parse for documentation. 
- VM engine on the way out. No benefit except for the intellectual challenge. = 1.3.0 / 5Mar2012 ! Parslet::Transform::Context is now much more well-behaved. It has #respond_to? and #method_missing; it now looks like a plain old Ruby object with instance variables and attribute readers. - Grammar transforms turned out to be a dead end and have been removed. ! A few problems in error message generation have been fixed. This will improve diagnostics further. + A VM driven parser engine: Removes the limitation that parsing needs a lot of stack space, something dearly missing from Ruby 1.9.3 fibers. This engine is experimental and might be removed in the future. ! Interaction with mathn fixed - Line number generation will terminate. . Internal reorganisation, removing cruft and bit rot. = 1.2.3 / 22Sep2011 + Transform#apply can now be called with a hash as second argument. This provides bindings and a way to inject context. ! Fixes a bug thar modified parslet atoms in place, defeating oop chaining. (#50) = 1.2.1 / 6Jun2011 ! FIX: Input at the end of a parse raises Parslet::UnconsumedInput. (see issue 18) ! FIX: Unicode parsing should now work as expected. (see issue 38) ! FIX: Slice#slice returned wrong bits at times (see issue 36). = 1.2.0 / 4Feb2011 + Parslet::Parser is now also a grammar atom, it can be composed freely with other atoms. (str('f') >> MiniLispParser.new >> str('b')) + No strings, only slices are returned as part of the parser result. Parslet::Slice is almost a string class, but one that remembers the source offset. This has also bought us a slight speedup. + require 'parslet/convenience' now brings #parse_with_debug to all parslets. This is a consequence of the above change. + Deprecates prsnt? and absnt? in favor of the more readable absent? and prsnt?. Uses 3 bytes more RAM. The old variants will exist until we release 2.0. INTERNALLY + Visitors now should have methods that all begin with 'visit_*'. #str becomes #visit_str. 
+ Parslet::Atoms::Entity now takes only a block argument instead of context and block. = 1.1.1 / 4Feb2011 ! FIX: Line counting was broken by performance optimisations. + Squeezed out another few drops of performance. = 1.1.0 / 2Feb2011 + Uses return (fail/success), cached line counts, memoizing of parse results and other tricks internally for at least an order of magnitude increase in execution speed. + str('foo').maybe will now return an empty string again. Use .as(...) to name things and get back [] from #repeat and nil from #maybe. + If you require 'parslet/atoms/visitor', you'll get an accept method on all known Parslet::Atoms. + If you require 'parslet/export', you can call #to_citrus and #to_treetop to produce string versions of your grammar in those dialects. + Requiring 'parslet/convenience' will give you a parse_with_debug on your Parslet::Parser class. This prints some diagnostics on parse failure. (Thanks to Florian Hanke) = 1.0.1 / 17Jan2011 A happy new year! ! FIX: Parslet::Transform was wrongly fixed earlier - it now won't mangle hashes anymore. (Blake Sweeney) + parslet/rig/rspec.rb contains useful rspec matchers. (R. Konstantin Haase) = 1.0.0 / 29Dec2010 - #each_match was removed. There was some duplication of code that even confused me - and we should not have 2 methods of achieving the same goal. + Full documentation. Fixed sdoc. = 0.11.0 / 25Nov2010 ! Bugfixes to tree handling. Let's hope that was the last such significant change to the core. = 0.10.1 / 22Nov2010 + Allow match['a-z'], shortcut for match('[a-z]') ! Fixed output inconsistencies (behaviour in connection to 'maybe') = 0.10.0 / 22Nov2010 + Parslet::Transform now takes a block on initialisation, wherein you can define all the rules directly. + Parslet::Transform now only passes a hash to the block during transform when its arity is 1. Otherwise all hash contents are bound as local variables. + Both inline and other documentation have been improved. 
+ You can now use 'subtree(:x)' to bind any subtree to x during tree pattern matching. + Transform classes can now include rules in their class definition. This makes Parser and Transformer behave the same. = 0.9.0 / 28Oct2010 * More of everything: Examples, documentation, etc... * Breaking change: Ruby's binary or ('|') is now used for alternatives, instead of the division sign ('/') - this reduces the number of parentheses needed for a grammar overall. * parslet.maybe now yields the result or nil in case of parse failure. This is probably better than the array it did before; the jury is still out on that. * parslet.repeat(min, max) is now valid syntax = 0.1.0 / not released. * Initial version. Classes for parsing, matching in the resulting trees and transforming the trees into something more useful. * Parses and outputs intermediary trees * Matching of single elements and sequences