pax_global_header00006660000000000000000000000064143441724660014525gustar00rootroot0000000000000052 comment=bb205a94f3464f85421fbefdb4468a2c02f77aa8 ruby-oedipus-lex-2.6.0/000077500000000000000000000000001434417246600147475ustar00rootroot00000000000000ruby-oedipus-lex-2.6.0/.autotest000066400000000000000000000010401434417246600166130ustar00rootroot00000000000000# -*- ruby -*- require "autotest/restart" Autotest.add_hook :initialize do |at| at.testlib = "minitest/autorun" at.add_exception "tmp" # at.extra_files << "../some/external/dependency.rb" # # at.libs << ":../some/external" # # at.add_exception "vendor" # # at.add_mapping(/dependency.rb/) do |f, _| # at.files_matching(/test_.*rb$/) # end # # %w(TestA TestB).each do |klass| # at.extra_class_map[klass] = "test/test_misc.rb" # end end # Autotest.add_hook :run_command do |at| # system "rake build" # end ruby-oedipus-lex-2.6.0/History.rdoc000066400000000000000000000047061434417246600172700ustar00rootroot00000000000000=== 2.6.0 / 2021-10-27 * 2 minor enhancements: * Add frozen_string_literal comment to generated lexers. * Allow empty regex. (marcandre) * 1 bug fix: * Switched from peek(1) == "\n" to check(/\n/) to save a ton of strings. === 2.5.3 / 2021-05-29 * 1 bug fix: * Added require_ruby_version >= 2.4 to gemspec === 2.5.2 / 2020-06-14 * 1 minor enhancement: * Speedup of column position computation. It went from roughly 10s to 2s for a big file! (vdbijl) === 2.5.1 / 2019-06-03 * 1 minor enhancement: * Added full rdoc an re-bootstrapped. * 1 bug fix: * Fixed a deprecation warning in ruby 2.6+. === 2.5.0 / 2016-11-30 * 5 minor enhancements: * Added #location to generated template, provides file:line:column per options. * Added LexerError and made ScanError subclass it. * Added column option. * Errors try to provide location now. * Re-bootstrapped. * 2 bug fixes: * Fixed some whitespace generation when using :column. * Fixed wiring on column. (steakknife) === 2.4.1 / 2016-01-21 * 1 minor enhancement: * Use `skip` and `match?` instead of `scan` and `check`. Better on GC. (presidentbeef) === 2.4.0 / 2014-08-29 * 1 minor enhancement: * Added column option & accessor. * 1 bug fix: * lineno shouldn't be visible at all if the option isn't on. === 2.3.2 / 2014-08-06 * 1 bug fix: * Increase lineno on nil token. (hashmal) === 2.3.1 / 2014-06-09 * 1 minor enhancement: * If there is an encoding comment on the first line, put it above generated headers. === 2.3.0 / 2014-05-16 * 4 minor enhancements: * Added == method to lexer. * Added pretty_print methods to lexer+rule+group. * Added structural test so I can later make optimization changes cleanly. * Refactored to (mostly) use an AST for code generation. Nukes 2 ERB templates. === 2.2.1 / 2014-04-02 * 1 bug fix: * Correct installation instructions in README. (luislavena) === 2.2.0 / 2014-03-14 * 3 minor enhancements: * Added debug rake task. * Added rule grouping. Naive benchmarking seems to show ~15% improvement in ruby_parser. * Refactored rule handling part of template to its own variable. === 2.1.0 / 2014-01-22 * 3 minor enhancements: * Added lineno and do_parse as options via grammar. * All options are now opt-in. You might want to add do_parse and lineno to your grammar. * New option lineno will turn on automatic line number handling at the top of next_token. === 2.0.0 / 2013-12-13 * 1 major enhancement * Birthday! ruby-oedipus-lex-2.6.0/Manifest.txt000066400000000000000000000007551434417246600172650ustar00rootroot00000000000000.autotest History.rdoc Manifest.txt README.rdoc Rakefile lib/oedipus_lex.rake lib/oedipus_lex.rb lib/oedipus_lex.rex lib/oedipus_lex.rex.rb rex-mode.el sample/calc3.racc sample/calc3.rex sample/error1.rex sample/error1.txt sample/error2.rex sample/sample.html sample/sample.rex sample/sample.xhtml sample/sample1.c sample/sample1.rex sample/sample2.bas sample/sample2.rex sample/xhtmlparser.html sample/xhtmlparser.racc sample/xhtmlparser.rex sample/xhtmlparser.xhtml test/test_oedipus_lex.rb ruby-oedipus-lex-2.6.0/README.rdoc000066400000000000000000000333051434417246600165610ustar00rootroot00000000000000= Oedipus Lex - This is not your father's lexer home :: http://github.com/seattlerb/oedipus_lex rdoc :: http://docs.seattlerb.org/oedipus_lex == DESCRIPTION Oedipus Lex is a lexer generator in the same family as Rexical and Rex. Oedipus Lex is my independent lexer fork of Rexical. Rexical was in turn a fork of Rex. We've been unable to contact the author of rex in order to take it over, fix it up, extend it, and relicense it to MIT. So, Oedipus was written clean-room in order to bypass licensing constraints (and because bootstrapping is fun). Oedipus brings a lot of extras to the table and at this point is only historically related to rexical. The syntax has changed enough that any rexical lexer will have to be tweaked to work inside of oedipus. At the very least, you need to add slashes to all your regexps. Oedipus, like rexical, is based primarily on generating code much like you would a hand-written lexer. It is _not_ a table or hash driven lexer. It uses StrScanner within a multi-level case statement. As such, Oedipus matches on the _first_ match, not the longest (like lex and its ilk). This documentation is not meant to bypass any prerequisite knowledge on lexing or parsing. If you'd like to study the subject in further detail, please try [TIN321] or the [LLVM Tutorial] or some other good resource for CS learning. Books... books are good. I like books. == Syntax: lexer = (misc_line)* /class/ class_id (option_section)? (inner_section)? (start_section)? (macro_section)? (rule_section)? /end/ (misc_line)* misc_line = /.*/ class_id = /\w+.*/ option_section = /options?/ NL (option)* option = /stub/i | /debug/i | /do_parse/i | /lineno/i | /column/i inner_section = /inner/ NL (misc_line)* start_section = /start/ NL (misc_line)* macro_section = /macros?/ NL (macro)* macro = name regexp name = /\w+/ regexp = /(\/(?:\\.|[^\/])+\/[io]?)/ rule_section = /rules?/ NL (rule|group)* rule = (state)? regexp (action)? group = /:/ regexp NL (rule)+ state = label | predicate label = /:\w+/ predicate = /\w+\?/ action = name | /\{.*\}.*/ === Basic Example class Calculator macros NUMBER /\d+/ rules /rpn/ :RPN # sets @state to :RPN /#{NUMBER}/ { [:number, text.to_i] } /\s+/ /[+-]/ { [:op, text] } :RPN /\s+/ :RPN /[+-]/ { [:op2, text] } :RPN /#{NUMBER}/ { [:number2, text.to_i] } :RPN /alg/ nil # clears state end ==== Header Anything before the class line is considered the "header" and will be added to the top of your file. This includes extra lines like module namespacing. ==== Class Line The class line, like a regular ruby class declaration, specifies what class all of the lexer code belongs to. You may simply specify a class name like: class MyLexer or it may specify a superclass as well: class MyLexer < MyParser You might do this latter case to mix your lexer and your racc parser together. Personally, I recommend keeping them apart for cleanliness and testability. ==== Options All options are opt-in and can be specified either in the grammar or via an options hash in `OedipusLex#initialize`. Specify `debug` to turn on basic tracing output. Specify `stub` to create a generic handler that processes all files specified on the commandline with a rather generic handler. This makes it easy to get up and running before you have the rest of your system in place. Specify `do_parse` to generate a generic do_parse method that automatically dispatches off to `lex_` methods. Specify `lineno` to generate automatic line number handling at the beginning of `next_token`. This was the default in 1.0.0 and you must now activate it. Specify `column` to generate automatic column number handling. ==== Inner The inner section is just code, like header or footer, but inner gets put _inside_ the class body. You can put extra methods here. Personally, I recommend you don't use inner and you put all of your extra methods and class code in a separate file. This makes lexer generation faster and keeps things separate and small. ==== Macros Macros define named regexps that you can use via interpolation inside other subsequent macros or within rule matchers. ==== Start The lexer runs in a loop until it finds a match or has to bail. Use the `start` section to place extra code at the top of your `next_token` method, before the loop. Eg: start space_seen = false This code will get expanded into the very top of the lexer method. Do note that this code gets run before _every token_, not just on lexer initialization. ==== Rules The rule section is the meat of the lexer. It contains one or more rule lines where each line consists of: * a required state (as a `:symbol`), a predicate method, or nothing. * a regular expression. * an action method, an action block, or nothing. More often than not, a rule should not specify a required state. Only use them when you're convinced you need them. So a rule can very simple, including _just_ a regexp: rules /#.*/ # ignore comments or can contain any combination of state checks or action types: rules :state /token/ action_method predicate? /another/ { do_something } ===== States and Predicates In order for the tokenizer to determine if the rule's regexp should even be considered, a rule may specify a required state, a predicate method to call, or leave it blank. If the rule does not specify a state, it can be used whenever `@state` is nil or a symbol that starts lowercase (an inclusive rule). If the rule specifies a symbol that starts uppercase (an exclusive rule), it will _only_ use those rules when `@state` matches. Alternatively, a rule may specify a predicate method to check. If that method returns a truthy value, the rule is currently valid. This is equivalent to setting the required state to nil, as it will be used with inclusive and nil states, and ignored for exclusive states. ==== End & Footer Like the header, anything after the end line is considered the "footer" and will be added to the bottom of your file. == Suggested Structure Here's how I suggest you structure things: === Rakefile You only need a minimum of dependencies to wire stuff up if you use the supplied rake rule. Rake.application.rake_require "oedipus_lex" task :lexer => "lib/mylexer.rex.rb" task :parser => :lexer # plus appropriate parser rules/deps task :test => :parser === lib/mylexer.rex Put your lexer definition here. It will generate into `"lib/mylexer.rex.rb"`. class MyLexer macros # ... rules # ... end === lib/mylexer.rb require "new_ruby_lexer.rex" class MyLexer # ... predicate methods and stuff end === lib/myparser.rb Assuming you're using a racc based parser, you'll need to define a `next_token` method that bridges over to your lexer: class MyParser def next_token lexer.next_token # plus any sanity checking / error handling... end end == Differences with Rexical If you're already familiar with rexical, this might help you get up and running faster. If not, it could provide an overview of the value-added. === Additions or Changes ==== A generic rake rule is defined for rex files. Oedipus defines a rake rule that allows you simply define a file-based dependency and rake will take care of the rest. Eg: file "lib/mylexer.rex.rb" => "lib/mylexer.rex" task :generated => %w[lib/mylexer.rex.rb] task :test => :generated ==== All regular expressions must be slash delimited. Basically, regexps are now plain slashed ruby regexps. This allows for regexp flags to be provided individually, rather than specifying an entire grammar is case-insensitive, you can have a single rule be case insensitive. Right now only `/i` and `/o` are properly handled. ==== Regular expressions now use ruby interpolation. Instead of `aaa{{macro}}ccc` it is `/aaa#{macro}ccc/`. ==== Macros define class constants. Macros simply become class constants inside the lexer class. This makes them immediately available to other macros and to the regexps in the rules section. This also implies that they must start uppercase, since that is required by ruby. ==== Rules can be activated by predicate methods. Instead of just switching on state, rules can now check predicate methods to see if they should trigger. Eg: rules sad? /\w+/ { [:sad, text] } happy? /\w+/ { [:happy, text] } end # elsewhere: def sad? # ... end def happy? not sad? end ==== Rule actions are only a single-line. In order to push complexity down, `{ rule actions }` may only be a single line. ==== Rules can invoke methods. For more complex actions, use a method by specifying its name: rules /\w+/ process_word end And then define the handler method to return a result pair: def process_word text # do lots of normalization... [:word, token] end This strikes a good balance between readability and maintainability. It also makes it much easier to write unit tests for the complex actions. ==== Rules can define state. There are shortcuts built in to define or clear state: rules /rpn/ :RPN # sets @state to :RPN # ... :RPN /alg/ nil # clears @state ==== Use a `start` section to define pre-lex code. The lexer runs in a loop until it finds a match or has to bail. Sometimes more complex lexers need to set some local state. You can now do this in a `start` section. Eg: start space_seen = false This code will get expanded into the very top of the lexer method. Do note that this code gets run before _every token_, not just on initialization. ==== Rule state can be inclusive or exclusive. This actually isn't new from rexical... It just wasn't really well documented. Exclusive states start with an uppercase letter (and are generally all uppercase). Inclusive states start with a lowercase letter. Exclusive states will _only_ try their own matchers. Inclusive states will also try any matcher w/o a state. In both cases, the order of generated matchers is strictly defined by the source file. Nothing is re-ordered, ever. Eg: rules /\d+/ /\s+/ # used in both nil-state and :rpn state /[+-]/ :rpn /\d+/ # won't hit, because of nil-state matcher above :OP /\s+/ # must define its own because no-nil-state matchers are used :OP /\d+/ end ==== Default `do_parse` will dispatch to lex_xxx automatically. The method `do_parse` is generated for you and automatically dispatches off to user-defined methods named `lex_` where token-type is the first value returned from any matching action. Eg: rules /\s*(\#.*)/ { [:comment, text] } # elsewhere: def lex_comment line # do nothing end ==== `text` is passed in, or use `match[n]` or `matches` You can use the `text` variable for the entire match inside an action, or you can use `match[n]` to access a specific match group, or `matches` to get an array of all match groups. Eg: /class ([\w:]+)(.*)/ { [:class, *matches] } In this case, the action will return something like: `[:class, "ClassName" "< Superclass"]`. ==== You can override the scanner class by defining `scanner_class`. Oedipus will define the method `scanner_class` to return `StringScanner` unless you define one yourself. Because it uses reflection to figure out whether you've defined it or not, you may need to require the generated lexer AFTER you've defined `scanner_class`. Eg: class MyLexer # ... def scanner_class CustomStringScanner end # ... end require "my_lexer.rex" **NOTE:** I'm _totally_ open to better ways of doing this. I simply needed to get stuff done and this presented itself as _viable-enough_. === Removals ==== There is no command-line tool. There is no command-line tool. Instead, use the rake rule described above. ==== There are only two options: debug and stub. All other options from rexical have been removed because they don't make sense in Oedipus. ==== Probably others... It's hard to think about what I took out. What I added is plain as day. :P [TIN321]: http://www.cse.chalmers.se/edu/year/2011/course/TIN321/lectures/proglang-04.html [LLVM Tutorial]: http://llvm.org/docs/tutorial/LangImpl1.html#language == Requirements: * ruby version 1.8.x or later. == Install * sudo gem install oedipus_lex == License (The MIT License) Copyright (c) Ryan Davis, seattle.rb Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the 'Software'), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ruby-oedipus-lex-2.6.0/Rakefile000066400000000000000000000035331434417246600164200ustar00rootroot00000000000000# -*- ruby -*- require "rubygems" require "hoe" Hoe.plugin :isolate Hoe.plugin :seattlerb Hoe.spec "oedipus_lex" do developer "Ryan Davis", "ryand-ruby@zenspider.com" license "MIT" self.readme_file = "README.rdoc" self.history_file = "History.rdoc" require_ruby_version [">= 2.4", "< 4.0"] end Hoe.bad_plugins.each do |bad| warn "BAD: Hoe.plugin :#{bad}" end task :bootstrap do ruby "-Ilib lib/oedipus_lex.rb lib/oedipus_lex.rex > lib/oedipus_lex.rex.rb.new" system "diff -uw lib/oedipus_lex.rex.rb lib/oedipus_lex.rex.rb.new" sh "mv lib/oedipus_lex.rex.rb.new lib/oedipus_lex.rex.rb" ruby "-S rake" end $: << "lib" Rake.application.rake_require "oedipus_lex" $rex_option[:stub] = true task :demo => Dir["sample/*.rex"].map { |s| "#{s}.rb" }.sort task :demo => :isolate do Dir.chdir "sample" do ruby "sample.rex.rb sample.html" ruby "sample.rex.rb sample.xhtml" ruby "sample1.rex.rb sample1.c" ruby "sample2.rex.rb sample2.bas" ruby "xhtmlparser.rex.rb xhtmlparser.html" ruby "xhtmlparser.rex.rb xhtmlparser.xhtml" cmd = "#{Gem.ruby} error1.rex.rb error1.txt" warn cmd system cmd cmd = "#{Gem.ruby} error2.rex.rb error1.txt" warn cmd system cmd end end task :raccdemo => :isolate do $rex_option[:stub] = false $rex_option[:do_parse] = false rm_f "sample/calc3.rex.rb" t = Rake.application["sample/calc3.rex.rb"] t.reenable t.invoke ruby "-S racc sample/calc3.racc" sh "echo 1 + 2 + 3 | #{Gem.ruby} -Isample sample/calc3.tab.rb" end task :clean do rm Dir["sample/*.rb"] end task :debug do require "oedipus_lex" f = ENV["F"] rex = OedipusLex.new $rex_option rex.parse_file f puts rex.generate end task :wtf => :isolate do puts `~/.rbenv/versions/2.2.0/bin/ruby -S gem env` puts `~/.rbenv/versions/2.2.0/bin/ruby -S gem list` end # vim: syntax=ruby ruby-oedipus-lex-2.6.0/checksums.yaml.gz.sig000066400000000000000000000004001434417246600210120ustar00rootroot00000000000000]X9a K;*/b>KLV8ӻΌ8xw&jSNnjV4#۟|fR+˹d#\}yUfT3i/m+>9yZF>^z&<%8"$w+"($BJp[w|kYRv D>>Ks.ƛE\u>eruby-oedipus-lex-2.6.0/data.tar.gz.sig000066400000000000000000000004001434417246600175620ustar00rootroot00000000000000lhm2+Z2k : VPh)j܄bDXBd9*2yn27-#{WA)v`=Jw>/yJ?0TWKJ\)wlGypž\Dsy ^߹Jb^bNXR9hG.vrOYR0Bdk<.@X },.s,IW=$ij_vARȷ`>soٖ1.-8 W=,ruby-oedipus-lex-2.6.0/lib/000077500000000000000000000000001434417246600155155ustar00rootroot00000000000000ruby-oedipus-lex-2.6.0/lib/oedipus_lex.rake000066400000000000000000000004621434417246600207030ustar00rootroot00000000000000# -*- ruby -*- $: << "lib" require "oedipus_lex" $rex_option = {} rule ".rex.rb" => proc {|path| path.sub(/\.rb$/, "") } do |t| warn "Generating #{t.name} from #{t.source}" rex = OedipusLex.new $rex_option rex.parse_file t.source File.open t.name, "w" do |f| f.write rex.generate end end ruby-oedipus-lex-2.6.0/lib/oedipus_lex.rb000066400000000000000000000350001434417246600203600ustar00rootroot00000000000000require "stringio" require 'strscan' require "erb" require "oedipus_lex.rex" ## # Oedipus Lex is a lexer generator in the same family as Rexical and # Rex. Oedipus Lex is my independent lexer fork of Rexical. Rexical # was in turn a fork of Rex. We've been unable to contact the author # of rex in order to take it over, fix it up, extend it, and relicense # it to MIT. So, Oedipus was written clean-room in order to bypass # licensing constraints (and because bootstrapping is fun). # # Oedipus brings a lot of extras to the table and at this point is # only historically related to rexical. The syntax has changed enough # that any rexical lexer will have to be tweaked to work inside of # oedipus. At the very least, you need to add slashes to all your # regexps. # # Oedipus, like rexical, is based primarily on generating code much # like you would a hand-written lexer. It is _not_ a table or hash # driven lexer. It uses StrScanner within a multi-level case # statement. As such, Oedipus matches on the _first_ match, not the # longest (like lex and its ilk). # # This documentation is not meant to bypass any prerequisite knowledge # on lexing or parsing. If you'd like to study the subject in further # detail, please try [TIN321] or the [LLVM Tutorial] or some other # good resource for CS learning. Books... books are good. I like # books. class OedipusLex VERSION = "2.6.0" # :nodoc: ## # The class name to generate. attr_accessor :class_name ## # An array of header lines to have before the lexer class. attr_accessor :header ## # An array of lines to have after the lexer class. attr_accessor :ends ## # An array of lines to have inside (but at the bottom of) the lexer # class. attr_accessor :inners ## # An array of name/regexp pairs to generate constants inside the # lexer class. attr_accessor :macros ## # A hash of options for the code generator. See README.rdoc for # supported options. attr_accessor :option ## # The rules for the lexer. attr_accessor :rules ## # An array of lines of code to generate into the top of the lexer # (next_token) loop. attr_accessor :starts ## # An array of all the groups within the lexer rules. attr_accessor :group DEFAULTS = { # :nodoc: :debug => false, :do_parse => false, :lineno => false, :column => false, :stub => false, } ## # A Rule represents the main component of Oedipus Lex. These are the # things that "get stuff done" at the lexical level. They consist of: # # + an optional required start state symbol or predicate method name # + a regexp to match on # + an optional action method or block class Rule < Struct.new :start_state, :regexp, :action ## # What group this rule is in, if any. attr_accessor :group alias :group? :group # :nodoc: ## # A simple constructor def self.[] start, regexp, action new start, regexp.inspect, action end def initialize start_state, regexp, action # :nodoc: super self.group = nil end undef_method :to_a ## # Generate equivalent ruby code for the rule. def to_ruby state, predicates, exclusive return unless group? or start_state == state or (state.nil? and predicates.include? start_state) uses_text = false body = case action when nil, false then " # do nothing" when /^\{/ then uses_text = action =~ /\btext\b/ " action #{action}" when /^:/, "nil" then " [:state, #{action}]" else # plain method name uses_text = true " #{action} text" end check = uses_text ? "text = ss.scan(#{regexp})" : "ss.skip(#{regexp})" cond = if exclusive or not start_state then check elsif /^:/.match?(start_state) then "(state == #{start_state}) && (#{check})" else # predicate method "#{start_state} && (#{check})" end ["when #{cond} then", body] end def pretty_print pp # :nodoc: pp.text "Rule" pp.group 2, "[", "]" do pp.pp start_state pp.text ", " pp.text regexp pp.text ", " pp.send(action ? :text : :pp, action) end end end ## # A group allows you to group up multiple rules under a single # regular prefix expression, allowing optimized code to be generated # that skips over all actions if the prefix isn't matched. class Group < Struct.new :regex, :rules alias :start_state :regex ## # A convenience method to create a new group with a +start+ and # given +subrules+. def self.[] start, *subrules r = new start.inspect r.rules.concat subrules r end def initialize start # :nodoc: super(start, []) end ## # Add a rule to this group. def << rule rules << rule nil end def to_ruby state, predicates, exclusive # :nodoc: [ "when ss.match?(#{regex}) then", " case", rules.map { |subrule| s = subrule.to_ruby(state, predicates, exclusive) s && s.join("\n").gsub(/^/, " ") }.compact, " end # group #{regex}" ] end def pretty_print pp # :nodoc: pp.text "Group" pp.group 2, "[", "]" do pp.seplist([regex] + rules, lambda { pp.comma_breakable }, :each) { |v| pp.send(String === v ? :text : :pp, v) } end end end ## # A convenience method to create a new lexer with a +name+ and given # +rules+. def self.[](name, *rules) r = new r.class_name = name r.rules.concat rules r end def initialize opts = {} # :nodoc: self.option = DEFAULTS.merge opts self.class_name = nil self.header = [] self.ends = [] self.inners = [] self.macros = [] self.rules = [] self.starts = [] self.group = nil end def == o # :nodoc: (o.class == self.class and o.class_name == self.class_name and o.header == self.header and o.ends == self.ends and o.inners == self.inners and o.macros == self.macros and o.rules == self.rules and o.starts == self.starts) end def pretty_print pp # :nodoc: commas = lambda { pp.comma_breakable } pp.text "Lexer" pp.group 2, "[", "]" do pp.seplist([class_name] + rules, commas, :each) { |v| pp.pp v } end end ## # Process a +class+ lexeme. def lex_class prefix, name header.concat prefix.split(/\n/) self.class_name = name end ## # Process a +comment+ lexeme. def lex_comment line # do nothing end ## # Process an +end+ lexeme. def lex_end line ends << line end ## # Process an +inner+ lexeme. def lex_inner line inners << line end ## # Process a +start+ lexeme. def lex_start line starts << line.strip end ## # Process a +macro+ lexeme. def lex_macro name, value macros << [name, value] end ## # Process an +option+ lexeme. def lex_option option self.option[option.to_sym] = true end ## # Process a +X+ lexeme. def lex_rule start_state, regexp, action = nil rules << Rule.new(start_state, regexp, action) end ## # Process a +group head+ lexeme. def lex_grouphead re end_group if group self.state = :group self.group = Group.new re end ## # Process a +group+ lexeme. def lex_group start_state, regexp, action = nil rule = Rule.new(start_state, regexp, action) rule.group = group self.group << rule end ## # End a group. def end_group rules << group self.group = nil self.state = :rule end ## # Process the end of a +group+ lexeme. def lex_groupend start_state, regexp, action = nil end_group lex_rule start_state, regexp, action end ## # Process a +state+ lexeme. def lex_state _new_state end_group if group # do nothing -- lexer switches state for us end ## # Generate the lexer. def generate filter = lambda { |r| Rule === r && r.start_state || nil } _mystates = rules.map(&filter).flatten.compact.uniq exclusives, inclusives = _mystates.partition { |s| s =~ /^:[A-Z]/ } # NOTE: doubling up assignment to remove unused var warnings in # ERB binding. all_states = all_states = [[nil, *inclusives], # nil+incls # eg [[nil, :a], *exclusives.map { |s| [s] }] # [excls] # [:A], [:B]] encoding = header.shift if /encoding:/.match?(header.first) encoding ||= "# encoding: UTF-8" erb = if RUBY_VERSION >= "2.6.0" then ERB.new(TEMPLATE, trim_mode:"%") else ERB.new(TEMPLATE, nil, "%") end erb.result binding end # :stopdoc: TEMPLATE = <<-'REX'.gsub(/^ {6}/, '') # frozen_string_literal: true <%= encoding %> #-- # This file is automatically generated. Do not modify it. # Generated by: oedipus_lex version <%= VERSION %>. % if filename then # Source: <%= filename %> % end #++ % unless header.empty? then % header.each do |s| <%= s %> % end % end ## # The generated lexer <%= class_name %> class <%= class_name %> require 'strscan' % unless macros.empty? then # :stopdoc: % max = macros.map { |(k,_)| k.size }.max % macros.each do |(k,v)| <%= "%-#{max}s = %s" % [k, v] %> % end # :startdoc: % end # :stopdoc: class LexerError < StandardError ; end class ScanError < LexerError ; end # :startdoc: % if option[:lineno] then ## # The current line number. attr_accessor :lineno % end ## # The file name / path attr_accessor :filename ## # The StringScanner for this lexer. attr_accessor :ss ## # The current lexical state. attr_accessor :state alias :match :ss ## # The match groups for the current scan. def matches m = (1..9).map { |i| ss[i] } m.pop until m[-1] or m.empty? m end ## # Yields on the current action. def action yield end % if option[:column] then ## # The previous position. Only available if the :column option is on. attr_accessor :old_pos ## # The position of the start of the current line. Only available if the # :column option is on. attr_accessor :start_of_current_line_pos ## # The current column, starting at 0. Only available if the # :column option is on. def column old_pos - start_of_current_line_pos end % end % if option[:do_parse] then ## # Parse the file by getting all tokens and calling lex_+type+ on them. def do_parse while token = next_token do type, *vals = token send "lex_#{type}", *vals end end % end ## # The current scanner class. Must be overridden in subclasses. def scanner_class StringScanner end unless instance_methods(false).map(&:to_s).include?("scanner_class") ## # Parse the given string. def parse str self.ss = scanner_class.new str % if option[:lineno] then self.lineno = 1 % end % if option[:column] then self.start_of_current_line_pos = 0 % end self.state ||= nil do_parse end ## # Read in and parse the file at +path+. def parse_file path self.filename = path open path do |f| parse f.read end end ## # The current location in the parse. def location [ (filename || ""), % if option[:lineno] then lineno, % elsif option[:column] then "?", % end % if option[:column] then column, % end ].compact.join(":") end ## # Lex the next token. def next_token % starts.each do |s| <%= s %> % end token = nil until ss.eos? or token do % if option[:lineno] then if ss.check(/\n/) then self.lineno += 1 % if option[:column] then # line starts 1 position after the newline self.start_of_current_line_pos = ss.pos + 1 % end end % end % if option[:column] then self.old_pos = ss.pos % end token = case state % all_states.each do |the_states| % exclusive = the_states.first != nil % the_states, predicates = the_states.partition { |s| s.nil? or s.start_with? ":" } when <%= the_states.map { |s| s || "nil" }.join ", " %> then case % the_states.each do |state| % lines = rules.map { |r| r.to_ruby state, predicates, exclusive }.compact <%= lines.join("\n").gsub(/^/, " " * 10) %> % end # the_states.each else text = ss.string[ss.pos .. -1] raise ScanError, "can not match (#{state.inspect}) at #{location}: '#{text}'" end % end # all_states else raise ScanError, "undefined state at #{location}: '#{state}'" end # token = case state next unless token # allow functions to trigger redo w/ nil end # while raise LexerError, "bad lexical result at #{location}: #{token.inspect}" unless token.nil? || (Array === token && token.size >= 2) # auto-switch state self.state = token.last if token && token.first == :state % if option[:debug] then p [state, token] % end token end # def next_token % inners.each do |s| <%= s %> % end end # class % unless ends.empty? then % ends.each do |s| <%= s %> % end % end % if option[:stub] then if __FILE__ == $0 ARGV.each do |path| rex = <%= class_name %>.new def rex.do_parse while token = self.next_token p token end end begin rex.parse_file path rescue lineno = rex.respond_to?(:lineno) ? rex.lineno : -1 $stderr.printf "%s:%d:%s\n", rex.filename, lineno, $!.message exit 1 end end end % end REX # :startdoc: end if $0 == __FILE__ then ARGV.each do |path| rex = OedipusLex.new rex.parse_file path puts rex.generate end end ruby-oedipus-lex-2.6.0/lib/oedipus_lex.rex000066400000000000000000000034441434417246600205620ustar00rootroot00000000000000class OedipusLex option do_parse lineno column macro ST /(?:(:\S+|\w+\??))/ RE /(\/(?:\\.|[^\/])*\/[ion]?)/ ACT /(\{.*|:?\w+)/ rule # [state] /pattern/[flags] [actions] # nil state applies to all states, so we use this to switch lexing modes /options?.*/ :option /inner.*/ :inner /macros?.*/ :macro /rules?.*/ :rule /start.*/ :start /end/ :END /\A((?:.|\n)*)class ([\w:]+.*)/ { [:class, *matches] } /\n+/ # do nothing /\s*(\#.*)/ { [:comment, text] } :option /\s+/ # do nothing :option /stub/i { [:option, text] } :option /debug/i { [:option, text] } :option /do_parse/i { [:option, text] } :option /lineno/i { [:option, text] } :option /column/i { [:option, text] } :inner /.*/ { [:inner, text] } :start /.*/ { [:start, text] } :macro /\s+(\w+)\s+#{RE}/o { [:macro, *matches] } :rule /\s*#{ST}?[\ \t]*#{RE}[\ \t]*#{ACT}?/o { [:rule, *matches] } :rule /\s*:[\ \t]*#{RE}/o { [:grouphead, *matches] } :group /\s*:[\ \t]*#{RE}/o { [:grouphead, *matches] } :group /\s*\|\s*#{ST}?[\ \t]*#{RE}[\ \t]*#{ACT}?/o { [:group, *matches] } :group /\s*#{ST}?[\ \t]*#{RE}[\ \t]*#{ACT}?/o { [:groupend, *matches] } :END /\n+/ # do nothing :END /.*/ { [:end, text] } end ruby-oedipus-lex-2.6.0/lib/oedipus_lex.rex.rb000066400000000000000000000133711434417246600211640ustar00rootroot00000000000000# frozen_string_literal: true # encoding: UTF-8 #-- # This file is automatically generated. Do not modify it. # Generated by: oedipus_lex version 2.6.0. # Source: lib/oedipus_lex.rex #++ ## # The generated lexer OedipusLex class OedipusLex require 'strscan' # :stopdoc: ST = /(?:(:\S+|\w+\??))/ RE = /(\/(?:\\.|[^\/])*\/[ion]?)/ ACT = /(\{.*|:?\w+)/ # :startdoc: # :stopdoc: class LexerError < StandardError ; end class ScanError < LexerError ; end # :startdoc: ## # The current line number. attr_accessor :lineno ## # The file name / path attr_accessor :filename ## # The StringScanner for this lexer. attr_accessor :ss ## # The current lexical state. attr_accessor :state alias :match :ss ## # The match groups for the current scan. def matches m = (1..9).map { |i| ss[i] } m.pop until m[-1] or m.empty? m end ## # Yields on the current action. def action yield end ## # The previous position. Only available if the :column option is on. attr_accessor :old_pos ## # The position of the start of the current line. Only available if the # :column option is on. attr_accessor :start_of_current_line_pos ## # The current column, starting at 0. Only available if the # :column option is on. def column old_pos - start_of_current_line_pos end ## # Parse the file by getting all tokens and calling lex_+type+ on them. def do_parse while token = next_token do type, *vals = token send "lex_#{type}", *vals end end ## # The current scanner class. Must be overridden in subclasses. def scanner_class StringScanner end unless instance_methods(false).map(&:to_s).include?("scanner_class") ## # Parse the given string. def parse str self.ss = scanner_class.new str self.lineno = 1 self.start_of_current_line_pos = 0 self.state ||= nil do_parse end ## # Read in and parse the file at +path+. def parse_file path self.filename = path open path do |f| parse f.read end end ## # The current location in the parse. def location [ (filename || ""), lineno, column, ].compact.join(":") end ## # Lex the next token. def next_token token = nil until ss.eos? or token do if ss.check(/\n/) then self.lineno += 1 # line starts 1 position after the newline self.start_of_current_line_pos = ss.pos + 1 end self.old_pos = ss.pos token = case state when nil, :option, :inner, :start, :macro, :rule, :group then case when ss.skip(/options?.*/) then [:state, :option] when ss.skip(/inner.*/) then [:state, :inner] when ss.skip(/macros?.*/) then [:state, :macro] when ss.skip(/rules?.*/) then [:state, :rule] when ss.skip(/start.*/) then [:state, :start] when ss.skip(/end/) then [:state, :END] when ss.skip(/\A((?:.|\n)*)class ([\w:]+.*)/) then action { [:class, *matches] } when ss.skip(/\n+/) then # do nothing when text = ss.scan(/\s*(\#.*)/) then action { [:comment, text] } when (state == :option) && (ss.skip(/\s+/)) then # do nothing when (state == :option) && (text = ss.scan(/stub/i)) then action { [:option, text] } when (state == :option) && (text = ss.scan(/debug/i)) then action { [:option, text] } when (state == :option) && (text = ss.scan(/do_parse/i)) then action { [:option, text] } when (state == :option) && (text = ss.scan(/lineno/i)) then action { [:option, text] } when (state == :option) && (text = ss.scan(/column/i)) then action { [:option, text] } when (state == :inner) && (text = ss.scan(/.*/)) then action { [:inner, text] } when (state == :start) && (text = ss.scan(/.*/)) then action { [:start, text] } when (state == :macro) && (ss.skip(/\s+(\w+)\s+#{RE}/o)) then action { [:macro, *matches] } when (state == :rule) && (ss.skip(/\s*#{ST}?[\ \t]*#{RE}[\ \t]*#{ACT}?/o)) then action { [:rule, *matches] } when (state == :rule) && (ss.skip(/\s*:[\ \t]*#{RE}/o)) then action { [:grouphead, *matches] } when (state == :group) && (ss.skip(/\s*:[\ \t]*#{RE}/o)) then action { [:grouphead, *matches] } when (state == :group) && (ss.skip(/\s*\|\s*#{ST}?[\ \t]*#{RE}[\ \t]*#{ACT}?/o)) then action { [:group, *matches] } when (state == :group) && (ss.skip(/\s*#{ST}?[\ \t]*#{RE}[\ \t]*#{ACT}?/o)) then action { [:groupend, *matches] } else text = ss.string[ss.pos .. -1] raise ScanError, "can not match (#{state.inspect}) at #{location}: '#{text}'" end when :END then case when ss.skip(/\n+/) then # do nothing when text = ss.scan(/.*/) then action { [:end, text] } else text = ss.string[ss.pos .. -1] raise ScanError, "can not match (#{state.inspect}) at #{location}: '#{text}'" end else raise ScanError, "undefined state at #{location}: '#{state}'" end # token = case state next unless token # allow functions to trigger redo w/ nil end # while raise LexerError, "bad lexical result at #{location}: #{token.inspect}" unless token.nil? || (Array === token && token.size >= 2) # auto-switch state self.state = token.last if token && token.first == :state token end # def next_token end # class ruby-oedipus-lex-2.6.0/metadata.gz.sig000066400000000000000000000004001434417246600176440ustar00rootroot00000000000000-VFkrXpG0_3t6pu+H_rtO|F=yg'0rZ {3U#͍KJT7(/z?Tp&4EsSbzM5Fk,= F '4&j%*[wtri6uOzIE xktfN ҨOw%2(Wvrq&j}_hCN1·gcsruby-oedipus-lex-2.6.0/oedipus_lex.gemspec000066400000000000000000000113561434417246600206420ustar00rootroot00000000000000######################################################### # This file has been automatically generated by gem2tgz # ######################################################### # -*- encoding: utf-8 -*- # stub: oedipus_lex 2.6.0 ruby lib Gem::Specification.new do |s| s.name = "oedipus_lex".freeze s.version = "2.6.0" s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version= s.metadata = { "homepage_uri" => "http://github.com/seattlerb/oedipus_lex" } if s.respond_to? :metadata= s.require_paths = ["lib".freeze] s.authors = ["Ryan Davis".freeze] s.cert_chain = ["-----BEGIN CERTIFICATE-----\nMIIDPjCCAiagAwIBAgIBBTANBgkqhkiG9w0BAQsFADBFMRMwEQYDVQQDDApyeWFu\nZC1ydWJ5MRkwFwYKCZImiZPyLGQBGRYJemVuc3BpZGVyMRMwEQYKCZImiZPyLGQB\nGRYDY29tMB4XDTIwMTIyMjIwMzgzMFoXDTIxMTIyMjIwMzgzMFowRTETMBEGA1UE\nAwwKcnlhbmQtcnVieTEZMBcGCgmSJomT8ixkARkWCXplbnNwaWRlcjETMBEGCgmS\nJomT8ixkARkWA2NvbTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBALda\nb9DCgK+627gPJkB6XfjZ1itoOQvpqH1EXScSaba9/S2VF22VYQbXU1xQXL/WzCkx\ntaCPaLmfYIaFcHHCSY4hYDJijRQkLxPeB3xbOfzfLoBDbjvx5JxgJxUjmGa7xhcT\noOvjtt5P8+GSK9zLzxQP0gVLS/D0FmoE44XuDr3iQkVS2ujU5zZL84mMNqNB1znh\nGiadM9GHRaDiaxuX0cIUBj19T01mVE2iymf9I6bEsiayK/n6QujtyCbTWsAS9Rqt\nqhtV7HJxNKuPj/JFH0D2cswvzznE/a5FOYO68g+YCuFi5L8wZuuM8zzdwjrWHqSV\ngBEfoTEGr7Zii72cx+sCAwEAAaM5MDcwCQYDVR0TBAIwADALBgNVHQ8EBAMCBLAw\nHQYDVR0OBBYEFEfFe9md/r/tj/Wmwpy+MI8d9k/hMA0GCSqGSIb3DQEBCwUAA4IB\nAQAE3XRm1YZcCVjAJy5yMZvTOFrS7B2SYErc+0QwmKYbHztTTDY2m5Bii+jhpuxh\nH+ETcU1z8TUKLpsBUP4kUpIRowkVN1p/jKapV8T3Rbwq+VuYFe+GMKsf8wGZSecG\noMQ8DzzauZfbvhe2kDg7G9BBPU0wLQlY25rDcCy9bLnD7R0UK3ONqpwvsI5I7x5X\nZIMXR0a9/DG+55mawwdGzCQobDKiSNLK89KK7OcNTALKU0DfgdTkktdgKchzKHqZ\nd/AHw/kcnU6iuMUoJEcGiJd4gVCTn1l3cDcIvxakGslCA88Jubw0Sqatan0TnC9g\nKToW560QIey7SPfHWduzFJnV\n-----END CERTIFICATE-----\n".freeze] s.date = "2021-10-27" s.description = "Oedipus Lex is a lexer generator in the same family as Rexical and\nRex. Oedipus Lex is my independent lexer fork of Rexical. Rexical was\nin turn a fork of Rex. We've been unable to contact the author of rex\nin order to take it over, fix it up, extend it, and relicense it to\nMIT. So, Oedipus was written clean-room in order to bypass licensing\nconstraints (and because bootstrapping is fun).\n\nOedipus brings a lot of extras to the table and at this point is only\nhistorically related to rexical. The syntax has changed enough that\nany rexical lexer will have to be tweaked to work inside of oedipus.\nAt the very least, you need to add slashes to all your regexps.\n\nOedipus, like rexical, is based primarily on generating code much like\nyou would a hand-written lexer. It is _not_ a table or hash driven\nlexer. It uses StrScanner within a multi-level case statement. As such,\nOedipus matches on the _first_ match, not the longest (like lex and\nits ilk).\n\nThis documentation is not meant to bypass any prerequisite knowledge\non lexing or parsing. If you'd like to study the subject in further\ndetail, please try [TIN321] or the [LLVM Tutorial] or some other good\nresource for CS learning. Books... books are good. I like books.".freeze s.email = ["ryand-ruby@zenspider.com".freeze] s.extra_rdoc_files = ["History.rdoc".freeze, "Manifest.txt".freeze, "README.rdoc".freeze, "sample/error1.txt".freeze] s.files = [".autotest".freeze, "History.rdoc".freeze, "Manifest.txt".freeze, "README.rdoc".freeze, "Rakefile".freeze, "lib/oedipus_lex.rake".freeze, "lib/oedipus_lex.rb".freeze, "lib/oedipus_lex.rex".freeze, "lib/oedipus_lex.rex.rb".freeze, "rex-mode.el".freeze, "sample/calc3.racc".freeze, "sample/calc3.rex".freeze, "sample/error1.rex".freeze, "sample/error1.txt".freeze, "sample/error2.rex".freeze, "sample/sample.html".freeze, "sample/sample.rex".freeze, "sample/sample.xhtml".freeze, "sample/sample1.c".freeze, "sample/sample1.rex".freeze, "sample/sample2.bas".freeze, "sample/sample2.rex".freeze, "sample/xhtmlparser.html".freeze, "sample/xhtmlparser.racc".freeze, "sample/xhtmlparser.rex".freeze, "sample/xhtmlparser.xhtml".freeze, "test/test_oedipus_lex.rb".freeze] s.homepage = "http://github.com/seattlerb/oedipus_lex".freeze s.licenses = ["MIT".freeze] s.rdoc_options = ["--main".freeze, "README.rdoc".freeze] s.required_ruby_version = Gem::Requirement.new([">= 2.4".freeze, "< 4.0".freeze]) s.rubygems_version = "3.1.2".freeze s.summary = "Oedipus Lex is a lexer generator in the same family as Rexical and Rex".freeze if s.respond_to? :specification_version then s.specification_version = 4 end if s.respond_to? :add_runtime_dependency then s.add_development_dependency(%q.freeze, ["~> 3.22"]) s.add_development_dependency(%q.freeze, [">= 4.0", "< 7"]) else s.add_dependency(%q.freeze, ["~> 3.22"]) s.add_dependency(%q.freeze, [">= 4.0", "< 7"]) end end ruby-oedipus-lex-2.6.0/rex-mode.el000066400000000000000000000036641434417246600170220ustar00rootroot00000000000000;;; rex-mode.el --- Generic mode for rex/rexical/oedipus_lex files ;; Copyright (c) Ryan Davis, seattle.rb ;; ;; Author: Ryan Davis ;; Keywords: languages ;; (The MIT License) ;; ;; Permission is hereby granted, free of charge, to any person obtaining ;; a copy of this software and associated documentation files (the ;; 'Software'), to deal in the Software without restriction, including ;; without limitation the rights to use, copy, modify, merge, publish, ;; distribute, sublicense, and/or sell copies of the Software, and to ;; permit persons to whom the Software is furnished to do so, subject to ;; the following conditions: ;; ;; The above copyright notice and this permission notice shall be ;; included in all copies or substantial portions of the Software. ;; ;; THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, ;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ;;; Commentary: ;; woot ;;; Code: (define-generic-mode rex-mode ;; comments '(?#) ;; keywords nil ;; font-lock-faces '(("/\\(\\\\.\\|[^/]\\)*/" . font-lock-string-face) (":[a-zA-Z_][a-zA-Z0-9_]*" . font-lock-variable-name-face) ("^ *\\([A-Z][A-Z0-9_]*\\)" 1 font-lock-variable-name-face) ("^\\(?:end\\|inner\\|macro\\|option\\|rule\\)" . font-lock-keyword-face) ("class [A-Z][a-zA-Z_]+" . font-lock-keyword-face)) ;; auto-mode '("\\.rex$") ;; functions nil "Simple generic mode for rex/rexical/t-rex files") (provide 'rex-mode) ;;; rex-mode.el ends here ruby-oedipus-lex-2.6.0/sample/000077500000000000000000000000001434417246600162305ustar00rootroot00000000000000ruby-oedipus-lex-2.6.0/sample/calc3.racc000066400000000000000000000016131434417246600200500ustar00rootroot00000000000000# # A simple calculator, version 3. # class Calculator3 prechigh nonassoc UMINUS left '*' '/' left '+' '-' preclow options no_result_var rule target : exp | /* none */ { 0 } exp : exp '+' exp { val[0] + val[2] } | exp '-' exp { val[0] - val[2] } | exp '*' exp { val[0] * val[2] } | exp '/' exp { val[0] / val[2] } | '(' exp ')' { val[1] } | '-' NUMBER =UMINUS { -(val[1]) } | NUMBER end ---- header ---- # # generated by racc # require 'calc3.rex' ---- inner ---- ---- footer ---- if $stdin.tty? then puts 'sample calc' puts '"q" to quit.' end calc = Calculator3.new while true if $stdin.tty? then print '>>> '; $stdout.flush end str = $stdin.gets.strip break if /q/i === str begin p calc.parse str rescue ParseError puts 'parse error' end break unless $stdin.tty? end ruby-oedipus-lex-2.6.0/sample/calc3.rex000066400000000000000000000003561434417246600177410ustar00rootroot00000000000000# # calc3.rex # lexical scanner definition for rex # class Calculator3 < Racc::Parser macro BLANK /\s+/ DIGIT /\d+/ rule /{{BLANK}}/ /{{DIGIT}}/ { [:NUMBER, text.to_i] } /.|\n/ { [text, text] } inner end ruby-oedipus-lex-2.6.0/sample/error1.rex000066400000000000000000000003721434417246600201640ustar00rootroot00000000000000# # eooro1.rex # lexical definition sample for rex # class Error1 macro BLANK /[\ \t]+/ rule /{{BLANK}}/ # no action /\d+/ { [:digit, text.to_i] } /\w+/ { [:word, text] } /\n/ # /./ { [text, text] } end ruby-oedipus-lex-2.6.0/sample/error1.txt000066400000000000000000000000111434417246600201730ustar00rootroot000000000000001 woot ? ruby-oedipus-lex-2.6.0/sample/error2.rex000066400000000000000000000004211434417246600201600ustar00rootroot00000000000000# # error2.rex # lexical definition sample for rex # class Error2 macro BLANK /[\ \t]+/ rule /{{BLANK}}/ # no action /\d+/ { [:digit, text.to_i] } /\w+/ { [:word, text] } /\n/ /./ { self.state = :NONDEF ; [text, text] } end ruby-oedipus-lex-2.6.0/sample/sample.html000066400000000000000000000014671434417246600204070ustar00rootroot00000000000000 Title

HTML 4.01

ruby-oedipus-lex-2.6.0/sample/sample.rex000066400000000000000000000003721434417246600202330ustar00rootroot00000000000000# # sample.rex # lexical definition sample for rex # class Sample macro BLANK /[\ \t]+/ rule /{{BLANK}}/ # no action /\d+/ { [:digit, text.to_i] } /\w+/ { [:word, text] } /\n/ /./ { [text, text] } end ruby-oedipus-lex-2.6.0/sample/sample.xhtml000066400000000000000000000016001434417246600205640ustar00rootroot00000000000000 Title

XHTML 1.1

ruby-oedipus-lex-2.6.0/sample/sample1.c000066400000000000000000000001721434417246600177360ustar00rootroot00000000000000 int main(int argc, char **argv) { /* block remark */ int i = 100; // inline remark printf("hello, world\n"); } ruby-oedipus-lex-2.6.0/sample/sample1.rex000066400000000000000000000016671434417246600203240ustar00rootroot00000000000000# # sample1.rex # lexical definition sample for rex # # usage # rex sample1.rex --stub # ruby sample1.rex.rb sample1.c # class Sample1 macro BLANK /\s+/ REM_IN /\/\*/ REM_OUT /\*\// REM /\/\// rule # [:state] pattern [actions] # remark /{{REM_IN}}/ :REMS :REMS /{{REM_OUT}}/ { [:state, nil] } :REMS /.*(?={{REM_OUT}})/ { [:remark, text] } /{{REM}}/ :REM :REM /\n/ { [:state, nil] } :REM /.*(?=$)/ { [:remark, text] } # literal /\"[^"]*\"/ { [:string, text] } /\'[^']\'/ { [:character, text] } # skip /{{BLANK}}/ # numeric /\d+/ { [:digit, text.to_i] } # identifier /\w+/ { [:word, text] } /./ { [text, text] } end ruby-oedipus-lex-2.6.0/sample/sample2.bas000066400000000000000000000000671434417246600202650ustar00rootroot00000000000000' inline remark i = 100 input st print "hello, world" ruby-oedipus-lex-2.6.0/sample/sample2.rex000066400000000000000000000013131434417246600203110ustar00rootroot00000000000000# # sample2.rex # lexical definition sample for rex # # usage # rex sample2.rex --stub # ruby sample2.rex.rb sample2.bas # class Sample2 macro BLANK /\s+/ REMARK /\'/ # ' rule /{{REMARK}}/ :REM :REM /\n/ { [:state, nil] } :REM /.*(?=$)/ { [:remark, text] } /\"[^"]*\"/ { [:string, text] } /{{BLANK}}/ # no action /INPUT/i { [:input, text] } /PRINT/i { [:print, text] } /\d+/ { [:digit, text.to_i] } /\w+/ { [:word, text] } /./ { [text, text] } end ruby-oedipus-lex-2.6.0/sample/xhtmlparser.html000066400000000000000000000001071434417246600214650ustar00rootroot00000000000000

Hello World.

ruby-oedipus-lex-2.6.0/sample/xhtmlparser.racc000066400000000000000000000024331434417246600214350ustar00rootroot00000000000000# # xml parser # class XHTMLParser rule target : /* none */ | xml_doc xml_doc : xml_header extra xml_body | xml_header xml_body | xml_body xml_header : xtag_in element attributes xtag_out xml_body : tag_from contents tag_to tag_from : tag_in element attributes tag_out tag_empty : tag_in element attributes etag_out tag_to : etag_in element tag_out attributes : /* none */ | attributes attribute attribute : attr equal quoted quoted : quote1 value quote1 | quote2 value quote2 contents : /* none */ | contents content content : text | extra | tag_from contents tag_to | tag_empty extra : tag_in ext extra_texts tag_out extra_texts : /* none */ | extra_texts rem_in remtexts rem_out | extra_texts exttext remtexts : remtext | remtexts remtext end ---- header ---- # # generated by racc # require 'xhtmlparser.rex' ---- inner ---- ---- footer ---- exit if ARGV.size == 0 filename = ARGV.shift htmlparser = XHTMLParser.new htmlparser.scan_file filename ruby-oedipus-lex-2.6.0/sample/xhtmlparser.rex000066400000000000000000000053551434417246600213310ustar00rootroot00000000000000# # xhtmlparser.rex # lexical scanner definition for rex # # usage # rex xhtmlparser.rex --stub # ruby xhtmlparser.rex.rb sample.xhtml # class XHTMLParser macro BLANK /\s+/ TAG_IN /\/ ETAG_IN /\<\// ETAG_OUT /\/\>/ XTAG_IN /\<\?/ XTAG_OUT /\?\>/ EXT /\!/ REM /\-\-/ EQUAL /\=/ Q1 /\'/ Q2 /\"/ rule # [:state] pattern [actions] /{{XTAG_IN}}/ { state = :TAG; [:xtag_in, text] } /{{ETAG_IN}}/ { state = :TAG; [:etag_in, text] } /{{TAG_IN}}/ { state = :TAG; [:tag_in, text] } :TAG /{{EXT}}/ { state = :EXT; [:ext, text] } :EXT /{{REM}}/ { state = :REM; [:rem_in, text] } :EXT /{{XTAG_OUT}}/ { state = nil; [:xtag_out, text] } :EXT /{{TAG_OUT}}/ { state = nil; [:tag_out, text] } :EXT /.+(?={{REM}})/ { [:exttext, text] } :EXT /.+(?={{TAG_OUT}})/ { [:exttext, text] } :EXT /.+(?=$)/ { [:exttext, text] } :EXT /\n/ :REM /{{REM}}/ { state = :EXT; [:rem_out, text] } :REM /.+(?={{REM}})/ { [:remtext, text] } :REM /.+(?=$)/ { [:remtext, text] } :REM /\n/ :TAG /{{BLANK}}/ :TAG /{{XTAG_OUT}}/ { state = nil; [:xtag_out, text] } :TAG /{{ETAG_OUT}}/ { state = nil; [:etag_out, text] } :TAG /{{TAG_OUT}}/ { state = nil; [:tag_out, text] } :TAG /{{EQUAL}}/ { [:equal, text] } :TAG /{{Q1}}/ { state = :Q1; [:quote1, text] } :Q1 /{{Q1}}/ { state = :TAG; [:quote1, text] } :Q1 /[^{{Q1}}]+(?={{Q1}})/ { [:value, text] } :TAG /{{Q2}}/ { state = :Q2; [:quote2, text] } :Q2 /{{Q2}}/ { state = :TAG; [:quote2, text] } :Q2 /[^{{Q2}}]+(?={{Q2}})/ { [:value, text] } :TAG /[\w\-]+(?={{EQUAL}})/ { [:attr, text] } :TAG /[\w\-]+/ { [:element, text] } /\s+(?=\S)/ /.*\S(?=\s*{{ETAG_IN}})/ { [:text, text] } /.*\S(?=\s*{{TAG_IN}})/ { [:text, text] } /.*\S(?=\s*$)/ { [:text, text] } /\s+(?=$)/ inner end ruby-oedipus-lex-2.6.0/sample/xhtmlparser.xhtml000066400000000000000000000003751434417246600216640ustar00rootroot00000000000000

XHTML 1.1

ruby-oedipus-lex-2.6.0/test/000077500000000000000000000000001434417246600157265ustar00rootroot00000000000000ruby-oedipus-lex-2.6.0/test/test_oedipus_lex.rb000066400000000000000000000452601434417246600216410ustar00rootroot00000000000000require "minitest/autorun" require "oedipus_lex" require "stringio" class TestOedipusLex < Minitest::Test attr_accessor :option def setup self.option = {} end def generate_lexer grammar rex = OedipusLex.new option rex.parse cleanup grammar rex.generate end def assert_generate_error grammar, expected_msg rex = OedipusLex.new option e = assert_raises OedipusLex::ScanError do rex.parse cleanup grammar end assert_match expected_msg, e.message end def cleanup s s.gsub(/^ {6}/, "") end def eval_lexer grammar ruby = generate_lexer grammar if option[:wtf] puts puts ruby puts end mod = Module.new mod.module_eval ruby return ruby, mod end def assert_lexer grammar, input, expected _, mod = eval_lexer grammar calc = mod::Calculator.new def calc.do_parse tokens = [] while token = next_token tokens << token end tokens end tokens = calc.parse input assert_equal expected, tokens end def assert_lexer_error grammar, input, expected_msg _, mod = eval_lexer grammar calc = mod::Calculator.new def calc.do_parse tokens = [] while token = next_token tokens << token end tokens end e = assert_raises mod::Calculator::ScanError do calc.parse input end assert_equal expected_msg, e.message end def assert_token_error grammar, input, expected_msg _, mod = eval_lexer grammar calc = mod::Calculator.new def calc.do_parse tokens = [] while token = next_token tokens << token end tokens end e = assert_raises mod::Calculator::LexerError do calc.parse input end assert_equal expected_msg, e.message end def test_simple_scanner src = <<-'REX' class Calculator rule /\d+/ { [:number, text.to_i] } /\s+/ /[+-]/ { [:op, text] } end REX txt = "1 + 2 + 3" exp = [[:number, 1], [:op, "+"], [:number, 2], [:op, "+"], [:number, 3]] assert_lexer src, txt, exp end def test_simple_scanner_bug_trailing_comment src = <<-'REX' class Calculator rule /\d+/ { [:number, text.to_i] } # numbers /\s+/ # do nothing /[+-]/ { [:op, text] } end REX txt = "1 + 2 + 3" exp = [[:number, 1], [:op, "+"], [:number, 2], [:op, "+"], [:number, 3]] assert_lexer src, txt, exp end def test_simple_scanner_multiline_action_error src = <<-'REX' class Calculator rule /\d+/ { [:number, text.to_i] } /\s+/ /[+-]/ { [:op, text] } end REX assert_generate_error src, "can not match (:rule) at :4:0: '" end def test_simple_scanner_macro src = <<-'REX' class Calculator macro N /\d+/ rule /#{N}/ { [:number, text.to_i] } /\s+/ /[+-]/ { [:op, text] } end REX txt = "1 + 2 + 30" exp = [[:number, 1], [:op, "+"], [:number, 2], [:op, "+"], [:number, 30]] assert_lexer src, txt, exp end def test_simple_scanner_macro_slashes src = <<-'REX' class Calculator macro N /\d+/i rule /#{N}/o { [:number, text.to_i] } /\s+/ /[+-]/ { [:op, text] } end REX txt = "1 + 2 + 30" exp = [[:number, 1], [:op, "+"], [:number, 2], [:op, "+"], [:number, 30]] assert_lexer src, txt, exp end def test_simple_scanner_macro_slash_n_generator src = <<-'REX' class Calculator macro N /\d+/n rule /#{N}/o { [:number, text.to_i] } /\s+/ /[+-]/ { [:op, text] } end REX ruby = generate_lexer src assert_match "/\\d+/n", ruby end def test_simple_scanner_recursive_macro src = <<-'REX' class Calculator macro D /\d/ N /#{D}+/ rule /#{N}/ { [:number, text.to_i] } /\s+/ /[+-]/ { [:op, text] } end REX txt = "1 + 2 + 30" exp = [[:number, 1], [:op, "+"], [:number, 2], [:op, "+"], [:number, 30]] assert_lexer src, txt, exp end def test_simple_scanner_debug_arg src = <<-'REX' class Calculator rule /\d+/ { [:number, text.to_i] } /\s+/ /[+-]/ { [:op, text] } end REX txt = "1 + 2 + 30" exp = [[:number, 1], [:op, "+"], [:number, 2], [:op, "+"], [:number, 30]] option[:debug] = true out, err = capture_io do assert_lexer src, txt, exp end exp = exp.zip([nil]).flatten(1) # ugly, but much more compact exp.pop # remove last nil exp = exp.map(&:inspect).join("\n") + "\n" assert_equal "", err assert_match "[:number, 1]", out assert_match "[:op, \"+\"]", out end def test_column src = <<-'REX' class Calculator rule /\d+/ { [:number, text.to_i, lineno, column] } /\s+/ /[+-]/ { [:op, text, lineno, column] } end REX txt = "1 + 2\n+ 30" exp = [[:number, 1, 1, 0], [:op, "+", 1, 2], [:number, 2, 1, 4], [:op, "+", 2, 0], [:number, 30, 2, 2]] option[:column] = true option[:lineno] = true assert_lexer src, txt, exp end def test_simple_scanner_debug_src src = <<-'REX' class Calculator option debug rule /\d+/ { [:number, text.to_i] } /\s+/ /[+-]/ { [:op, text] } end REX txt = "1 + 2 + 30" exp = [[:number, 1], [:op, "+"], [:number, 2], [:op, "+"], [:number, 30]] out, err = capture_io do assert_lexer src, txt, exp end exp = exp.zip([nil]).flatten(1) # ugly, but much more compact exp.pop # remove last nil exp = exp.map(&:inspect).join("\n") + "\n" assert_equal "", err assert_match "[:number, 1]", out assert_match "[:op, \"+\"]", out end def test_simple_scanner_inclusive src = <<-'REX' class Calculator rule /\d+/ { [:number, text.to_i] } /\s+/ /[+-]/ { @state = :op; [:op, text] } # nil state always goes first, so we won't get this :op /\d+/ { @state = nil; [:bad, text.to_i] } end REX txt = "1 + 2 + 30" exp = [[:number, 1], [:op, "+"], [:number, 2], [:op, "+"], [:number, 30]] assert_lexer src, txt, exp end def test_simple_scanner_exclusive src = <<-'REX' class Calculator rule /\d+/ { [:number, text.to_i] } /\s+/ /[+-]/ { @state = :OP; [:op, text] } :OP /\s+/ :OP /\d+/ { @state = nil; [:number2, text.to_i] } end REX txt = "1 + 2 + 30" exp = [[:number, 1], [:op, "+"], [:number2, 2], [:op, "+"], [:number2, 30]] assert_lexer src, txt, exp end def test_simple_scanner_auto_action src = <<-'REX' class Calculator rule /rpn/ { [:state, :RPN] } /\d+/ { [:number, text.to_i] } /\s+/ /[+-]/ { [:op, text] } :RPN /\s+/ :RPN /[+-]/ { [:op2, text] } :RPN /\d+/ { [:number2, text.to_i] } :RPN /alg/ { [:state, nil] } end REX txt = "rpn 1 2 30 + + alg" exp = [[:state, :RPN], [:number2, 1], [:number2, 2], [:number2, 30], [:op2, "+"], [:op2, "+"], [:state, nil]] assert_lexer src, txt, exp end def test_simple_scanner_auto_action_symbol src = <<-'REX' class Calculator rule /rpn/ :RPN /\d+/ { [:number, text.to_i] } /\s+/ /[+-]/ { [:op, text] } :RPN /\s+/ :RPN /[+-]/ { [:op2, text] } :RPN /\d+/ { [:number2, text.to_i] } :RPN /alg/ nil end REX txt = "rpn 1 2 30 + + alg" exp = [[:state, :RPN], [:number2, 1], [:number2, 2], [:number2, 30], [:op2, "+"], [:op2, "+"], [:state, nil]] assert_lexer src, txt, exp end def test_simple_scanner_predicate_generator src = <<-'REX' class Calculator rules /\d+/ { [:number, text.to_i] } /\s+/ :ARG /\d+/ poot? /[+-]/ { [:bad1, text] } woot? /[+-]/ { [:op, text] } end REX ruby = generate_lexer src assert_match "when poot? && (text = ss.scan(/[+-]/)) then", ruby assert_match "when woot? && (text = ss.scan(/[+-]/)) then", ruby assert_match "when nil then", ruby assert_match "when :ARG then", ruby end def test_simple_scanner_group src = <<-'REX' class Calculator rules : /\d/ | /\d+\.\d+/ { [:float, text.to_f] } | /\d+/ { [:int, text.to_i] } /\s+/ end REX ruby = generate_lexer src assert_match "when ss.match?(/\\d/) then", ruby assert_match "when text = ss.scan(/\\d+\\.\\d+/) then", ruby assert_match "when text = ss.scan(/\\d+/) then", ruby assert_match "end # group /\\d/", ruby end def test_simple_scanner_group_I_am_dumb src = <<-'REX' class Calculator rules : /\d/ | /\d+\.\d+/ { [:float, text.to_f] } | /\d+/ { [:int, text.to_i] } : /\+/ | xx? /\+whatever/ { [:x, text] } | :x /\+\d+/ { [:y, text] } /\s+/ end REX ruby = generate_lexer src assert_match "when ss.match?(/\\d/) then", ruby assert_match "when text = ss.scan(/\\d+\\.\\d+/) then", ruby assert_match "when text = ss.scan(/\\d+/) then", ruby assert_match "end # group /\\d/", ruby assert_match "when ss.match?(/\\+/) then", ruby assert_match "when xx? && (text = ss.scan(/\\+whatever/)) then", ruby assert_match "when (state == :x) && (text = ss.scan(/\\+\\d+/)) then", ruby assert_match "end # group /\\d/", ruby end def test_scanner_inspect_slash_structure src = <<-'REX' class Calculator rules : /\d/ | /\d+\.\d+/ { [:float, text.to_f] } | /\d+/ { [:int, text.to_i] } : /\+/ | xx? /\+whatever/ { [:x, text] } | :x /\+\d+/ { [:y, text] } | :x /\+\w+/ { [:z, text] } /\s+/ end REX rex = OedipusLex.new option rex.parse cleanup src lex = OedipusLex group, rule = lex::Group, lex::Rule expected = lex["Calculator", group[/\d/, rule[nil, /\d+\.\d+/, "{ [:float, text.to_f] }"], rule[nil, /\d+/, "{ [:int, text.to_i] }"]], group[/\+/, rule["xx?", /\+whatever/, "{ [:x, text] }"], rule[":x", /\+\d+/, "{ [:y, text] }"], rule[":x", /\+\w+/, "{ [:z, text] }"]], rule[nil, /\s+/, nil]] assert_equal expected, rex end make_my_diffs_pretty! def test_generator_start src = <<-'REX' class Calculator start do_the_thing rules /\d+/ { [:number, text.to_i] } /\s+/ end REX ruby = generate_lexer src assert_match " def next_token\n do_the_thing", ruby end def test_simple_scanner_predicate src = <<-'REX' class Calculator inner def woot? true end def poot? false end rules /\d+/ { [:number, text.to_i] } /\s+/ poot? /[+-]/ { [:bad1, text] } woot? /[+-]/ { [:op, text] } end REX txt = "1 + 2 + 30" exp = [[:number, 1], [:op, "+"], [:number, 2], [:op, "+"], [:number, 30]] assert_lexer src, txt, exp end def test_simple_scanner_method_actions src = <<-'REX' class Calculator inner def thingy text [:number, text.to_i] end rule /\d+/ thingy /\s+/ /[+-]/ { [:op, text] } end REX txt = "1 + 2 + 30" exp = [[:number, 1], [:op, "+"], [:number, 2], [:op, "+"], [:number, 30]] assert_lexer src, txt, exp end def test_header_is_written_after_module src = <<-'REX' module X module Y class Calculator rule /\d+/ { [:number, text.to_i] } /\s+/ /[+-]/ { [:op, text] } end end end REX ruby = generate_lexer src exp = ["# frozen_string_literal: true", "# encoding: UTF-8", "#--", "# This file is automatically generated. Do not modify it.", "# Generated by: oedipus_lex version #{OedipusLex::VERSION}.", "#++", "", "module X", "module Y"] assert_equal exp, ruby.lines.map(&:chomp).first(9) end def test_header_encoding_is_on_top src = <<-'REX' # encoding: UTF-8 module X module Y class Calculator rule /\d+/ { [:number, text.to_i] } /\s+/ /[+-]/ { [:op, text] } end end end REX ruby = generate_lexer src exp = ["# frozen_string_literal: true", "# encoding: UTF-8", "#--", "# This file is automatically generated. Do not modify it.", "# Generated by: oedipus_lex version #{OedipusLex::VERSION}.", "#++", "", "", "module X"] assert_equal exp, ruby.lines.map(&:chomp).first(9) end def test_read_non_existent_file rex = OedipusLex.new assert_raises Errno::ENOENT do rex.parse_file 'non_existent_file' end end def test_scanner_nests_classes src = <<-'REX' module Foo class Baz::Calculator < Bar rule /\d+/ { [:number, text.to_i] } /\s+/ { [:S, text] } end end REX ruby = generate_lexer src assert_match 'Baz::Calculator < Bar', ruby end def test_scanner_inherits source = generate_lexer <<-'REX' class Calculator < Bar rule /\d+/ { [:number, text.to_i] } /\s+/ { [:S, text] } end REX assert_match 'Calculator < Bar', source end def test_scanner_inherits_many_levels source = generate_lexer <<-'REX' class Calculator < Foo::Bar rule /\d+/ { [:number, text.to_i] } /\s+/ { [:S, text] } end REX assert_match 'Calculator < Foo::Bar', source end def test_parses_macros_with_escapes source = generate_lexer %q{ class Foo macro W /[\ \t]+/ rule /#{W}/ { [:SPACE, text] } end } assert_match 'ss.scan(/#{W}/)', source end def test_parses_regexp_with_interpolation_o source = generate_lexer %q{ class Foo rule /#{W}/o { [:SPACE, text] } end } assert_match 'ss.scan(/#{W}/o)', source end def test_parses_regexp_with_interpolation_o_macro source = generate_lexer %q{ class Foo macro W /[\ \t]+/ rule /#{X}/ { [:SPACE, text] } /#{W}/o { [:X, text] } end } assert_match 'W = /[\ \t]+/', source assert_match 'ss.scan(/#{W}/o)', source assert_match 'ss.scan(/#{X}/)', source end def test_parses_empty_regexp source = generate_lexer %q{ class Foo rule /\w+/ { @state = :ARG; emit :tFUNCTION_CALL } :ARG /\(/ { @state = nil; emit :tARG_LIST_BEGIN } :ARG // { @state = nil } end } assert_match 'ss.skip(//)', source end def test_changing_state_during_lexing src = <<-'REX' class Calculator rule /a/ { self.state = :B ; [:A, text] } :B /b/ { self.state = nil ; [:B, text] } end REX txt = "aba" exp = [[:A, 'a'], [:B, 'b'], [:A, 'a']] assert_lexer src, txt, exp txt = "aa" assert_lexer_error src, txt, "can not match (:B) at : 'a'" end def test_error_undefined_state src = <<-'REX' class Calculator rule /a/ { self.state = :C ; [:A, text] } :B /b/ { self.state = nil ; [:B, text] } end REX txt = "aa" assert_lexer_error src, txt, "undefined state at : 'C'" end def test_error_bad_token src = <<-'REX' class Calculator rule /a/ { self.state = :B ; :A } :B /b/ { self.state = nil ; [:B, text] } end REX txt = "aa" assert_token_error src, txt, "bad lexical result at : :A" end def test_error_bad_token_size src = <<-'REX' class Calculator rule /a/ { self.state = :B ; [:A] } :B /b/ { self.state = nil ; [:B, text] } end REX txt = "aa" assert_token_error src, txt, "bad lexical result at : [:A]" end def test_incrementing_lineno_on_nil_token src = <<-'REX' class Calculator option lineno rule /\n/ /a/ { [:A, lineno] } end REX txt = "\n\na" exp = [[:A, 3]] assert_lexer src, txt, exp end def assert_location exp, option = {} self.option = option src = "class Calculator\nrule\n /\\d+/ { [:number, text.to_i] }\nend\n" _, mod = eval_lexer src calc = mod::Calculator.new def calc.do_parse [next_token] end calc.filename = option[:filename] if option[:filename] calc.parse "42" assert_equal exp, calc.location end def test_location t = true assert_location "" assert_location ":1", :lineno => t assert_location ":?:0", :column => t assert_location ":1:0", :lineno => t, :column => t assert_location "blah", :filename => "blah" assert_location "blah:1", :filename => "blah", :lineno => t assert_location "blah:?:0", :filename => "blah", :column => t assert_location "blah:1:0", :filename => "blah", :lineno => t, :column => t end end