scanlex-0.1.4/.cargo_vcs_info.json0000644000000001121375202201500125200ustar { "git": { "sha1": "7a672ea8c1bda54c24fd64d38fe2b90519bd9cda" } } scanlex-0.1.4/.gitignore010066400017500001750000000000331341142557600133300ustar 00000000000000target Cargo.lock scratch/ scanlex-0.1.4/Cargo.lock0000644000000002121375202201500104740ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. [[package]] name = "scanlex" version = "0.1.4" scanlex-0.1.4/Cargo.toml0000644000000015311375202201500105240ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies # # If you believe there's an error in this file please file an # issue against the rust-lang/cargo repository. If you're # editing this file be aware that the upstream Cargo.toml # will likely look very different (and much more reasonable) [package] name = "scanlex" version = "0.1.4" authors = ["steve donovan "] description = "a simple lexical scanner for parsing text into tokens" readme = "readme.md" keywords = ["scan", "text", "tokenize", "input"] license = "MIT" repository = "https://github.com/stevedonovan/scanlex.git" [dependencies] scanlex-0.1.4/Cargo.toml.orig010066400017500001750000000005161375202157000142270ustar 00000000000000[package] name = "scanlex" version = "0.1.4" authors = ["steve donovan "] description = "a simple lexical scanner for parsing text into tokens" repository = "https://github.com/stevedonovan/scanlex.git" readme = "readme.md" license = "MIT" keywords = ["scan","text","tokenize","input"] [dependencies] scanlex-0.1.4/LICENSE.txt010066400017500001750000000020701341142557600131660ustar 00000000000000The MIT License (MIT) Copyright (c) 2016 Steve Donovan Permission is hereby granted, free of charge, to any person obtaining a copy of 
this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. scanlex-0.1.4/examples/scan.rs010066400017500001750000000004121341142557600144510ustar 00000000000000extern crate scanlex; fn main() { let def = "10 0.1 0.0 + 1.0e4 1e-3-5+4 0.1e+2"; let text = std::env::args().skip(1).next().unwrap_or(def.to_string()); let scan = scanlex::Scanner::new(&text); for t in scan { println!("{:?}",t); } } scanlex-0.1.4/examples/scanline.rs010066400017500001750000000012611341142557600153240ustar 00000000000000// ScanLines is a struct that is used to 'iterate' over a Scanner // for each line in a readable source. It cannot (currently) be // an actual iterator because of lifetime constraints, because // it returns a Scanner that borrows a string from the struct. This // however makes it more efficient. 
// // This example prints out the first token of each line in this file extern crate scanlex; use scanlex::ScanLines; use std::fs::File; fn main() { let f = File::open("scanline.rs").expect("cannot open scanline.rs"); let mut iter = ScanLines::new(&f); while let Some(s) = iter.next() { let mut s = s.expect("cannot read line"); println!("{:?}",s.get()); } } scanlex-0.1.4/readme.md010066400017500001750000000124261352575167700131420ustar 00000000000000# scanlex - a simple lexical scanner. ## The Problem of Input It is easier to write things out than to read them in, since more things can go wrong. The read may fail, the text may not be valid UTF-8, the number may be malformed or simply out of range. ## Lexical Scanners Lexical scanners split a stream of characters into _tokens_. Tokens are returned by repeatedly calling the `get` method of `Scanner`, (which will return `Token::End` if no tokens are left) or by iterating over the scanner. They represent numbers, characters, identifiers, or single/double quoted strings. There is also `Token::Error` to indicate a badly formed token. This lexical scanner makes some assumptions, such as a number may not be directly followed by a letter, etc. No attempt is made in this version to decode C-style escape codes in strings. All whitespace is ignored. It's intended for processing generic structured data, rather than code. 
For example, the string "hello 'dolly' * 42" will be broken into four tokens:

- an _identifier_ 'hello'
- a quoted string 'dolly'
- a character '*'
- and a number 42

```rust
extern crate scanlex;
use scanlex::{Scanner,Token};

let mut scan = Scanner::new("hello 'dolly' * 42");
assert_eq!(scan.get(),Token::Iden("hello".into()));
assert_eq!(scan.get(),Token::Str("dolly".into()));
assert_eq!(scan.get(),Token::Char('*'));
assert_eq!(scan.get(),Token::Int(42));
assert_eq!(scan.get(),Token::End);
```

To extract the values, use code like this:

```rust
let greeting = scan.get_iden()?;
let person = scan.get_string()?;
let op = scan.get_char()?;
let answer = scan.get_integer()?; // i64
```

`Scanner` implements `Iterator`. If you just wanted to extract the words from a string, then filtering with `to_iden` will do the trick, since it returns `Option<String>`.

```rust
let s = Scanner::new("bonzo 42 dog (cat)");
let v: Vec<_> = s.filter_map(|t| t.to_iden()).collect();
assert_eq!(v,&["bonzo","dog","cat"]);
```

Using `to_number` instead you can use this strategy to extract all the numbers out of a document, ignoring all other structure. The `scan.rs` example shows you the tokens that would be generated by parsing the string given on the command line. This iterator only stops at `Token::End` - you can handle `Token::Error` yourself.

Usually it's important _not_ to ignore structure. Say we have input strings that look like this "(WORD) = NUMBER":

```rust
scan.skip_chars("(")?;
let word = scan.get_iden()?;
scan.skip_chars(")=")?;
let num = scan.get_number()?;
```

_Any_ of these calls may fail!

It is a common pattern to create a scanner for each line of text read from a readable source. The `scanline.rs` example shows how to use `ScanLines` to accomplish this.
```rust
let f = File::open("scanline.rs").expect("cannot open scanline.rs");
let mut iter = ScanLines::new(&f);
while let Some(s) = iter.next() {
    let mut s = s.expect("cannot read line");
    // show the first token of each line
    println!("{:?}",s.get());
}
```

A more serious example (taken from the tests) is parsing JSON:

```rust
type JsonArray = Vec<Box<Value>>;
type JsonObject = HashMap<String,Box<Value>>;

#[derive(Debug, Clone, PartialEq)]
pub enum Value {
    Str(String),
    Num(f64),
    Bool(bool),
    Arr(JsonArray),
    Obj(JsonObject),
    Null
}

fn scan_json(scan: &mut Scanner) -> Result<Value,ScanError> {
    use Value::*;
    match scan.get() {
        Token::Str(s) => Ok(Str(s)),
        Token::Num(x) => Ok(Num(x)),
        Token::Int(n) => Ok(Num(n as f64)),
        Token::End => Err(scan.scan_error("unexpected end of input",None)),
        Token::Error(e) => Err(e),
        Token::Iden(s) =>
            if s == "null" {Ok(Null)}
            else if s == "true" {Ok(Bool(true))}
            else if s == "false" {Ok(Bool(false))}
            else {Err(scan.scan_error(&format!("unknown identifier '{}'",s),None))},
        Token::Char(c) =>
            if c == '[' {
                let mut ja = Vec::new();
                let mut ch = c;
                while ch != ']' {
                    let o = scan_json(scan)?;
                    ch = scan.get_ch_matching(&[',',']'])?;
                    ja.push(Box::new(o));
                }
                Ok(Arr(ja))
            } else if c == '{' {
                let mut jo = HashMap::new();
                let mut ch = c;
                while ch != '}' {
                    let key = scan.get_string()?;
                    scan.get_ch_matching(&[':'])?;
                    let o = scan_json(scan)?;
                    ch = scan.get_ch_matching(&[',','}'])?;
                    jo.insert(key,Box::new(o));
                }
                Ok(Obj(jo))
            } else {
                Err(scan.scan_error(&format!("bad char '{}'",c),None))
            }
    }
}
```

(This is of course an Illustrative Example. JSON is a solved problem.)

## Options

With `no_float` you get a barebones parser that does not recognize floats, just integers, strings, chars and identifiers. This is useful if the existing rules are too strict - e.g "2d" is fine in `no_float` mode, but an error in the default mode. [chrono-english](https://github.com/stevedonovan/chrono-english) uses this mode to parse date expressions.
With `line_comment` you provide a character; after this character, the rest of the current line will be ignored. scanlex-0.1.4/src/error.rs010066400017500001750000000011771374433527700136470ustar 00000000000000use std::{fmt,io}; use std::error::Error; /// a scanner error type #[derive(Debug)] #[derive(PartialEq)] pub struct ScanError { pub details: String, pub lineno: u32, } impl fmt::Display for ScanError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f,"line {}: {}",self.lineno,self.details) } } impl ScanError { /// create a new error pub fn new(msg: &str) -> ScanError { ScanError{details: msg.into(), lineno: 1} } } impl Error for ScanError {} impl From for ScanError { fn from(err: io::Error) -> ScanError { ScanError::new(&err.to_string()) } } scanlex-0.1.4/src/int.rs010066400017500001750000000013641341142557600132770ustar 00000000000000pub trait Int { type Type; fn min_value() -> i64; fn max_value() -> i64; fn name() -> &'static str; fn cast(n: i64) -> Self::Type; } macro_rules! impl_int { ($t:ident) => { impl Int for $t { type Type = $t; fn min_value() -> i64 { $t::min_value() as i64 } fn max_value() -> i64 { $t::max_value() as i64 } fn name() -> &'static str { stringify!($t) } fn cast(n: i64) -> Self::Type { n as Self::Type } } } } impl_int!(i8); impl_int!(i16); impl_int!(i32); impl_int!(i64); impl_int!(u8); impl_int!(u16); impl_int!(u32); impl_int!(u64); scanlex-0.1.4/src/lib.rs010066400017500001750000000457011375202151500132470ustar 00000000000000//! `scanlex` implements a simple _lexical scanner_. //! //! Tokens are returned by repeatedly calling the `get` method, //! (which will return `Token::End` if no tokens are left) //! or by iterating over the scanner. //! //! They represent floats (stored as f64), integers (as i64), characters, identifiers, //! or single or double quoted strings. There is also `Token::Error` to //! indicate a badly formed token. This lexical scanner makes some //! 
sensible assumptions, such as a number may not be directly followed //! by a letter, etc. No attempt is made in this version to decode C-style //! escape codes in strings. All whitespace is ignored. //! //! ## Examples //! //! ``` //! use scanlex::{Scanner,Token}; //! //! let mut scan = Scanner::new("iden 'string' * 10"); //! assert_eq!(scan.get(),Token::Iden("iden".into())); //! assert_eq!(scan.get(),Token::Str("string".into())); //! assert_eq!(scan.get(),Token::Char('*')); //! assert_eq!(scan.get(),Token::Int(10)); //! assert_eq!(scan.get(),Token::End); //! ``` //! //! The scanner struct implements iterator, so: //! //! ``` //! let v: Vec<_> = scanlex::Scanner::new("bonzo 42 dog (cat)") //! .filter_map(|t| t.to_iden()).collect(); //! assert_eq!(v,&["bonzo","dog","cat"]); //! ``` use std::str::FromStr; use std::error::Error; use std::io; mod int; use int::Int; mod error; pub use error::ScanError; mod token; pub use token::Token; /// a struct for lexical scanning of a string pub struct Scanner <'a> { iter: ::std::str::Chars<'a>, ch: char, pub lineno: u32, no_float: bool, line_comment: Option, } fn expecting_chars(chars: &[char]) -> String { let mut res = String::new(); for c in chars { res.push_str(&format!("'{}'",c)); res.push(',') } res.pop(); res } impl<'a> Iterator for Scanner<'a> { type Item = Token; fn next(&mut self) -> Option { match self.get() { Token::End => None, t => Some(t) } } } impl<'a> Scanner<'a> { /// create a new scanner from a string slice. /// /// Empty text is not a problem, but `get` will then /// return `Token::End`. 
pub fn new(s: &'a str) -> Scanner<'a> { Scanner::new_ex(s, 1) }

    /// create a scanner whose line counter starts at `lineno`
    fn new_ex(s: &'a str, lineno: u32) -> Scanner<'a> {
        let mut iter = s.chars();
        // `ch` is the one-character lookahead; '\0' stands for end of input
        let ch = iter.next().unwrap_or('\0');
        Scanner {
            iter: iter,
            ch: ch,
            lineno: lineno,
            no_float: false,
            line_comment: None,
        }
    }

    /// this scanner will not recognize floats
    ///
    /// "2.5" is tokenized as Int(2),Char('.'),Int(5)
    pub fn no_float(mut self) -> Scanner<'a> {
        self.no_float = true;
        self
    }

    /// ignore everything in a line after this char
    pub fn line_comment(mut self, c: char) -> Scanner<'a> {
        self.line_comment = Some(c);
        self
    }

    /// build a `ScanError` for the current line, optionally naming a cause
    pub fn scan_error(&self, msg: &str, cause: Option<&dyn Error>) -> ScanError {
        let details = match cause {
            Some(c) => format!("{}: caused by {}", msg, c),
            None => msg.to_string(),
        };
        ScanError { details: details, lineno: self.lineno }
    }

    /// stamp an error produced elsewhere with the scanner's current line
    fn update_lineno(&self, mut err: ScanError) -> ScanError {
        err.lineno = self.lineno;
        err
    }

    /// like `scan_error`, but wrapped as a `Token::Error`
    fn token_error(&self, msg: &str, cause: Option<&dyn Error>) -> Token {
        Token::Error(self.scan_error(msg, cause))
    }

    /// if the lookahead is the line-comment char, skip to the end of the line
    /// (leaving the lookahead on the '\n', or '\0' at end of input)
    fn check_line_comment(&mut self) -> bool {
        match self.line_comment {
            Some(lc) if self.ch == lc => {
                self.skip_until(|c| c == '\n');
                true
            }
            _ => false,
        }
    }

    /// skip any whitespace characters - return false if we're at the end.
    ///
    /// Line comments (see `line_comment`) are skipped as well, and every
    /// newline passed over - including the one ending a comment - advances
    /// `lineno`.
    pub fn skip_whitespace(&mut self) -> bool {
        loop {
            // a comment leaves the lookahead on its terminating '\n', which is
            // then counted and skipped below like any other newline
            self.check_line_comment();
            if !self.ch.is_whitespace() {
                break;
            }
            if self.ch == '\n' {
                self.lineno += 1;
            }
            self.nextch();
        }
        self.ch != '\0'
    }

    /// look ahead at the next character
    pub fn peek(&self) -> char {
        self.ch
    }

    /// get the next character ('\0' once the input is exhausted)
    pub fn nextch(&mut self) -> char {
        let old_ch = self.ch;
        self.ch = self.iter.next().unwrap_or('\0');
        old_ch
    }

    /// the lookahead, if it is a sign
    fn either_plus_or_minus(&self) -> Option<char> {
        if self.ch == '+' || self.ch == '-' {
            Some(self.ch)
        } else {
            None
        }
    }

    fn is_digit(&self) -> bool {
        self.ch.is_digit(10)
    }

    /// get the next token
    ///
    /// Returns `Token::End` when only whitespace (or nothing) is left and
    /// `Token::Error` for a malformed number.
    pub fn get(&mut self) -> Token {
        use self::Token::*;
        if !self.skip_whitespace() {
            return End;
        }
        // a number starts with a digit or (unless in no_float mode) a sign
        let plusminus = if self.no_float { None } else { self.either_plus_or_minus() };
        if self.is_digit() || plusminus.is_some() {
            self.scan_number(plusminus)
        } else if self.ch == '\'' || self.ch == '\"' {
            self.scan_quoted()
        } else if self.ch.is_alphabetic() || self.ch == '_' {
            Iden(self.grab_while(|c| c.is_alphanumeric() || c == '_'))
        } else {
            Char(self.nextch())
        }
    }

    /// scan an integer, hex constant or float; the lookahead is a digit, or
    /// the sign passed in as `plusminus`
    fn scan_number(&mut self, plusminus: Option<char>) -> Token {
        use self::Token::*;
        let mut s = String::new();
        // set when the integer digits are already complete and the lookahead
        // must be left alone (a lone "0" followed by something else)
        let mut int_done = false;
        if let Some(sign) = plusminus {
            // look ahead! Might be a number or just a char
            self.nextch();
            if !self.is_digit() {
                return Char(sign);
            }
            s.push(sign);
        } else if !self.no_float && self.ch == '0' {
            self.nextch();
            if self.ch == 'x' || self.ch == 'X' {
                self.nextch(); // skip the 'x'
                // guard: take_while_into takes the lookahead unconditionally
                if self.ch.is_digit(16) {
                    self.take_while_into(&mut s, |c| c.is_digit(16));
                }
                return match i64::from_str_radix(&s, 16) {
                    Ok(n) => Int(n),
                    Err(e) => self.token_error("bad hex constant", Some(&e)),
                };
            }
            s.push('0');
            // keep the lookahead intact: it is either '.', an exponent, a
            // letter (reported below) or the start of the next token
            int_done = !self.is_digit();
        }
        if !int_done {
            self.take_digits_into(&mut s);
        }
        // floating point part?
        if !self.no_float && (self.ch == '.' || self.ch == 'e' || self.ch == 'E') {
            if self.ch == '.' {
                // takes the '.' itself plus any following digits
                self.take_digits_into(&mut s);
            }
            if self.ch == 'e' || self.ch == 'E' {
                s.push(self.nextch());
                if self.is_digit() || self.either_plus_or_minus().is_some() {
                    self.take_digits_into(&mut s);
                }
            }
            if self.ch.is_alphabetic() {
                self.token_error("bad floating-point number: letter follows", None)
            } else {
                match f64::from_str(&s) {
                    Ok(x) => Num(x),
                    Err(e) => self.token_error(&format!("bad floating-point number {:?}",s), Some(&e)),
                }
            }
        } else if !self.no_float && self.ch.is_alphabetic() {
            self.token_error("bad integer: letter follows", None)
        } else {
            match i64::from_str(&s) {
                Ok(x) => Int(x),
                Err(e) => self.token_error(&format!("bad integer {:?}",s), Some(&e)),
            }
        }
    }

    /// scan a single or double quoted string; the lookahead is the opening
    /// quote. An unterminated string runs to the end of the input.
    fn scan_quoted(&mut self) -> Token {
        let endquote = self.ch;
        self.nextch(); // skip the opening quote
        let s = if self.ch == endquote {
            // empty string: grab_while would take the closing quote as content
            String::new()
        } else {
            self.grab_while(|c| c != endquote)
        };
        self.nextch(); // skip end quote
        Token::Str(s)
    }

    /// collect chars matching the condition, returning a string
    /// ```
    /// let mut scan = scanlex::Scanner::new("hello + goodbye");
    /// assert_eq!(scan.grab_while(|c| c != '+'), "hello ");
    /// ```
    pub fn grab_while<F>(&mut self, pred: F) -> String
    where F: Fn(char) -> bool {
        let mut s = String::new();
        self.take_while_into(&mut s, pred);
        s
    }

    /// collect chars matching the condition into a given string
    ///
    /// The current lookahead character is always taken (unless at the end);
    /// `pred` is only applied to the characters after it.
    pub fn take_while_into<F>(&mut self, s: &mut String, pred: F)
    where F: Fn(char) -> bool {
        if self.ch != '\0' {
            s.push(self.ch);
        }
        while let Some(c) = self.iter.next() {
            if !pred(c) {
                self.ch = c;
                return;
            }
            s.push(c);
        }
        self.ch = '\0';
    }

    fn take_digits_into(&mut self, s: &mut String) {
        self.take_while_into(s, |c| c.is_digit(10));
    }

    /// skip chars while the condition is false
    ///
    /// ```
    /// let mut scan = scanlex::Scanner::new("hello and\nwelcome");
    /// scan.skip_until(|c| c == '\n');
    /// assert_eq!(scan.get_iden().unwrap(),"welcome");
    /// ```
    pub fn skip_until<F>(&mut self, pred: F) -> bool
    where F: Fn(char) -> bool {
        while let Some(c) = self.iter.next() {
            if pred(c) {
                self.ch = c;
                return true;
            }
        }
        self.ch = '\0';
        false
    }

    /// collect the rest of the chars
    ///
    /// ```
    /// use scanlex::{Scanner,Token};
    ///
    /// let mut scan = Scanner::new("42 the answer");
    /// assert_eq!(scan.get(),Token::Int(42));
    /// assert_eq!(scan.take_rest()," the answer");
    /// ```
    pub fn take_rest(&mut self) -> String {
        self.grab_while(|c| c != '\0')
    }

    /// collect until we match one of the chars
    pub fn take_until(&mut self, chars: &[char]) -> String {
        self.grab_while(|c| !chars.contains(&c))
    }

    /// get a String token, failing otherwise
    pub fn get_string(&mut self) -> Result<String,ScanError> {
        self.get().to_string_result().map_err(|e| self.update_lineno(e))
    }

    /// get an Identifier token, failing otherwise
    ///
    /// ```
    /// let mut scan = scanlex::Scanner::new("hello dolly");
    /// assert_eq!(scan.get_iden().unwrap(),"hello");
    /// ```
    pub fn get_iden(&mut self) -> Result<String,ScanError> {
        self.get().to_iden_result().map_err(|e| self.update_lineno(e))
    }

    /// get a number, failing otherwise
    ///
    /// ```
    /// let mut scan = scanlex::Scanner::new("(42)");
    /// scan.get(); // skip '('
    /// assert_eq!(scan.get_number().unwrap(),42.0);
    /// ```
    pub fn get_number(&mut self) -> Result<f64,ScanError> {
        self.get().to_number_result().map_err(|e| self.update_lineno(e))
    }

    /// get an integer, failing otherwise
    pub fn get_integer(&mut self) -> Result<i64,ScanError> {
        self.get().to_integer_result().map_err(|e| self.update_lineno(e))
    }

    /// get an integer of a particular type, failing otherwise
    pub fn get_int<I: Int>(&mut self) -> Result<I::Type,ScanError> {
        self.get().to_int_result::<I>().map_err(|e| self.update_lineno(e))
    }

    /// get an float, failing otherwise
    pub fn get_float(&mut self) -> Result<f64,ScanError> {
        self.get().to_float_result().map_err(|e| self.update_lineno(e))
    }

    /// get a character, failing otherwise
    pub fn get_char(&mut self) -> Result<char,ScanError> {
        self.get().to_char_result().map_err(|e| self.update_lineno(e))
    }

    /// get a Character token that must be one of the given chars
    pub fn get_ch_matching(&mut self, chars: &[char]) -> Result<char,ScanError> {
        let c = self.get_char()?;
        if chars.contains(&c) {
            Ok(c)
        } else {
            let s = expecting_chars(chars);
            Err(self.scan_error(&format!("expected one of {}, got {}",s,c),None))
        }
    }

    /// skip each character in the string.
    pub fn skip_chars(&mut self, chars: &str) -> Result<(),ScanError> {
        for ch in chars.chars() {
            let c = self.get_char()?;
            if c != ch {
                return Err(self.scan_error(&format!("expected '{}' got '{}'",ch,c),None));
            }
        }
        Ok(())
    }

    /// grab 'balanced' text between some open and close chars
    ///
    /// `pair` holds the open and the close char (panics if it has fewer than
    /// two chars). After skipping whitespace the lookahead is taken to be the
    /// opening bracket; the returned text includes both outer brackets.
    pub fn grab_brackets(&mut self, pair: &str) -> Result<String,ScanError> {
        let mut chars = pair.chars();
        let open = chars.next().expect("provide open bracket");
        let close = chars.next().expect("provide close bracket");
        self.skip_whitespace();
        let mut s = String::new();
        if self.ch != '\0' {
            s.push(self.ch);
        }
        let mut level = 1;
        while let Some(c) = self.iter.next() {
            if c == open {
                level += 1;
            } else if c == close {
                level -= 1;
            }
            s.push(c);
            if level == 0 {
                self.nextch();
                return Ok(s);
            }
        }
        // the input is used up: don't leave a stale lookahead behind
        self.ch = '\0';
        Err(self.scan_error("expect close bracket",None))
    }
}

use std::io::prelude::*;

/// used to generate Scanner structs for each line
pub struct ScanLines<R: Read> {
    rdr: io::BufReader<R>,
    /// buffer for the current line, reused between calls to `next`
    line: String,
    lineno: u32,
}

impl <'a, R: Read> ScanLines<R> {
    /// create a Scanner 'iterator' over all lines from a readable.
    /// This cannot be a proper `Iterator` because the lifetime constraint
    /// on `Scanner` cannot be satisfied.
/// You need to use the explicit form:
    ///
    /// ```rust,ignore
    /// let mut iter = ScanLines::new(File::open("lines.txt")?);
    /// while let Some(s) = iter.next() {
    ///     let mut s = s?;
    ///     // first token of each line
    ///     println!("{:?}",s.get());
    /// }
    /// ```
    pub fn new(f: R) -> ScanLines<R> {
        ScanLines {
            rdr: io::BufReader::new(f),
            line: String::new(),
            lineno: 0,
        }
    }

    /// call this to return a `Scanner` for the next line in the source.
    ///
    /// Returns `None` at end of input and `Some(Err(..))` if reading fails.
    pub fn next(&'a mut self) -> Option<io::Result<Scanner<'a>>> {
        self.line.clear();
        match self.rdr.read_line(&mut self.line) {
            Ok(0) => return None, // nothing read: end of input
            Ok(_) => {}
            Err(e) => return Some(Err(e)),
        }
        self.lineno += 1;
        Some(Ok(Scanner::new_ex(&self.line, self.lineno)))
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn skipping() {
        // skipping
        let mut scan = Scanner::new("here we go\nand more *yay*");
        scan.skip_until(|c| c == '\n');
        assert_eq!(scan.get(),Token::Iden("and".to_string()));
        scan.skip_until(|c| c == '*');
        assert_eq!(scan.get(),Token::Char('*'));
        assert_eq!(scan.get(),Token::Iden("yay".to_string()));
    }

    #[test]
    fn getting() {
        use Token::*;
        let mut scan = Scanner::new("'hello' 42 * / -10 24B 2.0e6 0xFF-\"yay\"");
        assert_eq!(scan.get_string().unwrap(), "hello");
        assert_eq!(scan.get_number().unwrap(), 42.0);
        assert_eq!(scan.get_ch_matching(&['*']).unwrap(),'*');
        assert_eq!(
            scan.get_ch_matching(&[',',':']).err().unwrap(),
            ScanError::new("expected one of ',',':', got /")
        );
        assert_eq!(scan.get(),Int(-10));
        assert_eq!(scan.get(),Error(ScanError::new("bad integer: letter follows")));
        assert_eq!(scan.get(),Iden("B".to_string()));
        assert_eq!(scan.get(),Num(2000000.0));
        assert_eq!(scan.get(),Int(255));
        assert_eq!(scan.get(),Char('-'));
        assert_eq!(scan.get(),Str("yay".to_string()));
    }

    fn try_scan_err() -> Result<(),ScanError> {
        let mut scan = Scanner::new("hello: 42");
        let s = scan.get_iden()?;
        let ch = scan.get_char()?;
        let n = scan.get_integer()?;
        assert_eq!(s,"hello");
        assert_eq!(ch,':');
        assert_eq!(n,42);
        Ok(())
    }

    #[test]
    fn try_scan_test() {
        // a scan error must fail the test rather than be silently dropped
        try_scan_err().expect("scanning should succeed");
    }

    fn try_skip_chars(test: &str) -> Result<(),ScanError> {
        let mut scan = Scanner::new(test);
        scan.skip_chars("(")?;
        let name = scan.get_iden()?;
        scan.skip_chars(")=")?;
        let num = scan.get_integer()?;
        assert_eq!(name,"hello");
        assert_eq!(num,42);
        Ok(())
    }

    #[test]
    fn skip_chars() {
        // a scan error must fail the test rather than be silently dropped
        try_skip_chars("(hello)=42").expect("compact form should scan");
        try_skip_chars(" ( hello ) = 42 ").expect("spaced form should scan");
    }

    #[test]
    fn numbers() {
        let mut scan = Scanner::new("10 0.0 1.0e1 1e1 0 ");
        assert_eq!(scan.get_integer(),Ok(10));
        assert_eq!(scan.get_number(),Ok(0.0));
        assert_eq!(scan.get_number(),Ok(10.0));
        assert_eq!(scan.get_float(),Ok(10.0));
        assert_eq!(scan.get_integer(),Ok(0));
    }

    #[test]
    fn no_float() {
        use Token::*;
        let scan = Scanner::new("0.0 1e4").no_float();
        let c: Vec<_> = scan.collect();
        assert_eq!(c,&[Int(0),Char('.'),Int(0),Int(1),Iden("e4".into())]);
    }

    #[test]
    fn classifying_tokens() {
        let mut s = Scanner::new("10 2.0 'hello' hello?");
        let t = s.get();
        assert!(t.is_integer());
        assert!(t.is_number());
        assert!(s.get().is_float());
        assert!(s.get().is_string());
        assert!(s.get().is_iden());
        assert!(s.get().is_char());
    }

    #[test]
    fn collecting_tokens_of_type() {
        let s = Scanner::new("if let Some(a) = Bonzo::Dog {}");
        let c: Vec<_> = s.filter_map(|t| t.to_iden()).collect();
        assert_eq!(c,&["if","let","Some","a","Bonzo","Dog"]);
    }

    #[test]
    fn collecting_same_tokens_or_error() {
        let s = Scanner::new("10 1.5 20.0 30.1");
        let c: Result<Vec<_>,_> = s.map(|t| t.to_number_result()).collect();
        assert_eq!(c.unwrap(),&[10.0,1.5,20.0,30.1]);
    }

    #[test]
    fn line_comments() {
        // the comment must end at a newline, otherwise "20" is part of it
        let text = "\n    one # some comment\n    20\n";
        let mut scan = Scanner::new(text)
            .line_comment('#');
        assert_eq!(scan.get_iden(),Ok("one".into()));
        assert_eq!(scan.get_number(),Ok(20.0));
    }
}
scanlex-0.1.4/src/token.rs010066400017500001750000000132721352575371100136270ustar 00000000000000
use error::ScanError;
use int::Int;

/// Represents a token returned by `Scanner::get`
#[derive(Debug, PartialEq)]
pub enum Token {
    /// a floating-point number,
stored as double-precision float Num(f64), /// an integer, stored as eight-byte unsigned Int(i64), /// a quoted string Str(String), /// an identifier \a+[\a\d_]* Iden(String), /// a character (anything not recognized as any of the above Char(char), /// represents an error Error(ScanError), /// end of stream End } fn type_error(t: Token, expected: &str) -> Result { Err(ScanError{details: format!("{} expected, got {:?}",expected,t), lineno: 1}) } fn int_error(msg: &str, tname: &str) -> Result { Err(ScanError{details: format!("integer {} for {}",msg,tname), lineno: 1}) } impl Token { /// is this the end token? pub fn finished(&self) -> bool { match *self { Token::End => true, _ => false } } /// is this token a float? pub fn is_float(&self) -> bool { match *self { Token::Num(_) => true, _ => false } } /// extract the float pub fn to_float(self) -> Option { match self { Token::Num(n) => Some(n), _ => None } } /// extract the float, or complain pub fn to_float_result(self) -> Result { match self { Token::Num(n) => Ok(n), t => type_error(t,"float") } } /// is this token an integer? pub fn is_integer(&self) -> bool { match *self { Token::Int(_) => true, _ => false } } /// extract the integer pub fn to_integer(self) -> Option { match self { Token::Int(n) => Some(n), _ => None } } /// extract the integer, or complain pub fn to_integer_result(self) -> Result { match self { Token::Int(n) => Ok(n), t => type_error(t,"integer") } } /// extract the integer as a particular subtype pub fn to_int_result(self) -> Result { let num = self.to_integer_result()?; if num < I::min_value() { return int_error("underflow",I::name()); } else if num > I::max_value() { return int_error("overflow",I::name()); } Ok(I::cast(num)) } /// is this token an integer? 
pub fn is_number(&self) -> bool { match *self { Token::Int(_) | Token::Num(_) => true, _ => false } } /// extract the number, not caring about float or integer pub fn to_number(self) -> Option { match self { Token::Num(n) => Some(n), Token::Int(n) => Some(n as f64), _ => None } } /// extract the number, not caring about float or integer, or complain pub fn to_number_result(self) -> Result { match self { Token::Num(n) => Ok(n), Token::Int(n) => Ok(n as f64), t => type_error(t,"number") } } /// is this token a string? pub fn is_string(&self) -> bool { match *self { Token::Str(_) => true, _ => false } } /// extract the string pub fn to_string(self) -> Option { match self { Token::Str(s) => Some(s), _ => None } } /// extract a reference the string pub fn as_string(&self) -> Option<&str> { match *self { Token::Str(ref s) => Some(s.as_str()), _ => None } } /// extract the string, or complain pub fn to_string_result(self) -> Result { match self { Token::Str(s) => Ok(s), t => type_error(t,"string") } } /// is this token an identifier? pub fn is_iden(&self) -> bool { match *self { Token::Iden(_) => true, _ => false } } /// extract the identifier pub fn to_iden(self) -> Option { match self { Token::Iden(n) => Some(n), _ => None } } /// extract a reference to the identifier pub fn as_iden(&self) -> Option<&str> { match *self { Token::Iden(ref n) => Some(n.as_str()), _ => None } } /// extract the identifier, or complain pub fn to_iden_result(self) -> Result { match self { Token::Iden(n) => Ok(n), t => type_error(t,"iden") } } /// is this token a character? 
pub fn is_char(&self) -> bool { match *self { Token::Char(_) => true, _ => false } }

    /// extract the character
    pub fn to_char(self) -> Option<char> {
        match self {
            Token::Char(c) => Some(c),
            _ => None
        }
    }

    /// extract the character without consuming the token
    pub fn as_char(&self) -> Option<char> {
        match *self {
            Token::Char(c) => Some(c),
            _ => None
        }
    }

    /// extract the character, or complain
    pub fn to_char_result(self) -> Result<char,ScanError> {
        match self {
            Token::Char(c) => Ok(c),
            t => type_error(t,"char")
        }
    }

    /// is this token an error?
    pub fn is_error(&self) -> bool {
        match *self {
            Token::Error(_) => true,
            _ => false
        }
    }

    /// extract the error
    pub fn to_error(self) -> Option<ScanError> {
        match self {
            Token::Error(e) => Some(e),
            _ => None
        }
    }
}
scanlex-0.1.4/tests/json.rs010066400017500001750000000046401341142557600140310ustar 00000000000000
// A Simple, toy JSON parser.
// Remember there are Crates for This
extern crate scanlex;
use scanlex::{Scanner,Token,ScanError};
use std::collections::HashMap;

type JsonArray = Vec<Box<Value>>;
type JsonObject = HashMap<String,Box<Value>>;

#[derive(Debug, Clone, PartialEq)]
pub enum Value {
    Str(String),
    Num(f64),
    Bool(bool),
    Arr(JsonArray),
    Obj(JsonObject),
    Null
}

// If the next non-blank character is `close`, consume it and return true.
// Lets empty containers ("[]", "{}") be recognized before a value is demanded.
fn at_close(scan: &mut Scanner, close: char) -> bool {
    if scan.skip_whitespace() && scan.peek() == close {
        scan.nextch();
        true
    } else {
        false
    }
}

// Parse one JSON value from the scanner (recursively for arrays and objects).
// Integers are widened to f64; any scanner error is passed on unchanged.
fn scan_json(scan: &mut Scanner) -> Result<Value,ScanError> {
    use Value::*;
    match scan.get() {
        Token::Str(s) => Ok(Str(s)),
        Token::Num(x) => Ok(Num(x)),
        Token::Int(n) => Ok(Num(n as f64)),
        Token::End => Err(scan.scan_error("unexpected end of input",None)),
        Token::Error(e) => Err(e),
        Token::Iden(s) => match s.as_str() {
            "null" => Ok(Null),
            "true" => Ok(Bool(true)),
            "false" => Ok(Bool(false)),
            _ => Err(scan.scan_error(&format!("unknown identifier '{}'",s),None)),
        },
        Token::Char('[') => {
            let mut ja = Vec::new();
            if at_close(scan, ']') {
                return Ok(Arr(ja));
            }
            loop {
                let o = scan_json(scan)?;
                let ch = scan.get_ch_matching(&[',',']'])?;
                ja.push(Box::new(o));
                if ch == ']' {
                    break;
                }
            }
            Ok(Arr(ja))
        }
        Token::Char('{') => {
            let mut jo = HashMap::new();
            if at_close(scan, '}') {
                return Ok(Obj(jo));
            }
            loop {
                let key = scan.get_string()?;
                scan.get_ch_matching(&[':'])?;
                let o = scan_json(scan)?;
                let ch = scan.get_ch_matching(&[',','}'])?;
                jo.insert(key,Box::new(o));
                if ch == '}' {
                    break;
                }
            }
            Ok(Obj(jo))
        }
        Token::Char(c) => Err(scan.scan_error(&format!("bad char '{}'",c),None)),
    }
}

fn parse_json(txt: &str) -> Value {
    let mut scan = Scanner::new(txt);
    scan_json(&mut scan).expect("bad json")
}

use Value::*;

#[test]
fn array() {
    let s = parse_json("[10,20]");
    assert_eq!(s, Arr(vec![Box::new(Num(10.0)),Box::new(Num(20.0))]));
}

#[test]
fn array2() {
    let s = parse_json("[null,true]");
    assert_eq!(s, Arr(vec![Box::new(Null),Box::new(Bool(true))]));
}

#[test]
fn map() {
    let s = parse_json("{'bonzo':10}");
    let mut m = HashMap::new();
    m.insert("bonzo".to_string(),Box::new(Num(10.0)));
    assert_eq!(s, Obj(m));
}

#[test]
fn empty_containers() {
    assert_eq!(parse_json("[]"), Arr(Vec::new()));
    assert_eq!(parse_json("{ }"), Obj(HashMap::new()));
}