litrs-0.4.0/.cargo_vcs_info.json0000644000000001360000000000100121720ustar { "git": { "sha1": "458d6e121f923196a287f966427702ce78ed5647" }, "path_in_vcs": "" }litrs-0.4.0/.gitignore000064400000000000000000000000231046102023000127450ustar 00000000000000/target Cargo.lock litrs-0.4.0/CHANGELOG.md000064400000000000000000000073041046102023000125770ustar 00000000000000# Changelog All notable changes to this project will be documented in this file. ## [Unreleased] ## [0.4.0] - 2023-03-05 ### Added - Add ability to parse literals with arbitrary suffixes (e.g. `"foo"bla` or `23px`) - Add `suffix()` method to all literal types except `BoolLit` - Add `IntegerBase::value` - Add `from_suffix` and `suffix` methods to `FloatType` and `IntegerType` - Add `FromStr` and `Display` impls to `FloatType` and `IntegerType` ### Changed - **Breaking**: Mark `FloatType` and `IntegerType` as `#[non_exhaustive]` - **Breaking**: Fix integer parsing for cases like `27f32`. `Literal::parse` and `IntegerLit::parse` will both identify this as an integer literal. - **Breaking**: Fix float parsing by correctly rejecting inputs like `27f32`. A float literal must have a period OR an exponent part, according to the spec. Previously decimal integers were accepted in `FloatLit::parse`. 
- Improved some parts of the docs ### Removed - **Breaking**: Remove `OwnedLiteral` and `SharedLiteral` ## [0.3.0] - 2022-12-19 ### Breaking - Bump MSRV (minimal supported Rust version) to 1.54 ### Added - Add `raw_input` and `into_raw_input` to non-bool `*Lit` types - Add `impl From<*Lit> for pm::Literal` (for non-bool literals) - Add `impl From<BoolLit> for pm::Ident` ### Fixed - Fix link to reference and clarify bool literals ([#7](https://github.com/LukasKalbertodt/litrs/pull/7)) ### Internals - Move lots of parsing code into non-generic functions (this hopefully reduces compile times) - To implement `[into_]raw_input` for integer and float literals, their internals were changed a bit so that they store the full input string now. ## [0.2.3] - 2021-06-09 ### Changed - Minor internal code change to bring MSRV from 1.52 to 1.42 ## [0.2.2] - 2021-06-09 ### Changed - Fixed (byte) string literal parsing by: - Correctly handling "string continue" sequences - Correctly converting `\r\n` into `\n` ## [0.2.1] - 2021-06-04 ### Changed - Fixed the `expected` value of the error returned from `TryFrom` impls in some cases ## [0.2.0] - 2021-05-28 ### Changed - **Breaking**: rename `Error` to `ParseError`. That describes its purpose more closely and is particularly useful now that other error types exist in the library. ### Removed - **Breaking**: remove `proc-macro` feature and instead offer the corresponding `impl`s unconditionally. Since the feature didn't enable/disable a dependency (`proc-macro` is a compiler provided crate) and since apparently it works fine in `no_std` environments, I dropped this feature. I don't currently see a reason why the corresponding impls should be conditional. 
### Added - `TryFrom for litrs::Literal` impls - `From<*Lit> for litrs::Literal` impls - `TryFrom for *Lit` - `TryFrom for *Lit` - `InvalidToken` error type for all new `TryFrom` impls ## [0.1.1] - 2021-05-25 ### Added - `From` impls to create a `Literal` from references to proc-macro literal types: - `From<&proc_macro::Literal>` - `From<&proc_macro2::Literal>` - Better examples in README and repository ## 0.1.0 - 2021-05-24 ### Added - Everything [Unreleased]: https://github.com/LukasKalbertodt/litrs/compare/v0.4.0...HEAD [0.4.0]: https://github.com/LukasKalbertodt/litrs/compare/v0.3.0...v0.4.0 [0.3.0]: https://github.com/LukasKalbertodt/litrs/compare/v0.2.3...v0.3.0 [0.2.3]: https://github.com/LukasKalbertodt/litrs/compare/v0.2.2...v0.2.3 [0.2.2]: https://github.com/LukasKalbertodt/litrs/compare/v0.2.1...v0.2.2 [0.2.1]: https://github.com/LukasKalbertodt/litrs/compare/v0.2.0...v0.2.1 [0.2.0]: https://github.com/LukasKalbertodt/litrs/compare/v0.1.1...v0.2.0 [0.1.1]: https://github.com/LukasKalbertodt/litrs/compare/v0.1.0...v0.1.1 litrs-0.4.0/Cargo.toml0000644000000026060000000000100101740ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2018" rust-version = "1.54" name = "litrs" version = "0.4.0" authors = ["Lukas Kalbertodt "] exclude = [".github"] description = """ Parse and inspect Rust literals (i.e. tokens in the Rust programming language representing fixed values). Particularly useful for proc macros, but can also be used outside of a proc-macro context. 
""" documentation = "https://docs.rs/litrs/" readme = "README.md" keywords = [ "literal", "parsing", "proc-macro", "type", "procedural", ] categories = [ "development-tools::procedural-macro-helpers", "parser-implementations", "development-tools::build-utils", ] license = "MIT/Apache-2.0" repository = "https://github.com/LukasKalbertodt/litrs/" [dependencies.proc-macro2] version = "1" optional = true [dependencies.unicode-xid] version = "0.2.4" optional = true [features] check_suffix = ["unicode-xid"] default = ["proc-macro2"] litrs-0.4.0/Cargo.toml.orig000064400000000000000000000016351046102023000136560ustar 00000000000000[package] name = "litrs" version = "0.4.0" authors = ["Lukas Kalbertodt "] edition = "2018" rust-version = "1.54" description = """ Parse and inspect Rust literals (i.e. tokens in the Rust programming language representing fixed values). Particularly useful for proc macros, but can also be used outside of a proc-macro context. """ documentation = "https://docs.rs/litrs/" repository = "https://github.com/LukasKalbertodt/litrs/" readme = "README.md" license = "MIT/Apache-2.0" keywords = ["literal", "parsing", "proc-macro", "type", "procedural"] categories = [ "development-tools::procedural-macro-helpers", "parser-implementations", "development-tools::build-utils", ] exclude = [".github"] [features] default = ["proc-macro2"] check_suffix = ["unicode-xid"] [dependencies] proc-macro2 = { version = "1", optional = true } unicode-xid = { version = "0.2.4", optional = true } litrs-0.4.0/LICENSE-APACHE000064400000000000000000000227731046102023000127210ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. 
"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. 
END OF TERMS AND CONDITIONS litrs-0.4.0/LICENSE-MIT000064400000000000000000000020461046102023000124200ustar 00000000000000Copyright (c) 2020 Project Developers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. litrs-0.4.0/README.md000064400000000000000000000070231046102023000122430ustar 00000000000000# `litrs`: parsing and inspecting Rust literals [CI status of main](https://github.com/LukasKalbertodt/litrs/actions/workflows/ci.yml) [Crates.io Version](https://crates.io/crates/litrs) [docs.rs](https://docs.rs/litrs) `litrs` offers functionality to parse Rust literals, i.e. tokens in the Rust programming language that represent fixed values. For example: `27`, `"crab"`, `true`. This is particularly useful for proc macros, but can also be used outside of a proc-macro context. **Why this library?** Unfortunately, the `proc_macro` API shipped with the compiler offers no easy way to inspect literals. 
There are mainly two libraries for this purpose: [`syn`](https://github.com/dtolnay/syn) and [`literalext`](https://github.com/mystor/literalext). The latter is deprecated. And `syn` is oftentimes overkill for the task at hand, especially when developing function-like proc-macros (e.g. `foo!(..)`). This crate is a lightweight alternative. Also, when it comes to literals, `litrs` offers a bit more flexibility and a few more features compared to `syn`. I'm interested in community feedback! If you consider using this, please speak your mind [in this issue](https://github.com/LukasKalbertodt/litrs/issues/1). ## Example ### In proc macro ```rust use std::convert::TryFrom; use proc_macro::TokenStream; use litrs::Literal; #[proc_macro] pub fn foo(input: TokenStream) -> TokenStream { // Please do proper error handling in your real code! let first_token = input.into_iter().next().expect("no input"); // `try_from` will return an error if the token is not a literal. match Literal::try_from(first_token) { // Convenient methods to produce decent errors via `compile_error!`. Err(e) => return e.to_compile_error(), // You can now inspect your literal! Ok(Literal::Integer(i)) => { println!("Got an integer specified in base {:?}", i.base()); let value = i.value::<u64>().expect("integer literal too large"); println!("Is your integer even? {}", value % 2 == 0); } Ok(other) => { println!("Got a non-integer literal"); } } TokenStream::new() // dummy output } ``` If you are expecting a specific kind of literal, you can also use this, which will return an error if the token is not a float literal. ```rust FloatLit::try_from(first_token) ``` ### Parsing from a `&str` Outside of a proc macro context you might want to parse a string directly. 
```rust use litrs::{FloatLit, Literal}; let lit = Literal::parse("'🦀'").expect("failed to parse literal"); let float_lit = FloatLit::parse("2.7e3").expect("failed to parse as float literal"); ``` See [**the documentation**](https://docs.rs/litrs) or the `examples/` directory for more examples and information.
--- ## License Licensed under either of Apache License, Version 2.0 or MIT license at your option. Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in this project by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. litrs-0.4.0/src/bool/mod.rs000064400000000000000000000026711046102023000136370ustar 00000000000000use std::fmt; use crate::{ParseError, err::{perr, ParseErrorKind::*}}; /// A bool literal: `true` or `false`. Also see [the reference][ref]. /// /// Notice that, strictly speaking, from Rust's point of view "boolean literals" are not /// actual literals but [keywords]. /// /// [ref]: https://doc.rust-lang.org/reference/expressions/literal-expr.html#boolean-literal-expressions /// [keywords]: https://doc.rust-lang.org/reference/keywords.html#strict-keywords #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum BoolLit { False, True, } impl BoolLit { /// Parses the input as a bool literal. Returns an error if the input is /// invalid or represents a different kind of literal. pub fn parse(s: &str) -> Result { match s { "false" => Ok(Self::False), "true" => Ok(Self::True), _ => Err(perr(None, InvalidLiteral)), } } /// Returns the actual Boolean value of this literal. pub fn value(self) -> bool { match self { Self::False => false, Self::True => true, } } /// Returns the literal as string. pub fn as_str(&self) -> &'static str { match self { Self::False => "false", Self::True => "true", } } } impl fmt::Display for BoolLit { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.pad(self.as_str()) } } #[cfg(test)] mod tests; litrs-0.4.0/src/bool/tests.rs000064400000000000000000000023361046102023000142200ustar 00000000000000use crate::{ Literal, BoolLit, test_util::assert_parse_ok_eq, }; macro_rules! 
assert_bool_parse { ($input:literal, $expected:expr) => { assert_parse_ok_eq( $input, Literal::parse($input), Literal::Bool($expected), "Literal::parse"); assert_parse_ok_eq($input, BoolLit::parse($input), $expected, "BoolLit::parse"); }; } #[test] fn parse_ok() { assert_bool_parse!("false", BoolLit::False); assert_bool_parse!("true", BoolLit::True); } #[test] fn parse_err() { assert!(Literal::parse("fa").is_err()); assert!(Literal::parse("fal").is_err()); assert!(Literal::parse("fals").is_err()); assert!(Literal::parse(" false").is_err()); assert!(Literal::parse("false ").is_err()); assert!(Literal::parse("False").is_err()); assert!(Literal::parse("tr").is_err()); assert!(Literal::parse("tru").is_err()); assert!(Literal::parse(" true").is_err()); assert!(Literal::parse("true ").is_err()); assert!(Literal::parse("True").is_err()); } #[test] fn value() { assert!(!BoolLit::False.value()); assert!(BoolLit::True.value()); } #[test] fn as_str() { assert_eq!(BoolLit::False.as_str(), "false"); assert_eq!(BoolLit::True.as_str(), "true"); } litrs-0.4.0/src/byte/mod.rs000064400000000000000000000061211046102023000136410ustar 00000000000000use core::fmt; use crate::{ Buffer, ParseError, err::{perr, ParseErrorKind::*}, escape::unescape, parse::check_suffix, }; /// A (single) byte literal, e.g. `b'k'` or `b'!'`. /// /// See [the reference][ref] for more information. /// /// [ref]: https://doc.rust-lang.org/reference/tokens.html#byte-literals #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct ByteLit { raw: B, /// Start index of the suffix or `raw.len()` if there is no suffix. start_suffix: usize, value: u8, } impl ByteLit { /// Parses the input as a byte literal. Returns an error if the input is /// invalid or represents a different kind of literal. 
pub fn parse(input: B) -> Result { if input.is_empty() { return Err(perr(None, Empty)); } if !input.starts_with("b'") { return Err(perr(None, InvalidByteLiteralStart)); } let (value, start_suffix) = parse_impl(&input)?; Ok(Self { raw: input, value, start_suffix }) } /// Returns the byte value that this literal represents. pub fn value(&self) -> u8 { self.value } /// The optional suffix. Returns `""` if the suffix is empty/does not exist. pub fn suffix(&self) -> &str { &(*self.raw)[self.start_suffix..] } /// Returns the raw input that was passed to `parse`. pub fn raw_input(&self) -> &str { &self.raw } /// Returns the raw input that was passed to `parse`, potentially owned. pub fn into_raw_input(self) -> B { self.raw } } impl ByteLit<&str> { /// Makes a copy of the underlying buffer and returns the owned version of /// `Self`. pub fn to_owned(&self) -> ByteLit { ByteLit { raw: self.raw.to_owned(), start_suffix: self.start_suffix, value: self.value, } } } impl fmt::Display for ByteLit { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.pad(&self.raw) } } /// Precondition: must start with `b'`. 
#[inline(never)] pub(crate) fn parse_impl(input: &str) -> Result<(u8, usize), ParseError> { let input_bytes = input.as_bytes(); let first = input_bytes.get(2).ok_or(perr(None, UnterminatedByteLiteral))?; let (c, len) = match first { b'\'' if input_bytes.get(3) == Some(&b'\'') => return Err(perr(2, UnescapedSingleQuote)), b'\'' => return Err(perr(None, EmptyByteLiteral)), b'\n' | b'\t' | b'\r' => return Err(perr(2, UnescapedSpecialWhitespace)), b'\\' => unescape::(&input[2..], 2)?, other if other.is_ascii() => (*other, 1), _ => return Err(perr(2, NonAsciiInByteLiteral)), }; match input[2 + len..].find('\'') { Some(0) => {} Some(_) => return Err(perr(None, OverlongByteLiteral)), None => return Err(perr(None, UnterminatedByteLiteral)), } let start_suffix = 2 + len + 1; let suffix = &input[start_suffix..]; check_suffix(suffix).map_err(|kind| perr(start_suffix, kind))?; Ok((c, start_suffix)) } #[cfg(test)] mod tests; litrs-0.4.0/src/byte/tests.rs000064400000000000000000000131271046102023000142300ustar 00000000000000use crate::{ByteLit, Literal, test_util::{assert_parse_ok_eq, assert_roundtrip}}; // ===== Utility functions ======================================================================= macro_rules! 
check { ($lit:literal) => { check!($lit, stringify!($lit), "") }; ($lit:literal, $input:expr, $suffix:literal) => { let input = $input; let expected = ByteLit { raw: input, start_suffix: input.len() - $suffix.len(), value: $lit, }; assert_parse_ok_eq(input, ByteLit::parse(input), expected.clone(), "ByteLit::parse"); assert_parse_ok_eq(input, Literal::parse(input), Literal::Byte(expected), "Literal::parse"); let lit = ByteLit::parse(input).unwrap(); assert_eq!(lit.value(), $lit); assert_eq!(lit.suffix(), $suffix); assert_roundtrip(expected.to_owned(), input); }; } // ===== Actual tests ============================================================================ #[test] fn alphanumeric() { check!(b'a'); check!(b'b'); check!(b'y'); check!(b'z'); check!(b'A'); check!(b'B'); check!(b'Y'); check!(b'Z'); check!(b'0'); check!(b'1'); check!(b'8'); check!(b'9'); } #[test] fn special_chars() { check!(b' '); check!(b'!'); check!(b'"'); check!(b'#'); check!(b'$'); check!(b'%'); check!(b'&'); check!(b'('); check!(b')'); check!(b'*'); check!(b'+'); check!(b','); check!(b'-'); check!(b'.'); check!(b'/'); check!(b':'); check!(b';'); check!(b'<'); check!(b'='); check!(b'>'); check!(b'?'); check!(b'@'); check!(b'['); check!(b']'); check!(b'^'); check!(b'_'); check!(b'`'); check!(b'{'); check!(b'|'); check!(b'}'); check!(b'~'); } #[test] fn quote_escapes() { check!(b'\''); check!(b'\"'); } #[test] fn ascii_escapes() { check!(b'\n'); check!(b'\r'); check!(b'\t'); check!(b'\\'); check!(b'\0'); check!(b'\x00'); check!(b'\x01'); check!(b'\x0c'); check!(b'\x0D'); check!(b'\x13'); check!(b'\x30'); check!(b'\x30'); check!(b'\x4B'); check!(b'\x6b'); check!(b'\x7F'); check!(b'\x7f'); } #[test] fn byte_escapes() { check!(b'\x80'); check!(b'\x8a'); check!(b'\x8C'); check!(b'\x99'); check!(b'\xa0'); check!(b'\xAd'); check!(b'\xfe'); check!(b'\xFe'); check!(b'\xfF'); check!(b'\xFF'); } #[test] fn suffixes() { check!(b'a', r##"b'a'peter"##, "peter"); check!(b'#', r##"b'#'peter"##, "peter"); 
check!(b'\n', r##"b'\n'peter"##, "peter"); check!(b'\'', r##"b'\''peter"##, "peter"); check!(b'\"', r##"b'\"'peter"##, "peter"); check!(b'\xFF', r##"b'\xFF'peter"##, "peter"); } #[test] fn invald_escapes() { assert_err!(ByteLit, r"b'\a'", UnknownEscape, 2..4); assert_err!(ByteLit, r"b'\y'", UnknownEscape, 2..4); assert_err!(ByteLit, r"b'\", UnterminatedEscape, 2..3); assert_err!(ByteLit, r"b'\x'", UnterminatedEscape, 2..5); assert_err!(ByteLit, r"b'\x1'", InvalidXEscape, 2..6); assert_err!(ByteLit, r"b'\xaj'", InvalidXEscape, 2..6); assert_err!(ByteLit, r"b'\xjb'", InvalidXEscape, 2..6); } #[test] fn unicode_escape_not_allowed() { assert_err!(ByteLit, r"b'\u{0}'", UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteLit, r"b'\u{00}'", UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteLit, r"b'\u{b}'", UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteLit, r"b'\u{B}'", UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteLit, r"b'\u{7e}'", UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteLit, r"b'\u{E4}'", UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteLit, r"b'\u{e4}'", UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteLit, r"b'\u{fc}'", UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteLit, r"b'\u{Fc}'", UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteLit, r"b'\u{fC}'", UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteLit, r"b'\u{FC}'", UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteLit, r"b'\u{b10}'", UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteLit, r"b'\u{B10}'", UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteLit, r"b'\u{0b10}'", UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteLit, r"b'\u{2764}'", UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteLit, r"b'\u{1f602}'", UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteLit, r"b'\u{1F602}'", UnicodeEscapeInByteLiteral, 2..4); } #[test] fn parse_err() { assert_err!(ByteLit, r"b''", EmptyByteLiteral, None); assert_err!(ByteLit, r"b' ''", UnexpectedChar, 4..5); assert_err!(ByteLit, 
r"b'", UnterminatedByteLiteral, None); assert_err!(ByteLit, r"b'a", UnterminatedByteLiteral, None); assert_err!(ByteLit, r"b'\n", UnterminatedByteLiteral, None); assert_err!(ByteLit, r"b'\x35", UnterminatedByteLiteral, None); assert_err!(ByteLit, r"b'ab'", OverlongByteLiteral, None); assert_err!(ByteLit, r"b'a _'", OverlongByteLiteral, None); assert_err!(ByteLit, r"b'\n3'", OverlongByteLiteral, None); assert_err!(ByteLit, r"", Empty, None); assert_err!(ByteLit, r"b'''", UnescapedSingleQuote, 2); assert_err!(ByteLit, r"b''''", UnescapedSingleQuote, 2); assert_err!(ByteLit, "b'\n'", UnescapedSpecialWhitespace, 2); assert_err!(ByteLit, "b'\t'", UnescapedSpecialWhitespace, 2); assert_err!(ByteLit, "b'\r'", UnescapedSpecialWhitespace, 2); assert_err!(ByteLit, "b'న'", NonAsciiInByteLiteral, 2); assert_err!(ByteLit, "b'犬'", NonAsciiInByteLiteral, 2); assert_err!(ByteLit, "b'🦊'", NonAsciiInByteLiteral, 2); } litrs-0.4.0/src/bytestr/mod.rs000064400000000000000000000100771046102023000143770ustar 00000000000000use std::{fmt, ops::Range}; use crate::{ Buffer, ParseError, err::{perr, ParseErrorKind::*}, escape::{scan_raw_string, unescape_string}, }; /// A byte string or raw byte string literal, e.g. `b"hello"` or `br#"abc"def"#`. /// /// See [the reference][ref] for more information. /// /// [ref]: https://doc.rust-lang.org/reference/tokens.html#byte-string-literals #[derive(Debug, Clone, PartialEq, Eq)] pub struct ByteStringLit { /// The raw input. raw: B, /// The string value (with all escapes unescaped), or `None` if there were /// no escapes. In the latter case, the value is the part of `raw` between /// the quotes. value: Option>, /// The number of hash signs in case of a raw string literal, or `None` if /// it's not a raw string literal. num_hashes: Option, /// Start index of the suffix or `raw.len()` if there is no suffix. start_suffix: usize, } impl ByteStringLit { /// Parses the input as a (raw) byte string literal. 
Returns an error if the /// input is invalid or represents a different kind of literal. pub fn parse(input: B) -> Result { if input.is_empty() { return Err(perr(None, Empty)); } if !input.starts_with(r#"b""#) && !input.starts_with("br") { return Err(perr(None, InvalidByteStringLiteralStart)); } let (value, num_hashes, start_suffix) = parse_impl(&input)?; Ok(Self { raw: input, value, num_hashes, start_suffix }) } /// Returns the byte string value this literal represents (where all escapes /// have been turned into their respective values). pub fn value(&self) -> &[u8] { self.value.as_deref().unwrap_or(&self.raw.as_bytes()[self.inner_range()]) } /// Like `value` but returns a potentially owned version of the value. /// /// The return value is either `Cow<'static, [u8]>` if `B = String`, or /// `Cow<'a, [u8]>` if `B = &'a str`. pub fn into_value(self) -> B::ByteCow { let inner_range = self.inner_range(); let Self { raw, value, .. } = self; value.map(B::ByteCow::from).unwrap_or_else(|| raw.cut(inner_range).into_byte_cow()) } /// The optional suffix. Returns `""` if the suffix is empty/does not exist. pub fn suffix(&self) -> &str { &(*self.raw)[self.start_suffix..] } /// Returns whether this literal is a raw byte string literal (starting /// with `br`). pub fn is_raw_byte_string(&self) -> bool { self.num_hashes.is_some() } /// Returns the raw input that was passed to `parse`. pub fn raw_input(&self) -> &str { &self.raw } /// Returns the raw input that was passed to `parse`, potentially owned. pub fn into_raw_input(self) -> B { self.raw } /// The range within `self.raw` that excludes the quotes and potential `r#`. fn inner_range(&self) -> Range { match self.num_hashes { None => 2..self.start_suffix - 1, Some(n) => 2 + n as usize + 1..self.start_suffix - n as usize - 1, } } } impl ByteStringLit<&str> { /// Makes a copy of the underlying buffer and returns the owned version of /// `Self`. 
pub fn into_owned(self) -> ByteStringLit<String> {
    ByteStringLit {
        raw: self.raw.to_owned(),
        value: self.value,
        num_hashes: self.num_hashes,
        start_suffix: self.start_suffix,
    }
}
}

impl<B: Buffer> fmt::Display for ByteStringLit<B> {
    /// Writes the raw input, honoring width/alignment flags of the formatter.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.pad(&self.raw)
    }
}


/// Parses a (raw) byte string literal and returns the unescaped value (if it
/// differs from the raw inner text), the number of hashes (raw literals only)
/// and the start index of the suffix.
///
/// Precondition: input has to start with either `b"` or `br`.
#[inline(never)]
fn parse_impl(input: &str) -> Result<(Option<Vec<u8>>, Option<u32>, usize), ParseError> {
    // In both cases the scanners start at index 2, i.e. right after the
    // two-byte `b"`/`br` prefix.
    if input.starts_with("br") {
        scan_raw_string::<u8>(input, 2)
            .map(|(v, num, start_suffix)| (v.map(String::into_bytes), Some(num), start_suffix))
    } else {
        unescape_string::<u8>(input, 2)
            .map(|(v, start_suffix)| (v.map(String::into_bytes), None, start_suffix))
    }
}

#[cfg(test)]
mod tests;
litrs-0.4.0/src/bytestr/tests.rs000064400000000000000000000214071046102023000147610ustar 00000000000000
use crate::{Literal, ByteStringLit, test_util::{assert_parse_ok_eq, assert_roundtrip}};

// ===== Utility functions =======================================================================

macro_rules!
check { ($lit:literal, $has_escapes:expr, $num_hashes:expr) => { check!($lit, stringify!($lit), $has_escapes, $num_hashes, "") }; ($lit:literal, $input:expr, $has_escapes:expr, $num_hashes:expr, $suffix:literal) => { let input = $input; let expected = ByteStringLit { raw: input, value: if $has_escapes { Some($lit.to_vec()) } else { None }, num_hashes: $num_hashes, start_suffix: input.len() - $suffix.len(), }; assert_parse_ok_eq( input, ByteStringLit::parse(input), expected.clone(), "ByteStringLit::parse"); assert_parse_ok_eq( input, Literal::parse(input), Literal::ByteString(expected.clone()), "Literal::parse"); let lit = ByteStringLit::parse(input).unwrap(); assert_eq!(lit.value(), $lit); assert_eq!(lit.suffix(), $suffix); assert_eq!(lit.into_value().as_ref(), $lit); assert_roundtrip(expected.into_owned(), input); }; } // ===== Actual tests ============================================================================ #[test] fn simple() { check!(b"", false, None); check!(b"a", false, None); check!(b"peter", false, None); } #[test] fn special_whitespace() { let strings = ["\n", "\t", "foo\tbar", "baz\n"]; for &s in &strings { let input = format!(r#"b"{}""#, s); let input_raw = format!(r#"br"{}""#, s); for (input, num_hashes) in vec![(input, None), (input_raw, Some(0))] { let expected = ByteStringLit { raw: &*input, value: None, num_hashes, start_suffix: input.len(), }; assert_parse_ok_eq( &input, ByteStringLit::parse(&*input), expected.clone(), "ByteStringLit::parse"); assert_parse_ok_eq( &input, Literal::parse(&*input), Literal::ByteString(expected), "Literal::parse"); assert_eq!(ByteStringLit::parse(&*input).unwrap().value(), s.as_bytes()); assert_eq!(ByteStringLit::parse(&*input).unwrap().into_value(), s.as_bytes()); } } let res = ByteStringLit::parse("br\"\r\"").expect("failed to parse"); assert_eq!(res.value(), b"\r"); } #[test] fn simple_escapes() { check!(b"a\nb", true, None); check!(b"\nb", true, None); check!(b"a\n", true, None); check!(b"\n", true, None); 
check!(b"\x60foo \t bar\rbaz\n banana \0kiwi", true, None); check!(b"foo \\ferris", true, None); check!(b"baz \\ferris\"box", true, None); check!(b"\\foo\\ banana\" baz\"", true, None); check!(b"\"foo \\ferris \" baz\\", true, None); check!(b"\x00", true, None); check!(b" \x01", true, None); check!(b"\x0c foo", true, None); check!(b" foo\x0D ", true, None); check!(b"\\x13", true, None); check!(b"\"x30", true, None); } #[test] fn string_continue() { check!(b"foo\ bar", true, None); check!(b"foo\ bar", true, None); check!(b"foo\ banana", true, None); // Weird whitespace characters let lit = ByteStringLit::parse("b\"foo\\\n\r\t\n \n\tbar\"").expect("failed to parse"); assert_eq!(lit.value(), b"foobar"); // Raw strings do not handle "string continues" check!(br"foo\ bar", false, Some(0)); } #[test] fn crlf_newlines() { let lit = ByteStringLit::parse("b\"foo\r\nbar\"").expect("failed to parse"); assert_eq!(lit.value(), b"foo\nbar"); let lit = ByteStringLit::parse("b\"\r\nbar\"").expect("failed to parse"); assert_eq!(lit.value(), b"\nbar"); let lit = ByteStringLit::parse("b\"foo\r\n\"").expect("failed to parse"); assert_eq!(lit.value(), b"foo\n"); let lit = ByteStringLit::parse("br\"foo\r\nbar\"").expect("failed to parse"); assert_eq!(lit.value(), b"foo\nbar"); let lit = ByteStringLit::parse("br#\"\r\nbar\"#").expect("failed to parse"); assert_eq!(lit.value(), b"\nbar"); let lit = ByteStringLit::parse("br##\"foo\r\n\"##").expect("failed to parse"); assert_eq!(lit.value(), b"foo\n"); } #[test] fn raw_byte_string() { check!(br"", false, Some(0)); check!(br"a", false, Some(0)); check!(br"peter", false, Some(0)); check!(br"Greetings jason!", false, Some(0)); check!(br#""#, false, Some(1)); check!(br#"a"#, false, Some(1)); check!(br##"peter"##, false, Some(2)); check!(br###"Greetings # Jason!"###, false, Some(3)); check!(br########"we ## need #### more ####### hashtags"########, false, Some(8)); check!(br#"foo " bar"#, false, Some(1)); check!(br##"foo " bar"##, false, 
Some(2)); check!(br#"foo """" '"'" bar"#, false, Some(1)); check!(br#""foo""#, false, Some(1)); check!(br###""foo'"###, false, Some(3)); check!(br#""x'#_#s'"#, false, Some(1)); check!(br"#", false, Some(0)); check!(br"foo#", false, Some(0)); check!(br"##bar", false, Some(0)); check!(br###""##foo"##bar'"###, false, Some(3)); check!(br"foo\n\t\r\0\\x60\u{123}doggo", false, Some(0)); check!(br#"cat\n\t\r\0\\x60\u{123}doggo"#, false, Some(1)); } #[test] fn suffixes() { check!(b"hello", r###"b"hello"suffix"###, false, None, "suffix"); check!(b"fox", r#"b"fox"peter"#, false, None, "peter"); check!(b"a\x0cb\\", r#"b"a\x0cb\\"_jürgen"#, true, None, "_jürgen"); check!(br"a\x0cb\\", r###"br#"a\x0cb\\"#_jürgen"###, false, Some(1), "_jürgen"); } #[test] fn parse_err() { assert_err!(ByteStringLit, r#"b""#, UnterminatedString, None); assert_err!(ByteStringLit, r#"b"cat"#, UnterminatedString, None); assert_err!(ByteStringLit, r#"b"Jurgen"#, UnterminatedString, None); assert_err!(ByteStringLit, r#"b"foo bar baz"#, UnterminatedString, None); assert_err!(ByteStringLit, r#"b"fox"peter""#, InvalidSuffix, 6); assert_err!(ByteStringLit, r###"br#"foo "# bar"#"###, UnexpectedChar, 10); assert_err!(ByteStringLit, "b\"\r\"", IsolatedCr, 2); assert_err!(ByteStringLit, "b\"fo\rx\"", IsolatedCr, 4); assert_err!(ByteStringLit, r##"br####""##, UnterminatedRawString, None); assert_err!(ByteStringLit, r#####"br##"foo"#bar"#####, UnterminatedRawString, None); assert_err!(ByteStringLit, r##"br####"##, InvalidLiteral, None); assert_err!(ByteStringLit, r##"br####x"##, InvalidLiteral, None); } #[test] fn non_ascii() { assert_err!(ByteStringLit, r#"b"న""#, NonAsciiInByteLiteral, 2); assert_err!(ByteStringLit, r#"b"foo犬""#, NonAsciiInByteLiteral, 5); assert_err!(ByteStringLit, r#"b"x🦊baz""#, NonAsciiInByteLiteral, 3); assert_err!(ByteStringLit, r#"br"న""#, NonAsciiInByteLiteral, 3); assert_err!(ByteStringLit, r#"br"foo犬""#, NonAsciiInByteLiteral, 6); assert_err!(ByteStringLit, r#"br"x🦊baz""#, 
NonAsciiInByteLiteral, 4); } #[test] fn invalid_escapes() { assert_err!(ByteStringLit, r#"b"\a""#, UnknownEscape, 2..4); assert_err!(ByteStringLit, r#"b"foo\y""#, UnknownEscape, 5..7); assert_err!(ByteStringLit, r#"b"\"#, UnterminatedEscape, 2); assert_err!(ByteStringLit, r#"b"\x""#, UnterminatedEscape, 2..4); assert_err!(ByteStringLit, r#"b"foo\x1""#, UnterminatedEscape, 5..8); assert_err!(ByteStringLit, r#"b" \xaj""#, InvalidXEscape, 3..7); assert_err!(ByteStringLit, r#"b"\xjbbaz""#, InvalidXEscape, 2..6); } #[test] fn unicode_escape_not_allowed() { assert_err!(ByteStringLit, r#"b"\u{0}""#, UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteStringLit, r#"b"\u{00}""#, UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteStringLit, r#"b"\u{b}""#, UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteStringLit, r#"b"\u{B}""#, UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteStringLit, r#"b"\u{7e}""#, UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteStringLit, r#"b"\u{E4}""#, UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteStringLit, r#"b"\u{e4}""#, UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteStringLit, r#"b"\u{fc}""#, UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteStringLit, r#"b"\u{Fc}""#, UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteStringLit, r#"b"\u{fC}""#, UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteStringLit, r#"b"\u{FC}""#, UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteStringLit, r#"b"\u{b10}""#, UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteStringLit, r#"b"\u{B10}""#, UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteStringLit, r#"b"\u{0b10}""#, UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteStringLit, r#"b"\u{2764}""#, UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteStringLit, r#"b"\u{1f602}""#, UnicodeEscapeInByteLiteral, 2..4); assert_err!(ByteStringLit, r#"b"\u{1F602}""#, UnicodeEscapeInByteLiteral, 2..4); } litrs-0.4.0/src/char/mod.rs000064400000000000000000000060171046102023000136170ustar 00000000000000use 
std::fmt;

use crate::{
    Buffer, ParseError,
    err::{perr, ParseErrorKind::*},
    escape::unescape,
    parse::{first_byte_or_empty, check_suffix},
};


/// A character literal, e.g. `'g'` or `'🦊'`.
///
/// See [the reference][ref] for more information.
///
/// [ref]: https://doc.rust-lang.org/reference/tokens.html#character-literals
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CharLit<B: Buffer> {
    /// The raw input that was passed to `parse`.
    raw: B,
    /// Start index of the suffix or `raw.len()` if there is no suffix.
    start_suffix: usize,
    /// The character this literal represents (escapes already resolved).
    value: char,
}

impl<B: Buffer> CharLit<B> {
    /// Parses the input as a character literal. Returns an error if the input
    /// is invalid or represents a different kind of literal.
    pub fn parse(input: B) -> Result<Self, ParseError> {
        match first_byte_or_empty(&input)? {
            b'\'' => {
                let (value, start_suffix) = parse_impl(&input)?;
                Ok(Self { raw: input, value, start_suffix })
            },
            _ => Err(perr(0, DoesNotStartWithQuote)),
        }
    }

    /// Returns the character value that this literal represents.
    pub fn value(&self) -> char {
        self.value
    }

    /// The optional suffix. Returns `""` if the suffix is empty/does not exist.
    pub fn suffix(&self) -> &str {
        &(*self.raw)[self.start_suffix..]
    }

    /// Returns the raw input that was passed to `parse`.
    pub fn raw_input(&self) -> &str {
        &self.raw
    }

    /// Returns the raw input that was passed to `parse`, potentially owned.
    pub fn into_raw_input(self) -> B {
        self.raw
    }
}

impl CharLit<&str> {
    /// Makes a copy of the underlying buffer and returns the owned version of
    /// `Self`.
    pub fn to_owned(&self) -> CharLit<String> {
        CharLit {
            raw: self.raw.to_owned(),
            start_suffix: self.start_suffix,
            value: self.value,
        }
    }
}

impl<B: Buffer> fmt::Display for CharLit<B> {
    /// Writes the raw input, honoring width/alignment flags of the formatter.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.pad(&self.raw)
    }
}

/// Precondition: first character in input must be `'`.
#[inline(never)] pub(crate) fn parse_impl(input: &str) -> Result<(char, usize), ParseError> { let first = input.chars().nth(1).ok_or(perr(None, UnterminatedCharLiteral))?; let (c, len) = match first { '\'' if input.chars().nth(2) == Some('\'') => return Err(perr(1, UnescapedSingleQuote)), '\'' => return Err(perr(None, EmptyCharLiteral)), '\n' | '\t' | '\r' => return Err(perr(1, UnescapedSpecialWhitespace)), '\\' => unescape::(&input[1..], 1)?, other => (other, other.len_utf8()), }; match input[1 + len..].find('\'') { Some(0) => {} Some(_) => return Err(perr(None, OverlongCharLiteral)), None => return Err(perr(None, UnterminatedCharLiteral)), } let start_suffix = 1 + len + 1; let suffix = &input[start_suffix..]; check_suffix(suffix).map_err(|kind| perr(start_suffix, kind))?; Ok((c, start_suffix)) } #[cfg(test)] mod tests; litrs-0.4.0/src/char/tests.rs000064400000000000000000000150001046102023000141720ustar 00000000000000use crate::{Literal, test_util::{assert_parse_ok_eq, assert_roundtrip}}; use super::CharLit; // ===== Utility functions ======================================================================= macro_rules! 
check { ($lit:literal) => { check!($lit, stringify!($lit), "") }; ($lit:literal, $input:expr, $suffix:literal) => { let input = $input; let expected = CharLit { raw: input, start_suffix: input.len() - $suffix.len(), value: $lit, }; assert_parse_ok_eq(input, CharLit::parse(input), expected.clone(), "CharLit::parse"); assert_parse_ok_eq(input, Literal::parse(input), Literal::Char(expected), "Literal::parse"); let lit = CharLit::parse(input).unwrap(); assert_eq!(lit.value(), $lit); assert_eq!(lit.suffix(), $suffix); assert_roundtrip(expected.to_owned(), input); }; } // ===== Actual tests ============================================================================ #[test] fn alphanumeric() { check!('a'); check!('b'); check!('y'); check!('z'); check!('A'); check!('B'); check!('Y'); check!('Z'); check!('0'); check!('1'); check!('8'); check!('9'); } #[test] fn special_chars() { check!(' '); check!('!'); check!('"'); check!('#'); check!('$'); check!('%'); check!('&'); check!('('); check!(')'); check!('*'); check!('+'); check!(','); check!('-'); check!('.'); check!('/'); check!(':'); check!(';'); check!('<'); check!('='); check!('>'); check!('?'); check!('@'); check!('['); check!(']'); check!('^'); check!('_'); check!('`'); check!('{'); check!('|'); check!('}'); check!('~'); } #[test] fn unicode() { check!('న'); check!('犬'); check!('🦊'); } #[test] fn quote_escapes() { check!('\''); check!('\"'); } #[test] fn ascii_escapes() { check!('\n'); check!('\r'); check!('\t'); check!('\\'); check!('\0'); check!('\x00'); check!('\x01'); check!('\x0c'); check!('\x0D'); check!('\x13'); check!('\x30'); check!('\x30'); check!('\x4B'); check!('\x6b'); check!('\x7F'); check!('\x7f'); } #[test] fn unicode_escapes() { check!('\u{0}'); check!('\u{00}'); check!('\u{b}'); check!('\u{B}'); check!('\u{7e}'); check!('\u{E4}'); check!('\u{e4}'); check!('\u{fc}'); check!('\u{Fc}'); check!('\u{fC}'); check!('\u{FC}'); check!('\u{b10}'); check!('\u{B10}'); check!('\u{0b10}'); check!('\u{2764}'); 
check!('\u{1f602}'); check!('\u{1F602}'); check!('\u{0}'); check!('\u{0__}'); check!('\u{3_b}'); check!('\u{1_F_6_0_2}'); check!('\u{1_F6_02_____}'); } #[test] fn suffixes() { check!('a', r##"'a'peter"##, "peter"); check!('#', r##"'#'peter"##, "peter"); check!('\n', r##"'\n'peter"##, "peter"); check!('\'', r##"'\''peter"##, "peter"); check!('\"', r##"'\"'peter"##, "peter"); } #[test] fn invald_ascii_escapes() { assert_err!(CharLit, r"'\x80'", NonAsciiXEscape, 1..5); assert_err!(CharLit, r"'\x81'", NonAsciiXEscape, 1..5); assert_err!(CharLit, r"'\x8a'", NonAsciiXEscape, 1..5); assert_err!(CharLit, r"'\x8F'", NonAsciiXEscape, 1..5); assert_err!(CharLit, r"'\xa0'", NonAsciiXEscape, 1..5); assert_err!(CharLit, r"'\xB0'", NonAsciiXEscape, 1..5); assert_err!(CharLit, r"'\xc3'", NonAsciiXEscape, 1..5); assert_err!(CharLit, r"'\xDf'", NonAsciiXEscape, 1..5); assert_err!(CharLit, r"'\xff'", NonAsciiXEscape, 1..5); assert_err!(CharLit, r"'\xfF'", NonAsciiXEscape, 1..5); assert_err!(CharLit, r"'\xFf'", NonAsciiXEscape, 1..5); assert_err!(CharLit, r"'\xFF'", NonAsciiXEscape, 1..5); } #[test] fn invalid_escapes() { assert_err!(CharLit, r"'\a'", UnknownEscape, 1..3); assert_err!(CharLit, r"'\y'", UnknownEscape, 1..3); assert_err!(CharLit, r"'\", UnterminatedEscape, 1); assert_err!(CharLit, r"'\x'", UnterminatedEscape, 1..4); assert_err!(CharLit, r"'\x1'", InvalidXEscape, 1..5); assert_err!(CharLit, r"'\xaj'", InvalidXEscape, 1..5); assert_err!(CharLit, r"'\xjb'", InvalidXEscape, 1..5); } #[test] fn invalid_unicode_escapes() { assert_err!(CharLit, r"'\u'", UnicodeEscapeWithoutBrace, 1..3); assert_err!(CharLit, r"'\u '", UnicodeEscapeWithoutBrace, 1..3); assert_err!(CharLit, r"'\u3'", UnicodeEscapeWithoutBrace, 1..3); assert_err!(CharLit, r"'\u{'", UnterminatedUnicodeEscape, 1..5); assert_err!(CharLit, r"'\u{12'", UnterminatedUnicodeEscape, 1..7); assert_err!(CharLit, r"'\u{a0b'", UnterminatedUnicodeEscape, 1..8); assert_err!(CharLit, r"'\u{a0_b '", UnterminatedUnicodeEscape, 
1..11); assert_err!(CharLit, r"'\u{_}'", InvalidStartOfUnicodeEscape, 4); assert_err!(CharLit, r"'\u{_5f}'", InvalidStartOfUnicodeEscape, 4); assert_err!(CharLit, r"'\u{x}'", NonHexDigitInUnicodeEscape, 4); assert_err!(CharLit, r"'\u{0x}'", NonHexDigitInUnicodeEscape, 5); assert_err!(CharLit, r"'\u{3bx}'", NonHexDigitInUnicodeEscape, 6); assert_err!(CharLit, r"'\u{3b_x}'", NonHexDigitInUnicodeEscape, 7); assert_err!(CharLit, r"'\u{4x_}'", NonHexDigitInUnicodeEscape, 5); assert_err!(CharLit, r"'\u{1234567}'", TooManyDigitInUnicodeEscape, 10); assert_err!(CharLit, r"'\u{1234567}'", TooManyDigitInUnicodeEscape, 10); assert_err!(CharLit, r"'\u{1_23_4_56_7}'", TooManyDigitInUnicodeEscape, 14); assert_err!(CharLit, r"'\u{abcdef123}'", TooManyDigitInUnicodeEscape, 10); assert_err!(CharLit, r"'\u{110000}'", InvalidUnicodeEscapeChar, 1..10); } #[test] fn parse_err() { assert_err!(CharLit, r"''", EmptyCharLiteral, None); assert_err!(CharLit, r"' ''", UnexpectedChar, 3); assert_err!(CharLit, r"'", UnterminatedCharLiteral, None); assert_err!(CharLit, r"'a", UnterminatedCharLiteral, None); assert_err!(CharLit, r"'\n", UnterminatedCharLiteral, None); assert_err!(CharLit, r"'\x35", UnterminatedCharLiteral, None); assert_err!(CharLit, r"'ab'", OverlongCharLiteral, None); assert_err!(CharLit, r"'a _'", OverlongCharLiteral, None); assert_err!(CharLit, r"'\n3'", OverlongCharLiteral, None); assert_err!(CharLit, r"", Empty, None); assert_err!(CharLit, r"'''", UnescapedSingleQuote, 1); assert_err!(CharLit, r"''''", UnescapedSingleQuote, 1); assert_err!(CharLit, "'\n'", UnescapedSpecialWhitespace, 1); assert_err!(CharLit, "'\t'", UnescapedSpecialWhitespace, 1); assert_err!(CharLit, "'\r'", UnescapedSpecialWhitespace, 1); } litrs-0.4.0/src/err.rs000064400000000000000000000320101046102023000127030ustar 00000000000000use std::{fmt, ops::Range}; /// An error signaling that a different kind of token was expected. Returned by /// the various `TryFrom` impls. 
#[derive(Debug, Clone, Copy)]
pub struct InvalidToken {
    /// The kind of token the conversion asked for.
    pub(crate) expected: TokenKind,
    /// The kind of token that was actually found.
    pub(crate) actual: TokenKind,
    /// Span attached to all tokens of the generated `compile_error!`.
    pub(crate) span: Span,
}

impl InvalidToken {
    /// Returns a token stream representing `compile_error!("msg");` where
    /// `"msg"` is the output of `self.to_string()`. **Panics if called outside
    /// of a proc-macro context!**
    pub fn to_compile_error(&self) -> proc_macro::TokenStream {
        use proc_macro::{Delimiter, Ident, Group, Punct, Spacing, TokenTree};

        let span = match self.span {
            Span::One(s) => s,
            #[cfg(feature = "proc-macro2")]
            Span::Two(s) => s.unwrap(),
        };
        let msg = self.to_string();
        let tokens = vec![
            TokenTree::from(Ident::new("compile_error", span)),
            TokenTree::from(Punct::new('!', Spacing::Alone)),
            TokenTree::from(Group::new(
                Delimiter::Parenthesis,
                TokenTree::from(proc_macro::Literal::string(&msg)).into(),
            )),
        ];

        // Give every emitted token the error's span.
        tokens.into_iter().map(|mut t| { t.set_span(span); t }).collect()
    }

    /// Like [`to_compile_error`][Self::to_compile_error], but returns a token
    /// stream from `proc_macro2` and does not panic outside of a proc-macro
    /// context.
    #[cfg(feature = "proc-macro2")]
    pub fn to_compile_error2(&self) -> proc_macro2::TokenStream {
        use proc_macro2::{Delimiter, Ident, Group, Punct, Spacing, TokenTree};

        let span = match self.span {
            Span::One(s) => proc_macro2::Span::from(s),
            Span::Two(s) => s,
        };
        let msg = self.to_string();
        let tokens = vec![
            TokenTree::from(Ident::new("compile_error", span)),
            TokenTree::from(Punct::new('!', Spacing::Alone)),
            TokenTree::from(Group::new(
                Delimiter::Parenthesis,
                TokenTree::from(proc_macro2::Literal::string(&msg)).into(),
            )),
        ];

        // Give every emitted token the error's span.
        tokens.into_iter().map(|mut t| { t.set_span(span); t }).collect()
    }
}

impl std::error::Error for InvalidToken {}

impl fmt::Display for InvalidToken {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        /// Human readable description (including article) of a token kind.
        fn kind_desc(kind: TokenKind) -> &'static str {
            match kind {
                TokenKind::Punct => "a punctuation character",
                TokenKind::Ident => "an identifier",
                TokenKind::Group => "a group",
                TokenKind::Literal => "a literal",
                TokenKind::BoolLit => "a bool literal (`true` or `false`)",
                TokenKind::ByteLit => "a byte literal (e.g. `b'r'`)",
                TokenKind::ByteStringLit => r#"a byte string literal (e.g. `b"fox"`)"#,
                TokenKind::CharLit => "a character literal (e.g. `'P'`)",
                TokenKind::FloatLit => "a float literal (e.g. `3.14`)",
                TokenKind::IntegerLit => "an integer literal (e.g. `27`)",
                TokenKind::StringLit => r#"a string literal (e.g. `"Ferris"`)"#,
            }
        }

        write!(f, "expected {}, but found {}", kind_desc(self.expected), kind_desc(self.actual))
    }
}

/// The kinds of tokens distinguished in `InvalidToken` error messages.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum TokenKind {
    Punct,
    Ident,
    Group,
    Literal,
    BoolLit,
    ByteLit,
    ByteStringLit,
    CharLit,
    FloatLit,
    IntegerLit,
    StringLit,
}

/// Unfortunately, we have to deal with both cases.
#[derive(Debug, Clone, Copy)] pub(crate) enum Span { One(proc_macro::Span), #[cfg(feature = "proc-macro2")] Two(proc_macro2::Span), } impl From for Span { fn from(src: proc_macro::Span) -> Self { Self::One(src) } } #[cfg(feature = "proc-macro2")] impl From for Span { fn from(src: proc_macro2::Span) -> Self { Self::Two(src) } } /// Errors during parsing. /// /// This type should be seen primarily for error reporting and not for catching /// specific cases. The span and error kind are not guaranteed to be stable /// over different versions of this library, meaning that a returned error can /// change from one version to the next. There are simply too many fringe cases /// that are not easy to classify as a specific error kind. It depends entirely /// on the specific parser code how an invalid input is categorized. /// /// Consider these examples: /// - `'\` can be seen as /// - invalid escape in character literal, or /// - unterminated character literal. /// - `'''` can be seen as /// - empty character literal, or /// - unescaped quote character in character literal. /// - `0b64` can be seen as /// - binary integer literal with invalid digit 6, or /// - binary integer literal with invalid digit 4, or /// - decimal integer literal with invalid digit b, or /// - decimal integer literal 0 with unknown type suffix `b64`. /// /// If you want to see more if these examples, feel free to check out the unit /// tests of this library. /// /// While this library does its best to emit sensible and precise errors, and to /// keep the returned errors as stable as possible, full stability cannot be /// guaranteed. #[derive(Debug, Clone)] pub struct ParseError { pub(crate) span: Option>, pub(crate) kind: ParseErrorKind, } impl ParseError { /// Returns a span of this error, if available. **Note**: the returned span /// might change in future versions of this library. See [the documentation /// of this type][ParseError] for more information. 
pub fn span(&self) -> Option<Range<usize>> {
    self.span.clone()
}
}

/// This is a free standing function instead of an associated one to reduce
/// noise around parsing code. There are lots of places that create errors, and
/// I want to keep them as short as possible.
pub(crate) fn perr(span: impl SpanLike, kind: ParseErrorKind) -> ParseError {
    ParseError {
        span: span.into_span(),
        kind,
    }
}

/// Everything that can be passed to `perr` as span: `None`, a byte range or a
/// single byte index.
pub(crate) trait SpanLike {
    fn into_span(self) -> Option<Range<usize>>;
}

impl SpanLike for Option<Range<usize>> {
    #[inline(always)]
    fn into_span(self) -> Option<Range<usize>> {
        self
    }
}
impl SpanLike for Range<usize> {
    #[inline(always)]
    fn into_span(self) -> Option<Range<usize>> {
        Some(self)
    }
}
impl SpanLike for usize {
    /// A single index `i` denotes the one-byte range `i..i + 1`.
    #[inline(always)]
    fn into_span(self) -> Option<Range<usize>> {
        Some(self..self + 1)
    }
}


/// Kinds of errors.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub(crate) enum ParseErrorKind {
    /// The input was an empty string
    Empty,

    /// An unexpected char was encountered.
    UnexpectedChar,

    /// Literal was not recognized.
    InvalidLiteral,

    /// Input does not start with decimal digit when trying to parse an integer.
    DoesNotStartWithDigit,

    /// A digit invalid for the specified integer base was found.
    InvalidDigit,

    /// Integer literal does not contain any valid digits.
    NoDigits,

    /// Exponent of a float literal does not contain any digits.
    NoExponentDigits,

    /// An unknown escape code, e.g. `\b`.
    UnknownEscape,

    /// A started escape sequence where the input ended before the escape was
    /// finished.
    UnterminatedEscape,

    /// An `\x` escape where the two digits are not valid hex digits.
    InvalidXEscape,

    /// A string or character literal using the `\xNN` escape where `NN > 0x7F`.
    NonAsciiXEscape,

    /// A `\u{...}` escape in a byte or byte string literal.
    UnicodeEscapeInByteLiteral,

    /// A Unicode escape that does not start with a hex digit.
    InvalidStartOfUnicodeEscape,

    /// A `\u{...}` escape that lacks the opening brace.
    UnicodeEscapeWithoutBrace,

    /// In a `\u{...}` escape, a non-hex digit and non-underscore character was
    /// found.
    NonHexDigitInUnicodeEscape,

    /// More than 6 digits found in unicode escape.
    TooManyDigitInUnicodeEscape,

    /// The value from a unicode escape does not represent a valid character.
    InvalidUnicodeEscapeChar,

    /// A `\u{..` escape that is not terminated (lacks the closing brace).
    UnterminatedUnicodeEscape,

    /// A character literal that's not terminated.
    UnterminatedCharLiteral,

    /// A character literal that contains more than one character.
    OverlongCharLiteral,

    /// An empty character literal, i.e. `''`.
    EmptyCharLiteral,

    /// A byte literal that's not terminated.
    UnterminatedByteLiteral,
    /// A byte literal that contains more than one byte.
    OverlongByteLiteral,
    /// An empty byte literal.
    EmptyByteLiteral,
    /// A non-ASCII character in a byte or byte string literal.
    NonAsciiInByteLiteral,

    /// A `'` character was not escaped in a character or byte literal, or a `"`
    /// character was not escaped in a string or byte string literal.
    UnescapedSingleQuote,

    /// A \n, \t or \r raw character in a char or byte literal.
    UnescapedSpecialWhitespace,

    /// When parsing a character, byte, string or byte string literal directly
    /// and the input does not start with the corresponding quote character
    /// (plus optional raw string prefix).
    DoesNotStartWithQuote,

    /// Unterminated raw string literal.
    UnterminatedRawString,

    /// String literal without a `"` at the end.
    UnterminatedString,

    /// Invalid start for a string literal.
    InvalidStringLiteralStart,

    /// Invalid start for a byte literal.
    InvalidByteLiteralStart,

    /// Invalid start for a byte string literal.
    InvalidByteStringLiteralStart,

    /// A literal `\r` character not followed by a `\n` character in a
    /// (raw) string or byte string literal.
    IsolatedCr,

    /// Literal suffix is not a valid identifier.
    InvalidSuffix,

    /// Returned by `FloatLit::parse` if an integer literal (no fractional nor
    /// exponent part) is passed.
    UnexpectedIntegerLit,

    /// Integer suffixes cannot start with `e` or `E` as this conflicts with the
    /// grammar for float literals.
    IntegerSuffixStartingWithE,
}

impl std::error::Error for ParseError {}

impl fmt::Display for ParseError {
    /// Writes a description of the error kind, followed by ` (at start..end)`
    /// if a span is available.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use ParseErrorKind::*;

        let description = match self.kind {
            Empty => "input is empty",
            UnexpectedChar => "unexpected character",
            InvalidLiteral => "invalid literal",
            DoesNotStartWithDigit => "number literal does not start with decimal digit",
            InvalidDigit => "integer literal contains a digit invalid for its base",
            NoDigits => "integer literal does not contain any digits",
            NoExponentDigits => "exponent of floating point literal does not contain any digits",
            UnknownEscape => "unknown escape",
            UnterminatedEscape => "unterminated escape: input ended too soon",
            InvalidXEscape => r"invalid `\x` escape: not followed by two hex digits",
            NonAsciiXEscape => r"`\x` escape in char/string literal exceeds ASCII range",
            UnicodeEscapeInByteLiteral => r"`\u{...}` escape in byte (string) literal not allowed",
            InvalidStartOfUnicodeEscape => r"invalid start of `\u{...}` escape",
            UnicodeEscapeWithoutBrace => r"Unicode `\u{...}` escape without opening brace",
            NonHexDigitInUnicodeEscape => r"non-hex digit found in `\u{...}` escape",
            TooManyDigitInUnicodeEscape => r"more than six digits in `\u{...}` escape",
            InvalidUnicodeEscapeChar => r"value specified in `\u{...}` escape is not a valid char",
            UnterminatedUnicodeEscape => r"unterminated `\u{...}` escape",
            UnterminatedCharLiteral => "character literal is not terminated",
            OverlongCharLiteral => "character literal contains more than one character",
            EmptyCharLiteral => "empty character literal",
            UnterminatedByteLiteral => "byte literal is not terminated",
            OverlongByteLiteral => "byte literal contains more than one byte",
            EmptyByteLiteral => "empty byte literal",
            NonAsciiInByteLiteral => "non ASCII character in byte (string) literal",
            UnescapedSingleQuote => "character literal contains unescaped ' character",
            UnescapedSpecialWhitespace => r"unescaped newline (\n), tab (\t) or cr (\r) character",
            DoesNotStartWithQuote => "invalid start for char/byte/string literal",
            UnterminatedRawString => "unterminated raw (byte) string literal",
            UnterminatedString => "unterminated (byte) string literal",
            InvalidStringLiteralStart => "invalid start for string literal",
            InvalidByteLiteralStart => "invalid start for byte literal",
            InvalidByteStringLiteralStart => "invalid start for byte string literal",
            IsolatedCr => r"`\r` not immediately followed by `\n` in string",
            InvalidSuffix => "literal suffix is not a valid identifier",
            UnexpectedIntegerLit => "expected float literal, but found integer",
            IntegerSuffixStartingWithE => "integer literal suffix must not start with 'e' or 'E'",
        };

        description.fmt(f)?;
        if let Some(span) = &self.span {
            write!(f, " (at {}..{})", span.start, span.end)?;
        }

        Ok(())
    }
}
litrs-0.4.0/src/escape.rs000064400000000000000000000215121046102023000133600ustar 00000000000000
use crate::{ParseError, err::{perr, ParseErrorKind::*}, parse::{hex_digit_value, check_suffix}};


/// Unescapes the single escape sequence at the start of `input` and returns
/// the resulting value together with the length of the escape sequence in
/// bytes.
///
/// `offset` is the position of `input` within the whole literal; it is only
/// used to compute error spans.
///
/// Must start with `\`
pub(crate) fn unescape<E: Escapee>(input: &str, offset: usize) -> Result<(E, usize), ParseError> {
    let first = input.as_bytes().get(1)
        .ok_or(perr(offset, UnterminatedEscape))?;
    let out = match first {
        // Quote escapes
        b'\'' => (E::from_byte(b'\''), 2),
        b'"' => (E::from_byte(b'"'), 2),

        // Ascii escapes
        b'n' => (E::from_byte(b'\n'), 2),
        b'r' => (E::from_byte(b'\r'), 2),
        b't' => (E::from_byte(b'\t'), 2),
        b'\\' => (E::from_byte(b'\\'), 2),
        b'0' => (E::from_byte(b'\0'), 2),
        b'x' => {
            // `get` (instead of indexing) also covers the case that 2..4 is
            // out of bounds or not on a char boundary.
            let hex_string = input.get(2..4)
                .ok_or(perr(offset..offset + input.len(), UnterminatedEscape))?
                .as_bytes();
            let first = hex_digit_value(hex_string[0])
                .ok_or(perr(offset..offset + 4, InvalidXEscape))?;
            let second = hex_digit_value(hex_string[1])
                .ok_or(perr(offset..offset + 4, InvalidXEscape))?;
            let value = second + 16 * first;

            // Only byte (string) literals may use values above 0x7F.
            if E::SUPPORTS_UNICODE && value > 0x7F {
                return Err(perr(offset..offset + 4, NonAsciiXEscape));
            }

            (E::from_byte(value), 4)
        },

        // Unicode escape
        b'u' => {
            if !E::SUPPORTS_UNICODE {
                return Err(perr(offset..offset + 2, UnicodeEscapeInByteLiteral));
            }

            if input.as_bytes().get(2) != Some(&b'{') {
                return Err(perr(offset..offset + 2, UnicodeEscapeWithoutBrace));
            }

            let closing_pos = input.bytes().position(|b| b == b'}')
                .ok_or(perr(offset..offset + input.len(), UnterminatedUnicodeEscape))?;

            // The digits start after `\u{`, i.e. at index 3 of `input`.
            let inner = &input[3..closing_pos];
            if inner.as_bytes().first() == Some(&b'_') {
                // The span has to be relative to the whole literal, like all
                // other spans in this function.
                return Err(perr(offset + 3, InvalidStartOfUnicodeEscape));
            }

            let mut v: u32 = 0;
            let mut digit_count = 0;
            for (i, b) in inner.bytes().enumerate() {
                // Underscores are allowed as separators and do not count as
                // digits.
                if b == b'_'{
                    continue;
                }

                let digit = hex_digit_value(b)
                    .ok_or(perr(offset + 3 + i, NonHexDigitInUnicodeEscape))?;

                if digit_count == 6 {
                    return Err(perr(offset + 3 + i, TooManyDigitInUnicodeEscape));
                }
                digit_count += 1;
                v = 16 * v + digit as u32;
            }

            // NOTE(review): the end of this span is not shifted by `offset`
            // (unlike the start); the unit tests currently pin this value, so
            // it is left as is here.
            let c = std::char::from_u32(v)
                .ok_or(perr(offset..closing_pos + 1, InvalidUnicodeEscapeChar))?;

            (E::from_char(c), closing_pos + 1)
        }

        _ => return Err(perr(offset..offset + 2, UnknownEscape)),
    };

    Ok(out)
}

/// A type an escape sequence can be unescaped into: `u8` for byte (string)
/// literals, `char` for char and string literals.
pub(crate) trait Escapee: Into<char> {
    /// Whether `\u{...}` escapes are allowed (and `\x` is limited to ASCII).
    const SUPPORTS_UNICODE: bool;
    fn from_byte(b: u8) -> Self;
    fn from_char(c: char) -> Self;
}

impl Escapee for u8 {
    const SUPPORTS_UNICODE: bool = false;
    fn from_byte(b: u8) -> Self {
        b
    }
    fn from_char(_: char) -> Self {
        // `unescape` rejects `\u` escapes before calling `from_char` if
        // `SUPPORTS_UNICODE` is false.
        panic!("bug: `<u8 as Escapee>::from_char` was called");
    }
}

impl Escapee for char {
    const SUPPORTS_UNICODE: bool = true;
    fn from_byte(b: u8) -> Self {
        b.into()
    }
    fn from_char(c: char) -> Self {
        c
    }
}

/// Checks whether the character is skipped after a string continue start
/// (unescaped backslash followed by `\n`).
fn is_string_continue_skipable_whitespace(b: u8) -> bool { b == b' ' || b == b'\t' || b == b'\n' || b == b'\r' } /// Unescapes a whole string or byte string. #[inline(never)] pub(crate) fn unescape_string( input: &str, offset: usize, ) -> Result<(Option, usize), ParseError> { let mut closing_quote_pos = None; let mut i = offset; let mut end_last_escape = offset; let mut value = String::new(); while i < input.len() { match input.as_bytes()[i] { // Handle "string continue". b'\\' if input.as_bytes().get(i + 1) == Some(&b'\n') => { value.push_str(&input[end_last_escape..i]); // Find the first non-whitespace character. let end_escape = input[i + 2..].bytes() .position(|b| !is_string_continue_skipable_whitespace(b)) .ok_or(perr(None, UnterminatedString))?; i += 2 + end_escape; end_last_escape = i; } b'\\' => { let (c, len) = unescape::(&input[i..input.len() - 1], i)?; value.push_str(&input[end_last_escape..i]); value.push(c.into()); i += len; end_last_escape = i; } b'\r' => { if input.as_bytes().get(i + 1) == Some(&b'\n') { value.push_str(&input[end_last_escape..i]); value.push('\n'); i += 2; end_last_escape = i; } else { return Err(perr(i, IsolatedCr)) } } b'"' => { closing_quote_pos = Some(i); break; }, b if !E::SUPPORTS_UNICODE && !b.is_ascii() => return Err(perr(i, NonAsciiInByteLiteral)), _ => i += 1, } } let closing_quote_pos = closing_quote_pos.ok_or(perr(None, UnterminatedString))?; let start_suffix = closing_quote_pos + 1; let suffix = &input[start_suffix..]; check_suffix(suffix).map_err(|kind| perr(start_suffix, kind))?; // `value` is only empty if there was no escape in the input string // (with the special case of the input being empty). This means the // string value basically equals the input, so we store `None`. let value = if value.is_empty() { None } else { // There was an escape in the string, so we need to push the // remaining unescaped part of the string still. 
value.push_str(&input[end_last_escape..closing_quote_pos]); Some(value) }; Ok((value, start_suffix)) } /// Reads and checks a raw (byte) string literal, converting `\r\n` sequences to /// just `\n` sequences. Returns an optional new string (if the input contained /// any `\r\n`) and the number of hashes used by the literal. #[inline(never)] pub(crate) fn scan_raw_string( input: &str, offset: usize, ) -> Result<(Option, u32, usize), ParseError> { // Raw string literal let num_hashes = input[offset..].bytes().position(|b| b != b'#') .ok_or(perr(None, InvalidLiteral))?; if input.as_bytes().get(offset + num_hashes) != Some(&b'"') { return Err(perr(None, InvalidLiteral)); } let start_inner = offset + num_hashes + 1; let hashes = &input[offset..num_hashes + offset]; let mut closing_quote_pos = None; let mut i = start_inner; let mut end_last_escape = start_inner; let mut value = String::new(); while i < input.len() { let b = input.as_bytes()[i]; if b == b'"' && input[i + 1..].starts_with(hashes) { closing_quote_pos = Some(i); break; } if b == b'\r' { // Convert `\r\n` into `\n`. This is currently not well documented // in the Rust reference, but is done even for raw strings. That's // because rustc simply converts all line endings when reading // source files. if input.as_bytes().get(i + 1) == Some(&b'\n') { value.push_str(&input[end_last_escape..i]); value.push('\n'); i += 2; end_last_escape = i; continue; } else if E::SUPPORTS_UNICODE { // If no \n follows the \r and we are scanning a raw string // (not raw byte string), we error. 
return Err(perr(i, IsolatedCr)) } } if !E::SUPPORTS_UNICODE { if !b.is_ascii() { return Err(perr(i, NonAsciiInByteLiteral)); } } i += 1; } let closing_quote_pos = closing_quote_pos.ok_or(perr(None, UnterminatedRawString))?; let start_suffix = closing_quote_pos + num_hashes + 1; let suffix = &input[start_suffix..]; check_suffix(suffix).map_err(|kind| perr(start_suffix, kind))?; // `value` is only empty if there was no \r\n in the input string (with the // special case of the input being empty). This means the string value // equals the input, so we store `None`. let value = if value.is_empty() { None } else { // There was an \r\n in the string, so we need to push the remaining // unescaped part of the string still. value.push_str(&input[end_last_escape..closing_quote_pos]); Some(value) }; Ok((value, num_hashes as u32, start_suffix)) } litrs-0.4.0/src/float/mod.rs000064400000000000000000000201321046102023000140010ustar 00000000000000use std::{fmt, str::FromStr}; use crate::{ Buffer, ParseError, err::{perr, ParseErrorKind::*}, parse::{end_dec_digits, first_byte_or_empty, check_suffix}, }; /// A floating point literal, e.g. `3.14`, `8.`, `135e12`, `27f32` or `1.956e2f64`. /// /// This kind of literal has several forms, but generally consists of a main /// number part, an optional exponent and an optional type suffix. See /// [the reference][ref] for more information. /// /// A leading minus sign `-` is not part of the literal grammar! `-3.14` are two /// tokens in the Rust grammar. /// /// /// [ref]: https://doc.rust-lang.org/reference/tokens.html#floating-point-literals #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct FloatLit { /// The whole raw input. The `usize` fields in this struct partition this /// string. Always true: `end_integer_part <= end_fractional_part`. /// /// ```text /// 12_3.4_56e789f32 /// ╷ ╷ ╷ /// | | └ end_number_part = 13 /// | └ end_fractional_part = 9 /// └ end_integer_part = 4 /// /// 246. 
/// ╷╷ /// |└ end_fractional_part = end_number_part = 4 /// └ end_integer_part = 3 /// /// 1234e89 /// ╷ ╷ /// | └ end_number_part = 7 /// └ end_integer_part = end_fractional_part = 4 /// ``` raw: B, /// The first index not part of the integer part anymore. Since the integer /// part is at the start, this is also the length of that part. end_integer_part: usize, /// The first index after the fractional part. end_fractional_part: usize, /// The first index after the whole number part (everything except type suffix). end_number_part: usize, } impl FloatLit { /// Parses the input as a floating point literal. Returns an error if the /// input is invalid or represents a different kind of literal. Will also /// reject decimal integer literals like `23` or `17f32`, in accordance /// with the spec. pub fn parse(s: B) -> Result { match first_byte_or_empty(&s)? { b'0'..=b'9' => { // TODO: simplify once RFC 2528 is stabilized let FloatLit { end_integer_part, end_fractional_part, end_number_part, .. } = parse_impl(&s)?; Ok(Self { raw: s, end_integer_part, end_fractional_part, end_number_part }) }, _ => Err(perr(0, DoesNotStartWithDigit)), } } /// Returns the number part (including integer part, fractional part and /// exponent), but without the suffix. If you want an actual floating /// point value, you need to parse this string, e.g. with `f32::from_str` /// or an external crate. pub fn number_part(&self) -> &str { &(*self.raw)[..self.end_number_part] } /// Returns the non-empty integer part of this literal. pub fn integer_part(&self) -> &str { &(*self.raw)[..self.end_integer_part] } /// Returns the optional fractional part of this literal. Does not include /// the period. If a period exists in the input, `Some` is returned, `None` /// otherwise. Note that `Some("")` might be returned, e.g. for `3.`. 
pub fn fractional_part(&self) -> Option<&str> { if self.end_integer_part == self.end_fractional_part { None } else { Some(&(*self.raw)[self.end_integer_part + 1..self.end_fractional_part]) } } /// Optional exponent part. Might be empty if there was no exponent part in /// the input. Includes the `e` or `E` at the beginning. pub fn exponent_part(&self) -> &str { &(*self.raw)[self.end_fractional_part..self.end_number_part] } /// The optional suffix. Returns `""` if the suffix is empty/does not exist. pub fn suffix(&self) -> &str { &(*self.raw)[self.end_number_part..] } /// Returns the raw input that was passed to `parse`. pub fn raw_input(&self) -> &str { &self.raw } /// Returns the raw input that was passed to `parse`, potentially owned. pub fn into_raw_input(self) -> B { self.raw } } impl FloatLit<&str> { /// Makes a copy of the underlying buffer and returns the owned version of /// `Self`. pub fn to_owned(&self) -> FloatLit { FloatLit { raw: self.raw.to_owned(), end_integer_part: self.end_integer_part, end_fractional_part: self.end_fractional_part, end_number_part: self.end_number_part, } } } impl fmt::Display for FloatLit { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", &*self.raw) } } /// Precondition: first byte of string has to be in `b'0'..=b'9'`. #[inline(never)] pub(crate) fn parse_impl(input: &str) -> Result, ParseError> { // Integer part. let end_integer_part = end_dec_digits(input.as_bytes()); let rest = &input[end_integer_part..]; // Fractional part. let end_fractional_part = if rest.as_bytes().get(0) == Some(&b'.') { // The fractional part must not start with `_`. if rest.as_bytes().get(1) == Some(&b'_') { return Err(perr(end_integer_part + 1, UnexpectedChar)); } end_dec_digits(rest[1..].as_bytes()) + 1 + end_integer_part } else { end_integer_part }; let rest = &input[end_fractional_part..]; // If we have a period that is not followed by decimal digits, the // literal must end now. 
if end_integer_part + 1 == end_fractional_part && !rest.is_empty() { return Err(perr(end_integer_part + 1, UnexpectedChar)); } // Optional exponent. let end_number_part = if rest.starts_with('e') || rest.starts_with('E') { // Strip single - or + sign at the beginning. let exp_number_start = match rest.as_bytes().get(1) { Some(b'-') | Some(b'+') => 2, _ => 1, }; // Find end of exponent and make sure there is at least one digit. let end_exponent = end_dec_digits(rest[exp_number_start..].as_bytes()) + exp_number_start; if !rest[exp_number_start..end_exponent].bytes().any(|b| matches!(b, b'0'..=b'9')) { return Err(perr( end_fractional_part..end_fractional_part + end_exponent, NoExponentDigits, )); } end_exponent + end_fractional_part } else { end_fractional_part }; // Make sure the suffix is valid. let suffix = &input[end_number_part..]; check_suffix(suffix).map_err(|kind| perr(end_number_part..input.len(), kind))?; // A float literal needs either a fractional or exponent part, otherwise its // an integer literal. if end_integer_part == end_number_part { return Err(perr(None, UnexpectedIntegerLit)); } Ok(FloatLit { raw: input, end_integer_part, end_fractional_part, end_number_part, }) } /// All possible float type suffixes. #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[non_exhaustive] pub enum FloatType { F32, F64, } impl FloatType { /// Returns the type corresponding to the given suffix (e.g. `"f32"` is /// mapped to `Self::F32`). If the suffix is not a valid float type, `None` /// is returned. pub fn from_suffix(suffix: &str) -> Option { match suffix { "f32" => Some(FloatType::F32), "f64" => Some(FloatType::F64), _ => None, } } /// Returns the suffix for this type, e.g. `"f32"` for `Self::F32`. 
/// All possible float type suffixes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum FloatType {
    F32,
    F64,
}

impl FloatType {
    /// Returns the type corresponding to the given suffix (e.g. `"f32"` is
    /// mapped to `Self::F32`). If the suffix is not a valid float type, `None`
    /// is returned.
    pub fn from_suffix(suffix: &str) -> Option<Self> {
        match suffix {
            "f32" => Some(FloatType::F32),
            "f64" => Some(FloatType::F64),
            _ => None,
        }
    }

    /// Returns the suffix for this type, e.g. `"f32"` for `Self::F32`.
    pub fn suffix(self) -> &'static str {
        match self {
            Self::F32 => "f32",
            Self::F64 => "f64",
        }
    }
}

/// Parses a suffix string; equivalent to [`FloatType::from_suffix`] with `()`
/// as the error for unknown suffixes.
impl FromStr for FloatType {
    type Err = ();

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Self::from_suffix(s).ok_or(())
    }
}

/// Prints the suffix, e.g. `f32`.
impl fmt::Display for FloatType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Fully qualified so the call does not depend on `Display` being
        // imported and cannot be confused with `Debug::fmt`.
        fmt::Display::fmt(self.suffix(), f)
    }
}
/// Checks every accessor of `FloatLit` by hand for literals without a suffix.
/// These cases do not go through the `check!` macro, so they also guard
/// against bugs in that macro.
#[test]
fn manual_without_suffix() -> Result<(), ParseError> {
    // Integer and fractional part.
    let f = FloatLit::parse("3.14")?;
    assert_eq!(f.number_part(), "3.14");
    assert_eq!(f.integer_part(), "3");
    assert_eq!(f.fractional_part(), Some("14"));
    assert_eq!(f.exponent_part(), "");
    assert_eq!(f.suffix(), "");

    // Trailing period: the fractional part exists but is empty.
    let f = FloatLit::parse("9.")?;
    assert_eq!(f.number_part(), "9.");
    assert_eq!(f.integer_part(), "9");
    assert_eq!(f.fractional_part(), Some(""));
    assert_eq!(f.exponent_part(), "");
    assert_eq!(f.suffix(), "");

    // Exponent only, lower case `e`.
    let f = FloatLit::parse("8e1")?;
    assert_eq!(f.number_part(), "8e1");
    assert_eq!(f.integer_part(), "8");
    assert_eq!(f.fractional_part(), None);
    assert_eq!(f.exponent_part(), "e1");
    assert_eq!(f.suffix(), "");

    // Exponent only, upper case `E`.
    let f = FloatLit::parse("8E3")?;
    assert_eq!(f.number_part(), "8E3");
    assert_eq!(f.integer_part(), "8");
    assert_eq!(f.fractional_part(), None);
    assert_eq!(f.exponent_part(), "E3");
    assert_eq!(f.suffix(), "");

    // Underscores in integer and fractional part.
    let f = FloatLit::parse("8_7_6.1_23e15")?;
    assert_eq!(f.number_part(), "8_7_6.1_23e15");
    assert_eq!(f.integer_part(), "8_7_6");
    assert_eq!(f.fractional_part(), Some("1_23"));
    assert_eq!(f.exponent_part(), "e15");
    assert_eq!(f.suffix(), "");

    // Signed exponent with underscores.
    let f = FloatLit::parse("8.2e-_04_9")?;
    assert_eq!(f.number_part(), "8.2e-_04_9");
    assert_eq!(f.integer_part(), "8");
    assert_eq!(f.fractional_part(), Some("2"));
    assert_eq!(f.exponent_part(), "e-_04_9");
    assert_eq!(f.suffix(), "");

    Ok(())
}
/// Same as `manual_without_suffix`, but every literal carries an `f32`/`f64`
/// suffix, which must be excluded from the number part and map to the
/// matching `FloatType`.
#[test]
fn manual_with_suffix() -> Result<(), ParseError> {
    let f = FloatLit::parse("3.14f32")?;
    assert_eq!(f.number_part(), "3.14");
    assert_eq!(f.integer_part(), "3");
    assert_eq!(f.fractional_part(), Some("14"));
    assert_eq!(f.exponent_part(), "");
    assert_eq!(FloatType::from_suffix(f.suffix()), Some(FloatType::F32));

    let f = FloatLit::parse("8e1f64")?;
    assert_eq!(f.number_part(), "8e1");
    assert_eq!(f.integer_part(), "8");
    assert_eq!(f.fractional_part(), None);
    assert_eq!(f.exponent_part(), "e1");
    assert_eq!(FloatType::from_suffix(f.suffix()), Some(FloatType::F64));

    let f = FloatLit::parse("8_7_6.1_23e15f32")?;
    assert_eq!(f.number_part(), "8_7_6.1_23e15");
    assert_eq!(f.integer_part(), "8_7_6");
    assert_eq!(f.fractional_part(), Some("1_23"));
    assert_eq!(f.exponent_part(), "e15");
    assert_eq!(FloatType::from_suffix(f.suffix()), Some(FloatType::F32));

    let f = FloatLit::parse("8.2e-_04_9f64")?;
    assert_eq!(f.number_part(), "8.2e-_04_9");
    assert_eq!(f.integer_part(), "8");
    assert_eq!(f.fractional_part(), Some("2"));
    assert_eq!(f.exponent_part(), "e-_04_9");
    assert_eq!(FloatType::from_suffix(f.suffix()), Some(FloatType::F64));

    Ok(())
}
"" -); check!("0" ".0000001" "" -); check!("0" ".000_0001" "" -); check!("0" ".0" "e+0" -); check!("0" "" "E+0" -); check!("34" "" "e+0" -); check!("0" ".9182" "E+0" f32); } #[test] fn non_standard_suffixes() { #[track_caller] fn check_suffix( input: &str, integer_part: &str, fractional_part: Option<&str>, exponent_part: &str, suffix: &str, ) { let lit = FloatLit::parse(input) .unwrap_or_else(|e| panic!("expected to parse '{}' but got {}", input, e)); assert_eq!(lit.integer_part(), integer_part); assert_eq!(lit.fractional_part(), fractional_part); assert_eq!(lit.exponent_part(), exponent_part); assert_eq!(lit.suffix(), suffix); let lit = match Literal::parse(input) { Ok(Literal::Float(f)) => f, other => panic!("Expected float literal, but got {:?} for '{}'", other, input), }; assert_eq!(lit.integer_part(), integer_part); assert_eq!(lit.fractional_part(), fractional_part); assert_eq!(lit.exponent_part(), exponent_part); assert_eq!(lit.suffix(), suffix); } check_suffix("7.1f23", "7", Some("1"), "", "f23"); check_suffix("7.1f320", "7", Some("1"), "", "f320"); check_suffix("7.1f64_", "7", Some("1"), "", "f64_"); check_suffix("8.1f649", "8", Some("1"), "", "f649"); check_suffix("8.1f64f32", "8", Some("1"), "", "f64f32"); check_suffix("23e2_banana", "23", None, "e2_", "banana"); check_suffix("23.2_banana", "23", Some("2_"), "", "banana"); check_suffix("23e2pe55ter", "23", None, "e2", "pe55ter"); check_suffix("23e2p_e55ter", "23", None, "e2", "p_e55ter"); check_suffix("3.15Jürgen", "3", Some("15"), "", "Jürgen"); check_suffix("3e2e5", "3", None, "e2", "e5"); check_suffix("3e2e5f", "3", None, "e2", "e5f"); } #[test] fn parse_err() { assert_err!(FloatLit, "", Empty, None); assert_err_single!(FloatLit::parse("."), DoesNotStartWithDigit, 0); assert_err_single!(FloatLit::parse("+"), DoesNotStartWithDigit, 0); assert_err_single!(FloatLit::parse("-"), DoesNotStartWithDigit, 0); assert_err_single!(FloatLit::parse("e"), DoesNotStartWithDigit, 0); 
/// Table of invalid inputs with the expected error kind and span.
#[test]
fn parse_err() {
    // Empty input and inputs that do not start with a digit.
    assert_err!(FloatLit, "", Empty, None);
    assert_err_single!(FloatLit::parse("."), DoesNotStartWithDigit, 0);
    assert_err_single!(FloatLit::parse("+"), DoesNotStartWithDigit, 0);
    assert_err_single!(FloatLit::parse("-"), DoesNotStartWithDigit, 0);
    assert_err_single!(FloatLit::parse("e"), DoesNotStartWithDigit, 0);
    assert_err_single!(FloatLit::parse("e8"), DoesNotStartWithDigit, 0);
    assert_err!(FloatLit, "0e", NoExponentDigits, 1..2);
    assert_err_single!(FloatLit::parse("f32"), DoesNotStartWithDigit, 0);
    assert_err_single!(FloatLit::parse("foo"), DoesNotStartWithDigit, 0);

    assert_err_single!(FloatLit::parse("inf"), DoesNotStartWithDigit, 0);
    assert_err_single!(FloatLit::parse("nan"), DoesNotStartWithDigit, 0);
    assert_err_single!(FloatLit::parse("NaN"), DoesNotStartWithDigit, 0);
    assert_err_single!(FloatLit::parse("NAN"), DoesNotStartWithDigit, 0);

    assert_err_single!(FloatLit::parse("_2.7"), DoesNotStartWithDigit, 0);
    assert_err_single!(FloatLit::parse(".5"), DoesNotStartWithDigit, 0);
    assert_err!(FloatLit, "1e", NoExponentDigits, 1..2);
    // A period without fractional digits must end the literal.
    assert_err!(FloatLit, "1.e4", UnexpectedChar, 2);
    assert_err!(FloatLit, "3._4", UnexpectedChar, 2);
    assert_err!(FloatLit, "3.f32", UnexpectedChar, 2);
    assert_err!(FloatLit, "3.e5", UnexpectedChar, 2);
    assert_err!(FloatLit, "12345._987", UnexpectedChar, 6);
    assert_err!(FloatLit, "46._", UnexpectedChar, 3);
    assert_err!(FloatLit, "46.f32", UnexpectedChar, 3);
    assert_err!(FloatLit, "46.e3", UnexpectedChar, 3);
    assert_err!(FloatLit, "46._e3", UnexpectedChar, 3);
    assert_err!(FloatLit, "46.e3f64", UnexpectedChar, 3);
    assert_err!(FloatLit, "23.4e_", NoExponentDigits, 4..6);
    assert_err!(FloatLit, "23E___f32", NoExponentDigits, 2..6);
    assert_err!(FloatLit, "55e3.1", UnexpectedChar, 4..6);

    // Signs outside of / doubled inside the exponent.
    assert_err!(FloatLit, "3.7+", UnexpectedChar, 3..4);
    assert_err!(FloatLit, "3.7+2", UnexpectedChar, 3..5);
    assert_err!(FloatLit, "3.7-", UnexpectedChar, 3..4);
    assert_err!(FloatLit, "3.7-2", UnexpectedChar, 3..5);
    assert_err!(FloatLit, "3.7e+", NoExponentDigits, 3..5);
    assert_err!(FloatLit, "3.7e-", NoExponentDigits, 3..5);
    assert_err!(FloatLit, "3.7e-+3", NoExponentDigits, 3..5); // suboptimal error
    assert_err!(FloatLit, "3.7e+-3", NoExponentDigits, 3..5); // suboptimal error
    assert_err_single!(FloatLit::parse("0x44.5"), InvalidSuffix, 1..6);

    // Decimal integers (with or without suffix) are not float literals.
    assert_err_single!(FloatLit::parse("3"), UnexpectedIntegerLit, None);
    assert_err_single!(FloatLit::parse("35_389"), UnexpectedIntegerLit, None);
    assert_err_single!(FloatLit::parse("9_8_7f32"), UnexpectedIntegerLit, None);
    assert_err_single!(FloatLit::parse("9_8_7banana"), UnexpectedIntegerLit, None);
    assert_err_single!(FloatLit::parse("7f23"), UnexpectedIntegerLit, None);
    assert_err_single!(FloatLit::parse("7f320"), UnexpectedIntegerLit, None);
    assert_err_single!(FloatLit::parse("7f64_"), UnexpectedIntegerLit, None);
    assert_err_single!(FloatLit::parse("8f649"), UnexpectedIntegerLit, None);
    assert_err_single!(FloatLit::parse("8f64f32"), UnexpectedIntegerLit, None);
}
impl_specific_lit_to_lit { ($ty:ty, $variant:ident) => { impl From<$ty> for Literal { fn from(src: $ty) -> Self { Literal::$variant(src) } } }; } impl_specific_lit_to_lit!(crate::BoolLit, Bool); impl_specific_lit_to_lit!(crate::IntegerLit, Integer); impl_specific_lit_to_lit!(crate::FloatLit, Float); impl_specific_lit_to_lit!(crate::CharLit, Char); impl_specific_lit_to_lit!(crate::StringLit, String); impl_specific_lit_to_lit!(crate::ByteLit, Byte); impl_specific_lit_to_lit!(crate::ByteStringLit, ByteString); // ============================================================================================== // ===== `From for Literal` // ============================================================================================== macro_rules! impl_tt_to_lit { ([$($prefix:tt)*] => ) => { impl From<$($prefix)* Literal> for Literal { fn from(src: $($prefix)* Literal) -> Self { // We call `expect` in all these impls: this library aims to implement exactly // the Rust grammar, so if we have a valid Rust literal, we should always be // able to parse it. Self::parse(src.to_string()) .expect("bug: failed to parse output of `Literal::to_string`") } } } } helper!(impl_tt_to_lit, ); // ============================================================================================== // ===== `TryFrom for Literal` // ============================================================================================== macro_rules! 
impl_tt_to_lit { ([$($prefix:tt)*] => ) => { impl TryFrom<$($prefix)* TokenTree> for Literal { type Error = InvalidToken; fn try_from(tt: $($prefix)* TokenTree) -> Result { let span = tt.span(); let res = match tt { $($prefix)* TokenTree::Group(_) => Err(TokenKind::Group), $($prefix)* TokenTree::Punct(_) => Err(TokenKind::Punct), $($prefix)* TokenTree::Ident(ref ident) if ident.to_string() == "true" => return Ok(Literal::Bool(crate::BoolLit::True)), $($prefix)* TokenTree::Ident(ref ident) if ident.to_string() == "false" => return Ok(Literal::Bool(crate::BoolLit::False)), $($prefix)* TokenTree::Ident(_) => Err(TokenKind::Ident), $($prefix)* TokenTree::Literal(ref lit) => Ok(lit), }; match res { Ok(lit) => Ok(From::from(lit)), Err(actual) => Err(InvalidToken { actual, expected: TokenKind::Literal, span: span.into(), }), } } } } } helper!(impl_tt_to_lit, ); // ============================================================================================== // ===== `TryFrom`, `TryFrom` for non-bool `*Lit` // ============================================================================================== fn kind_of(lit: &Literal) -> TokenKind { match lit { Literal::String(_) => TokenKind::StringLit, Literal::Bool(_) => TokenKind::BoolLit, Literal::Integer(_) => TokenKind::IntegerLit, Literal::Float(_) => TokenKind::FloatLit, Literal::Char(_) => TokenKind::CharLit, Literal::Byte(_) => TokenKind::ByteLit, Literal::ByteString(_) => TokenKind::ByteStringLit, } } macro_rules! 
impl_for_specific_lit { ([$($prefix:tt)*] => $ty:ty, $variant:ident, $kind:ident) => { impl TryFrom<$($prefix)* Literal> for $ty { type Error = InvalidToken; fn try_from(src: $($prefix)* Literal) -> Result { let span = src.span(); let lit: Literal = src.into(); match lit { Literal::$variant(s) => Ok(s), other => Err(InvalidToken { expected: TokenKind::$kind, actual: kind_of(&other), span: span.into(), }), } } } impl TryFrom<$($prefix)* TokenTree> for $ty { type Error = InvalidToken; fn try_from(tt: $($prefix)* TokenTree) -> Result { let span = tt.span(); let res = match tt { $($prefix)* TokenTree::Group(_) => Err(TokenKind::Group), $($prefix)* TokenTree::Punct(_) => Err(TokenKind::Punct), $($prefix)* TokenTree::Ident(_) => Err(TokenKind::Ident), $($prefix)* TokenTree::Literal(ref lit) => Ok(lit), }; match res { Ok(lit) => <$ty>::try_from(lit), Err(actual) => Err(InvalidToken { actual, expected: TokenKind::$kind, span: span.into(), }), } } } }; } helper!(impl_for_specific_lit, crate::IntegerLit, Integer, IntegerLit); helper!(impl_for_specific_lit, crate::FloatLit, Float, FloatLit); helper!(impl_for_specific_lit, crate::CharLit, Char, CharLit); helper!(impl_for_specific_lit, crate::StringLit, String, StringLit); helper!(impl_for_specific_lit, crate::ByteLit, Byte, ByteLit); helper!(impl_for_specific_lit, crate::ByteStringLit, ByteString, ByteStringLit); // ============================================================================================== // ===== `From<*Lit> for pm::Literal` // ============================================================================================== macro_rules! impl_specific_lit_to_pm_lit { ([$($prefix:tt)*] => $ty:ident, $variant:ident, $kind:ident) => { impl From> for $($prefix)* Literal { fn from(l: crate::$ty) -> Self { // This should never fail: an input that is parsed successfuly // as one of our literal types should always parse as a // proc_macro literal as well! 
l.raw_input().parse().unwrap_or_else(|e| { panic!( "failed to parse `{}` as `{}`: {}", l.raw_input(), std::any::type_name::(), e, ) }) } } }; } helper_no_refs!(impl_specific_lit_to_pm_lit, IntegerLit, Integer, IntegerLit); helper_no_refs!(impl_specific_lit_to_pm_lit, FloatLit, Float, FloatLit); helper_no_refs!(impl_specific_lit_to_pm_lit, CharLit, Char, CharLit); helper_no_refs!(impl_specific_lit_to_pm_lit, StringLit, String, StringLit); helper_no_refs!(impl_specific_lit_to_pm_lit, ByteLit, Byte, ByteLit); helper_no_refs!(impl_specific_lit_to_pm_lit, ByteStringLit, ByteString, ByteStringLit); // ============================================================================================== // ===== `TryFrom for BoolLit` // ============================================================================================== macro_rules! impl_from_tt_for_bool { ([$($prefix:tt)*] => ) => { impl TryFrom<$($prefix)* TokenTree> for crate::BoolLit { type Error = InvalidToken; fn try_from(tt: $($prefix)* TokenTree) -> Result { let span = tt.span(); let actual = match tt { $($prefix)* TokenTree::Ident(ref ident) if ident.to_string() == "true" => return Ok(crate::BoolLit::True), $($prefix)* TokenTree::Ident(ref ident) if ident.to_string() == "false" => return Ok(crate::BoolLit::False), $($prefix)* TokenTree::Group(_) => TokenKind::Group, $($prefix)* TokenTree::Punct(_) => TokenKind::Punct, $($prefix)* TokenTree::Ident(_) => TokenKind::Ident, $($prefix)* TokenTree::Literal(ref lit) => kind_of(&Literal::from(lit)), }; Err(InvalidToken { actual, expected: TokenKind::BoolLit, span: span.into(), }) } } }; } helper!(impl_from_tt_for_bool, ); // ============================================================================================== // ===== `From for pm::Ident` // ============================================================================================== macro_rules! 
impl_bool_lit_to_pm_lit { ([$($prefix:tt)*] => ) => { impl From for $($prefix)* Ident { fn from(l: crate::BoolLit) -> Self { Self::new(l.as_str(), $($prefix)* Span::call_site()) } } }; } helper_no_refs!(impl_bool_lit_to_pm_lit, ); mod tests { //! # Tests //! //! ```no_run //! extern crate proc_macro; //! //! use std::convert::TryFrom; //! use litrs::Literal; //! //! fn give() -> T { //! panic!() //! } //! //! let _ = litrs::Literal::::from(give::()); //! let _ = litrs::Literal::::from(give::>()); //! let _ = litrs::Literal::::from(give::>()); //! let _ = litrs::Literal::::from(give::>()); //! let _ = litrs::Literal::::from(give::>()); //! let _ = litrs::Literal::::from(give::>()); //! let _ = litrs::Literal::::from(give::>()); //! //! let _ = litrs::Literal::<&'static str>::from(give::()); //! let _ = litrs::Literal::<&'static str>::from(give::>()); //! let _ = litrs::Literal::<&'static str>::from(give::>()); //! let _ = litrs::Literal::<&'static str>::from(give::>()); //! let _ = litrs::Literal::<&'static str>::from(give::>()); //! let _ = litrs::Literal::<&'static str>::from(give::>()); //! let _ = litrs::Literal::<&'static str>::from(give::>()); //! //! //! let _ = litrs::Literal::from(give::()); //! let _ = litrs::Literal::from(give::<&proc_macro::Literal>()); //! //! let _ = litrs::Literal::try_from(give::()); //! let _ = litrs::Literal::try_from(give::<&proc_macro::TokenTree>()); //! //! //! let _ = litrs::IntegerLit::try_from(give::()); //! let _ = litrs::IntegerLit::try_from(give::<&proc_macro::Literal>()); //! //! let _ = litrs::FloatLit::try_from(give::()); //! let _ = litrs::FloatLit::try_from(give::<&proc_macro::Literal>()); //! //! let _ = litrs::CharLit::try_from(give::()); //! let _ = litrs::CharLit::try_from(give::<&proc_macro::Literal>()); //! //! let _ = litrs::StringLit::try_from(give::()); //! let _ = litrs::StringLit::try_from(give::<&proc_macro::Literal>()); //! //! let _ = litrs::ByteLit::try_from(give::()); //! 
// Compile-only doc-test: every line merely has to type-check, which proves
// that the conversion impls generated above exist for `proc_macro` types.
mod tests {
    //! # Tests
    //!
    //! ```no_run
    //! extern crate proc_macro;
    //!
    //! use std::convert::TryFrom;
    //! use litrs::Literal;
    //!
    //! fn give<T>() -> T {
    //!     panic!()
    //! }
    //!
    //! let _ = litrs::Literal::<String>::from(give::<litrs::BoolLit>());
    //! let _ = litrs::Literal::<String>::from(give::<litrs::IntegerLit<String>>());
    //! let _ = litrs::Literal::<String>::from(give::<litrs::FloatLit<String>>());
    //! let _ = litrs::Literal::<String>::from(give::<litrs::CharLit<String>>());
    //! let _ = litrs::Literal::<String>::from(give::<litrs::StringLit<String>>());
    //! let _ = litrs::Literal::<String>::from(give::<litrs::ByteLit<String>>());
    //! let _ = litrs::Literal::<String>::from(give::<litrs::ByteStringLit<String>>());
    //!
    //! let _ = litrs::Literal::<&'static str>::from(give::<litrs::BoolLit>());
    //! let _ = litrs::Literal::<&'static str>::from(give::<litrs::IntegerLit<&'static str>>());
    //! let _ = litrs::Literal::<&'static str>::from(give::<litrs::FloatLit<&'static str>>());
    //! let _ = litrs::Literal::<&'static str>::from(give::<litrs::CharLit<&'static str>>());
    //! let _ = litrs::Literal::<&'static str>::from(give::<litrs::StringLit<&'static str>>());
    //! let _ = litrs::Literal::<&'static str>::from(give::<litrs::ByteLit<&'static str>>());
    //! let _ = litrs::Literal::<&'static str>::from(give::<litrs::ByteStringLit<&'static str>>());
    //!
    //!
    //! let _ = litrs::Literal::from(give::<proc_macro::Literal>());
    //! let _ = litrs::Literal::from(give::<&proc_macro::Literal>());
    //!
    //! let _ = litrs::Literal::try_from(give::<proc_macro::TokenTree>());
    //! let _ = litrs::Literal::try_from(give::<&proc_macro::TokenTree>());
    //!
    //!
    //! let _ = litrs::IntegerLit::try_from(give::<proc_macro::Literal>());
    //! let _ = litrs::IntegerLit::try_from(give::<&proc_macro::Literal>());
    //!
    //! let _ = litrs::FloatLit::try_from(give::<proc_macro::Literal>());
    //! let _ = litrs::FloatLit::try_from(give::<&proc_macro::Literal>());
    //!
    //! let _ = litrs::CharLit::try_from(give::<proc_macro::Literal>());
    //! let _ = litrs::CharLit::try_from(give::<&proc_macro::Literal>());
    //!
    //! let _ = litrs::StringLit::try_from(give::<proc_macro::Literal>());
    //! let _ = litrs::StringLit::try_from(give::<&proc_macro::Literal>());
    //!
    //! let _ = litrs::ByteLit::try_from(give::<proc_macro::Literal>());
    //! let _ = litrs::ByteLit::try_from(give::<&proc_macro::Literal>());
    //!
    //! let _ = litrs::ByteStringLit::try_from(give::<proc_macro::Literal>());
    //! let _ = litrs::ByteStringLit::try_from(give::<&proc_macro::Literal>());
    //!
    //!
    //! let _ = litrs::BoolLit::try_from(give::<proc_macro::TokenTree>());
    //! let _ = litrs::BoolLit::try_from(give::<&proc_macro::TokenTree>());
    //!
    //! let _ = litrs::IntegerLit::try_from(give::<proc_macro::TokenTree>());
    //! let _ = litrs::IntegerLit::try_from(give::<&proc_macro::TokenTree>());
    //!
    //! let _ = litrs::FloatLit::try_from(give::<proc_macro::TokenTree>());
    //! let _ = litrs::FloatLit::try_from(give::<&proc_macro::TokenTree>());
    //!
    //! let _ = litrs::CharLit::try_from(give::<proc_macro::TokenTree>());
    //! let _ = litrs::CharLit::try_from(give::<&proc_macro::TokenTree>());
    //!
    //! let _ = litrs::StringLit::try_from(give::<proc_macro::TokenTree>());
    //! let _ = litrs::StringLit::try_from(give::<&proc_macro::TokenTree>());
    //!
    //! let _ = litrs::ByteLit::try_from(give::<proc_macro::TokenTree>());
    //! let _ = litrs::ByteLit::try_from(give::<&proc_macro::TokenTree>());
    //!
    //! let _ = litrs::ByteStringLit::try_from(give::<proc_macro::TokenTree>());
    //! let _ = litrs::ByteStringLit::try_from(give::<&proc_macro::TokenTree>());
    //! ```
}
let _ = litrs::StringLit::try_from(give::()); //! let _ = litrs::StringLit::try_from(give::<&proc_macro2::Literal>()); //! //! let _ = litrs::ByteLit::try_from(give::()); //! let _ = litrs::ByteLit::try_from(give::<&proc_macro2::Literal>()); //! //! let _ = litrs::ByteStringLit::try_from(give::()); //! let _ = litrs::ByteStringLit::try_from(give::<&proc_macro2::Literal>()); //! //! //! let _ = litrs::BoolLit::try_from(give::()); //! let _ = litrs::BoolLit::try_from(give::<&proc_macro2::TokenTree>()); //! //! let _ = litrs::IntegerLit::try_from(give::()); //! let _ = litrs::IntegerLit::try_from(give::<&proc_macro2::TokenTree>()); //! //! let _ = litrs::FloatLit::try_from(give::()); //! let _ = litrs::FloatLit::try_from(give::<&proc_macro2::TokenTree>()); //! //! let _ = litrs::CharLit::try_from(give::()); //! let _ = litrs::CharLit::try_from(give::<&proc_macro2::TokenTree>()); //! //! let _ = litrs::StringLit::try_from(give::()); //! let _ = litrs::StringLit::try_from(give::<&proc_macro2::TokenTree>()); //! //! let _ = litrs::ByteLit::try_from(give::()); //! let _ = litrs::ByteLit::try_from(give::<&proc_macro2::TokenTree>()); //! //! let _ = litrs::ByteStringLit::try_from(give::()); //! let _ = litrs::ByteStringLit::try_from(give::<&proc_macro2::TokenTree>()); //! ``` } litrs-0.4.0/src/integer/mod.rs000064400000000000000000000256471046102023000143510ustar 00000000000000use std::{fmt, str::FromStr}; use crate::{ Buffer, ParseError, err::{perr, ParseErrorKind::*}, parse::{first_byte_or_empty, hex_digit_value, check_suffix}, }; /// An integer literal, e.g. `27`, `0x7F`, `0b101010u8` or `5_000_000i64`. /// /// An integer literal consists of an optional base prefix (`0b`, `0o`, `0x`), /// the main part (digits and underscores), and an optional type suffix /// (e.g. `u64` or `i8`). See [the reference][ref] for more information. /// /// Note that integer literals are always positive: the grammar does not contain /// the minus sign at all. 
The minus sign is just the unary negate operator, /// not part of the literal. Which is interesting for cases like `- 128i8`: /// here, the literal itself would overflow the specified type (`i8` cannot /// represent 128). That's why in rustc, the literal overflow check is /// performed as a lint after parsing, not during the lexing stage. Similarly, /// [`IntegerLit::parse`] does not perform an overflow check. /// /// [ref]: https://doc.rust-lang.org/reference/tokens.html#integer-literals #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[non_exhaustive] pub struct IntegerLit { /// The raw literal. Grammar: `
`. raw: B, /// First index of the main number part (after the base prefix). start_main_part: usize, /// First index not part of the main number part. end_main_part: usize, /// Parsed `raw[..start_main_part]`. base: IntegerBase, } impl IntegerLit { /// Parses the input as an integer literal. Returns an error if the input is /// invalid or represents a different kind of literal. pub fn parse(input: B) -> Result { match first_byte_or_empty(&input)? { digit @ b'0'..=b'9' => { // TODO: simplify once RFC 2528 is stabilized let IntegerLit { start_main_part, end_main_part, base, .. } = parse_impl(&input, digit)?; Ok(Self { raw: input, start_main_part, end_main_part, base }) }, _ => Err(perr(0, DoesNotStartWithDigit)), } } /// Performs the actual string to int conversion to obtain the integer /// value. The optional type suffix of the literal **is ignored by this /// method**. This means `N` does not need to match the type suffix! /// /// Returns `None` if the literal overflows `N`. pub fn value(&self) -> Option { let base = N::from_small_number(self.base.value()); let mut acc = N::from_small_number(0); for digit in self.raw_main_part().bytes() { if digit == b'_' { continue; } // We don't actually need the base here: we already know this main // part only contains digits valid for the specified base. let digit = hex_digit_value(digit) .unwrap_or_else(|| unreachable!("bug: integer main part contains non-digit")); acc = acc.checked_mul(base)?; acc = acc.checked_add(N::from_small_number(digit))?; } Some(acc) } /// The base of this integer literal. pub fn base(&self) -> IntegerBase { self.base } /// The main part containing the digits and potentially `_`. Do not try to /// parse this directly as that would ignore the base! pub fn raw_main_part(&self) -> &str { &(*self.raw)[self.start_main_part..self.end_main_part] } /// The optional suffix. Returns `""` if the suffix is empty/does not exist. /// /// If you want the type, try `IntegerType::from_suffix(lit.suffix())`. 
pub fn suffix(&self) -> &str { &(*self.raw)[self.end_main_part..] } /// Returns the raw input that was passed to `parse`. pub fn raw_input(&self) -> &str { &self.raw } /// Returns the raw input that was passed to `parse`, potentially owned. pub fn into_raw_input(self) -> B { self.raw } } impl IntegerLit<&str> { /// Makes a copy of the underlying buffer and returns the owned version of /// `Self`. pub fn to_owned(&self) -> IntegerLit { IntegerLit { raw: self.raw.to_owned(), start_main_part: self.start_main_part, end_main_part: self.end_main_part, base: self.base, } } } impl fmt::Display for IntegerLit { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", &*self.raw) } } /// Integer literal types. *Implementation detail*. /// /// Implemented for all integer literal types. This trait is sealed and cannot /// be implemented outside of this crate. The trait's methods are implementation /// detail of this library and are not subject to semver. pub trait FromIntegerLiteral: self::sealed::Sealed + Copy { /// Creates itself from the given number. `n` is guaranteed to be `<= 16`. #[doc(hidden)] fn from_small_number(n: u8) -> Self; #[doc(hidden)] fn checked_add(self, rhs: Self) -> Option; #[doc(hidden)] fn checked_mul(self, rhs: Self) -> Option; #[doc(hidden)] fn ty() -> IntegerType; } macro_rules! impl_from_int_literal { ($( $ty:ty => $variant:ident ,)* ) => { $( impl self::sealed::Sealed for $ty {} impl FromIntegerLiteral for $ty { fn from_small_number(n: u8) -> Self { n as Self } fn checked_add(self, rhs: Self) -> Option { self.checked_add(rhs) } fn checked_mul(self, rhs: Self) -> Option { self.checked_mul(rhs) } fn ty() -> IntegerType { IntegerType::$variant } } )* }; } impl_from_int_literal!( u8 => U8, u16 => U16, u32 => U32, u64 => U64, u128 => U128, usize => Usize, i8 => I8, i16 => I16, i32 => I32, i64 => I64, i128 => I128, isize => Isize, ); mod sealed { pub trait Sealed {} } /// Precondition: first byte of string has to be in `b'0'..=b'9'`. 
#[inline(never)] pub(crate) fn parse_impl(input: &str, first: u8) -> Result, ParseError> { // Figure out base and strip prefix base, if it exists. let (end_prefix, base) = match (first, input.as_bytes().get(1)) { (b'0', Some(b'b')) => (2, IntegerBase::Binary), (b'0', Some(b'o')) => (2, IntegerBase::Octal), (b'0', Some(b'x')) => (2, IntegerBase::Hexadecimal), // Everything else is treated as decimal. Several cases are caught // by this: // - "123" // - "0" // - "0u8" // - "0r" -> this will error later _ => (0, IntegerBase::Decimal), }; let without_prefix = &input[end_prefix..]; // Scan input to find the first character that's not a valid digit. let is_valid_digit = match base { IntegerBase::Binary => |b| matches!(b, b'0' | b'1' | b'_'), IntegerBase::Octal => |b| matches!(b, b'0'..=b'7' | b'_'), IntegerBase::Decimal => |b| matches!(b, b'0'..=b'9' | b'_'), IntegerBase::Hexadecimal => |b| matches!(b, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' | b'_'), }; let end_main = without_prefix.bytes() .position(|b| !is_valid_digit(b)) .unwrap_or(without_prefix.len()); let (main_part, suffix) = without_prefix.split_at(end_main); check_suffix(suffix).map_err(|kind| { // This is just to have a nicer error kind for this special case. If the // suffix is invalid, it is non-empty -> unwrap ok. let first = suffix.as_bytes()[0]; if !is_valid_digit(first) && first.is_ascii_digit() { perr(end_main + end_prefix, InvalidDigit) } else { perr(end_main + end_prefix..input.len(), kind) } })?; if suffix.starts_with('e') || suffix.starts_with('E') { return Err(perr(end_main, IntegerSuffixStartingWithE)); } // Make sure main number part is not empty. if main_part.bytes().filter(|&b| b != b'_').count() == 0 { return Err(perr(end_prefix..end_prefix + end_main, NoDigits)); } Ok(IntegerLit { raw: input, start_main_part: end_prefix, end_main_part: end_main + end_prefix, base, }) } /// The bases in which an integer can be specified. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum IntegerBase { Binary, Octal, Decimal, Hexadecimal, } impl IntegerBase { /// Returns the literal prefix that indicates this base, i.e. `"0b"`, /// `"0o"`, `""` and `"0x"`. pub fn prefix(self) -> &'static str { match self { Self::Binary => "0b", Self::Octal => "0o", Self::Decimal => "", Self::Hexadecimal => "0x", } } /// Returns the base value, i.e. 2, 8, 10 or 16. pub fn value(self) -> u8 { match self { Self::Binary => 2, Self::Octal => 8, Self::Decimal => 10, Self::Hexadecimal => 16, } } } /// All possible integer type suffixes. #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[non_exhaustive] pub enum IntegerType { U8, U16, U32, U64, U128, Usize, I8, I16, I32, I64, I128, Isize, } impl IntegerType { /// Returns the type corresponding to the given suffix (e.g. `"u8"` is /// mapped to `Self::U8`). If the suffix is not a valid integer type, /// `None` is returned. pub fn from_suffix(suffix: &str) -> Option { match suffix { "u8" => Some(Self::U8), "u16" => Some(Self::U16), "u32" => Some(Self::U32), "u64" => Some(Self::U64), "u128" => Some(Self::U128), "usize" => Some(Self::Usize), "i8" => Some(Self::I8), "i16" => Some(Self::I16), "i32" => Some(Self::I32), "i64" => Some(Self::I64), "i128" => Some(Self::I128), "isize" => Some(Self::Isize), _ => None, } } /// Returns the suffix for this type, e.g. `"u8"` for `Self::U8`. 
pub fn suffix(self) -> &'static str { match self { Self::U8 => "u8", Self::U16 => "u16", Self::U32 => "u32", Self::U64 => "u64", Self::U128 => "u128", Self::Usize => "usize", Self::I8 => "i8", Self::I16 => "i16", Self::I32 => "i32", Self::I64 => "i64", Self::I128 => "i128", Self::Isize => "isize", } } } impl FromStr for IntegerType { type Err = (); fn from_str(s: &str) -> Result { Self::from_suffix(s).ok_or(()) } } impl fmt::Display for IntegerType { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.suffix().fmt(f) } } #[cfg(test)] mod tests; litrs-0.4.0/src/integer/tests.rs000064400000000000000000000320021046102023000147130ustar 00000000000000use std::fmt::{Debug, Display}; use crate::{ FromIntegerLiteral, Literal, IntegerLit, IntegerType as Ty, IntegerBase, IntegerBase::*, test_util::{assert_parse_ok_eq, assert_roundtrip}, }; // ===== Utility functions ======================================================================= #[track_caller] fn check( input: &str, value: T, base: IntegerBase, main_part: &str, type_suffix: Option, ) { let expected_integer = IntegerLit { raw: input, start_main_part: base.prefix().len(), end_main_part: base.prefix().len() + main_part.len(), base, }; assert_parse_ok_eq( input, IntegerLit::parse(input), expected_integer.clone(), "IntegerLit::parse"); assert_parse_ok_eq( input, Literal::parse(input), Literal::Integer(expected_integer), "Literal::parse"); assert_roundtrip(expected_integer.to_owned(), input); assert_eq!(Ty::from_suffix(IntegerLit::parse(input).unwrap().suffix()), type_suffix); let actual_value = IntegerLit::parse(input) .unwrap() .value::() .unwrap_or_else(|| panic!("unexpected overflow in `IntegerLit::value` for `{}`", input)); if actual_value != value { panic!( "Parsing int literal `{}` should give value `{}`, but actually resulted in `{}`", input, value, actual_value, ); } } // ===== Actual tests =========================================================================== #[test] fn parse_decimal() { 
check("0", 0u128, Decimal, "0", None); check("1", 1u8, Decimal, "1", None); check("8", 8u16, Decimal, "8", None); check("9", 9u32, Decimal, "9", None); check("10", 10u64, Decimal, "10", None); check("11", 11i8, Decimal, "11", None); check("123456789", 123456789i128, Decimal, "123456789", None); check("05", 5i16, Decimal, "05", None); check("00005", 5i32, Decimal, "00005", None); check("0123456789", 123456789i64, Decimal, "0123456789", None); check("123_456_789", 123_456_789, Decimal, "123_456_789", None); check("0___4", 4, Decimal, "0___4", None); check("0___4_3", 43, Decimal, "0___4_3", None); check("0___4_3", 43, Decimal, "0___4_3", None); check("123___________", 123, Decimal, "123___________", None); check( "340282366920938463463374607431768211455", 340282366920938463463374607431768211455u128, Decimal, "340282366920938463463374607431768211455", None, ); check( "340_282_366_920_938_463_463_374_607_431_768_211_455", 340282366920938463463374607431768211455u128, Decimal, "340_282_366_920_938_463_463_374_607_431_768_211_455", None, ); check( "3_40_282_3669_20938_463463_3746074_31768211_455___", 340282366920938463463374607431768211455u128, Decimal, "3_40_282_3669_20938_463463_3746074_31768211_455___", None, ); } #[test] fn parse_binary() { check("0b0", 0b0, Binary, "0", None); check("0b000", 0b000, Binary, "000", None); check("0b1", 0b1, Binary, "1", None); check("0b01", 0b01, Binary, "01", None); check("0b101010", 0b101010, Binary, "101010", None); check("0b10_10_10", 0b10_10_10, Binary, "10_10_10", None); check("0b01101110____", 0b01101110____, Binary, "01101110____", None); check("0b10010u8", 0b10010u8, Binary, "10010", Some(Ty::U8)); check("0b10010i8", 0b10010u8, Binary, "10010", Some(Ty::I8)); check("0b10010u64", 0b10010u64, Binary, "10010", Some(Ty::U64)); check("0b10010i64", 0b10010i64, Binary, "10010", Some(Ty::I64)); check( "0b1011001_00110000_00101000_10100101u32", 0b1011001_00110000_00101000_10100101u32, Binary, "1011001_00110000_00101000_10100101", 
Some(Ty::U32), ); } #[test] fn parse_octal() { check("0o0", 0o0, Octal, "0", None); check("0o1", 0o1, Octal, "1", None); check("0o6", 0o6, Octal, "6", None); check("0o7", 0o7, Octal, "7", None); check("0o17", 0o17, Octal, "17", None); check("0o123", 0o123, Octal, "123", None); check("0o7654321", 0o7654321, Octal, "7654321", None); check("0o7_53_1", 0o7_53_1, Octal, "7_53_1", None); check("0o66_", 0o66_, Octal, "66_", None); check("0o755u16", 0o755u16, Octal, "755", Some(Ty::U16)); check("0o755i128", 0o755i128, Octal, "755", Some(Ty::I128)); } #[test] fn parse_hexadecimal() { check("0x0", 0x0, Hexadecimal, "0", None); check("0x1", 0x1, Hexadecimal, "1", None); check("0x9", 0x9, Hexadecimal, "9", None); check("0xa", 0xa, Hexadecimal, "a", None); check("0xf", 0xf, Hexadecimal, "f", None); check("0x17", 0x17, Hexadecimal, "17", None); check("0x1b", 0x1b, Hexadecimal, "1b", None); check("0x123", 0x123, Hexadecimal, "123", None); check("0xace", 0xace, Hexadecimal, "ace", None); check("0xfdb971", 0xfdb971, Hexadecimal, "fdb971", None); check("0xa_54_f", 0xa_54_f, Hexadecimal, "a_54_f", None); check("0x6d_", 0x6d_, Hexadecimal, "6d_", None); check("0xA", 0xA, Hexadecimal, "A", None); check("0xF", 0xF, Hexadecimal, "F", None); check("0x17", 0x17, Hexadecimal, "17", None); check("0x1B", 0x1B, Hexadecimal, "1B", None); check("0x123", 0x123, Hexadecimal, "123", None); check("0xACE", 0xACE, Hexadecimal, "ACE", None); check("0xFDB971", 0xFDB971, Hexadecimal, "FDB971", None); check("0xA_54_F", 0xA_54_F, Hexadecimal, "A_54_F", None); check("0x6D_", 0x6D_, Hexadecimal, "6D_", None); check("0xFdB97a1", 0xFdB97a1, Hexadecimal, "FdB97a1", None); check("0xfdB97A1", 0xfdB97A1, Hexadecimal, "fdB97A1", None); check("0x40u16", 0x40u16, Hexadecimal, "40", Some(Ty::U16)); check("0xffi128", 0xffi128, Hexadecimal, "ff", Some(Ty::I128)); } #[test] fn starting_underscore() { check("0b_1", 1, Binary, "_1", None); check("0b_010i16", 0b_010, Binary, "_010", Some(Ty::I16)); check("0o_5", 5, Octal, 
"_5", None); check("0o_750u128", 0o_750u128, Octal, "_750", Some(Ty::U128)); check("0x_c", 0xc, Hexadecimal, "_c", None); check("0x_cf3i8", 0x_cf3, Hexadecimal, "_cf3", Some(Ty::I8)); } #[test] fn parse_overflowing_just_fine() { check("256u8", 256u16, Decimal, "256", Some(Ty::U8)); check("123_456_789u8", 123_456_789u32, Decimal, "123_456_789", Some(Ty::U8)); check("123_456_789u16", 123_456_789u32, Decimal, "123_456_789", Some(Ty::U16)); check("123_123_456_789u8", 123_123_456_789u64, Decimal, "123_123_456_789", Some(Ty::U8)); check("123_123_456_789u16", 123_123_456_789u64, Decimal, "123_123_456_789", Some(Ty::U16)); check("123_123_456_789u32", 123_123_456_789u64, Decimal, "123_123_456_789", Some(Ty::U32)); } #[test] fn suffixes() { [ ("123i8", Ty::I8), ("123i16", Ty::I16), ("123i32", Ty::I32), ("123i64", Ty::I64), ("123i128", Ty::I128), ("123u8", Ty::U8), ("123u16", Ty::U16), ("123u32", Ty::U32), ("123u64", Ty::U64), ("123u128", Ty::U128), ].iter().for_each(|&(s, ty)| { assert_eq!(Ty::from_suffix(IntegerLit::parse(s).unwrap().suffix()), Some(ty)); }); } #[test] fn overflow_u128() { let inputs = [ "340282366920938463463374607431768211456", "0x100000000000000000000000000000000", "0o4000000000000000000000000000000000000000000", "0b1000000000000000000000000000000000000000000000000000000000000000000\ 00000000000000000000000000000000000000000000000000000000000000", "340282366920938463463374607431768211456u128", "340282366920938463463374607431768211457", "3_40_282_3669_20938_463463_3746074_31768211_456___", "3_40_282_3669_20938_463463_3746074_31768211_455___1", "3_40_282_3669_20938_463463_3746074_31768211_455___0u128", "3402823669209384634633746074317682114570", ]; for &input in &inputs { let lit = IntegerLit::parse(input).expect("failed to parse"); assert!(lit.value::().is_none()); } } #[test] fn overflow_u8() { let inputs = [ "256", "0x100", "0o400", "0b100000000", "257", "0x101", "0o401", "0b100000001", "300", "1548", "2548985", "256u128", "256u8", "2_5_6", "256_____1", 
"256__", ]; for &input in &inputs { let lit = IntegerLit::parse(input).expect("failed to parse"); assert!(lit.value::().is_none()); } } #[test] fn parse_err() { assert_err!(IntegerLit, "", Empty, None); assert_err_single!(IntegerLit::parse("a"), DoesNotStartWithDigit, 0); assert_err_single!(IntegerLit::parse(";"), DoesNotStartWithDigit, 0); assert_err_single!(IntegerLit::parse("0;"), UnexpectedChar, 1..2); assert_err!(IntegerLit, "0b", NoDigits, 2..2); assert_err_single!(IntegerLit::parse(" 0"), DoesNotStartWithDigit, 0); assert_err_single!(IntegerLit::parse("0 "), UnexpectedChar, 1); assert_err!(IntegerLit, "0b3", InvalidDigit, 2); assert_err_single!(IntegerLit::parse("_"), DoesNotStartWithDigit, 0); assert_err_single!(IntegerLit::parse("_3"), DoesNotStartWithDigit, 0); assert_err!(IntegerLit, "0x44.5", UnexpectedChar, 4..6); assert_err_single!(IntegerLit::parse("123em"), IntegerSuffixStartingWithE, 3); } #[test] fn invalid_digits() { assert_err!(IntegerLit, "0b10201", InvalidDigit, 4); assert_err!(IntegerLit, "0b9", InvalidDigit, 2); assert_err!(IntegerLit, "0b07", InvalidDigit, 3); assert_err!(IntegerLit, "0o12380", InvalidDigit, 5); assert_err!(IntegerLit, "0o192", InvalidDigit, 3); assert_err_single!(IntegerLit::parse("a_123"), DoesNotStartWithDigit, 0); assert_err_single!(IntegerLit::parse("B_123"), DoesNotStartWithDigit, 0); } #[test] fn no_valid_digits() { assert_err!(IntegerLit, "0x_", NoDigits, 2..3); assert_err!(IntegerLit, "0x__", NoDigits, 2..4); assert_err!(IntegerLit, "0x________", NoDigits, 2..10); assert_err!(IntegerLit, "0x_i8", NoDigits, 2..3); assert_err!(IntegerLit, "0x_u8", NoDigits, 2..3); assert_err!(IntegerLit, "0x_isize", NoDigits, 2..3); assert_err!(IntegerLit, "0x_usize", NoDigits, 2..3); assert_err!(IntegerLit, "0o_", NoDigits, 2..3); assert_err!(IntegerLit, "0o__", NoDigits, 2..4); assert_err!(IntegerLit, "0o________", NoDigits, 2..10); assert_err!(IntegerLit, "0o_i32", NoDigits, 2..3); assert_err!(IntegerLit, "0o_u32", NoDigits, 
2..3); assert_err!(IntegerLit, "0b_", NoDigits, 2..3); assert_err!(IntegerLit, "0b__", NoDigits, 2..4); assert_err!(IntegerLit, "0b________", NoDigits, 2..10); assert_err!(IntegerLit, "0b_i128", NoDigits, 2..3); assert_err!(IntegerLit, "0b_u128", NoDigits, 2..3); } #[test] fn non_standard_suffixes() { #[track_caller] fn check_suffix( input: &str, value: T, base: IntegerBase, main_part: &str, suffix: &str, ) { check(input, value, base, main_part, None); assert_eq!(IntegerLit::parse(input).unwrap().suffix(), suffix); } check_suffix("5u7", 5, Decimal, "5", "u7"); check_suffix("5u7", 5, Decimal, "5", "u7"); check_suffix("5u9", 5, Decimal, "5", "u9"); check_suffix("5u0", 5, Decimal, "5", "u0"); check_suffix("33u12", 33, Decimal, "33", "u12"); check_suffix("84u17", 84, Decimal, "84", "u17"); check_suffix("99u80", 99, Decimal, "99", "u80"); check_suffix("1234uu16", 1234, Decimal, "1234", "uu16"); check_suffix("5i7", 5, Decimal, "5", "i7"); check_suffix("5i9", 5, Decimal, "5", "i9"); check_suffix("5i0", 5, Decimal, "5", "i0"); check_suffix("33i12", 33, Decimal, "33", "i12"); check_suffix("84i17", 84, Decimal, "84", "i17"); check_suffix("99i80", 99, Decimal, "99", "i80"); check_suffix("1234ii16", 1234, Decimal, "1234", "ii16"); check_suffix("0ui32", 0, Decimal, "0", "ui32"); check_suffix("1iu32", 1, Decimal, "1", "iu32"); check_suffix("54321a64", 54321, Decimal, "54321", "a64"); check_suffix("54321b64", 54321, Decimal, "54321", "b64"); check_suffix("54321x64", 54321, Decimal, "54321", "x64"); check_suffix("54321o64", 54321, Decimal, "54321", "o64"); check_suffix("0a", 0, Decimal, "0", "a"); check_suffix("0a3", 0, Decimal, "0", "a3"); check_suffix("0z", 0, Decimal, "0", "z"); check_suffix("0z3", 0, Decimal, "0", "z3"); check_suffix("0b0a", 0, Binary, "0", "a"); check_suffix("0b0A", 0, Binary, "0", "A"); check_suffix("0b01f", 1, Binary, "01", "f"); check_suffix("0b01F", 1, Binary, "01", "F"); check_suffix("0o7a_", 7, Octal, "7", "a_"); check_suffix("0o7A_", 7, Octal, "7", 
"A_"); check_suffix("0o72f_0", 0o72, Octal, "72", "f_0"); check_suffix("0o72F_0", 0o72, Octal, "72", "F_0"); check_suffix("0x8cg", 0x8c, Hexadecimal, "8c", "g"); check_suffix("0x8cG", 0x8c, Hexadecimal, "8c", "G"); check_suffix("0x8c1h_", 0x8c1, Hexadecimal, "8c1", "h_"); check_suffix("0x8c1H_", 0x8c1, Hexadecimal, "8c1", "H_"); check_suffix("0x8czu16", 0x8c, Hexadecimal, "8c", "zu16"); check_suffix("123_foo", 123, Decimal, "123_", "foo"); } litrs-0.4.0/src/lib.rs000064400000000000000000000307121046102023000126700ustar 00000000000000//! Parsing and inspecting Rust literal tokens. //! //! This library offers functionality to parse Rust literals, i.e. tokens in the //! Rust programming language that represent fixed values. The grammar for //! those is defined [here][ref]. //! //! This kind of functionality already exists in the crate `syn`. However, as //! you oftentimes don't need (nor want) the full power of `syn`, `litrs` was //! built. This crate also offers a bit more flexibility compared to `syn` //! (only regarding literals, of course). //! //! //! # Quick start //! //! | **`StringLit::try_from(tt)?.value()`** | //! | - | //! //! ... where `tt` is a `proc_macro::TokenTree` and where [`StringLit`] can be //! replaced with [`Literal`] or other types of literals (e.g. [`FloatLit`]). //! Calling `value()` returns the value that is represented by the literal. //! //! **Mini Example** //! //! ```ignore //! use proc_macro::TokenStream; //! //! #[proc_macro] //! pub fn foo(input: TokenStream) -> TokenStream { //! let first_token = input.into_iter().next().unwrap(); // Do proper error handling! //! let string_value = match litrs::StringLit::try_from(first_token) { //! Ok(string_lit) => string_lit.value(), //! Err(e) => return e.to_compile_error(), //! }; //! //! // `string_value` is the string value with all escapes resolved. //! todo!() //! } //! ``` //! //! # Overview //! //! The main types of this library are [`Literal`], representing any kind of //! 
literal, and `*Lit`, like [`StringLit`] or [`FloatLit`], representing a //! specific kind of literal. //! //! There are different ways to obtain such a literal type: //! //! - **`parse`**: parses a `&str` or `String` and returns `Result<_, //! ParseError>`. For example: [`Literal::parse`] and //! [`IntegerLit::parse`]. //! //! - **`From<proc_macro::Literal> for Literal`**: turns a `Literal` value from //! the `proc_macro` crate into a `Literal` from this crate. //! //! - **`TryFrom<proc_macro::Literal> for *Lit`**: tries to turn a //! `proc_macro::Literal` into a specific literal type of this crate. If //! the input is a literal of a different kind, `Err(InvalidToken)` is //! returned. //! //! - **`TryFrom<proc_macro::TokenTree>`**: attempts to turn a token tree into a //! literal type of this crate. An error is returned if the token tree is //! not a literal, or if you are trying to turn it into a specific kind of //! literal and the token tree is a different kind of literal. //! //! All of the `From` and `TryFrom` conversions also work for references to //! `proc_macro` types. Additionally, if the crate feature `proc-macro2` is //! enabled (which it is by default), all these `From` and `TryFrom` impls also //! exist for the corresponding `proc_macro2` types. //! //! **Note**: `true` and `false` are `Ident`s when passed to your proc macro. //! The `TryFrom<TokenTree>` impls check for those two special idents and //! return a [`BoolLit`] appropriately. For that reason, there is also no //! `TryFrom<proc_macro::Literal>` impl for [`BoolLit`]. The `proc_macro::Literal` //! simply cannot represent bool literals. //! //! //! # Examples //! //! In a proc-macro: //! //! ```ignore //! use std::convert::TryFrom; //! use proc_macro::TokenStream; //! use litrs::FloatLit; //! //! #[proc_macro] //! pub fn foo(input: TokenStream) -> TokenStream { //! let mut input = input.into_iter().collect::<Vec<_>>(); //! if input.len() != 1 { //! // Please do proper error handling in your real code! //! panic!("expected exactly one token as input"); //! } //! let token = input.remove(0); //! 
//! match FloatLit::try_from(token) { //! Ok(float_lit) => { /* do something */ } //! Err(e) => return e.to_compile_error(), //! } //! //! // Dummy output //! TokenStream::new() //! } //! ``` //! //! Parsing from string: //! //! ``` //! use litrs::{FloatLit, Literal}; //! //! // Parse a specific kind of literal (float in this case): //! let float_lit = FloatLit::parse("3.14f32"); //! assert!(float_lit.is_ok()); //! assert_eq!(float_lit.unwrap().suffix(), "f32"); //! assert!(FloatLit::parse("'c'").is_err()); //! //! // Parse any kind of literal. After parsing, you can inspect the literal //! // and decide what to do in each case. //! let lit = Literal::parse("0xff80").expect("failed to parse literal"); //! match lit { //! Literal::Integer(lit) => { /* ... */ } //! Literal::Float(lit) => { /* ... */ } //! Literal::Bool(lit) => { /* ... */ } //! Literal::Char(lit) => { /* ... */ } //! Literal::String(lit) => { /* ... */ } //! Literal::Byte(lit) => { /* ... */ } //! Literal::ByteString(lit) => { /* ... */ } //! } //! ``` //! //! //! //! # Crate features //! //! - `proc-macro2` (**default**): adds the dependency `proc_macro2`, a bunch of //! `From` and `TryFrom` impls, and [`InvalidToken::to_compile_error2`]. //! - `check_suffix`: if enabled, `parse` functions will exactly verify that the //! literal suffix is valid. Adds the dependency `unicode-xid`. If disabled, //! only an approximate check (only in ASCII range) is done. If you are //! writing a proc macro, you don't need to enable this as the suffix is //! already checked by the compiler. //! //! //! [ref]: https://doc.rust-lang.org/reference/tokens.html#literals //! 
#![deny(missing_debug_implementations)] extern crate proc_macro; #[cfg(test)] #[macro_use] mod test_util; #[cfg(test)] mod tests; mod bool; mod byte; mod bytestr; mod char; mod err; mod escape; mod float; mod impls; mod integer; mod parse; mod string; use std::{borrow::{Borrow, Cow}, fmt, ops::{Deref, Range}}; pub use self::{ bool::BoolLit, byte::ByteLit, bytestr::ByteStringLit, char::CharLit, err::{InvalidToken, ParseError}, float::{FloatLit, FloatType}, integer::{FromIntegerLiteral, IntegerLit, IntegerBase, IntegerType}, string::StringLit, }; // ============================================================================================== // ===== `Literal` and type defs // ============================================================================================== /// A literal. This is the main type of this library. /// /// This type is generic over the underlying buffer `B`, which can be `&str` or /// `String`. /// /// To create this type, you have to either call [`Literal::parse`] with an /// input string or use the `From<_>` impls of this type. The impls are only /// available if the corresponding crate features are enabled (they are enabled /// by default). #[derive(Debug, Clone, PartialEq, Eq)] pub enum Literal { Bool(BoolLit), Integer(IntegerLit), Float(FloatLit), Char(CharLit), String(StringLit), Byte(ByteLit), ByteString(ByteStringLit), } impl Literal { /// Parses the given input as a Rust literal. pub fn parse(input: B) -> Result { parse::parse(input) } /// Returns the suffix of this literal or `""` if it doesn't have one. /// /// Rust token grammar actually allows suffixes for all kinds of tokens. /// Most Rust programmers only know the type suffixes for integer and /// floats, e.g. `0u32`. And in normal Rust code, everything else causes an /// error. 
But it is possible to pass literals with arbitrary suffixes to /// proc macros, for example: /// /// ```ignore /// some_macro!(3.14f33 16px '🦊'good_boy "toph"beifong); /// ``` /// /// Boolean literals, not actually being literals, but idents, cannot have /// suffixes and this method always returns `""` for those. /// /// There are some edge cases to be aware of: /// - Integer suffixes must not start with `e` or `E` as that conflicts with /// the exponent grammar for floats. `0e1` is a float; `0eel` is also /// parsed as a float and results in an error. /// - Hexadecimal integers eagerly parse digits, so `0x5abcdefgh` has a /// suffix of `gh`. /// - Suffixes can contain and start with `_`, but for integer and float /// literals, `_` is eagerly parsed as part of the number, so `1_x` has /// the suffix `x`. /// - The input `55f32` is regarded as an integer literal with suffix `f32`. /// /// # Example /// /// ``` /// use litrs::Literal; /// /// assert_eq!(Literal::parse(r##"3.14f33"##).unwrap().suffix(), "f33"); /// assert_eq!(Literal::parse(r##"123hackerman"##).unwrap().suffix(), "hackerman"); /// assert_eq!(Literal::parse(r##"0x0fuck"##).unwrap().suffix(), "uck"); /// assert_eq!(Literal::parse(r##"'🦊'good_boy"##).unwrap().suffix(), "good_boy"); /// assert_eq!(Literal::parse(r##""toph"beifong"##).unwrap().suffix(), "beifong"); /// ``` pub fn suffix(&self) -> &str { match self { Literal::Bool(_) => "", Literal::Integer(l) => l.suffix(), Literal::Float(l) => l.suffix(), Literal::Char(l) => l.suffix(), Literal::String(l) => l.suffix(), Literal::Byte(l) => l.suffix(), Literal::ByteString(l) => l.suffix(), } } } impl Literal<&str> { /// Makes a copy of the underlying buffer and returns the owned version of /// `Self`. 
pub fn into_owned(self) -> Literal { match self { Literal::Bool(l) => Literal::Bool(l.to_owned()), Literal::Integer(l) => Literal::Integer(l.to_owned()), Literal::Float(l) => Literal::Float(l.to_owned()), Literal::Char(l) => Literal::Char(l.to_owned()), Literal::String(l) => Literal::String(l.into_owned()), Literal::Byte(l) => Literal::Byte(l.to_owned()), Literal::ByteString(l) => Literal::ByteString(l.into_owned()), } } } impl fmt::Display for Literal { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Literal::Bool(l) => l.fmt(f), Literal::Integer(l) => l.fmt(f), Literal::Float(l) => l.fmt(f), Literal::Char(l) => l.fmt(f), Literal::String(l) => l.fmt(f), Literal::Byte(l) => l.fmt(f), Literal::ByteString(l) => l.fmt(f), } } } // ============================================================================================== // ===== Buffer // ============================================================================================== /// A shared or owned string buffer. Implemented for `String` and `&str`. *Implementation detail*. /// /// This trait is an implementation detail of this library, cannot be /// implemented in other crates and is not subject to semantic versioning. /// `litrs` only guarantees that this trait is implemented for `String` and /// `for<'a> &'a str`. pub trait Buffer: sealed::Sealed + Deref { /// This is `Cow<'static, str>` for `String`, and `Cow<'a, str>` for `&'a str`. type Cow: From + AsRef + Borrow + Deref; #[doc(hidden)] fn into_cow(self) -> Self::Cow; /// This is `Cow<'static, [u8]>` for `String`, and `Cow<'a, [u8]>` for `&'a str`. type ByteCow: From> + AsRef<[u8]> + Borrow<[u8]> + Deref; #[doc(hidden)] fn into_byte_cow(self) -> Self::ByteCow; /// Cuts away some characters at the beginning and some at the end. Given /// range has to be in bounds. 
#[doc(hidden)] fn cut(self, range: Range) -> Self; } mod sealed { pub trait Sealed {} } impl<'a> sealed::Sealed for &'a str {} impl<'a> Buffer for &'a str { #[doc(hidden)] fn cut(self, range: Range) -> Self { &self[range] } type Cow = Cow<'a, str>; #[doc(hidden)] fn into_cow(self) -> Self::Cow { self.into() } type ByteCow = Cow<'a, [u8]>; #[doc(hidden)] fn into_byte_cow(self) -> Self::ByteCow { self.as_bytes().into() } } impl sealed::Sealed for String {} impl Buffer for String { #[doc(hidden)] fn cut(mut self, range: Range) -> Self { // This is not the most efficient way, but it works. First we cut the // end, then the beginning. Note that `drain` also removes the range if // the iterator is not consumed. self.truncate(range.end); self.drain(..range.start); self } type Cow = Cow<'static, str>; #[doc(hidden)] fn into_cow(self) -> Self::Cow { self.into() } type ByteCow = Cow<'static, [u8]>; #[doc(hidden)] fn into_byte_cow(self) -> Self::ByteCow { self.into_bytes().into() } } litrs-0.4.0/src/parse.rs000064400000000000000000000100021046102023000132220ustar 00000000000000use crate::{ BoolLit, Buffer, ByteLit, ByteStringLit, CharLit, ParseError, FloatLit, IntegerLit, Literal, StringLit, err::{perr, ParseErrorKind::{*, self}}, }; pub fn parse(input: B) -> Result, ParseError> { let (first, rest) = input.as_bytes().split_first().ok_or(perr(None, Empty))?; let second = input.as_bytes().get(1).copied(); match first { b'f' if &*input == "false" => Ok(Literal::Bool(BoolLit::False)), b't' if &*input == "true" => Ok(Literal::Bool(BoolLit::True)), // A number literal (integer or float). b'0'..=b'9' => { // To figure out whether this is a float or integer, we do some // quick inspection here. Yes, this is technically duplicate // work with what is happening in the integer/float parse // methods, but it makes the code way easier for now and won't // be a huge performance loss. // // The first non-decimal char in a float literal must // be '.', 'e' or 'E'. 
/// Returns the index of the first byte in `input` that is neither an
/// underscore nor a decimal digit, or `input.len()` if all bytes are
/// underscores or decimal digits (which includes the empty input).
pub(crate) fn end_dec_digits(input: &[u8]) -> usize {
    input.iter()
        .position(|b| !matches!(b, b'_' | b'0'..=b'9'))
        .unwrap_or(input.len())
}

/// Returns the numeric value (`0..=15`) of the ASCII hex digit `digit`, or
/// `None` if it is not a hex digit. Both upper and lower case are accepted.
pub(crate) fn hex_digit_value(digit: u8) -> Option<u8> {
    match digit {
        b'0'..=b'9' => Some(digit - b'0'),
        b'a'..=b'f' => Some(digit - b'a' + 10),
        b'A'..=b'F' => Some(digit - b'A' + 10),
        _ => None,
    }
}
#[cfg(feature = "check_suffix")] fn is_valid_suffix(first: char, rest: &str) -> bool { use unicode_xid::UnicodeXID; (first == '_' || first.is_xid_start()) && rest.chars().all(|c| c.is_xid_continue()) } // When avoiding the dependency on `unicode_xid`, we just do a best effort // to catch the most common errors. #[cfg(not(feature = "check_suffix"))] fn is_valid_suffix(first: char, rest: &str) -> bool { if first.is_ascii() && !(first.is_ascii_alphabetic() || first == '_') { return false; } for c in rest.chars() { if c.is_ascii() && !(c.is_ascii_alphanumeric() || c == '_') { return false; } } true } if is_valid_suffix(first, rest) { Ok(()) } else { Err(InvalidSuffix) } } litrs-0.4.0/src/string/mod.rs000064400000000000000000000077111046102023000142120ustar 00000000000000use std::{fmt, ops::Range}; use crate::{ Buffer, ParseError, err::{perr, ParseErrorKind::*}, escape::{scan_raw_string, unescape_string}, parse::first_byte_or_empty, }; /// A string or raw string literal, e.g. `"foo"`, `"Grüße"` or `r#"a🦊c"d🦀f"#`. /// /// See [the reference][ref] for more information. /// /// [ref]: https://doc.rust-lang.org/reference/tokens.html#string-literals #[derive(Debug, Clone, PartialEq, Eq)] pub struct StringLit { /// The raw input. raw: B, /// The string value (with all escapes unescaped), or `None` if there were /// no escapes. In the latter case, the string value is in `raw`. value: Option, /// The number of hash signs in case of a raw string literal, or `None` if /// it's not a raw string literal. num_hashes: Option, /// Start index of the suffix or `raw.len()` if there is no suffix. start_suffix: usize, } impl StringLit { /// Parses the input as a (raw) string literal. Returns an error if the /// input is invalid or represents a different kind of literal. pub fn parse(input: B) -> Result { match first_byte_or_empty(&input)? 
{ b'r' | b'"' => { let (value, num_hashes, start_suffix) = parse_impl(&input)?; Ok(Self { raw: input, value, num_hashes, start_suffix }) } _ => Err(perr(0, InvalidStringLiteralStart)), } } /// Returns the string value this literal represents (where all escapes have /// been turned into their respective values). pub fn value(&self) -> &str { self.value.as_deref().unwrap_or(&self.raw[self.inner_range()]) } /// Like `value` but returns a potentially owned version of the value. /// /// The return value is either `Cow<'static, str>` if `B = String`, or /// `Cow<'a, str>` if `B = &'a str`. pub fn into_value(self) -> B::Cow { let inner_range = self.inner_range(); let Self { raw, value, .. } = self; value.map(B::Cow::from).unwrap_or_else(|| raw.cut(inner_range).into_cow()) } /// The optional suffix. Returns `""` if the suffix is empty/does not exist. pub fn suffix(&self) -> &str { &(*self.raw)[self.start_suffix..] } /// Returns whether this literal is a raw string literal (starting with /// `r`). pub fn is_raw_string(&self) -> bool { self.num_hashes.is_some() } /// Returns the raw input that was passed to `parse`. pub fn raw_input(&self) -> &str { &self.raw } /// Returns the raw input that was passed to `parse`, potentially owned. pub fn into_raw_input(self) -> B { self.raw } /// The range within `self.raw` that excludes the quotes and potential `r#`. fn inner_range(&self) -> Range { match self.num_hashes { None => 1..self.start_suffix - 1, Some(n) => 1 + n as usize + 1..self.start_suffix - n as usize - 1, } } } impl StringLit<&str> { /// Makes a copy of the underlying buffer and returns the owned version of /// `Self`. pub fn into_owned(self) -> StringLit { StringLit { raw: self.raw.to_owned(), value: self.value, num_hashes: self.num_hashes, start_suffix: self.start_suffix, } } } impl fmt::Display for StringLit { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.pad(&self.raw) } } /// Precondition: input has to start with either `"` or `r`. 
#[inline(never)] pub(crate) fn parse_impl(input: &str) -> Result<(Option, Option, usize), ParseError> { if input.starts_with('r') { scan_raw_string::(&input, 1) .map(|(v, hashes, start_suffix)| (v, Some(hashes), start_suffix)) } else { unescape_string::(&input, 1) .map(|(v, start_suffix)| (v, None, start_suffix)) } } #[cfg(test)] mod tests; litrs-0.4.0/src/string/tests.rs000064400000000000000000000265321046102023000145770ustar 00000000000000use crate::{Literal, StringLit, test_util::{assert_parse_ok_eq, assert_roundtrip}}; // ===== Utility functions ======================================================================= macro_rules! check { ($lit:literal, $has_escapes:expr, $num_hashes:expr) => { check!($lit, stringify!($lit), $has_escapes, $num_hashes, "") }; ($lit:literal, $input:expr, $has_escapes:expr, $num_hashes:expr, $suffix:literal) => { let input = $input; let expected = StringLit { raw: input, value: if $has_escapes { Some($lit.to_string()) } else { None }, num_hashes: $num_hashes, start_suffix: input.len() - $suffix.len(), }; assert_parse_ok_eq(input, StringLit::parse(input), expected.clone(), "StringLit::parse"); assert_parse_ok_eq( input, Literal::parse(input), Literal::String(expected.clone()), "Literal::parse"); let lit = StringLit::parse(input).unwrap(); assert_eq!(lit.value(), $lit); assert_eq!(lit.suffix(), $suffix); assert_eq!(lit.into_value(), $lit); assert_roundtrip(expected.into_owned(), input); }; } // ===== Actual tests ============================================================================ #[test] fn simple() { check!("", false, None); check!("a", false, None); check!("peter", false, None); check!("Sei gegrüßt, Bärthelt!", false, None); check!("أنا لا أتحدث العربية", false, None); check!("お前はもう死んでいる", false, None); check!("Пушки - интересные музыкальные инструменты", false, None); check!("lit 👌 😂 af", false, None); } #[test] fn special_whitespace() { let strings = ["\n", "\t", "foo\tbar", "🦊\n"]; for &s in &strings { let input = 
format!(r#""{}""#, s); let input_raw = format!(r#"r"{}""#, s); for (input, num_hashes) in vec![(input, None), (input_raw, Some(0))] { let expected = StringLit { raw: &*input, value: None, num_hashes, start_suffix: input.len(), }; assert_parse_ok_eq( &input, StringLit::parse(&*input), expected.clone(), "StringLit::parse"); assert_parse_ok_eq( &input, Literal::parse(&*input), Literal::String(expected), "Literal::parse"); assert_eq!(StringLit::parse(&*input).unwrap().value(), s); assert_eq!(StringLit::parse(&*input).unwrap().into_value(), s); } } } #[test] fn simple_escapes() { check!("a\nb", true, None); check!("\nb", true, None); check!("a\n", true, None); check!("\n", true, None); check!("\x60犬 \t 猫\r馬\n うさぎ \0ネズミ", true, None); check!("నా \\పిల్లి లావుగా ఉంది", true, None); check!("నా \\పిల్లి లావుగా 🐈\"ఉంది", true, None); check!("\\నా\\ పిల్లి లావుగా\" ఉంది\"", true, None); check!("\"నా \\🐈 పిల్లి లావుగా \" ఉంది\\", true, None); check!("\x00", true, None); check!(" \x01", true, None); check!("\x0c 🦊", true, None); check!(" 🦊\x0D ", true, None); check!("\\x13", true, None); check!("\"x30", true, None); } #[test] fn unicode_escapes() { check!("\u{0}", true, None); check!(" \u{00}", true, None); check!("\u{b} ", true, None); check!(" \u{B} ", true, None); check!("\u{7e}", true, None); check!("నక్క\u{E4}", true, None); check!("\u{e4} నక్క", true, None); check!(" \u{fc}నక్క ", true, None); check!("\u{Fc}", true, None); check!("\u{fC}🦊\nлиса", true, None); check!("лиса\u{FC}", true, None); check!("лиса\u{b10}నక్క🦊", true, None); check!("\"నక్క\u{B10}", true, None); check!("лиса\\\u{0b10}", true, None); check!("ли🦊са\\\"\u{0b10}", true, None); check!("నక్క\\\\u{0b10}", true, None); check!("\u{2764}Füchsin", true, None); check!("Füchse \u{1f602}", true, None); check!("cd\u{1F602}ab", true, None); check!("\u{0}🦊", true, None); check!("лиса\u{0__}", true, None); check!("\\🦊\u{3_b}", true, None); check!("🦊\u{1_F_6_0_2}Füchsin", true, None); 
check!("నక్క\\\u{1_F6_02_____}నక్క", true, None); } #[test] fn string_continue() { check!("నక్క\ bar", true, None); check!("foo\ 🦊", true, None); check!("foo\ banana", true, None); // Weird whitespace characters let lit = StringLit::parse("\"foo\\\n\r\t\n \n\tbar\"").expect("failed to parse"); assert_eq!(lit.value(), "foobar"); let lit = StringLit::parse("\"foo\\\n\u{85}bar\"").expect("failed to parse"); assert_eq!(lit.value(), "foo\u{85}bar"); let lit = StringLit::parse("\"foo\\\n\u{a0}bar\"").expect("failed to parse"); assert_eq!(lit.value(), "foo\u{a0}bar"); // Raw strings do not handle "string continues" check!(r"foo\ bar", false, Some(0)); } #[test] fn crlf_newlines() { let lit = StringLit::parse("\"foo\r\nbar\"").expect("failed to parse"); assert_eq!(lit.value(), "foo\nbar"); let lit = StringLit::parse("\"\r\nbar\"").expect("failed to parse"); assert_eq!(lit.value(), "\nbar"); let lit = StringLit::parse("\"лиса\r\n\"").expect("failed to parse"); assert_eq!(lit.value(), "лиса\n"); let lit = StringLit::parse("r\"foo\r\nbar\"").expect("failed to parse"); assert_eq!(lit.value(), "foo\nbar"); let lit = StringLit::parse("r#\"\r\nbar\"#").expect("failed to parse"); assert_eq!(lit.value(), "\nbar"); let lit = StringLit::parse("r##\"лиса\r\n\"##").expect("failed to parse"); assert_eq!(lit.value(), "лиса\n"); } #[test] fn raw_string() { check!(r"", false, Some(0)); check!(r"a", false, Some(0)); check!(r"peter", false, Some(0)); check!(r"Sei gegrüßt, Bärthelt!", false, Some(0)); check!(r"أنا لا أتحدث العربية", false, Some(0)); check!(r"お前はもう死んでいる", false, Some(0)); check!(r"Пушки - интересные музыкальные инструменты", false, Some(0)); check!(r"lit 👌 😂 af", false, Some(0)); check!(r#""#, false, Some(1)); check!(r#"a"#, false, Some(1)); check!(r##"peter"##, false, Some(2)); check!(r###"Sei gegrüßt, Bärthelt!"###, false, Some(3)); check!(r########"lit 👌 😂 af"########, false, Some(8)); check!(r#"foo " bar"#, false, Some(1)); check!(r##"foo " bar"##, false, Some(2)); 
check!(r#"foo """" '"'" bar"#, false, Some(1)); check!(r#""foo""#, false, Some(1)); check!(r###""foo'"###, false, Some(3)); check!(r#""x'#_#s'"#, false, Some(1)); check!(r"#", false, Some(0)); check!(r"foo#", false, Some(0)); check!(r"##bar", false, Some(0)); check!(r###""##foo"##bar'"###, false, Some(3)); check!(r"さび\n\t\r\0\\x60\u{123}フェリス", false, Some(0)); check!(r#"さび\n\t\r\0\\x60\u{123}フェリス"#, false, Some(1)); } #[test] fn suffixes() { check!("hello", r###""hello"suffix"###, false, None, "suffix"); check!(r"お前はもう死んでいる", r###"r"お前はもう死んでいる"_banana"###, false, Some(0), "_banana"); check!("fox", r#""fox"peter"#, false, None, "peter"); check!("🦊", r#""🦊"peter"#, false, None, "peter"); check!("నక్క\\\\u{0b10}", r###""నక్క\\\\u{0b10}"jü_rgen"###, true, None, "jü_rgen"); } #[test] fn parse_err() { assert_err!(StringLit, r#"""#, UnterminatedString, None); assert_err!(StringLit, r#""犬"#, UnterminatedString, None); assert_err!(StringLit, r#""Jürgen"#, UnterminatedString, None); assert_err!(StringLit, r#""foo bar baz"#, UnterminatedString, None); assert_err!(StringLit, r#""fox"peter""#, InvalidSuffix, 5); assert_err!(StringLit, r###"r#"foo "# bar"#"###, UnexpectedChar, 9); assert_err!(StringLit, "\"\r\"", IsolatedCr, 1); assert_err!(StringLit, "\"fo\rx\"", IsolatedCr, 3); assert_err!(StringLit, "r\"\r\"", IsolatedCr, 2); assert_err!(StringLit, "r\"fo\rx\"", IsolatedCr, 4); assert_err!(StringLit, r##"r####""##, UnterminatedRawString, None); assert_err!(StringLit, r#####"r##"foo"#bar"#####, UnterminatedRawString, None); assert_err!(StringLit, r##"r####"##, InvalidLiteral, None); assert_err!(StringLit, r##"r####x"##, InvalidLiteral, None); } #[test] fn invald_ascii_escapes() { assert_err!(StringLit, r#""\x80""#, NonAsciiXEscape, 1..5); assert_err!(StringLit, r#""🦊\x81""#, NonAsciiXEscape, 5..9); assert_err!(StringLit, r#"" \x8a""#, NonAsciiXEscape, 2..6); assert_err!(StringLit, r#""\x8Ff""#, NonAsciiXEscape, 1..5); assert_err!(StringLit, r#""\xa0 ""#, NonAsciiXEscape, 
1..5); assert_err!(StringLit, r#""నక్క\xB0""#, NonAsciiXEscape, 13..17); assert_err!(StringLit, r#""\xc3నక్క""#, NonAsciiXEscape, 1..5); assert_err!(StringLit, r#""\xDf🦊""#, NonAsciiXEscape, 1..5); assert_err!(StringLit, r#""నక్క\xffనక్క""#, NonAsciiXEscape, 13..17); assert_err!(StringLit, r#""\xfF ""#, NonAsciiXEscape, 1..5); assert_err!(StringLit, r#"" \xFf""#, NonAsciiXEscape, 2..6); assert_err!(StringLit, r#""నక్క \xFF""#, NonAsciiXEscape, 15..19); } #[test] fn invalid_escapes() { assert_err!(StringLit, r#""\a""#, UnknownEscape, 1..3); assert_err!(StringLit, r#""foo\y""#, UnknownEscape, 4..6); assert_err!(StringLit, r#""\"#, UnterminatedEscape, 1); assert_err!(StringLit, r#""\x""#, UnterminatedEscape, 1..3); assert_err!(StringLit, r#""🦊\x1""#, UnterminatedEscape, 5..8); assert_err!(StringLit, r#"" \xaj""#, InvalidXEscape, 2..6); assert_err!(StringLit, r#""నక్క\xjb""#, InvalidXEscape, 13..17); } #[test] fn invalid_unicode_escapes() { assert_err!(StringLit, r#""\u""#, UnicodeEscapeWithoutBrace, 1..3); assert_err!(StringLit, r#""🦊\u ""#, UnicodeEscapeWithoutBrace, 5..7); assert_err!(StringLit, r#""\u3""#, UnicodeEscapeWithoutBrace, 1..3); assert_err!(StringLit, r#""\u{""#, UnterminatedUnicodeEscape, 1..4); assert_err!(StringLit, r#""\u{12""#, UnterminatedUnicodeEscape, 1..6); assert_err!(StringLit, r#""🦊\u{a0b""#, UnterminatedUnicodeEscape, 5..11); assert_err!(StringLit, r#""\u{a0_b ""#, UnterminatedUnicodeEscape, 1..10); assert_err!(StringLit, r#""\u{_}నక్క""#, InvalidStartOfUnicodeEscape, 4); assert_err!(StringLit, r#""\u{_5f}""#, InvalidStartOfUnicodeEscape, 4); assert_err!(StringLit, r#""fox\u{x}""#, NonHexDigitInUnicodeEscape, 7); assert_err!(StringLit, r#""\u{0x}🦊""#, NonHexDigitInUnicodeEscape, 5); assert_err!(StringLit, r#""నక్క\u{3bx}""#, NonHexDigitInUnicodeEscape, 18); assert_err!(StringLit, r#""\u{3b_x}лиса""#, NonHexDigitInUnicodeEscape, 7); assert_err!(StringLit, r#""\u{4x_}""#, NonHexDigitInUnicodeEscape, 5); assert_err!(StringLit, 
r#""\u{1234567}""#, TooManyDigitInUnicodeEscape, 10); assert_err!(StringLit, r#""నక్క\u{1234567}🦊""#, TooManyDigitInUnicodeEscape, 22); assert_err!(StringLit, r#""నక్క\u{1_23_4_56_7}""#, TooManyDigitInUnicodeEscape, 26); assert_err!(StringLit, r#""\u{abcdef123}лиса""#, TooManyDigitInUnicodeEscape, 10); assert_err!(StringLit, r#""\u{110000}fox""#, InvalidUnicodeEscapeChar, 1..10); } litrs-0.4.0/src/test_util.rs000064400000000000000000000077121046102023000141420ustar 00000000000000use crate::*; use std::fmt::{Debug, Display}; #[track_caller] pub(crate) fn assert_parse_ok_eq( input: &str, result: Result, expected: T, parse_method: &str, ) { match result { Ok(actual) if actual == expected => { if actual.to_string() != input { panic!( "formatting does not yield original input `{}`: {:?}", input, actual, ); } } Ok(actual) => { panic!( "unexpected parsing result (with `{}`) for `{}`:\nactual: {:?}\nexpected: {:?}", parse_method, input, actual, expected, ); } Err(e) => { panic!( "expected `{}` to be parsed (with `{}`) successfully, but it failed: {:?}", input, parse_method, e, ); } } } // This is not ideal, but to perform this check we need `proc-macro2`. So we // just don't do anything if that feature is not enabled. #[cfg(not(feature = "proc-macro2"))] pub(crate) fn assert_roundtrip(_: T, _: &str) {} #[cfg(feature = "proc-macro2")] #[track_caller] pub(crate) fn assert_roundtrip(ours: T, input: &str) where T: std::convert::TryFrom + fmt::Debug + PartialEq + Clone, proc_macro2::Literal: From, >::Error: std::fmt::Display, { let pm_lit = input.parse::() .expect("failed to parse input as proc_macro2::Literal"); let t_name = std::any::type_name::(); // Unfortunately, `proc_macro2::Literal` does not implement `PartialEq`, so // this is the next best thing. 
if proc_macro2::Literal::from(ours.clone()).to_string() != pm_lit.to_string() { panic!( "Converting {} to proc_macro2::Literal has unexpected result:\ \nconverted: {:?}\nexpected: {:?}", t_name, proc_macro2::Literal::from(ours), pm_lit, ); } match T::try_from(pm_lit) { Err(e) => { panic!("Trying to convert proc_macro2::Literal to {} results in error: {}", t_name, e); } Ok(res) => { if res != ours { panic!( "Converting proc_macro2::Literal to {} has unexpected result:\ \nactual: {:?}\nexpected: {:?}", t_name, res, ours, ); } } } } macro_rules! assert_err { ($ty:ident, $input:literal, $kind:ident, $( $span:tt )+ ) => { assert_err_single!($ty::parse($input), $kind, $($span)+); assert_err_single!($crate::Literal::parse($input), $kind, $($span)+); }; } macro_rules! assert_err_single { ($expr:expr, $kind:ident, $( $span:tt )+ ) => { let res = $expr; let err = match res { Err(e) => e, Ok(v) => panic!( "Expected `{}` to return an error, but it returned Ok({:?})", stringify!($expr), v, ), }; if err.kind != $crate::err::ParseErrorKind::$kind { panic!( "Expected error kind {} for `{}` but got {:?}", stringify!($kind), stringify!($expr), err.kind, ) } let expected_span = assert_err_single!(@span $($span)+); if err.span != expected_span { panic!( "Expected error span {:?} for `{}` but got {:?}", expected_span, stringify!($expr), err.span, ) } }; (@span $start:literal .. $end:literal) => { Some($start .. $end) }; (@span $at:literal) => { Some($at.. 
$at + 1) }; (@span None) => { None }; } litrs-0.4.0/src/tests.rs000064400000000000000000000301631046102023000132640ustar 00000000000000use crate::Literal; #[test] fn empty() { assert_err!(Literal, "", Empty, None); } #[test] fn invalid_literals() { assert_err_single!(Literal::parse("."), InvalidLiteral, None); assert_err_single!(Literal::parse("+"), InvalidLiteral, None); assert_err_single!(Literal::parse("-"), InvalidLiteral, None); assert_err_single!(Literal::parse("e"), InvalidLiteral, None); assert_err_single!(Literal::parse("e8"), InvalidLiteral, None); assert_err_single!(Literal::parse("f32"), InvalidLiteral, None); assert_err_single!(Literal::parse("foo"), InvalidLiteral, None); assert_err_single!(Literal::parse("inf"), InvalidLiteral, None); assert_err_single!(Literal::parse("nan"), InvalidLiteral, None); assert_err_single!(Literal::parse("NaN"), InvalidLiteral, None); assert_err_single!(Literal::parse("NAN"), InvalidLiteral, None); assert_err_single!(Literal::parse("_2.7"), InvalidLiteral, None); assert_err_single!(Literal::parse(".5"), InvalidLiteral, None); } #[test] fn misc() { assert_err_single!(Literal::parse("0x44.5"), UnexpectedChar, 4..6); assert_err_single!(Literal::parse("a"), InvalidLiteral, None); assert_err_single!(Literal::parse(";"), InvalidLiteral, None); assert_err_single!(Literal::parse("0;"), UnexpectedChar, 1); assert_err_single!(Literal::parse(" 0"), InvalidLiteral, None); assert_err_single!(Literal::parse("0 "), UnexpectedChar, 1); assert_err_single!(Literal::parse("_"), InvalidLiteral, None); assert_err_single!(Literal::parse("_3"), InvalidLiteral, None); assert_err_single!(Literal::parse("a_123"), InvalidLiteral, None); assert_err_single!(Literal::parse("B_123"), InvalidLiteral, None); } macro_rules! 
assert_no_panic { ($input:expr) => { let arr = $input; let input = std::str::from_utf8(&arr).expect("not unicode"); let res = std::panic::catch_unwind(move || { let _ = Literal::parse(input); let _ = crate::BoolLit::parse(input); let _ = crate::IntegerLit::parse(input); let _ = crate::FloatLit::parse(input); let _ = crate::CharLit::parse(input); let _ = crate::StringLit::parse(input); let _ = crate::ByteLit::parse(input); let _ = crate::ByteStringLit::parse(input); }); if let Err(e) = res { println!("\n!!! panic for: {:?}", input); std::panic::resume_unwind(e); } }; } #[test] #[ignore] fn never_panic_up_to_3() { for a in 0..128 { assert_no_panic!([a]); for b in 0..128 { assert_no_panic!([a, b]); for c in 0..128 { assert_no_panic!([a, b, c]); } } } } // This test takes super long in debug mode, but in release mode it's fine. #[test] #[ignore] fn never_panic_len_4() { for a in 0..128 { for b in 0..128 { for c in 0..128 { for d in 0..128 { assert_no_panic!([a, b, c, d]); } } } } } #[cfg(feature = "proc-macro2")] #[test] fn proc_macro() { use std::convert::TryFrom; use proc_macro2::{ self as pm2, TokenTree, Group, TokenStream, Delimiter, Spacing, Punct, Span, Ident, }; use crate::{ BoolLit, ByteLit, ByteStringLit, CharLit, FloatLit, IntegerLit, StringLit, err::TokenKind }; macro_rules! assert_invalid_token { ($input:expr, expected: $expected:path, actual: $actual:path $(,)?) 
=> { let err = $input.unwrap_err(); if err.expected != $expected { panic!( "err.expected was expected to be {:?}, but is {:?}", $expected, err.expected, ); } if err.actual != $actual { panic!("err.actual was expected to be {:?}, but is {:?}", $actual, err.actual); } }; } let pm_u16_lit = pm2::Literal::u16_suffixed(2700); let pm_i16_lit = pm2::Literal::i16_unsuffixed(3912); let pm_f32_lit = pm2::Literal::f32_unsuffixed(3.14); let pm_f64_lit = pm2::Literal::f64_suffixed(99.3); let pm_string_lit = pm2::Literal::string("hello 🦊"); let pm_bytestr_lit = pm2::Literal::byte_string(b"hello \nfoxxo"); let pm_char_lit = pm2::Literal::character('🦀'); let u16_lit = Literal::parse("2700u16".to_string()).unwrap(); let i16_lit = Literal::parse("3912".to_string()).unwrap(); let f32_lit = Literal::parse("3.14".to_string()).unwrap(); let f64_lit = Literal::parse("99.3f64".to_string()).unwrap(); let string_lit = Literal::parse(r#""hello 🦊""#.to_string()).unwrap(); let bytestr_lit = Literal::parse(r#"b"hello \nfoxxo""#.to_string()).unwrap(); let char_lit = Literal::parse("'🦀'".to_string()).unwrap(); assert_eq!(Literal::from(&pm_u16_lit), u16_lit); assert_eq!(Literal::from(&pm_i16_lit), i16_lit); assert_eq!(Literal::from(&pm_f32_lit), f32_lit); assert_eq!(Literal::from(&pm_f64_lit), f64_lit); assert_eq!(Literal::from(&pm_string_lit), string_lit); assert_eq!(Literal::from(&pm_bytestr_lit), bytestr_lit); assert_eq!(Literal::from(&pm_char_lit), char_lit); let group = TokenTree::from(Group::new(Delimiter::Brace, TokenStream::new())); let punct = TokenTree::from(Punct::new(':', Spacing::Alone)); let ident = TokenTree::from(Ident::new("peter", Span::call_site())); assert_eq!( Literal::try_from(TokenTree::Literal(pm2::Literal::string("hello 🦊"))).unwrap(), Literal::String(StringLit::parse(r#""hello 🦊""#.to_string()).unwrap()), ); assert_invalid_token!( Literal::try_from(punct.clone()), expected: TokenKind::Literal, actual: TokenKind::Punct, ); assert_invalid_token!( 
Literal::try_from(group.clone()), expected: TokenKind::Literal, actual: TokenKind::Group, ); assert_invalid_token!( Literal::try_from(ident.clone()), expected: TokenKind::Literal, actual: TokenKind::Ident, ); assert_eq!(Literal::from(IntegerLit::try_from(pm_u16_lit.clone()).unwrap()), u16_lit); assert_eq!(Literal::from(IntegerLit::try_from(pm_i16_lit.clone()).unwrap()), i16_lit); assert_eq!(Literal::from(FloatLit::try_from(pm_f32_lit.clone()).unwrap()), f32_lit); assert_eq!(Literal::from(FloatLit::try_from(pm_f64_lit.clone()).unwrap()), f64_lit); assert_eq!(Literal::from(StringLit::try_from(pm_string_lit.clone()).unwrap()), string_lit); assert_eq!( Literal::from(ByteStringLit::try_from(pm_bytestr_lit.clone()).unwrap()), bytestr_lit, ); assert_eq!(Literal::from(CharLit::try_from(pm_char_lit.clone()).unwrap()), char_lit); assert_invalid_token!( StringLit::try_from(pm_u16_lit.clone()), expected: TokenKind::StringLit, actual: TokenKind::IntegerLit, ); assert_invalid_token!( StringLit::try_from(pm_f32_lit.clone()), expected: TokenKind::StringLit, actual: TokenKind::FloatLit, ); assert_invalid_token!( ByteLit::try_from(pm_bytestr_lit.clone()), expected: TokenKind::ByteLit, actual: TokenKind::ByteStringLit, ); assert_invalid_token!( ByteLit::try_from(pm_i16_lit.clone()), expected: TokenKind::ByteLit, actual: TokenKind::IntegerLit, ); assert_invalid_token!( IntegerLit::try_from(pm_string_lit.clone()), expected: TokenKind::IntegerLit, actual: TokenKind::StringLit, ); assert_invalid_token!( IntegerLit::try_from(pm_char_lit.clone()), expected: TokenKind::IntegerLit, actual: TokenKind::CharLit, ); assert_eq!( Literal::from(IntegerLit::try_from(TokenTree::from(pm_u16_lit.clone())).unwrap()), u16_lit, ); assert_eq!( Literal::from(IntegerLit::try_from(TokenTree::from(pm_i16_lit.clone())).unwrap()), i16_lit, ); assert_eq!( Literal::from(FloatLit::try_from(TokenTree::from(pm_f32_lit.clone())).unwrap()), f32_lit, ); assert_eq!( 
Literal::from(FloatLit::try_from(TokenTree::from(pm_f64_lit.clone())).unwrap()), f64_lit, ); assert_eq!( Literal::from(StringLit::try_from(TokenTree::from(pm_string_lit.clone())).unwrap()), string_lit, ); assert_eq!( Literal::from(ByteStringLit::try_from(TokenTree::from(pm_bytestr_lit.clone())).unwrap()), bytestr_lit, ); assert_eq!( Literal::from(CharLit::try_from(TokenTree::from(pm_char_lit.clone())).unwrap()), char_lit, ); assert_invalid_token!( StringLit::try_from(TokenTree::from(pm_u16_lit.clone())), expected: TokenKind::StringLit, actual: TokenKind::IntegerLit, ); assert_invalid_token!( StringLit::try_from(TokenTree::from(pm_f32_lit.clone())), expected: TokenKind::StringLit, actual: TokenKind::FloatLit, ); assert_invalid_token!( BoolLit::try_from(TokenTree::from(pm_bytestr_lit.clone())), expected: TokenKind::BoolLit, actual: TokenKind::ByteStringLit, ); assert_invalid_token!( BoolLit::try_from(TokenTree::from(pm_i16_lit.clone())), expected: TokenKind::BoolLit, actual: TokenKind::IntegerLit, ); assert_invalid_token!( IntegerLit::try_from(TokenTree::from(pm_string_lit.clone())), expected: TokenKind::IntegerLit, actual: TokenKind::StringLit, ); assert_invalid_token!( IntegerLit::try_from(TokenTree::from(pm_char_lit.clone())), expected: TokenKind::IntegerLit, actual: TokenKind::CharLit, ); assert_invalid_token!( StringLit::try_from(TokenTree::from(group)), expected: TokenKind::StringLit, actual: TokenKind::Group, ); assert_invalid_token!( BoolLit::try_from(TokenTree::from(punct)), expected: TokenKind::BoolLit, actual: TokenKind::Punct, ); assert_invalid_token!( FloatLit::try_from(TokenTree::from(ident)), expected: TokenKind::FloatLit, actual: TokenKind::Ident, ); } #[cfg(feature = "proc-macro2")] #[test] fn bool_try_from_tt() { use std::convert::TryFrom; use proc_macro2::{Ident, Span, TokenTree}; use crate::BoolLit; let ident = |s: &str| Ident::new(s, Span::call_site()); assert_eq!(BoolLit::try_from(TokenTree::Ident(ident("true"))).unwrap(), BoolLit::True); 
assert_eq!(BoolLit::try_from(TokenTree::Ident(ident("false"))).unwrap(), BoolLit::False); assert!(BoolLit::try_from(TokenTree::Ident(ident("falsex"))).is_err()); assert!(BoolLit::try_from(TokenTree::Ident(ident("_false"))).is_err()); assert!(BoolLit::try_from(TokenTree::Ident(ident("False"))).is_err()); assert!(BoolLit::try_from(TokenTree::Ident(ident("True"))).is_err()); assert!(BoolLit::try_from(TokenTree::Ident(ident("ltrue"))).is_err()); assert_eq!( Literal::try_from(TokenTree::Ident(ident("true"))).unwrap(), Literal::Bool(BoolLit::True), ); assert_eq!( Literal::try_from(TokenTree::Ident(ident("false"))).unwrap(), Literal::Bool(BoolLit::False), ); assert!(Literal::try_from(TokenTree::Ident(ident("falsex"))).is_err()); assert!(Literal::try_from(TokenTree::Ident(ident("_false"))).is_err()); assert!(Literal::try_from(TokenTree::Ident(ident("False"))).is_err()); assert!(Literal::try_from(TokenTree::Ident(ident("True"))).is_err()); assert!(Literal::try_from(TokenTree::Ident(ident("ltrue"))).is_err()); } #[cfg(feature = "proc-macro2")] #[test] fn invalid_token_display() { use crate::{InvalidToken, err::TokenKind}; let span = crate::err::Span::Two(proc_macro2::Span::call_site()); assert_eq!( InvalidToken { actual: TokenKind::StringLit, expected: TokenKind::FloatLit, span, }.to_string(), r#"expected a float literal (e.g. `3.14`), but found a string literal (e.g. "Ferris")"#, ); assert_eq!( InvalidToken { actual: TokenKind::Punct, expected: TokenKind::Literal, span, }.to_string(), r#"expected a literal, but found a punctuation character"#, ); }