pax_global_header 0000666 0000000 0000000 00000000064 14402354726 0014521 g ustar 00root root 0000000 0000000 52 comment=38424d37f03546df0823a6eaf8521f6c1fd15b58
lazy-regex-38424d37f03546df0823a6eaf8521f6c1fd15b58/ 0000775 0000000 0000000 00000000000 14402354726 0020455 5 ustar 00root root 0000000 0000000 lazy-regex-38424d37f03546df0823a6eaf8521f6c1fd15b58/.github/ 0000775 0000000 0000000 00000000000 14402354726 0022015 5 ustar 00root root 0000000 0000000 lazy-regex-38424d37f03546df0823a6eaf8521f6c1fd15b58/.github/FUNDING.yml 0000664 0000000 0000000 00000000020 14402354726 0023622 0 ustar 00root root 0000000 0000000 github: [Canop]
lazy-regex-38424d37f03546df0823a6eaf8521f6c1fd15b58/.gitignore 0000664 0000000 0000000 00000000022 14402354726 0022437 0 ustar 00root root 0000000 0000000 target
Cargo.lock
lazy-regex-38424d37f03546df0823a6eaf8521f6c1fd15b58/CHANGELOG.md 0000664 0000000 0000000 00000003524 14402354726 0022272 0 ustar 00root root 0000000 0000000
### v2.5.0 - 2023-03-09
- `replace!` and `replace_all!` now support non closure replacers - Fix #19
### v2.4.1 - 2023-01-05
- rustc minimal version downgraded from 1.65 to 1.56 by popular demand
### v2.4.0 - 2023-01-04
- allow building with `--no-default-features`
- regex crate upgraded from 1.5 to 1.7 (minor Unicode changes)
- rustc minimal version now 1.65
### v2.3.1 - 2022-11-03
- better error messages on bad regexes - thanks @necauqua
### v2.3.0 - 2022-03-05
- support for [bytes](https://docs.rs/regex/latest/regex/bytes/index.html) regexes with the `B` suffix notation - thanks @bnoctis - Fix #11
### v2.2.2 - 2021-10-20
Reexpose features of the regex crate
### v2.2.1 - 2021-06-07
Add the `regex_replace!` macro for when you only want to replace one match
Reexports more types of the regex crates
### v2.2.0 - 2021-06-04
Add the `regex_replace_all!` macro to do replacements with a closure taking the right number of `&str` arguments according to the number of groups in the regular expression
### v2.1.0 - 2021-06-02
Add the `lazy_regex!` macro returning a `Lazy` for easy use in a `pub static` shared declaration.
### v2.0.2 - 2021-05-31
Fix a cross compilation problem, thanks @AlephAlpha - Fix #5
### v2.0.1 - 2021-05-20
Improved documentation
### v2.0.0 - 2021-05-17
- regular expressions are now checked at compile time
- regex_is_match!
- regex_find!
- regex_captures!
### v1.1.0 - 2021-05-08
- no more complementary import needed
- now based on once_cell instead of lazy_static
### v1.0.0 - 2021-05-04
- first public release
lazy-regex-38424d37f03546df0823a6eaf8521f6c1fd15b58/Cargo.toml 0000664 0000000 0000000 00000002177 14402354726 0022414 0 ustar 00root root 0000000 0000000 [package]
name = "lazy-regex"
version = "2.5.0"
authors = ["Canop "]
edition = "2021"
description = "lazy static regular expressions checked at compile time"
keywords = ["macro", "lazy", "static", "regex"]
license = "MIT"
categories = ["text-processing"]
repository = "https://github.com/Canop/lazy-regex"
readme = "README.md"
rust-version = "1.56"
[dependencies]
once_cell = "1.17"
regex = {version = "1.7", default_features = false, features = ["std"]}
[dependencies.lazy-regex-proc_macros]
path = "src/proc_macros"
version = "2.4.0"
[features]
default = ["regex/default"]
std = ["regex/std"]
perf = ["regex/perf"]
perf-cache = ["regex/perf-cache"]
perf-dfa = ["regex/perf-dfa"]
perf-inline = ["regex/perf-inline"]
perf-literal = ["regex/perf-literal"]
unicode = ["regex/unicode"]
unicode-age = ["regex/unicode-age"]
unicode-bool = ["regex/unicode-bool"]
unicode-case = ["regex/unicode-case"]
unicode-gencat = ["regex/unicode-gencat"]
unicode-perl = ["regex/unicode-perl"]
unicode-script = ["regex/unicode-script"]
unicode-segment = ["regex/unicode-segment"]
[workspace]
members = ["src/proc_macros", "examples/regexes"]
lazy-regex-38424d37f03546df0823a6eaf8521f6c1fd15b58/LICENSE 0000664 0000000 0000000 00000002046 14402354726 0021464 0 ustar 00root root 0000000 0000000 MIT License
Copyright (c) 2018 Canop
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
lazy-regex-38424d37f03546df0823a6eaf8521f6c1fd15b58/README.md 0000664 0000000 0000000 00000011177 14402354726 0021743 0 ustar 00root root 0000000 0000000 [![MIT][s2]][l2] [![Latest Version][s1]][l1] [![docs][s3]][l3] [![Chat on Miaou][s4]][l4]
[s1]: https://img.shields.io/crates/v/lazy-regex.svg
[l1]: https://crates.io/crates/lazy-regex
[s2]: https://img.shields.io/badge/license-MIT-blue.svg
[l2]: LICENSE
[s3]: https://docs.rs/lazy-regex/badge.svg
[l3]: https://docs.rs/lazy-regex/
[s4]: https://miaou.dystroy.org/static/shields/room.svg
[l4]: https://miaou.dystroy.org/3
# lazy-regex
With lazy-regex macros, regular expressions
* are checked at compile time, with clear error messages
* are wrapped in `once_cell` lazy static initializers so that they're compiled only once
* can hold flags as suffix: `let case_insensitive_regex = regex!("ab*"i);`
* are defined in a less verbose way
The `regex!` macro returns references to normal instances of `regex::Regex` or `regex::bytes::Regex` so all the usual features are available.
Other macros are specialized for testing a match, replacing with concise closures, or capturing groups as substrings in some common situations:
* `regex_is_match!`
* `regex_find!`
* `regex_captures!`
* `regex_replace!`
* `regex_replace_all!`
All of them support the `B` flag for the `regex::bytes::Regex` variant.
Some structs of the regex crate are reexported to ease dependency management.
# Build Regexes
```rust
use lazy_regex::regex;
// build a simple regex
let r = regex!("sa+$");
assert_eq!(r.is_match("Saa"), false);
// build a regex with flag(s)
let r = regex!("sa+$"i);
assert_eq!(r.is_match("Saa"), true);
// you can use a raw literal
let r = regex!(r#"^"+$"#);
assert_eq!(r.is_match("\"\""), true);
// or a raw literal with flag(s)
let r = regex!(r#"^\s*("[a-t]*"\s*)+$"#i);
assert_eq!(r.is_match(r#" "Aristote" "Platon" "#), true);
// build a regex that operates on &[u8]
let r = regex!("(byte)?string$"B);
assert_eq!(r.is_match(b"bytestring"), true);
// there's no problem using the multiline definition syntax
let r = regex!(r#"(?x)
(?P<name>\w+)
-
(?P<version>[0-9.]+)
"#);
assert_eq!(r.find("This is lazy_regex-2.2!").unwrap().as_str(), "lazy_regex-2.2");
// (look at the regex_captures! macro to easily extract the groups)
```
```compile_fail
// this line doesn't compile because the regex is invalid:
let r = regex!("(unclosed");
```
Supported regex flags: `i`, `m`, `s`, `x`, `U`.
See [regex::RegexBuilder](https://docs.rs/regex/latest/regex/struct.RegexBuilder.html).
# Test a match
```rust
use lazy_regex::regex_is_match;
let b = regex_is_match!("[ab]+", "car");
assert_eq!(b, true);
```
# Extract a value
```rust
use lazy_regex::regex_find;
let f_word = regex_find!(r#"\bf\w+\b"#, "The fox jumps.");
assert_eq!(f_word, Some("fox"));
let f_word = regex_find!(r#"\bf\w+\b"#B, b"The forest is silent.");
assert_eq!(f_word, Some(b"forest" as &[u8]));
```
# Capture
```rust
use lazy_regex::regex_captures;
let (_, letter) = regex_captures!("([a-z])[0-9]+"i, "form A42").unwrap();
assert_eq!(letter, "A");
let (whole, name, version) = regex_captures!(
r#"(\w+)-([0-9.]+)"#, // a literal regex
"This is lazy_regex-2.0!", // any expression
).unwrap();
assert_eq!(whole, "lazy_regex-2.0");
assert_eq!(name, "lazy_regex");
assert_eq!(version, "2.0");
```
There's no limit to the size of the tuple.
It's checked at compile time to ensure you have the right number of capturing groups.
You receive `""` for optional groups with no value.
# Replace with captured groups
The [regex_replace!] and [regex_replace_all!] macros bring once compilation and compilation time checks to the `replace` and `replace_all` functions.
## Replacing with a closure
```rust
use lazy_regex::regex_replace_all;
let text = "Foo8 fuu3";
let text = regex_replace_all!(
r#"\bf(\w+)(\d)"#i,
text,
|_, name, digit| format!("F<{}>{}", name, digit),
);
assert_eq!(text, "F<oo>8 F<uu>3");
```
The number of arguments given to the closure is checked at compilation time to match the number of groups in the regular expression.
If it doesn't match you get, at compilation time, a clear error message.
## Replacing with another kind of Replacer
```rust
use lazy_regex::regex_replace_all;
let text = "UwU";
let output = regex_replace_all!("U", text, "O");
assert_eq!(&output, "OwO");
```
# Shared lazy static
When a regular expression is used in several functions, you sometimes don't want
to repeat it but have a shared static instance.
The `regex!` macro, while being backed by a lazy static regex, returns a reference.
If you want to have a shared lazy static regex, use the `lazy_regex!` macro:
```rust
use lazy_regex::*;
pub static GLOBAL_REX: Lazy<Regex> = lazy_regex!("^ab+$"i);
```
Like for the other macros, the regex is static, checked at compile time, and lazily built at first use.
lazy-regex-38424d37f03546df0823a6eaf8521f6c1fd15b58/examples/ 0000775 0000000 0000000 00000000000 14402354726 0022273 5 ustar 00root root 0000000 0000000 lazy-regex-38424d37f03546df0823a6eaf8521f6c1fd15b58/examples/regexes/ 0000775 0000000 0000000 00000000000 14402354726 0023735 5 ustar 00root root 0000000 0000000 lazy-regex-38424d37f03546df0823a6eaf8521f6c1fd15b58/examples/regexes/.gitignore 0000664 0000000 0000000 00000000500 14402354726 0025720 0 ustar 00root root 0000000 0000000 # Generated by Cargo
# will have compiled files and executables
/target/
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock
# These are backup files generated by rustfmt
**/*.rs.bk
lazy-regex-38424d37f03546df0823a6eaf8521f6c1fd15b58/examples/regexes/Cargo.toml 0000664 0000000 0000000 00000000373 14402354726 0025670 0 ustar 00root root 0000000 0000000 [package]
name = "regexes"
version = "1.0.1"
authors = ["dystroy "]
edition = "2018"
description = "An example for lazy-regex"
license = "MIT"
readme = "README.md"
[dependencies]
lazy-regex = { path = "../.." }
regex = "1.7"
lazy-regex-38424d37f03546df0823a6eaf8521f6c1fd15b58/examples/regexes/README.md 0000664 0000000 0000000 00000000263 14402354726 0025215 0 ustar 00root root 0000000 0000000
This example displays a few compilation regexes.
To demonstrate compile time checks
- uncomment the `let r = regex!("(unclosed");` line in `src/main.rs` (function `example_builds`)
- run `cargo run`
The program should fail to check with a clear error.
lazy-regex-38424d37f03546df0823a6eaf8521f6c1fd15b58/examples/regexes/src/ 0000775 0000000 0000000 00000000000 14402354726 0024524 5 ustar 00root root 0000000 0000000 lazy-regex-38424d37f03546df0823a6eaf8521f6c1fd15b58/examples/regexes/src/main.rs 0000664 0000000 0000000 00000004010 14402354726 0026011 0 ustar 00root root 0000000 0000000 use lazy_regex::*;
pub static SHARED: Lazy = lazy_regex!("^test$");
/// Show the different ways to build a regex with the `regex!` macro.
fn example_builds() {
    // a plain pattern: matching is case sensitive
    let plain = regex!("sa+$");
    assert!(!plain.is_match("Saa"));

    // flags are written as a suffix of the literal
    let insensitive = regex!("sa+b?$"i);
    assert!(insensitive.is_match("Saa"));

    // a raw literal is accepted
    let quotes = regex!(r#"^"+$"#);
    assert!(quotes.is_match("\"\""));

    // and so is a raw literal with flag(s)
    let quoted_words = regex!(r#"^\s*("[a-t]*"\s*)+$"#i);
    assert!(quoted_words.is_match(r#" "Aristote" "Platon" "#));

    // this line wouldn't compile:
    // let r = regex!("(unclosed");
}
/// Check a match directly, without naming the regex.
fn example_is_match() {
    assert!(regex_is_match!("[ab]+", "car"));
}
/// Use the `SHARED` static regex declared at the top of the file.
fn example_using_shared_static() {
    let is_test = SHARED.is_match("not test");
    assert!(!is_test);
}
/// Extract the whole match and the two groups as a tuple of strings.
fn example_captures() {
    let captured = regex_captures!(
        r#"(\w+)-([0-9.]+)"#,      // a literal regex
        "This is lazy_regex-2.0!", // any expression
    );
    let (whole, name, version) = captured.unwrap();
    assert_eq!(
        (whole, name, version),
        ("lazy_regex-2.0", "lazy_regex", "2.0"),
    );
}
/// Replace all matches using closures receiving the whole match
/// and then one `&str` per group of the regex.
fn examples_replace_all() {
    let text = "Foo fuu";
    let text = regex_replace_all!(
        r#"\bf(\w+)"#i,
        text,
        |_, suffix| format!("F<{}>", suffix),
    );
    // each word starting with "f" or "F" is rewritten with its suffix between < and >
    assert_eq!(text, "F<oo> F<uu>");

    let text = "A = 5 + 3 and B=27+4";
    let text = regex_replace_all!(
        r#"(?x)
            (\d+)
            \s*
            \+
            \s*
            (\d+)
        "#,
        text,
        |_, a: &str, b: &str| {
            // both groups only match digits
            let a: u64 = a.parse().unwrap();
            let b: u64 = b.parse().unwrap();
            (a + b).to_string()
        },
    );
    assert_eq!(text, "A = 8 and B=31");
}
/// Run all the examples, most of them several times: the regular
/// expressions are built only once, whatever the number of calls.
fn main() {
    (0..10).for_each(|_| example_builds());
    example_is_match();
    for _ in 0..10 {
        example_captures();
        example_using_shared_static();
        examples_replace_all();
    }
}
lazy-regex-38424d37f03546df0823a6eaf8521f6c1fd15b58/src/ 0000775 0000000 0000000 00000000000 14402354726 0021244 5 ustar 00root root 0000000 0000000 lazy-regex-38424d37f03546df0823a6eaf8521f6c1fd15b58/src/lib.rs 0000664 0000000 0000000 00000011155 14402354726 0022363 0 ustar 00root root 0000000 0000000 /*!
With lazy-regex macros, regular expressions
* are checked at compile time, with clear error messages
* are wrapped in `once_cell` lazy static initializers so that they're compiled only once
* can hold flags as suffix: `let case_insensitive_regex = regex!("ab*"i);`
* are defined in a less verbose way
The [regex!] macro returns references to normal instances of [regex::Regex] or [regex::bytes::Regex] so all the usual features are available.
Other macros are specialized for testing a match, replacing with concise closures, or capturing groups as substrings in some common situations:
* [regex_is_match!]
* [regex_find!]
* [regex_captures!]
* [regex_replace!]
* [regex_replace_all!]
All of them support the `B` flag for the `regex::bytes::Regex` variant.
Some structs of the regex crate are reexported to ease dependency management.
# Build Regexes
```rust
use lazy_regex::regex;
// build a simple regex
let r = regex!("sa+$");
assert_eq!(r.is_match("Saa"), false);
// build a regex with flag(s)
let r = regex!("sa+$"i);
assert_eq!(r.is_match("Saa"), true);
// you can use a raw literal
let r = regex!(r#"^"+$"#);
assert_eq!(r.is_match("\"\""), true);
// or a raw literal with flag(s)
let r = regex!(r#"^\s*("[a-t]*"\s*)+$"#i);
assert_eq!(r.is_match(r#" "Aristote" "Platon" "#), true);
// build a regex that operates on &[u8]
let r = regex!("(byte)?string$"B);
assert_eq!(r.is_match(b"bytestring"), true);
// there's no problem using the multiline definition syntax
let r = regex!(r#"(?x)
(?P<name>\w+)
-
(?P<version>[0-9.]+)
"#);
assert_eq!(r.find("This is lazy_regex-2.2!").unwrap().as_str(), "lazy_regex-2.2");
// (look at the regex_captures! macro to easily extract the groups)
```
```compile_fail
// this line doesn't compile because the regex is invalid:
let r = regex!("(unclosed");
```
Supported regex flags: `i`, `m`, `s`, `x`, `U`.
See [regex::RegexBuilder].
# Test a match
```rust
use lazy_regex::regex_is_match;
let b = regex_is_match!("[ab]+", "car");
assert_eq!(b, true);
```
doc: [regex_is_match!]
# Extract a value
```rust
use lazy_regex::regex_find;
let f_word = regex_find!(r#"\bf\w+\b"#, "The fox jumps.");
assert_eq!(f_word, Some("fox"));
let f_word = regex_find!(r#"\bf\w+\b"#B, b"The forest is silent.");
assert_eq!(f_word, Some(b"forest" as &[u8]));
```
doc: [regex_find!]
# Capture
```rust
use lazy_regex::regex_captures;
let (_, letter) = regex_captures!("([a-z])[0-9]+"i, "form A42").unwrap();
assert_eq!(letter, "A");
let (whole, name, version) = regex_captures!(
r#"(\w+)-([0-9.]+)"#, // a literal regex
"This is lazy_regex-2.0!", // any expression
).unwrap();
assert_eq!(whole, "lazy_regex-2.0");
assert_eq!(name, "lazy_regex");
assert_eq!(version, "2.0");
```
There's no limit to the size of the tuple.
It's checked at compile time to ensure you have the right number of capturing groups.
You receive `""` for optional groups with no value.
doc: [regex_captures!]
# Replace with captured groups
The [regex_replace!] and [regex_replace_all!] macros bring once compilation and compilation time checks to the `replace` and `replace_all` functions.
## Replacing with a closure
```rust
use lazy_regex::regex_replace_all;
let text = "Foo8 fuu3";
let text = regex_replace_all!(
r#"\bf(\w+)(\d)"#i,
text,
|_, name, digit| format!("F<{}>{}", name, digit),
);
assert_eq!(text, "F<oo>8 F<uu>3");
```
The number of arguments given to the closure is checked at compilation time to match the number of groups in the regular expression.
If it doesn't match you get, at compilation time, a clear error message.
## Replacing with another kind of Replacer
```rust
use lazy_regex::regex_replace_all;
let text = "UwU";
let output = regex_replace_all!("U", text, "O");
assert_eq!(&output, "OwO");
```
# Shared lazy static
When a regular expression is used in several functions, you sometimes don't want
to repeat it but have a shared static instance.
The [regex!] macro, while being backed by a lazy static regex, returns a reference.
If you want to have a shared lazy static regex, use the [lazy_regex!] macro:
```rust
use lazy_regex::*;
pub static GLOBAL_REX: Lazy<Regex> = lazy_regex!("^ab+$"i);
```
Like for the other macros, the regex is static, checked at compile time, and lazily built at first use.
doc: [lazy_regex!]
*/
pub use {
lazy_regex_proc_macros::{
lazy_regex, regex,
regex_captures,
regex_find,
regex_is_match,
regex_replace,
regex_replace_all,
},
once_cell::sync::Lazy,
regex::{
Captures, Regex, RegexBuilder,
bytes::{
Regex as BytesRegex,
RegexBuilder as BytesRegexBuilder
},
},
};
lazy-regex-38424d37f03546df0823a6eaf8521f6c1fd15b58/src/proc_macros/ 0000775 0000000 0000000 00000000000 14402354726 0023553 5 ustar 00root root 0000000 0000000 lazy-regex-38424d37f03546df0823a6eaf8521f6c1fd15b58/src/proc_macros/Cargo.toml 0000664 0000000 0000000 00000000533 14402354726 0025504 0 ustar 00root root 0000000 0000000 [package]
name = "lazy-regex-proc_macros"
version = "2.4.1"
authors = ["Canop "]
description = "proc macros for the lazy_regex crate"
license = "MIT"
edition = "2018"
[dependencies]
syn = { version = "1.0.103", features = ["full"] }
proc-macro2 = "1.0"
quote = "1.0"
regex = "1.7"
[lib]
proc-macro = true
path = "mod.rs"
lazy-regex-38424d37f03546df0823a6eaf8521f6c1fd15b58/src/proc_macros/args.rs 0000664 0000000 0000000 00000003224 14402354726 0025056 0 ustar 00root root 0000000 0000000 use syn::{
parse::{Parse, ParseStream, Result},
Expr, ExprClosure, LitStr, Token,
};
/// The two arguments given to one of the `regex_is_match`,
/// `regex_find`, or `regex_captures` macros.
pub(crate) struct RexValArgs {
    /// The string literal holding the pattern, with its optional flags as suffix
    pub regex_str: LitStr,
    /// The expression which is (or produces) the text to search or check
    pub value: Expr,
}
impl Parse for RexValArgs {
fn parse(input: ParseStream<'_>) -> Result {
let regex_str = input.parse::()?;
input.parse::()?;
let value = input.parse::()?;
let _ = input.parse::(); // allow a trailing comma
Ok(RexValArgs { regex_str, value })
}
}
/// The three arguments given to the `regex_replace`
/// and `regex_replace_all` macros.
pub(crate) struct ReplaceArgs {
    /// The string literal holding the pattern, with its optional flags as suffix
    pub regex_str: LitStr,
    /// The expression which is (or produces) the text in which to replace
    pub value: Expr,
    /// What the matches are replaced with
    pub replacer: MaybeFun,
}
/// The replacer given as third argument of a replacing macro
pub(crate) enum MaybeFun {
    /// A closure written directly in the macro call
    Fun(ExprClosure),
    /// Any other expression
    Expr(Expr),
}
impl Parse for ReplaceArgs {
fn parse(input: ParseStream<'_>) -> Result {
let regex_str = input.parse::()?;
input.parse::()?;
let value = input.parse::()?;
input.parse::()?;
// we try as a closure before, and as a general expr if
// it doesn't work out
let replacer = if let Ok(fun) = input.parse::() {
MaybeFun::Fun(fun)
} else {
MaybeFun::Expr(input.parse::()?)
};
let _ = input.parse::(); // allow a trailing comma
Ok(ReplaceArgs {
regex_str,
value,
replacer,
})
}
}
lazy-regex-38424d37f03546df0823a6eaf8521f6c1fd15b58/src/proc_macros/mod.rs 0000664 0000000 0000000 00000015376 14402354726 0024714 0 ustar 00root root 0000000 0000000 mod args;
mod regex_code;
use {
crate::{args::*, regex_code::*},
proc_macro::TokenStream,
quote::quote,
std::convert::TryFrom,
syn::{parse_macro_input, Expr},
};
// The following `process*` functions are convenience funcs
// to reduce boilerplate in macro implementations below.
fn process(input: TokenStream, f: F) -> TokenStream
where
T: Into,
F: Fn(RegexCode) -> T,
{
match RegexCode::try_from(input) {
Ok(r) => f(r).into(),
Err(e) => e.to_compile_error().into(),
}
}
fn process_with_value(input: TokenStream, f: F) -> TokenStream
where
T: Into,
F: Fn(RegexCode, Expr) -> T,
{
let parsed = parse_macro_input!(input as RexValArgs);
match RegexCode::try_from(parsed.regex_str) {
Ok(r) => f(r, parsed.value).into(),
Err(e) => e.to_compile_error().into(),
}
}
/// Build a regex checked at compilation time and stored in a
/// lazy static, so that it's only built once, at first use.
///
/// The macro returns a reference to a [regex::Regex], or to
/// a [regex::bytes::Regex] when the `B` flag is given.
///
/// Flags are specified as a suffix of the literal:
/// ```
/// let case_insensitive_regex = regex!("^ab+$"i);
/// ```
///
/// All the usual functions of the regex are available:
/// ```
/// let verbose = regex!(r#"_([\d\.]+)"#)
///     .replace("This is lazy-regex_2.2", " (version $1)");
/// assert_eq!(verbose, "This is lazy-regex (version 2.2)");
/// ```
#[proc_macro]
pub fn regex(input: TokenStream) -> TokenStream {
    process(input, |code| code.lazy_static())
}
/// Return an instance of `once_cell::sync::Lazy` or
/// `once_cell::sync::Lazy` that
/// you can use in a public static declaration.
///
/// Example:
///
/// ```
/// pub static GLOBAL_REX: Lazy = lazy_regex!("^ab+$"i);
/// ```
///
/// As for other macros, the regex is checked at compilation time.
#[proc_macro]
pub fn lazy_regex(input: TokenStream) -> TokenStream {
process(input, |regex_code| regex_code.build)
}
/// Tell whether the second argument matches the regular
/// expression given as first argument.
///
/// The regex is checked at compile time and kept in a lazy static.
///
/// Example:
/// ```
/// let b = regex_is_match!("[ab]+", "car");
/// assert_eq!(b, true);
/// ```
#[proc_macro]
pub fn regex_is_match(input: TokenStream) -> TokenStream {
    process_with_value(input, |code, haystack| {
        let declaration = code.statick();
        quote! {{
            #declaration;
            RE.is_match(#haystack)
        }}
    })
}
/// Extract the leftmost match of the regex in the
/// second argument, as a `&str`, or a `&[u8]` if the `B` flag is set.
///
/// The result is `None` when there's no match.
///
/// Example:
/// ```
/// let f_word = regex_find!(r#"\bf\w+\b"#, "The fox jumps.");
/// assert_eq!(f_word, Some("fox"));
/// let f_word = regex_find!(r#"\bf\w+\b"#B, b"The forest is silent.");
/// assert_eq!(f_word, Some(b"forest" as &[u8]));
/// ```
#[proc_macro]
pub fn regex_find(input: TokenStream) -> TokenStream {
    process_with_value(input, |code, haystack| {
        let declaration = code.statick();
        // the accessor of the matched text depends on the kind of regex
        let accessor = match &code.regex {
            RegexInstance::Bytes(..) => quote!(as_bytes),
            RegexInstance::Regex(..) => quote!(as_str),
        };
        quote! {{
            #declaration;
            RE.find(#haystack).map(|mat| mat. #accessor ())
        }}
    })
}
/// Extract captured groups as a tuple of &str.
///
/// If there's no match, the macro returns `None`.
///
/// If an optional group has no value, the tuple
/// will contain `""` instead.
///
/// Example:
/// ```
/// let (whole, name, version) = regex_captures!(
/// r#"(\w+)-([0-9.]+)"#, // a literal regex
/// "This is lazy_regex-2.0!", // any expression
/// ).unwrap();
/// assert_eq!(whole, "lazy_regex-2.0");
/// assert_eq!(name, "lazy_regex");
/// assert_eq!(version, "2.0");
/// ```
#[proc_macro]
pub fn regex_captures(input: TokenStream) -> TokenStream {
process_with_value(input, |regex_code, value| {
let statick = regex_code.statick();
let n = regex_code.captures_len();
let groups = (0..n).map(|i| {
quote! {
caps.get(#i).map_or("", |c| c.as_str())
}
});
quote! {{
#statick;
RE.captures(#value)
.map(|caps| (
#(#groups),*
))
}}
})
}
/// Common implementation of `regex_replace` and `regex_replace_all`.
///
/// The generated code calls `replacen` on the lazy static regex with
/// the given `limit`. A closure replacer is wrapped in another closure
/// which receives a `lazy_regex::Captures` and calls the user's closure
/// with one `&str` per group, the first one being the whole match.
/// Any other replacer is passed as is.
fn replacen(input: TokenStream, limit: usize) -> TokenStream {
    let ReplaceArgs {
        regex_str,
        value,
        replacer,
    } = parse_macro_input!(input as ReplaceArgs);
    let code = match RegexCode::try_from(regex_str) {
        Ok(code) => code,
        Err(err) => return err.to_compile_error().into(),
    };
    let declaration = code.statick();
    // the tokens of the third argument of the generated `replacen` call
    let replacement = match replacer {
        MaybeFun::Expr(expr) => quote!(#expr),
        MaybeFun::Fun(fun) => {
            let group_count = code.captures_len();
            // a group with no value is given as ""
            let group_args = (0..group_count).map(|idx| {
                quote! {
                    caps.get(#idx).map_or("", |c| c.as_str())
                }
            });
            quote! {
                |caps: &lazy_regex::Captures<'_>| {
                    let fun = #fun;
                    fun(
                        #(#group_args),*
                    )
                }
            }
        }
    };
    let stream = quote! {{
        #declaration;
        RE.replacen(#value, #limit, #replacement)
    }};
    stream.into()
}
/// Replace the leftmost match in the second argument
/// using the replacer given as third argument.
///
/// When the replacer is a closure, it receives one or more `&str`:
/// the whole match first, then one per group.
/// An optional group with no value is given as `""`.
///
/// Example:
/// ```
/// let text = "Fuu fuuu";
/// let text = regex_replace!(
///     "f(u*)"i,
///     text,
///     |_, suffix: &str| format!("F{}", suffix.len()),
/// );
/// assert_eq!(text, "F2 fuuu");
/// ```
#[proc_macro]
pub fn regex_replace(input: TokenStream) -> TokenStream {
    // only the first match is replaced
    let limit = 1;
    replacen(input, limit)
}
/// Replace all non-overlapping matches in the second argument
/// using the replacer given as third argument.
///
/// When the replacer is a closure, it receives one or more `&str`:
/// the whole match first, then one per group.
/// An optional group with no value is given as `""`.
///
/// Example:
/// ```
/// let text = "Foo fuu";
/// let text = regex_replace_all!(
///     r#"\bf(?P<suffix>\w+)"#i,
///     text,
///     |_, suffix| format!("F<{}>", suffix),
/// );
/// assert_eq!(text, "F<oo> F<uu>");
/// ```
#[proc_macro]
pub fn regex_replace_all(input: TokenStream) -> TokenStream {
    // a limit of 0 is how `replacen` is asked to replace every match
    let limit = 0;
    replacen(input, limit)
}
lazy-regex-38424d37f03546df0823a6eaf8521f6c1fd15b58/src/proc_macros/regex_code.rs 0000664 0000000 0000000 00000007107 14402354726 0026232 0 ustar 00root root 0000000 0000000 use {
proc_macro::TokenStream,
proc_macro2::TokenStream as TokenStream2,
quote::quote,
std::convert::TryFrom,
syn::LitStr,
};
/// What the lazy-regex macros know of a regex literal: the code
/// building it in a lazy static, and the regex itself as compiled
/// during the macro expansion.
pub(crate) struct RegexCode {
    /// The tokens of the `lazy_regex::Lazy::new(...)` expression building the regex
    pub build: TokenStream2,
    /// The regex compiled at macro expansion, used to check the
    /// pattern and to count its groups
    pub regex: RegexInstance,
}
/// A regex compiled at macro expansion, either of the
/// standard kind or of the bytes one (`B` flag)
pub(crate) enum RegexInstance {
    /// A regex working on `&str`
    Regex(regex::Regex),
    /// A regex working on `&[u8]`
    Bytes(regex::bytes::Regex),
}
impl TryFrom for RegexCode {
type Error = syn::Error;
fn try_from(lit_str: LitStr) -> Result {
let pattern = lit_str.value();
let mut case_insensitive = false;
let mut multi_line = false;
let mut dot_matches_new_line = false;
let mut ignore_whitespace = false;
let mut swap_greed = false;
let mut is_bytes = false;
for (i, ch) in lit_str.suffix().chars().enumerate() {
match ch {
'i' => case_insensitive = true,
'm' => multi_line = true,
's' => dot_matches_new_line = true,
'x' => ignore_whitespace = true,
'U' => swap_greed = true,
'B' => is_bytes = true, // non-standard!
_ => {
let lit = lit_str.token();
let pos = lit.to_string().len() - i;
// subspan only works on nighlty
return Err(syn::Error::new(
lit.subspan(pos - 1..pos).unwrap_or_else(|| lit.span()),
format!("unrecognized regex flag {:?}", ch),
));
}
};
}
let regex = if is_bytes {
regex::bytes::Regex::new(&pattern).map(RegexInstance::Bytes)
} else {
regex::Regex::new(&pattern).map(RegexInstance::Regex)
};
let regex = regex.map_err(|e| syn::Error::new(lit_str.span(), e.to_string()))?;
let builder_token = if is_bytes {
quote!(BytesRegexBuilder)
} else {
quote!(RegexBuilder)
};
let build = quote! {
lazy_regex::Lazy::new(|| {
//println!("compiling regex {:?}", #pattern);
lazy_regex:: #builder_token ::new(#pattern)
.case_insensitive(#case_insensitive)
.multi_line(#multi_line)
.dot_matches_new_line(#dot_matches_new_line)
.ignore_whitespace(#ignore_whitespace)
.swap_greed(#swap_greed)
.build()
.unwrap()
})
};
Ok(Self { build, regex })
}
}
impl TryFrom for RegexCode {
type Error = syn::Error;
fn try_from(token_stream: TokenStream) -> Result {
Self::try_from(syn::parse::(token_stream)?)
}
}
impl RegexCode {
    /// Return the declaration of a static named `RE`, a
    /// `lazy_regex::Lazy` holding the regex, whose type
    /// depends on the kind of regex (standard or bytes).
    pub fn statick(&self) -> TokenStream2 {
        let build = &self.build;
        let regex_token = match self.regex {
            RegexInstance::Regex(..) => quote!(Regex),
            RegexInstance::Bytes(..) => quote!(BytesRegex),
        };
        quote! {
            static RE: lazy_regex::Lazy<lazy_regex:: #regex_token > = #build;
        }
    }

    /// Return a block declaring the static `RE` and
    /// evaluating to a reference to it.
    pub fn lazy_static(&self) -> TokenStream2 {
        let statick = self.statick();
        quote! {{
            #statick;
            &RE
        }}
    }

    /// Return the number of groups of the regex, as given by its
    /// `captures_len` function.
    pub fn captures_len(&self) -> usize {
        match &self.regex {
            RegexInstance::Regex(regex) => regex.captures_len(),
            RegexInstance::Bytes(regex) => regex.captures_len(),
        }
    }
}