pep508_rs-0.9.1/.cargo_vcs_info.json0000644000000001360000000000100125700ustar { "git": { "sha1": "830f92f482d505346f9b29fb0c712a9aa4f11026" }, "path_in_vcs": "" }pep508_rs-0.9.1/Cargo.toml0000644000000044230000000000100105710ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" name = "pep508_rs" version = "0.9.1" build = false include = [ "/src", "Changelog.md", "License-Apache", "License-BSD", "Readme.md", "pyproject.toml", ] autobins = false autoexamples = false autotests = false autobenches = false description = "A library for python dependency specifiers, better known as PEP 508" readme = "Readme.md" license = "Apache-2.0 OR BSD-2-Clause" repository = "https://github.com/konstin/pep508_rs" [lib] name = "pep508_rs" crate-type = [ "cdylib", "rlib", ] path = "src/lib.rs" [dependencies.boxcar] version = "0.2.6" [dependencies.indexmap] version = "2.6.0" [dependencies.itertools] version = "0.13.0" [dependencies.once_cell] version = "1.19.0" [dependencies.pep440_rs] version = "0.7.2" features = ["version-ranges"] [dependencies.regex] version = "1.10.4" [dependencies.rustc-hash] version = "2.0.0" [dependencies.schemars] version = "0.8.21" optional = true [dependencies.serde] version = "1.0.198" features = [ "rc", "derive", ] [dependencies.smallvec] version = "1.13.2" [dependencies.thiserror] version = "1.0.59" [dependencies.tracing] version = "0.1.40" optional = true [dependencies.unicode-width] version = "0.2.0" [dependencies.url] version = "2.5.0" features = ["serde"] [dependencies.urlencoding] version = "2.1.3" 
[dependencies.version-ranges] version = "0.1.0" [dev-dependencies.indoc] version = "2.0.5" [dev-dependencies.insta] version = "1.41.0" [dev-dependencies.log] version = "0.4.22" [dev-dependencies.serde_json] version = "1.0.116" [dev-dependencies.testing_logger] version = "0.1.1" [dev-dependencies.tracing-test] version = "0.2.5" [features] default = [] non-pep508-extensions = [] schemars = ["dep:schemars"] tracing = [ "dep:tracing", "pep440_rs/tracing", ] [lints.clippy] collapsible_if = "allow" pep508_rs-0.9.1/Cargo.toml.orig000064400000000000000000000034561046102023000142570ustar 00000000000000[package] name = "pep508_rs" version = "0.9.1" description = "A library for python dependency specifiers, better known as PEP 508" edition = "2021" include = ["/src", "Changelog.md", "License-Apache", "License-BSD", "Readme.md", "pyproject.toml"] # Same license as pypa/packaging where the tests are from license = "Apache-2.0 OR BSD-2-Clause" readme = "Readme.md" repository = "https://github.com/konstin/pep508_rs" [lib] name = "pep508_rs" crate-type = ["cdylib", "rlib"] [lints.clippy] collapsible_if = "allow" [dependencies] boxcar = "0.2.6" indexmap = "2.6.0" itertools = "0.13.0" once_cell = "1.19.0" pep440_rs = { version = "0.7.2", features = ["version-ranges"] } regex = "1.10.4" rustc-hash = "2.0.0" schemars = { version = "0.8.21", optional = true } serde = { version = "1.0.198", features = ["rc", "derive"] } smallvec = "1.13.2" thiserror = "1.0.59" tracing = { version = "0.1.40", optional = true } unicode-width = "0.2.0" url = { version = "2.5.0", features = ["serde"] } urlencoding = "2.1.3" version-ranges = "0.1.0" [dev-dependencies] indoc = "2.0.5" insta = "1.41.0" log = "0.4.22" serde_json = "1.0.116" testing_logger = "0.1.1" tracing-test = "0.2.5" [features] tracing = ["dep:tracing", "pep440_rs/tracing"] # PEP 508 allows only URLs such as `foo @ https://example.org/foo` or `foo @ file:///home/ferris/foo`, and # arguably does not allow relative paths in file URLs (`foo @ 
file://./foo`, # `foo @ file:foo-3.0.0-py3-none-any.whl`, `foo @ file://foo-3.0.0-py3-none-any.whl`), as they are not part of the # relevant RFCs, even though widely supported. Pip accepts relative file URLs and paths instead of urls # (`foo @ ./foo-3.0.0-py3-none-any.whl`). The `non-pep508-extensions` feature controls whether these non-spec features will # be supported. non-pep508-extensions = [] default = [] schemars = ["dep:schemars"] pep508_rs-0.9.1/Changelog.md000064400000000000000000000013471046102023000135760ustar 00000000000000# 0.9.1 - Fix feature scoping to support the wasm32-unknown-unknown target # 0.9.0 - Rewritten markers for sound arithmetic operations and much faster evaluation # 0.8.1 - Reexport pep440_rs # 0.8.0 - Remove rkyv feature - Use tracing-test instead of testing_logger # 0.7.0 - Remove pyo3 # 0.6.1 - Update to pyo3 0.22 # 0.6.0 - Added `origin` to `Requirement` # 0.5.0 - Update to pyo3 0.21 - Update to pyo3-log 0.1.0 # v0.4.2 - CI fixes, mac os builds are temporarily disabled. # v0.4.1 - CI fixes, mac os builds are temporarily disabled. # v0.4.0 - Package and extra names are now validated and normalized. - Updated `pep440_rs` to 0.5.0. - [rkyv](https://github.com/rkyv/rkyv) support. - `tracing` is now a separate feature. pep508_rs-0.9.1/License-Apache000064400000000000000000000236751046102023000140610ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity.
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS pep508_rs-0.9.1/License-BSD000064400000000000000000000024151046102023000132750ustar 00000000000000Copyright (c) 2023 konstin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. pep508_rs-0.9.1/Readme.md000064400000000000000000000030101046102023000130710ustar 00000000000000# Dependency specifiers (PEP 508) in Rust [![Crates.io](https://img.shields.io/crates/v/pep508_rs.svg?logo=rust&style=flat-square)](https://crates.io/crates/pep508_rs) [![PyPI](https://img.shields.io/pypi/v/pep508_rs.svg?logo=python&style=flat-square)](https://pypi.org/project/pep508_rs) A library for python [dependency specifiers](https://packaging.python.org/en/latest/specifications/dependency-specifiers/), better known as [PEP 508](https://peps.python.org/pep-0508/). ## Usage **In Rust** ```rust use std::str::FromStr; use pep508_rs::{Requirement, VerbatimUrl, ExtraName}; let marker = r#"requests [security,tests] >= 2.8.1, == 2.8.* ; python_version > "3.8""#; let dependency_specification = Requirement::<VerbatimUrl>::from_str(marker).unwrap(); assert_eq!(dependency_specification.name.as_ref(), "requests"); assert_eq!(dependency_specification.extras, vec![ExtraName::from_str("security").unwrap(), ExtraName::from_str("tests").unwrap()]); ``` ## Markers Markers allow you to install dependencies only in specific environments (python version, operating system, architecture, etc.) or when a specific feature is activated. E.g.
you can say `importlib-metadata ; python_version < "3.8"` or `itsdangerous (>=1.1.0) ; extra == 'security'`. Unfortunately, the marker grammar has some oversights (e.g. ) and the design of comparisons (PEP 440 comparisons with lexicographic fallback) leads to confusing outcomes. This implementation tries to carefully validate everything and emit warnings whenever bogus comparisons with unintended semantics are made. pep508_rs-0.9.1/src/cursor.rs000064400000000000000000000103151046102023000140320ustar 00000000000000use std::fmt::{Display, Formatter}; use std::str::Chars; use crate::{Pep508Error, Pep508ErrorSource, Pep508Url}; /// A [`Cursor`] over a string. #[derive(Debug, Clone)] pub(crate) struct Cursor<'a> { input: &'a str, chars: Chars<'a>, pos: usize, } impl<'a> Cursor<'a> { /// Convert from `&str`. pub(crate) fn new(input: &'a str) -> Self { Self { input, chars: input.chars(), pos: 0, } } /// Returns a new cursor starting at the given position. pub(crate) fn at(self, pos: usize) -> Self { Self { input: self.input, chars: self.input[pos..].chars(), pos, } } /// Returns the current byte position of the cursor. pub(crate) fn pos(&self) -> usize { self.pos } /// Returns a slice over the input string. pub(crate) fn slice(&self, start: usize, len: usize) -> &str { &self.input[start..start + len] } /// Peeks the next character and position from the input stream without consuming it. pub(crate) fn peek(&self) -> Option<(usize, char)> { self.chars.clone().next().map(|char| (self.pos, char)) } /// Peeks the next character from the input stream without consuming it. pub(crate) fn peek_char(&self) -> Option { self.chars.clone().next() } /// Eats the next character from the input stream if it matches the given token. pub(crate) fn eat_char(&mut self, token: char) -> Option { let (start_pos, peek_char) = self.peek()?; if peek_char == token { self.next(); Some(start_pos) } else { None } } /// Consumes whitespace from the cursor. 
pub(crate) fn eat_whitespace(&mut self) { while let Some(char) = self.peek_char() { if char.is_whitespace() { self.next(); } else { return; } } } /// Returns the next character and position from the input stream and consumes it. pub(crate) fn next(&mut self) -> Option<(usize, char)> { let pos = self.pos; let char = self.chars.next()?; self.pos += char.len_utf8(); Some((pos, char)) } pub(crate) fn remaining(&self) -> usize { self.chars.clone().count() } /// Peeks over the cursor as long as the condition is met, without consuming it. pub(crate) fn peek_while(&mut self, condition: impl Fn(char) -> bool) -> (usize, usize) { let peeker = self.chars.clone(); let start = self.pos(); let len = peeker.take_while(|c| condition(*c)).count(); (start, len) } /// Consumes characters from the cursor as long as the condition is met. pub(crate) fn take_while(&mut self, condition: impl Fn(char) -> bool) -> (usize, usize) { let start = self.pos(); let mut len = 0; while let Some(char) = self.peek_char() { if !condition(char) { break; } self.next(); len += char.len_utf8(); } (start, len) } /// Consumes characters from the cursor, raising an error if it doesn't match the given token. pub(crate) fn next_expect_char( &mut self, expected: char, span_start: usize, ) -> Result<(), Pep508Error> { match self.next() { None => Err(Pep508Error { message: Pep508ErrorSource::String(format!( "Expected '{expected}', found end of dependency specification" )), start: span_start, len: 1, input: self.to_string(), }), Some((_, value)) if value == expected => Ok(()), Some((pos, other)) => Err(Pep508Error { message: Pep508ErrorSource::String(format!( "Expected `{expected}`, found `{other}`" )), start: pos, len: other.len_utf8(), input: self.to_string(), }), } } } impl Display for Cursor<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.input) } } pep508_rs-0.9.1/src/lib.rs000064400000000000000000001052221046102023000132650ustar 00000000000000//! 
//! A library for [dependency specifiers](https://packaging.python.org/en/latest/specifications/dependency-specifiers/)
//! previously known as [PEP 508](https://peps.python.org/pep-0508/)
//!
//! ## Usage
//!
//! ```
//! use std::str::FromStr;
//! use pep508_rs::{Requirement, VerbatimUrl, ExtraName};
//!
//! let marker = r#"requests [security,tests] >= 2.8.1, == 2.8.* ; python_version > "3.8""#;
//! let dependency_specification = Requirement::<VerbatimUrl>::from_str(marker).unwrap();
//! assert_eq!(dependency_specification.name.as_ref(), "requests");
//! assert_eq!(dependency_specification.extras, vec![ExtraName::from_str("security").unwrap(), ExtraName::from_str("tests").unwrap()]);
//! ```

#![warn(missing_docs)]

use std::collections::HashSet;
use std::error::Error;
use std::fmt::{Debug, Display, Formatter};
use std::path::Path;
use std::str::FromStr;

use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
use thiserror::Error;
use url::Url;

use cursor::Cursor;
pub use marker::{
    ContainsMarkerTree, ExtraMarkerTree, ExtraOperator, InMarkerTree, MarkerEnvironment,
    MarkerEnvironmentBuilder, MarkerExpression, MarkerOperator, MarkerTree, MarkerTreeContents,
    MarkerTreeKind, MarkerValue, MarkerValueExtra, MarkerValueString, MarkerValueVersion,
    MarkerWarningKind, StringMarkerTree, StringVersion, VersionMarkerTree,
};
pub use normalize::{ExtraName, InvalidNameError, PackageName};
pub use origin::RequirementOrigin;
/// Version and version specifiers used in requirements (reexport).
// https://github.com/konstin/pep508_rs/issues/19
pub use pep440_rs;
use pep440_rs::{Version, VersionSpecifier, VersionSpecifiers};
#[cfg(feature = "non-pep508-extensions")]
pub use unnamed::{UnnamedRequirement, UnnamedRequirementUrl};
pub use verbatim_url::{
    expand_env_vars, split_scheme, strip_host, Scheme, VerbatimUrl, VerbatimUrlError,
};

mod cursor;
pub mod marker;
mod normalize;
mod origin;
#[cfg(feature = "non-pep508-extensions")]
mod unnamed;
mod verbatim_url;

/// Error with a span attached.
Not that those aren't `String` but `Vec` indices. #[derive(Debug)] pub struct Pep508Error { /// Either we have an error string from our parser or an upstream error from `url` pub message: Pep508ErrorSource, /// Span start index pub start: usize, /// Span length pub len: usize, /// The input string so we can print it underlined pub input: String, } /// Either we have an error string from our parser or an upstream error from `url` #[derive(Debug, Error)] pub enum Pep508ErrorSource { /// An error from our parser. #[error("{0}")] String(String), /// A URL parsing error. #[error(transparent)] UrlError(T::Err), /// The version requirement is not supported. #[error("{0}")] UnsupportedRequirement(String), } impl Display for Pep508Error { /// Pretty formatting with underline. fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { // We can use char indices here since it's a Vec let start_offset = self.input[..self.start] .chars() .filter_map(unicode_width::UnicodeWidthChar::width) .sum::(); let underline_len = if self.start == self.input.len() { // We also allow 0 here for convenience assert!( self.len <= 1, "Can only go one past the input not {}", self.len ); 1 } else { self.input[self.start..self.start + self.len] .chars() .filter_map(unicode_width::UnicodeWidthChar::width) .sum::() }; write!( f, "{}\n{}\n{}{}", self.message, self.input, " ".repeat(start_offset), "^".repeat(underline_len) ) } } /// We need this to allow anyhow's `.context()` and `AsDynError`. impl> std::error::Error for Pep508Error {} /// A PEP 508 dependency specifier. #[derive(Hash, Debug, Clone, Eq, PartialEq, Ord, PartialOrd)] pub struct Requirement { /// The distribution name such as `requests` in /// `requests [security,tests] >= 2.8.1, == 2.8.* ; python_version > "3.8"`. pub name: PackageName, /// The list of extras such as `security`, `tests` in /// `requests [security,tests] >= 2.8.1, == 2.8.* ; python_version > "3.8"`. 
pub extras: Vec, /// The version specifier such as `>= 2.8.1`, `== 2.8.*` in /// `requests [security,tests] >= 2.8.1, == 2.8.* ; python_version > "3.8"`. /// or a URL. pub version_or_url: Option>, /// The markers such as `python_version > "3.8"` in /// `requests [security,tests] >= 2.8.1, == 2.8.* ; python_version > "3.8"`. /// Those are a nested and/or tree. pub marker: MarkerTree, /// The source file containing the requirement. pub origin: Option, } impl Requirement { /// Removes the URL specifier from this requirement. pub fn clear_url(&mut self) { if matches!(self.version_or_url, Some(VersionOrUrl::Url(_))) { self.version_or_url = None; } } } impl Display for Requirement { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.name)?; if !self.extras.is_empty() { write!( f, "[{}]", self.extras .iter() .map(ToString::to_string) .collect::>() .join(",") )?; } if let Some(version_or_url) = &self.version_or_url { match version_or_url { VersionOrUrl::VersionSpecifier(version_specifier) => { let version_specifier: Vec = version_specifier.iter().map(ToString::to_string).collect(); write!(f, "{}", version_specifier.join(","))?; } VersionOrUrl::Url(url) => { // We add the space for markers later if necessary write!(f, " @ {url}")?; } } } if let Some(marker) = self.marker.contents() { write!(f, " ; {marker}")?; } Ok(()) } } /// impl<'de, T: Pep508Url> Deserialize<'de> for Requirement { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { let s = String::deserialize(deserializer)?; FromStr::from_str(&s).map_err(de::Error::custom) } } /// impl Serialize for Requirement { fn serialize(&self, serializer: S) -> Result where S: Serializer, { serializer.collect_str(self) } } type MarkerWarning = (MarkerWarningKind, String); impl Requirement { /// Returns whether the markers apply for the given environment pub fn evaluate_markers(&self, env: &MarkerEnvironment, extras: &[ExtraName]) -> bool { self.marker.evaluate(env, extras) } /// 
Returns whether the requirement would be satisfied, independent of environment markers, i.e. /// if there is potentially an environment that could activate this requirement. /// /// Note that unlike [`Self::evaluate_markers`] this does not perform any checks for bogus /// expressions but will simply return true. As caller you should separately perform a check /// with an environment and forward all warnings. pub fn evaluate_extras_and_python_version( &self, extras: &HashSet, python_versions: &[Version], ) -> bool { self.marker .evaluate_extras_and_python_version(extras, python_versions) } /// Returns whether the markers apply for the given environment. pub fn evaluate_markers_and_report( &self, env: &MarkerEnvironment, extras: &[ExtraName], ) -> (bool, Vec) { self.marker.evaluate_collect_warnings(env, extras) } /// Return the requirement with an additional marker added, to require the given extra. /// /// For example, given `flask >= 2.0.2`, calling `with_extra_marker("dotenv")` would return /// `flask >= 2.0.2 ; extra == "dotenv"`. #[must_use] pub fn with_extra_marker(mut self, extra: &ExtraName) -> Self { self.marker .and(MarkerTree::expression(MarkerExpression::Extra { operator: ExtraOperator::Equal, name: MarkerValueExtra::Extra(extra.clone()), })); self } /// Set the source file containing the requirement. #[must_use] pub fn with_origin(self, origin: RequirementOrigin) -> Self { Self { origin: Some(origin), ..self } } } /// Type to parse URLs from `name @ ` into. Defaults to [`url::Url`]. pub trait Pep508Url: Display + Debug + Sized { /// String to URL parsing error type Err: Error + Debug; /// Parse a url from `name @ `. Defaults to [`url::Url::parse_url`]. fn parse_url(url: &str, working_dir: Option<&Path>) -> Result; } impl Pep508Url for Url { type Err = url::ParseError; fn parse_url(url: &str, _working_dir: Option<&Path>) -> Result { Url::parse(url) } } /// A reporter for warnings that occur during marker parsing or evaluation. 
pub trait Reporter {
    /// Report a warning.
    fn report(&mut self, kind: MarkerWarningKind, warning: String);
}

/// Any `FnMut(MarkerWarningKind, String)` closure can be used as a [`Reporter`].
impl<F> Reporter for F
where
    F: FnMut(MarkerWarningKind, String),
{
    fn report(&mut self, kind: MarkerWarningKind, warning: String) {
        (self)(kind, warning);
    }
}

/// A simple [`Reporter`] that logs to tracing when the `tracing` feature is enabled.
///
/// Without the `tracing` feature the warnings are dropped.
pub struct TracingReporter;

impl Reporter for TracingReporter {
    // `message` is only read when the `tracing` feature is enabled.
    #[allow(unused_variables)]
    fn report(&mut self, _kind: MarkerWarningKind, message: String) {
        #[cfg(feature = "tracing")]
        {
            tracing::warn!("{message}");
        }
    }
}

/// JSON schema for a requirement: a plain string.
#[cfg(feature = "schemars")]
impl<T: Pep508Url> schemars::JsonSchema for Requirement<T> {
    fn schema_name() -> String {
        "Requirement".to_string()
    }

    fn json_schema(_gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema {
        schemars::schema::SchemaObject {
            instance_type: Some(schemars::schema::InstanceType::String.into()),
            metadata: Some(Box::new(schemars::schema::Metadata {
                description: Some(
                    "A PEP 508 dependency specifier, e.g., `ruff >= 0.6.0`".to_string(),
                ),
                ..schemars::schema::Metadata::default()
            })),
            ..schemars::schema::SchemaObject::default()
        }
        .into()
    }
}

impl<T: Pep508Url> FromStr for Requirement<T> {
    type Err = Pep508Error<T>;

    /// Parse a [Dependency Specifier](https://packaging.python.org/en/latest/specifications/dependency-specifiers/).
    ///
    /// No working directory is passed to the parser, and warnings go to [`TracingReporter`].
    fn from_str(input: &str) -> Result<Self, Self::Err> {
        parse_pep508_requirement::<T>(&mut Cursor::new(input), None, &mut TracingReporter)
    }
}

impl<T: Pep508Url> Requirement<T> {
    /// Parse a [Dependency Specifier](https://packaging.python.org/en/latest/specifications/dependency-specifiers/).
    ///
    /// `working_dir` is forwarded to the parser; warnings go to [`TracingReporter`].
    pub fn parse(input: &str, working_dir: impl AsRef<Path>) -> Result<Self, Pep508Error<T>> {
        parse_pep508_requirement(
            &mut Cursor::new(input),
            Some(working_dir.as_ref()),
            &mut TracingReporter,
        )
    }

    /// Parse a [Dependency Specifier](https://packaging.python.org/en/latest/specifications/dependency-specifiers/)
    /// with the given reporter for warnings.
pub fn parse_reporter(
        input: &str,
        working_dir: impl AsRef<Path>,
        reporter: &mut impl Reporter,
    ) -> Result<Self, Pep508Error<T>> {
        parse_pep508_requirement(
            &mut Cursor::new(input),
            Some(working_dir.as_ref()),
            reporter,
        )
    }
}

/// A list of [`ExtraName`] that can be attached to a [`Requirement`].
#[derive(Debug, Clone, Eq, Hash, PartialEq)]
pub struct Extras(Vec<ExtraName>);

impl Extras {
    /// Parse a list of extras.
    ///
    /// The error type is generic over the URL type only because it reuses [`Pep508Error`].
    pub fn parse<T: Pep508Url>(input: &str) -> Result<Self, Pep508Error<T>> {
        Ok(Self(parse_extras_cursor(&mut Cursor::new(input))?))
    }
}

/// The actual version specifier or URL to install.
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum VersionOrUrl<T: Pep508Url = VerbatimUrl> {
    /// A PEP 440 version specifier set
    VersionSpecifier(VersionSpecifiers),
    /// An installable URL
    Url(T),
}

impl<T: Pep508Url> Display for VersionOrUrl<T> {
    /// Delegates to the `Display` implementation of the contained specifiers or URL.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::VersionSpecifier(version_specifier) => Display::fmt(version_specifier, f),
            Self::Url(url) => Display::fmt(url, f),
        }
    }
}

/// Unowned version specifier or URL to install.
#[derive(Debug, Clone, Copy, Eq, Hash, PartialEq)] pub enum VersionOrUrlRef<'a, T: Pep508Url = VerbatimUrl> { /// A PEP 440 version specifier set VersionSpecifier(&'a VersionSpecifiers), /// A installable URL Url(&'a T), } impl Display for VersionOrUrlRef<'_, T> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { Self::VersionSpecifier(version_specifier) => Display::fmt(version_specifier, f), Self::Url(url) => Display::fmt(url, f), } } } impl<'a> From<&'a VersionOrUrl> for VersionOrUrlRef<'a> { fn from(value: &'a VersionOrUrl) -> Self { match value { VersionOrUrl::VersionSpecifier(version_specifier) => { VersionOrUrlRef::VersionSpecifier(version_specifier) } VersionOrUrl::Url(url) => VersionOrUrlRef::Url(url), } } } fn parse_name(cursor: &mut Cursor) -> Result> { // https://peps.python.org/pep-0508/#names // ^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$ with re.IGNORECASE let start = cursor.pos(); let mut name = String::new(); if let Some((index, char)) = cursor.next() { if matches!(char, 'A'..='Z' | 'a'..='z' | '0'..='9') { name.push(char); } else { // Check if the user added a filesystem path without a package name. pip supports this // in `requirements.txt`, but it doesn't adhere to the PEP 508 grammar. let mut clone = cursor.clone().at(start); return if looks_like_unnamed_requirement(&mut clone) { Err(Pep508Error { message: Pep508ErrorSource::UnsupportedRequirement("URL requirement must be preceded by a package name. 
Add the name of the package before the URL (e.g., `package_name @ /path/to/file`).".to_string()), start, len: clone.pos() - start, input: clone.to_string(), }) } else { Err(Pep508Error { message: Pep508ErrorSource::String(format!( "Expected package name starting with an alphanumeric character, found `{char}`" )), start: index, len: char.len_utf8(), input: cursor.to_string(), }) }; } } else { return Err(Pep508Error { message: Pep508ErrorSource::String("Empty field is not allowed for PEP508".to_string()), start: 0, len: 1, input: cursor.to_string(), }); } loop { match cursor.peek() { Some((index, char @ ('A'..='Z' | 'a'..='z' | '0'..='9' | '.' | '-' | '_'))) => { name.push(char); cursor.next(); // [.-_] can't be the final character if cursor.peek().is_none() && matches!(char, '.' | '-' | '_') { return Err(Pep508Error { message: Pep508ErrorSource::String(format!( "Package name must end with an alphanumeric character, not '{char}'" )), start: index, len: char.len_utf8(), input: cursor.to_string(), }); } } Some(_) | None => { return Ok(PackageName::new(name) .expect("`PackageName` validation should match PEP 508 parsing")); } } } } /// Parse a potential URL from the [`Cursor`], advancing the [`Cursor`] to the end of the URL. /// /// Returns `true` if the URL appears to be a viable unnamed requirement, and `false` otherwise. fn looks_like_unnamed_requirement(cursor: &mut Cursor) -> bool { // Read the entire path. let (start, len) = cursor.take_while(|char| !char.is_whitespace()); let url = cursor.slice(start, len); // Expand any environment variables in the path. let expanded = expand_env_vars(url); // Strip extras. let url = split_extras(&expanded) .map(|(url, _)| url) .unwrap_or(&expanded); // Analyze the path. let mut chars = url.chars(); let Some(first_char) = chars.next() else { return false; }; // Ex) `/bin/ls` if first_char == '\\' || first_char == '/' || first_char == '.' 
{
        return true;
    }

    // Ex) `https://` or `C:`
    if split_scheme(url).is_some() {
        return true;
    }

    // Ex) `foo/bar`
    if url.contains('/') || url.contains('\\') {
        return true;
    }

    // Ex) `foo.tar.gz`
    if looks_like_archive(url) {
        return true;
    }

    false
}

/// Returns `true` if a file looks like an archive.
///
/// See <https://github.com/pypa/pip/blob/111eed14b6e9fba7c78a5ec2b7594812d17b5d2b/src/pip/_internal/utils/filetypes.py#L8>
/// for the list of supported archive extensions.
fn looks_like_archive(file: impl AsRef<Path>) -> bool {
    let file = file.as_ref();

    // E.g., `gz` in `foo.tar.gz`
    let Some(extension) = file.extension().and_then(|ext| ext.to_str()) else {
        return false;
    };

    // E.g., `tar` in `foo.tar.gz`
    let pre_extension = file
        .file_stem()
        .and_then(|stem| Path::new(stem).extension().and_then(|ext| ext.to_str()));

    // Single-extension archives match on the last extension alone; the compressed
    // tarball extensions only count when preceded by `.tar`.
    matches!(
        (pre_extension, extension),
        (_, "whl" | "tbz" | "txz" | "tlz" | "zip" | "tgz" | "tar")
            | (Some("tar"), "bz2" | "xz" | "lz" | "lzma" | "gz")
    )
}

/// Parses extras in the `[extra1,extra2]` format.
///
/// Returns an empty list if the cursor is not positioned at `[`. Otherwise consumes
/// everything up to and including the closing `]`.
///
/// # Errors
///
/// Returns a [`Pep508Error`] on a leading comma, a missing comma between extras, an extra
/// name that does not start with an alphanumeric character, an invalid character inside an
/// extra name, or a missing closing bracket.
fn parse_extras_cursor<T: Pep508Url>(
    cursor: &mut Cursor,
) -> Result<Vec<ExtraName>, Pep508Error<T>> {
    let Some(bracket_pos) = cursor.eat_char('[') else {
        return Ok(vec![]);
    };
    cursor.eat_whitespace();

    let mut extras = Vec::new();
    let mut is_first_iteration = true;

    loop {
        // End of the extras section. (Empty extras are allowed.)
        if let Some(']') = cursor.peek_char() {
            cursor.next();
            break;
        }

        // Comma separator
        match (cursor.peek(), is_first_iteration) {
            // For the first iteration, we don't expect a comma.
            (Some((pos, ',')), true) => {
                return Err(Pep508Error {
                    message: Pep508ErrorSource::String(
                        "Expected either alphanumerical character (starting the extra name) or `]` (ending the extras section), found `,`".to_string()
                    ),
                    start: pos,
                    len: 1,
                    input: cursor.to_string(),
                });
            }
            // For the other iterations, the comma is required.
            (Some((_, ',')), false) => {
                cursor.next();
            }
            (Some((pos, other)), false) => {
                return Err(Pep508Error {
                    message: Pep508ErrorSource::String(
                        format!("Expected either `,` (separating extras) or `]` (ending the extras section), found `{other}`")
                    ),
                    start: pos,
                    // `other` may be a multi-byte character, so use its encoded length
                    // (as the other errors in this function do) rather than assuming 1.
                    len: other.len_utf8(),
                    input: cursor.to_string(),
                });
            }
            _ => {}
        }

        // wsp* before the identifier
        cursor.eat_whitespace();
        let mut buffer = String::new();

        // First char of the identifier.
        match cursor.next() {
            // letterOrDigit
            Some((_, alphanumeric @ ('a'..='z' | 'A'..='Z' | '0'..='9'))) => {
                buffer.push(alphanumeric);
            }
            Some((pos, other)) => {
                return Err(Pep508Error {
                    message: Pep508ErrorSource::String(format!(
                        "Expected an alphanumeric character starting the extra name, found `{other}`"
                    )),
                    start: pos,
                    len: other.len_utf8(),
                    input: cursor.to_string(),
                });
            }
            // The error is built only on this path, so the common case does not pay for
            // stringifying the input on every extra.
            None => {
                return Err(Pep508Error {
                    message: Pep508ErrorSource::String(
                        "Missing closing bracket (expected ']', found end of dependency specification)"
                            .to_string(),
                    ),
                    start: bracket_pos,
                    len: 1,
                    input: cursor.to_string(),
                });
            }
        }

        // Parse from the second char of the identifier
        // We handle the illegal character case below
        // identifier_end = letterOrDigit | (('-' | '_' | '.' )* letterOrDigit)
        // identifier_end*
        let (start, len) = cursor
            .take_while(|char| matches!(char, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.'));
        buffer.push_str(cursor.slice(start, len));
        match cursor.peek() {
            Some((pos, char)) if char != ',' && char != ']' && !char.is_whitespace() => {
                return Err(Pep508Error {
                    message: Pep508ErrorSource::String(format!(
                        "Invalid character in extras name, expected an alphanumeric character, `-`, `_`, `.`, `,` or `]`, found `{char}`"
                    )),
                    start: pos,
                    len: char.len_utf8(),
                    input: cursor.to_string(),
                });
            }
            _ => {}
        };

        // wsp* after the identifier
        cursor.eat_whitespace();

        // Add the parsed extra
        extras.push(
            ExtraName::new(buffer).expect("`ExtraName` validation should match PEP 508 parsing"),
        );
        is_first_iteration = false;
    }

    Ok(extras)
}

/// Parse a raw string for a URL requirement, which could be either a URL or a local path, and which
/// could contain unexpanded environment variables.
///
/// When parsing, we eat characters until we see any of the following:
/// - A newline.
/// - A semicolon (marker) or hash (comment), _preceded_ by a space. We parse the URL until the last
///   non-whitespace character (inclusive).
/// - A semicolon (marker) or hash (comment) _followed_ by a space. We treat this as an error, since
///   the end of the URL is ambiguous.
///
/// For example:
/// - `https://pypi.org/project/requests/...`
/// - `file:///home/ferris/project/scripts/...`
/// - `file:../editable/`
/// - `../editable/`
/// - `../path to editable/`
/// - `https://download.pytorch.org/whl/torch_stable.html`
fn parse_url<T: Pep508Url>(
    cursor: &mut Cursor,
    working_dir: Option<&Path>,
) -> Result<T, Pep508Error<T>> {
    // wsp*
    cursor.eat_whitespace();
    // URI_reference
    let (start, len) = {
        let start = cursor.pos();
        let mut len = 0;
        while let Some((_, c)) = cursor.next() {
            // If we see a line break, we're done.
            if matches!(c, '\r' | '\n') {
                break;
            }

            // If we see top-level whitespace, check if it's followed by a semicolon or hash. If so,
            // end the URL at the last non-whitespace character.
if c.is_whitespace() {
                // Look ahead on a copy so the real cursor is not advanced.
                let mut cursor = cursor.clone();
                cursor.eat_whitespace();
                if matches!(cursor.peek_char(), None | Some(';' | '#')) {
                    break;
                }
            }

            len += c.len_utf8();

            // If we see a top-level semicolon or hash followed by whitespace, we're done.
            match c {
                ';' if cursor.peek_char().is_some_and(char::is_whitespace) => {
                    break;
                }
                '#' if cursor.peek_char().is_some_and(char::is_whitespace) => {
                    break;
                }
                _ => {}
            }
        }
        (start, len)
    };

    let url = cursor.slice(start, len);
    if url.is_empty() {
        return Err(Pep508Error {
            message: Pep508ErrorSource::String("Expected URL".to_string()),
            start,
            len,
            input: cursor.to_string(),
        });
    }

    let url = T::parse_url(url, working_dir).map_err(|err| Pep508Error {
        message: Pep508ErrorSource::UrlError(err),
        start,
        len,
        input: cursor.to_string(),
    })?;

    Ok(url)
}

/// Identify the extras in a relative URL (e.g., `../editable[dev]`).
///
/// Pip uses `m = re.match(r'^(.+)(\[[^]]+])$', path)`. Our strategy is:
/// - If the string ends with a closing bracket (`]`)...
/// - Iterate backwards until you find the open bracket (`[`)...
/// - But abort if you find another closing bracket (`]`) first.
///
/// On success, returns the part before the `[` and the bracketed part (starting at `[`).
pub fn split_extras(given: &str) -> Option<(&str, &str)> {
    let mut chars = given.char_indices().rev();

    // If the string ends with a closing bracket (`]`)...
    if !matches!(chars.next(), Some((_, ']'))) {
        return None;
    }

    // Iterate backwards until you find the open bracket (`[`)...
    let (index, _) = chars
        .take_while(|(_, c)| *c != ']')
        .find(|(_, c)| *c == '[')?;

    Some(given.split_at(index))
}

/// PEP 440 wrapper
///
/// Parses `buffer` as a single version specifier; on failure, the PEP 440 error message is
/// wrapped in a [`Pep508Error`] spanning `start..end` of the cursor's input.
fn parse_specifier<T: Pep508Url>(
    cursor: &mut Cursor,
    buffer: &str,
    start: usize,
    end: usize,
) -> Result<VersionSpecifier, Pep508Error<T>> {
    VersionSpecifier::from_str(buffer).map_err(|err| Pep508Error {
        message: Pep508ErrorSource::String(err.to_string()),
        start,
        len: end - start,
        input: cursor.to_string(),
    })
}

/// Such as `>=1.19,<2.0`, either delimited by the end of the specifier or a `;` for the marker part
///
/// ```text
/// version_one (wsp* ',' version_one)*
/// ```
fn parse_version_specifier<T: Pep508Url>(
    cursor: &mut Cursor,
) -> Result<Option<VersionOrUrl<T>>, Pep508Error<T>> {
    let mut start = cursor.pos();
    let mut specifiers = Vec::new();
    let mut buffer = String::new();
    let requirement_kind = loop {
        match cursor.peek() {
            // A comma ends the current specifier and starts the next one.
            Some((end, ',')) => {
                let specifier = parse_specifier(cursor, &buffer, start, end)?;
                specifiers.push(specifier);
                buffer.clear();
                cursor.next();
                start = end + 1;
            }
            // The `;` is left unconsumed here; the loop only peeks at it.
            Some((_, ';')) | None => {
                let end = cursor.pos();
                let specifier = parse_specifier(cursor, &buffer, start, end)?;
                specifiers.push(specifier);
                break Some(VersionOrUrl::VersionSpecifier(
                    specifiers.into_iter().collect(),
                ));
            }
            Some((_, char)) => {
                buffer.push(char);
                cursor.next();
            }
        }
    };
    Ok(requirement_kind)
}

/// Such as `(>=1.19,<2.0)`
///
/// ```text
/// '(' version_one (wsp* ',' version_one)* ')'
/// ```
fn parse_version_specifier_parentheses<T: Pep508Url>(
    cursor: &mut Cursor,
) -> Result<Option<VersionOrUrl<T>>, Pep508Error<T>> {
    let brace_pos = cursor.pos();
    cursor.next();
    // Makes for slightly better error underline
    cursor.eat_whitespace();
    let mut start = cursor.pos();
    let mut specifiers = Vec::new();
    let mut buffer = String::new();
    let requirement_kind = loop {
        match cursor.next() {
            Some((end, ',')) => {
                let specifier = parse_specifier(cursor, &buffer, start, end)?;
                specifiers.push(specifier);
                buffer.clear();
                start = end + 1;
            }
            Some((end, ')')) => {
                let specifier = parse_specifier(cursor, &buffer, start, end)?;
                specifiers.push(specifier);
                break
Some(VersionOrUrl::VersionSpecifier(specifiers.into_iter().collect()));
            }
            Some((_, char)) => buffer.push(char),
            None => return Err(Pep508Error {
                message: Pep508ErrorSource::String("Missing closing parenthesis (expected ')', found end of dependency specification)".to_string()),
                start: brace_pos,
                len: 1,
                input: cursor.to_string(),
            }),
        }
    };
    Ok(requirement_kind)
}

/// Parse a PEP 508-compliant [dependency specifier](https://packaging.python.org/en/latest/specifications/dependency-specifiers).
///
/// Consumes the whole cursor: name, optional extras, optional version specifier or URL, and
/// optional marker. Any trailing input is an error.
///
/// # Errors
///
/// Returns a [`Pep508Error`] if any component fails to parse, if the input looks like a URL or
/// archive path without a package name, or if there is unexpected trailing input.
fn parse_pep508_requirement<T: Pep508Url>(
    cursor: &mut Cursor,
    working_dir: Option<&Path>,
    reporter: &mut impl Reporter,
) -> Result<Requirement<T>, Pep508Error<T>> {
    let start = cursor.pos();

    // Technically, the grammar is:
    // ```text
    // name_req      = name wsp* extras? wsp* versionspec? wsp* quoted_marker?
    // url_req       = name wsp* extras? wsp* urlspec wsp+ quoted_marker?
    // specification = wsp* ( url_req | name_req ) wsp*
    // ```
    // So we can merge this into:
    // ```text
    // specification = wsp* name wsp* extras? wsp* (('@' wsp* url_req) | ('(' versionspec ')') | (versionspec)) wsp* (';' wsp* marker)? wsp*
    // ```
    // Where the extras start with '[' if any, then we have '@', '(' or one of the version comparison
    // operators. Markers start with ';' if any
    // wsp*
    cursor.eat_whitespace();
    // name
    let name_start = cursor.pos();
    let name = parse_name(cursor)?;
    let name_end = cursor.pos();
    // wsp*
    cursor.eat_whitespace();
    // extras?
    let extras = parse_extras_cursor(cursor)?;
    // wsp*
    cursor.eat_whitespace();

    // ( url_req | name_req )?
    let requirement_kind = match cursor.peek_char() {
        // url_req
        Some('@') => {
            cursor.next();
            Some(VersionOrUrl::Url(parse_url(cursor, working_dir)?))
        }
        // name_req
        Some('(') => parse_version_specifier_parentheses(cursor)?,
        // name_req
        Some('<' | '=' | '>' | '~' | '!') => parse_version_specifier(cursor)?,
        // No requirements / any version
        Some(';') | None => None,
        Some(other) => {
            // Rewind to the start of the version specifier, to see if the user added a URL without
            // a package name. pip supports this in `requirements.txt`, but it doesn't adhere to
            // the PEP 508 grammar.
            let mut clone = cursor.clone().at(start);
            return if looks_like_unnamed_requirement(&mut clone) {
                Err(Pep508Error {
                    message: Pep508ErrorSource::UnsupportedRequirement("URL requirement must be preceded by a package name. Add the name of the package before the URL (e.g., `package_name @ https://...`).".to_string()),
                    start,
                    len: clone.pos() - start,
                    input: clone.to_string(),
                })
            } else {
                Err(Pep508Error {
                    message: Pep508ErrorSource::String(format!(
                        "Expected one of `@`, `(`, `<`, `=`, `>`, `~`, `!`, `;`, found `{other}`"
                    )),
                    start: cursor.pos(),
                    len: other.len_utf8(),
                    input: cursor.to_string(),
                })
            };
        }
    };

    let requirement_end = cursor.pos();

    // If the requirement consists solely of a package name, and that name appears to be an archive,
    // treat it as a URL requirement, for consistency and security. (E.g., `requests-2.26.0.tar.gz`
    // is a valid Python package name, but we should treat it as a reference to a file.)
    //
    // See: https://github.com/pypa/pip/blob/111eed14b6e9fba7c78a5ec2b7594812d17b5d2b/src/pip/_internal/utils/filetypes.py#L8
    if requirement_kind.is_none() {
        if looks_like_archive(cursor.slice(name_start, name_end - name_start)) {
            // NOTE(review): `clone` is rewound to `start` and not advanced afterwards, so
            // `clone.pos() - start` presumably evaluates to 0 and the reported span is
            // empty — confirm whether the span of the name was intended.
            let clone = cursor.clone().at(start);
            return Err(Pep508Error {
                message: Pep508ErrorSource::UnsupportedRequirement("URL requirement must be preceded by a package name. Add the name of the package before the URL (e.g., `package_name @ https://...`).".to_string()),
                start,
                len: clone.pos() - start,
                input: clone.to_string(),
            });
        }
    }

    // wsp*
    cursor.eat_whitespace();
    // quoted_marker?
    let marker = if cursor.peek_char() == Some(';') {
        // Skip past the semicolon
        cursor.next();
        marker::parse::parse_markers_cursor(cursor, reporter)?
    } else {
        None
    };
    // wsp*
    cursor.eat_whitespace();
    if let Some((pos, char)) = cursor.next() {
        // A URL ending in `;` or `#` followed by more input means the marker/comment was
        // swallowed into the URL; report that instead of the generic trailing-input error.
        if marker.is_none() {
            if let Some(VersionOrUrl::Url(url)) = requirement_kind {
                let url = url.to_string();
                for c in [';', '#'] {
                    if url.ends_with(c) {
                        return Err(Pep508Error {
                            message: Pep508ErrorSource::String(format!(
                                "Missing space before '{c}', the end of the URL is ambiguous"
                            )),
                            start: requirement_end - c.len_utf8(),
                            len: c.len_utf8(),
                            input: cursor.to_string(),
                        });
                    }
                }
            }
        }
        let message = if marker.is_none() {
            format!(r#"Expected end of input or `;`, found `{char}`"#)
        } else {
            format!(r#"Expected end of input, found `{char}`"#)
        };
        return Err(Pep508Error {
            message: Pep508ErrorSource::String(message),
            start: pos,
            len: char.len_utf8(),
            input: cursor.to_string(),
        });
    }

    Ok(Requirement {
        name,
        extras,
        version_or_url: requirement_kind,
        marker: marker.unwrap_or_default(),
        origin: None,
    })
}

mod path;
#[cfg(test)]
mod tests;

// [archive member header] pep508_rs-0.9.1/src/marker/algebra/tests.rs000064400000000000000000000047461046102023000165500ustar 00000000000000
use super::{NodeId, INTERNER};
use crate::MarkerExpression;

/// Parses a single marker expression and interns it, returning its node id.
fn expr(s: &str) -> NodeId {
    INTERNER
        .lock()
        .expression(MarkerExpression::from_str(s).unwrap().unwrap())
}

#[test]
fn basic() {
    // Take a fresh lock per operation so guards never overlap.
    let m = || INTERNER.lock();
    let extra_foo = expr("extra == 'foo'");
    assert!(!extra_foo.is_false());

    let os_foo = expr("os_name == 'foo'");
    let extra_or_os_foo = m().or(extra_foo, os_foo);
    assert!(!extra_or_os_foo.is_false());
    assert!(!m().and(extra_foo, os_foo).is_false());

    let trivially_true = m().or(extra_or_os_foo, extra_or_os_foo.not());
    assert!(!trivially_true.is_false());
    assert!(trivially_true.is_true());

    let trivially_false = m().and(extra_foo, extra_foo.not());
    assert!(trivially_false.is_false());

    let e = m().or(trivially_false, os_foo);
    assert!(!e.is_false());

    let extra_not_foo = expr("extra != 'foo'");
    assert!(m().and(extra_foo, extra_not_foo).is_false());
    assert!(m().or(extra_foo, extra_not_foo).is_true());

    let os_geq_bar = expr("os_name >= 'bar'");
    assert!(!os_geq_bar.is_false());

    let os_le_bar = expr("os_name < 'bar'");
    assert!(m().and(os_geq_bar, os_le_bar).is_false());
    assert!(m().or(os_geq_bar, os_le_bar).is_true());

    let os_leq_bar = expr("os_name <= 'bar'");
    assert!(!m().and(os_geq_bar, os_leq_bar).is_false());
    assert!(m().or(os_geq_bar, os_leq_bar).is_true());
}

#[test]
fn version() {
    let m = || INTERNER.lock();
    let eq_3 = expr("python_version == '3'");
    let neq_3 = expr("python_version != '3'");
    let geq_3 = expr("python_version >= '3'");
    let leq_3 = expr("python_version <= '3'");

    let eq_2 = expr("python_version == '2'");
    let eq_1 = expr("python_version == '1'");
    assert!(m().and(eq_2, eq_1).is_false());

    assert_eq!(eq_3.not(), neq_3);
    assert_eq!(eq_3, neq_3.not());

    assert!(m().and(eq_3, neq_3).is_false());
    assert!(m().or(eq_3, neq_3).is_true());

    assert_eq!(m().and(eq_3, geq_3), eq_3);
    assert_eq!(m().and(eq_3, leq_3), eq_3);

    assert_eq!(m().and(geq_3, leq_3), eq_3);

    assert!(!m().and(geq_3, leq_3).is_false());
    assert!(m().or(geq_3, leq_3).is_true());
}

#[test]
fn simplify() {
    let m = || INTERNER.lock();
    let x86 = expr("platform_machine == 'x86_64'");
    let not_x86 = expr("platform_machine != 'x86_64'");
    let windows = expr("platform_machine == 'Windows'");

    // `(A and W) or (not A and W)` must reduce to `W`.
    let a = m().and(x86, windows);
    let b = m().and(not_x86, windows);
    assert_eq!(m().or(a, b), windows);
}

// [archive member header] pep508_rs-0.9.1/src/marker/algebra.rs000064400000000000000000001436351046102023000154070ustar 00000000000000
//! This module implements marker tree operations using Algebraic Decision Diagrams (ADD).
//!
//! An ADD is a tree of decision nodes as well as two terminal nodes, `true` and `false`. Marker
//! variables are represented as decision nodes. The edge from a decision node to its child
//! represents a particular assignment of a value to that variable. Depending on the type of
//! variable, an edge can be represented by binary values or a disjoint set of ranges, as opposed
//! to a traditional Binary Decision Diagram.
//!
//! For example, the marker `python_version > '3.7' and os_name == 'Linux'` creates the following
//! marker tree:
//!
//! ```text
//! python_version:
//!   (> '3.7')  -> os_name:
//!                   (> 'Linux')  -> FALSE
//!                   (== 'Linux') -> TRUE
//!                   (< 'Linux')  -> FALSE
//!   (<= '3.7') -> FALSE
//! ```
//!
//! Specifically, a marker tree is represented as a Reduced Ordered ADD. An ADD is ordered if
//! different variables appear in the same order on all paths from the root. Additionally, an ADD
//! is reduced if:
//! - Isomorphic nodes are merged.
//! - Nodes with isomorphic children are eliminated.
//!
//! These two rules provide an important guarantee for marker trees: marker trees are canonical for
//! a given marker function and variable ordering. Because variable ordering is defined at compile-time,
//! this means any functionally equivalent marker trees are normalized upon construction. Importantly,
//! this means that we can identify trivially true marker trees, as well as unsatisfiable marker trees.
//! This provides important information to the resolver when forking.
//!
//! ADDs provide polynomial time operations such as conjunction and negation, which is important as marker
//! trees are combined during universal resolution. Because ADDs solve the SAT problem, constructing an
//! arbitrary ADD can theoretically take exponential time in the worst case. However, in practice, marker trees
//! have a limited number of variables and user-provided marker trees are typically very simple.
//!
//! Additionally, the implementation in this module uses complemented edges, meaning a marker tree and
//! its complement are represented by the same node internally. This allows cheap constant-time marker
//! tree negation. It also allows us to only implement a single operation for both `AND` and `OR`, implementing
//! the other in terms of its De Morgan Complement.
//!
//! ADDs are created and managed through the global [`Interner`]. A given ADD is referenced through
a [`NodeId`], which represents a potentially complemented reference to a [`Node`] in the interner, //! or a terminal `true`/`false` node. Interning allows the reduction rule that isomorphic nodes are //! merged to be applied globally. use std::cmp::Ordering; use std::fmt; use std::ops::Bound; use std::sync::Mutex; use std::sync::MutexGuard; use itertools::Either; use pep440_rs::{release_specifier_to_range, Operator, Version, VersionSpecifier}; use rustc_hash::FxHashMap; use std::sync::LazyLock; use version_ranges::Ranges; use crate::marker::MarkerValueExtra; use crate::ExtraOperator; use crate::{MarkerExpression, MarkerOperator, MarkerValueString, MarkerValueVersion}; /// The global node interner. pub(crate) static INTERNER: LazyLock = LazyLock::new(Interner::default); /// An interner for decision nodes. /// /// Interning decision nodes allows isomorphic nodes to be automatically merged. /// It also allows nodes to cheaply compared. #[derive(Default)] pub(crate) struct Interner { pub(crate) shared: InternerShared, state: Mutex, } /// The shared part of an [`Interner`], which can be accessed without a lock. #[derive(Default)] pub(crate) struct InternerShared { /// A list of unique [`Node`]s. nodes: boxcar::Vec, } /// The mutable [`Interner`] state, stored behind a lock. #[derive(Default)] struct InternerState { /// A map from a [`Node`] to a unique [`NodeId`], representing an index /// into [`InternerShared`]. unique: FxHashMap, /// A cache for `AND` operations between two nodes. /// Note that `OR` is implemented in terms of `AND`. cache: FxHashMap<(NodeId, NodeId), NodeId>, } impl InternerShared { /// Returns the node for the given [`NodeId`]. pub(crate) fn node(&self, id: NodeId) -> &Node { &self.nodes[id.index()] } } impl Interner { /// Locks the interner state, returning a guard that can be used to perform marker /// operations. 
pub(crate) fn lock(&self) -> InternerGuard<'_> { InternerGuard { state: self.state.lock().unwrap(), shared: &self.shared, } } } /// A lock of [`InternerState`]. pub(crate) struct InternerGuard<'a> { state: MutexGuard<'a, InternerState>, shared: &'a InternerShared, } impl InternerGuard<'_> { /// Creates a decision node with the given variable and children. fn create_node(&mut self, var: Variable, children: Edges) -> NodeId { let mut node = Node { var, children }; let mut first = node.children.nodes().next().unwrap(); // With a complemented edge representation, there are two ways to represent the same node: // complementing the root and all children edges results in the same node. To ensure markers // are canonical, the first child edge is never complemented. let mut flipped = false; if first.is_complement() { node = node.not(); first = first.not(); flipped = true; } // Reduction: If all children refer to the same node, we eliminate the parent node // and just return the child. if node.children.nodes().all(|node| node == first) { return if flipped { first.not() } else { first }; } // Insert the node. let id = self .state .unique .entry(node.clone()) .or_insert_with(|| NodeId::new(self.shared.nodes.push(node), false)); if flipped { id.not() } else { *id } } /// Returns a decision node for a single marker expression. pub(crate) fn expression(&mut self, expr: MarkerExpression) -> NodeId { let (var, children) = match expr { // Normalize `python_version` markers to `python_full_version` nodes. 
MarkerExpression::Version {
                key: MarkerValueVersion::PythonVersion,
                specifier,
            } => match python_version_to_full_version(normalize_specifier(specifier)) {
                Ok(specifier) => (
                    Variable::Version(MarkerValueVersion::PythonFullVersion),
                    Edges::from_specifier(specifier),
                ),
                // The conversion already produced a finished node; return it as-is.
                Err(node) => return node,
            },
            MarkerExpression::VersionIn {
                key: MarkerValueVersion::PythonVersion,
                versions,
                negated,
            } => match Edges::from_python_versions(versions, negated) {
                Ok(edges) => (
                    Variable::Version(MarkerValueVersion::PythonFullVersion),
                    edges,
                ),
                // As above: the helper already produced a finished node.
                Err(node) => return node,
            },
            // A variable representing the output of a version key. Edges correspond
            // to disjoint version ranges.
            MarkerExpression::Version { key, specifier } => {
                (Variable::Version(key), Edges::from_specifier(specifier))
            }
            // A variable representing the output of a version key. Edges correspond
            // to disjoint version ranges.
            MarkerExpression::VersionIn {
                key,
                versions,
                negated,
            } => (
                Variable::Version(key),
                Edges::from_versions(&versions, negated),
            ),
            // The `in` and `contains` operators are a bit different than other operators.
            // In particular, they do not represent a particular value for the corresponding
            // variable, and can overlap. For example, `'nux' in os_name` and `os_name == 'Linux'`
            // can both be `true` in the same marker environment, and so cannot be represented by
            // the same variable. Because of this, we represent `in` and `contains`, as well as
            // their negations, as distinct variables, unrelated to the range of a given key.
            //
            // Note that in the presence of the `in` operator, we may not be able to simplify
            // some marker trees to a constant `true` or `false`. For example, it is not trivial to
            // detect that `os_name > 'z' and os_name in 'Linux'` is unsatisfiable.
            MarkerExpression::String {
                key,
                operator: MarkerOperator::In,
                value,
            } => (Variable::In { key, value }, Edges::from_bool(true)),
            // `not in` shares the `In` variable and takes the `false` edge.
            MarkerExpression::String {
                key,
                operator: MarkerOperator::NotIn,
                value,
            } => (Variable::In { key, value }, Edges::from_bool(false)),
            MarkerExpression::String {
                key,
                operator: MarkerOperator::Contains,
                value,
            } => (Variable::Contains { key, value }, Edges::from_bool(true)),
            // `not contains` shares the `Contains` variable and takes the `false` edge.
            MarkerExpression::String {
                key,
                operator: MarkerOperator::NotContains,
                value,
            } => (Variable::Contains { key, value }, Edges::from_bool(false)),
            // A variable representing the output of a string key. Edges correspond
            // to disjoint string ranges.
            MarkerExpression::String {
                key,
                operator,
                value,
            } => (Variable::String(key), Edges::from_string(operator, value)),
            // A variable representing the existence or absence of a particular extra.
            MarkerExpression::Extra {
                name,
                operator: ExtraOperator::Equal,
            } => (Variable::Extra(name), Edges::from_bool(true)),
            MarkerExpression::Extra {
                name,
                operator: ExtraOperator::NotEqual,
            } => (Variable::Extra(name), Edges::from_bool(false)),
        };

        self.create_node(var, children)
    }

    /// Returns a decision node representing the disjunction of two nodes.
    pub(crate) fn or(&mut self, x: NodeId, y: NodeId) -> NodeId {
        // We take advantage of cheap negation here and implement OR in terms
        // of its De Morgan complement: `x or y == not (not x and not y)`.
        self.and(x.not(), y.not()).not()
    }

    /// Returns a decision node representing the conjunction of two nodes.
    pub(crate) fn and(&mut self, xi: NodeId, yi: NodeId) -> NodeId {
        // `true and Y` is `Y`.
        if xi.is_true() {
            return yi;
        }
        // `X and true` is `X`.
        if yi.is_true() {
            return xi;
        }
        // `X and X` is `X`.
        if xi == yi {
            return xi;
        }
        // Anything and `false` is `false`.
        if xi.is_false() || yi.is_false() {
            return NodeId::FALSE;
        }
        // `X and not X` is `false` by definition.
        if xi.not() == yi {
            return NodeId::FALSE;
        }

        // The operation was memoized.
        if let Some(result) = self.state.cache.get(&(xi, yi)) {
            return *result;
        }

        let (x, y) = (self.shared.node(xi), self.shared.node(yi));

        // Perform Shannon Expansion of the higher order variable.
let (func, children) = match x.var.cmp(&y.var) {
            // X is higher order than Y, apply Y to every child of X.
            Ordering::Less => {
                let children = x.children.map(xi, |node| self.and(node, yi));
                (x.var.clone(), children)
            }
            // Y is higher order than X, apply X to every child of Y.
            Ordering::Greater => {
                let children = y.children.map(yi, |node| self.and(node, xi));
                (y.var.clone(), children)
            }
            // X and Y represent the same variable, merge their children.
            Ordering::Equal => {
                let children = x.children.apply(xi, &y.children, yi, |x, y| self.and(x, y));
                (x.var.clone(), children)
            }
        };

        // Create the output node.
        let node = self.create_node(func, children);

        // Memoize the result of this operation.
        //
        // ADDs often contain duplicated subgraphs in distinct branches due to the restricted
        // variable ordering. Memoizing allows ADD operations to remain polynomial time.
        self.state.cache.insert((xi, yi), node);

        node
    }

    /// Returns `true` if there is no environment in which both marker trees can apply,
    /// i.e. their conjunction is always `false`.
    pub(crate) fn is_disjoint(&mut self, xi: NodeId, yi: NodeId) -> bool {
        // `false` is disjoint with any marker.
        if xi.is_false() || yi.is_false() {
            return true;
        }
        // `true` is not disjoint with any marker except `false`.
        if xi.is_true() || yi.is_true() {
            return false;
        }
        // `X` and `X` are not disjoint.
        if xi == yi {
            return false;
        }
        // `X` and `not X` are disjoint by definition.
        if xi.not() == yi {
            return true;
        }

        let (x, y) = (self.shared.node(xi), self.shared.node(yi));
        match x.var.cmp(&y.var) {
            // X is higher order than Y, Y must be disjoint with every child of X.
            Ordering::Less => x
                .children
                .nodes()
                .all(|x| self.is_disjoint(x.negate(xi), yi)),
            // Y is higher order than X, X must be disjoint with every child of Y.
            Ordering::Greater => y
                .children
                .nodes()
                .all(|y| self.is_disjoint(y.negate(yi), xi)),
            // X and Y represent the same variable, their merged edges must be unsatisfiable.
            Ordering::Equal => x.children.is_disjoint(xi, &y.children, yi, self),
        }
    }

    /// Restrict the output of a given boolean variable in the tree.
    ///
    /// If the provided function `f` returns a `Some` boolean value, the tree will be simplified
    /// with the assumption that the given variable is restricted to that value. If the function
    /// returns `None`, the variable will not be affected.
    pub(crate) fn restrict(&mut self, i: NodeId, f: &impl Fn(&Variable) -> Option<bool>) -> NodeId {
        // Terminal nodes have nothing to restrict.
        if matches!(i, NodeId::TRUE | NodeId::FALSE) {
            return i;
        }

        let node = self.shared.node(i);
        if let Edges::Boolean { high, low } = node.children {
            if let Some(value) = f(&node.var) {
                // Restrict this variable to the given output by merging it
                // with the relevant child.
                let node = if value { high } else { low };
                return node.negate(i);
            }
        }

        // Restrict all nodes recursively.
        let children = node.children.map(i, |node| self.restrict(node, f));
        self.create_node(node.var.clone(), children)
    }

    /// Simplify this tree by *assuming* that the Python version range provided
    /// is true and that the complement of it is false.
    ///
    /// For example, with `requires-python = '>=3.8'` and a marker tree of
    /// `python_full_version >= '3.8' and python_full_version <= '3.10'`, this
    /// would result in a marker of `python_full_version <= '3.10'`.
    pub(crate) fn simplify_python_versions(
        &mut self,
        i: NodeId,
        py_lower: Bound<&Version>,
        py_upper: Bound<&Version>,
    ) -> NodeId {
        // Terminal nodes and fully unbounded ranges need no simplification.
        if matches!(i, NodeId::TRUE | NodeId::FALSE)
            || matches!((py_lower, py_upper), (Bound::Unbounded, Bound::Unbounded))
        {
            return i;
        }

        let node = self.shared.node(i);
        // Look for a `python_full_version` expression, otherwise
        // we recursively simplify.
        let Node {
            var: Variable::Version(MarkerValueVersion::PythonFullVersion),
            children: Edges::Version { ref edges },
        } = node
        else {
            // Simplify all nodes recursively.
            let children = node.children.map(i, |node_id| {
                self.simplify_python_versions(node_id, py_lower, py_upper)
            });
            return self.create_node(node.var.clone(), children);
        };

        let py_range = Ranges::from_range_bounds((py_lower.cloned(), py_upper.cloned()));
        if py_range.is_empty() {
            // Oops, the bounds imply there is nothing that can match,
            // so we always evaluate to false.
            return NodeId::FALSE;
        }

        // Keep only the part of each edge that overlaps the Python range.
        let mut new = SmallVec::new();
        for &(ref range, node) in edges {
            let overlap = range.intersection(&py_range);
            if overlap.is_empty() {
                continue;
            }
            // `overlap` is already an owned value, so it is moved rather than cloned.
            new.push((overlap, node));
        }

        // Now that we know the only ranges left are those that
        // intersect with our lower/upper Python version bounds, we
        // can "extend" out the lower/upper bounds here all the way to
        // negative and positive infinity, respectively.
        //
        // This has the effect of producing a marker that is only
        // applicable in a context where the Python lower/upper bounds
        // are known to be satisfied.
        let &(ref first_range, first_node_id) = new.first().unwrap();
        let first_upper = first_range.bounding_range().unwrap().1;
        let clipped = Ranges::from_range_bounds((Bound::Unbounded, first_upper.cloned()));
        *new.first_mut().unwrap() = (clipped, first_node_id);

        let &(ref last_range, last_node_id) = new.last().unwrap();
        let last_lower = last_range.bounding_range().unwrap().0;
        let clipped = Ranges::from_range_bounds((last_lower.cloned(), Bound::Unbounded));
        *new.last_mut().unwrap() = (clipped, last_node_id);

        self.create_node(node.var.clone(), Edges::Version { edges: new })
            .negate(i)
    }

    /// Complexify this tree by requiring the given Python version
    /// range to be true in order for this marker tree to evaluate to
    /// true in all circumstances.
    ///
    /// For example, with `requires-python = '>=3.8'` and a marker tree of
    /// `python_full_version <= '3.10'`, this would result in a marker of
    /// `python_full_version >= '3.8' and python_full_version <= '3.10'`.
pub(crate) fn complexify_python_versions(
    &mut self,
    i: NodeId,
    py_lower: Bound<&Version>,
    py_upper: Bound<&Version>,
) -> NodeId {
    if matches!(i, NodeId::FALSE)
        || matches!((py_lower, py_upper), (Bound::Unbounded, Bound::Unbounded))
    {
        return i;
    }

    let py_range = Ranges::from_range_bounds((py_lower.cloned(), py_upper.cloned()));
    if py_range.is_empty() {
        // Oops, the bounds imply there is nothing that can match,
        // so we always evaluate to false.
        return NodeId::FALSE;
    }

    // `true` becomes exactly "the Python version is within the bounds".
    if matches!(i, NodeId::TRUE) {
        let var = Variable::Version(MarkerValueVersion::PythonFullVersion);
        let edges = Edges::Version {
            edges: Edges::from_range(&py_range),
        };
        return self.create_node(var, edges).negate(i);
    }

    let node = self.shared.node(i);
    let Node {
        var: Variable::Version(MarkerValueVersion::PythonFullVersion),
        children: Edges::Version { ref edges },
    } = node
    else {
        // Complexify all nodes recursively.
        let children = node.children.map(i, |node_id| {
            self.complexify_python_versions(node_id, py_lower, py_upper)
        });
        return self.create_node(node.var.clone(), children);
    };

    // The approach we take here is to filter out any range that
    // has no intersection with our Python lower/upper bounds.
    // These ranges will now always be false, so we can dismiss
    // them entirely.
    //
    // Then, depending on whether we have finite lower/upper bound,
    // we "fix up" the edges by clipping the existing ranges and
    // adding an additional range that covers the Python versions
    // outside of our bounds by always mapping them to false.
    let mut new: SmallVec<_> = edges
        .iter()
        .filter(|(range, _)| !py_range.intersection(range).is_empty())
        .cloned()
        .collect();

    // Below, we assume `new` has at least one element. It's
    // subtle, but since 1) edges is a disjoint covering of the
    // universe and 2) our `py_range` is non-empty at this point,
    // it must intersect with at least one range.
    assert!(
        !new.is_empty(),
        "expected at least one non-empty intersection"
    );

    // This is the NodeId we map to anything that should
    // always be false. We have to "negate" it in case the
    // parent is negated.
    let exclude_node_id = NodeId::FALSE.negate(i);
    if !matches!(py_lower, Bound::Unbounded) {
        let &(ref first_range, first_node_id) = new.first().unwrap();
        let first_upper = first_range.bounding_range().unwrap().1;
        // When the first range is always false, then we can just
        // "expand" it out to negative infinity to reflect that
        // anything less than our lower bound should evaluate to
        // false. If we don't do this, then we could have two
        // adjacent ranges map to the same node, which would not be
        // a canonical representation.
        if exclude_node_id == first_node_id {
            let clipped = Ranges::from_range_bounds((Bound::Unbounded, first_upper.cloned()));
            *new.first_mut().unwrap() = (clipped, first_node_id);
        } else {
            let clipped = Ranges::from_range_bounds((py_lower.cloned(), first_upper.cloned()));
            *new.first_mut().unwrap() = (clipped, first_node_id);

            // Everything below the lower bound maps to `false`.
            let py_range_lower =
                Ranges::from_range_bounds((py_lower.cloned(), Bound::Unbounded));
            new.insert(0, (py_range_lower.complement(), exclude_node_id));
        }
    }
    if !matches!(py_upper, Bound::Unbounded) {
        let &(ref last_range, last_node_id) = new.last().unwrap();
        let last_lower = last_range.bounding_range().unwrap().0;
        // See lower bound case above for why we do this. The
        // same reasoning applies here: to maintain a canonical
        // representation.
        if exclude_node_id == last_node_id {
            let clipped = Ranges::from_range_bounds((last_lower.cloned(), Bound::Unbounded));
            *new.last_mut().unwrap() = (clipped, last_node_id);
        } else {
            let clipped = Ranges::from_range_bounds((last_lower.cloned(), py_upper.cloned()));
            *new.last_mut().unwrap() = (clipped, last_node_id);

            // Everything above the upper bound maps to `false`.
            let py_range_upper =
                Ranges::from_range_bounds((Bound::Unbounded, py_upper.cloned()));
            new.push((py_range_upper.complement(), exclude_node_id));
        }
    }

    self.create_node(node.var.clone(), Edges::Version { edges: new })
        .negate(i)
}
}

/// A unique variable for a decision node.
///
/// This `enum` also defines the variable ordering for all ADDs.
/// Variable ordering is an interesting property of ADDs. A bad ordering
/// can lead to exponential explosion of the size of an ADD. However,
/// dynamically computing an optimal ordering is NP-complete.
///
/// We may wish to investigate the effect of this ordering on common marker
/// trees. However, marker trees are typically small, so this may not be high
/// impact.
#[derive(PartialOrd, Ord, PartialEq, Eq, Hash, Clone, Debug)]
pub(crate) enum Variable {
    /// A version marker, such as `python_version`.
    ///
    /// This is the highest order variable as it typically contains the most complex
    /// ranges, allowing us to merge ranges at the top-level.
    Version(MarkerValueVersion),
    /// A string marker, such as `os_name`.
    String(MarkerValueString),
    /// A variable representing a `<key> in <value>` expression for a particular
    /// string marker and value.
    In {
        key: MarkerValueString,
        value: String,
    },
    /// A variable representing a `<value> in <key>` expression for a particular
    /// string marker and value.
    Contains {
        key: MarkerValueString,
        value: String,
    },
    /// A variable representing the existence or absence of a given extra.
    ///
    /// We keep extras at the leaves of the tree, so when simplifying extras we can
    /// trivially remove the leaves without having to reconstruct the entire tree.
    Extra(MarkerValueExtra),
}

/// A decision node in an Algebraic Decision Diagram.
#[derive(PartialEq, Eq, Hash, Clone, Debug)]
pub(crate) struct Node {
    /// The variable this node represents.
    pub(crate) var: Variable,
    /// The children of this node, with edges representing the possible outputs
    /// of this variable.
    pub(crate) children: Edges,
}

impl Node {
    /// Return the complement of this node, flipping all children IDs.
    fn not(self) -> Node {
        Node {
            var: self.var,
            children: self.children.not(),
        }
    }
}

/// An ID representing a reference to a decision node in the [`Interner`].
///
/// The lowest bit of the ID is used to represent complemented edges.
#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) struct NodeId(usize);

impl NodeId {
    /// The terminal node representing `true`, or a trivially `true` node.
    pub(crate) const TRUE: NodeId = NodeId(0);
    /// The terminal node representing `false`, or an unsatisfiable node.
    ///
    /// This is the complement of [`NodeId::TRUE`]: it differs only in the lowest bit.
    pub(crate) const FALSE: NodeId = NodeId(1);

    /// Create a new, optionally complemented, [`NodeId`] with the given index.
    fn new(index: usize, complement: bool) -> NodeId {
        // Ensure the index does not interfere with the lowest complement bit.
        // The `+ 1` keeps the raw values `0` and `1` reserved for the terminals.
        let index = (index + 1) << 1;
        NodeId(index | usize::from(complement))
    }

    /// Returns the index of this ID, ignoring the complemented edge.
    ///
    /// This is the inverse of [`NodeId::new`]; it must not be called on the terminal
    /// nodes, for which the subtraction would underflow.
    fn index(self) -> usize {
        // Ignore the lowest bit and bring indices back to starting at `0`.
        (self.0 >> 1) - 1
    }

    /// Returns `true` if this ID represents a complemented edge.
    fn is_complement(self) -> bool {
        // Whether the lowest bit is set.
        (self.0 & 1) == 1
    }

    /// Returns the complement of this node.
    pub(crate) fn not(self) -> NodeId {
        // Toggle the lowest bit.
        NodeId(self.0 ^ 1)
    }

    /// Returns the complement of this node, if its parent is complemented.
    ///
    /// This method is useful to restore the complemented state of children nodes
    /// when traversing the tree.
pub(crate) fn negate(self, parent: NodeId) -> NodeId { if parent.is_complement() { self.not() } else { self } } /// Returns `true` if this node represents an unsatisfiable node. pub(crate) fn is_false(self) -> bool { self == NodeId::FALSE } /// Returns `true` if this node represents a trivially `true` node. pub(crate) fn is_true(self) -> bool { self == NodeId::TRUE } } /// A [`SmallVec`] with enough elements to hold two constant edges, as well as the /// ranges in-between. type SmallVec = smallvec::SmallVec<[T; 5]>; /// The edges of a decision node. #[derive(PartialEq, Eq, Hash, Clone, Debug)] #[allow(clippy::large_enum_variant)] // Nodes are interned. pub(crate) enum Edges { // The edges of a version variable, representing a disjoint set of ranges that cover // the output space. // // Invariant: All ranges are simple, meaning they can be represented by a bounded // interval without gaps. Additionally, there are at least two edges in the set. Version { edges: SmallVec<(Ranges, NodeId)>, }, // The edges of a string variable, representing a disjoint set of ranges that cover // the output space. // // Invariant: All ranges are simple, meaning they can be represented by a bounded // interval without gaps. Additionally, there are at least two edges in the set. String { edges: SmallVec<(Ranges, NodeId)>, }, // The edges of a boolean variable, representing the values `true` (the `high` child) // and `false` (the `low` child). Boolean { high: NodeId, low: NodeId, }, } impl Edges { /// Returns the [`Edges`] for a boolean variable. fn from_bool(complemented: bool) -> Edges { if complemented { Edges::Boolean { high: NodeId::TRUE, low: NodeId::FALSE, } } else { Edges::Boolean { high: NodeId::FALSE, low: NodeId::TRUE, } } } /// Returns the [`Edges`] for a string expression. /// /// This function will panic for the `In` and `Contains` marker operators, which /// should be represented as separate boolean variables. 
fn from_string(operator: MarkerOperator, value: String) -> Edges { let range: Ranges = match operator { MarkerOperator::Equal => Ranges::singleton(value), MarkerOperator::NotEqual => Ranges::singleton(value).complement(), MarkerOperator::GreaterThan => Ranges::strictly_higher_than(value), MarkerOperator::GreaterEqual => Ranges::higher_than(value), MarkerOperator::LessThan => Ranges::strictly_lower_than(value), MarkerOperator::LessEqual => Ranges::lower_than(value), MarkerOperator::TildeEqual => unreachable!("string comparisons with ~= are ignored"), _ => unreachable!("`in` and `contains` are treated as boolean variables"), }; Edges::String { edges: Edges::from_range(&range), } } /// Returns the [`Edges`] for a version specifier. fn from_specifier(specifier: VersionSpecifier) -> Edges { let specifier = release_specifier_to_range(normalize_specifier(specifier)); Edges::Version { edges: Edges::from_range(&specifier), } } /// Returns an [`Edges`] where values in the given range are `true`. /// /// Only for use when the `key` is a `PythonVersion`. Normalizes to `PythonFullVersion`. fn from_python_versions(versions: Vec, negated: bool) -> Result { let mut range = Ranges::empty(); // TODO(zanieb): We need to make sure this is performant, repeated unions like this do not // seem efficient. for version in versions { let specifier = VersionSpecifier::equals_version(version.clone()); let specifier = python_version_to_full_version(specifier)?; let pubgrub_specifier = release_specifier_to_range(normalize_specifier(specifier)); range = range.union(&pubgrub_specifier); } if negated { range = range.complement(); } Ok(Edges::Version { edges: Edges::from_range(&range), }) } /// Returns an [`Edges`] where values in the given range are `true`. fn from_versions(versions: &Vec, negated: bool) -> Edges { let mut range = Ranges::empty(); // TODO(zanieb): We need to make sure this is performant, repeated unions like this do not // seem efficient. 
for version in versions { range = range.union(&Ranges::singleton(version.clone())); } if negated { range = range.complement(); } Edges::Version { edges: Edges::from_range(&range), } } /// Returns an [`Edges`] where values in the given range are `true`. fn from_range(range: &Ranges) -> SmallVec<(Ranges, NodeId)> where T: Ord + Clone, { let mut edges = SmallVec::new(); // Add the `true` edges. for (start, end) in range.iter() { let range = Ranges::from_range_bounds((start.clone(), end.clone())); edges.push((range, NodeId::TRUE)); } // Add the `false` edges. for (start, end) in range.complement().iter() { let range = Ranges::from_range_bounds((start.clone(), end.clone())); edges.push((range, NodeId::FALSE)); } // Sort the ranges. // // The ranges are disjoint so we don't care about equality. edges.sort_by(|(range1, _), (range2, _)| compare_disjoint_range_start(range1, range2)); edges } /// Merge two [`Edges`], applying the given operation (e.g., `AND` or `OR`) to all intersecting edges. /// /// For example, given two nodes corresponding to the same boolean variable: /// ```text /// left (extra == 'foo'): { true: A, false: B } /// right (extra == 'foo'): { true: C, false: D } /// ``` /// /// We merge them into a single node by applying the given operation to the matching edges. /// ```text /// (extra == 'foo'): { true: (A and C), false: (B and D) } /// ``` /// For non-boolean variables, this is more complex. See `apply_ranges` for details. /// /// Note that the LHS and RHS must be of the same [`Edges`] variant. fn apply( &self, parent: NodeId, right_edges: &Edges, right_parent: NodeId, mut apply: impl FnMut(NodeId, NodeId) -> NodeId, ) -> Edges { match (self, right_edges) { // For version or string variables, we have to split and merge the overlapping ranges. 
(Edges::Version { edges }, Edges::Version { edges: right_edges }) => Edges::Version { edges: Edges::apply_ranges(edges, parent, right_edges, right_parent, apply), }, (Edges::String { edges }, Edges::String { edges: right_edges }) => Edges::String { edges: Edges::apply_ranges(edges, parent, right_edges, right_parent, apply), }, // For boolean variables, we simply merge the low and high edges. ( Edges::Boolean { high, low }, Edges::Boolean { high: right_high, low: right_low, }, ) => Edges::Boolean { high: apply(high.negate(parent), right_high.negate(right_parent)), low: apply(low.negate(parent), right_low.negate(right_parent)), }, _ => unreachable!("cannot merge two `Edges` of different types"), } } /// Merge two range maps, applying the given operation to all disjoint, intersecting ranges. /// /// For example, two nodes might have the following edges: /// ```text /// left (python_version): { [0, 3.4): A, [3.4, 3.4]: B, (3.4, inf): C } /// right (python_version): { [0, 3.6): D, [3.6, 3.6]: E, (3.6, inf): F } /// ``` /// /// Unlike with boolean variables, we can't simply apply the operation the static `true` /// and `false` edges. Instead, we have to split and merge overlapping ranges: /// ```text /// python_version: { /// [0, 3.4): (A and D), /// [3.4, 3.4]: (B and D), /// (3.4, 3.6): (C and D), /// [3.6, 3.6]: (C and E), /// (3.6, inf): (C and F) /// } /// ``` /// /// The left and right edges may also have a restricted range from calls to `restrict_versions`. /// In that case, we drop any ranges that do not exist in the domain of both edges. Note that /// this should not occur in practice because `requires-python` bounds are global. 
fn apply_ranges( left_edges: &SmallVec<(Ranges, NodeId)>, left_parent: NodeId, right_edges: &SmallVec<(Ranges, NodeId)>, right_parent: NodeId, mut apply: impl FnMut(NodeId, NodeId) -> NodeId, ) -> SmallVec<(Ranges, NodeId)> where T: Clone + Ord, { let mut combined = SmallVec::new(); for (left_range, left_child) in left_edges { // Split the two maps into a set of disjoint and overlapping ranges, merging the // intersections. // // Note that restrict ranges (see `restrict_versions`) makes finding intersections // a bit more complicated despite the ranges being sorted. We cannot simply zip both // sets, as they may contain arbitrary gaps. Instead, we use a quadratic search for // simplicity as the set of ranges for a given variable is typically very small. for (right_range, right_child) in right_edges { let intersection = right_range.intersection(left_range); if intersection.is_empty() { // TODO(ibraheem): take advantage of the sorted ranges to `break` early continue; } // Merge the intersection. let node = apply( left_child.negate(left_parent), right_child.negate(right_parent), ); match combined.last_mut() { // Combine ranges if possible. Some((range, prev)) if *prev == node && can_conjoin(range, &intersection) => { *range = range.union(&intersection); } _ => combined.push((intersection.clone(), node)), } } } combined } // Returns `true` if two [`Edges`] are disjoint. fn is_disjoint( &self, parent: NodeId, right_edges: &Edges, right_parent: NodeId, interner: &mut InternerGuard<'_>, ) -> bool { match (self, right_edges) { // For version or string variables, we have to split and check the overlapping ranges. (Edges::Version { edges }, Edges::Version { edges: right_edges }) => { Edges::is_disjoint_ranges(edges, parent, right_edges, right_parent, interner) } (Edges::String { edges }, Edges::String { edges: right_edges }) => { Edges::is_disjoint_ranges(edges, parent, right_edges, right_parent, interner) } // For boolean variables, we simply check the low and high edges. 
( Edges::Boolean { high, low }, Edges::Boolean { high: right_high, low: right_low, }, ) => { interner.is_disjoint(high.negate(parent), right_high.negate(right_parent)) && interner.is_disjoint(low.negate(parent), right_low.negate(right_parent)) } _ => unreachable!("cannot merge two `Edges` of different types"), } } // Returns `true` if all intersecting ranges in two range maps are disjoint. fn is_disjoint_ranges( left_edges: &SmallVec<(Ranges, NodeId)>, left_parent: NodeId, right_edges: &SmallVec<(Ranges, NodeId)>, right_parent: NodeId, interner: &mut InternerGuard<'_>, ) -> bool where T: Clone + Ord, { // This is similar to the routine in `apply_ranges` except we only care about disjointness, // not the resulting edges. for (left_range, left_child) in left_edges { for (right_range, right_child) in right_edges { let intersection = right_range.intersection(left_range); if intersection.is_empty() { continue; } // Ensure the intersection is disjoint. if !interner.is_disjoint( left_child.negate(left_parent), right_child.negate(right_parent), ) { return false; } } } true } // Apply the given function to all direct children of this node. fn map(&self, parent: NodeId, mut f: impl FnMut(NodeId) -> NodeId) -> Edges { match self { Edges::Version { edges: map } => Edges::Version { edges: map .iter() .cloned() .map(|(range, node)| (range, f(node.negate(parent)))) .collect(), }, Edges::String { edges: map } => Edges::String { edges: map .iter() .cloned() .map(|(range, node)| (range, f(node.negate(parent)))) .collect(), }, Edges::Boolean { high, low } => Edges::Boolean { low: f(low.negate(parent)), high: f(high.negate(parent)), }, } } // Returns an iterator over all direct children of this node. 
fn nodes(&self) -> impl Iterator + '_ { match self { Edges::Version { edges: map } => { Either::Left(Either::Left(map.iter().map(|(_, node)| *node))) } Edges::String { edges: map } => { Either::Left(Either::Right(map.iter().map(|(_, node)| *node))) } Edges::Boolean { high, low } => Either::Right([*high, *low].into_iter()), } } // Returns the complement of this [`Edges`]. fn not(self) -> Edges { match self { Edges::Version { edges: map } => Edges::Version { edges: map .into_iter() .map(|(range, node)| (range, node.not())) .collect(), }, Edges::String { edges: map } => Edges::String { edges: map .into_iter() .map(|(range, node)| (range, node.not())) .collect(), }, Edges::Boolean { high, low } => Edges::Boolean { high: high.not(), low: low.not(), }, } } } // Normalize a [`VersionSpecifier`] before adding it to the tree. fn normalize_specifier(specifier: VersionSpecifier) -> VersionSpecifier { let (operator, version) = specifier.into_parts(); // The decision diagram relies on the assumption that the negation of a marker tree is // the complement of the marker space. However, pre-release versions violate this assumption. // // For example, the marker `python_full_version > '3.9' or python_full_version <= '3.9'` // does not match `python_full_version == 3.9.0a0` and so cannot simplify to `true`. However, // its negation, `python_full_version > '3.9' and python_full_version <= '3.9'`, also does not // match `3.9.0a0` and simplifies to `false`, which violates the algebra decision diagrams // rely on. For this reason we ignore pre-release versions entirely when evaluating markers. // // Note that `python_version` cannot take on pre-release values as it is truncated to just the // major and minor version segments. Thus using release-only specifiers is definitely necessary // for `python_version` to fully simplify any ranges, such as `python_version > '3.9' or python_version <= '3.9'`, // which is always `true` for `python_version`. 
For `python_full_version` however, this decision // is a semantic change. let mut release = version.release(); // Strip any trailing `0`s. // // The [`Version`] type ignores trailing `0`s for equality, but still preserves them in its // [`Display`] output. We must normalize all versions by stripping trailing `0`s to remove the // distinction between versions like `3.9` and `3.9.0`. Otherwise, their output would depend on // which form was added to the global marker interner first. // // Note that we cannot strip trailing `0`s for star equality, as `==3.0.*` is different from `==3.*`. if !operator.is_star() { if let Some(end) = release.iter().rposition(|segment| *segment != 0) { if end > 0 { release = &release[..=end]; } } } VersionSpecifier::from_version(operator, Version::new(release)).unwrap() } /// Returns the equivalent `python_full_version` specifier for a `python_version` specifier. /// /// Returns `Err` with a constant node if the equivalent comparison is always `true` or `false`. fn python_version_to_full_version(specifier: VersionSpecifier) -> Result { // Extract the major and minor version segments if the specifier contains exactly // those segments, or if it contains a major segment with an implied minor segment of `0`. let major_minor = match *specifier.version().release() { // For star operators, we cannot add a trailing `0`. // // `python_version == 3.*` is equivalent to `python_full_version == 3.*`. Adding a // trailing `0` would result in `python_version == 3.0.*`, which is incorrect. [_major] if specifier.operator().is_star() => return Ok(specifier), // Add a trailing `0` for the minor version, which is implied. // For example, `python_version == 3` matches `3.0.1`, `3.0.2`, etc. [major] => Some((major, 0)), [major, minor] => Some((major, minor)), // Specifiers including segments beyond the minor version require separate handling. 
_ => None, }; // Note that the values taken on by `python_version` are truncated to their major and minor // version segments. For example, a python version of `3.7.0`, `3.7.1`, and so on, would all // result in a `python_version` marker of `3.7`. For this reason, we must consider the range // of values that would satisfy a `python_version` specifier when truncated in order to transform // the the specifier into its `python_full_version` equivalent. if let Some((major, minor)) = major_minor { let version = Version::new([major, minor]); Ok(match specifier.operator() { // `python_version == 3.7` is equivalent to `python_full_version == 3.7.*`. Operator::Equal | Operator::ExactEqual => { VersionSpecifier::equals_star_version(version) } // `python_version != 3.7` is equivalent to `python_full_version != 3.7.*`. Operator::NotEqual => VersionSpecifier::not_equals_star_version(version), // `python_version > 3.7` is equivalent to `python_full_version >= 3.8`. Operator::GreaterThan => { VersionSpecifier::greater_than_equal_version(Version::new([major, minor + 1])) } // `python_version < 3.7` is equivalent to `python_full_version < 3.7`. Operator::LessThan => specifier, // `python_version >= 3.7` is equivalent to `python_full_version >= 3.7`. Operator::GreaterThanEqual => specifier, // `python_version <= 3.7` is equivalent to `python_full_version < 3.8`. Operator::LessThanEqual => { VersionSpecifier::less_than_version(Version::new([major, minor + 1])) } // `==3.7.*`, `!=3.7.*`, `~=3.7` already represent the equivalent `python_full_version` // comparison. Operator::EqualStar | Operator::NotEqualStar | Operator::TildeEqual => specifier, }) } else { let &[major, minor, ..] = specifier.version().release() else { unreachable!() }; Ok(match specifier.operator() { // `python_version` cannot have more than two release segments, so equality is impossible. 
Operator::Equal | Operator::ExactEqual | Operator::EqualStar | Operator::TildeEqual => { return Err(NodeId::FALSE) } // Similarly, inequalities are always `true`. Operator::NotEqual | Operator::NotEqualStar => return Err(NodeId::TRUE), // `python_version {<,<=} 3.7.8` is equivalent to `python_full_version < 3.8`. Operator::LessThan | Operator::LessThanEqual => { VersionSpecifier::less_than_version(Version::new([major, minor + 1])) } // `python_version {>,>=} 3.7.8` is equivalent to `python_full_version >= 3.8`. Operator::GreaterThan | Operator::GreaterThanEqual => { VersionSpecifier::greater_than_equal_version(Version::new([major, minor + 1])) } }) } } /// Compares the start of two ranges that are known to be disjoint. fn compare_disjoint_range_start(range1: &Ranges, range2: &Ranges) -> Ordering where T: Ord, { let (upper1, _) = range1.bounding_range().unwrap(); let (upper2, _) = range2.bounding_range().unwrap(); match (upper1, upper2) { (Bound::Unbounded, _) => Ordering::Less, (_, Bound::Unbounded) => Ordering::Greater, (Bound::Included(v1), Bound::Excluded(v2)) if v1 == v2 => Ordering::Less, (Bound::Excluded(v1), Bound::Included(v2)) if v1 == v2 => Ordering::Greater, // Note that the ranges are disjoint, so their lower bounds cannot be equal. (Bound::Included(v1) | Bound::Excluded(v1), Bound::Included(v2) | Bound::Excluded(v2)) => { v1.cmp(v2) } } } /// Returns `true` if two disjoint ranges can be conjoined seamlessly without introducing a gap. 
fn can_conjoin(range1: &Ranges, range2: &Ranges) -> bool where T: Ord + Clone, { let Some((_, end)) = range1.bounding_range() else { return false; }; let Some((start, _)) = range2.bounding_range() else { return false; }; match (end, start) { (Bound::Included(v1), Bound::Excluded(v2)) if v1 == v2 => true, (Bound::Excluded(v1), Bound::Included(v2)) if v1 == v2 => true, _ => false, } } impl fmt::Debug for NodeId { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { if self.is_false() { return write!(f, "false"); } if self.is_true() { return write!(f, "true"); } if self.is_complement() { write!(f, "{:?}", INTERNER.shared.node(*self).clone().not()) } else { write!(f, "{:?}", INTERNER.shared.node(*self)) } } } #[cfg(test)] mod tests; pep508_rs-0.9.1/src/marker/environment.rs000064400000000000000000000307321046102023000163470ustar 00000000000000use std::sync::Arc; use pep440_rs::{Version, VersionParseError}; use crate::{MarkerValueString, MarkerValueVersion, StringVersion}; /// The marker values for a python interpreter, normally the current one /// /// /// /// Some are `(String, Version)` because we have to support version comparison #[allow(missing_docs, clippy::unsafe_derive_deserialize)] #[derive(Clone, Debug, Eq, Hash, PartialEq, serde::Deserialize, serde::Serialize)] pub struct MarkerEnvironment { #[serde(flatten)] inner: Arc, } #[derive(Clone, Debug, Eq, Hash, PartialEq, serde::Deserialize, serde::Serialize)] struct MarkerEnvironmentInner { implementation_name: String, implementation_version: StringVersion, os_name: String, platform_machine: String, platform_python_implementation: String, platform_release: String, platform_system: String, platform_version: String, python_full_version: StringVersion, python_version: StringVersion, sys_platform: String, } impl MarkerEnvironment { /// Returns of the PEP 440 version typed value of the key in the current environment pub fn get_version(&self, key: &MarkerValueVersion) -> &Version { match key { 
MarkerValueVersion::ImplementationVersion => &self.implementation_version().version, MarkerValueVersion::PythonFullVersion => &self.python_full_version().version, MarkerValueVersion::PythonVersion => &self.python_version().version, } } /// Returns of the stringly typed value of the key in the current environment pub fn get_string(&self, key: &MarkerValueString) -> &str { match key { MarkerValueString::ImplementationName => self.implementation_name(), MarkerValueString::OsName | MarkerValueString::OsNameDeprecated => self.os_name(), MarkerValueString::PlatformMachine | MarkerValueString::PlatformMachineDeprecated => { self.platform_machine() } MarkerValueString::PlatformPythonImplementation | MarkerValueString::PlatformPythonImplementationDeprecated | MarkerValueString::PythonImplementationDeprecated => { self.platform_python_implementation() } MarkerValueString::PlatformRelease => self.platform_release(), MarkerValueString::PlatformSystem => self.platform_system(), MarkerValueString::PlatformVersion | MarkerValueString::PlatformVersionDeprecated => { self.platform_version() } MarkerValueString::SysPlatform | MarkerValueString::SysPlatformDeprecated => { self.sys_platform() } } } } /// APIs for retrieving specific parts of a marker environment. impl MarkerEnvironment { /// Returns the name of the Python implementation for this environment. /// /// This is equivalent to `sys.implementation.name`. /// /// Some example values are: `cpython`. #[inline] pub fn implementation_name(&self) -> &str { &self.inner.implementation_name } /// Returns the Python implementation version for this environment. /// /// This value is derived from `sys.implementation.version`. See [PEP 508 /// environment markers] for full details. /// /// This is equivalent to `sys.implementation.name`. /// /// Some example values are: `3.4.0`, `3.5.0b1`. 
///
/// [PEP 508 environment markers]: https://peps.python.org/pep-0508/#environment-markers
#[inline]
pub fn implementation_version(&self) -> &StringVersion {
    let inner = &*self.inner;
    &inner.implementation_version
}

/// Returns the operating system name recorded for this environment.
///
/// This is equivalent to `os.name`.
///
/// Some example values are: `posix`, `java`.
#[inline]
pub fn os_name(&self) -> &str {
    self.inner.os_name.as_str()
}

/// Returns the machine name of this environment's platform.
///
/// This is equivalent to `platform.machine()`.
///
/// Some example values are: `x86_64`.
#[inline]
pub fn platform_machine(&self) -> &str {
    self.inner.platform_machine.as_str()
}

/// Returns the Python implementation name of this environment's platform.
///
/// This is equivalent to `platform.python_implementation()`.
///
/// Some example values are: `CPython`, `Jython`.
#[inline]
pub fn platform_python_implementation(&self) -> &str {
    self.inner.platform_python_implementation.as_str()
}

/// Returns the release of this environment's platform.
///
/// This is equivalent to `platform.release()`.
///
/// Some example values are: `3.14.1-x86_64-linode39`, `14.5.0`, `1.8.0_51`.
#[inline]
pub fn platform_release(&self) -> &str {
    self.inner.platform_release.as_str()
}

/// Returns the system of this environment's platform.
///
/// This is equivalent to `platform.system()`.
///
/// Some example values are: `Linux`, `Windows`, `Java`.
#[inline]
pub fn platform_system(&self) -> &str {
    self.inner.platform_system.as_str()
}

/// Returns the version for this environment's platform.
///
/// This is equivalent to `platform.version()`.
///
/// Some example values are: `#1 SMP Fri Apr 25 13:07:35 EDT 2014`,
/// `Java HotSpot(TM) 64-Bit Server VM, 25.51-b03, Oracle Corporation`,
/// `Darwin Kernel Version 14.5.0: Wed Jul 29 02:18:53 PDT 2015;
/// root:xnu-2782.40.9~2/RELEASE_X86_64`.
#[inline] pub fn platform_version(&self) -> &str { &self.inner.platform_version } /// Returns the full version of Python for this environment. /// /// This is equivalent to `platform.python_version()`. /// /// Some example values are: `3.4.0`, `3.5.0b1`. #[inline] pub fn python_full_version(&self) -> &StringVersion { &self.inner.python_full_version } /// Returns the version of Python for this environment. /// /// This is equivalent to `'.'.join(platform.python_version_tuple()[:2])`. /// /// Some example values are: `3.4`, `2.7`. #[inline] pub fn python_version(&self) -> &StringVersion { &self.inner.python_version } /// Returns the name of the system platform for this environment. /// /// This is equivalent to `sys.platform`. /// /// Some example values are: `linux`, `linux2`, `darwin`, `java1.8.0_51` /// (note that `linux` is from Python3 and `linux2` from Python2). #[inline] pub fn sys_platform(&self) -> &str { &self.inner.sys_platform } } /// APIs for setting specific parts of a marker environment. impl MarkerEnvironment { /// Set the name of the Python implementation for this environment. /// /// See also [`MarkerEnvironment::implementation_name`]. #[inline] #[must_use] pub fn with_implementation_name(mut self, value: impl Into) -> MarkerEnvironment { Arc::make_mut(&mut self.inner).implementation_name = value.into(); self } /// Set the Python implementation version for this environment. /// /// See also [`MarkerEnvironment::implementation_version`]. #[inline] #[must_use] pub fn with_implementation_version( mut self, value: impl Into, ) -> MarkerEnvironment { Arc::make_mut(&mut self.inner).implementation_version = value.into(); self } /// Set the name of the operating system for this environment. /// /// See also [`MarkerEnvironment::os_name`]. #[inline] #[must_use] pub fn with_os_name(mut self, value: impl Into) -> MarkerEnvironment { Arc::make_mut(&mut self.inner).os_name = value.into(); self } /// Set the name of the machine for this environment's platform. 
///
/// See also [`MarkerEnvironment::platform_machine`].
#[inline]
#[must_use]
pub fn with_platform_machine(mut self, value: impl Into<String>) -> MarkerEnvironment {
    // `Arc::make_mut` clones the inner record first when the `Arc` is shared, so other
    // handles to the same environment keep their old value.
    Arc::make_mut(&mut self.inner).platform_machine = value.into();
    self
}

/// Set the name of the Python implementation for this environment's
/// platform.
///
/// See also [`MarkerEnvironment::platform_python_implementation`].
#[inline]
#[must_use]
pub fn with_platform_python_implementation(
    mut self,
    value: impl Into<String>,
) -> MarkerEnvironment {
    Arc::make_mut(&mut self.inner).platform_python_implementation = value.into();
    self
}

/// Set the release for this environment's platform.
///
/// See also [`MarkerEnvironment::platform_release`].
#[inline]
#[must_use]
pub fn with_platform_release(mut self, value: impl Into<String>) -> MarkerEnvironment {
    Arc::make_mut(&mut self.inner).platform_release = value.into();
    self
}

/// Set the system for this environment's platform.
///
/// See also [`MarkerEnvironment::platform_system`].
#[inline]
#[must_use]
pub fn with_platform_system(mut self, value: impl Into<String>) -> MarkerEnvironment {
    Arc::make_mut(&mut self.inner).platform_system = value.into();
    self
}

/// Set the version for this environment's platform.
///
/// See also [`MarkerEnvironment::platform_version`].
#[inline]
#[must_use]
pub fn with_platform_version(mut self, value: impl Into<String>) -> MarkerEnvironment {
    Arc::make_mut(&mut self.inner).platform_version = value.into();
    self
}

/// Set the full version of Python for this environment.
///
/// The value is stored as a [`StringVersion`], so anything convertible into one is accepted.
///
/// See also [`MarkerEnvironment::python_full_version`].
#[inline]
#[must_use]
pub fn with_python_full_version(
    mut self,
    value: impl Into<StringVersion>,
) -> MarkerEnvironment {
    Arc::make_mut(&mut self.inner).python_full_version = value.into();
    self
}

/// Set the version of Python for this environment.
///
/// See also [`MarkerEnvironment::python_version`].
#[inline] #[must_use] pub fn with_python_version(mut self, value: impl Into) -> MarkerEnvironment { Arc::make_mut(&mut self.inner).python_version = value.into(); self } /// Set the name of the system platform for this environment. /// /// See also [`MarkerEnvironment::sys_platform`]. #[inline] #[must_use] pub fn with_sys_platform(mut self, value: impl Into) -> MarkerEnvironment { Arc::make_mut(&mut self.inner).sys_platform = value.into(); self } } /// A builder for constructing a marker environment. /// /// A value of this type can be fallibly converted to a full /// [`MarkerEnvironment`] via [`MarkerEnvironment::try_from`]. This can fail when /// the version strings given aren't valid. /// /// The main utility of this type is for constructing dummy or test environment /// values. #[allow(missing_docs)] #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub struct MarkerEnvironmentBuilder<'a> { pub implementation_name: &'a str, pub implementation_version: &'a str, pub os_name: &'a str, pub platform_machine: &'a str, pub platform_python_implementation: &'a str, pub platform_release: &'a str, pub platform_system: &'a str, pub platform_version: &'a str, pub python_full_version: &'a str, pub python_version: &'a str, pub sys_platform: &'a str, } impl<'a> TryFrom> for MarkerEnvironment { type Error = VersionParseError; fn try_from(builder: MarkerEnvironmentBuilder<'a>) -> Result { Ok(MarkerEnvironment { inner: Arc::new(MarkerEnvironmentInner { implementation_name: builder.implementation_name.to_string(), implementation_version: builder.implementation_version.parse()?, os_name: builder.os_name.to_string(), platform_machine: builder.platform_machine.to_string(), platform_python_implementation: builder.platform_python_implementation.to_string(), platform_release: builder.platform_release.to_string(), platform_system: builder.platform_system.to_string(), platform_version: builder.platform_version.to_string(), python_full_version: builder.python_full_version.parse()?, 
python_version: builder.python_version.parse()?, sys_platform: builder.sys_platform.to_string(), }), }) } } pep508_rs-0.9.1/src/marker/mod.rs000064400000000000000000000034521046102023000145610ustar 00000000000000//! PEP 508 markers implementations with validation and warnings //! //! Markers allow you to install dependencies only in specific environments (python version, //! operating system, architecture, etc.) or when a specific feature is activated. E.g. you can //! say `importlib-metadata ; python_version < "3.8"` or //! `itsdangerous (>=1.1.0) ; extra == 'security'`. Unfortunately, the marker grammar has some //! oversights (e.g. ) and //! the design of comparisons (PEP 440 comparisons with lexicographic fallback) leads to confusing //! outcomes. This implementation tries to carefully validate everything and emit warnings whenever //! bogus comparisons with unintended semantics are made. mod algebra; mod environment; pub(crate) mod parse; mod simplify; mod tree; pub use environment::{MarkerEnvironment, MarkerEnvironmentBuilder}; pub use tree::{ ContainsMarkerTree, ExtraMarkerTree, ExtraOperator, InMarkerTree, MarkerExpression, MarkerOperator, MarkerTree, MarkerTreeContents, MarkerTreeDebugGraph, MarkerTreeKind, MarkerValue, MarkerValueExtra, MarkerValueString, MarkerValueVersion, MarkerWarningKind, StringMarkerTree, StringVersion, VersionMarkerTree, }; /// `serde` helpers for [`MarkerTree`]. pub mod ser { use super::MarkerTree; use serde::Serialize; /// A helper for `serde(skip_serializing_if)`. pub fn is_empty(marker: &MarkerTree) -> bool { marker.contents().is_none() } /// A helper for `serde(serialize_with)`. /// /// Note this will panic if `marker.contents()` is `None`, and so should be paired with `is_empty`. 
pub fn serialize(marker: &MarkerTree, s: S) -> Result where S: serde::Serializer, { marker.contents().unwrap().serialize(s) } } pep508_rs-0.9.1/src/marker/parse.rs000064400000000000000000000501321046102023000151110ustar 00000000000000use std::str::FromStr; use pep440_rs::{Version, VersionPattern, VersionSpecifier}; use crate::cursor::Cursor; use crate::marker::MarkerValueExtra; use crate::{ ExtraName, ExtraOperator, MarkerExpression, MarkerOperator, MarkerTree, MarkerValue, MarkerValueVersion, MarkerWarningKind, Pep508Error, Pep508ErrorSource, Pep508Url, Reporter, }; /// ```text /// version_cmp = wsp* <'<=' | '<' | '!=' | '==' | '>=' | '>' | '~=' | '==='> /// marker_op = version_cmp | (wsp* 'in') | (wsp* 'not' wsp+ 'in') /// ``` /// The `wsp*` has already been consumed by the caller. fn parse_marker_operator( cursor: &mut Cursor, ) -> Result> { let (start, len) = if cursor.peek_char().is_some_and(char::is_alphabetic) { // "in" or "not" cursor.take_while(|char| !char.is_whitespace() && char != '\'' && char != '"') } else { // A mathematical operator cursor.take_while(|char| matches!(char, '<' | '=' | '>' | '~' | '!')) }; let operator = cursor.slice(start, len); if operator == "not" { // 'not' wsp+ 'in' match cursor.next() { None => { return Err(Pep508Error { message: Pep508ErrorSource::String( "Expected whitespace after 'not', found end of input".to_string(), ), start: cursor.pos(), len: 1, input: cursor.to_string(), }); } Some((_, whitespace)) if whitespace.is_whitespace() => {} Some((pos, other)) => { return Err(Pep508Error { message: Pep508ErrorSource::String(format!( "Expected whitespace after `not`, found `{other}`" )), start: pos, len: other.len_utf8(), input: cursor.to_string(), }); } }; cursor.eat_whitespace(); cursor.next_expect_char('i', cursor.pos())?; cursor.next_expect_char('n', cursor.pos())?; return Ok(MarkerOperator::NotIn); } MarkerOperator::from_str(operator).map_err(|_| Pep508Error { message: Pep508ErrorSource::String(format!( "Expected a valid 
marker operator (such as `>=` or `not in`), found `{operator}`" )), start, len, input: cursor.to_string(), }) } /// Either a single or double quoted string or one of '`python_version`', '`python_full_version`', /// '`os_name`', '`sys_platform`', '`platform_release`', '`platform_system`', '`platform_version`', /// '`platform_machine`', '`platform_python_implementation`', '`implementation_name`', /// '`implementation_version`', 'extra' pub(crate) fn parse_marker_value( cursor: &mut Cursor, ) -> Result> { // > User supplied constants are always encoded as strings with either ' or " quote marks. Note // > that backslash escapes are not defined, but existing implementations do support them. They // > are not included in this specification because they add complexity and there is no observable // > need for them today. Similarly we do not define non-ASCII character support: all the runtime // > variables we are referencing are expected to be ASCII-only. match cursor.peek() { None => Err(Pep508Error { message: Pep508ErrorSource::String( "Expected marker value, found end of dependency specification".to_string(), ), start: cursor.pos(), len: 1, input: cursor.to_string(), }), // It can be a string ... Some((start_pos, quotation_mark @ ('"' | '\''))) => { cursor.next(); let (start, len) = cursor.take_while(|c| c != quotation_mark); let value = cursor.slice(start, len).to_string(); cursor.next_expect_char(quotation_mark, start_pos)?; Ok(MarkerValue::QuotedString(value)) } // ... 
or it can be a keyword Some(_) => { let (start, len) = cursor.take_while(|char| { !char.is_whitespace() && !['>', '=', '<', '!', '~', ')'].contains(&char) }); let key = cursor.slice(start, len); MarkerValue::from_str(key).map_err(|_| Pep508Error { message: Pep508ErrorSource::String(format!( "Expected a quoted string or a valid marker name, found `{key}`" )), start, len, input: cursor.to_string(), }) } } } /// ```text /// marker_var:l marker_op:o marker_var:r /// ``` pub(crate) fn parse_marker_key_op_value( cursor: &mut Cursor, reporter: &mut impl Reporter, ) -> Result, Pep508Error> { cursor.eat_whitespace(); let l_value = parse_marker_value(cursor)?; cursor.eat_whitespace(); // "not in" and "in" must be preceded by whitespace. We must already have matched a whitespace // when we're here because other `parse_marker_key` would have pulled the characters in and // errored let operator = parse_marker_operator(cursor)?; cursor.eat_whitespace(); let r_value = parse_marker_value(cursor)?; // Convert a ` ` expression into its // typed equivalent. 
let expr = match l_value { // Either: // - ` ` // - ` in ` and ("not in") MarkerValue::MarkerEnvVersion(key) => { let MarkerValue::QuotedString(value) = r_value else { reporter.report( MarkerWarningKind::Pep440Error, format!( "Expected double quoted PEP 440 version to compare with {key}, found {r_value}, will be ignored" ), ); return Ok(None); }; // Check for `in` and `not in` expressions if let Some(expr) = parse_version_in_expr(key.clone(), operator, &value, reporter) { return Ok(Some(expr)); } // Otherwise, it's a normal version expression parse_version_expr(key.clone(), operator, &value, reporter) } // The only sound choice for this is ` ` MarkerValue::MarkerEnvString(key) => { let value = match r_value { MarkerValue::Extra | MarkerValue::MarkerEnvVersion(_) | MarkerValue::MarkerEnvString(_) => { reporter.report( MarkerWarningKind::MarkerMarkerComparison, "Comparing two markers with each other doesn't make any sense, will be ignored" .to_string(), ); return Ok(None); } MarkerValue::QuotedString(r_string) => r_string, }; if operator == MarkerOperator::TildeEqual { reporter.report( MarkerWarningKind::LexicographicComparison, "Can't compare strings with `~=`, will be ignored".to_string(), ); return Ok(None); } Some(MarkerExpression::String { key, operator, value, }) } // `extra == '...'` MarkerValue::Extra => { let value = match r_value { MarkerValue::MarkerEnvVersion(_) | MarkerValue::MarkerEnvString(_) | MarkerValue::Extra => { reporter.report( MarkerWarningKind::ExtraInvalidComparison, "Comparing extra with something other than a quoted string is wrong, will be ignored" .to_string(), ); return Ok(None); } MarkerValue::QuotedString(value) => value, }; parse_extra_expr(operator, &value, reporter) } // This is either MarkerEnvVersion, MarkerEnvString or Extra inverted MarkerValue::QuotedString(l_string) => { match r_value { // The only sound choice for this is ` ` MarkerValue::MarkerEnvVersion(key) => { parse_inverted_version_expr(&l_string, operator, key.clone(), 
reporter) } // '...' == MarkerValue::MarkerEnvString(key) => Some(MarkerExpression::String { key, // Invert the operator to normalize the expression order. operator: operator.invert(), value: l_string, }), // `'...' == extra` MarkerValue::Extra => parse_extra_expr(operator, &l_string, reporter), // `'...' == '...'`, doesn't make much sense MarkerValue::QuotedString(_) => { // Not even pypa/packaging 22.0 supports this // https://github.com/pypa/packaging/issues/632 reporter.report( MarkerWarningKind::StringStringComparison, format!( "Comparing two quoted strings with each other doesn't make sense: '{l_string}' {operator} {r_value}, will be ignored" ), ); None } } } }; Ok(expr) } /// Creates an instance of [`MarkerExpression::VersionIn`] with the given values. /// /// Some important caveats apply here. /// /// While the specification defines this operation as a substring search, for versions, we use a /// version-aware match so we can perform algebra on the expressions. This means that some markers /// will not be evaluated according to the specification, but these marker expressions are /// relatively rare so the trade-off is acceptable. /// /// The following limited expression is supported: /// /// ```text /// [not] in ' [additional versions]' /// ``` /// /// where the version is PEP 440 compliant. Arbitrary whitespace is allowed between versions. /// /// Returns `None` if the [`MarkerOperator`] is not relevant. /// Reports a warning if an invalid version is encountered, and returns `None`. 
fn parse_version_in_expr( key: MarkerValueVersion, operator: MarkerOperator, value: &str, reporter: &mut impl Reporter, ) -> Option { if !matches!(operator, MarkerOperator::In | MarkerOperator::NotIn) { return None; } let negated = matches!(operator, MarkerOperator::NotIn); let mut cursor = Cursor::new(value); let mut versions = Vec::new(); // Parse all of the values in the list as versions loop { // Allow arbitrary whitespace between versions cursor.eat_whitespace(); let (start, len) = cursor.take_while(|c| !c.is_whitespace()); if len == 0 { break; } let version = match Version::from_str(cursor.slice(start, len)) { Ok(version) => version, Err(err) => { reporter.report( MarkerWarningKind::Pep440Error, format!( "Expected PEP 440 versions to compare with {key}, found {value}, will be ignored: {err}" ), ); return None; } }; versions.push(version); } Some(MarkerExpression::VersionIn { key, versions, negated, }) } /// Creates an instance of [`MarkerExpression::Version`] with the given values. /// /// Reports a warning on failure, and returns `None`. 
fn parse_version_expr( key: MarkerValueVersion, marker_operator: MarkerOperator, value: &str, reporter: &mut impl Reporter, ) -> Option { let pattern = match value.parse::() { Ok(pattern) => pattern, Err(err) => { reporter.report( MarkerWarningKind::Pep440Error, format!( "Expected PEP 440 version to compare with {key}, found {value}, will be ignored: {err}" ), ); return None; } }; let Some(operator) = marker_operator.to_pep440_operator() else { reporter.report( MarkerWarningKind::Pep440Error, format!( "Expected PEP 440 version operator to compare {key} with `{version}`, found `{marker_operator}`, will be ignored", version = pattern.version() ), ); return None; }; let specifier = match VersionSpecifier::from_pattern(operator, pattern) { Ok(specifier) => specifier, Err(err) => { reporter.report( MarkerWarningKind::Pep440Error, format!("Invalid operator/version combination: {err}"), ); return None; } }; Some(MarkerExpression::Version { key, specifier }) } /// Creates an instance of [`MarkerExpression::Version`] from an inverted expression. /// /// Reports a warning on failure, and returns `None`. fn parse_inverted_version_expr( value: &str, marker_operator: MarkerOperator, key: MarkerValueVersion, reporter: &mut impl Reporter, ) -> Option { // Invert the operator to normalize the expression order. let marker_operator = marker_operator.invert(); // Not star allowed here, `'3.*' == python_version` is not a valid PEP 440 comparison. 
let version = match value.parse::() { Ok(version) => version, Err(err) => { reporter.report( MarkerWarningKind::Pep440Error, format!( "Expected PEP 440 version to compare with {key}, found {value}, will be ignored: {err}" ), ); return None; } }; let Some(operator) = marker_operator.to_pep440_operator() else { reporter.report( MarkerWarningKind::Pep440Error, format!( "Expected PEP 440 version operator to compare {key} with `{version}`, found `{marker_operator}`, will be ignored" ), ); return None; }; let specifier = match VersionSpecifier::from_version(operator, version) { Ok(specifier) => specifier, Err(err) => { reporter.report( MarkerWarningKind::Pep440Error, format!("Invalid operator/version combination: {err}"), ); return None; } }; Some(MarkerExpression::Version { key, specifier }) } /// Creates an instance of [`MarkerExpression::Extra`] with the given values, falling back to /// [`MarkerExpression::Arbitrary`] on failure. fn parse_extra_expr( operator: MarkerOperator, value: &str, reporter: &mut impl Reporter, ) -> Option { let name = match ExtraName::from_str(value) { Ok(name) => MarkerValueExtra::Extra(name), Err(err) => { reporter.report( MarkerWarningKind::ExtraInvalidComparison, format!("Expected extra name (found `{value}`): {err}"), ); MarkerValueExtra::Arbitrary(value.to_string()) } }; if let Some(operator) = ExtraOperator::from_marker_operator(operator) { return Some(MarkerExpression::Extra { operator, name }); } reporter.report( MarkerWarningKind::ExtraInvalidComparison, "Comparing extra with something other than a quoted string is wrong, will be ignored" .to_string(), ); None } /// ```text /// marker_expr = marker_var:l marker_op:o marker_var:r -> (o, l, r) /// | wsp* '(' marker:m wsp* ')' -> m /// ``` fn parse_marker_expr( cursor: &mut Cursor, reporter: &mut impl Reporter, ) -> Result, Pep508Error> { cursor.eat_whitespace(); if let Some(start_pos) = cursor.eat_char('(') { let marker = parse_marker_or(cursor, reporter)?; 
cursor.next_expect_char(')', start_pos)?; Ok(marker) } else { Ok(parse_marker_key_op_value(cursor, reporter)?.map(MarkerTree::expression)) } } /// ```text /// marker_and = marker_expr:l wsp* 'and' marker_expr:r -> ('and', l, r) /// | marker_expr:m -> m /// ``` fn parse_marker_and( cursor: &mut Cursor, reporter: &mut impl Reporter, ) -> Result, Pep508Error> { parse_marker_op(cursor, "and", MarkerTree::and, parse_marker_expr, reporter) } /// ```text /// marker_or = marker_and:l wsp* 'or' marker_and:r -> ('or', l, r) /// | marker_and:m -> m /// ``` fn parse_marker_or( cursor: &mut Cursor, reporter: &mut impl Reporter, ) -> Result, Pep508Error> { parse_marker_op( cursor, "or", MarkerTree::or, |cursor, reporter| parse_marker_and(cursor, reporter), reporter, ) } /// Parses both `marker_and` and `marker_or` #[allow(clippy::type_complexity)] fn parse_marker_op( cursor: &mut Cursor, op: &str, apply: fn(&mut MarkerTree, MarkerTree), parse_inner: fn(&mut Cursor, &mut R) -> Result, Pep508Error>, reporter: &mut R, ) -> Result, Pep508Error> { let mut tree = None; // marker_and or marker_expr let first_element = parse_inner(cursor, reporter)?; if let Some(expression) = first_element { match tree { Some(ref mut tree) => apply(tree, expression), None => tree = Some(expression), } } loop { // wsp* cursor.eat_whitespace(); // ('or' marker_and) or ('and' marker_or) let (start, len) = cursor.peek_while(|c| !c.is_whitespace()); match cursor.slice(start, len) { value if value == op => { cursor.take_while(|c| !c.is_whitespace()); if let Some(expression) = parse_inner(cursor, reporter)? 
{ match tree { Some(ref mut tree) => apply(tree, expression), None => tree = Some(expression), } } } _ => return Ok(tree), } } } /// ```text /// marker = marker_or^ /// ``` pub(crate) fn parse_markers_cursor( cursor: &mut Cursor, reporter: &mut impl Reporter, ) -> Result, Pep508Error> { let marker = parse_marker_or(cursor, reporter)?; cursor.eat_whitespace(); if let Some((pos, unexpected)) = cursor.next() { // If we're here, both parse_marker_or and parse_marker_and returned because the next // character was neither "and" nor "or" return Err(Pep508Error { message: Pep508ErrorSource::String(format!( "Unexpected character '{unexpected}', expected 'and', 'or' or end of input" )), start: pos, len: cursor.remaining(), input: cursor.to_string(), }); }; Ok(marker) } /// Parses markers such as `python_version < '3.8'` or /// `python_version == "3.10" and (sys_platform == "win32" or (os_name == "linux" and implementation_name == 'cpython'))` pub(crate) fn parse_markers( markers: &str, reporter: &mut impl Reporter, ) -> Result> { let mut chars = Cursor::new(markers); // If the tree consisted entirely of arbitrary expressions // that were ignored, it evaluates to true. parse_markers_cursor(&mut chars, reporter).map(|result| result.unwrap_or(MarkerTree::TRUE)) } pep508_rs-0.9.1/src/marker/simplify.rs000064400000000000000000000354111046102023000156360ustar 00000000000000use std::fmt; use std::ops::Bound; use indexmap::IndexMap; use itertools::Itertools; use pep440_rs::{Version, VersionSpecifier}; use rustc_hash::FxBuildHasher; use version_ranges::Ranges; use crate::{ExtraOperator, MarkerExpression, MarkerOperator, MarkerTree, MarkerTreeKind}; /// Returns a simplified DNF expression for a given marker tree. /// /// Marker trees are represented as decision diagrams that cannot be directly serialized to. /// a boolean expression. 
Instead, you must traverse and collect all possible solutions to the /// diagram, which can be used to create a DNF expression, or all non-solutions to the diagram, /// which can be used to create a CNF expression. /// /// We choose DNF as it is easier to simplify for user-facing output. pub(crate) fn to_dnf(tree: &MarkerTree) -> Vec> { let mut dnf = Vec::new(); collect_dnf(tree, &mut dnf, &mut Vec::new()); simplify(&mut dnf); dnf } /// Walk a [`MarkerTree`] recursively and construct a DNF expression. /// /// A decision diagram can be converted to DNF form by performing a depth-first traversal of /// the tree and collecting all paths to a `true` terminal node. /// /// `path` is the list of marker expressions traversed on the current path. fn collect_dnf( tree: &MarkerTree, dnf: &mut Vec>, path: &mut Vec, ) { match tree.kind() { // Reached a `false` node, meaning the conjunction is irrelevant for DNF. MarkerTreeKind::False => {} // Reached a solution, store the conjunction. MarkerTreeKind::True => { if !path.is_empty() { dnf.push(path.clone()); } } MarkerTreeKind::Version(marker) => { for (tree, range) in collect_edges(marker.edges()) { // Detect whether the range for this edge can be simplified as an inequality. if let Some(excluded) = range_inequality(&range) { let current = path.len(); for version in excluded { path.push(MarkerExpression::Version { key: marker.key().clone(), specifier: VersionSpecifier::not_equals_version(version.clone()), }); } collect_dnf(&tree, dnf, path); path.truncate(current); continue; } // Detect whether the range for this edge can be simplified as a star inequality. 
if let Some(specifier) = star_range_inequality(&range) { path.push(MarkerExpression::Version { key: marker.key().clone(), specifier, }); collect_dnf(&tree, dnf, path); path.pop(); continue; } for bounds in range.iter() { let current = path.len(); for specifier in VersionSpecifier::from_release_only_bounds(bounds) { path.push(MarkerExpression::Version { key: marker.key().clone(), specifier, }); } collect_dnf(&tree, dnf, path); path.truncate(current); } } } MarkerTreeKind::String(marker) => { for (tree, range) in collect_edges(marker.children()) { // Detect whether the range for this edge can be simplified as an inequality. if let Some(excluded) = range_inequality(&range) { let current = path.len(); for value in excluded { path.push(MarkerExpression::String { key: marker.key().clone(), operator: MarkerOperator::NotEqual, value: value.clone(), }); } collect_dnf(&tree, dnf, path); path.truncate(current); continue; } for bounds in range.iter() { let current = path.len(); for (operator, value) in MarkerOperator::from_bounds(bounds) { path.push(MarkerExpression::String { key: marker.key().clone(), operator, value: value.clone(), }); } collect_dnf(&tree, dnf, path); path.truncate(current); } } } MarkerTreeKind::In(marker) => { for (value, tree) in marker.children() { let operator = if value { MarkerOperator::In } else { MarkerOperator::NotIn }; let expr = MarkerExpression::String { key: marker.key().clone(), value: marker.value().to_owned(), operator, }; path.push(expr); collect_dnf(&tree, dnf, path); path.pop(); } } MarkerTreeKind::Contains(marker) => { for (value, tree) in marker.children() { let operator = if value { MarkerOperator::Contains } else { MarkerOperator::NotContains }; let expr = MarkerExpression::String { key: marker.key().clone(), value: marker.value().to_owned(), operator, }; path.push(expr); collect_dnf(&tree, dnf, path); path.pop(); } } MarkerTreeKind::Extra(marker) => { for (value, tree) in marker.children() { let operator = if value { 
ExtraOperator::Equal } else { ExtraOperator::NotEqual }; let expr = MarkerExpression::Extra { name: marker.name().clone(), operator, }; path.push(expr); collect_dnf(&tree, dnf, path); path.pop(); } } } } /// Simplifies a DNF expression. /// /// A decision diagram is canonical, but only for a given variable order. Depending on the /// pre-defined order, the DNF expression produced by a decision tree can still be further /// simplified. /// /// For example, the decision diagram for the expression `A or B` will be represented as /// `A or (not A and B)` or `B or (not B and A)`, depending on the variable order. In both /// cases, the negation in the second clause is redundant. /// /// Completely simplifying a DNF expression is NP-hard and amounts to the set cover problem. /// Additionally, marker expressions can contain complex expressions involving version ranges /// that are not trivial to simplify. Instead, we choose to simplify at the boolean variable /// level without any truth table expansion. Combined with the normalization applied by decision /// trees, this seems to be sufficient in practice. /// /// Note: This function has quadratic time complexity. However, it is not applied on every marker /// operation, only to user facing output, which are typically very simple. fn simplify(dnf: &mut Vec>) { for i in 0..dnf.len() { let clause = &dnf[i]; // Find redundant terms in this clause. let mut redundant_terms = Vec::new(); 'term: for (skipped, skipped_term) in clause.iter().enumerate() { for (j, other_clause) in dnf.iter().enumerate() { if i == j { continue; } // Let X be this clause with a given term A set to it's negation. // If there exists another clause that is a subset of X, the term A is // redundant in this clause. // // For example, `A or (not A and B)` can be simplified to `A or B`, // eliminating the `not A` term. 
if other_clause.iter().all(|term| { // For the term to be redundant in this clause, the other clause can // contain the negation of the term but not the term itself. if term == skipped_term { return false; } if is_negation(term, skipped_term) { return true; } // TODO(ibraheem): if we intern variables we could reduce this // from a linear search to an integer `HashSet` lookup clause .iter() .position(|x| x == term) // If the term was already removed from this one, we cannot // depend on it for further simplification. .is_some_and(|i| !redundant_terms.contains(&i)) }) { redundant_terms.push(skipped); continue 'term; } } } // Eliminate any redundant terms. redundant_terms.sort_by(|a, b| b.cmp(a)); for term in redundant_terms { dnf[i].remove(term); } } // Once we have eliminated redundant terms, there may also be redundant clauses. // For example, `(A and B) or (not A and B)` would have been simplified above to // `(A and B) or B` and can now be further simplified to just `B`. let mut redundant_clauses = Vec::new(); 'clause: for i in 0..dnf.len() { let clause = &dnf[i]; for (j, other_clause) in dnf.iter().enumerate() { // Ignore clauses that are going to be eliminated. if i == j || redundant_clauses.contains(&j) { continue; } // There is another clause that is a subset of this one, thus this clause is redundant. if other_clause.iter().all(|term| { // TODO(ibraheem): if we intern variables we could reduce this // from a linear search to an integer `HashSet` lookup clause.contains(term) }) { redundant_clauses.push(i); continue 'clause; } } } // Eliminate any redundant clauses. for i in redundant_clauses.into_iter().rev() { dnf.remove(i); } } /// Merge any edges that lead to identical subtrees into a single range. 
pub(crate) fn collect_edges<'a, T>( map: impl ExactSizeIterator, MarkerTree)>, ) -> IndexMap, FxBuildHasher> where T: Ord + Clone + 'a, { let mut paths: IndexMap<_, Ranges<_>, FxBuildHasher> = IndexMap::default(); for (range, tree) in map { // OK because all ranges are guaranteed to be non-empty. let (start, end) = range.bounding_range().unwrap(); // Combine the ranges. let range = Ranges::from_range_bounds((start.cloned(), end.cloned())); paths .entry(tree) .and_modify(|union| *union = union.union(&range)) .or_insert_with(|| range.clone()); } paths } /// Returns `Some` if the expression can be simplified as an inequality consisting /// of the given values. /// /// For example, `os_name < 'Linux' or os_name > 'Linux'` can be simplified to /// `os_name != 'Linux'`. fn range_inequality(range: &Ranges) -> Option> where T: Ord + Clone + fmt::Debug, { if range.is_empty() || range.bounding_range() != Some((Bound::Unbounded, Bound::Unbounded)) { return None; } let mut excluded = Vec::new(); for ((_, end), (start, _)) in range.iter().tuple_windows() { match (end, start) { (Bound::Excluded(v1), Bound::Excluded(v2)) if v1 == v2 => excluded.push(v1), _ => return None, } } Some(excluded) } /// Returns `Some` if the version expression can be simplified as a star inequality with the given /// specifier. /// /// For example, `python_full_version < '3.8' or python_full_version >= '3.9'` can be simplified to /// `python_full_version != '3.8.*'`. fn star_range_inequality(range: &Ranges) -> Option { let (b1, b2) = range.iter().collect_tuple()?; match (b1, b2) { ((Bound::Unbounded, Bound::Excluded(v1)), (Bound::Included(v2), Bound::Unbounded)) if v1.release().len() == 2 && v2.release() == [v1.release()[0], v1.release()[1] + 1] => { Some(VersionSpecifier::not_equals_star_version(v1.clone())) } _ => None, } } /// Returns `true` if the LHS is the negation of the RHS, or vice versa. 
fn is_negation(left: &MarkerExpression, right: &MarkerExpression) -> bool { match left { MarkerExpression::Version { key, specifier } => { let MarkerExpression::Version { key: key2, specifier: specifier2, } = right else { return false; }; key == key2 && specifier.version() == specifier2.version() && specifier .operator() .negate() .is_some_and(|negated| negated == *specifier2.operator()) } MarkerExpression::VersionIn { key, versions, negated, } => { let MarkerExpression::VersionIn { key: key2, versions: versions2, negated: negated2, } = right else { return false; }; key == key2 && versions == versions2 && negated != negated2 } MarkerExpression::String { key, operator, value, } => { let MarkerExpression::String { key: key2, operator: operator2, value: value2, } = right else { return false; }; key == key2 && value == value2 && operator .negate() .is_some_and(|negated| negated == *operator2) } MarkerExpression::Extra { operator, name } => { let MarkerExpression::Extra { name: name2, operator: operator2, } = right else { return false; }; name == name2 && operator.negate() == *operator2 } } } pep508_rs-0.9.1/src/marker/tree.rs000064400000000000000000003346541046102023000147540ustar 00000000000000use std::cmp::Ordering; use std::collections::HashSet; use std::fmt::{self, Display, Formatter}; use std::ops::{Bound, Deref}; use std::str::FromStr; use itertools::Itertools; use pep440_rs::{Version, VersionParseError, VersionSpecifier}; use serde::{de, Deserialize, Deserializer, Serialize, Serializer}; use version_ranges::Ranges; use crate::cursor::Cursor; use crate::marker::parse; use crate::{ ExtraName, MarkerEnvironment, Pep508Error, Pep508ErrorSource, Pep508Url, Reporter, TracingReporter, }; use super::algebra::{Edges, NodeId, Variable, INTERNER}; use super::simplify; /// Ways in which marker evaluation can fail #[derive(Debug, Eq, Hash, Ord, PartialOrd, PartialEq, Clone, Copy)] pub enum MarkerWarningKind { /// Using an old name from PEP 345 instead of the modern 
equivalent /// DeprecatedMarkerName, /// Doing an operation other than `==` and `!=` on a quoted string with `extra`, such as /// `extra > "perf"` or `extra == os_name` ExtraInvalidComparison, /// Comparing a string valued marker and a string lexicographically, such as `"3.9" > "3.10"` LexicographicComparison, /// Comparing two markers, such as `os_name != sys_implementation` MarkerMarkerComparison, /// Failed to parse a PEP 440 version or version specifier, e.g. `>=1<2` Pep440Error, /// Comparing two strings, such as `"3.9" > "3.10"` StringStringComparison, } /// Those environment markers with a PEP 440 version as value such as `python_version` #[derive(Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)] #[allow(clippy::enum_variant_names)] pub enum MarkerValueVersion { /// `implementation_version` ImplementationVersion, /// `python_full_version` PythonFullVersion, /// `python_version` PythonVersion, } impl Display for MarkerValueVersion { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { Self::ImplementationVersion => f.write_str("implementation_version"), Self::PythonFullVersion => f.write_str("python_full_version"), Self::PythonVersion => f.write_str("python_version"), } } } /// Those environment markers with an arbitrary string as value such as `sys_platform` #[derive(Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)] pub enum MarkerValueString { /// `implementation_name` ImplementationName, /// `os_name` OsName, /// Deprecated `os.name` from OsNameDeprecated, /// `platform_machine` PlatformMachine, /// Deprecated `platform.machine` from PlatformMachineDeprecated, /// `platform_python_implementation` PlatformPythonImplementation, /// Deprecated `platform.python_implementation` from PlatformPythonImplementationDeprecated, /// Deprecated `python_implementation` from PythonImplementationDeprecated, /// `platform_release` PlatformRelease, /// `platform_system` PlatformSystem, /// `platform_version` PlatformVersion, /// Deprecated 
`platform.version` from PlatformVersionDeprecated, /// `sys_platform` SysPlatform, /// Deprecated `sys.platform` from SysPlatformDeprecated, } impl Display for MarkerValueString { /// Normalizes deprecated names to the proper ones fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { Self::ImplementationName => f.write_str("implementation_name"), Self::OsName | Self::OsNameDeprecated => f.write_str("os_name"), Self::PlatformMachine | Self::PlatformMachineDeprecated => { f.write_str("platform_machine") } Self::PlatformPythonImplementation | Self::PlatformPythonImplementationDeprecated | Self::PythonImplementationDeprecated => f.write_str("platform_python_implementation"), Self::PlatformRelease => f.write_str("platform_release"), Self::PlatformSystem => f.write_str("platform_system"), Self::PlatformVersion | Self::PlatformVersionDeprecated => { f.write_str("platform_version") } Self::SysPlatform | Self::SysPlatformDeprecated => f.write_str("sys_platform"), } } } /// One of the predefined environment values /// /// #[derive(Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)] pub enum MarkerValue { /// Those environment markers with a PEP 440 version as value such as `python_version` MarkerEnvVersion(MarkerValueVersion), /// Those environment markers with an arbitrary string as value such as `sys_platform` MarkerEnvString(MarkerValueString), /// `extra`. 
This one is special because it's a list and not env but user given Extra, /// Not a constant, but a user given quoted string with a value inside such as '3.8' or "windows" QuotedString(String), } impl FromStr for MarkerValue { type Err = String; /// This is specifically for the reserved values fn from_str(s: &str) -> Result { let value = match s { "implementation_name" => Self::MarkerEnvString(MarkerValueString::ImplementationName), "implementation_version" => { Self::MarkerEnvVersion(MarkerValueVersion::ImplementationVersion) } "os_name" => Self::MarkerEnvString(MarkerValueString::OsName), "os.name" => Self::MarkerEnvString(MarkerValueString::OsNameDeprecated), "platform_machine" => Self::MarkerEnvString(MarkerValueString::PlatformMachine), "platform.machine" => { Self::MarkerEnvString(MarkerValueString::PlatformMachineDeprecated) } "platform_python_implementation" => { Self::MarkerEnvString(MarkerValueString::PlatformPythonImplementation) } "platform.python_implementation" => { Self::MarkerEnvString(MarkerValueString::PlatformPythonImplementationDeprecated) } "python_implementation" => { Self::MarkerEnvString(MarkerValueString::PythonImplementationDeprecated) } "platform_release" => Self::MarkerEnvString(MarkerValueString::PlatformRelease), "platform_system" => Self::MarkerEnvString(MarkerValueString::PlatformSystem), "platform_version" => Self::MarkerEnvString(MarkerValueString::PlatformVersion), "platform.version" => { Self::MarkerEnvString(MarkerValueString::PlatformVersionDeprecated) } "python_full_version" => Self::MarkerEnvVersion(MarkerValueVersion::PythonFullVersion), "python_version" => Self::MarkerEnvVersion(MarkerValueVersion::PythonVersion), "sys_platform" => Self::MarkerEnvString(MarkerValueString::SysPlatform), "sys.platform" => Self::MarkerEnvString(MarkerValueString::SysPlatformDeprecated), "extra" => Self::Extra, _ => return Err(format!("Invalid key: {s}")), }; Ok(value) } } impl Display for MarkerValue { fn fmt(&self, f: &mut Formatter<'_>) -> 
std::fmt::Result { match self { Self::MarkerEnvVersion(marker_value_version) => marker_value_version.fmt(f), Self::MarkerEnvString(marker_value_string) => marker_value_string.fmt(f), Self::Extra => f.write_str("extra"), Self::QuotedString(value) => write!(f, "'{value}'"), } } } /// How to compare key and value, such as by `==`, `>` or `not in` #[derive(Copy, Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)] pub enum MarkerOperator { /// `==` Equal, /// `!=` NotEqual, /// `>` GreaterThan, /// `>=` GreaterEqual, /// `<` LessThan, /// `<=` LessEqual, /// `~=` TildeEqual, /// `in` In, /// `not in` NotIn, /// The inverse of the `in` operator. /// /// This is not a valid operator when parsing but is used for normalizing /// marker trees. Contains, /// The inverse of the `not in` operator. /// /// This is not a valid operator when parsing but is used for normalizing /// marker trees. NotContains, } impl MarkerOperator { /// Compare two versions, returning `None` for `in` and `not in`. pub(crate) fn to_pep440_operator(self) -> Option { match self { Self::Equal => Some(pep440_rs::Operator::Equal), Self::NotEqual => Some(pep440_rs::Operator::NotEqual), Self::GreaterThan => Some(pep440_rs::Operator::GreaterThan), Self::GreaterEqual => Some(pep440_rs::Operator::GreaterThanEqual), Self::LessThan => Some(pep440_rs::Operator::LessThan), Self::LessEqual => Some(pep440_rs::Operator::LessThanEqual), Self::TildeEqual => Some(pep440_rs::Operator::TildeEqual), _ => None, } } /// Inverts this marker operator. pub(crate) fn invert(self) -> MarkerOperator { match self { Self::LessThan => Self::GreaterThan, Self::LessEqual => Self::GreaterEqual, Self::GreaterThan => Self::LessThan, Self::GreaterEqual => Self::LessEqual, Self::Equal => Self::Equal, Self::NotEqual => Self::NotEqual, Self::TildeEqual => Self::TildeEqual, Self::In => Self::Contains, Self::NotIn => Self::NotContains, Self::Contains => Self::In, Self::NotContains => Self::NotIn, } } /// Negates this marker operator. 
/// /// If a negation doesn't exist, which is only the case for ~=, then this /// returns `None`. pub(crate) fn negate(self) -> Option { Some(match self { Self::Equal => Self::NotEqual, Self::NotEqual => Self::Equal, Self::TildeEqual => return None, Self::LessThan => Self::GreaterEqual, Self::LessEqual => Self::GreaterThan, Self::GreaterThan => Self::LessEqual, Self::GreaterEqual => Self::LessThan, Self::In => Self::NotIn, Self::NotIn => Self::In, Self::Contains => Self::NotContains, Self::NotContains => Self::Contains, }) } /// Returns the marker operator and value whose union represents the given range. pub fn from_bounds( bounds: (&Bound, &Bound), ) -> impl Iterator { let (b1, b2) = match bounds { (Bound::Included(v1), Bound::Included(v2)) if v1 == v2 => { (Some((MarkerOperator::Equal, v1.clone())), None) } (Bound::Excluded(v1), Bound::Excluded(v2)) if v1 == v2 => { (Some((MarkerOperator::NotEqual, v1.clone())), None) } (lower, upper) => ( MarkerOperator::from_lower_bound(lower), MarkerOperator::from_upper_bound(upper), ), }; b1.into_iter().chain(b2) } /// Returns a value specifier representing the given lower bound. pub fn from_lower_bound(bound: &Bound) -> Option<(MarkerOperator, String)> { match bound { Bound::Included(value) => Some((MarkerOperator::GreaterEqual, value.clone())), Bound::Excluded(value) => Some((MarkerOperator::GreaterThan, value.clone())), Bound::Unbounded => None, } } /// Returns a value specifier representing the given upper bound. 
pub fn from_upper_bound(bound: &Bound) -> Option<(MarkerOperator, String)> { match bound { Bound::Included(value) => Some((MarkerOperator::LessEqual, value.clone())), Bound::Excluded(value) => Some((MarkerOperator::LessThan, value.clone())), Bound::Unbounded => None, } } } impl FromStr for MarkerOperator { type Err = String; /// PEP 508 allows arbitrary whitespace between "not" and "in", and so do we fn from_str(s: &str) -> Result { let value = match s { "==" => Self::Equal, "!=" => Self::NotEqual, ">" => Self::GreaterThan, ">=" => Self::GreaterEqual, "<" => Self::LessThan, "<=" => Self::LessEqual, "~=" => Self::TildeEqual, "in" => Self::In, not_space_in if not_space_in // start with not .strip_prefix("not") // ends with in .and_then(|space_in| space_in.strip_suffix("in")) // and has only whitespace in between .is_some_and(|space| !space.is_empty() && space.trim().is_empty()) => { Self::NotIn } other => return Err(format!("Invalid comparator: {other}")), }; Ok(value) } } impl Display for MarkerOperator { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { f.write_str(match self { Self::Equal => "==", Self::NotEqual => "!=", Self::GreaterThan => ">", Self::GreaterEqual => ">=", Self::LessThan => "<", Self::LessEqual => "<=", Self::TildeEqual => "~=", Self::In | Self::Contains => "in", Self::NotIn | Self::NotContains => "not in", }) } } /// Helper type with a [Version] and its original text #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub struct StringVersion { /// Original unchanged string pub string: String, /// Parsed version pub version: Version, } impl From for StringVersion { fn from(version: Version) -> Self { Self { string: version.to_string(), version, } } } impl FromStr for StringVersion { type Err = VersionParseError; fn from_str(s: &str) -> Result { Ok(Self { string: s.to_string(), version: Version::from_str(s)?, }) } } impl Display for StringVersion { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { self.string.fmt(f) } } impl Serialize 
for StringVersion { fn serialize(&self, serializer: S) -> Result where S: Serializer, { serializer.serialize_str(&self.string) } } impl<'de> Deserialize<'de> for StringVersion { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { let string = String::deserialize(deserializer)?; Self::from_str(&string).map_err(de::Error::custom) } } impl Deref for StringVersion { type Target = Version; fn deref(&self) -> &Self::Target { &self.version } } /// The [`ExtraName`] value used in `extra` markers. #[derive(Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)] pub enum MarkerValueExtra { /// A valid [`ExtraName`]. Extra(ExtraName), /// An invalid name, preserved as an arbitrary string. Arbitrary(String), } impl MarkerValueExtra { fn as_extra(&self) -> Option<&ExtraName> { match self { Self::Extra(extra) => Some(extra), Self::Arbitrary(_) => None, } } } impl Display for MarkerValueExtra { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { Self::Extra(extra) => extra.fmt(f), Self::Arbitrary(string) => string.fmt(f), } } } /// Represents one clause such as `python_version > "3.8"`. #[derive(Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)] #[allow(missing_docs)] pub enum MarkerExpression { /// A version expression, e.g. ` `. /// /// Inverted version expressions, such as ` `, are also /// normalized to this form. Version { key: MarkerValueVersion, specifier: VersionSpecifier, }, /// A version in list expression, e.g. ` in `. /// /// A special case of [`MarkerExpression::String`] with the [`MarkerOperator::In`] operator for /// [`MarkerValueVersion`] values. /// /// See [`parse::parse_version_in_expr`] for details on the supported syntax. /// /// Negated expressions, using "not in" are represented using `negated = true`. VersionIn { key: MarkerValueVersion, versions: Vec, negated: bool, }, /// An string marker comparison, e.g. `sys_platform == '...'`. /// /// Inverted string expressions, e.g `'...' 
== sys_platform`, are also normalized to this form. String { key: MarkerValueString, operator: MarkerOperator, value: String, }, /// `extra '...'` or `'...' extra`. Extra { operator: ExtraOperator, name: MarkerValueExtra, }, } /// The operator for an extra expression, either '==' or '!='. #[derive(Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)] pub enum ExtraOperator { /// `==` Equal, /// `!=` NotEqual, } impl ExtraOperator { /// Creates a [`ExtraOperator`] from an equivalent [`MarkerOperator`]. /// /// Returns `None` if the operator is not supported for extras. pub(crate) fn from_marker_operator(operator: MarkerOperator) -> Option { match operator { MarkerOperator::Equal => Some(ExtraOperator::Equal), MarkerOperator::NotEqual => Some(ExtraOperator::NotEqual), _ => None, } } /// Negates this operator. pub(crate) fn negate(&self) -> ExtraOperator { match *self { ExtraOperator::Equal => ExtraOperator::NotEqual, ExtraOperator::NotEqual => ExtraOperator::Equal, } } } impl Display for ExtraOperator { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { f.write_str(match self { Self::Equal => "==", Self::NotEqual => "!=", }) } } impl MarkerExpression { /// Parse a [`MarkerExpression`] from a string with the given reporter. pub fn parse_reporter( s: &str, reporter: &mut impl Reporter, ) -> Result, Pep508Error> { let mut chars = Cursor::new(s); let expression = parse::parse_marker_key_op_value(&mut chars, reporter)?; chars.eat_whitespace(); if let Some((pos, unexpected)) = chars.next() { return Err(Pep508Error { message: Pep508ErrorSource::String(format!( "Unexpected character '{unexpected}', expected end of input" )), start: pos, len: chars.remaining(), input: chars.to_string(), }); } Ok(expression) } /// Parse a [`MarkerExpression`] from a string. /// /// Returns `None` if the expression consists entirely of meaningless expressions /// that are ignored, such as `os_name ~= 'foo'`. 
#[allow(clippy::should_implement_trait)] pub fn from_str(s: &str) -> Result, Pep508Error> { MarkerExpression::parse_reporter(s, &mut TracingReporter) } } impl Display for MarkerExpression { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { MarkerExpression::Version { key, specifier } => { let (op, version) = (specifier.operator(), specifier.version()); if op == &pep440_rs::Operator::EqualStar || op == &pep440_rs::Operator::NotEqualStar { return write!(f, "{key} {op} '{version}.*'"); } write!(f, "{key} {op} '{version}'") } MarkerExpression::VersionIn { key, versions, negated, } => { let op = if *negated { "not in" } else { "in" }; let versions = versions.iter().map(ToString::to_string).join(" "); write!(f, "{key} {op} '{versions}'") } MarkerExpression::String { key, operator, value, } => { if matches!( operator, MarkerOperator::Contains | MarkerOperator::NotContains ) { return write!(f, "'{value}' {} {key}", operator.invert()); } write!(f, "{key} {operator} '{value}'") } MarkerExpression::Extra { operator, name } => { write!(f, "extra {operator} '{name}'") } } } } /// Represents one or more nested marker expressions with and/or/parentheses. /// /// Marker trees are canonical, meaning any two functionally equivalent markers /// will compare equally. Markers also support efficient polynomial-time operations, /// such as conjunction and disjunction. 
// TODO(ibraheem): decide whether we want to implement `Copy` for marker trees #[derive(Clone, Eq, Hash, PartialEq)] pub struct MarkerTree(NodeId); impl Default for MarkerTree { fn default() -> Self { MarkerTree::TRUE } } impl<'de> Deserialize<'de> for MarkerTree { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { let s = String::deserialize(deserializer)?; FromStr::from_str(&s).map_err(de::Error::custom) } } impl FromStr for MarkerTree { type Err = Pep508Error; fn from_str(markers: &str) -> Result { parse::parse_markers(markers, &mut TracingReporter) } } impl MarkerTree { /// Like [`FromStr::from_str`], but the caller chooses the return type generic. pub fn parse_str(markers: &str) -> Result> { parse::parse_markers(markers, &mut TracingReporter) } /// Parse a [`MarkerTree`] from a string with the given reporter. pub fn parse_reporter( markers: &str, reporter: &mut impl Reporter, ) -> Result { parse::parse_markers(markers, reporter) } /// An empty marker that always evaluates to `true`. pub const TRUE: MarkerTree = MarkerTree(NodeId::TRUE); /// An unsatisfiable marker that always evaluates to `false`. pub const FALSE: MarkerTree = MarkerTree(NodeId::FALSE); /// Returns a marker tree for a single expression. pub fn expression(expr: MarkerExpression) -> MarkerTree { MarkerTree(INTERNER.lock().expression(expr)) } /// Whether the marker always evaluates to `true`. /// /// If this method returns `true`, it is definitively known that the marker will /// evaluate to `true` in any environment. However, this method may return false /// negatives, i.e. it may not be able to detect that a marker is always true for /// complex expressions. pub fn is_true(&self) -> bool { self.0.is_true() } /// Whether the marker always evaluates to `false`, i.e. the expression is not /// satisfiable in any environment. /// /// If this method returns `true`, it is definitively known that the marker will /// evaluate to `false` in any environment. 
However, this method may return false /// negatives, i.e. it may not be able to detect that a marker is unsatisfiable /// for complex expressions. pub fn is_false(&self) -> bool { self.0.is_false() } /// Returns a new marker tree that is the negation of this one. #[must_use] pub fn negate(&self) -> MarkerTree { MarkerTree(self.0.not()) } /// Combine this marker tree with the one given via a conjunction. #[allow(clippy::needless_pass_by_value)] pub fn and(&mut self, tree: MarkerTree) { self.0 = INTERNER.lock().and(self.0, tree.0); } /// Combine this marker tree with the one given via a disjunction. #[allow(clippy::needless_pass_by_value)] pub fn or(&mut self, tree: MarkerTree) { self.0 = INTERNER.lock().or(self.0, tree.0); } /// Returns `true` if there is no environment in which both marker trees can apply, /// i.e. their conjunction is always `false`. /// /// If this method returns `true`, it is definitively known that the two markers can /// never both evaluate to `true` in a given environment. However, this method may return /// false negatives, i.e. it may not be able to detect that two markers are disjoint for /// complex expressions. pub fn is_disjoint(&self, other: &MarkerTree) -> bool { INTERNER.lock().is_disjoint(self.0, other.0) } /// Returns the contents of this marker tree, if it contains at least one expression. /// /// If the marker is `true`, this method will return `None`. /// If the marker is `false`, the marker is represented as the normalized expression, `python_version < '0'`. /// /// The returned type implements [`Display`] and [`serde::Serialize`]. pub fn contents(&self) -> Option { if self.is_true() { return None; } Some(MarkerTreeContents(self.clone())) } /// Returns a simplified string representation of this marker, if it contains at least one /// expression. /// /// If the marker is `true`, this method will return `None`. /// If the marker is `false`, the marker is represented as the normalized expression, `python_version < '0'`. 
pub fn try_to_string(&self) -> Option<String> {
    self.contents().map(|contents| contents.to_string())
}

/// Returns the underlying [`MarkerTreeKind`] of the root node.
///
/// The constant `true` and `false` trees are reported directly. For every other tree the
/// root node is looked up in the interner and the variable stored on that node selects the
/// returned variant.
pub fn kind(&self) -> MarkerTreeKind<'_> {
    if self.is_true() {
        return MarkerTreeKind::True;
    }

    if self.is_false() {
        return MarkerTreeKind::False;
    }

    let node = INTERNER.shared.node(self.0);
    // Each variable kind is expected to come with one specific edge layout; any other
    // pairing is treated as impossible.
    match &node.var {
        Variable::Version(key) => {
            let Edges::Version { edges: ref map } = node.children else {
                unreachable!()
            };
            MarkerTreeKind::Version(VersionMarkerTree {
                id: self.0,
                key: key.clone(),
                map,
            })
        }
        Variable::String(key) => {
            let Edges::String { edges: ref map } = node.children else {
                unreachable!()
            };
            MarkerTreeKind::String(StringMarkerTree {
                id: self.0,
                key: key.clone(),
                map,
            })
        }
        // NOTE(review): for the boolean variables below, `negate(self.0)` presumably
        // propagates this node's complement flag onto its children; confirm against the
        // `algebra` module.
        Variable::In { key, value } => {
            let Edges::Boolean { low, high } = node.children else {
                unreachable!()
            };
            MarkerTreeKind::In(InMarkerTree {
                key: key.clone(),
                value,
                high: high.negate(self.0),
                low: low.negate(self.0),
            })
        }
        Variable::Contains { key, value } => {
            let Edges::Boolean { low, high } = node.children else {
                unreachable!()
            };
            MarkerTreeKind::Contains(ContainsMarkerTree {
                key: key.clone(),
                value,
                high: high.negate(self.0),
                low: low.negate(self.0),
            })
        }
        Variable::Extra(name) => {
            let Edges::Boolean { low, high } = node.children else {
                unreachable!()
            };
            MarkerTreeKind::Extra(ExtraMarkerTree {
                name,
                high: high.negate(self.0),
                low: low.negate(self.0),
            })
        }
    }
}

/// Returns a simplified DNF expression for this marker tree.
///
/// The outer vector holds the clauses and each inner vector holds the expressions of one
/// clause.
pub fn to_dnf(&self) -> Vec<Vec<MarkerExpression>> {
    simplify::to_dnf(self)
}

/// Does this marker apply in the given environment?
///
/// Deprecated marker names and evaluation warnings are sent to [`TracingReporter`].
pub fn evaluate(&self, env: &MarkerEnvironment, extras: &[ExtraName]) -> bool {
    self.report_deprecated_options(&mut TracingReporter);
    self.evaluate_reporter_impl(env, extras, &mut TracingReporter)
}

/// Evaluates this marker tree against an optional environment and a
/// possibly empty sequence of extras.
/// /// When an environment is not provided, all marker expressions based on /// the environment evaluate to `true`. That is, this provides environment /// independent marker evaluation. In practice, this means only the extras /// are evaluated when an environment is not provided. pub fn evaluate_optional_environment( &self, env: Option<&MarkerEnvironment>, extras: &[ExtraName], ) -> bool { self.report_deprecated_options(&mut TracingReporter); match env { None => self.evaluate_extras(extras), Some(env) => self.evaluate_reporter_impl(env, extras, &mut TracingReporter), } } /// Same as [`Self::evaluate`], but instead of using logging to warn, you can pass your own /// handler for warnings pub fn evaluate_reporter( &self, env: &MarkerEnvironment, extras: &[ExtraName], reporter: &mut impl Reporter, ) -> bool { self.report_deprecated_options(reporter); self.evaluate_reporter_impl(env, extras, reporter) } fn evaluate_reporter_impl( &self, env: &MarkerEnvironment, extras: &[ExtraName], reporter: &mut impl Reporter, ) -> bool { match self.kind() { MarkerTreeKind::True => return true, MarkerTreeKind::False => return false, MarkerTreeKind::Version(marker) => { for (range, tree) in marker.edges() { if range.contains(env.get_version(marker.key())) { return tree.evaluate_reporter_impl(env, extras, reporter); } } } MarkerTreeKind::String(marker) => { for (range, tree) in marker.children() { let l_string = env.get_string(marker.key()); if range.as_singleton().is_none() { if let Some((start, end)) = range.bounding_range() { if let Bound::Included(value) | Bound::Excluded(value) = start { reporter.report( MarkerWarningKind::LexicographicComparison, format!("Comparing {l_string} and {value} lexicographically"), ); }; if let Bound::Included(value) | Bound::Excluded(value) = end { reporter.report( MarkerWarningKind::LexicographicComparison, format!("Comparing {l_string} and {value} lexicographically"), ); }; } } // todo(ibraheem): avoid cloning here, `contains` should accept `&impl 
Borrow` let l_string = &l_string.to_string(); if range.contains(l_string) { return tree.evaluate_reporter_impl(env, extras, reporter); } } } MarkerTreeKind::In(marker) => { return marker .edge(marker.value().contains(env.get_string(marker.key()))) .evaluate_reporter_impl(env, extras, reporter); } MarkerTreeKind::Contains(marker) => { return marker .edge(env.get_string(marker.key()).contains(marker.value())) .evaluate_reporter_impl(env, extras, reporter); } MarkerTreeKind::Extra(marker) => { return marker .edge( marker .name() .as_extra() .is_some_and(|extra| extras.contains(extra)), ) .evaluate_reporter_impl(env, extras, reporter); } } false } /// Checks if the requirement should be activated with the given set of active extras and a set /// of possible python versions (from `requires-python`) without evaluating the remaining /// environment markers, i.e. if there is potentially an environment that could activate this /// requirement. /// /// Note that unlike [`Self::evaluate`] this does not perform any checks for bogus expressions but /// will simply return true. As caller you should separately perform a check with an environment /// and forward all warnings. 
pub fn evaluate_extras_and_python_version(
    &self,
    extras: &HashSet<ExtraName>,
    python_versions: &[Version],
) -> bool {
    match self.kind() {
        MarkerTreeKind::True => true,
        MarkerTreeKind::False => false,
        MarkerTreeKind::Version(marker) => marker.edges().any(|(range, tree)| {
            // Only `python_version` edges are pruned: such an edge is a dead end when none
            // of the candidate versions falls inside its range. Edges of every other
            // version key are always explored.
            if *marker.key() == MarkerValueVersion::PythonVersion
                && !python_versions
                    .iter()
                    .any(|version| range.contains(version))
            {
                return false;
            }

            tree.evaluate_extras_and_python_version(extras, python_versions)
        }),
        // The remaining environment markers are not evaluated at all: the marker may apply
        // as soon as any one of their children may apply.
        MarkerTreeKind::String(marker) => marker
            .children()
            .any(|(_, tree)| tree.evaluate_extras_and_python_version(extras, python_versions)),
        MarkerTreeKind::In(marker) => marker
            .children()
            .any(|(_, tree)| tree.evaluate_extras_and_python_version(extras, python_versions)),
        MarkerTreeKind::Contains(marker) => marker
            .children()
            .any(|(_, tree)| tree.evaluate_extras_and_python_version(extras, python_versions)),
        // Extras are decided exactly; an arbitrary (invalid) extra name is never active.
        MarkerTreeKind::Extra(marker) => marker
            .edge(
                marker
                    .name()
                    .as_extra()
                    .is_some_and(|extra| extras.contains(extra)),
            )
            .evaluate_extras_and_python_version(extras, python_versions),
    }
}

/// Checks if the requirement should be activated with the given set of active extras without evaluating
/// the remaining environment markers, i.e. if there is potentially an environment that could activate this
/// requirement.
pub fn evaluate_extras(&self, extras: &[ExtraName]) -> bool { match self.kind() { MarkerTreeKind::True => true, MarkerTreeKind::False => false, MarkerTreeKind::Version(marker) => { marker.edges().any(|(_, tree)| tree.evaluate_extras(extras)) } MarkerTreeKind::String(marker) => marker .children() .any(|(_, tree)| tree.evaluate_extras(extras)), MarkerTreeKind::In(marker) => marker .children() .any(|(_, tree)| tree.evaluate_extras(extras)), MarkerTreeKind::Contains(marker) => marker .children() .any(|(_, tree)| tree.evaluate_extras(extras)), MarkerTreeKind::Extra(marker) => marker .edge( marker .name() .as_extra() .is_some_and(|extra| extras.contains(extra)), ) .evaluate_extras(extras), } } /// Same as [`Self::evaluate`], but instead of using logging to warn, you get a Vec with all /// warnings collected pub fn evaluate_collect_warnings( &self, env: &MarkerEnvironment, extras: &[ExtraName], ) -> (bool, Vec<(MarkerWarningKind, String)>) { let mut warnings = Vec::new(); let mut reporter = |kind, warning| { warnings.push((kind, warning)); }; self.report_deprecated_options(&mut reporter); let result = self.evaluate_reporter_impl(env, extras, &mut reporter); (result, warnings) } /// Report the deprecated marker from fn report_deprecated_options(&self, reporter: &mut impl Reporter) { let string_marker = match self.kind() { MarkerTreeKind::True | MarkerTreeKind::False => return, MarkerTreeKind::String(marker) => marker, MarkerTreeKind::Version(marker) => { for (_, tree) in marker.edges() { tree.report_deprecated_options(reporter); } return; } MarkerTreeKind::In(marker) => { for (_, tree) in marker.children() { tree.report_deprecated_options(reporter); } return; } MarkerTreeKind::Contains(marker) => { for (_, tree) in marker.children() { tree.report_deprecated_options(reporter); } return; } MarkerTreeKind::Extra(marker) => { for (_, tree) in marker.children() { tree.report_deprecated_options(reporter); } return; } }; match string_marker.key() { 
MarkerValueString::OsNameDeprecated => { reporter.report( MarkerWarningKind::DeprecatedMarkerName, "os.name is deprecated in favor of os_name".to_string(), ); } MarkerValueString::PlatformMachineDeprecated => { reporter.report( MarkerWarningKind::DeprecatedMarkerName, "platform.machine is deprecated in favor of platform_machine".to_string(), ); } MarkerValueString::PlatformPythonImplementationDeprecated => { reporter.report( MarkerWarningKind::DeprecatedMarkerName, "platform.python_implementation is deprecated in favor of platform_python_implementation" .to_string(), ); } MarkerValueString::PythonImplementationDeprecated => { reporter.report( MarkerWarningKind::DeprecatedMarkerName, "python_implementation is deprecated in favor of platform_python_implementation" .to_string(), ); } MarkerValueString::PlatformVersionDeprecated => { reporter.report( MarkerWarningKind::DeprecatedMarkerName, "platform.version is deprecated in favor of platform_version".to_string(), ); } MarkerValueString::SysPlatformDeprecated => { reporter.report( MarkerWarningKind::DeprecatedMarkerName, "sys.platform is deprecated in favor of sys_platform".to_string(), ); } _ => {} } for (_, tree) in string_marker.children() { tree.report_deprecated_options(reporter); } } /// Find a top level `extra == "..."` expression. /// /// ASSUMPTION: There is one `extra = "..."`, and it's either the only marker or part of the /// main conjunction. pub fn top_level_extra(&self) -> Option { let mut extra_expression = None; for conjunction in self.to_dnf() { let found = conjunction.iter().find(|expression| { matches!( expression, MarkerExpression::Extra { operator: ExtraOperator::Equal, .. } ) })?; // Because the marker tree is in DNF form, we must verify that the extra expression is part // of all solutions to this marker. 
if let Some(ref extra_expression) = extra_expression { if *extra_expression != *found { return None; } continue; } extra_expression = Some(found.clone()); } extra_expression } /// Simplify this marker by *assuming* that the Python version range /// provided is true and that the complement of it is false. /// /// For example, with `requires-python = '>=3.8'` and a marker tree of /// `python_full_version >= '3.8' and python_full_version <= '3.10'`, this /// would result in a marker of `python_full_version <= '3.10'`. /// /// This is useful when one wants to write "simpler" markers in a /// particular context with a bound on the supported Python versions. /// In general, the simplified markers returned shouldn't be used for /// evaluation. Instead, they should be turned back into their more /// "complex" form first. /// /// Note that simplifying a marker and then complexifying it, even /// with the same Python version bounds, is a lossy operation. For /// example, simplifying `python_version < '3.7'` with `requires-python /// = ">=3.8"` will result in a marker that always returns false (e.g., /// `python_version < '0'`). Therefore, complexifying an always-false /// marker will result in a marker that is still always false, despite /// the fact that the original marker was true for `<3.7`. Therefore, /// simplifying should only be done as a one-way transformation when it is /// known that `requires-python` reflects an eternal lower bound on the /// results of that simplification. (If `requires-python` changes, then one /// should reconstitute all relevant markers from the source data.) 
#[must_use]
#[allow(clippy::needless_pass_by_value)]
pub fn simplify_python_versions(
    self,
    lower: Bound<&Version>,
    upper: Bound<&Version>,
) -> MarkerTree {
    // Delegates to the interner routine of the same name and re-wraps the node it returns.
    let simplified = INTERNER
        .lock()
        .simplify_python_versions(self.0, lower, upper);
    MarkerTree(simplified)
}

/// Complexify marker tree by requiring the given Python version range
/// to be true in order for this marker tree to evaluate to true in all
/// circumstances.
///
/// For example, with `requires-python = '>=3.8'` and a marker tree of
/// `python_full_version <= '3.10'`, this would result in a marker of
/// `python_full_version >= '3.8' and python_full_version <= '3.10'`.
#[must_use]
#[allow(clippy::needless_pass_by_value)]
pub fn complexify_python_versions(
    self,
    lower: Bound<&Version>,
    upper: Bound<&Version>,
) -> MarkerTree {
    // Delegates to the interner routine of the same name and re-wraps the node it returns.
    let complexified = INTERNER
        .lock()
        .complexify_python_versions(self.0, lower, upper);
    MarkerTree(complexified)
}

/// Simplify the `extra` markers of this tree, given the extras that are provided.
///
/// Any `extra` markers that are always `true` given the provided extras will be removed.
/// Any `extra` markers that are always `false` given the provided extras will be left
/// unchanged.
///
/// For example, if `dev` is a provided extra, given `sys_platform == 'linux' and extra == 'dev'`,
/// the marker will be simplified to `sys_platform == 'linux'`.
///
/// The result is always a [`MarkerTree`]; this is a thin wrapper around
/// [`Self::simplify_extras_with`] with "is contained in `extras`" as the predicate.
#[must_use]
pub fn simplify_extras(self, extras: &[ExtraName]) -> MarkerTree {
    let is_provided = |name: &ExtraName| extras.contains(name);
    self.simplify_extras_with(is_provided)
}

/// Simplify the `extra` markers of this tree, given a predicate that decides which extras are
/// provided.
///
/// Any `extra` markers that are always `true` given the provided predicate will be removed.
/// Any `extra` markers that are always `false` given the provided predicate will be left
/// unchanged.
///
/// For example, if `is_extra('dev')` is true, given
/// `sys_platform == 'linux' and extra == 'dev'`, the marker will be simplified to
/// `sys_platform == 'linux'`.
#[must_use] pub fn simplify_extras_with(self, is_extra: impl Fn(&ExtraName) -> bool) -> MarkerTree { // Because `simplify_extras_with_impl` is recursive, and we need to use // our predicate in recursive calls, we need the predicate itself to // have some indirection (or else we'd have to clone it). To avoid a // recursive type at codegen time, we just introduce the indirection // here, but keep the calling API ergonomic. self.simplify_extras_with_impl(&is_extra) } fn simplify_extras_with_impl(self, is_extra: &impl Fn(&ExtraName) -> bool) -> MarkerTree { MarkerTree(INTERNER.lock().restrict(self.0, &|var| { match var { Variable::Extra(name) => name .as_extra() .and_then(|name| is_extra(name).then_some(true)), _ => None, } })) } } impl fmt::Debug for MarkerTree { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { if self.is_true() { return write!(f, "true"); } if self.is_false() { return write!(f, "false"); } write!(f, "{}", self.contents().unwrap()) } } impl MarkerTree { /// Formats a [`MarkerTree`] as a graph. /// /// This is useful for debugging when one wants to look at a /// representation of a `MarkerTree` that is more faithful to its /// internal representation. pub fn debug_graph(&self) -> MarkerTreeDebugGraph<'_> { MarkerTreeDebugGraph { marker: self } } /// Formats a [`MarkerTree`] in its "raw" representation. /// /// This is useful for debugging when one wants to look at a /// representation of a `MarkerTree` that is precisely identical /// to its internal representation. 
pub fn debug_raw(&self) -> MarkerTreeDebugRaw<'_> {
    MarkerTreeDebugRaw { marker: self }
}

/// Writes this tree as an indented graph, one outgoing edge per line.
///
/// `level` is the nesting depth of this node; every edge line is indented by that many
/// repetitions of the indent unit. Terminal nodes are written inline as `true` / `false`.
fn fmt_graph(&self, f: &mut fmt::Formatter<'_>, level: usize) -> fmt::Result {
    /// Starts a new output line and writes the indentation for `level`.
    fn begin_edge(f: &mut fmt::Formatter<'_>, level: usize) -> fmt::Result {
        writeln!(f)?;
        for _ in 0..level {
            write!(f, " ")?;
        }
        Ok(())
    }

    match self.kind() {
        MarkerTreeKind::True => return write!(f, "true"),
        MarkerTreeKind::False => return write!(f, "false"),
        MarkerTreeKind::Version(kind) => {
            for (tree, range) in simplify::collect_edges(kind.edges()) {
                begin_edge(f, level)?;
                write!(f, "{key}{range} -> ", key = kind.key())?;
                tree.fmt_graph(f, level + 1)?;
            }
        }
        MarkerTreeKind::String(kind) => {
            for (tree, range) in simplify::collect_edges(kind.children()) {
                begin_edge(f, level)?;
                write!(f, "{key}{range} -> ", key = kind.key())?;
                tree.fmt_graph(f, level + 1)?;
            }
        }
        MarkerTreeKind::In(kind) => {
            begin_edge(f, level)?;
            write!(f, "{} in {} -> ", kind.key(), kind.value())?;
            kind.edge(true).fmt_graph(f, level + 1)?;

            begin_edge(f, level)?;
            write!(f, "{} not in {} -> ", kind.key(), kind.value())?;
            kind.edge(false).fmt_graph(f, level + 1)?;
        }
        MarkerTreeKind::Contains(kind) => {
            // For `contains` nodes the value is the left-hand side and the key the right.
            begin_edge(f, level)?;
            write!(f, "{} in {} -> ", kind.value(), kind.key())?;
            kind.edge(true).fmt_graph(f, level + 1)?;

            begin_edge(f, level)?;
            write!(f, "{} not in {} -> ", kind.value(), kind.key())?;
            kind.edge(false).fmt_graph(f, level + 1)?;
        }
        MarkerTreeKind::Extra(kind) => {
            begin_edge(f, level)?;
            write!(f, "extra == {} -> ", kind.name())?;
            kind.edge(true).fmt_graph(f, level + 1)?;

            begin_edge(f, level)?;
            write!(f, "extra != {} -> ", kind.name())?;
            kind.edge(false).fmt_graph(f, level + 1)?;
        }
    }

    Ok(())
}
// Closes the `impl MarkerTree` block opened just before `debug_graph`.
}

impl PartialOrd for MarkerTree {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for MarkerTree {
    /// Orders two trees by comparing their [`MarkerTreeKind`] views.
    fn cmp(&self, other: &Self) -> Ordering {
        self.kind().cmp(&other.kind())
    }
}

/// Formats a [`MarkerTree`] as a graph.
///
/// This type is created by the [`MarkerTree::debug_graph`] routine.
#[derive(Clone)]
pub struct MarkerTreeDebugGraph<'a> {
    marker: &'a MarkerTree,
}

impl fmt::Debug for MarkerTreeDebugGraph<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.marker.fmt_graph(f, 0)
    }
}

/// Formats a [`MarkerTree`] using its raw internals.
///
/// This is very verbose and likely only useful if you're working
/// on the internals of this crate.
///
/// This type is created by the [`MarkerTree::debug_raw`] routine.
#[derive(Clone)]
pub struct MarkerTreeDebugRaw<'a> {
    marker: &'a MarkerTree,
}

impl fmt::Debug for MarkerTreeDebugRaw<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Looks the node up in the interner's shared storage and prints it as-is.
        let node = INTERNER.shared.node(self.marker.0);
        f.debug_tuple("MarkerTreeDebugRaw").field(node).finish()
    }
}

/// The underlying kind of an arbitrary node in a [`MarkerTree`].
///
/// A marker tree is represented as an algebraic decision tree with two terminal nodes
/// `True` or `False`. The edges of a given node correspond to a particular assignment of
/// a value to that variable.
#[derive(PartialEq, Eq, Clone, Debug, PartialOrd, Ord)]
pub enum MarkerTreeKind<'a> {
    /// An empty marker that always evaluates to `true`.
    True,
    /// An unsatisfiable marker that always evaluates to `false`.
    False,
    /// A version expression.
    Version(VersionMarkerTree<'a>),
    /// A string expression.
    String(StringMarkerTree<'a>),
    /// A string expression with the `in` operator.
    In(InMarkerTree<'a>),
    /// A string expression with the `contains` operator.
    Contains(ContainsMarkerTree<'a>),
    /// An `extra` expression, such as `extra == 'bar'`.
    Extra(ExtraMarkerTree<'a>),
}

/// A version marker node, such as `python_version < '3.7'`.
#[derive(PartialEq, Eq, Clone, Debug)]
pub struct VersionMarkerTree<'a> {
    id: NodeId,
    key: MarkerValueVersion,
    map: &'a [(Ranges<Version>, NodeId)],
}

impl VersionMarkerTree<'_> {
    /// The key for this node.
pub fn key(&self) -> &MarkerValueVersion { &self.key } /// The edges of this node, corresponding to possible output ranges of the given variable. pub fn edges(&self) -> impl ExactSizeIterator, MarkerTree)> + '_ { self.map .iter() .map(|(range, node)| (range, MarkerTree(node.negate(self.id)))) } } impl PartialOrd for VersionMarkerTree<'_> { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl Ord for VersionMarkerTree<'_> { fn cmp(&self, other: &Self) -> Ordering { self.key() .cmp(other.key()) .then_with(|| self.edges().cmp(other.edges())) } } /// A string marker node, such as `os_name == 'Linux'`. #[derive(PartialEq, Eq, Clone, Debug)] pub struct StringMarkerTree<'a> { id: NodeId, key: MarkerValueString, map: &'a [(Ranges, NodeId)], } impl StringMarkerTree<'_> { /// The key for this node. pub fn key(&self) -> &MarkerValueString { &self.key } /// The edges of this node, corresponding to possible output ranges of the given variable. pub fn children(&self) -> impl ExactSizeIterator, MarkerTree)> { self.map .iter() .map(|(range, node)| (range, MarkerTree(node.negate(self.id)))) } } impl PartialOrd for StringMarkerTree<'_> { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl Ord for StringMarkerTree<'_> { fn cmp(&self, other: &Self) -> Ordering { self.key() .cmp(other.key()) .then_with(|| self.children().cmp(other.children())) } } /// A string marker node with the `in` operator, such as `os_name in 'WindowsLinux'`. #[derive(PartialEq, Eq, Clone, Debug)] pub struct InMarkerTree<'a> { key: MarkerValueString, value: &'a str, high: NodeId, low: NodeId, } impl InMarkerTree<'_> { /// The key (LHS) for this expression. pub fn key(&self) -> &MarkerValueString { &self.key } /// The value (RHS) for this expression. pub fn value(&self) -> &str { self.value } /// The edges of this node, corresponding to the boolean evaluation of the expression. 
pub fn children(&self) -> impl Iterator { [(true, MarkerTree(self.high)), (false, MarkerTree(self.low))].into_iter() } /// Returns the subtree associated with the given edge value. pub fn edge(&self, value: bool) -> MarkerTree { if value { MarkerTree(self.high) } else { MarkerTree(self.low) } } } impl PartialOrd for InMarkerTree<'_> { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl Ord for InMarkerTree<'_> { fn cmp(&self, other: &Self) -> Ordering { self.key() .cmp(other.key()) .then_with(|| self.value().cmp(other.value())) .then_with(|| self.children().cmp(other.children())) } } /// A string marker node with inverse of the `in` operator, such as `'nux' in os_name`. #[derive(PartialEq, Eq, Clone, Debug)] pub struct ContainsMarkerTree<'a> { key: MarkerValueString, value: &'a str, high: NodeId, low: NodeId, } impl ContainsMarkerTree<'_> { /// The key (LHS) for this expression. pub fn key(&self) -> &MarkerValueString { &self.key } /// The value (RHS) for this expression. pub fn value(&self) -> &str { self.value } /// The edges of this node, corresponding to the boolean evaluation of the expression. pub fn children(&self) -> impl Iterator { [(true, MarkerTree(self.high)), (false, MarkerTree(self.low))].into_iter() } /// Returns the subtree associated with the given edge value. pub fn edge(&self, value: bool) -> MarkerTree { if value { MarkerTree(self.high) } else { MarkerTree(self.low) } } } impl PartialOrd for ContainsMarkerTree<'_> { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl Ord for ContainsMarkerTree<'_> { fn cmp(&self, other: &Self) -> Ordering { self.key() .cmp(other.key()) .then_with(|| self.value().cmp(other.value())) .then_with(|| self.children().cmp(other.children())) } } /// A node representing the existence or absence of a given extra, such as `extra == 'bar'`. 
#[derive(PartialEq, Eq, Clone, Debug)] pub struct ExtraMarkerTree<'a> { name: &'a MarkerValueExtra, high: NodeId, low: NodeId, } impl ExtraMarkerTree<'_> { /// Returns the name of the extra in this expression. pub fn name(&self) -> &MarkerValueExtra { self.name } /// The edges of this node, corresponding to the boolean evaluation of the expression. pub fn children(&self) -> impl Iterator { [(true, MarkerTree(self.high)), (false, MarkerTree(self.low))].into_iter() } /// Returns the subtree associated with the given edge value. pub fn edge(&self, value: bool) -> MarkerTree { if value { MarkerTree(self.high) } else { MarkerTree(self.low) } } } impl PartialOrd for ExtraMarkerTree<'_> { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl Ord for ExtraMarkerTree<'_> { fn cmp(&self, other: &Self) -> Ordering { self.name() .cmp(other.name()) .then_with(|| self.children().cmp(other.children())) } } /// A marker tree that contains at least one expression. /// /// See [`MarkerTree::contents`] for details. #[derive(Clone, Eq, Hash, PartialEq, PartialOrd, Ord, Debug)] pub struct MarkerTreeContents(MarkerTree); impl From for MarkerTree { fn from(contents: MarkerTreeContents) -> Self { contents.0 } } impl From> for MarkerTree { fn from(marker: Option) -> Self { marker.map(|contents| contents.0).unwrap_or_default() } } impl AsRef for MarkerTreeContents { fn as_ref(&self) -> &MarkerTree { &self.0 } } impl Serialize for MarkerTreeContents { fn serialize(&self, serializer: S) -> Result where S: Serializer, { serializer.serialize_str(&self.to_string()) } } impl Display for MarkerTreeContents { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { // Normalize all `false` expressions to the same trivially false expression. if self.0.is_false() { return write!(f, "python_version < '0'"); } // Write the output in DNF form. 
let dnf = self.0.to_dnf(); let format_conjunction = |conjunction: &Vec| { conjunction .iter() .map(MarkerExpression::to_string) .collect::>() .join(" and ") }; let expr = match &dnf[..] { [conjunction] => format_conjunction(conjunction), _ => dnf .iter() .map(|conjunction| { if conjunction.len() == 1 { format_conjunction(conjunction) } else { format!("({})", format_conjunction(conjunction)) } }) .collect::>() .join(" or "), }; f.write_str(&expr) } } #[cfg(feature = "schemars")] impl schemars::JsonSchema for MarkerTree { fn schema_name() -> String { "MarkerTree".to_string() } fn json_schema(_gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema { schemars::schema::SchemaObject { instance_type: Some(schemars::schema::InstanceType::String.into()), metadata: Some(Box::new(schemars::schema::Metadata { description: Some( "A PEP 508-compliant marker expression, e.g., `sys_platform == 'Darwin'`" .to_string(), ), ..schemars::schema::Metadata::default() })), ..schemars::schema::SchemaObject::default() } .into() } } #[cfg(test)] mod test { use std::ops::Bound; use std::str::FromStr; use insta::assert_snapshot; use pep440_rs::Version; use crate::marker::{MarkerEnvironment, MarkerEnvironmentBuilder}; use crate::{ExtraName, MarkerExpression, MarkerOperator, MarkerTree, MarkerValueString}; fn parse_err(input: &str) -> String { MarkerTree::from_str(input).unwrap_err().to_string() } fn m(s: &str) -> MarkerTree { s.parse().unwrap() } fn env37() -> MarkerEnvironment { MarkerEnvironment::try_from(MarkerEnvironmentBuilder { implementation_name: "", implementation_version: "3.7", os_name: "linux", platform_machine: "", platform_python_implementation: "", platform_release: "", platform_system: "", platform_version: "", python_full_version: "3.7", python_version: "3.7", sys_platform: "linux", }) .unwrap() } /// Copied from #[test] fn test_marker_equivalence() { let values = [ (r"python_version == '2.7'", r#"python_version == "2.7""#), (r#"python_version == "2.7""#, 
r#"python_version == "2.7""#), ( r#"python_version == "2.7" and os_name == "posix""#, r#"python_version == "2.7" and os_name == "posix""#, ), ( r#"python_version == "2.7" or os_name == "posix""#, r#"python_version == "2.7" or os_name == "posix""#, ), ( r#"python_version == "2.7" and os_name == "posix" or sys_platform == "win32""#, r#"python_version == "2.7" and os_name == "posix" or sys_platform == "win32""#, ), (r#"(python_version == "2.7")"#, r#"python_version == "2.7""#), ( r#"(python_version == "2.7" and sys_platform == "win32")"#, r#"python_version == "2.7" and sys_platform == "win32""#, ), ( r#"python_version == "2.7" and (sys_platform == "win32" or sys_platform == "linux")"#, r#"python_version == "2.7" and (sys_platform == "win32" or sys_platform == "linux")"#, ), ]; for (a, b) in values { assert_eq!(m(a), m(b), "{a} {b}"); } } #[test] fn simplify_python_versions() { assert_eq!( m("(extra == 'foo' and sys_platform == 'win32') or extra == 'foo'") .simplify_extras(&["foo".parse().unwrap()]), MarkerTree::TRUE ); assert_eq!( m("(python_version <= '3.11' and sys_platform == 'win32') or python_version > '3.11'") .simplify_python_versions( Bound::Excluded(Version::new([3, 12])).as_ref(), Bound::Unbounded.as_ref(), ), MarkerTree::TRUE ); assert_eq!( m("python_version < '3.10'") .simplify_python_versions( Bound::Excluded(Version::new([3, 7])).as_ref(), Bound::Unbounded.as_ref(), ) .try_to_string() .unwrap(), "python_full_version < '3.10'" ); // Note that `3.12.1` will still match. 
assert_eq!( m("python_version <= '3.12'") .simplify_python_versions( Bound::Excluded(Version::new([3, 12])).as_ref(), Bound::Unbounded.as_ref(), ) .try_to_string() .unwrap(), "python_full_version < '3.13'" ); assert_eq!( m("python_full_version <= '3.12'").simplify_python_versions( Bound::Excluded(Version::new([3, 12])).as_ref(), Bound::Unbounded.as_ref(), ), MarkerTree::FALSE ); assert_eq!( m("python_full_version <= '3.12.1'") .simplify_python_versions( Bound::Excluded(Version::new([3, 12])).as_ref(), Bound::Unbounded.as_ref(), ) .try_to_string() .unwrap(), "python_full_version <= '3.12.1'" ); } #[test] fn release_only() { assert!(m("python_full_version > '3.10' or python_full_version <= '3.10'").is_true()); assert!( m("python_full_version > '3.10' or python_full_version <= '3.10'") .negate() .is_false() ); assert!(m("python_full_version > '3.10' and python_full_version <= '3.10'").is_false()); } #[test] fn test_marker_evaluation() { let env27 = MarkerEnvironment::try_from(MarkerEnvironmentBuilder { implementation_name: "", implementation_version: "2.7", os_name: "linux", platform_machine: "", platform_python_implementation: "", platform_release: "", platform_system: "", platform_version: "", python_full_version: "2.7", python_version: "2.7", sys_platform: "linux", }) .unwrap(); let env37 = env37(); let marker1 = MarkerTree::from_str("python_version == '2.7'").unwrap(); let marker2 = MarkerTree::from_str( "os_name == \"linux\" or python_version == \"3.7\" and sys_platform == \"win32\"", ) .unwrap(); let marker3 = MarkerTree::from_str( "python_version == \"2.7\" and (sys_platform == \"win32\" or sys_platform == \"linux\")", ).unwrap(); assert!(marker1.evaluate(&env27, &[])); assert!(!marker1.evaluate(&env37, &[])); assert!(marker2.evaluate(&env27, &[])); assert!(marker2.evaluate(&env37, &[])); assert!(marker3.evaluate(&env27, &[])); assert!(!marker3.evaluate(&env37, &[])); } #[test] fn test_version_in_evaluation() { let env27 = 
MarkerEnvironment::try_from(MarkerEnvironmentBuilder { implementation_name: "", implementation_version: "2.7", os_name: "linux", platform_machine: "", platform_python_implementation: "", platform_release: "", platform_system: "", platform_version: "", python_full_version: "2.7", python_version: "2.7", sys_platform: "linux", }) .unwrap(); let env37 = env37(); let marker = MarkerTree::from_str("python_version in \"2.7 3.2 3.3\"").unwrap(); assert!(marker.evaluate(&env27, &[])); assert!(!marker.evaluate(&env37, &[])); let marker = MarkerTree::from_str("python_version in \"2.7 3.7\"").unwrap(); assert!(marker.evaluate(&env27, &[])); assert!(marker.evaluate(&env37, &[])); let marker = MarkerTree::from_str("python_version in \"2.4 3.8 4.0\"").unwrap(); assert!(!marker.evaluate(&env27, &[])); assert!(!marker.evaluate(&env37, &[])); let marker = MarkerTree::from_str("python_version not in \"2.7 3.2 3.3\"").unwrap(); assert!(!marker.evaluate(&env27, &[])); assert!(marker.evaluate(&env37, &[])); let marker = MarkerTree::from_str("python_version not in \"2.7 3.7\"").unwrap(); assert!(!marker.evaluate(&env27, &[])); assert!(!marker.evaluate(&env37, &[])); let marker = MarkerTree::from_str("python_version not in \"2.4 3.8 4.0\"").unwrap(); assert!(marker.evaluate(&env27, &[])); assert!(marker.evaluate(&env37, &[])); let marker = MarkerTree::from_str("python_full_version in \"2.7\"").unwrap(); assert!(marker.evaluate(&env27, &[])); assert!(!marker.evaluate(&env37, &[])); let marker = MarkerTree::from_str("implementation_version in \"2.7 3.2 3.3\"").unwrap(); assert!(marker.evaluate(&env27, &[])); assert!(!marker.evaluate(&env37, &[])); let marker = MarkerTree::from_str("implementation_version in \"2.7 3.7\"").unwrap(); assert!(marker.evaluate(&env27, &[])); assert!(marker.evaluate(&env37, &[])); let marker = MarkerTree::from_str("implementation_version not in \"2.7 3.7\"").unwrap(); assert!(!marker.evaluate(&env27, &[])); assert!(!marker.evaluate(&env37, &[])); let marker = 
MarkerTree::from_str("implementation_version not in \"2.4 3.8 4.0\"").unwrap(); assert!(marker.evaluate(&env27, &[])); assert!(marker.evaluate(&env37, &[])); } #[test] #[cfg(feature = "tracing")] #[tracing_test::traced_test] fn warnings1() { let env37 = env37(); let compare_keys = MarkerTree::from_str("platform_version == sys_platform").unwrap(); compare_keys.evaluate(&env37, &[]); logs_contain( "Comparing two markers with each other doesn't make any sense, will evaluate to false", ); } #[test] #[cfg(feature = "tracing")] #[tracing_test::traced_test] fn warnings2() { let env37 = env37(); let non_pep440 = MarkerTree::from_str("python_version >= '3.9.'").unwrap(); non_pep440.evaluate(&env37, &[]); logs_contain( "Expected PEP 440 version to compare with python_version, found `3.9.`, \ will evaluate to false: after parsing `3.9`, found `.`, which is \ not part of a valid version", ); } #[test] #[cfg(feature = "tracing")] #[tracing_test::traced_test] fn warnings3() { let env37 = env37(); let string_string = MarkerTree::from_str("'b' >= 'a'").unwrap(); string_string.evaluate(&env37, &[]); logs_contain( "Comparing two quoted strings with each other doesn't make sense: 'b' >= 'a', will evaluate to false" ); } #[test] #[cfg(feature = "tracing")] #[tracing_test::traced_test] fn warnings4() { let env37 = env37(); let string_string = MarkerTree::from_str(r"os.name == 'posix' and platform.machine == 'x86_64' and platform.python_implementation == 'CPython' and 'Ubuntu' in platform.version and sys.platform == 'linux'").unwrap(); string_string.evaluate(&env37, &[]); logs_assert(|lines: &[&str]| { let lines: Vec<_> = lines .iter() .map(|s| s.split_once(" ").unwrap().1) .collect(); let expected = [ "WARN warnings4: pep508_rs: os.name is deprecated in favor of os_name", "WARN warnings4: pep508_rs: platform.machine is deprecated in favor of platform_machine", "WARN warnings4: pep508_rs: platform.python_implementation is deprecated in favor of", "WARN warnings4: pep508_rs: sys.platform 
is deprecated in favor of sys_platform", "WARN warnings4: pep508_rs: Comparing linux and posix lexicographically" ]; if lines == expected { Ok(()) } else { Err(format!("{:?}", lines)) } }); } #[test] fn test_not_in() { MarkerTree::from_str("'posix' not in os_name").unwrap(); } #[test] fn test_marker_version_inverted() { let env37 = env37(); let (result, warnings) = MarkerTree::from_str("python_version > '3.6'") .unwrap() .evaluate_collect_warnings(&env37, &[]); assert_eq!(warnings, &[]); assert!(result); let (result, warnings) = MarkerTree::from_str("'3.6' > python_version") .unwrap() .evaluate_collect_warnings(&env37, &[]); assert_eq!(warnings, &[]); assert!(!result); // Meaningless expressions are ignored, so this is always true. let (result, warnings) = MarkerTree::from_str("'3.*' == python_version") .unwrap() .evaluate_collect_warnings(&env37, &[]); assert_eq!(warnings, &[]); assert!(result); } #[test] fn test_marker_string_inverted() { let env37 = env37(); let (result, warnings) = MarkerTree::from_str("'nux' in sys_platform") .unwrap() .evaluate_collect_warnings(&env37, &[]); assert_eq!(warnings, &[]); assert!(result); let (result, warnings) = MarkerTree::from_str("sys_platform in 'nux'") .unwrap() .evaluate_collect_warnings(&env37, &[]); assert_eq!(warnings, &[]); assert!(!result); } #[test] fn test_marker_version_star() { let env37 = env37(); let (result, warnings) = MarkerTree::from_str("python_version == '3.7.*'") .unwrap() .evaluate_collect_warnings(&env37, &[]); assert_eq!(warnings, &[]); assert!(result); } #[test] fn test_tilde_equal() { let env37 = env37(); let (result, warnings) = MarkerTree::from_str("python_version ~= '3.7'") .unwrap() .evaluate_collect_warnings(&env37, &[]); assert_eq!(warnings, &[]); assert!(result); } #[test] fn test_closing_parentheses() { MarkerTree::from_str(r#"( "linux" in sys_platform) and extra == 'all'"#).unwrap(); } #[test] fn wrong_quotes_dot_star() { assert_snapshot!( parse_err(r#"python_version == "3.8".* and 
/// Checks how `simplify_extras` rewrites markers once the `dev` extra is assumed to be enabled.
#[test]
fn test_simplify_extras() {
    // The only extra that is ever stripped in this test.
    let dev = [ExtraName::from_str("dev").unwrap()];
    let parse = |marker: &str| MarkerTree::from_str(marker).unwrap();
    let without_dev = |tree: MarkerTree| tree.simplify_extras(&dev);

    // `os_name == "nt" and extra == "dev"` keeps only the `os_name` clause.
    assert_eq!(
        without_dev(parse(r#"os_name == "nt" and extra == "dev""#)),
        parse(r#"os_name == "nt""#)
    );

    // `os_name == "nt" or extra == "dev"` is always satisfied, so the marker disappears.
    assert_eq!(
        without_dev(parse(r#"os_name == "nt" or extra == "dev""#)),
        MarkerTree::TRUE
    );

    // A bare `extra == "dev"` disappears as well.
    assert_eq!(without_dev(parse(r#"extra == "dev""#)), MarkerTree::TRUE);

    // `extra == "dev" and extra == "test"` keeps the extra that was not stripped.
    assert_eq!(
        without_dev(parse(r#"extra == "dev" and extra == "test""#)),
        parse(r#"extra == "test""#)
    );

    // `os_name == "nt" and extra == "test"` never mentions `dev`, so nothing changes.
    let untouched = parse(r#"os_name == "nt" and extra == "test""#);
    assert_eq!(without_dev(untouched.clone()), untouched);

    // `os_name == "nt" and (python_version == "3.7" or extra == "dev")` reduces to
    // `os_name == "nt"`, because the parenthesised disjunction becomes always true.
    assert_eq!(
        without_dev(parse(
            r#"os_name == "nt" and (python_version == "3.7" or extra == "dev")"#,
        )),
        parse(r#"os_name == "nt""#)
    );

    // `os_name == "nt" or (python_version == "3.7" and extra == "dev")` reduces to
    // `os_name == "nt" or python_version == "3.7"`.
    assert_eq!(
        without_dev(parse(
            r#"os_name == "nt" or (python_version == "3.7" and extra == "dev")"#,
        )),
        parse(r#"os_name == "nt" or python_version == "3.7""#)
    );
}
`implementation_version` instead of `python_version` assert_simplifies( "implementation_version in '3.9 3.11'", "implementation_version == '3.9' or implementation_version == '3.11'", ); // ' not in' // e.g., when the range is not contiguous assert_simplifies( "python_version not in '3.9 3.11'", "python_full_version < '3.9' or python_full_version == '3.10.*' or python_full_version >= '3.12'", ); // e.g, when the range is contiguous assert_simplifies( "python_version not in '3.9 3.10 3.11'", "python_full_version < '3.9' or python_full_version >= '3.12'", ); // e.g., with `implementation_version` instead of `python_version` assert_simplifies( "implementation_version not in '3.9 3.11'", "implementation_version != '3.9' and implementation_version != '3.11'", ); assert_simplifies("python_version != '3.9'", "python_full_version != '3.9.*'"); assert_simplifies("python_version >= '3.9.0'", "python_full_version >= '3.9'"); assert_simplifies("python_version <= '3.9.0'", "python_full_version < '3.10'"); assert_simplifies( "python_version == '3.*'", "python_full_version >= '3' and python_full_version < '4'", ); assert_simplifies( "python_version == '3.0.*'", "python_full_version == '3.0.*'", ); assert_simplifies( "python_version < '3.17' or python_version < '3.18'", "python_full_version < '3.18'", ); assert_simplifies( "python_version > '3.17' or python_version > '3.18' or python_version > '3.12'", "python_full_version >= '3.13'", ); // a quirk of how pubgrub works, but this is considered part of normalization assert_simplifies( "python_version > '3.17.post4' or python_version > '3.18.post4'", "python_full_version >= '3.18'", ); assert_simplifies( "python_version < '3.17' and python_version < '3.18'", "python_full_version < '3.17'", ); assert_simplifies( "python_version <= '3.18' and python_version == '3.18'", "python_full_version == '3.18.*'", ); assert_simplifies( "python_version <= '3.18' or python_version == '3.18'", "python_full_version < '3.19'", ); assert_simplifies( 
"python_version <= '3.15' or (python_version <= '3.17' and python_version < '3.16')", "python_full_version < '3.16'", ); assert_simplifies( "(python_version > '3.17' or python_version > '3.16') and python_version > '3.15'", "python_full_version >= '3.17'", ); assert_simplifies( "(python_version > '3.17' or python_version > '3.16') and python_version > '3.15' and implementation_version == '1'", "implementation_version == '1' and python_full_version >= '3.17'", ); assert_simplifies( "('3.17' < python_version or '3.16' < python_version) and '3.15' < python_version and implementation_version == '1'", "implementation_version == '1' and python_full_version >= '3.17'", ); assert_simplifies("extra == 'a' or extra == 'a'", "extra == 'a'"); assert_simplifies( "extra == 'a' and extra == 'a' or extra == 'b'", "extra == 'a' or extra == 'b'", ); assert!(m("python_version < '3.17' and '3.18' == python_version").is_false()); // flatten nested expressions assert_simplifies( "((extra == 'a' and extra == 'b') and extra == 'c') and extra == 'b'", "extra == 'a' and extra == 'b' and extra == 'c'", ); assert_simplifies( "((extra == 'a' or extra == 'b') or extra == 'c') or extra == 'b'", "extra == 'a' or extra == 'b' or extra == 'c'", ); // complex expressions assert_simplifies( "extra == 'a' or (extra == 'a' and extra == 'b')", "extra == 'a'", ); assert_simplifies( "extra == 'a' and (extra == 'a' or extra == 'b')", "extra == 'a'", ); assert_simplifies( "(extra == 'a' and (extra == 'a' or extra == 'b')) or extra == 'd'", "extra == 'a' or extra == 'd'", ); assert_simplifies( "((extra == 'a' and extra == 'b') or extra == 'c') or extra == 'b'", "extra == 'b' or extra == 'c'", ); assert_simplifies( "((extra == 'a' or extra == 'b') and extra == 'c') and extra == 'b'", "extra == 'b' and extra == 'c'", ); assert_simplifies( "((extra == 'a' or extra == 'b') and extra == 'c') or extra == 'b'", "(extra == 'a' and extra == 'c') or extra == 'b'", ); // post-normalization filtering assert_simplifies( 
/// Checks that `negate()` yields a tree equal to the hand-written negated marker.
#[test]
fn test_marker_negation() {
    // Each entry is `(marker, marker equal to its negation)`.
    let comparisons = [
        ("python_version > '3.6'", "python_version <= '3.6'"),
        ("'3.6' < python_version", "python_version <= '3.6'"),
        (
            "python_version != '3.6' and os_name == 'Linux'",
            "python_version == '3.6' or os_name != 'Linux'",
        ),
        (
            "python_version == '3.6' and os_name != 'Linux'",
            "python_version != '3.6' or os_name == 'Linux'",
        ),
        (
            "python_version != '3.6.*' and os_name == 'Linux'",
            "python_version == '3.6.*' or os_name != 'Linux'",
        ),
        ("python_version == '3.6.*'", "python_version != '3.6.*'"),
        ("python_version != '3.6.*'", "python_version == '3.6.*'"),
        (
            "python_version ~= '3.6'",
            "python_version < '3.6' or python_version != '3.*'",
        ),
        (
            "'3.6' ~= python_version",
            "python_version < '3.6' or python_version != '3.*'",
        ),
        (
            "python_version ~= '3.6.2'",
            "python_version < '3.6.2' or python_version != '3.6.*'",
        ),
        ("sys_platform == 'linux'", "sys_platform != 'linux'"),
        ("'linux' == sys_platform", "sys_platform != 'linux'"),
    ];
    for (marker, negated) in comparisons {
        assert_eq!(m(marker).negate(), m(negated));
    }

    // `~=` is meaningless on string markers, and a comparison between two literals is
    // arbitrary; both kinds of expression are ignored and evaluate to true, so their
    // negation is always false.
    for ignored in ["sys_platform ~= 'linux'", "'foo' == 'bar'"] {
        assert_eq!(m(ignored).negate(), MarkerTree::FALSE);
    }

    let compound = [
        // A conjunction negates to a disjunction of the negated operands.
        (
            "os_name == 'bar' and os_name == 'foo'",
            "os_name != 'bar' or os_name != 'foo'",
        ),
        // A disjunction negates to a conjunction of the negated operands.
        (
            "os_name == 'bar' or os_name == 'foo'",
            "os_name != 'bar' and os_name != 'foo'",
        ),
        // An always-true marker negates to an always-false one.
        (
            "python_version >= '3.6' or python_version < '3.6'",
            "python_version < '3.6' and python_version >= '3.6'",
        ),
    ];
    for (marker, negated) in compound {
        assert_eq!(m(marker).negate(), m(negated));
    }
}
assert_simplifies( "(implementation_name == 'pypy' and sys_platform != 'win32') or (implementation_name != 'pypy' and sys_platform == 'win32') or (sys_platform == 'win32' and os_name != 'nt') or (sys_platform != 'win32' and os_name == 'nt')", "(os_name != 'nt' and sys_platform == 'win32') \ or (implementation_name != 'pypy' and os_name == 'nt') \ or (implementation_name == 'pypy' and os_name != 'nt') \ or (os_name == 'nt' and sys_platform != 'win32')", ); // This is another case we cannot simplify fully, depending on the variable order. // The expression is equivalent to `sys_platform == 'x' or (os_name == 'Linux' and platform_system == 'win32')`. assert_simplifies( "(os_name == 'Linux' and platform_system == 'win32') or (os_name == 'Linux' and platform_system == 'win32' and sys_platform == 'a') or (os_name == 'Linux' and platform_system == 'win32' and sys_platform == 'x') or (os_name != 'Linux' and platform_system == 'win32' and sys_platform == 'x') or (os_name == 'Linux' and platform_system != 'win32' and sys_platform == 'x') or (os_name != 'Linux' and platform_system != 'win32' and sys_platform == 'x')", "(os_name != 'Linux' and sys_platform == 'x') or (platform_system != 'win32' and sys_platform == 'x') or (os_name == 'Linux' and platform_system == 'win32')", ); assert_simplifies("python_version > '3.7'", "python_full_version >= '3.8'"); assert_simplifies( "(python_version <= '3.7' and os_name == 'Linux') or python_version > '3.7'", "os_name == 'Linux' or python_full_version >= '3.8'", ); // Again, the extra `<3.7` and `>=3.9` expressions cannot be seen as redundant due to them being interdependent. // TODO(ibraheem): We might be able to simplify these by checking for the negation of the combined ranges before we split them. 
assert_simplifies( "(os_name == 'Linux' and sys_platform == 'win32') \ or (os_name != 'Linux' and sys_platform == 'win32' and python_version == '3.7') \ or (os_name != 'Linux' and sys_platform == 'win32' and python_version == '3.8')", "(python_full_version < '3.7' and os_name == 'Linux' and sys_platform == 'win32') \ or (python_full_version >= '3.9' and os_name == 'Linux' and sys_platform == 'win32') \ or (python_full_version >= '3.7' and python_full_version < '3.9' and sys_platform == 'win32')", ); assert_simplifies( "(implementation_name != 'pypy' and os_name == 'nt' and sys_platform == 'darwin') or (os_name == 'nt' and sys_platform == 'win32')", "(implementation_name != 'pypy' and os_name == 'nt' and sys_platform == 'darwin') or (os_name == 'nt' and sys_platform == 'win32')", ); assert_simplifies( "(sys_platform == 'darwin' or sys_platform == 'win32') and ((implementation_name != 'pypy' and os_name == 'nt' and sys_platform == 'darwin') or (os_name == 'nt' and sys_platform == 'win32'))", "(implementation_name != 'pypy' and os_name == 'nt' and sys_platform == 'darwin') or (os_name == 'nt' and sys_platform == 'win32')", ); assert_simplifies( "(sys_platform == 'darwin' or sys_platform == 'win32') and ((platform_version != '1' and os_name == 'nt' and sys_platform == 'darwin') or (os_name == 'nt' and sys_platform == 'win32'))", "(os_name == 'nt' and platform_version != '1' and sys_platform == 'darwin') or (os_name == 'nt' and sys_platform == 'win32')", ); assert_simplifies( "(os_name == 'nt' and sys_platform == 'win32') \ or (os_name != 'nt' and platform_version == '1' and (sys_platform == 'win32' or sys_platform == 'win64'))", "(platform_version == '1' and sys_platform == 'win32') \ or (os_name != 'nt' and platform_version == '1' and sys_platform == 'win64') \ or (os_name == 'nt' and sys_platform == 'win32')", ); assert_simplifies( "(os_name == 'nt' and sys_platform == 'win32') or (os_name != 'nt' and (sys_platform == 'win32' or sys_platform == 'win64'))", "(os_name 
/// Checks `simplify_python_versions` under a fixed lower bound of Python 3.8 (inclusive)
/// and no upper bound.
#[test]
fn test_requires_python() {
    let lower = Bound::Included(Version::new([3, 8]));
    let upper: Bound<Version> = Bound::Unbounded;
    let simplified =
        |marker: &str| m(marker).simplify_python_versions(lower.as_ref(), upper.as_ref());

    // A constraint already implied by the bound vanishes, alone or inside a disjunction.
    assert_eq!(simplified("python_version >= '3.8'"), MarkerTree::TRUE);
    assert_eq!(
        simplified("python_version >= '3.8' or sys_platform == 'win32'"),
        MarkerTree::TRUE
    );

    // Inside a conjunction only the implied clause is dropped.
    assert_eq!(
        simplified("python_version >= '3.8' and sys_platform == 'win32'"),
        m("sys_platform == 'win32'"),
    );

    // The remaining cases compare the rendered string form of the simplified marker.
    let rendered = simplified("python_version == '3.8'")
        .try_to_string()
        .unwrap();
    assert_eq!(rendered, "python_full_version < '3.9'");

    let rendered = simplified("python_version <= '3.10'")
        .try_to_string()
        .unwrap();
    assert_eq!(rendered, "python_full_version < '3.11'");
}
/// Disjointness checks that involve `python_version` / `python_full_version`.
#[test]
fn test_version_disjointness() {
    // Markers over different keys can always be satisfied together.
    assert!(!is_disjoint(
        "os_name == 'Linux'",
        "python_full_version == '3.7.1'"
    ));

    test_version_bounds_disjointness("python_full_version");

    // Each entry is `(left, right, whether the two are expected to be disjoint)`.
    let cases = [
        (
            "python_full_version == '3.7.*'",
            "python_full_version == '3.7.1'",
            false,
        ),
        ("python_version == '3.7'", "python_full_version == '3.8'", true),
        (
            "python_version == '3.7'",
            "python_full_version == '3.7.2'",
            false,
        ),
        ("python_version > '3.7'", "python_full_version == '3.7.1'", true),
        (
            "python_version <= '3.7'",
            "python_full_version == '3.7.1'",
            false,
        ),
    ];
    for (left, right, disjoint) in cases {
        assert_eq!(is_disjoint(left, right), disjoint);
    }
}
/// Disjointness checks for markers that combine several keys with `and` / `or`.
#[test]
fn test_combined_disjointness() {
    // Each entry is `(left, right, whether the two are expected to be disjoint)`.
    let cases = [
        (
            "os_name == 'a' and platform_version == '1'",
            "os_name == 'a'",
            false,
        ),
        (
            "os_name == 'a' or platform_version == '1'",
            "os_name == 'a'",
            false,
        ),
        (
            "os_name == 'a' and platform_version == '1'",
            "os_name == 'a' and platform_version == '2'",
            true,
        ),
        (
            "os_name == 'a' and platform_version == '1'",
            "'2' == platform_version and os_name == 'a'",
            true,
        ),
        (
            "os_name == 'a' or platform_version == '1'",
            "os_name == 'a' or platform_version == '2'",
            false,
        ),
        (
            "sys_platform == 'darwin' and implementation_name == 'pypy'",
            "sys_platform == 'bar' or implementation_name == 'foo'",
            true,
        ),
        (
            "sys_platform == 'bar' or implementation_name == 'foo'",
            "sys_platform == 'darwin' and implementation_name == 'pypy'",
            true,
        ),
        (
            "python_version >= '3.7' and implementation_name == 'pypy'",
            "python_version < '3.7'",
            true,
        ),
        (
            "implementation_name == 'pypy' and python_version >= '3.7'",
            "implementation_name != 'pypy'",
            true,
        ),
        (
            "implementation_name != 'pypy' and python_version >= '3.7'",
            "implementation_name == 'pypy'",
            true,
        ),
    ];
    for (left, right, disjoint) in cases {
        assert_eq!(is_disjoint(left, right), disjoint);
    }
}
python_version == 'Linux'").is_true()); } #[test] fn test_is_false() { assert!(m("python_version < '3.10' and python_version >= '3.10'").is_false()); assert!(m("(python_version < '3.10' and python_version >= '3.10') \ or (python_version < '3.9' and python_version >= '3.9')",) .is_false()); assert!(!m("python_version < '3.10'").is_false()); assert!(!m("python_version < '0'").is_false()); assert!(!m("python_version < '3.10' and python_version >= '3.9'").is_false()); assert!(!m("python_version < '3.10' or python_version >= '3.11'").is_false()); } fn test_version_bounds_disjointness(version: &str) { assert!(!is_disjoint( format!("{version} > '2.7.0'"), format!("{version} == '3.6.0'") )); assert!(!is_disjoint( format!("{version} >= '3.7.0'"), format!("{version} == '3.7.1'") )); assert!(!is_disjoint( format!("{version} >= '3.7.0'"), format!("'3.7.1' == {version}") )); assert!(is_disjoint( format!("{version} >= '3.7.1'"), format!("{version} == '3.7.0'") )); assert!(is_disjoint( format!("'3.7.1' <= {version}"), format!("{version} == '3.7.0'") )); assert!(is_disjoint( format!("{version} < '3.7.0'"), format!("{version} == '3.7.0'") )); assert!(is_disjoint( format!("'3.7.0' > {version}"), format!("{version} == '3.7.0'") )); assert!(is_disjoint( format!("{version} < '3.7.0'"), format!("{version} == '3.7.1'") )); assert!(is_disjoint( format!("{version} == '3.7.0'"), format!("{version} == '3.7.1'") )); assert!(is_disjoint( format!("{version} == '3.7.0'"), format!("{version} != '3.7.0'") )); } fn assert_simplifies(left: &str, right: &str) { assert_eq!(m(left), m(right), "{left} != {right}"); assert_eq!(m(left).try_to_string().unwrap(), right, "{left} != {right}"); } fn assert_true(marker: &str) { assert!(m(marker).is_true(), "{marker} != true"); } fn assert_false(marker: &str) { assert!(m(marker).is_false(), "{marker} != false"); } fn is_disjoint(left: impl AsRef, right: impl AsRef) -> bool { let (left, right) = (m(left.as_ref()), m(right.as_ref())); left.is_disjoint(&right) && 
right.is_disjoint(&left) } #[test] fn complexified_markers() { // Takes optional lower (inclusive) and upper (exclusive) // bounds representing `requires-python` and a "simplified" // marker, and returns the "complexified" marker. That is, a // marker that embeds the `requires-python` constraint into it. let complexify = |lower: Option<[u64; 2]>, upper: Option<[u64; 2]>, marker: &str| -> MarkerTree { let lower = lower .map(|release| Bound::Included(Version::new(release))) .unwrap_or(Bound::Unbounded); let upper = upper .map(|release| Bound::Excluded(Version::new(release))) .unwrap_or(Bound::Unbounded); m(marker).complexify_python_versions(lower.as_ref(), upper.as_ref()) }; assert_eq!( complexify(None, None, "python_full_version < '3.10'"), m("python_full_version < '3.10'"), ); assert_eq!( complexify(Some([3, 8]), None, "python_full_version < '3.10'"), m("python_full_version >= '3.8' and python_full_version < '3.10'"), ); assert_eq!( complexify(None, Some([3, 8]), "python_full_version < '3.10'"), m("python_full_version < '3.8'"), ); assert_eq!( complexify(Some([3, 8]), Some([3, 8]), "python_full_version < '3.10'"), // Kinda weird, but this normalizes to `false`, just like the above. m("python_full_version < '0' and python_full_version > '0'"), ); assert_eq!( complexify(Some([3, 11]), None, "python_full_version < '3.10'"), // Kinda weird, but this normalizes to `false`, just like the above. m("python_full_version < '0' and python_full_version > '0'"), ); assert_eq!( complexify(Some([3, 11]), None, "python_full_version >= '3.10'"), m("python_full_version >= '3.11'"), ); assert_eq!( complexify(Some([3, 11]), None, "python_full_version >= '3.12'"), m("python_full_version >= '3.12'"), ); assert_eq!( complexify(None, Some([3, 11]), "python_full_version > '3.12'"), // Kinda weird, but this normalizes to `false`, just like the above. 
/// Checks `simplify_python_versions` for several `requires-python` ranges.
#[test]
fn simplified_markers() {
    // `simplify` takes an optional inclusive lower and an optional exclusive upper
    // `requires-python` bound plus a "complexified" marker, and returns the marker
    // simplified under the assumption that `requires-python` holds.
    let simplify = |lower: Option<[u64; 2]>, upper: Option<[u64; 2]>, marker: &str| -> MarkerTree {
        let lower_bound = lower.map_or(Bound::Unbounded, |release| {
            Bound::Included(Version::new(release))
        });
        let upper_bound = upper.map_or(Bound::Unbounded, |release| {
            Bound::Excluded(Version::new(release))
        });
        m(marker).simplify_python_versions(lower_bound.as_ref(), upper_bound.as_ref())
    };
    // An unsatisfiable version range; the test relies on it normalizing to `false`.
    let never = || m("python_full_version < '0' and python_full_version > '0'");

    // The lower bound of the marker is already implied by `requires-python`.
    assert_eq!(
        simplify(
            Some([3, 8]),
            None,
            "python_full_version >= '3.8' and python_full_version < '3.10'"
        ),
        m("python_full_version < '3.10'"),
    );

    // Nothing below 3.7 can satisfy `>=3.8`, so the marker can never be true.
    assert_eq!(
        simplify(Some([3, 8]), None, "python_full_version < '3.7'"),
        never(),
    );

    // With `requires-python = '>=3.8,<3.11'` only the `3.8.*` and `3.10.*`
    // alternatives can hold, which is the same as excluding `3.9.*`.
    assert_eq!(
        simplify(
            Some([3, 8]),
            Some([3, 11]),
            "python_full_version == '3.7.*' \
             or python_full_version == '3.8.*' \
             or python_full_version == '3.10.*' \
             or python_full_version == '3.11.*' \
             "
        ),
        m("python_full_version != '3.9.*'"),
    );

    // Clauses over other keys are left in place.
    assert_eq!(
        simplify(
            Some([3, 8]),
            None,
            "python_full_version >= '3.8' and sys_platform == 'win32'"
        ),
        m("sys_platform == 'win32'"),
    );

    // The parenthesised disjunction is always true under `>=3.8` and drops out.
    assert_eq!(
        simplify(
            Some([3, 8]),
            None,
            "python_full_version >= '3.9' \
             and (sys_platform == 'win32' or python_full_version >= '3.8')",
        ),
        m("python_full_version >= '3.9'"),
    );
}
/// /// See: /// - /// - #[derive(Serialize, Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct ExtraName(String); impl ExtraName { /// Create a validated, normalized extra name. pub fn new(name: String) -> Result { validate_and_normalize_owned(name).map(Self) } } impl FromStr for ExtraName { type Err = InvalidNameError; fn from_str(name: &str) -> Result { validate_and_normalize_ref(name).map(Self) } } impl<'de> Deserialize<'de> for ExtraName { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { let s = String::deserialize(deserializer)?; Self::from_str(&s).map_err(serde::de::Error::custom) } } impl Display for ExtraName { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { self.0.fmt(f) } } impl AsRef for ExtraName { fn as_ref(&self) -> &str { &self.0 } } pep508_rs-0.9.1/src/normalize/mod.rs000064400000000000000000000130511046102023000152740ustar 00000000000000use std::error::Error; use std::fmt::{Display, Formatter}; pub use extra_name::ExtraName; pub use package_name::PackageName; mod extra_name; mod package_name; /// Validate and normalize an owned package or extra name. pub(crate) fn validate_and_normalize_owned(name: String) -> Result { if is_normalized(&name)? { Ok(name) } else { validate_and_normalize_ref(name) } } /// Validate and normalize an unowned package or extra name. pub(crate) fn validate_and_normalize_ref( name: impl AsRef, ) -> Result { let mut normalized = String::with_capacity(name.as_ref().len()); let mut last = None; for char in name.as_ref().bytes() { match char { b'A'..=b'Z' => { normalized.push(char.to_ascii_lowercase() as char); } b'a'..=b'z' | b'0'..=b'9' => { normalized.push(char as char); } b'-' | b'_' | b'.' => { match last { // Names can't start with punctuation. 
None => return Err(InvalidNameError(name.as_ref().to_string())), Some(b'-') | Some(b'_') | Some(b'.') => {} Some(_) => normalized.push('-'), } } _ => return Err(InvalidNameError(name.as_ref().to_string())), } last = Some(char); } // Names can't end with punctuation. if matches!(last, Some(b'-') | Some(b'_') | Some(b'.')) { return Err(InvalidNameError(name.as_ref().to_string())); } Ok(normalized) } /// Returns `true` if the name is already normalized. fn is_normalized(name: impl AsRef) -> Result { let mut last = None; for char in name.as_ref().bytes() { match char { b'A'..=b'Z' => { // Uppercase characters need to be converted to lowercase. return Ok(false); } b'a'..=b'z' | b'0'..=b'9' => {} b'_' | b'.' => { // `_` and `.` are normalized to `-`. return Ok(false); } b'-' => { match last { // Names can't start with punctuation. None => return Err(InvalidNameError(name.as_ref().to_string())), Some(b'-') => { // Runs of `-` are normalized to a single `-`. return Ok(false); } Some(_) => {} } } _ => return Err(InvalidNameError(name.as_ref().to_string())), } last = Some(char); } // Names can't end with punctuation. if matches!(last, Some(b'-') | Some(b'_') | Some(b'.')) { return Err(InvalidNameError(name.as_ref().to_string())); } Ok(true) } /// Invalid [`crate::PackageName`] or [`crate::ExtraName`]. #[derive(Clone, Debug, Eq, PartialEq)] pub struct InvalidNameError(String); impl InvalidNameError { /// Returns the invalid name. pub fn as_str(&self) -> &str { &self.0 } } impl Display for InvalidNameError { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!( f, "Not a valid package or extra name: \"{}\". 
#[cfg(test)]
mod tests {
    use super::*;

    /// Every spelling variant collapses to the same canonical name, through both the
    /// borrowed and the owned entry point.
    #[test]
    fn normalize() {
        let spellings = [
            "friendly-bard",
            "Friendly-Bard",
            "FRIENDLY-BARD",
            "friendly.bard",
            "friendly_bard",
            "friendly--bard",
            "friendly-.bard",
            "FrIeNdLy-._.-bArD",
        ];
        for spelling in spellings {
            let from_ref = validate_and_normalize_ref(spelling).unwrap();
            assert_eq!(from_ref, "friendly-bard");
            let from_owned = validate_and_normalize_owned(spelling.to_string()).unwrap();
            assert_eq!(from_owned, "friendly-bard");
        }
    }

    /// `is_normalized` accepts canonical names and flags everything that needs rewriting.
    #[test]
    fn check() {
        for canonical in ["friendly-bard", "friendlybard"] {
            assert!(is_normalized(canonical).unwrap(), "{:?}", canonical);
        }

        let needs_rewrite = [
            "friendly.bard",
            "friendly.BARD",
            "friendly_bard",
            "friendly--bard",
            "friendly-.bard",
            "FrIeNdLy-._.-bArD",
        ];
        for name in needs_rewrite {
            assert!(!is_normalized(name).unwrap(), "{:?}", name);
        }
    }

    /// Names that are already canonical come back untouched.
    #[test]
    fn unchanged() {
        for name in ["friendly-bard", "1okay", "okay2"] {
            assert_eq!(validate_and_normalize_ref(name).unwrap(), name);
            assert_eq!(
                validate_and_normalize_owned(name.to_string()).unwrap(),
                name
            );
            assert!(is_normalized(name).unwrap());
        }
    }

    /// Invalid names are rejected by all three helpers.
    #[test]
    fn failures() {
        let invalid = [
            " starts-with-space",
            "-starts-with-dash",
            "ends-with-dash-",
            "ends-with-space ",
            "includes!invalid-char",
            "space in middle",
            "alpha-α",
        ];
        for name in invalid {
            assert!(validate_and_normalize_ref(name).is_err());
            assert!(validate_and_normalize_owned(name.to_string()).is_err());
            assert!(is_normalized(name).is_err());
        }
    }
}
/// /// Converts the name to lowercase and collapses any run of the characters `-`, `_` and `.` /// down to a single `-`, e.g., `---`, `.`, and `__` all get converted to just `-`. /// /// See: #[derive(Serialize, Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct PackageName(String); impl PackageName { /// Create a validated, normalized package name. pub fn new(name: String) -> Result { validate_and_normalize_owned(name).map(Self) } /// Escape this name with underscores (`_`) instead of dashes (`-`) /// /// See: pub fn as_dist_info_name(&self) -> Cow<'_, str> { if let Some(dash_position) = self.0.find('-') { // Initialize `replaced` with the start of the string up to the current character. let mut owned_string = String::with_capacity(self.0.len()); owned_string.push_str(&self.0[..dash_position]); owned_string.push('_'); // Iterate over the rest of the string. owned_string.extend(self.0[dash_position + 1..].chars().map(|character| { if character == '-' { '_' } else { character } })); Cow::Owned(owned_string) } else { Cow::Borrowed(self.0.as_str()) } } } impl From<&PackageName> for PackageName { /// Required for `WaitMap::wait`. 
fn from(package_name: &PackageName) -> Self { package_name.clone() } } impl FromStr for PackageName { type Err = InvalidNameError; fn from_str(name: &str) -> Result { validate_and_normalize_ref(name).map(Self) } } impl<'de> Deserialize<'de> for PackageName { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { let s = String::deserialize(deserializer)?; Self::from_str(&s).map_err(serde::de::Error::custom) } } impl std::fmt::Display for PackageName { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { self.0.fmt(f) } } impl AsRef for PackageName { fn as_ref(&self) -> &str { &self.0 } } pep508_rs-0.9.1/src/origin.rs000064400000000000000000000020741046102023000140070ustar 00000000000000use crate::PackageName; use std::path::{Path, PathBuf}; /// The origin of a dependency, e.g., a `-r requirements.txt` file. #[derive( Hash, Debug, Clone, Eq, PartialEq, PartialOrd, Ord, serde::Serialize, serde::Deserialize, )] #[serde(rename_all = "kebab-case")] pub enum RequirementOrigin { /// The requirement was provided via a standalone file (e.g., a `requirements.txt` file). File(PathBuf), /// The requirement was provided via a local project (e.g., a `pyproject.toml` file). Project(PathBuf, PackageName), /// The requirement was provided via a workspace. Workspace, } impl RequirementOrigin { /// Returns the path of the requirement origin. pub fn path(&self) -> &Path { match self { RequirementOrigin::File(path) => path.as_path(), RequirementOrigin::Project(path, _) => path.as_path(), // Multiple toml are merged and difficult to track files where Requirement is defined. Returns a dummy path instead. RequirementOrigin::Workspace => Path::new("(workspace)"), } } } pep508_rs-0.9.1/src/path.rs000064400000000000000000000053421046102023000134550ustar 00000000000000use std::borrow::Cow; use std::path::{Component, Path, PathBuf}; /// Normalize a path, removing things like `.` and `..`. 
/// /// Source: /// /// CAUTION: Assumes that the path is already absolute. /// /// CAUTION: This does not resolve symlinks (unlike /// [`std::fs::canonicalize`]). This may cause incorrect or surprising /// behavior at times. This should be used carefully. Unfortunately, /// [`std::fs::canonicalize`] can be hard to use correctly, since it can often /// fail, or on Windows returns annoying device paths. /// /// # Errors /// /// When a relative path is provided with `..` components that extend beyond the base directory. /// For example, `./a/../../b` cannot be normalized because it escapes the base directory. #[cfg_attr(not(feature = "non-pep508-extensions"), allow(dead_code))] pub fn normalize_absolute_path(path: &Path) -> Result { let mut components = path.components().peekable(); let mut ret = if let Some(c @ Component::Prefix(..)) = components.peek().copied() { components.next(); PathBuf::from(c.as_os_str()) } else { PathBuf::new() }; for component in components { match component { Component::Prefix(..) => unreachable!(), Component::RootDir => { ret.push(component.as_os_str()); } Component::CurDir => {} Component::ParentDir => { if !ret.pop() { return Err(std::io::Error::new( std::io::ErrorKind::InvalidInput, format!( "cannot normalize a relative path beyond the base directory: {}", path.display() ), )); } } Component::Normal(c) => { ret.push(c); } } } Ok(ret) } /// Normalize the `path` component of a URL for use as a file path. /// /// For example, on Windows, transforms `C:\Users\ferris\wheel-0.42.0.tar.gz` to /// `/C:/Users/ferris/wheel-0.42.0.tar.gz`. /// /// On other platforms, this is a no-op. #[cfg_attr(not(feature = "non-pep508-extensions"), allow(dead_code))] pub fn normalize_url_path(path: &str) -> Cow<'_, str> { // Apply percent-decoding to the URL. let path = urlencoding::decode(path).unwrap_or(Cow::Borrowed(path)); // Return the path. 
if cfg!(windows) { Cow::Owned( path.strip_prefix('/') .unwrap_or(&path) .replace('/', std::path::MAIN_SEPARATOR_STR), ) } else { path } } pep508_rs-0.9.1/src/tests.rs000064400000000000000000000515711046102023000136700ustar 00000000000000//! Half of these tests are copied from use std::env; use std::str::FromStr; use insta::assert_snapshot; use url::Url; use pep440_rs::{Operator, Version, VersionPattern, VersionSpecifier}; use crate::cursor::Cursor; use crate::marker::{parse, MarkerExpression, MarkerTree, MarkerValueVersion}; use crate::{ ExtraName, InvalidNameError, MarkerOperator, MarkerValueString, PackageName, Requirement, TracingReporter, VerbatimUrl, VersionOrUrl, }; fn parse_pep508_err(input: &str) -> String { Requirement::::from_str(input) .unwrap_err() .to_string() } #[cfg(feature = "non-pep508-extensions")] fn parse_unnamed_err(input: &str) -> String { crate::UnnamedRequirement::::from_str(input) .unwrap_err() .to_string() } #[cfg(windows)] #[test] fn test_preprocess_url_windows() { use std::path::PathBuf; let actual = crate::parse_url::( &mut Cursor::new("file:///C:/Users/ferris/wheel-0.42.0.tar.gz"), None, ) .unwrap() .to_file_path(); let expected = PathBuf::from(r"C:\Users\ferris\wheel-0.42.0.tar.gz"); assert_eq!(actual, Ok(expected)); } #[test] fn error_empty() { assert_snapshot!( parse_pep508_err(""), @r" Empty field is not allowed for PEP508 ^" ); } #[test] fn error_start() { assert_snapshot!( parse_pep508_err("_name"), @" Expected package name starting with an alphanumeric character, found `_` _name ^" ); } #[test] fn error_end() { assert_snapshot!( parse_pep508_err("name_"), @" Package name must end with an alphanumeric character, not '_' name_ ^" ); } #[test] fn basic_examples() { let input = r"requests[security,tests]==2.8.*,>=2.8.1 ; python_full_version < '2.7'"; let requests = Requirement::::from_str(input).unwrap(); assert_eq!(input, requests.to_string()); let expected = Requirement { name: PackageName::from_str("requests").unwrap(), extras: 
vec![ ExtraName::from_str("security").unwrap(), ExtraName::from_str("tests").unwrap(), ], version_or_url: Some(VersionOrUrl::VersionSpecifier( [ VersionSpecifier::from_pattern( Operator::Equal, VersionPattern::wildcard(Version::new([2, 8])), ) .unwrap(), VersionSpecifier::from_pattern( Operator::GreaterThanEqual, VersionPattern::verbatim(Version::new([2, 8, 1])), ) .unwrap(), ] .into_iter() .collect(), )), marker: MarkerTree::expression(MarkerExpression::Version { key: MarkerValueVersion::PythonFullVersion, specifier: VersionSpecifier::from_pattern( pep440_rs::Operator::LessThan, "2.7".parse().unwrap(), ) .unwrap(), }), origin: None, }; assert_eq!(requests, expected); } #[test] fn leading_whitespace() { let numpy = Requirement::::from_str(" numpy").unwrap(); assert_eq!(numpy.name.as_ref(), "numpy"); } #[test] fn parenthesized_single() { let numpy = Requirement::::from_str("numpy ( >=1.19 )").unwrap(); assert_eq!(numpy.name.as_ref(), "numpy"); } #[test] fn parenthesized_double() { let numpy = Requirement::::from_str("numpy ( >=1.19, <2.0 )").unwrap(); assert_eq!(numpy.name.as_ref(), "numpy"); } #[test] fn versions_single() { let numpy = Requirement::::from_str("numpy >=1.19 ").unwrap(); assert_eq!(numpy.name.as_ref(), "numpy"); } #[test] fn versions_double() { let numpy = Requirement::::from_str("numpy >=1.19, <2.0 ").unwrap(); assert_eq!(numpy.name.as_ref(), "numpy"); } #[test] #[cfg(feature = "non-pep508-extensions")] fn direct_url_no_extras() { let numpy = crate::UnnamedRequirement::::from_str("https://files.pythonhosted.org/packages/28/4a/46d9e65106879492374999e76eb85f87b15328e06bd1550668f79f7b18c6/numpy-1.26.4-cp312-cp312-win32.whl").unwrap(); assert_eq!(numpy.url.to_string(), "https://files.pythonhosted.org/packages/28/4a/46d9e65106879492374999e76eb85f87b15328e06bd1550668f79f7b18c6/numpy-1.26.4-cp312-cp312-win32.whl"); assert_eq!(numpy.extras, vec![]); } #[test] #[cfg(all(unix, feature = "non-pep508-extensions"))] fn direct_url_extras() { let numpy = 
crate::UnnamedRequirement::::from_str( "/path/to/numpy-1.26.4-cp312-cp312-win32.whl[dev]", ) .unwrap(); assert_eq!( numpy.url.to_string(), "file:///path/to/numpy-1.26.4-cp312-cp312-win32.whl" ); assert_eq!(numpy.extras, vec![ExtraName::from_str("dev").unwrap()]); } #[test] #[cfg(all(windows, feature = "non-pep508-extensions"))] fn direct_url_extras() { let numpy = crate::UnnamedRequirement::::from_str( "C:\\path\\to\\numpy-1.26.4-cp312-cp312-win32.whl[dev]", ) .unwrap(); assert_eq!( numpy.url.to_string(), "file:///C:/path/to/numpy-1.26.4-cp312-cp312-win32.whl" ); assert_eq!(numpy.extras, vec![ExtraName::from_str("dev").unwrap()]); } #[test] fn error_extras_eof1() { assert_snapshot!( parse_pep508_err("black["), @r#" Missing closing bracket (expected ']', found end of dependency specification) black[ ^ "# ); } #[test] fn error_extras_eof2() { assert_snapshot!( parse_pep508_err("black[d"), @r#" Missing closing bracket (expected ']', found end of dependency specification) black[d ^ "# ); } #[test] fn error_extras_eof3() { assert_snapshot!( parse_pep508_err("black[d,"), @r#" Missing closing bracket (expected ']', found end of dependency specification) black[d, ^ "# ); } #[test] fn error_extras_illegal_start1() { assert_snapshot!( parse_pep508_err("black[ö]"), @r#" Expected an alphanumeric character starting the extra name, found `ö` black[ö] ^ "# ); } #[test] fn error_extras_illegal_start2() { assert_snapshot!( parse_pep508_err("black[_d]"), @r#" Expected an alphanumeric character starting the extra name, found `_` black[_d] ^ "# ); } #[test] fn error_extras_illegal_start3() { assert_snapshot!( parse_pep508_err("black[,]"), @r#" Expected either alphanumerical character (starting the extra name) or `]` (ending the extras section), found `,` black[,] ^ "# ); } #[test] fn error_extras_illegal_character() { assert_snapshot!( parse_pep508_err("black[jüpyter]"), @r#" Invalid character in extras name, expected an alphanumeric character, `-`, `_`, `.`, `,` or `]`, found `ü` 
black[jüpyter] ^ "# ); } #[test] fn error_extras1() { let numpy = Requirement::::from_str("black[d]").unwrap(); assert_eq!(numpy.extras, vec![ExtraName::from_str("d").unwrap()]); } #[test] fn error_extras2() { let numpy = Requirement::::from_str("black[d,jupyter]").unwrap(); assert_eq!( numpy.extras, vec![ ExtraName::from_str("d").unwrap(), ExtraName::from_str("jupyter").unwrap(), ] ); } #[test] fn empty_extras() { let black = Requirement::::from_str("black[]").unwrap(); assert_eq!(black.extras, vec![]); } #[test] fn empty_extras_with_spaces() { let black = Requirement::::from_str("black[ ]").unwrap(); assert_eq!(black.extras, vec![]); } #[test] fn error_extra_with_trailing_comma() { assert_snapshot!( parse_pep508_err("black[d,]"), @" Expected an alphanumeric character starting the extra name, found `]` black[d,] ^" ); } #[test] fn error_parenthesized_pep440() { assert_snapshot!( parse_pep508_err("numpy ( ><1.19 )"), @" no such comparison operator \"><\", must be one of ~= == != <= >= < > === numpy ( ><1.19 ) ^^^^^^^" ); } #[test] fn error_parenthesized_parenthesis() { assert_snapshot!( parse_pep508_err("numpy ( >=1.19"), @r#" Missing closing parenthesis (expected ')', found end of dependency specification) numpy ( >=1.19 ^ "# ); } #[test] fn error_whats_that() { assert_snapshot!( parse_pep508_err("numpy % 1.16"), @r#" Expected one of `@`, `(`, `<`, `=`, `>`, `~`, `!`, `;`, found `%` numpy % 1.16 ^ "# ); } #[test] fn url() { let pip_url = Requirement::from_str("pip @ https://github.com/pypa/pip/archive/1.3.1.zip#sha1=da9234ee9982d4bbb3c72346a6de940a148ea686") .unwrap(); let url = "https://github.com/pypa/pip/archive/1.3.1.zip#sha1=da9234ee9982d4bbb3c72346a6de940a148ea686"; let expected = Requirement { name: PackageName::from_str("pip").unwrap(), extras: vec![], marker: MarkerTree::TRUE, version_or_url: Some(VersionOrUrl::Url(Url::parse(url).unwrap())), origin: None, }; assert_eq!(pip_url, expected); } #[test] fn test_marker_parsing() { let marker = 
r#"python_version == "2.7" and (sys_platform == "win32" or (os_name == "linux" and implementation_name == 'cpython'))"#; let actual = parse::parse_markers_cursor::(&mut Cursor::new(marker), &mut TracingReporter) .unwrap() .unwrap(); let mut a = MarkerTree::expression(MarkerExpression::Version { key: MarkerValueVersion::PythonVersion, specifier: VersionSpecifier::from_pattern( pep440_rs::Operator::Equal, "2.7".parse().unwrap(), ) .unwrap(), }); let mut b = MarkerTree::expression(MarkerExpression::String { key: MarkerValueString::SysPlatform, operator: MarkerOperator::Equal, value: "win32".to_string(), }); let mut c = MarkerTree::expression(MarkerExpression::String { key: MarkerValueString::OsName, operator: MarkerOperator::Equal, value: "linux".to_string(), }); let d = MarkerTree::expression(MarkerExpression::String { key: MarkerValueString::ImplementationName, operator: MarkerOperator::Equal, value: "cpython".to_string(), }); c.and(d); b.or(c); a.and(b); assert_eq!(a, actual); } #[test] fn name_and_marker() { Requirement::::from_str(r#"numpy; sys_platform == "win32" or (os_name == "linux" and implementation_name == 'cpython')"#).unwrap(); } #[test] fn error_marker_incomplete1() { assert_snapshot!( parse_pep508_err(r"numpy; sys_platform"), @r#" Expected a valid marker operator (such as `>=` or `not in`), found `` numpy; sys_platform ^ "# ); } #[test] fn error_marker_incomplete2() { assert_snapshot!( parse_pep508_err(r"numpy; sys_platform =="), @r#" Expected marker value, found end of dependency specification numpy; sys_platform == ^ "# ); } #[test] fn error_marker_incomplete3() { assert_snapshot!( parse_pep508_err(r#"numpy; sys_platform == "win32" or"#), @r#" Expected marker value, found end of dependency specification numpy; sys_platform == "win32" or ^ "# ); } #[test] fn error_marker_incomplete4() { assert_snapshot!( parse_pep508_err(r#"numpy; sys_platform == "win32" or (os_name == "linux""#), @r#" Expected ')', found end of dependency specification numpy; 
sys_platform == "win32" or (os_name == "linux" ^ "# ); } #[test] fn error_marker_incomplete5() { assert_snapshot!( parse_pep508_err(r#"numpy; sys_platform == "win32" or (os_name == "linux" and"#), @r#" Expected marker value, found end of dependency specification numpy; sys_platform == "win32" or (os_name == "linux" and ^ "# ); } #[test] fn error_pep440() { assert_snapshot!( parse_pep508_err(r"numpy >=1.1.*"), @r#" Operator >= cannot be used with a wildcard version specifier numpy >=1.1.* ^^^^^^^ "# ); } #[test] fn error_no_name() { assert_snapshot!( parse_pep508_err(r"==0.0"), @r" Expected package name starting with an alphanumeric character, found `=` ==0.0 ^ " ); } #[test] fn error_unnamedunnamed_url() { assert_snapshot!( parse_pep508_err(r"git+https://github.com/pallets/flask.git"), @" URL requirement must be preceded by a package name. Add the name of the package before the URL (e.g., `package_name @ https://...`). git+https://github.com/pallets/flask.git ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^" ); } #[test] fn error_unnamed_file_path() { assert_snapshot!( parse_pep508_err(r"/path/to/flask.tar.gz"), @r###" URL requirement must be preceded by a package name. Add the name of the package before the URL (e.g., `package_name @ /path/to/file`). 
/path/to/flask.tar.gz ^^^^^^^^^^^^^^^^^^^^^ "### ); } #[test] fn error_no_comma_between_extras() { assert_snapshot!( parse_pep508_err(r"name[bar baz]"), @r#" Expected either `,` (separating extras) or `]` (ending the extras section), found `b` name[bar baz] ^ "# ); } #[test] fn error_extra_comma_after_extras() { assert_snapshot!( parse_pep508_err(r"name[bar, baz,]"), @r#" Expected an alphanumeric character starting the extra name, found `]` name[bar, baz,] ^ "# ); } #[test] fn error_extras_not_closed() { assert_snapshot!( parse_pep508_err(r"name[bar, baz >= 1.0"), @r#" Expected either `,` (separating extras) or `]` (ending the extras section), found `>` name[bar, baz >= 1.0 ^ "# ); } #[test] fn error_no_space_after_url() { assert_snapshot!( parse_pep508_err(r"name @ https://example.com/; extra == 'example'"), @r#" Missing space before ';', the end of the URL is ambiguous name @ https://example.com/; extra == 'example' ^ "# ); } #[test] fn error_name_at_nothing() { assert_snapshot!( parse_pep508_err(r"name @"), @r#" Expected URL name @ ^ "# ); } #[test] fn test_error_invalid_marker_key() { assert_snapshot!( parse_pep508_err(r"name; invalid_name"), @r#" Expected a quoted string or a valid marker name, found `invalid_name` name; invalid_name ^^^^^^^^^^^^ "# ); } #[test] fn error_markers_invalid_order() { assert_snapshot!( parse_pep508_err("name; '3.7' <= invalid_name"), @r#" Expected a quoted string or a valid marker name, found `invalid_name` name; '3.7' <= invalid_name ^^^^^^^^^^^^ "# ); } #[test] fn error_markers_notin() { assert_snapshot!( parse_pep508_err("name; '3.7' notin python_version"), @" Expected a valid marker operator (such as `>=` or `not in`), found `notin` name; '3.7' notin python_version ^^^^^" ); } #[test] fn error_missing_quote() { assert_snapshot!( parse_pep508_err("name; python_version == 3.10"), @" Expected a quoted string or a valid marker name, found `3.10` name; python_version == 3.10 ^^^^ " ); } #[test] fn error_markers_inpython_version() { 
assert_snapshot!( parse_pep508_err("name; '3.6'inpython_version"), @r#" Expected a valid marker operator (such as `>=` or `not in`), found `inpython_version` name; '3.6'inpython_version ^^^^^^^^^^^^^^^^ "# ); } #[test] fn error_markers_not_python_version() { assert_snapshot!( parse_pep508_err("name; '3.7' not python_version"), @" Expected `i`, found `p` name; '3.7' not python_version ^" ); } #[test] fn error_markers_invalid_operator() { assert_snapshot!( parse_pep508_err("name; '3.7' ~ python_version"), @" Expected a valid marker operator (such as `>=` or `not in`), found `~` name; '3.7' ~ python_version ^" ); } #[test] fn error_invalid_prerelease() { assert_snapshot!( parse_pep508_err("name==1.0.org1"), @r###" after parsing `1.0`, found `.org1`, which is not part of a valid version name==1.0.org1 ^^^^^^^^^^ "### ); } #[test] fn error_no_version_value() { assert_snapshot!( parse_pep508_err("name=="), @" Unexpected end of version specifier, expected version name== ^^" ); } #[test] fn error_no_version_operator() { assert_snapshot!( parse_pep508_err("name 1.0"), @r#" Expected one of `@`, `(`, `<`, `=`, `>`, `~`, `!`, `;`, found `1` name 1.0 ^ "# ); } #[test] fn error_random_char() { assert_snapshot!( parse_pep508_err("name >= 1.0 #"), @r##" Trailing `#` is not allowed name >= 1.0 # ^^^^^^^^ "## ); } #[test] #[cfg(feature = "non-pep508-extensions")] fn error_invalid_extra_unnamed_url() { assert_snapshot!( parse_unnamed_err("/foo-3.0.0-py3-none-any.whl[d,]"), @r#" Expected an alphanumeric character starting the extra name, found `]` /foo-3.0.0-py3-none-any.whl[d,] ^ "# ); } /// Check that the relative path support feature toggle works. 
#[test] #[cfg(feature = "non-pep508-extensions")] fn non_pep508_paths() { let requirements = &[ "foo @ file://./foo", "foo @ file://foo-3.0.0-py3-none-any.whl", "foo @ file:foo-3.0.0-py3-none-any.whl", "foo @ ./foo-3.0.0-py3-none-any.whl", ]; let cwd = env::current_dir().unwrap(); for requirement in requirements { assert_eq!( Requirement::::parse(requirement, &cwd).is_ok(), cfg!(feature = "non-pep508-extensions"), "{}: {:?}", requirement, Requirement::::parse(requirement, &cwd) ); } } #[test] fn no_space_after_operator() { let requirement = Requirement::::from_str("pytest;python_version<='4.0'").unwrap(); assert_eq!( requirement.to_string(), "pytest ; python_full_version < '4.1'" ); let requirement = Requirement::::from_str("pytest;'4.0'>=python_version").unwrap(); assert_eq!( requirement.to_string(), "pytest ; python_full_version < '4.1'" ); } #[test] #[cfg(feature = "non-pep508-extensions")] fn path_with_fragment() { let requirements = if cfg!(windows) { &[ "wheel @ file:///C:/Users/ferris/wheel-0.42.0.whl#hash=somehash", "wheel @ C:/Users/ferris/wheel-0.42.0.whl#hash=somehash", ] } else { &[ "wheel @ file:///Users/ferris/wheel-0.42.0.whl#hash=somehash", "wheel @ /Users/ferris/wheel-0.42.0.whl#hash=somehash", ] }; for requirement in requirements { // Extract the URL. let Some(VersionOrUrl::Url(url)) = Requirement::::from_str(requirement) .unwrap() .version_or_url else { unreachable!("Expected a URL") }; // Assert that the fragment and path have been separated correctly. 
assert_eq!(url.fragment(), Some("hash=somehash")); assert!( url.path().ends_with("/Users/ferris/wheel-0.42.0.whl"), "Expected the path to end with `/Users/ferris/wheel-0.42.0.whl`, found `{}`", url.path() ); } } #[test] fn add_extra_marker() -> Result<(), InvalidNameError> { let requirement = Requirement::::from_str("pytest").unwrap(); let expected = Requirement::::from_str("pytest; extra == 'dotenv'").unwrap(); let actual = requirement.with_extra_marker(&ExtraName::from_str("dotenv")?); assert_eq!(actual, expected); let requirement = Requirement::::from_str("pytest; '4.0' >= python_version").unwrap(); let expected = Requirement::from_str("pytest; '4.0' >= python_version and extra == 'dotenv'").unwrap(); let actual = requirement.with_extra_marker(&ExtraName::from_str("dotenv")?); assert_eq!(actual, expected); let requirement = Requirement::::from_str("pytest; '4.0' >= python_version or sys_platform == 'win32'") .unwrap(); let expected = Requirement::from_str( "pytest; ('4.0' >= python_version or sys_platform == 'win32') and extra == 'dotenv'", ) .unwrap(); let actual = requirement.with_extra_marker(&ExtraName::from_str("dotenv")?); assert_eq!(actual, expected); Ok(()) } pep508_rs-0.9.1/src/unnamed.rs000064400000000000000000000364271046102023000141600ustar 00000000000000use std::fmt::{Debug, Display, Formatter}; use std::hash::Hash; use std::path::Path; use std::str::FromStr; use crate::path::normalize_url_path; use crate::marker::parse; use crate::{ expand_env_vars, parse_extras_cursor, split_extras, split_scheme, strip_host, Cursor, ExtraName, MarkerEnvironment, MarkerTree, Pep508Error, Pep508ErrorSource, Pep508Url, Reporter, RequirementOrigin, Scheme, TracingReporter, VerbatimUrl, VerbatimUrlError, }; /// An extension over [`Pep508Url`] that also supports parsing unnamed requirements, namely paths. /// /// The error type is fixed to the same as the [`Pep508Url`] impl error. 
pub trait UnnamedRequirementUrl: Pep508Url { /// Parse a URL from a relative or absolute path. fn parse_path(path: impl AsRef, working_dir: impl AsRef) -> Result; /// Parse a URL from an absolute path. fn parse_absolute_path(path: impl AsRef) -> Result; /// Parse a URL from a string. fn parse_unnamed_url(given: impl AsRef) -> Result; /// Set the verbatim representation of the URL. #[must_use] fn with_given(self, given: impl Into) -> Self; /// Return the original string as given by the user, if available. fn given(&self) -> Option<&str>; } impl UnnamedRequirementUrl for VerbatimUrl { fn parse_path( path: impl AsRef, working_dir: impl AsRef, ) -> Result { Self::from_path(path, working_dir) } fn parse_absolute_path(path: impl AsRef) -> Result { Self::from_absolute_path(path) } fn parse_unnamed_url(given: impl AsRef) -> Result { Ok(Self::parse_url(given)?) } fn with_given(self, given: impl Into) -> Self { self.with_given(given) } fn given(&self) -> Option<&str> { self.given() } } /// A PEP 508-like, direct URL dependency specifier without a package name. /// /// In a `requirements.txt` file, the name of the package is optional for direct URL /// dependencies. This isn't compliant with PEP 508, but is common in `requirements.txt`, which /// is implementation-defined. #[derive(Hash, Debug, Clone, Eq, PartialEq)] pub struct UnnamedRequirement { /// The direct URL that defines the version specifier. pub url: Url, /// The list of extras such as `security`, `tests` in /// `requests [security,tests] >= 2.8.1, == 2.8.* ; python_version > "3.8"`. pub extras: Vec, /// The markers such as `python_version > "3.8"` in /// `requests [security,tests] >= 2.8.1, == 2.8.* ; python_version > "3.8"`. /// Those are a nested and/or tree. pub marker: MarkerTree, /// The source file containing the requirement. 
pub origin: Option, } impl UnnamedRequirement { /// Returns whether the markers apply for the given environment pub fn evaluate_markers(&self, env: &MarkerEnvironment, extras: &[ExtraName]) -> bool { self.evaluate_optional_environment(Some(env), extras) } /// Returns whether the markers apply for the given environment pub fn evaluate_optional_environment( &self, env: Option<&MarkerEnvironment>, extras: &[ExtraName], ) -> bool { self.marker.evaluate_optional_environment(env, extras) } /// Set the source file containing the requirement. #[must_use] pub fn with_origin(self, origin: RequirementOrigin) -> Self { Self { origin: Some(origin), ..self } } /// Parse a PEP 508-like direct URL requirement without a package name. pub fn parse( input: &str, working_dir: impl AsRef, reporter: &mut impl Reporter, ) -> Result> { parse_unnamed_requirement( &mut Cursor::new(input), Some(working_dir.as_ref()), reporter, ) } } impl Display for UnnamedRequirement { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.url)?; if !self.extras.is_empty() { write!( f, "[{}]", self.extras .iter() .map(ToString::to_string) .collect::>() .join(",") )?; } if let Some(marker) = self.marker.contents() { write!(f, " ; {marker}")?; } Ok(()) } } impl FromStr for UnnamedRequirement { type Err = Pep508Error; /// Parse a PEP 508-like direct URL requirement without a package name. fn from_str(input: &str) -> Result { parse_unnamed_requirement(&mut Cursor::new(input), None, &mut TracingReporter) } } /// Parse a PEP 508-like direct URL specifier without a package name. /// /// Unlike pip, we allow extras on URLs and paths. fn parse_unnamed_requirement( cursor: &mut Cursor, working_dir: Option<&Path>, reporter: &mut impl Reporter, ) -> Result, Pep508Error> { cursor.eat_whitespace(); // Parse the URL itself, along with any extras. let (url, extras) = parse_unnamed_url::(cursor, working_dir)?; let requirement_end = cursor.pos(); // wsp* cursor.eat_whitespace(); // quoted_marker? 
let marker = if cursor.peek_char() == Some(';') { // Skip past the semicolon cursor.next(); parse::parse_markers_cursor(cursor, reporter)? } else { None }; // wsp* cursor.eat_whitespace(); if let Some((pos, char)) = cursor.next() { if marker.is_none() { if let Some(given) = url.given() { for c in [';', '#'] { if given.ends_with(c) { return Err(Pep508Error { message: Pep508ErrorSource::String(format!( "Missing space before '{c}', the end of the URL is ambiguous" )), start: requirement_end - c.len_utf8(), len: c.len_utf8(), input: cursor.to_string(), }); } } } } let message = if marker.is_none() { format!(r#"Expected end of input or `;`, found `{char}`"#) } else { format!(r#"Expected end of input, found `{char}`"#) }; return Err(Pep508Error { message: Pep508ErrorSource::String(message), start: pos, len: char.len_utf8(), input: cursor.to_string(), }); } Ok(UnnamedRequirement { url, extras, marker: marker.unwrap_or_default(), origin: None, }) } /// Create a `VerbatimUrl` to represent the requirement, and extracts any extras at the end of the /// URL, to comply with the non-PEP 508 extensions. /// /// For example: /// - `file:///home/ferris/project/scripts/...` /// - `file:../editable/` /// - `../editable/` /// - `https://download.pytorch.org/whl/torch_stable.html` fn preprocess_unnamed_url( url: &str, #[cfg_attr(not(feature = "non-pep508-extensions"), allow(unused))] working_dir: Option<&Path>, cursor: &Cursor, start: usize, len: usize, ) -> Result<(Url, Vec), Pep508Error> { // Split extras _before_ expanding the URL. We assume that the extras are not environment // variables. If we parsed the extras after expanding the URL, then the verbatim representation // of the URL itself would be ambiguous, since it would consist of the environment variable, // which would expand to _more_ than the URL. let (url, extras) = if let Some((url, extras)) = split_extras(url) { (url, Some(extras)) } else { (url, None) }; // Parse the extras, if provided. 
let extras = if let Some(extras) = extras { parse_extras_cursor(&mut Cursor::new(extras)).map_err(|err| Pep508Error { message: err.message, start: start + url.len() + err.start, len: err.len, input: cursor.to_string(), })? } else { vec![] }; // Expand environment variables in the URL. let expanded = expand_env_vars(url); if let Some((scheme, path)) = split_scheme(&expanded) { match Scheme::parse(scheme) { // Ex) `file:///home/ferris/project/scripts/...`, `file://localhost/home/ferris/project/scripts/...`, or `file:../ferris/` Some(Scheme::File) => { // Strip the leading slashes, along with the `localhost` host, if present. let path = strip_host(path); // Transform, e.g., `/C:/Users/ferris/wheel-0.42.0.tar.gz` to `C:\Users\ferris\wheel-0.42.0.tar.gz`. let path = normalize_url_path(path); #[cfg(feature = "non-pep508-extensions")] if let Some(working_dir) = working_dir { let url = Url::parse_path(path.as_ref(), working_dir) .map_err(|err| Pep508Error { message: Pep508ErrorSource::UrlError(err), start, len, input: cursor.to_string(), })? .with_given(url.to_string()); return Ok((url, extras)); } let url = Url::parse_absolute_path(path.as_ref()) .map_err(|err| Pep508Error { message: Pep508ErrorSource::UrlError(err), start, len, input: cursor.to_string(), })? .with_given(url.to_string()); Ok((url, extras)) } // Ex) `https://download.pytorch.org/whl/torch_stable.html` Some(_) => { // Ex) `https://download.pytorch.org/whl/torch_stable.html` let url = Url::parse_unnamed_url(expanded.as_ref()) .map_err(|err| Pep508Error { message: Pep508ErrorSource::UrlError(err), start, len, input: cursor.to_string(), })? .with_given(url.to_string()); Ok((url, extras)) } // Ex) `C:\Users\ferris\wheel-0.42.0.tar.gz` _ => { if let Some(working_dir) = working_dir { let url = Url::parse_path(expanded.as_ref(), working_dir) .map_err(|err| Pep508Error { message: Pep508ErrorSource::UrlError(err), start, len, input: cursor.to_string(), })? 
.with_given(url.to_string()); return Ok((url, extras)); } let url = Url::parse_absolute_path(expanded.as_ref()) .map_err(|err| Pep508Error { message: Pep508ErrorSource::UrlError(err), start, len, input: cursor.to_string(), })? .with_given(url.to_string()); Ok((url, extras)) } } } else { // Ex) `../editable/` if let Some(working_dir) = working_dir { let url = Url::parse_path(expanded.as_ref(), working_dir) .map_err(|err| Pep508Error { message: Pep508ErrorSource::UrlError(err), start, len, input: cursor.to_string(), })? .with_given(url.to_string()); return Ok((url, extras)); } let url = Url::parse_absolute_path(expanded.as_ref()) .map_err(|err| Pep508Error { message: Pep508ErrorSource::UrlError(err), start, len, input: cursor.to_string(), })? .with_given(url.to_string()); Ok((url, extras)) } } /// Like [`crate::parse_url`], but allows for extras to be present at the end of the URL, to comply /// with the non-PEP 508 extensions. /// /// When parsing, we eat characters until we see any of the following: /// - A newline. /// - A semicolon (marker) or hash (comment), _preceded_ by a space. We parse the URL until the last /// non-whitespace character (inclusive). /// - A semicolon (marker) or hash (comment) _followed_ by a space. We treat this as an error, since /// the end of the URL is ambiguous. /// /// URLs can include extras at the end, enclosed in square brackets. /// /// For example: /// - `https://download.pytorch.org/whl/torch_stable.html[dev]` /// - `../editable[dev]` /// - `https://download.pytorch.org/whl/torch_stable.html ; python_version > "3.8"` /// - `https://download.pytorch.org/whl/torch_stable.html # this is a comment` fn parse_unnamed_url( cursor: &mut Cursor, working_dir: Option<&Path>, ) -> Result<(Url, Vec), Pep508Error> { // wsp* cursor.eat_whitespace(); // let (start, len) = { let start = cursor.pos(); let mut len = 0; let mut depth = 0u32; while let Some((_, c)) = cursor.next() { // If we see a line break, we're done. 
if matches!(c, '\r' | '\n') { break; } // Track the depth of brackets. if c == '[' { depth = depth.saturating_add(1); } else if c == ']' { depth = depth.saturating_sub(1); } // If we see top-level whitespace, check if it's followed by a semicolon or hash. If so, // end the URL at the last non-whitespace character. if depth == 0 && c.is_whitespace() { let mut cursor = cursor.clone(); cursor.eat_whitespace(); if matches!(cursor.peek_char(), None | Some(';' | '#')) { break; } } len += c.len_utf8(); // If we see a top-level semicolon or hash followed by whitespace, we're done. if depth == 0 { match c { ';' if cursor.peek_char().is_some_and(char::is_whitespace) => { break; } '#' if cursor.peek_char().is_some_and(char::is_whitespace) => { break; } _ => {} } } } (start, len) }; let url = cursor.slice(start, len); if url.is_empty() { return Err(Pep508Error { message: Pep508ErrorSource::String("Expected URL".to_string()), start, len, input: cursor.to_string(), }); } let url = preprocess_unnamed_url(url, working_dir, cursor, start, len)?; Ok(url) } pep508_rs-0.9.1/src/verbatim_url/tests.rs000064400000000000000000000030311046102023000163470ustar 00000000000000use super::*; #[test] fn scheme() { assert_eq!( split_scheme("file:///home/ferris/project/scripts"), Some(("file", "///home/ferris/project/scripts")) ); assert_eq!( split_scheme("file:home/ferris/project/scripts"), Some(("file", "home/ferris/project/scripts")) ); assert_eq!( split_scheme("https://example.com"), Some(("https", "//example.com")) ); assert_eq!(split_scheme("https:"), Some(("https", ""))); } #[test] fn fragment() { assert_eq!( split_fragment(Path::new( "file:///home/ferris/project/scripts#hash=somehash" )), ( Cow::Owned(PathBuf::from("file:///home/ferris/project/scripts")), Some("hash=somehash") ) ); assert_eq!( split_fragment(Path::new("file:home/ferris/project/scripts#hash=somehash")), ( Cow::Owned(PathBuf::from("file:home/ferris/project/scripts")), Some("hash=somehash") ) ); assert_eq!( 
split_fragment(Path::new("/home/ferris/project/scripts#hash=somehash")), ( Cow::Owned(PathBuf::from("/home/ferris/project/scripts")), Some("hash=somehash") ) ); assert_eq!( split_fragment(Path::new("file:///home/ferris/project/scripts")), ( Cow::Borrowed(Path::new("file:///home/ferris/project/scripts")), None ) ); assert_eq!( split_fragment(Path::new("")), (Cow::Borrowed(Path::new("")), None) ); } pep508_rs-0.9.1/src/verbatim_url.rs000064400000000000000000000417341046102023000152210ustar 00000000000000use regex::Regex; use std::borrow::Cow; use std::cmp::Ordering; use std::fmt::Debug; use std::hash::Hash; use std::ops::Deref; use std::path::{Path, PathBuf}; use std::sync::LazyLock; use thiserror::Error; use url::{ParseError, Url}; #[cfg(feature = "non-pep508-extensions")] use crate::path::{normalize_absolute_path, normalize_url_path}; use crate::Pep508Url; /// A wrapper around [`Url`] that preserves the original string. #[derive(Debug, Clone, Eq)] pub struct VerbatimUrl { /// The parsed URL. url: Url, /// The URL as it was provided by the user. given: Option, } impl Hash for VerbatimUrl { fn hash(&self, state: &mut H) { self.url.hash(state); } } impl PartialEq for VerbatimUrl { fn eq(&self, other: &Self) -> bool { self.url == other.url } } impl VerbatimUrl { /// Create a [`VerbatimUrl`] from a [`Url`]. pub fn from_url(url: Url) -> Self { Self { url, given: None } } /// Parse a URL from a string, expanding any environment variables. pub fn parse_url(given: impl AsRef) -> Result { let url = Url::parse(given.as_ref())?; Ok(Self { url, given: None }) } /// Parse a URL from an absolute or relative path. #[cfg(feature = "non-pep508-extensions")] // PEP 508 arguably only allows absolute file URLs. pub fn from_path( path: impl AsRef, base_dir: impl AsRef, ) -> Result { debug_assert!(base_dir.as_ref().is_absolute(), "base dir must be absolute"); let path = path.as_ref(); // Convert the path to an absolute path, if necessary. 
let path = if path.is_absolute() { Cow::Borrowed(path) } else { Cow::Owned(base_dir.as_ref().join(path)) }; let path = normalize_absolute_path(&path) .map_err(|err| VerbatimUrlError::Normalization(path.to_path_buf(), err))?; // Extract the fragment, if it exists. let (path, fragment) = split_fragment(&path); // Convert to a URL. let mut url = Url::from_file_path(path.clone()) .map_err(|()| VerbatimUrlError::UrlConversion(path.to_path_buf()))?; // Set the fragment, if it exists. if let Some(fragment) = fragment { url.set_fragment(Some(fragment)); } Ok(Self { url, given: None }) } /// Parse a URL from an absolute path. #[cfg(feature = "non-pep508-extensions")] pub fn from_absolute_path(path: impl AsRef) -> Result { let path = path.as_ref(); // Error if the path is relative. let path = if path.is_absolute() { path } else { return Err(VerbatimUrlError::WorkingDirectory(path.to_path_buf())); }; // Normalize the path. let path = normalize_absolute_path(path) .map_err(|err| VerbatimUrlError::Normalization(path.to_path_buf(), err))?; // Extract the fragment, if it exists. let (path, fragment) = split_fragment(&path); // Convert to a URL. let mut url = Url::from_file_path(path.clone()) .unwrap_or_else(|()| panic!("path is absolute: {}", path.display())); // Set the fragment, if it exists. if let Some(fragment) = fragment { url.set_fragment(Some(fragment)); } Ok(Self { url, given: None }) } /// Set the verbatim representation of the URL. #[must_use] pub fn with_given(self, given: impl Into) -> Self { Self { given: Some(given.into()), ..self } } /// Return the original string as given by the user, if available. pub fn given(&self) -> Option<&str> { self.given.as_deref() } /// Return the underlying [`Url`]. pub fn raw(&self) -> &Url { &self.url } /// Convert a [`VerbatimUrl`] into a [`Url`]. pub fn to_url(&self) -> Url { self.url.clone() } /// Convert a [`VerbatimUrl`] into a [`Url`]. 
pub fn into_url(self) -> Url { self.url } /// Return the underlying [`Path`], if the URL is a file URL. #[cfg(feature = "non-pep508-extensions")] pub fn as_path(&self) -> Result { self.url .to_file_path() .map_err(|()| VerbatimUrlError::UrlConversion(self.url.to_file_path().unwrap())) } } impl Ord for VerbatimUrl { fn cmp(&self, other: &Self) -> Ordering { self.url.cmp(&other.url) } } impl PartialOrd for VerbatimUrl { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl std::str::FromStr for VerbatimUrl { type Err = VerbatimUrlError; fn from_str(s: &str) -> Result { Ok(Self::parse_url(s).map(|url| url.with_given(s.to_owned()))?) } } impl std::fmt::Display for VerbatimUrl { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(&self.url, f) } } impl Deref for VerbatimUrl { type Target = Url; fn deref(&self) -> &Self::Target { &self.url } } impl From for VerbatimUrl { fn from(url: Url) -> Self { VerbatimUrl::from_url(url) } } impl serde::Serialize for VerbatimUrl { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { self.url.serialize(serializer) } } impl<'de> serde::Deserialize<'de> for VerbatimUrl { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de>, { let url = Url::deserialize(deserializer)?; Ok(VerbatimUrl::from_url(url)) } } impl Pep508Url for VerbatimUrl { type Err = VerbatimUrlError; /// Create a `VerbatimUrl` to represent the requirement. #[cfg_attr(not(feature = "non-pep508-extensions"), allow(unused_variables))] fn parse_url(url: &str, working_dir: Option<&Path>) -> Result { // Expand environment variables in the URL. 
let expanded = expand_env_vars(url); if let Some((scheme, path)) = split_scheme(&expanded) { match Scheme::parse(scheme) { // Ex) `file:///home/ferris/project/scripts/...`, `file://localhost/home/ferris/project/scripts/...`, or `file:../ferris/` Some(Scheme::File) => { // Strip the leading slashes, along with the `localhost` host, if present. // Transform, e.g., `/C:/Users/ferris/wheel-0.42.0.tar.gz` to `C:\Users\ferris\wheel-0.42.0.tar.gz`. #[cfg(feature = "non-pep508-extensions")] { let path = strip_host(path); let path = normalize_url_path(path); if let Some(working_dir) = working_dir { return Ok(VerbatimUrl::from_path(path.as_ref(), working_dir)? .with_given(url.to_string())); } Ok(VerbatimUrl::from_absolute_path(path.as_ref())? .with_given(url.to_string())) } #[cfg(not(feature = "non-pep508-extensions"))] Ok(VerbatimUrl::parse_url(expanded)?.with_given(url.to_string())) } // Ex) `https://download.pytorch.org/whl/torch_stable.html` Some(_) => { // Ex) `https://download.pytorch.org/whl/torch_stable.html` Ok(VerbatimUrl::parse_url(expanded.as_ref())?.with_given(url.to_string())) } // Ex) `C:\Users\ferris\wheel-0.42.0.tar.gz` _ => { #[cfg(feature = "non-pep508-extensions")] { if let Some(working_dir) = working_dir { return Ok(VerbatimUrl::from_path(expanded.as_ref(), working_dir)? .with_given(url.to_string())); } Ok(VerbatimUrl::from_absolute_path(expanded.as_ref())? .with_given(url.to_string())) } #[cfg(not(feature = "non-pep508-extensions"))] Err(Self::Err::NotAUrl(expanded.to_string())) } } } else { // Ex) `../editable/` #[cfg(feature = "non-pep508-extensions")] { if let Some(working_dir) = working_dir { return Ok(VerbatimUrl::from_path(expanded.as_ref(), working_dir)? .with_given(url.to_string())); } Ok(VerbatimUrl::from_absolute_path(expanded.as_ref())?.with_given(url.to_string())) } #[cfg(not(feature = "non-pep508-extensions"))] Err(Self::Err::NotAUrl(expanded.to_string())) } } } /// An error that can occur when parsing a [`VerbatimUrl`]. 
#[derive(Error, Debug)] pub enum VerbatimUrlError { /// Failed to parse a URL. #[error(transparent)] Url(#[from] ParseError), /// Received a relative path, but no working directory was provided. #[error("relative path without a working directory: {0}")] WorkingDirectory(PathBuf), /// Received a path that could not be converted to a URL. #[error("path could not be converted to a URL: {0}")] UrlConversion(PathBuf), /// Received a path that could not be normalized. #[error("path could not be normalized: {0}")] Normalization(PathBuf, #[source] std::io::Error), /// Received a path that could not be normalized. #[cfg(not(feature = "non-pep508-extensions"))] #[error("Not a URL (missing scheme): {0}")] NotAUrl(String), } /// Expand all available environment variables. /// /// This is modeled off of pip's environment variable expansion, which states: /// /// The only allowed format for environment variables defined in the /// requirement file is `${MY_VARIABLE_1}` to ensure two things: /// /// 1. Strings that contain a `$` aren't accidentally (partially) expanded. /// 2. Ensure consistency across platforms for requirement files. /// /// ... /// /// Valid characters in variable names follow the `POSIX standard /// `_ and are limited /// to uppercase letter, digits and the `_` (underscore). pub fn expand_env_vars(s: &str) -> Cow<'_, str> { // Generate the project root, to be used via the `${PROJECT_ROOT}` // environment variable. static PROJECT_ROOT_FRAGMENT: LazyLock = LazyLock::new(|| { let project_root = std::env::current_dir().unwrap(); project_root.to_string_lossy().to_string() }); static RE: LazyLock = LazyLock::new(|| Regex::new(r"(?P\$\{(?P[A-Z0-9_]+)})").unwrap()); RE.replace_all(s, |caps: ®ex::Captures<'_>| { let name = caps.name("name").unwrap().as_str(); std::env::var(name).unwrap_or_else(|_| match name { "PROJECT_ROOT" => PROJECT_ROOT_FRAGMENT.to_string(), _ => caps["var"].to_owned(), }) }) } /// Like [`Url::parse`], but only splits the scheme. 
Derived from the `url` crate. pub fn split_scheme(s: &str) -> Option<(&str, &str)> { /// #[inline] fn c0_control_or_space(ch: char) -> bool { ch <= ' ' // U+0000 to U+0020 } /// #[inline] fn ascii_alpha(ch: char) -> bool { ch.is_ascii_alphabetic() } // Trim control characters and spaces from the start and end. let s = s.trim_matches(c0_control_or_space); if s.is_empty() || !s.starts_with(ascii_alpha) { return None; } // Find the `:` following any alpha characters. let mut iter = s.char_indices(); let end = loop { match iter.next() { Some((_i, 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.')) => {} Some((i, ':')) => break i, _ => return None, } }; let scheme = &s[..end]; let rest = &s[end + 1..]; Some((scheme, rest)) } /// Strip the `file://localhost/` host from a file path. pub fn strip_host(path: &str) -> &str { // Ex) `file://localhost/...`. if let Some(path) = path .strip_prefix("//localhost") .filter(|path| path.starts_with('/')) { return path; } // Ex) `file:///...`. if let Some(path) = path.strip_prefix("//") { return path; } path } /// Split the fragment from a URL. /// /// For example, given `file:///home/ferris/project/scripts#hash=somehash`, returns /// `("/home/ferris/project/scripts", Some("hash=somehash"))`. #[cfg_attr(not(feature = "non-pep508-extensions"), allow(dead_code))] fn split_fragment(path: &Path) -> (Cow, Option<&str>) { let Some(s) = path.to_str() else { return (Cow::Borrowed(path), None); }; let Some((path, fragment)) = s.split_once('#') else { return (Cow::Borrowed(path), None); }; (Cow::Owned(PathBuf::from(path)), Some(fragment)) } /// A supported URL scheme for PEP 508 direct-URL requirements. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Scheme {
    /// `file://...`
    File,
    /// `git+git://...`
    GitGit,
    /// `git+http://...`
    GitHttp,
    /// `git+file://...`
    GitFile,
    /// `git+ssh://...`
    GitSsh,
    /// `git+https://...`
    GitHttps,
    /// `bzr+http://...`
    BzrHttp,
    /// `bzr+https://...`
    BzrHttps,
    /// `bzr+ssh://...`
    BzrSsh,
    /// `bzr+sftp://...`
    BzrSftp,
    /// `bzr+ftp://...`
    BzrFtp,
    /// `bzr+lp://...`
    BzrLp,
    /// `bzr+file://...`
    BzrFile,
    /// `hg+file://...`
    HgFile,
    /// `hg+http://...`
    HgHttp,
    /// `hg+https://...`
    HgHttps,
    /// `hg+ssh://...`
    HgSsh,
    /// `hg+static-http://...`
    HgStaticHttp,
    /// `svn+ssh://...`
    SvnSsh,
    /// `svn+http://...`
    SvnHttp,
    /// `svn+https://...`
    SvnHttps,
    /// `svn+svn://...`
    SvnSvn,
    /// `svn+file://...`
    SvnFile,
    /// `http://...`
    Http,
    /// `https://...`
    Https,
}

impl Scheme {
    /// Determine the [`Scheme`] from the given string, if possible.
    ///
    /// The input is the bare scheme without the trailing `:`; matching is exact and
    /// case-sensitive. Returns `None` for anything that is not a known scheme.
    pub fn parse(s: &str) -> Option<Self> {
        match s {
            "file" => Some(Self::File),
            "git+git" => Some(Self::GitGit),
            "git+http" => Some(Self::GitHttp),
            "git+file" => Some(Self::GitFile),
            "git+ssh" => Some(Self::GitSsh),
            "git+https" => Some(Self::GitHttps),
            "bzr+http" => Some(Self::BzrHttp),
            "bzr+https" => Some(Self::BzrHttps),
            "bzr+ssh" => Some(Self::BzrSsh),
            "bzr+sftp" => Some(Self::BzrSftp),
            "bzr+ftp" => Some(Self::BzrFtp),
            "bzr+lp" => Some(Self::BzrLp),
            "bzr+file" => Some(Self::BzrFile),
            "hg+file" => Some(Self::HgFile),
            "hg+http" => Some(Self::HgHttp),
            "hg+https" => Some(Self::HgHttps),
            "hg+ssh" => Some(Self::HgSsh),
            "hg+static-http" => Some(Self::HgStaticHttp),
            "svn+ssh" => Some(Self::SvnSsh),
            "svn+http" => Some(Self::SvnHttp),
            "svn+https" => Some(Self::SvnHttps),
            "svn+svn" => Some(Self::SvnSvn),
            "svn+file" => Some(Self::SvnFile),
            "http" => Some(Self::Http),
            "https" => Some(Self::Https),
            _ => None,
        }
    }

    /// Returns `true` if the scheme is a file scheme.
    pub fn is_file(self) -> bool {
        matches!(self, Self::File)
    }

    /// The scheme as written in a URL, without the trailing `:`.
    ///
    /// This is the inverse of [`Scheme::parse`].
    fn as_str(self) -> &'static str {
        match self {
            Self::File => "file",
            Self::GitGit => "git+git",
            Self::GitHttp => "git+http",
            Self::GitFile => "git+file",
            Self::GitSsh => "git+ssh",
            Self::GitHttps => "git+https",
            Self::BzrHttp => "bzr+http",
            Self::BzrHttps => "bzr+https",
            Self::BzrSsh => "bzr+ssh",
            Self::BzrSftp => "bzr+sftp",
            Self::BzrFtp => "bzr+ftp",
            Self::BzrLp => "bzr+lp",
            Self::BzrFile => "bzr+file",
            Self::HgFile => "hg+file",
            Self::HgHttp => "hg+http",
            Self::HgHttps => "hg+https",
            Self::HgSsh => "hg+ssh",
            Self::HgStaticHttp => "hg+static-http",
            Self::SvnSsh => "svn+ssh",
            Self::SvnHttp => "svn+http",
            Self::SvnHttps => "svn+https",
            Self::SvnSvn => "svn+svn",
            Self::SvnFile => "svn+file",
            Self::Http => "http",
            Self::Https => "https",
        }
    }
}

impl std::fmt::Display for Scheme {
    // Writes the bare scheme name; formatter width and padding flags are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}

#[cfg(test)]
mod tests;