gix-url-0.28.2/.cargo_vcs_info.json0000644000000001450000000000100125140ustar { "git": { "sha1": "c1ba5719132227410abefeb54e3032b015233e94" }, "path_in_vcs": "gix-url" }gix-url-0.28.2/Cargo.toml0000644000000073470000000000100105250ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.65" name = "gix-url" version = "0.28.2" authors = ["Sebastian Thiel "] build = false include = [ "src/**/*", "LICENSE-*", "tests/baseline/**/*", ] autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "A crate of the gitoxide project implementing parsing and serialization of gix-url" readme = false license = "MIT OR Apache-2.0" repository = "https://github.com/GitoxideLabs/gitoxide" [package.metadata.docs.rs] all-features = true features = ["document-features"] [lib] name = "gix_url" path = "src/lib.rs" doctest = false [dependencies.bstr] version = "1.3.0" features = ["std"] default-features = false [dependencies.document-features] version = "0.2.0" optional = true [dependencies.gix-features] version = "^0.39.1" [dependencies.gix-path] version = "^0.10.13" [dependencies.percent-encoding] version = "2.3.1" [dependencies.serde] version = "1.0.114" features = [ "std", "derive", ] optional = true default-features = false [dependencies.thiserror] version = "2.0.0" [dependencies.url] version = "2.5.2" [dev-dependencies.assert_matches] version = "1.5.0" [features] serde = [ "dep:serde", "bstr/serde", ] [lints.clippy] bool_to_int_with_if = "allow" borrow_as_ptr = "allow" cast_lossless = 
"allow" cast_possible_truncation = "allow" cast_possible_wrap = "allow" cast_precision_loss = "allow" cast_sign_loss = "allow" checked_conversions = "allow" copy_iterator = "allow" default_trait_access = "allow" doc_markdown = "allow" empty_docs = "allow" enum_glob_use = "allow" explicit_deref_methods = "allow" explicit_into_iter_loop = "allow" explicit_iter_loop = "allow" filter_map_next = "allow" fn_params_excessive_bools = "allow" from_iter_instead_of_collect = "allow" if_not_else = "allow" ignored_unit_patterns = "allow" implicit_clone = "allow" inconsistent_struct_constructor = "allow" inefficient_to_string = "allow" inline_always = "allow" items_after_statements = "allow" iter_not_returning_iterator = "allow" iter_without_into_iter = "allow" manual_assert = "allow" manual_is_variant_and = "allow" manual_let_else = "allow" manual_string_new = "allow" many_single_char_names = "allow" match_bool = "allow" match_same_arms = "allow" match_wild_err_arm = "allow" match_wildcard_for_single_variants = "allow" missing_errors_doc = "allow" missing_panics_doc = "allow" module_name_repetitions = "allow" must_use_candidate = "allow" mut_mut = "allow" naive_bytecount = "allow" needless_for_each = "allow" needless_pass_by_value = "allow" needless_raw_string_hashes = "allow" no_effect_underscore_binding = "allow" option_option = "allow" range_plus_one = "allow" redundant_else = "allow" return_self_not_must_use = "allow" should_panic_without_expect = "allow" similar_names = "allow" single_match_else = "allow" stable_sort_primitive = "allow" struct_excessive_bools = "allow" struct_field_names = "allow" too_long_first_doc_paragraph = "allow" too_many_lines = "allow" transmute_ptr_to_ptr = "allow" trivially_copy_pass_by_ref = "allow" unnecessary_join = "allow" unnecessary_wraps = "allow" unreadable_literal = "allow" unused_self = "allow" used_underscore_binding = "allow" wildcard_imports = "allow" [lints.clippy.pedantic] level = "warn" priority = -1 [lints.rust] 
gix-url-0.28.2/Cargo.toml.orig000064400000000000000000000022171046102023000141750ustar 00000000000000lints.workspace = true [package] name = "gix-url" version = "0.28.2" repository = "https://github.com/GitoxideLabs/gitoxide" license = "MIT OR Apache-2.0" description = "A crate of the gitoxide project implementing parsing and serialization of gix-url" authors = ["Sebastian Thiel "] edition = "2021" include = ["src/**/*", "LICENSE-*", "tests/baseline/**/*"] rust-version = "1.65" [lib] doctest = false [features] ## Data structures implement `serde::Serialize` and `serde::Deserialize`. serde = ["dep:serde", "bstr/serde"] [dependencies] gix-features = { version = "^0.39.1", path = "../gix-features" } gix-path = { version = "^0.10.13", path = "../gix-path" } serde = { version = "1.0.114", optional = true, default-features = false, features = ["std", "derive"] } thiserror = "2.0.0" url = "2.5.2" bstr = { version = "1.3.0", default-features = false, features = ["std"] } percent-encoding = "2.3.1" document-features = { version = "0.2.0", optional = true } [dev-dependencies] assert_matches = "1.5.0" gix-testtools = { path = "../tests/tools" } [package.metadata.docs.rs] all-features = true features = ["document-features"] gix-url-0.28.2/LICENSE-APACHE000064400000000000000000000247461046102023000132450ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
gix-url-0.28.2/LICENSE-MIT000064400000000000000000000017771046102023000127540ustar 00000000000000Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. gix-url-0.28.2/src/expand_path.rs000064400000000000000000000074441046102023000147450ustar 00000000000000//! Functions for expanding repository paths.
use std::path::{Path, PathBuf};

use bstr::{BStr, BString, ByteSlice};

/// Whether a repository is resolving for the current user, or the given one.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum ForUser {
    /// The currently logged in user.
    Current,
    /// The user with the given name.
    Name(BString),
}

/// Extract the explicit user name, with `None` standing for the current user.
impl From<ForUser> for Option<BString> {
    fn from(v: ForUser) -> Self {
        match v {
            ForUser::Name(user) => Some(user),
            ForUser::Current => None,
        }
    }
}

/// The error used by [`parse()`], [`with()`] and [`expand_path()`](crate::expand_path()).
#[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("UTF8 conversion on non-unix system failed for path: {path:?}")] IllformedUtf8 { path: BString }, #[error("Home directory could not be obtained for {}", match user {Some(user) => format!("user '{user}'"), None => "current user".into()})] MissingHome { user: Option }, } fn path_segments(path: &BStr) -> Option> { if path.starts_with(b"/") { Some(path[1..].split(|c| *c == b'/')) } else { None } } /// Parse user information from the given `path`, returning `(possible user information, adjusted input path)`. /// /// Supported formats for user extraction are… /// * `~/repopath` - the currently logged in user's home. /// * `~user/repopath` - the repository in the given user's home. pub fn parse(path: &BStr) -> Result<(Option, BString), Error> { Ok(path_segments(path) .and_then(|mut iter| { iter.next().map(|segment| { if segment.starts_with(b"~") { let eu = if segment.len() == 1 { Some(ForUser::Current) } else { Some(ForUser::Name(segment[1..].into())) }; ( eu, format!( "/{}", iter.map(|s| s.as_bstr().to_str_lossy()).collect::>().join("/") ) .into(), ) } else { (None, path.into()) } }) }) .unwrap_or_else(|| (None, path.into()))) } /// Expand `path` for use in a shell and return the expanded path. pub fn for_shell(path: BString) -> BString { use bstr::ByteVec; match parse(path.as_slice().as_bstr()) { Ok((user, mut path)) => match user { Some(ForUser::Current) => { path.insert(0, b'~'); path } Some(ForUser::Name(mut user)) => { user.insert(0, b'~'); user.append(path.as_vec_mut()); user } None => path, }, Err(_) => path, } } /// Expand `path` for the given `user`, which can be obtained by [`parse()`], resolving them with `home_for_user(&user)`. /// /// For the common case consider using [`expand_path()]` instead. 
pub fn with(
    user: Option<&ForUser>,
    path: &BStr,
    home_for_user: impl FnOnce(&ForUser) -> Option<PathBuf>,
) -> Result<PathBuf, Error> {
    /// Drop the first component (the root) so that joining appends to the home directory instead of replacing it.
    fn make_relative(path: &Path) -> PathBuf {
        path.components().skip(1).collect()
    }
    let path = gix_path::try_from_byte_slice(path).map_err(|_| Error::IllformedUtf8 { path: path.to_owned() })?;
    Ok(match user {
        Some(user) => home_for_user(user)
            .ok_or_else(|| Error::MissingHome {
                user: user.to_owned().into(),
            })?
            .join(make_relative(path)),
        // Without a user there is nothing to expand.
        None => path.into(),
    })
}
gix-url-0.28.2/src/impls.rs000064400000000000000000000041271046102023000135710ustar 00000000000000use std::path::{Path, PathBuf};

use bstr::BStr;

use crate::{parse, Scheme, Url};

impl Default for Url {
    fn default() -> Self {
        Url {
            serialize_alternative_form: false,
            scheme: Scheme::Ssh,
            user: None,
            password: None,
            host: None,
            port: None,
            path: bstr::BString::default(),
        }
    }
}

impl TryFrom<&str> for Url {
    type Error = parse::Error;

    fn try_from(value: &str) -> Result<Self, Self::Error> {
        Self::from_bytes(value.into())
    }
}

impl TryFrom<String> for Url {
    type Error = parse::Error;

    fn try_from(value: String) -> Result<Self, Self::Error> {
        Self::from_bytes(value.as_str().into())
    }
}

impl TryFrom<PathBuf> for Url {
    type Error = parse::Error;

    fn try_from(value: PathBuf) -> Result<Self, Self::Error> {
        gix_path::into_bstr(value).try_into()
    }
}

impl TryFrom<&Path> for Url {
    type Error = parse::Error;

    fn try_from(value: &Path) -> Result<Self, Self::Error> {
        gix_path::into_bstr(value).try_into()
    }
}

impl TryFrom<&std::ffi::OsStr> for Url {
    type Error = parse::Error;

    fn try_from(value: &std::ffi::OsStr) -> Result<Self, Self::Error> {
        // NOTE(review): this panics instead of returning an error if the conversion fails, which
        // according to the message is expected for ill-formed UTF-8 on Windows only.
        gix_path::os_str_into_bstr(value)
            .expect("no illformed UTF-8 on Windows")
            .try_into()
    }
}

impl TryFrom<&BStr> for Url {
    type Error = parse::Error;

    fn try_from(value: &BStr) -> Result<Self, Self::Error> {
        Self::from_bytes(value)
    }
}

impl<'a> TryFrom<std::borrow::Cow<'a, BStr>> for Url {
    type Error = parse::Error;

    fn try_from(value: std::borrow::Cow<'a, BStr>) -> Result<Self, Self::Error> {
        Self::try_from(&*value)
    }
}

impl std::fmt::Display for Url {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Only clone if there is a password to hide, and print the copy with the password replaced.
        let mut storage;
        let to_print = if self.password.is_some() {
            storage = self.clone();
            storage.password = Some("redacted".into());
            &storage
        } else {
            self
        };
        to_print.to_bstring().fmt(f)
    }
}
gix-url-0.28.2/src/lib.rs000064400000000000000000000401631046102023000132130ustar 00000000000000//! A library implementing a URL for use in git with access to its special capabilities.
//! ## Feature Flags
#![cfg_attr(
    all(doc, feature = "document-features"),
    doc = ::document_features::document_features!()
)]
#![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg, doc_auto_cfg))]
#![deny(rust_2018_idioms, missing_docs)]
#![forbid(unsafe_code)]

use std::{borrow::Cow, path::PathBuf};

use bstr::{BStr, BString};

///
pub mod expand_path;

mod scheme;
pub use scheme::Scheme;
mod impls;

///
pub mod parse;

/// Parse the given `input` as a [git url](Url).
///
/// # Note
///
/// We cannot and should never have to deal with UTF-16 encoded windows strings, so bytes input is acceptable.
/// For file-paths, we don't expect UTF8 encoding either.
pub fn parse(input: &BStr) -> Result<Url, parse::Error> {
    use parse::InputScheme;
    match parse::find_scheme(input) {
        InputScheme::Local => parse::local(input),
        // `file` URLs get their own parser; the scheme is matched case-insensitively.
        InputScheme::Url { protocol_end } if input[..protocol_end].eq_ignore_ascii_case(b"file") => {
            parse::file_url(input, protocol_end)
        }
        InputScheme::Url { protocol_end } => parse::url(input, protocol_end),
        InputScheme::Scp { colon } => parse::scp(input, colon),
    }
}

/// Expand `path` for the given `user`, which can be obtained by [`expand_path::parse()`], resolving the home directories
/// of `user` automatically.
///
/// If more precise control of the resolution mechanism is needed, then use the [expand_path::with()] function.
pub fn expand_path(user: Option<&expand_path::ForUser>, path: &BStr) -> Result<PathBuf, expand_path::Error> {
    expand_path::with(user, path, |user| match user {
        expand_path::ForUser::Current => gix_path::env::home_dir(),
        // The home of a named user is taken to be a sibling of the current user's home directory.
        expand_path::ForUser::Name(user) => {
            gix_path::env::home_dir().and_then(|home| home.parent().map(|home_dirs| home_dirs.join(user.to_string())))
        }
    })
}

/// Classification of a portion of a URL by whether it is *syntactically* safe to pass as an argument to a command-line program.
///
/// Various parts of URLs can be specified to begin with `-`. If they are used as options to a command-line application
/// such as an SSH client, they will be treated as options rather than as non-option arguments as the developer intended.
/// This is a security risk, because URLs are not always trusted and can often be composed or influenced by an attacker.
/// See <https://secure.phabricator.com/T12961> for details.
///
/// # Security Warning
///
/// This type only expresses known *syntactic* risk. It does not cover other risks, such as passing a personal access
/// token as a username rather than a password in an application that logs usernames.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum ArgumentSafety<'a> {
    /// May be safe. There is nothing to pass, so there is nothing dangerous.
    Absent,
    /// May be safe. The argument does not begin with a `-` and so will not be confused as an option.
    Usable(&'a str),
    /// Dangerous! Begins with `-` and could be treated as an option. Use the value in error messages only.
    Dangerous(&'a str),
}

/// A URL with support for specialized git related capabilities.
///
/// Additionally there is support for [deserialization](Url::from_bytes()) and [serialization](Url::to_bstring()).
///
/// # Security Warning
///
/// URLs may contain passwords and using standard [formatting](std::fmt::Display) will redact
/// such password, whereas [lossless serialization](Url::to_bstring()) will contain all parts of the
/// URL.
/// **Beware that some URls still print secrets if they use them outside of the designated password fields.** /// /// Also note that URLs that fail to parse are typically stored in [the resulting error](parse::Error) type /// and printed in full using its display implementation. #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Url { /// The URL scheme. pub scheme: Scheme, /// The user to impersonate on the remote. user: Option, /// The password associated with a user. password: Option, /// The host to which to connect. Localhost is implied if `None`. host: Option, /// When serializing, use the alternative forms as it was parsed as such. serialize_alternative_form: bool, /// The port to use when connecting to a host. If `None`, standard ports depending on `scheme` will be used. pub port: Option, /// The path portion of the URL, usually the location of the git repository. /// /// # Security Warning /// /// URLs allow paths to start with `-` which makes it possible to mask command-line arguments as path which then leads to /// the invocation of programs from an attacker controlled URL. See for details. /// /// If this value is ever going to be passed to a command-line application, call [Self::path_argument_safe()] instead. pub path: BString, } /// Instantiation impl Url { /// Create a new instance from the given parts, including a password, which will be validated by parsing them back. pub fn from_parts( scheme: Scheme, user: Option, password: Option, host: Option, port: Option, path: BString, serialize_alternative_form: bool, ) -> Result { parse( Url { scheme, user, password, host, port, path, serialize_alternative_form, } .to_bstring() .as_ref(), ) } } /// Modification impl Url { /// Set the given `user`, or unset it with `None`. Return the previous value. 
pub fn set_user(&mut self, user: Option) -> Option { let prev = self.user.take(); self.user = user; prev } /// Set the given `password`, or unset it with `None`. Return the previous value. pub fn set_password(&mut self, password: Option) -> Option { let prev = self.password.take(); self.password = password; prev } } /// Builder impl Url { /// Enable alternate serialization for this url, e.g. `file:///path` becomes `/path`. /// /// This is automatically set correctly for parsed URLs, but can be set here for urls /// created by constructor. pub fn serialize_alternate_form(mut self, use_alternate_form: bool) -> Self { self.serialize_alternative_form = use_alternate_form; self } /// Turn a file url like `file://relative` into `file:///root/relative`, hence it assures the url's path component is absolute, /// using `current_dir` if needed to achieve that. pub fn canonicalize(&mut self, current_dir: &std::path::Path) -> Result<(), gix_path::realpath::Error> { if self.scheme == Scheme::File { let path = gix_path::from_bstr(Cow::Borrowed(self.path.as_ref())); let abs_path = gix_path::realpath_opts(path.as_ref(), current_dir, gix_path::realpath::MAX_SYMLINKS)?; self.path = gix_path::into_bstr(abs_path).into_owned(); } Ok(()) } } /// Access impl Url { /// Return the username mentioned in the URL, if present. /// /// # Security Warning /// /// URLs allow usernames to start with `-` which makes it possible to mask command-line arguments as username which then leads to /// the invocation of programs from an attacker controlled URL. See for details. /// /// If this value is ever going to be passed to a command-line application, call [Self::user_argument_safe()] instead. pub fn user(&self) -> Option<&str> { self.user.as_deref() } /// Classify the username of this URL by whether it is safe to pass as a command-line argument. /// /// Use this method instead of [Self::user()] if the host is going to be passed to a command-line application. 
/// If the unsafe and absent cases need not be distinguished, [Self::user_argument_safe()] may also be used. pub fn user_as_argument(&self) -> ArgumentSafety<'_> { match self.user() { Some(user) if looks_like_command_line_option(user.as_bytes()) => ArgumentSafety::Dangerous(user), Some(user) => ArgumentSafety::Usable(user), None => ArgumentSafety::Absent, } } /// Return the username of this URL if present *and* if it can't be mistaken for a command-line argument. /// /// Use this method or [Self::user_as_argument()] instead of [Self::user()] if the host is going to be /// passed to a command-line application. Prefer [Self::user_as_argument()] unless the unsafe and absent /// cases need not be distinguished from each other. pub fn user_argument_safe(&self) -> Option<&str> { match self.user_as_argument() { ArgumentSafety::Usable(user) => Some(user), _ => None, } } /// Return the password mentioned in the url, if present. pub fn password(&self) -> Option<&str> { self.password.as_deref() } /// Return the host mentioned in the URL, if present. /// /// # Security Warning /// /// URLs allow hosts to start with `-` which makes it possible to mask command-line arguments as host which then leads to /// the invocation of programs from an attacker controlled URL. See for details. /// /// If this value is ever going to be passed to a command-line application, call [Self::host_as_argument()] /// or [Self::host_argument_safe()] instead. pub fn host(&self) -> Option<&str> { self.host.as_deref() } /// Classify the host of this URL by whether it is safe to pass as a command-line argument. /// /// Use this method instead of [Self::host()] if the host is going to be passed to a command-line application. /// If the unsafe and absent cases need not be distinguished, [Self::host_argument_safe()] may also be used. 
pub fn host_as_argument(&self) -> ArgumentSafety<'_> { match self.host() { Some(host) if looks_like_command_line_option(host.as_bytes()) => ArgumentSafety::Dangerous(host), Some(host) => ArgumentSafety::Usable(host), None => ArgumentSafety::Absent, } } /// Return the host of this URL if present *and* if it can't be mistaken for a command-line argument. /// /// Use this method or [Self::host_as_argument()] instead of [Self::host()] if the host is going to be /// passed to a command-line application. Prefer [Self::host_as_argument()] unless the unsafe and absent /// cases need not be distinguished from each other. pub fn host_argument_safe(&self) -> Option<&str> { match self.host_as_argument() { ArgumentSafety::Usable(host) => Some(host), _ => None, } } /// Return the path of this URL *if* it can't be mistaken for a command-line argument. /// Note that it always begins with a slash, which is ignored for this comparison. /// /// Use this method instead of accessing [Self::path] directly if the path is going to be passed to a /// command-line application, unless it is certain that the leading `/` will always be included. pub fn path_argument_safe(&self) -> Option<&BStr> { self.path .get(1..) .and_then(|truncated| (!looks_like_command_line_option(truncated)).then_some(self.path.as_ref())) } /// Return true if the path portion of the URL is `/`. pub fn path_is_root(&self) -> bool { self.path == "/" } /// Return the actual or default port for use according to the URL scheme. /// Note that there may be no default port either. 
pub fn port_or_default(&self) -> Option { self.port.or_else(|| { use Scheme::*; Some(match self.scheme { Http => 80, Https => 443, Ssh => 22, Git => 9418, File | Ext(_) => return None, }) }) } } fn looks_like_command_line_option(b: &[u8]) -> bool { b.first() == Some(&b'-') } /// Transformation impl Url { /// Turn a file URL like `file://relative` into `file:///root/relative`, hence it assures the URL's path component is absolute, using /// `current_dir` if necessary. pub fn canonicalized(&self, current_dir: &std::path::Path) -> Result { let mut res = self.clone(); res.canonicalize(current_dir)?; Ok(res) } } fn percent_encode(s: &str) -> Cow<'_, str> { percent_encoding::utf8_percent_encode(s, percent_encoding::NON_ALPHANUMERIC).into() } /// Serialization impl Url { /// Write this URL losslessly to `out`, ready to be parsed again. pub fn write_to(&self, mut out: &mut dyn std::io::Write) -> std::io::Result<()> { if !(self.serialize_alternative_form && (self.scheme == Scheme::File || self.scheme == Scheme::Ssh)) { out.write_all(self.scheme.as_str().as_bytes())?; out.write_all(b"://")?; } match (&self.user, &self.host) { (Some(user), Some(host)) => { out.write_all(percent_encode(user).as_bytes())?; if let Some(password) = &self.password { out.write_all(b":")?; out.write_all(percent_encode(password).as_bytes())?; } out.write_all(b"@")?; out.write_all(host.as_bytes())?; } (None, Some(host)) => { out.write_all(host.as_bytes())?; } (None, None) => {} (Some(_user), None) => unreachable!("BUG: should not be possible to have a user but no host"), }; if let Some(port) = &self.port { write!(&mut out, ":{port}")?; } if self.serialize_alternative_form && self.scheme == Scheme::Ssh { out.write_all(b":")?; } out.write_all(&self.path)?; Ok(()) } /// Transform ourselves into a binary string, losslessly, or fail if the URL is malformed due to host or user parts being incorrect. 
pub fn to_bstring(&self) -> BString { let mut buf = Vec::with_capacity( (5 + 3) + self.user.as_ref().map(String::len).unwrap_or_default() + 1 + self.host.as_ref().map(String::len).unwrap_or_default() + self.port.map(|_| 5).unwrap_or_default() + self.path.len(), ); self.write_to(&mut buf).expect("io cannot fail in memory"); buf.into() } } /// Deserialization impl Url { /// Parse a URL from `bytes`. pub fn from_bytes(bytes: &BStr) -> Result { parse(bytes) } } /// This module contains extensions to the [Url] struct which are only intended to be used /// for testing code. Do not use this module in production! For all intends and purposes the APIs of /// all functions and types exposed by this module are considered unstable and are allowed to break /// even in patch releases! #[doc(hidden)] #[cfg(debug_assertions)] pub mod testing { use bstr::BString; use crate::{Scheme, Url}; /// Additional functions for [Url] which are only intended to be used for tests. pub trait TestUrlExtension { /// Create a new instance from the given parts without validating them. /// /// This function is primarily intended for testing purposes. For production code please /// consider using [Url::from_parts] instead! fn from_parts_unchecked( scheme: Scheme, user: Option, password: Option, host: Option, port: Option, path: BString, serialize_alternative_form: bool, ) -> Url { Url { scheme, user, password, host, port, path, serialize_alternative_form, } } } impl TestUrlExtension for Url {} } gix-url-0.28.2/src/parse.rs000064400000000000000000000216001046102023000135520ustar 00000000000000use std::convert::Infallible; use crate::Scheme; use bstr::{BStr, BString, ByteSlice}; use percent_encoding::percent_decode_str; /// The error returned by [parse()](crate::parse()). 
#[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("{} \"{url}\" is not valid UTF-8", kind.as_str())] Utf8 { url: BString, kind: UrlKind, source: std::str::Utf8Error, }, #[error("{} {url:?} can not be parsed as valid URL", kind.as_str())] Url { url: String, kind: UrlKind, source: url::ParseError, }, #[error("The host portion of the following URL is too long ({} bytes, {len} bytes total): {truncated_url:?}", truncated_url.len())] TooLong { truncated_url: BString, len: usize }, #[error("{} \"{url}\" does not specify a path to a repository", kind.as_str())] MissingRepositoryPath { url: BString, kind: UrlKind }, #[error("URL {url:?} is relative which is not allowed in this context")] RelativeUrl { url: String }, } impl From for Error { fn from(_: Infallible) -> Self { unreachable!("Cannot actually happen, but it seems there can't be a blanket impl for this") } } /// #[derive(Debug, Clone, Copy)] pub enum UrlKind { /// Url, /// Scp, /// Local, } impl UrlKind { fn as_str(&self) -> &'static str { match self { UrlKind::Url => "URL", UrlKind::Scp => "SCP-like target", UrlKind::Local => "local path", } } } pub(crate) enum InputScheme { Url { protocol_end: usize }, Scp { colon: usize }, Local, } pub(crate) fn find_scheme(input: &BStr) -> InputScheme { // TODO: url's may only contain `:/`, we should additionally check if the characters used for // protocol are all valid if let Some(protocol_end) = input.find("://") { return InputScheme::Url { protocol_end }; } if let Some(colon) = input.find_byte(b':') { // allow user to select files containing a `:` by passing them as absolute or relative path // this is behavior explicitly mentioned by the scp and git manuals let explicitly_local = &input[..colon].contains(&b'/'); let dos_driver_letter = cfg!(windows) && input[..colon].len() == 1; if !explicitly_local && !dos_driver_letter { return InputScheme::Scp { colon }; } } InputScheme::Local } pub(crate) fn url(input: &BStr, protocol_end: usize) -> 
Result { const MAX_LEN: usize = 1024; let bytes_to_path = input[protocol_end + "://".len()..] .iter() .filter(|b| !b.is_ascii_whitespace()) .skip_while(|b| **b == b'/' || **b == b'\\') .position(|b| *b == b'/') .unwrap_or(input.len() - protocol_end); if bytes_to_path > MAX_LEN || protocol_end > MAX_LEN { return Err(Error::TooLong { truncated_url: input[..(protocol_end + "://".len() + MAX_LEN).min(input.len())].into(), len: input.len(), }); } let (input, url) = input_to_utf8_and_url(input, UrlKind::Url)?; let scheme = url.scheme().into(); if matches!(scheme, Scheme::Git | Scheme::Ssh) && url.path().is_empty() { return Err(Error::MissingRepositoryPath { url: input.into(), kind: UrlKind::Url, }); } if url.cannot_be_a_base() { return Err(Error::RelativeUrl { url: input.to_owned() }); } Ok(crate::Url { serialize_alternative_form: false, scheme, user: url_user(&url, UrlKind::Url)?, password: url .password() .map(|s| percent_decoded_utf8(s, UrlKind::Url)) .transpose()?, host: url.host_str().map(Into::into), port: url.port(), path: url.path().into(), }) } fn percent_decoded_utf8(s: &str, kind: UrlKind) -> Result { Ok(percent_decode_str(s) .decode_utf8() .map_err(|err| Error::Utf8 { url: s.into(), kind, source: err, })? .into_owned()) } pub(crate) fn scp(input: &BStr, colon: usize) -> Result { let input = input_to_utf8(input, UrlKind::Scp)?; // TODO: this incorrectly splits at IPv6 addresses, check for `[]` before splitting let (host, path) = input.split_at(colon); debug_assert_eq!(path.get(..1), Some(":"), "{path} should start with :"); let path = &path[1..]; if path.is_empty() { return Err(Error::MissingRepositoryPath { url: input.to_owned().into(), kind: UrlKind::Scp, }); } // The path returned by the parsed url often has the wrong number of leading `/` characters but // should never differ in any other way (ssh URLs should not contain a query or fragment part). 
// To avoid the various off-by-one errors caused by the `/` characters, we keep using the path // determined above and can therefore skip parsing it here as well. let url = url::Url::parse(&format!("ssh://{host}")).map_err(|source| Error::Url { url: input.to_owned(), kind: UrlKind::Scp, source, })?; Ok(crate::Url { serialize_alternative_form: true, scheme: url.scheme().into(), user: url_user(&url, UrlKind::Scp)?, password: url .password() .map(|s| percent_decoded_utf8(s, UrlKind::Scp)) .transpose()?, host: url.host_str().map(Into::into), port: url.port(), path: path.into(), }) } fn url_user(url: &url::Url, kind: UrlKind) -> Result, Error> { if url.username().is_empty() && url.password().is_none() { Ok(None) } else { Ok(Some(percent_decoded_utf8(url.username(), kind)?)) } } pub(crate) fn file_url(input: &BStr, protocol_colon: usize) -> Result { let input = input_to_utf8(input, UrlKind::Url)?; let input_after_protocol = &input[protocol_colon + "://".len()..]; let Some(first_slash) = input_after_protocol .find('/') .or_else(|| cfg!(windows).then(|| input_after_protocol.find('\\')).flatten()) else { return Err(Error::MissingRepositoryPath { url: input.to_owned().into(), kind: UrlKind::Url, }); }; // We cannot use the url crate to parse host and path because it special cases Windows // driver letters. With the url crate an input of `file://x:/path/to/git` is parsed as empty // host and with `x:/path/to/git` as path. This behavior is wrong for Git which only follows // that rule on Windows and parses `x:` as host on Unix platforms. Additionally, the url crate // does not account for Windows special UNC path support. // TODO: implement UNC path special case let windows_special_path = if cfg!(windows) { // Inputs created via url::Url::from_file_path contain an additional `/` between the // protocol and the absolute path. Make sure we ignore that first slash character to avoid // producing invalid paths. 
let input_after_protocol = if first_slash == 0 { &input_after_protocol[1..] } else { input_after_protocol }; // parse `file://x:/path/to/git` as explained above if input_after_protocol.chars().nth(1) == Some(':') { Some(input_after_protocol) } else { None } } else { None }; let host = if windows_special_path.is_some() || first_slash == 0 { // `file:///path/to/git` or a windows special case was triggered None } else { // `file://host/path/to/git` Some(&input_after_protocol[..first_slash]) }; // default behavior on Unix platforms and if no Windows special case was triggered let path = windows_special_path.unwrap_or(&input_after_protocol[first_slash..]); Ok(crate::Url { serialize_alternative_form: false, host: host.map(Into::into), ..local(path.into())? }) } pub(crate) fn local(input: &BStr) -> Result { if input.is_empty() { return Err(Error::MissingRepositoryPath { url: input.to_owned(), kind: UrlKind::Local, }); } Ok(crate::Url { serialize_alternative_form: true, scheme: Scheme::File, password: None, user: None, host: None, port: None, path: input.to_owned(), }) } fn input_to_utf8(input: &BStr, kind: UrlKind) -> Result<&str, Error> { std::str::from_utf8(input).map_err(|source| Error::Utf8 { url: input.to_owned(), kind, source, }) } fn input_to_utf8_and_url(input: &BStr, kind: UrlKind) -> Result<(&str, url::Url), Error> { let input = input_to_utf8(input, kind)?; url::Url::parse(input) .map(|url| (input, url)) .map_err(|source| Error::Url { url: input.to_owned(), kind, source, }) } gix-url-0.28.2/src/scheme.rs000064400000000000000000000032661046102023000137140ustar 00000000000000/// A scheme or protocol for use in a [`Url`][crate::Url]. /// /// It defines how to talk to a given repository. #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[allow(missing_docs)] pub enum Scheme { /// A local resource that is accessible on the current host. 
File, /// A git daemon, like `File` over TCP/IP. Git, /// Launch `git-upload-pack` through an `ssh` tunnel. Ssh, /// Use the HTTP protocol to talk to git servers. Http, /// Use the HTTPS protocol to talk to git servers. Https, /// Any other protocol or transport that isn't known at compile time. /// /// It's used to support plug-in transports. Ext(String), } impl<'a> From<&'a str> for Scheme { fn from(value: &'a str) -> Self { match value { // "ssh+git" and "git+ssh" are legacy, but Git still allows them and so should we "ssh" | "ssh+git" | "git+ssh" => Scheme::Ssh, "file" => Scheme::File, "git" => Scheme::Git, "http" => Scheme::Http, "https" => Scheme::Https, unknown => Scheme::Ext(unknown.into()), } } } impl Scheme { /// Return ourselves parseable name. pub fn as_str(&self) -> &str { use Scheme::*; match self { File => "file", Git => "git", Ssh => "ssh", Http => "http", Https => "https", Ext(name) => name.as_str(), } } } impl std::fmt::Display for Scheme { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_str(self.as_str()) } }