url-2.5.2/.cargo_vcs_info.json0000644000000001410000000000100116400ustar { "git": { "sha1": "54346fa288e16b25b71c45149d7067c752b450e0" }, "path_in_vcs": "url" }url-2.5.2/Cargo.toml0000644000000035010000000000100076410ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2018" rust-version = "1.56" name = "url" version = "2.5.2" authors = ["The rust-url developers"] include = [ "src/**/*", "LICENSE-*", "README.md", "tests/**", ] description = "URL library for Rust, based on the WHATWG URL Standard" documentation = "https://docs.rs/url" readme = "README.md" keywords = [ "url", "parser", ] categories = [ "parser-implementations", "web-programming", "encoding", ] license = "MIT OR Apache-2.0" repository = "https://github.com/servo/rust-url" [package.metadata.docs.rs] features = ["serde"] rustdoc-args = ["--generate-link-to-definition"] [package.metadata.playground] features = ["serde"] [[test]] name = "url_wpt" path = "tests/wpt.rs" harness = false [[bench]] name = "parse_url" path = "benches/parse_url.rs" harness = false [dependencies.form_urlencoded] version = "1.2.1" [dependencies.idna] version = "0.5.0" [dependencies.percent-encoding] version = "2.3.1" [dependencies.serde] version = "1.0" features = ["derive"] optional = true [dev-dependencies.bencher] version = "0.1" [dev-dependencies.serde] version = "1.0" features = ["derive"] [dev-dependencies.serde_json] version = "1.0" [features] debugger_visualizer = [] default = [] expose_internals = [] [target."cfg(all(target_arch = \"wasm32\", target_os = 
\"unknown\"))".dev-dependencies.wasm-bindgen-test] version = "0.3" url-2.5.2/Cargo.toml.orig000064400000000000000000000030011046102023000133150ustar 00000000000000[package] name = "url" # When updating version, also modify html_root_url in the lib.rs version = "2.5.2" authors = ["The rust-url developers"] description = "URL library for Rust, based on the WHATWG URL Standard" documentation = "https://docs.rs/url" repository = "https://github.com/servo/rust-url" readme = "../README.md" keywords = ["url", "parser"] categories = ["parser-implementations", "web-programming", "encoding"] license = "MIT OR Apache-2.0" include = ["src/**/*", "LICENSE-*", "README.md", "tests/**"] edition = "2018" rust-version = "1.56" [dev-dependencies] serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" bencher = "0.1" [target.'cfg(all(target_arch = "wasm32", target_os = "unknown"))'.dev-dependencies] wasm-bindgen-test = "0.3" [dependencies] form_urlencoded = { version = "1.2.1", path = "../form_urlencoded" } idna = { version = "0.5.0", path = "../idna" } percent-encoding = { version = "2.3.1", path = "../percent_encoding" } serde = { version = "1.0", optional = true, features = ["derive"] } [features] default = [] # Enable to use the #[debugger_visualizer] attribute. This feature requires Rust >= 1.71. debugger_visualizer = [] # Expose internal offsets of the URL. expose_internals = [] [[test]] name = "url_wpt" path = "tests/wpt.rs" harness = false [[bench]] name = "parse_url" path = "benches/parse_url.rs" harness = false [package.metadata.docs.rs] features = ["serde"] rustdoc-args = ["--generate-link-to-definition"] [package.metadata.playground] features = ["serde"] url-2.5.2/LICENSE-APACHE000064400000000000000000000251371046102023000123700ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. 
"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. url-2.5.2/LICENSE-MIT000064400000000000000000000020601046102023000120660ustar 00000000000000Copyright (c) 2013-2022 The rust-url developers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. url-2.5.2/README.md000064400000000000000000000015171046102023000117170ustar 00000000000000rust-url ======== [![Build status](https://github.com/servo/rust-url/workflows/CI/badge.svg)](https://github.com/servo/rust-url/actions?query=workflow%3ACI) [![Coverage](https://codecov.io/gh/servo/rust-url/branch/master/graph/badge.svg)](https://codecov.io/gh/servo/rust-url) [![Chat](https://img.shields.io/badge/chat-%23rust--url:mozilla.org-%2346BC99?logo=Matrix)](https://matrix.to/#/#rust-url:mozilla.org) [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE-MIT) [![License: Apache 2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE-APACHE) URL library for Rust, based on the [URL Standard](https://url.spec.whatwg.org/). [Documentation](https://docs.rs/url) Please see [UPGRADING.md](https://github.com/servo/rust-url/blob/main/UPGRADING.md) if you are upgrading from a previous version. url-2.5.2/src/host.rs000064400000000000000000000353451046102023000125600ustar 00000000000000// Copyright 2013-2016 The rust-url developers. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. 
use std::cmp; use std::fmt::{self, Formatter}; use std::net::{Ipv4Addr, Ipv6Addr}; use percent_encoding::{percent_decode, utf8_percent_encode, CONTROLS}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; use crate::parser::{ParseError, ParseResult}; #[cfg_attr(feature = "serde", derive(Deserialize, Serialize))] #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub(crate) enum HostInternal { None, Domain, Ipv4(Ipv4Addr), Ipv6(Ipv6Addr), } impl From> for HostInternal { fn from(host: Host) -> HostInternal { match host { Host::Domain(ref s) if s.is_empty() => HostInternal::None, Host::Domain(_) => HostInternal::Domain, Host::Ipv4(address) => HostInternal::Ipv4(address), Host::Ipv6(address) => HostInternal::Ipv6(address), } } } /// The host name of an URL. #[cfg_attr(feature = "serde", derive(Deserialize, Serialize))] #[derive(Clone, Debug, Eq, Ord, PartialOrd, Hash)] pub enum Host { /// A DNS domain name, as '.' dot-separated labels. /// Non-ASCII labels are encoded in punycode per IDNA if this is the host of /// a special URL, or percent encoded for non-special URLs. Hosts for /// non-special URLs are also called opaque hosts. Domain(S), /// An IPv4 address. /// `Url::host_str` returns the serialization of this address, /// as four decimal integers separated by `.` dots. Ipv4(Ipv4Addr), /// An IPv6 address. /// `Url::host_str` returns the serialization of that address between `[` and `]` brackets, /// in the format per [RFC 5952 *A Recommendation /// for IPv6 Address Text Representation*](https://tools.ietf.org/html/rfc5952): /// lowercase hexadecimal with maximal `::` compression. Ipv6(Ipv6Addr), } impl<'a> Host<&'a str> { /// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`. 
pub fn to_owned(&self) -> Host { match *self { Host::Domain(domain) => Host::Domain(domain.to_owned()), Host::Ipv4(address) => Host::Ipv4(address), Host::Ipv6(address) => Host::Ipv6(address), } } } impl Host { /// Parse a host: either an IPv6 address in [] square brackets, or a domain. /// /// pub fn parse(input: &str) -> Result { if input.starts_with('[') { if !input.ends_with(']') { return Err(ParseError::InvalidIpv6Address); } return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6); } let domain = percent_decode(input.as_bytes()).decode_utf8_lossy(); let domain = Self::domain_to_ascii(&domain)?; if domain.is_empty() { return Err(ParseError::EmptyHost); } let is_invalid_domain_char = |c| { matches!( c, '\0'..='\u{001F}' | ' ' | '#' | '%' | '/' | ':' | '<' | '>' | '?' | '@' | '[' | '\\' | ']' | '^' | '\u{007F}' | '|' ) }; if domain.find(is_invalid_domain_char).is_some() { Err(ParseError::InvalidDomainCharacter) } else if ends_in_a_number(&domain) { let address = parse_ipv4addr(&domain)?; Ok(Host::Ipv4(address)) } else { Ok(Host::Domain(domain)) } } // pub fn parse_opaque(input: &str) -> Result { if input.starts_with('[') { if !input.ends_with(']') { return Err(ParseError::InvalidIpv6Address); } return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6); } let is_invalid_host_char = |c| { matches!( c, '\0' | '\t' | '\n' | '\r' | ' ' | '#' | '/' | ':' | '<' | '>' | '?' 
| '@' | '[' | '\\' | ']' | '^' | '|' ) }; if input.find(is_invalid_host_char).is_some() { Err(ParseError::InvalidDomainCharacter) } else { Ok(Host::Domain( utf8_percent_encode(input, CONTROLS).to_string(), )) } } /// convert domain with idna fn domain_to_ascii(domain: &str) -> Result { idna::domain_to_ascii(domain).map_err(Into::into) } } impl> fmt::Display for Host { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match *self { Host::Domain(ref domain) => domain.as_ref().fmt(f), Host::Ipv4(ref addr) => addr.fmt(f), Host::Ipv6(ref addr) => { f.write_str("[")?; write_ipv6(addr, f)?; f.write_str("]") } } } } impl PartialEq> for Host where S: PartialEq, { fn eq(&self, other: &Host) -> bool { match (self, other) { (Host::Domain(a), Host::Domain(b)) => a == b, (Host::Ipv4(a), Host::Ipv4(b)) => a == b, (Host::Ipv6(a), Host::Ipv6(b)) => a == b, (_, _) => false, } } } fn write_ipv6(addr: &Ipv6Addr, f: &mut Formatter<'_>) -> fmt::Result { let segments = addr.segments(); let (compress_start, compress_end) = longest_zero_sequence(&segments); let mut i = 0; while i < 8 { if i == compress_start { f.write_str(":")?; if i == 0 { f.write_str(":")?; } if compress_end < 8 { i = compress_end; } else { break; } } write!(f, "{:x}", segments[i as usize])?; if i < 7 { f.write_str(":")?; } i += 1; } Ok(()) } // https://url.spec.whatwg.org/#concept-ipv6-serializer step 2 and 3 fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) { let mut longest = -1; let mut longest_length = -1; let mut start = -1; macro_rules! 
finish_sequence( ($end: expr) => { if start >= 0 { let length = $end - start; if length > longest_length { longest = start; longest_length = length; } } }; ); for i in 0..8 { if pieces[i as usize] == 0 { if start < 0 { start = i; } } else { finish_sequence!(i); start = -1; } } finish_sequence!(8); // https://url.spec.whatwg.org/#concept-ipv6-serializer // step 3: ignore lone zeroes if longest_length < 2 { (-1, -2) } else { (longest, longest + longest_length) } } /// fn ends_in_a_number(input: &str) -> bool { let mut parts = input.rsplit('.'); let last = parts.next().unwrap(); let last = if last.is_empty() { if let Some(last) = parts.next() { last } else { return false; } } else { last }; if !last.is_empty() && last.as_bytes().iter().all(|c| c.is_ascii_digit()) { return true; } parse_ipv4number(last).is_ok() } /// /// Ok(None) means the input is a valid number, but it overflows a `u32`. fn parse_ipv4number(mut input: &str) -> Result, ()> { if input.is_empty() { return Err(()); } let mut r = 10; if input.starts_with("0x") || input.starts_with("0X") { input = &input[2..]; r = 16; } else if input.len() >= 2 && input.starts_with('0') { input = &input[1..]; r = 8; } if input.is_empty() { return Ok(Some(0)); } let valid_number = match r { 8 => input.as_bytes().iter().all(|c| (b'0'..=b'7').contains(c)), 10 => input.as_bytes().iter().all(|c| c.is_ascii_digit()), 16 => input.as_bytes().iter().all(|c| c.is_ascii_hexdigit()), _ => false, }; if !valid_number { return Err(()); } match u32::from_str_radix(input, r) { Ok(num) => Ok(Some(num)), Err(_) => Ok(None), // The only possible error kind here is an integer overflow. // The validity of the chars in the input is checked above. 
} } /// fn parse_ipv4addr(input: &str) -> ParseResult { let mut parts: Vec<&str> = input.split('.').collect(); if parts.last() == Some(&"") { parts.pop(); } if parts.len() > 4 { return Err(ParseError::InvalidIpv4Address); } let mut numbers: Vec = Vec::new(); for part in parts { match parse_ipv4number(part) { Ok(Some(n)) => numbers.push(n), Ok(None) => return Err(ParseError::InvalidIpv4Address), // u32 overflow Err(()) => return Err(ParseError::InvalidIpv4Address), }; } let mut ipv4 = numbers.pop().expect("a non-empty list of numbers"); // Equivalent to: ipv4 >= 256 ** (4 − numbers.len()) if ipv4 > u32::MAX >> (8 * numbers.len() as u32) { return Err(ParseError::InvalidIpv4Address); } if numbers.iter().any(|x| *x > 255) { return Err(ParseError::InvalidIpv4Address); } for (counter, n) in numbers.iter().enumerate() { ipv4 += n << (8 * (3 - counter as u32)) } Ok(Ipv4Addr::from(ipv4)) } /// fn parse_ipv6addr(input: &str) -> ParseResult { let input = input.as_bytes(); let len = input.len(); let mut is_ip_v4 = false; let mut pieces = [0, 0, 0, 0, 0, 0, 0, 0]; let mut piece_pointer = 0; let mut compress_pointer = None; let mut i = 0; if len < 2 { return Err(ParseError::InvalidIpv6Address); } if input[0] == b':' { if input[1] != b':' { return Err(ParseError::InvalidIpv6Address); } i = 2; piece_pointer = 1; compress_pointer = Some(1); } while i < len { if piece_pointer == 8 { return Err(ParseError::InvalidIpv6Address); } if input[i] == b':' { if compress_pointer.is_some() { return Err(ParseError::InvalidIpv6Address); } i += 1; piece_pointer += 1; compress_pointer = Some(piece_pointer); continue; } let start = i; let end = cmp::min(len, start + 4); let mut value = 0u16; while i < end { match (input[i] as char).to_digit(16) { Some(digit) => { value = value * 0x10 + digit as u16; i += 1; } None => break, } } if i < len { match input[i] { b'.' 
=> { if i == start { return Err(ParseError::InvalidIpv6Address); } i = start; if piece_pointer > 6 { return Err(ParseError::InvalidIpv6Address); } is_ip_v4 = true; } b':' => { i += 1; if i == len { return Err(ParseError::InvalidIpv6Address); } } _ => return Err(ParseError::InvalidIpv6Address), } } if is_ip_v4 { break; } pieces[piece_pointer] = value; piece_pointer += 1; } if is_ip_v4 { if piece_pointer > 6 { return Err(ParseError::InvalidIpv6Address); } let mut numbers_seen = 0; while i < len { if numbers_seen > 0 { if numbers_seen < 4 && (i < len && input[i] == b'.') { i += 1 } else { return Err(ParseError::InvalidIpv6Address); } } let mut ipv4_piece = None; while i < len { let digit = match input[i] { c @ b'0'..=b'9' => c - b'0', _ => break, }; match ipv4_piece { None => ipv4_piece = Some(digit as u16), Some(0) => return Err(ParseError::InvalidIpv6Address), // No leading zero Some(ref mut v) => { *v = *v * 10 + digit as u16; if *v > 255 { return Err(ParseError::InvalidIpv6Address); } } } i += 1; } pieces[piece_pointer] = if let Some(v) = ipv4_piece { pieces[piece_pointer] * 0x100 + v } else { return Err(ParseError::InvalidIpv6Address); }; numbers_seen += 1; if numbers_seen == 2 || numbers_seen == 4 { piece_pointer += 1; } } if numbers_seen != 4 { return Err(ParseError::InvalidIpv6Address); } } if i < len { return Err(ParseError::InvalidIpv6Address); } match compress_pointer { Some(compress_pointer) => { let mut swaps = piece_pointer - compress_pointer; piece_pointer = 7; while swaps > 0 { pieces.swap(piece_pointer, compress_pointer + swaps - 1); swaps -= 1; piece_pointer -= 1; } } _ => { if piece_pointer != 8 { return Err(ParseError::InvalidIpv6Address); } } } Ok(Ipv6Addr::new( pieces[0], pieces[1], pieces[2], pieces[3], pieces[4], pieces[5], pieces[6], pieces[7], )) } url-2.5.2/src/lib.rs000064400000000000000000003112251046102023000123430ustar 00000000000000// Copyright 2013-2015 The rust-url developers. 
// // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. /*! rust-url is an implementation of the [URL Standard](http://url.spec.whatwg.org/) for the [Rust](http://rust-lang.org/) programming language. # URL parsing and data structures First, URL parsing may fail for various reasons and therefore returns a `Result`. ``` use url::{Url, ParseError}; assert!(Url::parse("http://[:::1]") == Err(ParseError::InvalidIpv6Address)) ``` Let’s parse a valid URL and look at its components. ``` use url::{Url, Host, Position}; # use url::ParseError; # fn run() -> Result<(), ParseError> { let issue_list_url = Url::parse( "https://github.com/rust-lang/rust/issues?labels=E-easy&state=open" )?; assert!(issue_list_url.scheme() == "https"); assert!(issue_list_url.username() == ""); assert!(issue_list_url.password() == None); assert!(issue_list_url.host_str() == Some("github.com")); assert!(issue_list_url.host() == Some(Host::Domain("github.com"))); assert!(issue_list_url.port() == None); assert!(issue_list_url.path() == "/rust-lang/rust/issues"); assert!(issue_list_url.path_segments().map(|c| c.collect::>()) == Some(vec!["rust-lang", "rust", "issues"])); assert!(issue_list_url.query() == Some("labels=E-easy&state=open")); assert!(&issue_list_url[Position::BeforePath..] 
== "/rust-lang/rust/issues?labels=E-easy&state=open"); assert!(issue_list_url.fragment() == None); assert!(!issue_list_url.cannot_be_a_base()); # Ok(()) # } # run().unwrap(); ``` Some URLs are said to be *cannot-be-a-base*: they don’t have a username, password, host, or port, and their "path" is an arbitrary string rather than slash-separated segments: ``` use url::Url; # use url::ParseError; # fn run() -> Result<(), ParseError> { let data_url = Url::parse("data:text/plain,Hello?World#")?; assert!(data_url.cannot_be_a_base()); assert!(data_url.scheme() == "data"); assert!(data_url.path() == "text/plain,Hello"); assert!(data_url.path_segments().is_none()); assert!(data_url.query() == Some("World")); assert!(data_url.fragment() == Some("")); # Ok(()) # } # run().unwrap(); ``` ## Serde Enable the `serde` feature to include `Deserialize` and `Serialize` implementations for `url::Url`. # Base URL Many contexts allow URL *references* that can be relative to a *base URL*: ```html ``` Since parsed URLs are absolute, giving a base is required for parsing relative URLs: ``` use url::{Url, ParseError}; assert!(Url::parse("../main.css") == Err(ParseError::RelativeUrlWithoutBase)) ``` Use the `join` method on an `Url` to use it as a base URL: ``` use url::Url; # use url::ParseError; # fn run() -> Result<(), ParseError> { let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html")?; let css_url = this_document.join("../main.css")?; assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css"); # Ok(()) # } # run().unwrap(); ``` # Feature: `serde` If you enable the `serde` feature, [`Url`](struct.Url.html) will implement [`serde::Serialize`](https://docs.rs/serde/1/serde/trait.Serialize.html) and [`serde::Deserialize`](https://docs.rs/serde/1/serde/trait.Deserialize.html). See [serde documentation](https://serde.rs) for more information. 
```toml url = { version = "2", features = ["serde"] } ``` # Feature: `debugger_visualizer` If you enable the `debugger_visualizer` feature, the `url` crate will include a [natvis file](https://docs.microsoft.com/en-us/visualstudio/debugger/create-custom-views-of-native-objects) for [Visual Studio](https://www.visualstudio.com/) that allows you to view [`Url`](struct.Url.html) objects in the debugger. This feature requires Rust 1.71 or later. ```toml url = { version = "2", features = ["debugger_visualizer"] } ``` */ #![doc(html_root_url = "https://docs.rs/url/2.5.2")] #![cfg_attr( feature = "debugger_visualizer", debugger_visualizer(natvis_file = "../../debug_metadata/url.natvis") )] pub use form_urlencoded; #[cfg(feature = "serde")] extern crate serde; use crate::host::HostInternal; use crate::parser::{ to_u32, Context, Parser, SchemeType, PATH_SEGMENT, SPECIAL_PATH_SEGMENT, USERINFO, }; use percent_encoding::{percent_decode, percent_encode, utf8_percent_encode}; use std::borrow::Borrow; use std::cmp; use std::fmt::{self, Write}; use std::hash; #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] use std::io; use std::mem; use std::net::IpAddr; #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] use std::net::{SocketAddr, ToSocketAddrs}; use std::ops::{Range, RangeFrom, RangeTo}; use std::path::{Path, PathBuf}; use std::str; use std::convert::TryFrom; pub use crate::host::Host; pub use crate::origin::{OpaqueOrigin, Origin}; pub use crate::parser::{ParseError, SyntaxViolation}; pub use crate::path_segments::PathSegmentsMut; pub use crate::slicing::Position; pub use form_urlencoded::EncodingOverride; mod host; mod origin; mod parser; mod path_segments; mod slicing; #[doc(hidden)] pub mod quirks; /// A parsed URL record. #[derive(Clone)] pub struct Url { /// Syntax in pseudo-BNF: /// /// url = scheme ":" [ hierarchical | non-hierarchical ] [ "?" query ]? [ "#" fragment ]? 
/// non-hierarchical = non-hierarchical-path /// non-hierarchical-path = /* Does not start with "/" */ /// hierarchical = authority? hierarchical-path /// authority = "//" userinfo? host [ ":" port ]? /// userinfo = username [ ":" password ]? "@" /// hierarchical-path = [ "/" path-segment ]+ serialization: String, // Components scheme_end: u32, // Before ':' username_end: u32, // Before ':' (if a password is given) or '@' (if not) host_start: u32, host_end: u32, host: HostInternal, port: Option, path_start: u32, // Before initial '/', if any query_start: Option, // Before '?', unlike Position::QueryStart fragment_start: Option, // Before '#', unlike Position::FragmentStart } /// Full configuration for the URL parser. #[derive(Copy, Clone)] #[must_use] pub struct ParseOptions<'a> { base_url: Option<&'a Url>, encoding_override: EncodingOverride<'a>, violation_fn: Option<&'a dyn Fn(SyntaxViolation)>, } impl<'a> ParseOptions<'a> { /// Change the base URL /// /// See the notes of [`Url::join`] for more details about how this base is considered /// when parsing. pub fn base_url(mut self, new: Option<&'a Url>) -> Self { self.base_url = new; self } /// Override the character encoding of query strings. /// This is a legacy concept only relevant for HTML. pub fn encoding_override(mut self, new: EncodingOverride<'a>) -> Self { self.encoding_override = new; self } /// Call the provided function or closure for a non-fatal `SyntaxViolation` /// when it occurs during parsing. Note that since the provided function is /// `Fn`, the caller might need to utilize _interior mutability_, such as with /// a `RefCell`, to collect the violations. 
/// /// ## Example /// ``` /// use std::cell::RefCell; /// use url::{Url, SyntaxViolation}; /// # use url::ParseError; /// # fn run() -> Result<(), url::ParseError> { /// let violations = RefCell::new(Vec::new()); /// let url = Url::options() /// .syntax_violation_callback(Some(&|v| violations.borrow_mut().push(v))) /// .parse("https:////example.com")?; /// assert_eq!(url.as_str(), "https://example.com/"); /// assert_eq!(violations.into_inner(), /// vec!(SyntaxViolation::ExpectedDoubleSlash)); /// # Ok(()) /// # } /// # run().unwrap(); /// ``` pub fn syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self { self.violation_fn = new; self } /// Parse an URL string with the configuration so far. pub fn parse(self, input: &str) -> Result { Parser { serialization: String::with_capacity(input.len()), base_url: self.base_url, query_encoding_override: self.encoding_override, violation_fn: self.violation_fn, context: Context::UrlParser, } .parse_url(input) } } impl Url { /// Parse an absolute URL from a string. /// /// # Examples /// /// ```rust /// use url::Url; /// # use url::ParseError; /// /// # fn run() -> Result<(), ParseError> { /// let url = Url::parse("https://example.net")?; /// # Ok(()) /// # } /// # run().unwrap(); /// ``` /// /// # Errors /// /// If the function can not parse an absolute URL from the given string, /// a [`ParseError`] variant will be returned. /// /// [`ParseError`]: enum.ParseError.html #[inline] pub fn parse(input: &str) -> Result { Url::options().parse(input) } /// Parse an absolute URL from a string and add params to its query string. /// /// Existing params are not removed. 
///
    /// # Examples
    ///
    /// ```rust
    /// use url::Url;
    /// # use url::ParseError;
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let url = Url::parse_with_params("https://example.net?dont=clobberme",
    ///                                  &[("lang", "rust"), ("browser", "servo")])?;
    /// assert_eq!("https://example.net/?dont=clobberme&lang=rust&browser=servo", url.as_str());
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    ///
    /// # Errors
    ///
    /// If the function can not parse an absolute URL from the given string,
    /// a [`ParseError`] variant will be returned.
    ///
    /// [`ParseError`]: enum.ParseError.html
    #[inline]
    pub fn parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Url, crate::ParseError>
    where
        I: IntoIterator,
        I::Item: Borrow<(K, V)>,
        K: AsRef<str>,
        V: AsRef<str>,
    {
        let mut url = Url::options().parse(input);

        // Only a successfully parsed URL gets the extra pairs; a parse error
        // is returned untouched.
        if let Ok(ref mut url) = url {
            url.query_pairs_mut().extend_pairs(iter);
        }

        url
    }

    /// https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path
    ///
    /// Does nothing unless this is a cannot-be-a-base URL with neither a query
    /// nor a fragment; in that case every trailing U+0020 space is removed from
    /// the serialization.
    fn strip_trailing_spaces_from_opaque_path(&mut self) {
        if !self.cannot_be_a_base() {
            return;
        }

        if self.fragment_start.is_some() {
            return;
        }

        if self.query_start.is_some() {
            return;
        }

        let trailing_space_count = self
            .serialization
            .chars()
            .rev()
            .take_while(|c| *c == ' ')
            .count();

        // A space is one byte in UTF-8, so the char count is also a byte count.
        let start = self.serialization.len() - trailing_space_count;

        self.serialization.truncate(start);
    }

    /// Parse a string as an URL, with this URL as the base URL.
    ///
    /// The inverse of this is [`make_relative`].
    ///
    /// # Notes
    ///
    /// - A trailing slash is significant.
    ///   Without it, the last path component is considered to be a “file” name
    ///   to be removed to get at the “directory” that is used as the base.
    /// - A [scheme relative special URL](https://url.spec.whatwg.org/#scheme-relative-special-url-string)
    ///   as input replaces everything in the base URL after the scheme.
    /// - An absolute URL (with a scheme) as input replaces the whole base URL (even the scheme).
///
    /// # Examples
    ///
    /// ```rust
    /// use url::Url;
    /// # use url::ParseError;
    ///
    /// // Base without a trailing slash
    /// # fn run() -> Result<(), ParseError> {
    /// let base = Url::parse("https://example.net/a/b.html")?;
    /// let url = base.join("c.png")?;
    /// assert_eq!(url.as_str(), "https://example.net/a/c.png");  // Not /a/b.html/c.png
    ///
    /// // Base with a trailing slash
    /// let base = Url::parse("https://example.net/a/b/")?;
    /// let url = base.join("c.png")?;
    /// assert_eq!(url.as_str(), "https://example.net/a/b/c.png");
    ///
    /// // Input as scheme relative special URL
    /// let base = Url::parse("https://alice.com/a")?;
    /// let url = base.join("//eve.com/b")?;
    /// assert_eq!(url.as_str(), "https://eve.com/b");
    ///
    /// // Input as absolute URL
    /// let base = Url::parse("https://alice.com/a")?;
    /// let url = base.join("http://eve.com/b")?;
    /// assert_eq!(url.as_str(), "http://eve.com/b");  // http instead of https
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    ///
    /// # Errors
    ///
    /// If the function can not parse an URL from the given string
    /// with this URL as the base URL, a [`ParseError`] variant will be returned.
    ///
    /// [`ParseError`]: enum.ParseError.html
    /// [`make_relative`]: #method.make_relative
    #[inline]
    pub fn join(&self, input: &str) -> Result<Url, crate::ParseError> {
        Url::options().base_url(Some(self)).parse(input)
    }

    /// Creates a relative URL if possible, with this URL as the base URL.
    ///
    /// This is the inverse of [`join`].
/// /// # Examples /// /// ```rust /// use url::Url; /// # use url::ParseError; /// /// # fn run() -> Result<(), ParseError> { /// let base = Url::parse("https://example.net/a/b.html")?; /// let url = Url::parse("https://example.net/a/c.png")?; /// let relative = base.make_relative(&url); /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png")); /// /// let base = Url::parse("https://example.net/a/b/")?; /// let url = Url::parse("https://example.net/a/b/c.png")?; /// let relative = base.make_relative(&url); /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png")); /// /// let base = Url::parse("https://example.net/a/b/")?; /// let url = Url::parse("https://example.net/a/d/c.png")?; /// let relative = base.make_relative(&url); /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("../d/c.png")); /// /// let base = Url::parse("https://example.net/a/b.html?c=d")?; /// let url = Url::parse("https://example.net/a/b.html?e=f")?; /// let relative = base.make_relative(&url); /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("?e=f")); /// # Ok(()) /// # } /// # run().unwrap(); /// ``` /// /// # Errors /// /// If this URL can't be a base for the given URL, `None` is returned. /// This is for example the case if the scheme, host or port are not the same. 
/// /// [`join`]: #method.join pub fn make_relative(&self, url: &Url) -> Option { if self.cannot_be_a_base() { return None; } // Scheme, host and port need to be the same if self.scheme() != url.scheme() || self.host() != url.host() || self.port() != url.port() { return None; } // We ignore username/password at this point // The path has to be transformed let mut relative = String::new(); // Extract the filename of both URIs, these need to be handled separately fn extract_path_filename(s: &str) -> (&str, &str) { let last_slash_idx = s.rfind('/').unwrap_or(0); let (path, filename) = s.split_at(last_slash_idx); if filename.is_empty() { (path, "") } else { (path, &filename[1..]) } } let (base_path, base_filename) = extract_path_filename(self.path()); let (url_path, url_filename) = extract_path_filename(url.path()); let mut base_path = base_path.split('/').peekable(); let mut url_path = url_path.split('/').peekable(); // Skip over the common prefix while base_path.peek().is_some() && base_path.peek() == url_path.peek() { base_path.next(); url_path.next(); } // Add `..` segments for the remainder of the base path for base_path_segment in base_path { // Skip empty last segments if base_path_segment.is_empty() { break; } if !relative.is_empty() { relative.push('/'); } relative.push_str(".."); } // Append the remainder of the other URI for url_path_segment in url_path { if !relative.is_empty() { relative.push('/'); } relative.push_str(url_path_segment); } // Add the filename if they are not the same if !relative.is_empty() || base_filename != url_filename { // If the URIs filename is empty this means that it was a directory // so we'll have to append a '/'. // // Otherwise append it directly as the new filename. 
if url_filename.is_empty() { relative.push('/'); } else { if !relative.is_empty() { relative.push('/'); } relative.push_str(url_filename); } } // Query and fragment are only taken from the other URI if let Some(query) = url.query() { relative.push('?'); relative.push_str(query); } if let Some(fragment) = url.fragment() { relative.push('#'); relative.push_str(fragment); } Some(relative) } /// Return a default `ParseOptions` that can fully configure the URL parser. /// /// # Examples /// /// Get default `ParseOptions`, then change base url /// /// ```rust /// use url::Url; /// # use url::ParseError; /// # fn run() -> Result<(), ParseError> { /// let options = Url::options(); /// let api = Url::parse("https://api.example.com")?; /// let base_url = options.base_url(Some(&api)); /// let version_url = base_url.parse("version.json")?; /// assert_eq!(version_url.as_str(), "https://api.example.com/version.json"); /// # Ok(()) /// # } /// # run().unwrap(); /// ``` pub fn options<'a>() -> ParseOptions<'a> { ParseOptions { base_url: None, encoding_override: None, violation_fn: None, } } /// Return the serialization of this URL. /// /// This is fast since that serialization is already stored in the `Url` struct. /// /// # Examples /// /// ```rust /// use url::Url; /// # use url::ParseError; /// /// # fn run() -> Result<(), ParseError> { /// let url_str = "https://example.net/"; /// let url = Url::parse(url_str)?; /// assert_eq!(url.as_str(), url_str); /// # Ok(()) /// # } /// # run().unwrap(); /// ``` #[inline] pub fn as_str(&self) -> &str { &self.serialization } /// Return the serialization of this URL. /// /// This consumes the `Url` and takes ownership of the `String` stored in it. 
/// /// # Examples /// /// ```rust /// use url::Url; /// # use url::ParseError; /// /// # fn run() -> Result<(), ParseError> { /// let url_str = "https://example.net/"; /// let url = Url::parse(url_str)?; /// assert_eq!(String::from(url), url_str); /// # Ok(()) /// # } /// # run().unwrap(); /// ``` #[inline] #[deprecated(since = "2.3.0", note = "use Into")] pub fn into_string(self) -> String { self.into() } /// For internal testing, not part of the public API. /// /// Methods of the `Url` struct assume a number of invariants. /// This checks each of these invariants and panic if one is not met. /// This is for testing rust-url itself. #[doc(hidden)] pub fn check_invariants(&self) -> Result<(), String> { macro_rules! assert { ($x: expr) => { if !$x { return Err(format!( "!( {} ) for URL {:?}", stringify!($x), self.serialization )); } }; } macro_rules! assert_eq { ($a: expr, $b: expr) => { { let a = $a; let b = $b; if a != b { return Err(format!("{:?} != {:?} ({} != {}) for URL {:?}", a, b, stringify!($a), stringify!($b), self.serialization)) } } } } assert!(self.scheme_end >= 1); assert!(self.byte_at(0).is_ascii_alphabetic()); assert!(self .slice(1..self.scheme_end) .chars() .all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.'))); assert_eq!(self.byte_at(self.scheme_end), b':'); if self.slice(self.scheme_end + 1..).starts_with("//") { // URL with authority if self.username_end != self.serialization.len() as u32 { match self.byte_at(self.username_end) { b':' => { assert!(self.host_start >= self.username_end + 2); assert_eq!(self.byte_at(self.host_start - 1), b'@'); } b'@' => assert!(self.host_start == self.username_end + 1), _ => assert_eq!(self.username_end, self.scheme_end + 3), } } assert!(self.host_start >= self.username_end); assert!(self.host_end >= self.host_start); let host_str = self.slice(self.host_start..self.host_end); match self.host { HostInternal::None => assert_eq!(host_str, ""), HostInternal::Ipv4(address) => assert_eq!(host_str, 
address.to_string()), HostInternal::Ipv6(address) => { let h: Host = Host::Ipv6(address); assert_eq!(host_str, h.to_string()) } HostInternal::Domain => { if SchemeType::from(self.scheme()).is_special() { assert!(!host_str.is_empty()) } } } if self.path_start == self.host_end { assert_eq!(self.port, None); } else { assert_eq!(self.byte_at(self.host_end), b':'); let port_str = self.slice(self.host_end + 1..self.path_start); assert_eq!( self.port, Some(port_str.parse::().expect("Couldn't parse port?")) ); } assert!( self.path_start as usize == self.serialization.len() || matches!(self.byte_at(self.path_start), b'/' | b'#' | b'?') ); } else { // Anarchist URL (no authority) assert_eq!(self.username_end, self.scheme_end + 1); assert_eq!(self.host_start, self.scheme_end + 1); assert_eq!(self.host_end, self.scheme_end + 1); assert_eq!(self.host, HostInternal::None); assert_eq!(self.port, None); if self.path().starts_with("//") { // special case when first path segment is empty assert_eq!(self.byte_at(self.scheme_end + 1), b'/'); assert_eq!(self.byte_at(self.scheme_end + 2), b'.'); assert_eq!(self.path_start, self.scheme_end + 3); } else { assert_eq!(self.path_start, self.scheme_end + 1); } } if let Some(start) = self.query_start { assert!(start >= self.path_start); assert_eq!(self.byte_at(start), b'?'); } if let Some(start) = self.fragment_start { assert!(start >= self.path_start); assert_eq!(self.byte_at(start), b'#'); } if let (Some(query_start), Some(fragment_start)) = (self.query_start, self.fragment_start) { assert!(fragment_start > query_start); } let other = Url::parse(self.as_str()).expect("Failed to parse myself?"); assert_eq!(&self.serialization, &other.serialization); assert_eq!(self.scheme_end, other.scheme_end); assert_eq!(self.username_end, other.username_end); assert_eq!(self.host_start, other.host_start); assert_eq!(self.host_end, other.host_end); assert!( self.host == other.host || // XXX No host round-trips to empty host. 
// See https://github.com/whatwg/url/issues/79 (self.host_str(), other.host_str()) == (None, Some("")) ); assert_eq!(self.port, other.port); assert_eq!(self.path_start, other.path_start); assert_eq!(self.query_start, other.query_start); assert_eq!(self.fragment_start, other.fragment_start); Ok(()) } /// Return the origin of this URL () /// /// Note: this returns an opaque origin for `file:` URLs, which causes /// `url.origin() != url.origin()`. /// /// # Examples /// /// URL with `ftp` scheme: /// /// ```rust /// use url::{Host, Origin, Url}; /// # use url::ParseError; /// /// # fn run() -> Result<(), ParseError> { /// let url = Url::parse("ftp://example.com/foo")?; /// assert_eq!(url.origin(), /// Origin::Tuple("ftp".into(), /// Host::Domain("example.com".into()), /// 21)); /// # Ok(()) /// # } /// # run().unwrap(); /// ``` /// /// URL with `blob` scheme: /// /// ```rust /// use url::{Host, Origin, Url}; /// # use url::ParseError; /// /// # fn run() -> Result<(), ParseError> { /// let url = Url::parse("blob:https://example.com/foo")?; /// assert_eq!(url.origin(), /// Origin::Tuple("https".into(), /// Host::Domain("example.com".into()), /// 443)); /// # Ok(()) /// # } /// # run().unwrap(); /// ``` /// /// URL with `file` scheme: /// /// ```rust /// use url::{Host, Origin, Url}; /// # use url::ParseError; /// /// # fn run() -> Result<(), ParseError> { /// let url = Url::parse("file:///tmp/foo")?; /// assert!(!url.origin().is_tuple()); /// /// let other_url = Url::parse("file:///tmp/foo")?; /// assert!(url.origin() != other_url.origin()); /// # Ok(()) /// # } /// # run().unwrap(); /// ``` /// /// URL with other scheme: /// /// ```rust /// use url::{Host, Origin, Url}; /// # use url::ParseError; /// /// # fn run() -> Result<(), ParseError> { /// let url = Url::parse("foo:bar")?; /// assert!(!url.origin().is_tuple()); /// # Ok(()) /// # } /// # run().unwrap(); /// ``` #[inline] pub fn origin(&self) -> Origin { origin::url_origin(self) } /// Return the scheme of this 
URL, lower-cased, as an ASCII string without the ':' delimiter. /// /// # Examples /// /// ``` /// use url::Url; /// # use url::ParseError; /// /// # fn run() -> Result<(), ParseError> { /// let url = Url::parse("file:///tmp/foo")?; /// assert_eq!(url.scheme(), "file"); /// # Ok(()) /// # } /// # run().unwrap(); /// ``` #[inline] pub fn scheme(&self) -> &str { self.slice(..self.scheme_end) } /// Return whether the URL is special (has a special scheme) /// /// # Examples /// /// ``` /// use url::Url; /// # use url::ParseError; /// /// # fn run() -> Result<(), ParseError> { /// assert!(Url::parse("http:///tmp/foo")?.is_special()); /// assert!(Url::parse("file:///tmp/foo")?.is_special()); /// assert!(!Url::parse("moz:///tmp/foo")?.is_special()); /// # Ok(()) /// # } /// # run().unwrap(); /// ``` pub fn is_special(&self) -> bool { let scheme_type = SchemeType::from(self.scheme()); scheme_type.is_special() } /// Return whether the URL has an 'authority', /// which can contain a username, password, host, and port number. /// /// URLs that do *not* are either path-only like `unix:/run/foo.socket` /// or cannot-be-a-base like `data:text/plain,Stuff`. /// /// See also the `authority` method. /// /// # Examples /// /// ``` /// use url::Url; /// # use url::ParseError; /// /// # fn run() -> Result<(), ParseError> { /// let url = Url::parse("ftp://rms@example.com")?; /// assert!(url.has_authority()); /// /// let url = Url::parse("unix:/run/foo.socket")?; /// assert!(!url.has_authority()); /// /// let url = Url::parse("data:text/plain,Stuff")?; /// assert!(!url.has_authority()); /// # Ok(()) /// # } /// # run().unwrap(); /// ``` #[inline] pub fn has_authority(&self) -> bool { debug_assert!(self.byte_at(self.scheme_end) == b':'); self.slice(self.scheme_end..).starts_with("://") } /// Return the authority of this URL as an ASCII string. /// /// Non-ASCII domains are punycode-encoded per IDNA if this is the host /// of a special URL, or percent encoded for non-special URLs. 
/// IPv6 addresses are given between `[` and `]` brackets. /// Ports are omitted if they match the well known port of a special URL. /// /// Username and password are percent-encoded. /// /// See also the `has_authority` method. /// /// # Examples /// /// ``` /// use url::Url; /// # use url::ParseError; /// /// # fn run() -> Result<(), ParseError> { /// let url = Url::parse("unix:/run/foo.socket")?; /// assert_eq!(url.authority(), ""); /// let url = Url::parse("file:///tmp/foo")?; /// assert_eq!(url.authority(), ""); /// let url = Url::parse("https://user:password@example.com/tmp/foo")?; /// assert_eq!(url.authority(), "user:password@example.com"); /// let url = Url::parse("irc://àlex.рф.example.com:6667/foo")?; /// assert_eq!(url.authority(), "%C3%A0lex.%D1%80%D1%84.example.com:6667"); /// let url = Url::parse("http://àlex.рф.example.com:80/foo")?; /// assert_eq!(url.authority(), "xn--lex-8ka.xn--p1ai.example.com"); /// # Ok(()) /// # } /// # run().unwrap(); /// ``` pub fn authority(&self) -> &str { let scheme_separator_len = "://".len() as u32; if self.has_authority() && self.path_start > self.scheme_end + scheme_separator_len { self.slice(self.scheme_end + scheme_separator_len..self.path_start) } else { "" } } /// Return whether this URL is a cannot-be-a-base URL, /// meaning that parsing a relative URL string with this URL as the base will return an error. /// /// This is the case if the scheme and `:` delimiter are not followed by a `/` slash, /// as is typically the case of `data:` and `mailto:` URLs. 
///
    /// # Examples
    ///
    /// ```
    /// use url::Url;
    /// # use url::ParseError;
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let url = Url::parse("ftp://rms@example.com")?;
    /// assert!(!url.cannot_be_a_base());
    ///
    /// let url = Url::parse("unix:/run/foo.socket")?;
    /// assert!(!url.cannot_be_a_base());
    ///
    /// let url = Url::parse("data:text/plain,Stuff")?;
    /// assert!(url.cannot_be_a_base());
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    #[inline]
    pub fn cannot_be_a_base(&self) -> bool {
        let after_colon = self.slice(self.scheme_end + 1..);
        !after_colon.starts_with('/')
    }

    /// Return the percent-encoded ASCII username of this URL,
    /// which is typically the empty string.
    ///
    /// # Examples
    ///
    /// ```
    /// use url::Url;
    /// # use url::ParseError;
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let url = Url::parse("ftp://rms@example.com")?;
    /// assert_eq!(url.username(), "rms");
    ///
    /// let url = Url::parse("ftp://:secret123@example.com")?;
    /// assert_eq!(url.username(), "");
    ///
    /// let url = Url::parse("https://example.com")?;
    /// assert_eq!(url.username(), "");
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    pub fn username(&self) -> &str {
        // The username, when present, starts right after "://".
        let username_start = self.scheme_end + "://".len() as u32;
        if !self.has_authority() || self.username_end <= username_start {
            return "";
        }
        self.slice(username_start..self.username_end)
    }

    /// Return the password for this URL, if any, as a percent-encoded ASCII string.
///
    /// # Examples
    ///
    /// ```
    /// use url::Url;
    /// # use url::ParseError;
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let url = Url::parse("ftp://rms:secret123@example.com")?;
    /// assert_eq!(url.password(), Some("secret123"));
    ///
    /// let url = Url::parse("ftp://:secret123@example.com")?;
    /// assert_eq!(url.password(), Some("secret123"));
    ///
    /// let url = Url::parse("ftp://rms@example.com")?;
    /// assert_eq!(url.password(), None);
    ///
    /// let url = Url::parse("https://example.com")?;
    /// assert_eq!(url.password(), None);
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    pub fn password(&self) -> Option<&str> {
        // This ':' is not the one marking a port number since a host can not be empty.
        // (Except for file: URLs, which do not have port numbers.)
        let has_password = self.has_authority()
            && self.username_end != self.serialization.len() as u32
            && self.byte_at(self.username_end) == b':';
        if !has_password {
            return None;
        }

        debug_assert!(self.byte_at(self.host_start - 1) == b'@');
        // The password sits between the ':' after the username and the '@' before the host.
        let password_start = self.username_end + 1;
        let password_end = self.host_start - 1;
        Some(self.slice(password_start..password_end))
    }

    /// Equivalent to `url.host().is_some()`.
    ///
    /// # Examples
    ///
    /// ```
    /// use url::Url;
    /// # use url::ParseError;
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let url = Url::parse("ftp://rms@example.com")?;
    /// assert!(url.has_host());
    ///
    /// let url = Url::parse("unix:/run/foo.socket")?;
    /// assert!(!url.has_host());
    ///
    /// let url = Url::parse("data:text/plain,Stuff")?;
    /// assert!(!url.has_host());
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    pub fn has_host(&self) -> bool {
        self.host != HostInternal::None
    }

    /// Return the string representation of the host (domain or IP address) for this URL, if any.
    ///
    /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
    /// of a special URL, or percent encoded for non-special URLs.
    /// IPv6 addresses are given between `[` and `]` brackets.
    ///
    /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
    /// don’t have a host.
///
    /// See also the `host` method.
    ///
    /// # Examples
    ///
    /// ```
    /// use url::Url;
    /// # use url::ParseError;
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let url = Url::parse("https://127.0.0.1/index.html")?;
    /// assert_eq!(url.host_str(), Some("127.0.0.1"));
    ///
    /// let url = Url::parse("ftp://rms@example.com")?;
    /// assert_eq!(url.host_str(), Some("example.com"));
    ///
    /// let url = Url::parse("unix:/run/foo.socket")?;
    /// assert_eq!(url.host_str(), None);
    ///
    /// let url = Url::parse("data:text/plain,Stuff")?;
    /// assert_eq!(url.host_str(), None);
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    pub fn host_str(&self) -> Option<&str> {
        if !self.has_host() {
            return None;
        }
        let host_range = self.host_start..self.host_end;
        Some(self.slice(host_range))
    }

    /// Return the parsed representation of the host for this URL.
    /// Non-ASCII domain labels are punycode-encoded per IDNA if this is the host
    /// of a special URL, or percent encoded for non-special URLs.
    ///
    /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
    /// don’t have a host.
    ///
    /// See also the `host_str` method.
///
    /// # Examples
    ///
    /// ```
    /// use url::Url;
    /// # use url::ParseError;
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let url = Url::parse("https://127.0.0.1/index.html")?;
    /// assert!(url.host().is_some());
    ///
    /// let url = Url::parse("ftp://rms@example.com")?;
    /// assert!(url.host().is_some());
    ///
    /// let url = Url::parse("unix:/run/foo.socket")?;
    /// assert!(url.host().is_none());
    ///
    /// let url = Url::parse("data:text/plain,Stuff")?;
    /// assert!(url.host().is_none());
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    pub fn host(&self) -> Option<Host<&str>> {
        match self.host {
            HostInternal::None => None,
            // A domain is not stored separately; it is borrowed from the serialization.
            HostInternal::Domain => Some(Host::Domain(self.slice(self.host_start..self.host_end))),
            HostInternal::Ipv4(address) => Some(Host::Ipv4(address)),
            HostInternal::Ipv6(address) => Some(Host::Ipv6(address)),
        }
    }

    /// If this URL has a host and it is a domain name (not an IP address), return it.
    /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
    /// of a special URL, or percent encoded for non-special URLs.
    ///
    /// # Examples
    ///
    /// ```
    /// use url::Url;
    /// # use url::ParseError;
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let url = Url::parse("https://127.0.0.1/")?;
    /// assert_eq!(url.domain(), None);
    ///
    /// let url = Url::parse("mailto:rms@example.net")?;
    /// assert_eq!(url.domain(), None);
    ///
    /// let url = Url::parse("https://example.com/")?;
    /// assert_eq!(url.domain(), Some("example.com"));
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    pub fn domain(&self) -> Option<&str> {
        match self.host {
            HostInternal::Domain => Some(self.slice(self.host_start..self.host_end)),
            _ => None,
        }
    }

    /// Return the port number for this URL, if any.
    ///
    /// Note that default port numbers are never reflected by the serialization,
    /// use the `port_or_known_default()` method if you want a default port number returned.
///
    /// # Examples
    ///
    /// ```
    /// use url::Url;
    /// # use url::ParseError;
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let url = Url::parse("https://example.com")?;
    /// assert_eq!(url.port(), None);
    ///
    /// let url = Url::parse("https://example.com:443/")?;
    /// assert_eq!(url.port(), None);
    ///
    /// let url = Url::parse("ssh://example.com:22")?;
    /// assert_eq!(url.port(), Some(22));
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    #[inline]
    pub fn port(&self) -> Option<u16> {
        self.port
    }

    /// Return the port number for this URL, or the default port number if it is known.
    ///
    /// This method only knows the default port number
    /// of the `http`, `https`, `ws`, `wss` and `ftp` schemes.
    ///
    /// For URLs in these schemes, this method always returns `Some(_)`.
    /// For other schemes, it is the same as `Url::port()`.
    ///
    /// # Examples
    ///
    /// ```
    /// use url::Url;
    /// # use url::ParseError;
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let url = Url::parse("foo://example.com")?;
    /// assert_eq!(url.port_or_known_default(), None);
    ///
    /// let url = Url::parse("foo://example.com:1456")?;
    /// assert_eq!(url.port_or_known_default(), Some(1456));
    ///
    /// let url = Url::parse("https://example.com")?;
    /// assert_eq!(url.port_or_known_default(), Some(443));
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    #[inline]
    pub fn port_or_known_default(&self) -> Option<u16> {
        // An explicit port wins; the scheme default is only looked up when none is stored.
        self.port.or_else(|| parser::default_port(self.scheme()))
    }

    /// Resolve a URL’s host and port number to `SocketAddr`.
    ///
    /// If the URL has the default port number of a scheme that is unknown to this library,
    /// `default_port_number` provides an opportunity to provide the actual port number.
    /// In non-example code this should be implemented either simply as `|| None`,
    /// or by matching on the URL’s `.scheme()`.
    ///
    /// If the host is a domain, it is resolved using the standard library’s DNS support.
///
    /// # Examples
    ///
    /// ```no_run
    /// let url = url::Url::parse("https://example.net/").unwrap();
    /// let addrs = url.socket_addrs(|| None).unwrap();
    /// std::net::TcpStream::connect(&*addrs)
    /// # ;
    /// ```
    ///
    /// ```
    /// /// With application-specific known default port numbers
    /// fn socket_addrs(url: url::Url) -> std::io::Result<Vec<std::net::SocketAddr>> {
    ///     url.socket_addrs(|| match url.scheme() {
    ///         "socks5" | "socks5h" => Some(1080),
    ///         _ => None,
    ///     })
    /// }
    /// ```
    ///
    /// # Errors
    ///
    /// Returns an `io::Error` of kind `InvalidData` when the URL has no host,
    /// or when no port number can be determined. Errors from resolving a
    /// domain host are propagated as they are.
    #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
    pub fn socket_addrs(
        &self,
        default_port_number: impl Fn() -> Option<u16>,
    ) -> io::Result<Vec<SocketAddr>> {
        // Note: trying to avoid the Vec allocation by returning `impl AsRef<[SocketAddr]>`
        // causes borrowck issues because the return value borrows `default_port_number`:
        //
        // https://github.com/rust-lang/rfcs/blob/master/text/1951-expand-impl-trait.md#scoping-for-type-and-lifetime-parameters
        //
        // > This RFC proposes that *all* type parameters are considered in scope
        // > for `impl Trait` in return position

        /// Turn a missing value into an `InvalidData` I/O error carrying `message`.
        fn io_result<T>(opt: Option<T>, message: &str) -> io::Result<T> {
            opt.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, message))
        }

        let host = io_result(self.host(), "No host name in the URL")?;
        let port = io_result(
            self.port_or_known_default().or_else(default_port_number),
            "No port number in the URL",
        )?;
        Ok(match host {
            Host::Domain(domain) => (domain, port).to_socket_addrs()?.collect(),
            Host::Ipv4(ip) => vec![(ip, port).into()],
            Host::Ipv6(ip) => vec![(ip, port).into()],
        })
    }

    /// Return the path for this URL, as a percent-encoded ASCII string.
    /// For cannot-be-a-base URLs, this is an arbitrary string that doesn’t start with '/'.
    /// For other URLs, this starts with a '/' slash
    /// and continues with slash-separated path segments.
///
    /// # Examples
    ///
    /// ```rust
    /// use url::{Url, ParseError};
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let url = Url::parse("https://example.com/api/versions?page=2")?;
    /// assert_eq!(url.path(), "/api/versions");
    ///
    /// let url = Url::parse("https://example.com")?;
    /// assert_eq!(url.path(), "/");
    ///
    /// let url = Url::parse("https://example.com/countries/việt nam")?;
    /// assert_eq!(url.path(), "/countries/vi%E1%BB%87t%20nam");
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    pub fn path(&self) -> &str {
        // The path runs up to the query if there is one, else up to the
        // fragment, else to the end of the serialization.
        let path_end = self.query_start.or(self.fragment_start);
        match path_end {
            Some(end) => self.slice(self.path_start..end),
            None => self.slice(self.path_start..),
        }
    }

    /// Unless this URL is cannot-be-a-base,
    /// return an iterator of '/' slash-separated path segments,
    /// each as a percent-encoded ASCII string.
    ///
    /// Return `None` for cannot-be-a-base URLs.
    ///
    /// When `Some` is returned, the iterator always contains at least one string
    /// (which may be empty).
///
    /// # Examples
    ///
    /// ```
    /// use url::Url;
    /// # use std::error::Error;
    ///
    /// # fn run() -> Result<(), Box<dyn Error>> {
    /// let url = Url::parse("https://example.com/foo/bar")?;
    /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
    /// assert_eq!(path_segments.next(), Some("foo"));
    /// assert_eq!(path_segments.next(), Some("bar"));
    /// assert_eq!(path_segments.next(), None);
    ///
    /// let url = Url::parse("https://example.com")?;
    /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
    /// assert_eq!(path_segments.next(), Some(""));
    /// assert_eq!(path_segments.next(), None);
    ///
    /// let url = Url::parse("data:text/plain,HelloWorld")?;
    /// assert!(url.path_segments().is_none());
    ///
    /// let url = Url::parse("https://example.com/countries/việt nam")?;
    /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
    /// assert_eq!(path_segments.next(), Some("countries"));
    /// assert_eq!(path_segments.next(), Some("vi%E1%BB%87t%20nam"));
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    pub fn path_segments(&self) -> Option<str::Split<'_, char>> {
        let path = self.path();
        // A path without a leading '/' yields `None`; otherwise split what follows it.
        path.strip_prefix('/').map(|remainder| remainder.split('/'))
    }

    /// Return this URL’s query string, if any, as a percent-encoded ASCII string.
///
    /// # Examples
    ///
    /// ```rust
    /// use url::Url;
    /// # use url::ParseError;
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let url = Url::parse("https://example.com/products?page=2")?;
    /// let query = url.query();
    /// assert_eq!(query, Some("page=2"));
    ///
    /// let url = Url::parse("https://example.com/products")?;
    /// let query = url.query();
    /// assert!(query.is_none());
    ///
    /// let url = Url::parse("https://example.com/?country=español")?;
    /// let query = url.query();
    /// assert_eq!(query, Some("country=espa%C3%B1ol"));
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    pub fn query(&self) -> Option<&str> {
        let query_start = self.query_start?;
        debug_assert!(self.byte_at(query_start) == b'?');

        // Skip the '?' itself; the query ends at the fragment, if any.
        let after_mark = query_start + 1;
        Some(match self.fragment_start {
            Some(fragment_start) => self.slice(after_mark..fragment_start),
            None => self.slice(after_mark..),
        })
    }

    /// Parse the URL’s query string, if any, as `application/x-www-form-urlencoded`
    /// and return an iterator of (key, value) pairs.
    ///
    /// A URL without a query is parsed as if its query were empty.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use std::borrow::Cow;
    ///
    /// use url::Url;
    /// # use url::ParseError;
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let url = Url::parse("https://example.com/products?page=2&sort=desc")?;
    /// let mut pairs = url.query_pairs();
    ///
    /// assert_eq!(pairs.count(), 2);
    ///
    /// assert_eq!(pairs.next(), Some((Cow::Borrowed("page"), Cow::Borrowed("2"))));
    /// assert_eq!(pairs.next(), Some((Cow::Borrowed("sort"), Cow::Borrowed("desc"))));
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    #[inline]
    pub fn query_pairs(&self) -> form_urlencoded::Parse<'_> {
        let raw_query = self.query().unwrap_or("");
        form_urlencoded::parse(raw_query.as_bytes())
    }

    /// Return this URL’s fragment identifier, if any.
    ///
    /// A fragment is the part of the URL after the `#` symbol.
/// The fragment is optional and, if present, contains a fragment identifier
    /// that identifies a secondary resource, such as a section heading
    /// of a document.
    ///
    /// In HTML, the fragment identifier is usually the id attribute of an element
    /// that is scrolled to on load. Browsers typically will not send the fragment portion
    /// of a URL to the server.
    ///
    /// **Note:** the parser did *not* percent-encode this component,
    /// but the input may have been percent-encoded already.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use url::Url;
    /// # use url::ParseError;
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let url = Url::parse("https://example.com/data.csv#row=4")?;
    ///
    /// assert_eq!(url.fragment(), Some("row=4"));
    ///
    /// let url = Url::parse("https://example.com/data.csv#cell=4,1-6,2")?;
    ///
    /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    pub fn fragment(&self) -> Option<&str> {
        self.fragment_start.map(|start| {
            debug_assert!(self.byte_at(start) == b'#');
            // Skip the '#' itself; the fragment extends to the end of the serialization.
            self.slice(start + 1..)
        })
    }

    /// Run `f` against a setter [`Parser`] that temporarily owns this URL’s serialization.
    ///
    /// The serialization is moved out of `self` (leaving an empty string behind),
    /// handed to `Parser::for_setter`, and moved back once `f` returns.
    /// The value returned by `f` is passed through to the caller.
    fn mutate<F: FnOnce(&mut Parser<'_>) -> R, R>(&mut self, f: F) -> R {
        let mut parser = Parser::for_setter(mem::take(&mut self.serialization));
        let result = f(&mut parser);
        self.serialization = parser.serialization;
        result
    }

    /// Change this URL’s fragment identifier.
///
    /// # Examples
    ///
    /// ```rust
    /// use url::Url;
    /// # use url::ParseError;
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let mut url = Url::parse("https://example.com/data.csv")?;
    /// assert_eq!(url.as_str(), "https://example.com/data.csv");

    /// url.set_fragment(Some("cell=4,1-6,2"));
    /// assert_eq!(url.as_str(), "https://example.com/data.csv#cell=4,1-6,2");
    /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
    ///
    /// url.set_fragment(None);
    /// assert_eq!(url.as_str(), "https://example.com/data.csv");
    /// assert!(url.fragment().is_none());
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    pub fn set_fragment(&mut self, fragment: Option<&str>) {
        // Remove any previous fragment
        if let Some(start) = self.fragment_start {
            debug_assert!(self.byte_at(start) == b'#');
            self.serialization.truncate(start as usize);
        }
        // Write the new one
        if let Some(input) = fragment {
            self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
            self.serialization.push('#');
            self.mutate(|parser| parser.parse_fragment(parser::Input::new_no_trim(input)))
        } else {
            self.fragment_start = None;
            self.strip_trailing_spaces_from_opaque_path();
        }
    }

    /// Detach the fragment (without its leading `#`) from the serialization and return it.
    ///
    /// Leaves `fragment_start` as `None`. Returns `None` if there was no fragment.
    fn take_fragment(&mut self) -> Option<String> {
        self.fragment_start.take().map(|start| {
            debug_assert!(self.byte_at(start) == b'#');
            let fragment = self.slice(start + 1..).to_owned();
            self.serialization.truncate(start as usize);
            fragment
        })
    }

    /// Re-append a fragment previously removed with `take_fragment`.
    ///
    /// The text is pushed as-is, without going through the fragment parser again.
    /// Does nothing when `fragment` is `None`.
    fn restore_already_parsed_fragment(&mut self, fragment: Option<String>) {
        if let Some(ref fragment) = fragment {
            assert!(self.fragment_start.is_none());
            self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
            self.serialization.push('#');
            self.serialization.push_str(fragment);
        }
    }

    /// Change this URL’s query string. If `query` is `None`, this URL's
    /// query string will be cleared.
///
    /// # Examples
    ///
    /// ```rust
    /// use url::Url;
    /// # use url::ParseError;
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let mut url = Url::parse("https://example.com/products")?;
    /// assert_eq!(url.as_str(), "https://example.com/products");
    ///
    /// url.set_query(Some("page=2"));
    /// assert_eq!(url.as_str(), "https://example.com/products?page=2");
    /// assert_eq!(url.query(), Some("page=2"));
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    pub fn set_query(&mut self, query: Option<&str>) {
        // Set the fragment aside so the query becomes the tail of the serialization.
        let saved_fragment = self.take_fragment();

        // Drop whatever query was there before.
        if let Some(old_query_start) = self.query_start.take() {
            debug_assert!(self.byte_at(old_query_start) == b'?');
            self.serialization.truncate(old_query_start as usize);
        }

        match query {
            // Append the new query through the query parser.
            Some(new_query) => {
                self.query_start = Some(to_u32(self.serialization.len()).unwrap());
                self.serialization.push('?');
                let scheme_type = SchemeType::from(self.scheme());
                let scheme_end = self.scheme_end;
                self.mutate(|parser| {
                    let violation_fn = parser.violation_fn;
                    let query_input =
                        parser::Input::new_trim_tab_and_newlines(new_query, violation_fn);
                    parser.parse_query(scheme_type, scheme_end, query_input)
                });
            }
            // No query: with no fragment either, the path is now the end of the URL.
            None => {
                self.query_start = None;
                if saved_fragment.is_none() {
                    self.strip_trailing_spaces_from_opaque_path();
                }
            }
        }

        self.restore_already_parsed_fragment(saved_fragment);
    }

    /// Manipulate this URL’s query string, viewed as a sequence of name/value pairs
    /// in `application/x-www-form-urlencoded` syntax.
///
    /// The return value has a method-chaining API:
    ///
    /// ```rust
    /// # use url::{Url, ParseError};
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let mut url = Url::parse("https://example.net?lang=fr#nav")?;
    /// assert_eq!(url.query(), Some("lang=fr"));
    ///
    /// url.query_pairs_mut().append_pair("foo", "bar");
    /// assert_eq!(url.query(), Some("lang=fr&foo=bar"));
    /// assert_eq!(url.as_str(), "https://example.net/?lang=fr&foo=bar#nav");
    ///
    /// url.query_pairs_mut()
    ///     .clear()
    ///     .append_pair("foo", "bar & baz")
    ///     .append_pair("saisons", "\u{00C9}t\u{00E9}+hiver");
    /// assert_eq!(url.query(), Some("foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver"));
    /// assert_eq!(url.as_str(),
    ///            "https://example.net/?foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver#nav");
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    ///
    /// Note: `url.query_pairs_mut().clear();` is equivalent to `url.set_query(Some(""))`,
    /// not `url.set_query(None)`.
    ///
    /// The state of `Url` is unspecified if this return value is leaked without being dropped.
    pub fn query_pairs_mut(&mut self) -> form_urlencoded::Serializer<'_, UrlQuery<'_>> {
        // The fragment is carried by the returned `UrlQuery` while the query is edited.
        let fragment = self.take_fragment();

        let question_mark_pos = match self.query_start {
            Some(existing) => {
                debug_assert!(self.byte_at(existing) == b'?');
                existing as usize
            }
            None => {
                // No query yet: start an empty one at the end of the serialization.
                let appended_at = self.serialization.len();
                self.query_start = Some(to_u32(appended_at).unwrap());
                self.serialization.push('?');
                appended_at
            }
        };

        let target = UrlQuery {
            url: Some(self),
            fragment,
        };
        form_urlencoded::Serializer::for_suffix(target, question_mark_pos + "?".len())
    }

    /// Cut everything after the path (query and/or fragment, including the leading
    /// `?` or `#`) out of the serialization and return it.
    ///
    /// Returns an empty string when the URL has neither a query nor a fragment.
    /// `query_start` and `fragment_start` are left untouched.
    fn take_after_path(&mut self) -> String {
        let cut_at = match (self.query_start, self.fragment_start) {
            (Some(i), _) | (None, Some(i)) => i,
            (None, None) => return String::new(),
        };
        let after_path = self.slice(cut_at..).to_owned();
        self.serialization.truncate(cut_at as usize);
        after_path
    }

    /// Change this URL’s path.
/// /// # Examples /// /// ```rust /// use url::Url; /// # use url::ParseError; /// /// # fn run() -> Result<(), ParseError> { /// let mut url = Url::parse("https://example.com")?; /// url.set_path("api/comments"); /// assert_eq!(url.as_str(), "https://example.com/api/comments"); /// assert_eq!(url.path(), "/api/comments"); /// /// let mut url = Url::parse("https://example.com/api")?; /// url.set_path("data/report.csv"); /// assert_eq!(url.as_str(), "https://example.com/data/report.csv"); /// assert_eq!(url.path(), "/data/report.csv"); /// /// // `set_path` percent-encodes the given string if it's not already percent-encoded. /// let mut url = Url::parse("https://example.com")?; /// url.set_path("api/some comments"); /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments"); /// assert_eq!(url.path(), "/api/some%20comments"); /// /// // `set_path` will not double percent-encode the string if it's already percent-encoded. /// let mut url = Url::parse("https://example.com")?; /// url.set_path("api/some%20comments"); /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments"); /// assert_eq!(url.path(), "/api/some%20comments"); /// /// # Ok(()) /// # } /// # run().unwrap(); /// ``` pub fn set_path(&mut self, mut path: &str) { let after_path = self.take_after_path(); let old_after_path_pos = to_u32(self.serialization.len()).unwrap(); let cannot_be_a_base = self.cannot_be_a_base(); let scheme_type = SchemeType::from(self.scheme()); self.serialization.truncate(self.path_start as usize); self.mutate(|parser| { if cannot_be_a_base { if path.starts_with('/') { parser.serialization.push_str("%2F"); path = &path[1..]; } parser.parse_cannot_be_a_base_path(parser::Input::new_no_trim(path)); } else { let mut has_host = true; // FIXME parser.parse_path_start( scheme_type, &mut has_host, parser::Input::new_no_trim(path), ); } }); self.restore_after_path(old_after_path_pos, &after_path); } /// Return an object with methods to manipulate this URL’s path 
segments. /// /// Return `Err(())` if this URL is cannot-be-a-base. #[allow(clippy::result_unit_err)] pub fn path_segments_mut(&mut self) -> Result, ()> { if self.cannot_be_a_base() { Err(()) } else { Ok(path_segments::new(self)) } } fn restore_after_path(&mut self, old_after_path_position: u32, after_path: &str) { let new_after_path_position = to_u32(self.serialization.len()).unwrap(); let adjust = |index: &mut u32| { *index -= old_after_path_position; *index += new_after_path_position; }; if let Some(ref mut index) = self.query_start { adjust(index) } if let Some(ref mut index) = self.fragment_start { adjust(index) } self.serialization.push_str(after_path) } /// Change this URL’s port number. /// /// Note that default port numbers are not reflected in the serialization. /// /// If this URL is cannot-be-a-base, does not have a host, or has the `file` scheme; /// do nothing and return `Err`. /// /// # Examples /// /// ``` /// use url::Url; /// # use std::error::Error; /// /// # fn run() -> Result<(), Box> { /// let mut url = Url::parse("ssh://example.net:2048/")?; /// /// url.set_port(Some(4096)).map_err(|_| "cannot be base")?; /// assert_eq!(url.as_str(), "ssh://example.net:4096/"); /// /// url.set_port(None).map_err(|_| "cannot be base")?; /// assert_eq!(url.as_str(), "ssh://example.net/"); /// # Ok(()) /// # } /// # run().unwrap(); /// ``` /// /// Known default port numbers are not reflected: /// /// ```rust /// use url::Url; /// # use std::error::Error; /// /// # fn run() -> Result<(), Box> { /// let mut url = Url::parse("https://example.org/")?; /// /// url.set_port(Some(443)).map_err(|_| "cannot be base")?; /// assert!(url.port().is_none()); /// # Ok(()) /// # } /// # run().unwrap(); /// ``` /// /// Cannot set port for cannot-be-a-base URLs: /// /// ``` /// use url::Url; /// # use url::ParseError; /// /// # fn run() -> Result<(), ParseError> { /// let mut url = Url::parse("mailto:rms@example.net")?; /// /// let result = url.set_port(Some(80)); /// 
assert!(result.is_err()); /// /// let result = url.set_port(None); /// assert!(result.is_err()); /// # Ok(()) /// # } /// # run().unwrap(); /// ``` #[allow(clippy::result_unit_err)] pub fn set_port(&mut self, mut port: Option) -> Result<(), ()> { // has_host implies !cannot_be_a_base if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" { return Err(()); } if port.is_some() && port == parser::default_port(self.scheme()) { port = None } self.set_port_internal(port); Ok(()) } fn set_port_internal(&mut self, port: Option) { match (self.port, port) { (None, None) => {} (Some(_), None) => { self.serialization .drain(self.host_end as usize..self.path_start as usize); let offset = self.path_start - self.host_end; self.path_start = self.host_end; if let Some(ref mut index) = self.query_start { *index -= offset } if let Some(ref mut index) = self.fragment_start { *index -= offset } } (Some(old), Some(new)) if old == new => {} (_, Some(new)) => { let path_and_after = self.slice(self.path_start..).to_owned(); self.serialization.truncate(self.host_end as usize); write!(&mut self.serialization, ":{}", new).unwrap(); let old_path_start = self.path_start; let new_path_start = to_u32(self.serialization.len()).unwrap(); self.path_start = new_path_start; let adjust = |index: &mut u32| { *index -= old_path_start; *index += new_path_start; }; if let Some(ref mut index) = self.query_start { adjust(index) } if let Some(ref mut index) = self.fragment_start { adjust(index) } self.serialization.push_str(&path_and_after); } } self.port = port; } /// Change this URL’s host. /// /// Removing the host (calling this with `None`) /// will also remove any username, password, and port number. 
///
    /// # Examples
    ///
    /// Change host:
    ///
    /// ```
    /// use url::Url;
    /// # use url::ParseError;
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let mut url = Url::parse("https://example.net")?;
    /// let result = url.set_host(Some("rust-lang.org"));
    /// assert!(result.is_ok());
    /// assert_eq!(url.as_str(), "https://rust-lang.org/");
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    ///
    /// Remove host:
    ///
    /// ```
    /// use url::Url;
    /// # use url::ParseError;
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let mut url = Url::parse("foo://example.net")?;
    /// let result = url.set_host(None);
    /// assert!(result.is_ok());
    /// assert_eq!(url.as_str(), "foo:/");
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    ///
    /// Cannot remove host for 'special' schemes (e.g. `http`):
    ///
    /// ```
    /// use url::Url;
    /// # use url::ParseError;
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let mut url = Url::parse("https://example.net")?;
    /// let result = url.set_host(None);
    /// assert!(result.is_err());
    /// assert_eq!(url.as_str(), "https://example.net/");
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    ///
    /// Cannot change or remove host for cannot-be-a-base URLs:
    ///
    /// ```
    /// use url::Url;
    /// # use url::ParseError;
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let mut url = Url::parse("mailto:rms@example.net")?;
    ///
    /// let result = url.set_host(Some("rust-lang.org"));
    /// assert!(result.is_err());
    /// assert_eq!(url.as_str(), "mailto:rms@example.net");
    ///
    /// let result = url.set_host(None);
    /// assert!(result.is_err());
    /// assert_eq!(url.as_str(), "mailto:rms@example.net");
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    ///
    /// # Errors
    ///
    /// If this URL is cannot-be-a-base or there is an error parsing the given `host`,
    /// a [`ParseError`] variant will be returned.
    /// An empty or removed host on a special, non-`file` scheme yields
    /// [`ParseError::EmptyHost`].
    ///
    /// [`ParseError`]: enum.ParseError.html
    pub fn set_host(&mut self, host: Option<&str>) -> Result<(), ParseError> {
        if self.cannot_be_a_base() {
            return Err(ParseError::SetHostOnCannotBeABaseUrl);
        }

        let scheme_type = SchemeType::from(self.scheme());

        if let Some(host) = host {
            // Special schemes other than `file` require a non-empty host.
            if host.is_empty() && scheme_type.is_special() && !scheme_type.is_file() {
                return Err(ParseError::EmptyHost);
            }
            let mut host_substr = host;
            // Otherwise, if c is U+003A (:) and the [] flag is unset, then
            // (input fully wrapped in `[...]` is left intact, so colons inside it are kept)
            if !host.starts_with('[') || !host.ends_with(']') {
                match host.find(':') {
                    Some(0) => {
                        // If buffer is the empty string, validation error, return failure.
                        return Err(ParseError::InvalidDomainCharacter);
                    }
                    // Let host be the result of host parsing buffer
                    // (everything from the first ':' onwards is ignored)
                    Some(colon_index) => {
                        host_substr = &host[..colon_index];
                    }
                    None => {}
                }
            }
            // Special schemes use the full host parser; other schemes use the opaque one.
            if SchemeType::from(self.scheme()).is_special() {
                self.set_host_internal(Host::parse(host_substr)?, None);
            } else {
                self.set_host_internal(Host::parse_opaque(host_substr)?, None);
            }
        } else if self.has_host() {
            if scheme_type.is_special() && !scheme_type.is_file() {
                return Err(ParseError::EmptyHost);
            } else if self.serialization.len() == self.path_start as usize {
                // The serialization ends right at the path start: give it a "/" path.
                self.serialization.push('/');
            }
            debug_assert!(self.byte_at(self.scheme_end) == b':');
            debug_assert!(self.byte_at(self.path_start) == b'/');

            // `file` URLs keep the "//" after "scheme:"; other schemes keep only the ':'.
            let new_path_start = if scheme_type.is_file() {
                self.scheme_end + 3
            } else {
                self.scheme_end + 1
            };

            // Drop everything between the new path start and the old one,
            // then collapse all authority indices onto the new path start.
            self.serialization
                .drain(new_path_start as usize..self.path_start as usize);
            let offset = self.path_start - new_path_start;
            self.path_start = new_path_start;
            self.username_end = new_path_start;
            self.host_start = new_path_start;
            self.host_end = new_path_start;
            self.port = None;
            // NOTE(review): `self.host` is not reassigned in this branch, unlike in
            // `set_host_internal` — confirm whether it should be reset here as well.
            if let Some(ref mut index) = self.query_start {
                *index -= offset
            }
            if let Some(ref mut index) = self.fragment_start {
                *index -= offset
            }
        }
        Ok(())
    }

    /// opt_new_port: None means leave unchanged, Some(None) means remove any port number.
fn set_host_internal(&mut self, host: Host, opt_new_port: Option>) { let old_suffix_pos = if opt_new_port.is_some() { self.path_start } else { self.host_end }; let suffix = self.slice(old_suffix_pos..).to_owned(); self.serialization.truncate(self.host_start as usize); if !self.has_authority() { debug_assert!(self.slice(self.scheme_end..self.host_start) == ":"); debug_assert!(self.username_end == self.host_start); self.serialization.push('/'); self.serialization.push('/'); self.username_end += 2; self.host_start += 2; } write!(&mut self.serialization, "{}", host).unwrap(); self.host_end = to_u32(self.serialization.len()).unwrap(); self.host = host.into(); if let Some(new_port) = opt_new_port { self.port = new_port; if let Some(port) = new_port { write!(&mut self.serialization, ":{}", port).unwrap(); } } let new_suffix_pos = to_u32(self.serialization.len()).unwrap(); self.serialization.push_str(&suffix); let adjust = |index: &mut u32| { *index -= old_suffix_pos; *index += new_suffix_pos; }; adjust(&mut self.path_start); if let Some(ref mut index) = self.query_start { adjust(index) } if let Some(ref mut index) = self.fragment_start { adjust(index) } } /// Change this URL’s host to the given IP address. /// /// If this URL is cannot-be-a-base, do nothing and return `Err`. /// /// Compared to `Url::set_host`, this skips the host parser. 
///
    /// # Examples
    ///
    /// ```rust
    /// use url::{Url, ParseError};
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let mut url = Url::parse("http://example.com")?;
    /// url.set_ip_host("127.0.0.1".parse().unwrap());
    /// assert_eq!(url.host_str(), Some("127.0.0.1"));
    /// assert_eq!(url.as_str(), "http://127.0.0.1/");
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    ///
    /// Cannot change the host of a cannot-be-a-base URL (e.g. `mailto:`) to an IP address:
    ///
    /// ```rust
    /// use url::{Url, ParseError};
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let mut url = Url::parse("mailto:rms@example.com")?;
    /// let result = url.set_ip_host("127.0.0.1".parse().unwrap());
    ///
    /// assert_eq!(url.as_str(), "mailto:rms@example.com");
    /// assert!(result.is_err());
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    ///
    #[allow(clippy::result_unit_err)]
    pub fn set_ip_host(&mut self, address: IpAddr) -> Result<(), ()> {
        if self.cannot_be_a_base() {
            return Err(());
        }

        // Wrap the address in the matching `Host` variant; no host parsing is involved.
        let ip_host = match address {
            IpAddr::V4(v4) => Host::Ipv4(v4),
            IpAddr::V6(v6) => Host::Ipv6(v6),
        };
        // `None`: leave any existing port untouched.
        self.set_host_internal(ip_host, None);
        Ok(())
    }

    /// Change this URL’s password.
    ///
    /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`.
///
    /// The same applies when the host is the empty domain or the scheme is `file`.
    /// Passing `None` or an empty string removes the password.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use url::{Url, ParseError};
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let mut url = Url::parse("mailto:rmz@example.com")?;
    /// let result = url.set_password(Some("secret_password"));
    /// assert!(result.is_err());
    ///
    /// let mut url = Url::parse("ftp://user1:secret1@example.com")?;
    /// let result = url.set_password(Some("secret_password"));
    /// assert_eq!(url.password(), Some("secret_password"));
    ///
    /// let mut url = Url::parse("ftp://user2:@example.com")?;
    /// let result = url.set_password(Some("secret2"));
    /// assert!(result.is_ok());
    /// assert_eq!(url.password(), Some("secret2"));
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    #[allow(clippy::result_unit_err)]
    pub fn set_password(&mut self, password: Option<&str>) -> Result<(), ()> {
        // has_host implies !cannot_be_a_base
        if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
            return Err(());
        }
        // `None` and `Some("")` are handled identically from here on.
        let password = password.unwrap_or_default();
        if !password.is_empty() {
            // Rebuild "<username>:<password>@" and re-append the host and what follows.
            let host_and_after = self.slice(self.host_start..).to_owned();
            self.serialization.truncate(self.username_end as usize);
            self.serialization.push(':');
            self.serialization
                .extend(utf8_percent_encode(password, USERINFO));
            self.serialization.push('@');

            let old_host_start = self.host_start;
            let new_host_start = to_u32(self.serialization.len()).unwrap();
            // Every index at or after the host moves by the same amount.
            let adjust = |index: &mut u32| {
                *index -= old_host_start;
                *index += new_host_start;
            };
            self.host_start = new_host_start;
            adjust(&mut self.host_end);
            adjust(&mut self.path_start);
            if let Some(ref mut index) = self.query_start {
                adjust(index)
            }
            if let Some(ref mut index) = self.fragment_start {
                adjust(index)
            }

            self.serialization.push_str(&host_and_after);
        } else if self.byte_at(self.username_end) == b':' {
            // If there is a password to remove
            let has_username_or_password = self.byte_at(self.host_start - 1) == b'@';
            debug_assert!(has_username_or_password);
            let username_start = self.scheme_end + 3;
            let empty_username = username_start == self.username_end;
            let start = self.username_end; // Remove the ':'
            let end = if empty_username {
                self.host_start // Remove the '@' as well
            } else {
                self.host_start - 1 // Keep the '@' to separate the username from the host
            };
            self.serialization.drain(start as usize..end as usize);
            let offset = end - start;
            self.host_start -= offset;
            self.host_end -= offset;
            self.path_start -= offset;
            if let Some(ref mut index) = self.query_start {
                *index -= offset
            }
            if let Some(ref mut index) = self.fragment_start {
                *index -= offset
            }
        }
        Ok(())
    }

    /// Change this URL’s username.
    ///
    /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`.
    /// The same applies when the host is the empty domain or the scheme is `file`.
    ///
    /// # Examples
    ///
    /// Cannot set a username on a cannot-be-a-base URL such as `mailto:`:
    ///
    /// ```rust
    /// use url::{Url, ParseError};
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let mut url = Url::parse("mailto:rmz@example.com")?;
    /// let result = url.set_username("user1");
    /// assert_eq!(url.as_str(), "mailto:rmz@example.com");
    /// assert!(result.is_err());
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    ///
    /// Set the username to `user1`:
    ///
    /// ```rust
    /// use url::{Url, ParseError};
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let mut url = Url::parse("ftp://:secre1@example.com/")?;
    /// let result = url.set_username("user1");
    /// assert!(result.is_ok());
    /// assert_eq!(url.username(), "user1");
    /// assert_eq!(url.as_str(), "ftp://user1:secre1@example.com/");
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    #[allow(clippy::result_unit_err)]
    pub fn set_username(&mut self, username: &str) -> Result<(), ()> {
        // has_host implies !cannot_be_a_base
        if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
            return Err(());
        }
        let username_start = self.scheme_end + 3;
        debug_assert!(self.slice(self.scheme_end..username_start) == "://");
        // Nothing to do when the serialized username already equals the input.
        if self.slice(username_start..self.username_end) == username {
            return Ok(());
        }
        let after_username = self.slice(self.username_end..).to_owned();
        self.serialization.truncate(username_start as usize);
        self.serialization
            .extend(utf8_percent_encode(username, USERINFO));

        // `removed_bytes` / `added_bytes` start as the old / new end of the username and
        // are bumped by one below when an '@' separator is dropped or inserted.
        let mut removed_bytes = self.username_end;
        self.username_end = to_u32(self.serialization.len()).unwrap();
        let mut added_bytes = self.username_end;

        let new_username_is_empty = self.username_end == username_start;
        match (new_username_is_empty, after_username.chars().next()) {
            // Username removed and there is no password: drop the now useless '@'.
            (true, Some('@')) => {
                removed_bytes += 1;
                self.serialization.push_str(&after_username[1..]);
            }
            // The separator that follows is already right (or none is needed).
            (false, Some('@')) | (_, Some(':')) | (true, _) => {
                self.serialization.push_str(&after_username);
            }
            // There were no credentials before: an '@' must be inserted before the host.
            (false, _) => {
                added_bytes += 1;
                self.serialization.push('@');
                self.serialization.push_str(&after_username);
            }
        }

        let adjust = |index: &mut u32| {
            *index -= removed_bytes;
            *index += added_bytes;
        };
        adjust(&mut self.host_start);
        adjust(&mut self.host_end);
        adjust(&mut self.path_start);
        if let Some(ref mut index) = self.query_start {
            adjust(index)
        }
        if let Some(ref mut index) = self.fragment_start {
            adjust(index)
        }
        Ok(())
    }

    /// Change this URL’s scheme.
    ///
    /// Do nothing and return `Err` under the following circumstances:
    ///
    /// * If the new scheme is not in `[a-zA-Z][a-zA-Z0-9+.-]+`
    /// * If this URL is cannot-be-a-base and the new scheme is one of
    ///   `http`, `https`, `ws`, `wss` or `ftp`
    /// * If either the old or new scheme is `http`, `https`, `ws`,
    ///   `wss` or `ftp` and the other is not one of these
    /// * If the new scheme is `file` and this URL includes credentials
    ///   or has a non-null port
    /// * If this URL's scheme is `file` and its host is empty or null
    ///
    /// See also [the URL specification's section on legal scheme state
    /// overrides](https://url.spec.whatwg.org/#scheme-state).
///
    /// # Examples
    ///
    /// Change the URL’s scheme from `https` to `http`:
    ///
    /// ```
    /// use url::Url;
    /// # use url::ParseError;
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let mut url = Url::parse("https://example.net")?;
    /// let result = url.set_scheme("http");
    /// assert_eq!(url.as_str(), "http://example.net/");
    /// assert!(result.is_ok());
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    ///
    /// Change the URL’s scheme from `foo` to `bar`:
    ///
    /// ```
    /// use url::Url;
    /// # use url::ParseError;
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let mut url = Url::parse("foo://example.net")?;
    /// let result = url.set_scheme("bar");
    /// assert_eq!(url.as_str(), "bar://example.net");
    /// assert!(result.is_ok());
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    ///
    /// Cannot change URL’s scheme from `https` to `foõ`:
    ///
    /// ```
    /// use url::Url;
    /// # use url::ParseError;
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let mut url = Url::parse("https://example.net")?;
    /// let result = url.set_scheme("foõ");
    /// assert_eq!(url.as_str(), "https://example.net/");
    /// assert!(result.is_err());
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    ///
    /// Cannot change URL’s scheme from `mailto` (cannot-be-a-base) to `https`:
    ///
    /// ```
    /// use url::Url;
    /// # use url::ParseError;
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let mut url = Url::parse("mailto:rms@example.net")?;
    /// let result = url.set_scheme("https");
    /// assert_eq!(url.as_str(), "mailto:rms@example.net");
    /// assert!(result.is_err());
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    ///
    /// Cannot change the URL’s scheme from `foo` to `https`:
    ///
    /// ```
    /// use url::Url;
    /// # use url::ParseError;
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let mut url = Url::parse("foo://example.net")?;
    /// let result = url.set_scheme("https");
    /// assert_eq!(url.as_str(), "foo://example.net");
    /// assert!(result.is_err());
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    ///
    /// Cannot change the URL’s scheme from `http` to `foo`:
    ///
    /// ```
    /// use url::Url;
    /// # use url::ParseError;
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let mut url = Url::parse("http://example.net")?;
    /// let result = url.set_scheme("foo");
    /// assert_eq!(url.as_str(), "http://example.net/");
    /// assert!(result.is_err());
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    #[allow(clippy::result_unit_err, clippy::suspicious_operation_groupings)]
    pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> {
        // Parse the candidate scheme into a scratch parser; `remaining` is whatever
        // input the scheme parser did not consume.
        let mut parser = Parser::for_setter(String::new());
        let remaining = parser.parse_scheme(parser::Input::new_no_trim(scheme))?;
        let new_scheme_type = SchemeType::from(&parser.serialization);
        let old_scheme_type = SchemeType::from(self.scheme());
        // In the spec wording quoted below, "url’s scheme" is the old scheme and
        // "buffer" is the new one.
        // If url’s scheme is not a special scheme and buffer is a special scheme, then return.
        if (new_scheme_type.is_special() && !old_scheme_type.is_special()) ||
            // If url’s scheme is a special scheme and buffer is not a special scheme, then return.
            (!new_scheme_type.is_special() && old_scheme_type.is_special()) ||
            // If url includes credentials or has a non-null port, and buffer is "file", then return.
            // If url’s scheme is "file" and its host is an empty host or null, then return.
            (new_scheme_type.is_file() && self.has_authority())
        {
            return Err(());
        }

        // Reject trailing input after the scheme, and special schemes on host-less URLs.
        if !remaining.is_empty() || (!self.has_host() && new_scheme_type.is_special()) {
            return Err(());
        }
        let old_scheme_end = self.scheme_end;
        let new_scheme_end = to_u32(parser.serialization.len()).unwrap();
        // All component indices shift by the difference in scheme length.
        let adjust = |index: &mut u32| {
            *index -= old_scheme_end;
            *index += new_scheme_end;
        };

        self.scheme_end = new_scheme_end;
        adjust(&mut self.username_end);
        adjust(&mut self.host_start);
        adjust(&mut self.host_end);
        adjust(&mut self.path_start);
        if let Some(ref mut index) = self.query_start {
            adjust(index)
        }
        if let Some(ref mut index) = self.fragment_start {
            adjust(index)
        }

        // Splice: new scheme followed by everything after the old scheme.
        parser.serialization.push_str(self.slice(old_scheme_end..));
        self.serialization = parser.serialization;

        // Update the port so it can be removed
        // If it is the scheme's default
        // we don't mind it silently failing
        // if there was no port in the first place
        let previous_port = self.port();
        let _ = self.set_port(previous_port);

        Ok(())
    }

    /// Convert a file name as `std::path::Path` into an URL in the `file` scheme.
    ///
    /// This returns `Err` if the given path is not absolute or,
    /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
///
    /// # Examples
    ///
    /// On Unix-like platforms:
    ///
    /// ```
    /// # if cfg!(unix) {
    /// use url::Url;
    ///
    /// # fn run() -> Result<(), ()> {
    /// let url = Url::from_file_path("/tmp/foo.txt")?;
    /// assert_eq!(url.as_str(), "file:///tmp/foo.txt");
    ///
    /// let url = Url::from_file_path("../foo.txt");
    /// assert!(url.is_err());
    ///
    /// let url = Url::from_file_path("https://google.com/");
    /// assert!(url.is_err());
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// # }
    /// ```
    #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
    #[allow(clippy::result_unit_err)]
    pub fn from_file_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> {
        let mut serialization = "file://".to_owned();
        // The host (if any) starts right after "file://"; there are never credentials,
        // so `username_end` coincides with `host_start`.
        let host_start = serialization.len() as u32;
        let (host_end, host) = path_to_file_url_segments(path.as_ref(), &mut serialization)?;
        Ok(Url {
            serialization,
            scheme_end: "file".len() as u32,
            username_end: host_start,
            host_start,
            host_end,
            host,
            port: None,
            path_start: host_end,
            query_start: None,
            fragment_start: None,
        })
    }

    /// Convert a directory name as `std::path::Path` into an URL in the `file` scheme.
    ///
    /// This returns `Err` if the given path is not absolute or,
    /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
    ///
    /// Compared to `from_file_path`, this ensures that the URL’s path has a trailing slash
    /// so that the entire path is considered when using this URL as a base URL.
    ///
    /// For example:
    ///
    /// * `"index.html"` parsed with `Url::from_directory_path(Path::new("/var/www"))`
    ///   as the base URL is `file:///var/www/index.html`
    /// * `"index.html"` parsed with `Url::from_file_path(Path::new("/var/www"))`
    ///   as the base URL is `file:///var/index.html`, which might not be what was intended.
    ///
    /// Note that `std::path` does not consider trailing slashes significant
    /// and usually does not include them (e.g. in `Path::parent()`).
#[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] #[allow(clippy::result_unit_err)] pub fn from_directory_path>(path: P) -> Result { let mut url = Url::from_file_path(path)?; if !url.serialization.ends_with('/') { url.serialization.push('/') } Ok(url) } /// Serialize with Serde using the internal representation of the `Url` struct. /// /// The corresponding `deserialize_internal` method sacrifices some invariant-checking /// for speed, compared to the `Deserialize` trait impl. /// /// This method is only available if the `serde` Cargo feature is enabled. #[cfg(feature = "serde")] #[deny(unused)] pub fn serialize_internal(&self, serializer: S) -> Result where S: serde::Serializer, { use serde::Serialize; // Destructuring first lets us ensure that adding or removing fields forces this method // to be updated let Url { ref serialization, ref scheme_end, ref username_end, ref host_start, ref host_end, ref host, ref port, ref path_start, ref query_start, ref fragment_start, } = *self; ( serialization, scheme_end, username_end, host_start, host_end, host, port, path_start, query_start, fragment_start, ) .serialize(serializer) } /// Serialize with Serde using the internal representation of the `Url` struct. /// /// The corresponding `deserialize_internal` method sacrifices some invariant-checking /// for speed, compared to the `Deserialize` trait impl. /// /// This method is only available if the `serde` Cargo feature is enabled. 
#[cfg(feature = "serde")]
    #[deny(unused)]
    pub fn deserialize_internal<'de, D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use serde::de::{Deserialize, Error, Unexpected};
        // The tuple layout must match the one written by `serialize_internal`.
        let (
            serialization,
            scheme_end,
            username_end,
            host_start,
            host_end,
            host,
            port,
            path_start,
            query_start,
            fragment_start,
        ) = Deserialize::deserialize(deserializer)?;
        let url = Url {
            serialization,
            scheme_end,
            username_end,
            host_start,
            host_end,
            host,
            port,
            path_start,
            query_start,
            fragment_start,
        };
        // Invariants are verified only in builds with debug assertions enabled.
        if cfg!(debug_assertions) {
            url.check_invariants().map_err(|reason| {
                let reason: &str = &reason;
                Error::invalid_value(Unexpected::Other("value"), &reason)
            })?
        }
        Ok(url)
    }

    /// Assuming the URL is in the `file` scheme or similar,
    /// convert its path to an absolute `std::path::Path`.
    ///
    /// **Note:** This does not actually check the URL’s `scheme`,
    /// and may give nonsensical results for other schemes.
    /// It is the user’s responsibility to check the URL’s scheme before calling this.
    ///
    /// ```
    /// # use url::Url;
    /// # let url = Url::parse("file:///etc/passwd").unwrap();
    /// let path = url.to_file_path();
    /// ```
    ///
    /// Returns `Err` if the host is neither empty nor `"localhost"` (except on Windows, where
    /// `file:` URLs may have a non-local host),
    /// or if `Path::new_opt()` returns `None`.
    /// (That is, if the percent-decoded path contains a NUL byte or,
    /// for a Windows path, is not UTF-8.)
#[inline] #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] #[allow(clippy::result_unit_err)] pub fn to_file_path(&self) -> Result { if let Some(segments) = self.path_segments() { let host = match self.host() { None | Some(Host::Domain("localhost")) => None, Some(_) if cfg!(windows) && self.scheme() == "file" => { Some(&self.serialization[self.host_start as usize..self.host_end as usize]) } _ => return Err(()), }; return file_url_segments_to_pathbuf(host, segments); } Err(()) } // Private helper methods: #[inline] fn slice(&self, range: R) -> &str where R: RangeArg, { range.slice_of(&self.serialization) } #[inline] fn byte_at(&self, i: u32) -> u8 { self.serialization.as_bytes()[i as usize] } } /// Parse a string as an URL, without a base URL or encoding override. impl str::FromStr for Url { type Err = ParseError; #[inline] fn from_str(input: &str) -> Result { Url::parse(input) } } impl<'a> TryFrom<&'a str> for Url { type Error = ParseError; fn try_from(s: &'a str) -> Result { Url::parse(s) } } /// Display the serialization of this URL. impl fmt::Display for Url { #[inline] fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(&self.serialization, formatter) } } /// String conversion. impl From for String { fn from(value: Url) -> String { value.serialization } } /// Debug the serialization of this URL. impl fmt::Debug for Url { #[inline] fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter .debug_struct("Url") .field("scheme", &self.scheme()) .field("cannot_be_a_base", &self.cannot_be_a_base()) .field("username", &self.username()) .field("password", &self.password()) .field("host", &self.host()) .field("port", &self.port()) .field("path", &self.path()) .field("query", &self.query()) .field("fragment", &self.fragment()) .finish() } } /// URLs compare like their serialization. impl Eq for Url {} /// URLs compare like their serialization. 
impl PartialEq for Url {
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        self.serialization == other.serialization
    }
}

/// URLs compare like their serialization.
impl Ord for Url {
    #[inline]
    fn cmp(&self, other: &Self) -> cmp::Ordering {
        self.serialization.cmp(&other.serialization)
    }
}

/// URLs compare like their serialization.
impl PartialOrd for Url {
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
        // Delegates to `Ord` so the two orderings can never disagree.
        Some(self.cmp(other))
    }
}

/// URLs hash like their serialization.
impl hash::Hash for Url {
    #[inline]
    fn hash<H>(&self, state: &mut H)
    where
        H: hash::Hasher,
    {
        hash::Hash::hash(&self.serialization, state)
    }
}

/// Return the serialization of this URL.
impl AsRef<str> for Url {
    #[inline]
    fn as_ref(&self) -> &str {
        &self.serialization
    }
}

/// A range whose bounds are `u32` offsets and that can slice a `&str`.
///
/// Slicing panics, like ordinary `str` indexing, when a bound is out of range
/// or not on a character boundary.
trait RangeArg {
    fn slice_of<'a>(&self, s: &'a str) -> &'a str;
}

impl RangeArg for Range<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        &s[self.start as usize..self.end as usize]
    }
}

impl RangeArg for RangeFrom<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        &s[self.start as usize..]
    }
}

impl RangeArg for RangeTo<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        &s[..self.end as usize]
    }
}

/// Serializes this URL into a `serde` stream.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl serde::Serialize for Url {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        serializer.serialize_str(self.as_str())
    }
}

/// Deserializes this URL from a `serde` stream.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")] impl<'de> serde::Deserialize<'de> for Url { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de>, { use serde::de::{Error, Unexpected, Visitor}; struct UrlVisitor; impl<'de> Visitor<'de> for UrlVisitor { type Value = Url; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("a string representing an URL") } fn visit_str(self, s: &str) -> Result where E: Error, { Url::parse(s).map_err(|err| { let err_s = format!("{}", err); Error::invalid_value(Unexpected::Str(s), &err_s.as_str()) }) } } deserializer.deserialize_str(UrlVisitor) } } #[cfg(any(unix, target_os = "redox", target_os = "wasi"))] fn path_to_file_url_segments( path: &Path, serialization: &mut String, ) -> Result<(u32, HostInternal), ()> { #[cfg(any(unix, target_os = "redox"))] use std::os::unix::prelude::OsStrExt; #[cfg(target_os = "wasi")] use std::os::wasi::prelude::OsStrExt; if !path.is_absolute() { return Err(()); } let host_end = to_u32(serialization.len()).unwrap(); let mut empty = true; // skip the root component for component in path.components().skip(1) { empty = false; serialization.push('/'); serialization.extend(percent_encode( component.as_os_str().as_bytes(), SPECIAL_PATH_SEGMENT, )); } if empty { // An URL’s path must not be empty. 
serialization.push('/'); } Ok((host_end, HostInternal::None)) } #[cfg(windows)] fn path_to_file_url_segments( path: &Path, serialization: &mut String, ) -> Result<(u32, HostInternal), ()> { path_to_file_url_segments_windows(path, serialization) } // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102 #[cfg_attr(not(windows), allow(dead_code))] fn path_to_file_url_segments_windows( path: &Path, serialization: &mut String, ) -> Result<(u32, HostInternal), ()> { use std::path::{Component, Prefix}; if !path.is_absolute() { return Err(()); } let mut components = path.components(); let host_start = serialization.len() + 1; let host_end; let host_internal; match components.next() { Some(Component::Prefix(ref p)) => match p.kind() { Prefix::Disk(letter) | Prefix::VerbatimDisk(letter) => { host_end = to_u32(serialization.len()).unwrap(); host_internal = HostInternal::None; serialization.push('/'); serialization.push(letter as char); serialization.push(':'); } Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => { let host = Host::parse(server.to_str().ok_or(())?).map_err(|_| ())?; write!(serialization, "{}", host).unwrap(); host_end = to_u32(serialization.len()).unwrap(); host_internal = host.into(); serialization.push('/'); let share = share.to_str().ok_or(())?; serialization.extend(percent_encode(share.as_bytes(), PATH_SEGMENT)); } _ => return Err(()), }, _ => return Err(()), } let mut path_only_has_prefix = true; for component in components { if component == Component::RootDir { continue; } path_only_has_prefix = false; // FIXME: somehow work with non-unicode? let component = component.as_os_str().to_str().ok_or(())?; serialization.push('/'); serialization.extend(percent_encode(component.as_bytes(), PATH_SEGMENT)); } // A windows drive letter must end with a slash. 
if serialization.len() > host_start && parser::is_windows_drive_letter(&serialization[host_start..]) && path_only_has_prefix { serialization.push('/'); } Ok((host_end, host_internal)) } #[cfg(any(unix, target_os = "redox", target_os = "wasi"))] fn file_url_segments_to_pathbuf( host: Option<&str>, segments: str::Split<'_, char>, ) -> Result { use std::ffi::OsStr; #[cfg(any(unix, target_os = "redox"))] use std::os::unix::prelude::OsStrExt; #[cfg(target_os = "wasi")] use std::os::wasi::prelude::OsStrExt; if host.is_some() { return Err(()); } let mut bytes = if cfg!(target_os = "redox") { b"file:".to_vec() } else { Vec::new() }; for segment in segments { bytes.push(b'/'); bytes.extend(percent_decode(segment.as_bytes())); } // A windows drive letter must end with a slash. if bytes.len() > 2 && bytes[bytes.len() - 2].is_ascii_alphabetic() && matches!(bytes[bytes.len() - 1], b':' | b'|') { bytes.push(b'/'); } let os_str = OsStr::from_bytes(&bytes); let path = PathBuf::from(os_str); debug_assert!( path.is_absolute(), "to_file_path() failed to produce an absolute Path" ); Ok(path) } #[cfg(windows)] fn file_url_segments_to_pathbuf( host: Option<&str>, segments: str::Split, ) -> Result { file_url_segments_to_pathbuf_windows(host, segments) } // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102 #[cfg_attr(not(windows), allow(dead_code))] fn file_url_segments_to_pathbuf_windows( host: Option<&str>, mut segments: str::Split<'_, char>, ) -> Result { let mut string = if let Some(host) = host { r"\\".to_owned() + host } else { let first = segments.next().ok_or(())?; match first.len() { 2 => { if !first.starts_with(parser::ascii_alpha) || first.as_bytes()[1] != b':' { return Err(()); } first.to_owned() } 4 => { if !first.starts_with(parser::ascii_alpha) { return Err(()); } let bytes = first.as_bytes(); if bytes[1] != b'%' || bytes[2] != b'3' || (bytes[3] != b'a' && bytes[3] != b'A') { return Err(()); } first[0..1].to_owned() + ":" } _ => return 
Err(()), } }; for segment in segments { string.push('\\'); // Currently non-unicode windows paths cannot be represented match String::from_utf8(percent_decode(segment.as_bytes()).collect()) { Ok(s) => string.push_str(&s), Err(..) => return Err(()), } } let path = PathBuf::from(string); debug_assert!( path.is_absolute(), "to_file_path() failed to produce an absolute Path" ); Ok(path) } /// Implementation detail of `Url::query_pairs_mut`. Typically not used directly. #[derive(Debug)] pub struct UrlQuery<'a> { url: Option<&'a mut Url>, fragment: Option, } // `as_mut_string` string here exposes the internal serialization of an `Url`, // which should not be exposed to users. // We achieve that by not giving users direct access to `UrlQuery`: // * Its fields are private // (and so can not be constructed with struct literal syntax outside of this crate), // * It has no constructor // * It is only visible (on the type level) to users in the return type of // `Url::query_pairs_mut` which is `Serializer` // * `Serializer` keeps its target in a private field // * Unlike in other `Target` impls, `UrlQuery::finished` does not return `Self`. impl<'a> form_urlencoded::Target for UrlQuery<'a> { fn as_mut_string(&mut self) -> &mut String { &mut self.url.as_mut().unwrap().serialization } fn finish(mut self) -> &'a mut Url { let url = self.url.take().unwrap(); url.restore_already_parsed_fragment(self.fragment.take()); url } type Finished = &'a mut Url; } impl<'a> Drop for UrlQuery<'a> { fn drop(&mut self) { if let Some(url) = self.url.take() { url.restore_already_parsed_fragment(self.fragment.take()) } } } url-2.5.2/src/origin.rs000064400000000000000000000076021046102023000130650ustar 00000000000000// Copyright 2016 The rust-url developers. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. 
use crate::host::Host; use crate::parser::default_port; use crate::Url; use std::sync::atomic::{AtomicUsize, Ordering}; pub fn url_origin(url: &Url) -> Origin { let scheme = url.scheme(); match scheme { "blob" => { let result = Url::parse(url.path()); match result { Ok(ref url) => url_origin(url), Err(_) => Origin::new_opaque(), } } "ftp" | "http" | "https" | "ws" | "wss" => Origin::Tuple( scheme.to_owned(), url.host().unwrap().to_owned(), url.port_or_known_default().unwrap(), ), // TODO: Figure out what to do if the scheme is a file "file" => Origin::new_opaque(), _ => Origin::new_opaque(), } } /// The origin of an URL /// /// Two URLs with the same origin are considered /// to originate from the same entity and can therefore trust /// each other. /// /// The origin is determined based on the scheme as follows: /// /// - If the scheme is "blob" the origin is the origin of the /// URL contained in the path component. If parsing fails, /// it is an opaque origin. /// - If the scheme is "ftp", "http", "https", "ws", or "wss", /// then the origin is a tuple of the scheme, host, and port. /// - If the scheme is anything else, the origin is opaque, meaning /// the URL does not have the same origin as any other URL. /// /// For more information see #[derive(PartialEq, Eq, Hash, Clone, Debug)] pub enum Origin { /// A globally unique identifier Opaque(OpaqueOrigin), /// Consists of the URL's scheme, host and port Tuple(String, Host, u16), } impl Origin { /// Creates a new opaque origin that is only equal to itself. pub fn new_opaque() -> Origin { static COUNTER: AtomicUsize = AtomicUsize::new(0); Origin::Opaque(OpaqueOrigin(COUNTER.fetch_add(1, Ordering::SeqCst))) } /// Return whether this origin is a (scheme, host, port) tuple /// (as opposed to an opaque origin). 
pub fn is_tuple(&self) -> bool { matches!(*self, Origin::Tuple(..)) } /// pub fn ascii_serialization(&self) -> String { match *self { Origin::Opaque(_) => "null".to_owned(), Origin::Tuple(ref scheme, ref host, port) => { if default_port(scheme) == Some(port) { format!("{}://{}", scheme, host) } else { format!("{}://{}:{}", scheme, host, port) } } } } /// pub fn unicode_serialization(&self) -> String { match *self { Origin::Opaque(_) => "null".to_owned(), Origin::Tuple(ref scheme, ref host, port) => { let host = match *host { Host::Domain(ref domain) => { let (domain, _errors) = idna::domain_to_unicode(domain); Host::Domain(domain) } _ => host.clone(), }; if default_port(scheme) == Some(port) { format!("{}://{}", scheme, host) } else { format!("{}://{}:{}", scheme, host, port) } } } } } /// Opaque identifier for URLs that have file or other schemes #[derive(Eq, PartialEq, Hash, Clone, Debug)] pub struct OpaqueOrigin(usize); url-2.5.2/src/parser.rs000064400000000000000000001715451046102023000131020ustar 00000000000000// Copyright 2013-2016 The rust-url developers. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. 
use std::error::Error; use std::fmt::{self, Formatter, Write}; use std::str; use crate::host::{Host, HostInternal}; use crate::Url; use form_urlencoded::EncodingOverride; use percent_encoding::{percent_encode, utf8_percent_encode, AsciiSet, CONTROLS}; /// https://url.spec.whatwg.org/#fragment-percent-encode-set const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`'); /// https://url.spec.whatwg.org/#path-percent-encode-set const PATH: &AsciiSet = &FRAGMENT.add(b'#').add(b'?').add(b'{').add(b'}'); /// https://url.spec.whatwg.org/#userinfo-percent-encode-set pub(crate) const USERINFO: &AsciiSet = &PATH .add(b'/') .add(b':') .add(b';') .add(b'=') .add(b'@') .add(b'[') .add(b'\\') .add(b']') .add(b'^') .add(b'|'); pub(crate) const PATH_SEGMENT: &AsciiSet = &PATH.add(b'/').add(b'%'); // The backslash (\) character is treated as a path separator in special URLs // so it needs to be additionally escaped in that case. pub(crate) const SPECIAL_PATH_SEGMENT: &AsciiSet = &PATH_SEGMENT.add(b'\\'); // https://url.spec.whatwg.org/#query-state const QUERY: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'#').add(b'<').add(b'>'); const SPECIAL_QUERY: &AsciiSet = &QUERY.add(b'\''); pub type ParseResult = Result; macro_rules! simple_enum_error { ($($name: ident => $description: expr,)+) => { /// Errors that can occur during parsing. /// /// This may be extended in the future so exhaustive matching is /// discouraged with an unused variant. #[derive(PartialEq, Eq, Clone, Copy, Debug)] #[non_exhaustive] pub enum ParseError { $( $name, )+ } impl fmt::Display for ParseError { fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result { match *self { $( ParseError::$name => fmt.write_str($description), )+ } } } } } impl Error for ParseError {} simple_enum_error! 
{ EmptyHost => "empty host", IdnaError => "invalid international domain name", InvalidPort => "invalid port number", InvalidIpv4Address => "invalid IPv4 address", InvalidIpv6Address => "invalid IPv6 address", InvalidDomainCharacter => "invalid domain character", RelativeUrlWithoutBase => "relative URL without a base", RelativeUrlWithCannotBeABaseBase => "relative URL with a cannot-be-a-base base", SetHostOnCannotBeABaseUrl => "a cannot-be-a-base URL doesn’t have a host to set", Overflow => "URLs more than 4 GB are not supported", } impl From<::idna::Errors> for ParseError { fn from(_: ::idna::Errors) -> ParseError { ParseError::IdnaError } } macro_rules! syntax_violation_enum { ($($name: ident => $description: literal,)+) => { /// Non-fatal syntax violations that can occur during parsing. /// /// This may be extended in the future so exhaustive matching is /// forbidden. #[derive(PartialEq, Eq, Clone, Copy, Debug)] #[non_exhaustive] pub enum SyntaxViolation { $( /// ```text #[doc = $description] /// ``` $name, )+ } impl SyntaxViolation { pub fn description(&self) -> &'static str { match *self { $( SyntaxViolation::$name => $description, )+ } } } } } syntax_violation_enum! 
{ Backslash => "backslash", C0SpaceIgnored => "leading or trailing control or space character are ignored in URLs", EmbeddedCredentials => "embedding authentication information (username or password) \ in an URL is not recommended", ExpectedDoubleSlash => "expected //", ExpectedFileDoubleSlash => "expected // after file:", FileWithHostAndWindowsDrive => "file: with host and Windows drive letter", NonUrlCodePoint => "non-URL code point", NullInFragment => "NULL characters are ignored in URL fragment identifiers", PercentDecode => "expected 2 hex digits after %", TabOrNewlineIgnored => "tabs or newlines are ignored in URLs", UnencodedAtSign => "unencoded @ sign in username or password", } impl fmt::Display for SyntaxViolation { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { fmt::Display::fmt(self.description(), f) } } #[derive(Copy, Clone, PartialEq, Eq)] pub enum SchemeType { File, SpecialNotFile, NotSpecial, } impl SchemeType { pub fn is_special(&self) -> bool { !matches!(*self, SchemeType::NotSpecial) } pub fn is_file(&self) -> bool { matches!(*self, SchemeType::File) } } impl> From for SchemeType { fn from(s: T) -> Self { match s.as_ref() { "http" | "https" | "ws" | "wss" | "ftp" => SchemeType::SpecialNotFile, "file" => SchemeType::File, _ => SchemeType::NotSpecial, } } } pub fn default_port(scheme: &str) -> Option { match scheme { "http" | "ws" => Some(80), "https" | "wss" => Some(443), "ftp" => Some(21), _ => None, } } #[derive(Clone, Debug)] pub struct Input<'i> { chars: str::Chars<'i>, } impl<'i> Input<'i> { pub fn new_no_trim(input: &'i str) -> Self { Input { chars: input.chars(), } } pub fn new_trim_tab_and_newlines( original_input: &'i str, vfn: Option<&dyn Fn(SyntaxViolation)>, ) -> Self { let input = original_input.trim_matches(ascii_tab_or_new_line); if let Some(vfn) = vfn { if input.len() < original_input.len() { vfn(SyntaxViolation::C0SpaceIgnored) } if input.chars().any(|c| matches!(c, '\t' | '\n' | '\r')) { 
vfn(SyntaxViolation::TabOrNewlineIgnored) } } Input { chars: input.chars(), } } pub fn new_trim_c0_control_and_space( original_input: &'i str, vfn: Option<&dyn Fn(SyntaxViolation)>, ) -> Self { let input = original_input.trim_matches(c0_control_or_space); if let Some(vfn) = vfn { if input.len() < original_input.len() { vfn(SyntaxViolation::C0SpaceIgnored) } if input.chars().any(|c| matches!(c, '\t' | '\n' | '\r')) { vfn(SyntaxViolation::TabOrNewlineIgnored) } } Input { chars: input.chars(), } } #[inline] pub fn is_empty(&self) -> bool { self.clone().next().is_none() } #[inline] fn starts_with(&self, p: P) -> bool { p.split_prefix(&mut self.clone()) } #[inline] pub fn split_prefix(&self, p: P) -> Option { let mut remaining = self.clone(); if p.split_prefix(&mut remaining) { Some(remaining) } else { None } } #[inline] fn split_first(&self) -> (Option, Self) { let mut remaining = self.clone(); (remaining.next(), remaining) } #[inline] fn count_matching bool>(&self, f: F) -> (u32, Self) { let mut count = 0; let mut remaining = self.clone(); loop { let mut input = remaining.clone(); if matches!(input.next(), Some(c) if f(c)) { remaining = input; count += 1; } else { return (count, remaining); } } } #[inline] fn next_utf8(&mut self) -> Option<(char, &'i str)> { loop { let utf8 = self.chars.as_str(); match self.chars.next() { Some(c) => { if !matches!(c, '\t' | '\n' | '\r') { return Some((c, &utf8[..c.len_utf8()])); } } None => return None, } } } } pub trait Pattern { fn split_prefix(self, input: &mut Input) -> bool; } impl Pattern for char { fn split_prefix(self, input: &mut Input) -> bool { input.next() == Some(self) } } impl<'a> Pattern for &'a str { fn split_prefix(self, input: &mut Input) -> bool { for c in self.chars() { if input.next() != Some(c) { return false; } } true } } impl bool> Pattern for F { fn split_prefix(self, input: &mut Input) -> bool { input.next().map_or(false, self) } } impl<'i> Iterator for Input<'i> { type Item = char; fn next(&mut self) -> 
Option { self.chars .by_ref() .find(|&c| !matches!(c, '\t' | '\n' | '\r')) } } pub struct Parser<'a> { pub serialization: String, pub base_url: Option<&'a Url>, pub query_encoding_override: EncodingOverride<'a>, pub violation_fn: Option<&'a dyn Fn(SyntaxViolation)>, pub context: Context, } #[derive(PartialEq, Eq, Copy, Clone)] pub enum Context { UrlParser, Setter, PathSegmentSetter, } impl<'a> Parser<'a> { fn log_violation(&self, v: SyntaxViolation) { if let Some(f) = self.violation_fn { f(v) } } fn log_violation_if(&self, v: SyntaxViolation, test: impl FnOnce() -> bool) { if let Some(f) = self.violation_fn { if test() { f(v) } } } pub fn for_setter(serialization: String) -> Parser<'a> { Parser { serialization, base_url: None, query_encoding_override: None, violation_fn: None, context: Context::Setter, } } /// https://url.spec.whatwg.org/#concept-basic-url-parser pub fn parse_url(mut self, input: &str) -> ParseResult { let input = Input::new_trim_c0_control_and_space(input, self.violation_fn); if let Ok(remaining) = self.parse_scheme(input.clone()) { return self.parse_with_scheme(remaining); } // No-scheme state if let Some(base_url) = self.base_url { if input.starts_with('#') { self.fragment_only(base_url, input) } else if base_url.cannot_be_a_base() { Err(ParseError::RelativeUrlWithCannotBeABaseBase) } else { let scheme_type = SchemeType::from(base_url.scheme()); if scheme_type.is_file() { self.parse_file(input, scheme_type, Some(base_url)) } else { self.parse_relative(input, scheme_type, base_url) } } } else { Err(ParseError::RelativeUrlWithoutBase) } } pub fn parse_scheme<'i>(&mut self, mut input: Input<'i>) -> Result, ()> { if input.is_empty() || !input.starts_with(ascii_alpha) { return Err(()); } debug_assert!(self.serialization.is_empty()); while let Some(c) = input.next() { match c { 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.' 
=> { self.serialization.push(c.to_ascii_lowercase()) } ':' => return Ok(input), _ => { self.serialization.clear(); return Err(()); } } } // EOF before ':' if self.context == Context::Setter { Ok(input) } else { self.serialization.clear(); Err(()) } } fn parse_with_scheme(mut self, input: Input<'_>) -> ParseResult { use crate::SyntaxViolation::{ExpectedDoubleSlash, ExpectedFileDoubleSlash}; let scheme_end = to_u32(self.serialization.len())?; let scheme_type = SchemeType::from(&self.serialization); self.serialization.push(':'); match scheme_type { SchemeType::File => { self.log_violation_if(ExpectedFileDoubleSlash, || !input.starts_with("//")); let base_file_url = self.base_url.and_then(|base| { if base.scheme() == "file" { Some(base) } else { None } }); self.serialization.clear(); self.parse_file(input, scheme_type, base_file_url) } SchemeType::SpecialNotFile => { // special relative or authority state let (slashes_count, remaining) = input.count_matching(|c| matches!(c, '/' | '\\')); if let Some(base_url) = self.base_url { if slashes_count < 2 && base_url.scheme() == &self.serialization[..scheme_end as usize] { // "Cannot-be-a-base" URLs only happen with "not special" schemes. debug_assert!(!base_url.cannot_be_a_base()); self.serialization.clear(); return self.parse_relative(input, scheme_type, base_url); } } // special authority slashes state self.log_violation_if(ExpectedDoubleSlash, || { input .clone() .take_while(|&c| matches!(c, '/' | '\\')) .collect::() != "//" }); self.after_double_slash(remaining, scheme_type, scheme_end) } SchemeType::NotSpecial => self.parse_non_special(input, scheme_type, scheme_end), } } /// Scheme other than file, http, https, ws, ws, ftp. 
fn parse_non_special( mut self, input: Input<'_>, scheme_type: SchemeType, scheme_end: u32, ) -> ParseResult { // path or authority state ( if let Some(input) = input.split_prefix("//") { return self.after_double_slash(input, scheme_type, scheme_end); } // Anarchist URL (no authority) let path_start = to_u32(self.serialization.len())?; let username_end = path_start; let host_start = path_start; let host_end = path_start; let host = HostInternal::None; let port = None; let remaining = if let Some(input) = input.split_prefix('/') { self.serialization.push('/'); self.parse_path(scheme_type, &mut false, path_start as usize, input) } else { self.parse_cannot_be_a_base_path(input) }; self.with_query_and_fragment( scheme_type, scheme_end, username_end, host_start, host_end, host, port, path_start, remaining, ) } fn parse_file( mut self, input: Input<'_>, scheme_type: SchemeType, base_file_url: Option<&Url>, ) -> ParseResult { use crate::SyntaxViolation::Backslash; // file state debug_assert!(self.serialization.is_empty()); let (first_char, input_after_first_char) = input.split_first(); if matches!(first_char, Some('/') | Some('\\')) { self.log_violation_if(SyntaxViolation::Backslash, || first_char == Some('\\')); // file slash state let (next_char, input_after_next_char) = input_after_first_char.split_first(); if matches!(next_char, Some('/') | Some('\\')) { self.log_violation_if(Backslash, || next_char == Some('\\')); // file host state self.serialization.push_str("file://"); let scheme_end = "file".len() as u32; let host_start = "file://".len() as u32; let (path_start, mut host, remaining) = self.parse_file_host(input_after_next_char)?; let mut host_end = to_u32(self.serialization.len())?; let mut has_host = !matches!(host, HostInternal::None); let remaining = if path_start { self.parse_path_start(SchemeType::File, &mut has_host, remaining) } else { let path_start = self.serialization.len(); self.serialization.push('/'); self.parse_path(SchemeType::File, &mut has_host, 
path_start, remaining) }; // For file URLs that have a host and whose path starts // with the windows drive letter we just remove the host. if !has_host { self.serialization .drain(host_start as usize..host_end as usize); host_end = host_start; host = HostInternal::None; } let (query_start, fragment_start) = self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?; return Ok(Url { serialization: self.serialization, scheme_end, username_end: host_start, host_start, host_end, host, port: None, path_start: host_end, query_start, fragment_start, }); } else { self.serialization.push_str("file://"); let scheme_end = "file".len() as u32; let host_start = "file://".len(); let mut host_end = host_start; let mut host = HostInternal::None; if !starts_with_windows_drive_letter_segment(&input_after_first_char) { if let Some(base_url) = base_file_url { let first_segment = base_url.path_segments().unwrap().next().unwrap(); if is_normalized_windows_drive_letter(first_segment) { self.serialization.push('/'); self.serialization.push_str(first_segment); } else if let Some(host_str) = base_url.host_str() { self.serialization.push_str(host_str); host_end = self.serialization.len(); host = base_url.host; } } } // If c is the EOF code point, U+002F (/), U+005C (\), U+003F (?), or U+0023 (#), then decrease pointer by one let parse_path_input = if let Some(c) = first_char { if c == '/' || c == '\\' || c == '?' 
|| c == '#' { input } else { input_after_first_char } } else { input_after_first_char }; let remaining = self.parse_path(SchemeType::File, &mut false, host_end, parse_path_input); let host_start = host_start as u32; let (query_start, fragment_start) = self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?; let host_end = host_end as u32; return Ok(Url { serialization: self.serialization, scheme_end, username_end: host_start, host_start, host_end, host, port: None, path_start: host_end, query_start, fragment_start, }); } } if let Some(base_url) = base_file_url { match first_char { None => { // Copy everything except the fragment let before_fragment = match base_url.fragment_start { Some(i) => &base_url.serialization[..i as usize], None => &*base_url.serialization, }; self.serialization.push_str(before_fragment); Ok(Url { serialization: self.serialization, fragment_start: None, ..*base_url }) } Some('?') => { // Copy everything up to the query string let before_query = match (base_url.query_start, base_url.fragment_start) { (None, None) => &*base_url.serialization, (Some(i), _) | (None, Some(i)) => base_url.slice(..i), }; self.serialization.push_str(before_query); let (query_start, fragment_start) = self.parse_query_and_fragment(scheme_type, base_url.scheme_end, input)?; Ok(Url { serialization: self.serialization, query_start, fragment_start, ..*base_url }) } Some('#') => self.fragment_only(base_url, input), _ => { if !starts_with_windows_drive_letter_segment(&input) { let before_query = match (base_url.query_start, base_url.fragment_start) { (None, None) => &*base_url.serialization, (Some(i), _) | (None, Some(i)) => base_url.slice(..i), }; self.serialization.push_str(before_query); self.shorten_path(SchemeType::File, base_url.path_start as usize); let remaining = self.parse_path( SchemeType::File, &mut true, base_url.path_start as usize, input, ); self.with_query_and_fragment( SchemeType::File, base_url.scheme_end, base_url.username_end, 
base_url.host_start, base_url.host_end, base_url.host, base_url.port, base_url.path_start, remaining, ) } else { self.serialization.push_str("file:///"); let scheme_end = "file".len() as u32; let path_start = "file://".len(); let remaining = self.parse_path(SchemeType::File, &mut false, path_start, input); let (query_start, fragment_start) = self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?; let path_start = path_start as u32; Ok(Url { serialization: self.serialization, scheme_end, username_end: path_start, host_start: path_start, host_end: path_start, host: HostInternal::None, port: None, path_start, query_start, fragment_start, }) } } } } else { self.serialization.push_str("file:///"); let scheme_end = "file".len() as u32; let path_start = "file://".len(); let remaining = self.parse_path(SchemeType::File, &mut false, path_start, input); let (query_start, fragment_start) = self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?; let path_start = path_start as u32; Ok(Url { serialization: self.serialization, scheme_end, username_end: path_start, host_start: path_start, host_end: path_start, host: HostInternal::None, port: None, path_start, query_start, fragment_start, }) } } fn parse_relative( mut self, input: Input<'_>, scheme_type: SchemeType, base_url: &Url, ) -> ParseResult { // relative state debug_assert!(self.serialization.is_empty()); let (first_char, input_after_first_char) = input.split_first(); match first_char { None => { // Copy everything except the fragment let before_fragment = match base_url.fragment_start { Some(i) => &base_url.serialization[..i as usize], None => &*base_url.serialization, }; self.serialization.push_str(before_fragment); Ok(Url { serialization: self.serialization, fragment_start: None, ..*base_url }) } Some('?') => { // Copy everything up to the query string let before_query = match (base_url.query_start, base_url.fragment_start) { (None, None) => &*base_url.serialization, (Some(i), _) | 
(None, Some(i)) => base_url.slice(..i), }; self.serialization.push_str(before_query); let (query_start, fragment_start) = self.parse_query_and_fragment(scheme_type, base_url.scheme_end, input)?; Ok(Url { serialization: self.serialization, query_start, fragment_start, ..*base_url }) } Some('#') => self.fragment_only(base_url, input), Some('/') | Some('\\') => { let (slashes_count, remaining) = input.count_matching(|c| matches!(c, '/' | '\\')); if slashes_count >= 2 { self.log_violation_if(SyntaxViolation::ExpectedDoubleSlash, || { input .clone() .take_while(|&c| matches!(c, '/' | '\\')) .collect::() != "//" }); let scheme_end = base_url.scheme_end; debug_assert!(base_url.byte_at(scheme_end) == b':'); self.serialization .push_str(base_url.slice(..scheme_end + 1)); if let Some(after_prefix) = input.split_prefix("//") { return self.after_double_slash(after_prefix, scheme_type, scheme_end); } return self.after_double_slash(remaining, scheme_type, scheme_end); } let path_start = base_url.path_start; self.serialization.push_str(base_url.slice(..path_start)); self.serialization.push('/'); let remaining = self.parse_path( scheme_type, &mut true, path_start as usize, input_after_first_char, ); self.with_query_and_fragment( scheme_type, base_url.scheme_end, base_url.username_end, base_url.host_start, base_url.host_end, base_url.host, base_url.port, base_url.path_start, remaining, ) } _ => { let before_query = match (base_url.query_start, base_url.fragment_start) { (None, None) => &*base_url.serialization, (Some(i), _) | (None, Some(i)) => base_url.slice(..i), }; self.serialization.push_str(before_query); // FIXME spec says just "remove last entry", not the "pop" algorithm self.pop_path(scheme_type, base_url.path_start as usize); // A special url always has a path. 
// A path always starts with '/' if self.serialization.len() == base_url.path_start as usize && (SchemeType::from(base_url.scheme()).is_special() || !input.is_empty()) { self.serialization.push('/'); } let remaining = match input.split_first() { (Some('/'), remaining) => self.parse_path( scheme_type, &mut true, base_url.path_start as usize, remaining, ), _ => { self.parse_path(scheme_type, &mut true, base_url.path_start as usize, input) } }; self.with_query_and_fragment( scheme_type, base_url.scheme_end, base_url.username_end, base_url.host_start, base_url.host_end, base_url.host, base_url.port, base_url.path_start, remaining, ) } } } fn after_double_slash( mut self, input: Input<'_>, scheme_type: SchemeType, scheme_end: u32, ) -> ParseResult { self.serialization.push('/'); self.serialization.push('/'); // authority state let before_authority = self.serialization.len(); let (username_end, remaining) = self.parse_userinfo(input, scheme_type)?; let has_authority = before_authority != self.serialization.len(); // host state let host_start = to_u32(self.serialization.len())?; let (host_end, host, port, remaining) = self.parse_host_and_port(remaining, scheme_end, scheme_type)?; if host == HostInternal::None && has_authority { return Err(ParseError::EmptyHost); } // path state let path_start = to_u32(self.serialization.len())?; let remaining = self.parse_path_start(scheme_type, &mut true, remaining); self.with_query_and_fragment( scheme_type, scheme_end, username_end, host_start, host_end, host, port, path_start, remaining, ) } /// Return (username_end, remaining) fn parse_userinfo<'i>( &mut self, mut input: Input<'i>, scheme_type: SchemeType, ) -> ParseResult<(u32, Input<'i>)> { let mut last_at = None; let mut remaining = input.clone(); let mut char_count = 0; while let Some(c) = remaining.next() { match c { '@' => { if last_at.is_some() { self.log_violation(SyntaxViolation::UnencodedAtSign) } else { self.log_violation(SyntaxViolation::EmbeddedCredentials) } last_at = 
Some((char_count, remaining.clone())) } '/' | '?' | '#' => break, '\\' if scheme_type.is_special() => break, _ => (), } char_count += 1; } let (mut userinfo_char_count, remaining) = match last_at { None => return Ok((to_u32(self.serialization.len())?, input)), Some((0, remaining)) => { // Otherwise, if one of the following is true // c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#) // url is special and c is U+005C (\) // If @ flag is set and buffer is the empty string, validation error, return failure. if let (Some(c), _) = remaining.split_first() { if c == '/' || c == '?' || c == '#' || (scheme_type.is_special() && c == '\\') { return Err(ParseError::EmptyHost); } } return Ok((to_u32(self.serialization.len())?, remaining)); } Some(x) => x, }; let mut username_end = None; let mut has_password = false; let mut has_username = false; while userinfo_char_count > 0 { let (c, utf8_c) = input.next_utf8().unwrap(); userinfo_char_count -= 1; if c == ':' && username_end.is_none() { // Start parsing password username_end = Some(to_u32(self.serialization.len())?); // We don't add a colon if the password is empty if userinfo_char_count > 0 { self.serialization.push(':'); has_password = true; } } else { if !has_password { has_username = true; } self.check_url_code_point(c, &input); self.serialization .extend(utf8_percent_encode(utf8_c, USERINFO)); } } let username_end = match username_end { Some(i) => i, None => to_u32(self.serialization.len())?, }; if has_username || has_password { self.serialization.push('@'); } Ok((username_end, remaining)) } fn parse_host_and_port<'i>( &mut self, input: Input<'i>, scheme_end: u32, scheme_type: SchemeType, ) -> ParseResult<(u32, HostInternal, Option, Input<'i>)> { let (host, remaining) = Parser::parse_host(input, scheme_type)?; write!(&mut self.serialization, "{}", host).unwrap(); let host_end = to_u32(self.serialization.len())?; if let Host::Domain(h) = &host { if h.is_empty() { // Port with an empty host if 
remaining.starts_with(":") { return Err(ParseError::EmptyHost); } if scheme_type.is_special() { return Err(ParseError::EmptyHost); } } }; let (port, remaining) = if let Some(remaining) = remaining.split_prefix(':') { let scheme = || default_port(&self.serialization[..scheme_end as usize]); Parser::parse_port(remaining, scheme, self.context)? } else { (None, remaining) }; if let Some(port) = port { write!(&mut self.serialization, ":{}", port).unwrap() } Ok((host_end, host.into(), port, remaining)) } pub fn parse_host( mut input: Input<'_>, scheme_type: SchemeType, ) -> ParseResult<(Host, Input<'_>)> { if scheme_type.is_file() { return Parser::get_file_host(input); } // Undo the Input abstraction here to avoid allocating in the common case // where the host part of the input does not contain any tab or newline let input_str = input.chars.as_str(); let mut inside_square_brackets = false; let mut has_ignored_chars = false; let mut non_ignored_chars = 0; let mut bytes = 0; for c in input_str.chars() { match c { ':' if !inside_square_brackets => break, '\\' if scheme_type.is_special() => break, '/' | '?' 
| '#' => break, '\t' | '\n' | '\r' => { has_ignored_chars = true; } '[' => { inside_square_brackets = true; non_ignored_chars += 1 } ']' => { inside_square_brackets = false; non_ignored_chars += 1 } _ => non_ignored_chars += 1, } bytes += c.len_utf8(); } let replaced: String; let host_str; { let host_input = input.by_ref().take(non_ignored_chars); if has_ignored_chars { replaced = host_input.collect(); host_str = &*replaced } else { for _ in host_input {} host_str = &input_str[..bytes] } } if scheme_type == SchemeType::SpecialNotFile && host_str.is_empty() { return Err(ParseError::EmptyHost); } if !scheme_type.is_special() { let host = Host::parse_opaque(host_str)?; return Ok((host, input)); } let host = Host::parse(host_str)?; Ok((host, input)) } fn get_file_host(input: Input<'_>) -> ParseResult<(Host, Input<'_>)> { let (_, host_str, remaining) = Parser::file_host(input)?; let host = match Host::parse(&host_str)? { Host::Domain(ref d) if d == "localhost" => Host::Domain("".to_string()), host => host, }; Ok((host, remaining)) } fn parse_file_host<'i>( &mut self, input: Input<'i>, ) -> ParseResult<(bool, HostInternal, Input<'i>)> { let has_host; let (_, host_str, remaining) = Parser::file_host(input)?; let host = if host_str.is_empty() { has_host = false; HostInternal::None } else { match Host::parse(&host_str)? { Host::Domain(ref d) if d == "localhost" => { has_host = false; HostInternal::None } host => { write!(&mut self.serialization, "{}", host).unwrap(); has_host = true; host.into() } } }; Ok((has_host, host, remaining)) } pub fn file_host(input: Input) -> ParseResult<(bool, String, Input)> { // Undo the Input abstraction here to avoid allocating in the common case // where the host part of the input does not contain any tab or newline let input_str = input.chars.as_str(); let mut has_ignored_chars = false; let mut non_ignored_chars = 0; let mut bytes = 0; for c in input_str.chars() { match c { '/' | '\\' | '?' 
| '#' => break, '\t' | '\n' | '\r' => has_ignored_chars = true, _ => non_ignored_chars += 1, } bytes += c.len_utf8(); } let replaced: String; let host_str; let mut remaining = input.clone(); { let host_input = remaining.by_ref().take(non_ignored_chars); if has_ignored_chars { replaced = host_input.collect(); host_str = &*replaced } else { for _ in host_input {} host_str = &input_str[..bytes] } } if is_windows_drive_letter(host_str) { return Ok((false, "".to_string(), input)); } Ok((true, host_str.to_string(), remaining)) } pub fn parse_port

( mut input: Input<'_>, default_port: P, context: Context, ) -> ParseResult<(Option, Input<'_>)> where P: Fn() -> Option, { let mut port: u32 = 0; let mut has_any_digit = false; while let (Some(c), remaining) = input.split_first() { if let Some(digit) = c.to_digit(10) { port = port * 10 + digit; if port > u16::MAX as u32 { return Err(ParseError::InvalidPort); } has_any_digit = true; } else if context == Context::UrlParser && !matches!(c, '/' | '\\' | '?' | '#') { return Err(ParseError::InvalidPort); } else { break; } input = remaining; } let mut opt_port = Some(port as u16); if !has_any_digit || opt_port == default_port() { opt_port = None; } Ok((opt_port, input)) } pub fn parse_path_start<'i>( &mut self, scheme_type: SchemeType, has_host: &mut bool, input: Input<'i>, ) -> Input<'i> { let path_start = self.serialization.len(); let (maybe_c, remaining) = input.split_first(); // If url is special, then: if scheme_type.is_special() { if maybe_c == Some('\\') { // If c is U+005C (\), validation error. self.log_violation(SyntaxViolation::Backslash); } // A special URL always has a non-empty path. if !self.serialization.ends_with('/') { self.serialization.push('/'); // We have already made sure the forward slash is present. if maybe_c == Some('/') || maybe_c == Some('\\') { return self.parse_path(scheme_type, has_host, path_start, remaining); } } return self.parse_path(scheme_type, has_host, path_start, input); } else if maybe_c == Some('?') || maybe_c == Some('#') { // Otherwise, if state override is not given and c is U+003F (?), // set url’s query to the empty string and state to query state. // Otherwise, if state override is not given and c is U+0023 (#), // set url’s fragment to the empty string and state to fragment state. // The query and path states will be handled by the caller. 
return input; } if maybe_c.is_some() && maybe_c != Some('/') { self.serialization.push('/'); } // Otherwise, if c is not the EOF code point: self.parse_path(scheme_type, has_host, path_start, input) } pub fn parse_path<'i>( &mut self, scheme_type: SchemeType, has_host: &mut bool, path_start: usize, mut input: Input<'i>, ) -> Input<'i> { // Relative path state loop { let mut segment_start = self.serialization.len(); let mut ends_with_slash = false; loop { let input_before_c = input.clone(); let (c, utf8_c) = if let Some(x) = input.next_utf8() { x } else { break; }; match c { '/' if self.context != Context::PathSegmentSetter => { self.serialization.push(c); ends_with_slash = true; break; } '\\' if self.context != Context::PathSegmentSetter && scheme_type.is_special() => { self.log_violation(SyntaxViolation::Backslash); self.serialization.push('/'); ends_with_slash = true; break; } '?' | '#' if self.context == Context::UrlParser => { input = input_before_c; break; } _ => { self.check_url_code_point(c, &input); if scheme_type.is_file() && self.serialization.len() > path_start && is_normalized_windows_drive_letter( &self.serialization[path_start + 1..], ) { self.serialization.push('/'); segment_start += 1; } if self.context == Context::PathSegmentSetter { if scheme_type.is_special() { self.serialization .extend(utf8_percent_encode(utf8_c, SPECIAL_PATH_SEGMENT)); } else { self.serialization .extend(utf8_percent_encode(utf8_c, PATH_SEGMENT)); } } else { self.serialization.extend(utf8_percent_encode(utf8_c, PATH)); } } } } let segment_before_slash = if ends_with_slash { &self.serialization[segment_start..self.serialization.len() - 1] } else { &self.serialization[segment_start..self.serialization.len()] }; match segment_before_slash { // If buffer is a double-dot path segment, shorten url’s path, ".." | "%2e%2e" | "%2e%2E" | "%2E%2e" | "%2E%2E" | "%2e." | "%2E." 
| ".%2e" | ".%2E" => { debug_assert!(self.serialization.as_bytes()[segment_start - 1] == b'/'); self.serialization.truncate(segment_start); if self.serialization.ends_with('/') && Parser::last_slash_can_be_removed(&self.serialization, path_start) { self.serialization.pop(); } self.shorten_path(scheme_type, path_start); // and then if neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path. if ends_with_slash && !self.serialization.ends_with('/') { self.serialization.push('/'); } } // Otherwise, if buffer is a single-dot path segment and if neither c is U+002F (/), // nor url is special and c is U+005C (\), append the empty string to url’s path. "." | "%2e" | "%2E" => { self.serialization.truncate(segment_start); if !self.serialization.ends_with('/') { self.serialization.push('/'); } } _ => { // If url’s scheme is "file", url’s path is empty, and buffer is a Windows drive letter, then if scheme_type.is_file() && segment_start == path_start + 1 && is_windows_drive_letter(segment_before_slash) { // Replace the second code point in buffer with U+003A (:). if let Some(c) = segment_before_slash.chars().next() { self.serialization.truncate(segment_start); self.serialization.push(c); self.serialization.push(':'); if ends_with_slash { self.serialization.push('/'); } } // If url’s host is neither the empty string nor null, // validation error, set url’s host to the empty string. if *has_host { self.log_violation(SyntaxViolation::FileWithHostAndWindowsDrive); *has_host = false; // FIXME account for this in callers } } } } if !ends_with_slash { break; } } if scheme_type.is_file() { // while url’s path’s size is greater than 1 // and url’s path[0] is the empty string, // validation error, remove the first item from url’s path. 
//FIXME: log violation let path = self.serialization.split_off(path_start); self.serialization.push('/'); self.serialization.push_str(path.trim_start_matches('/')); } input } fn last_slash_can_be_removed(serialization: &str, path_start: usize) -> bool { let url_before_segment = &serialization[..serialization.len() - 1]; if let Some(segment_before_start) = url_before_segment.rfind('/') { // Do not remove the root slash segment_before_start >= path_start // Or a windows drive letter slash && !path_starts_with_windows_drive_letter(&serialization[segment_before_start..]) } else { false } } /// https://url.spec.whatwg.org/#shorten-a-urls-path fn shorten_path(&mut self, scheme_type: SchemeType, path_start: usize) { // If path is empty, then return. if self.serialization.len() == path_start { return; } // If url’s scheme is "file", path’s size is 1, and path[0] is a normalized Windows drive letter, then return. if scheme_type.is_file() && is_normalized_windows_drive_letter(&self.serialization[path_start..]) { return; } // Remove path’s last item. self.pop_path(scheme_type, path_start); } /// https://url.spec.whatwg.org/#pop-a-urls-path fn pop_path(&mut self, scheme_type: SchemeType, path_start: usize) { if self.serialization.len() > path_start { let slash_position = self.serialization[path_start..].rfind('/').unwrap(); // + 1 since rfind returns the position before the slash. 
let segment_start = path_start + slash_position + 1; // Don’t pop a Windows drive letter if !(scheme_type.is_file() && is_normalized_windows_drive_letter(&self.serialization[segment_start..])) { self.serialization.truncate(segment_start); } } } pub fn parse_cannot_be_a_base_path<'i>(&mut self, mut input: Input<'i>) -> Input<'i> { loop { let input_before_c = input.clone(); match input.next_utf8() { Some(('?', _)) | Some(('#', _)) if self.context == Context::UrlParser => { return input_before_c } Some((c, utf8_c)) => { self.check_url_code_point(c, &input); self.serialization .extend(utf8_percent_encode(utf8_c, CONTROLS)); } None => return input, } } } #[allow(clippy::too_many_arguments)] fn with_query_and_fragment( mut self, scheme_type: SchemeType, scheme_end: u32, username_end: u32, host_start: u32, host_end: u32, host: HostInternal, port: Option, mut path_start: u32, remaining: Input<'_>, ) -> ParseResult { // Special case for anarchist URL's with a leading empty path segment // This prevents web+demo:/.//not-a-host/ or web+demo:/path/..//not-a-host/, // when parsed and then serialized, from ending up as web+demo://not-a-host/ // (they end up as web+demo:/.//not-a-host/). // // If url’s host is null, url does not have an opaque path, // url’s path’s size is greater than 1, and url’s path[0] is the empty string, // then append U+002F (/) followed by U+002E (.) to output. let scheme_end_as_usize = scheme_end as usize; let path_start_as_usize = path_start as usize; if path_start_as_usize == scheme_end_as_usize + 1 { // Anarchist URL if self.serialization[path_start_as_usize..].starts_with("//") { // Case 1: The base URL did not have an empty path segment, but the resulting one does // Insert the "/." 
prefix self.serialization.insert_str(path_start_as_usize, "/."); path_start += 2; } assert!(!self.serialization[scheme_end_as_usize..].starts_with("://")); } else if path_start_as_usize == scheme_end_as_usize + 3 && &self.serialization[scheme_end_as_usize..path_start_as_usize] == ":/." { // Anarchist URL with leading empty path segment // The base URL has a "/." between the host and the path assert_eq!(self.serialization.as_bytes()[path_start_as_usize], b'/'); if self .serialization .as_bytes() .get(path_start_as_usize + 1) .copied() != Some(b'/') { // Case 2: The base URL had an empty path segment, but the resulting one does not // Remove the "/." prefix self.serialization .replace_range(scheme_end_as_usize..path_start_as_usize, ":"); path_start -= 2; } assert!(!self.serialization[scheme_end_as_usize..].starts_with("://")); } let (query_start, fragment_start) = self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?; Ok(Url { serialization: self.serialization, scheme_end, username_end, host_start, host_end, host, port, path_start, query_start, fragment_start, }) } /// Return (query_start, fragment_start) fn parse_query_and_fragment( &mut self, scheme_type: SchemeType, scheme_end: u32, mut input: Input<'_>, ) -> ParseResult<(Option, Option)> { let mut query_start = None; match input.next() { Some('#') => {} Some('?') => { query_start = Some(to_u32(self.serialization.len())?); self.serialization.push('?'); let remaining = self.parse_query(scheme_type, scheme_end, input); if let Some(remaining) = remaining { input = remaining } else { return Ok((query_start, None)); } } None => return Ok((None, None)), _ => panic!("Programming error. parse_query_and_fragment() called without ? 
or #"), } let fragment_start = to_u32(self.serialization.len())?; self.serialization.push('#'); self.parse_fragment(input); Ok((query_start, Some(fragment_start))) } pub fn parse_query<'i>( &mut self, scheme_type: SchemeType, scheme_end: u32, mut input: Input<'i>, ) -> Option> { let len = input.chars.as_str().len(); let mut query = String::with_capacity(len); // FIXME: use a streaming decoder instead let mut remaining = None; while let Some(c) = input.next() { if c == '#' && self.context == Context::UrlParser { remaining = Some(input); break; } else { self.check_url_code_point(c, &input); query.push(c); } } let encoding = match &self.serialization[..scheme_end as usize] { "http" | "https" | "file" | "ftp" => self.query_encoding_override, _ => None, }; let query_bytes = if let Some(o) = encoding { o(&query) } else { query.as_bytes().into() }; let set = if scheme_type.is_special() { SPECIAL_QUERY } else { QUERY }; self.serialization.extend(percent_encode(&query_bytes, set)); remaining } fn fragment_only(mut self, base_url: &Url, mut input: Input<'_>) -> ParseResult { let before_fragment = match base_url.fragment_start { Some(i) => base_url.slice(..i), None => &*base_url.serialization, }; debug_assert!(self.serialization.is_empty()); self.serialization .reserve(before_fragment.len() + input.chars.as_str().len()); self.serialization.push_str(before_fragment); self.serialization.push('#'); let next = input.next(); debug_assert!(next == Some('#')); self.parse_fragment(input); Ok(Url { serialization: self.serialization, fragment_start: Some(to_u32(before_fragment.len())?), ..*base_url }) } pub fn parse_fragment(&mut self, mut input: Input<'_>) { while let Some((c, utf8_c)) = input.next_utf8() { if c == '\0' { self.log_violation(SyntaxViolation::NullInFragment) } else { self.check_url_code_point(c, &input); } self.serialization .extend(utf8_percent_encode(utf8_c, FRAGMENT)); } } fn check_url_code_point(&self, c: char, input: &Input<'_>) { if let Some(vfn) = 
self.violation_fn {
            if c == '%' {
                let mut input = input.clone();
                if !matches!((input.next(), input.next()), (Some(a), Some(b))
                             if a.is_ascii_hexdigit() && b.is_ascii_hexdigit())
                {
                    vfn(SyntaxViolation::PercentDecode)
                }
            } else if !is_url_code_point(c) {
                vfn(SyntaxViolation::NonUrlCodePoint)
            }
        }
    }
}

// Non URL code points:
// U+0000 to U+0020 (space)
// " # % < > [ \ ] ^ ` { | }
// U+007F to U+009F
// surrogates
// U+FDD0 to U+FDEF
// Last two of each plane: U+__FFFE to U+__FFFF for __ in 00 to 10 hex
/// Returns `true` if `c` is a URL code point, i.e. it is *not* one of the
/// code points listed in the comment above.
///
/// Every supplementary plane contributes `U+__0000..=U+__FFFD`; only the last
/// two code points of each plane are excluded. Plane 14 previously started at
/// `U+E1000` (the RFC 3987 `ucschar` range), which wrongly classified
/// `U+E0000..=U+E0FFF` as non-URL code points.
#[inline]
fn is_url_code_point(c: char) -> bool {
    matches!(c,
        'a'..='z' |
        'A'..='Z' |
        '0'..='9' |
        '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | '-' |
        '.' | '/' | ':' | ';' | '=' | '?' | '@' | '_' | '~' |
        '\u{A0}'..='\u{D7FF}' | '\u{E000}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}' |
        '\u{10000}'..='\u{1FFFD}' | '\u{20000}'..='\u{2FFFD}' |
        '\u{30000}'..='\u{3FFFD}' | '\u{40000}'..='\u{4FFFD}' |
        '\u{50000}'..='\u{5FFFD}' | '\u{60000}'..='\u{6FFFD}' |
        '\u{70000}'..='\u{7FFFD}' | '\u{80000}'..='\u{8FFFD}' |
        '\u{90000}'..='\u{9FFFD}' | '\u{A0000}'..='\u{AFFFD}' |
        '\u{B0000}'..='\u{BFFFD}' | '\u{C0000}'..='\u{CFFFD}' |
        '\u{D0000}'..='\u{DFFFD}' | '\u{E0000}'..='\u{EFFFD}' |
        '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}')
}

/// https://url.spec.whatwg.org/#c0-controls-and-space
#[inline]
fn c0_control_or_space(ch: char) -> bool {
    ch <= ' ' // U+0000 to U+0020
}

/// https://infra.spec.whatwg.org/#ascii-tab-or-newline
#[inline]
fn ascii_tab_or_new_line(ch: char) -> bool {
    matches!(ch, '\t' | '\r' | '\n')
}

/// https://url.spec.whatwg.org/#ascii-alpha
#[inline]
pub fn ascii_alpha(ch: char) -> bool {
    ch.is_ascii_alphabetic()
}

/// Converts a byte offset to `u32`, failing with `ParseError::Overflow` when
/// the value does not fit.
#[inline]
pub fn to_u32(i: usize) -> ParseResult<u32> {
    if i <= u32::MAX as usize {
        Ok(i as u32)
    } else {
        Err(ParseError::Overflow)
    }
}

/// Whether `segment` is a Windows drive letter whose second byte is `:`
/// (as opposed to the also-accepted `|`).
fn is_normalized_windows_drive_letter(segment: &str) -> bool {
    is_windows_drive_letter(segment) && segment.as_bytes()[1] == b':'
}

/// Whether `segment` is exactly a Windows drive letter: two bytes long,
/// an ASCII letter followed by `:` or `|`.
///
/// Only the segment itself is inspected; checking the scheme or the number
/// of path segments is left to the caller.
#[inline]
pub fn is_windows_drive_letter(segment: &str) -> bool {
    segment.len() == 2 && starts_with_windows_drive_letter(segment)
}

/// Whether path starts with a root slash
/// and a windows drive letter eg: "/c:" or "/a:/"
fn path_starts_with_windows_drive_letter(s: &str) -> bool {
    if let Some(c) = s.as_bytes().first() {
        // The first byte is ASCII when it matches, so slicing at 1 stays on a
        // character boundary.
        matches!(c, b'/' | b'\\' | b'?' | b'#') && starts_with_windows_drive_letter(&s[1..])
    } else {
        false
    }
}

/// Whether `s` begins with an ASCII letter followed by `:` or `|`, and that
/// pair is either the whole string or is followed by `/`, `\`, `?` or `#`.
fn starts_with_windows_drive_letter(s: &str) -> bool {
    s.len() >= 2
        && ascii_alpha(s.as_bytes()[0] as char)
        && matches!(s.as_bytes()[1], b':' | b'|')
        && (s.len() == 2 || matches!(s.as_bytes()[2], b'/' | b'\\' | b'?' | b'#'))
}

/// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
fn starts_with_windows_drive_letter_segment(input: &Input<'_>) -> bool {
    let mut input = input.clone();
    match (input.next(), input.next(), input.next()) {
        // its first two code points are a Windows drive letter
        // its third code point is U+002F (/), U+005C (\), U+003F (?), or U+0023 (#).
        (Some(a), Some(b), Some(c))
            if ascii_alpha(a) && matches!(b, ':' | '|') && matches!(c, '/' | '\\' | '?' | '#') =>
        {
            true
        }
        // its first two code points are a Windows drive letter
        // its length is 2
        (Some(a), Some(b), None) if ascii_alpha(a) && matches!(b, ':' | '|') => true,
        _ => false,
    }
}
url-2.5.2/src/path_segments.rs000064400000000000000000000207501046102023000144360ustar 00000000000000
// Copyright 2016 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 or the MIT license
// , at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use crate::parser::{self, to_u32, SchemeType};
use crate::Url;
use std::str;

/// Exposes methods to manipulate the path of an URL that is not cannot-be-base.
///
/// The path always starts with a `/` slash, and is made of slash-separated segments.
/// There is always at least one segment (which may be the empty string).
/// /// Examples: /// /// ```rust /// use url::Url; /// # use std::error::Error; /// /// # fn run() -> Result<(), Box> { /// let mut url = Url::parse("mailto:me@example.com")?; /// assert!(url.path_segments_mut().is_err()); /// /// let mut url = Url::parse("http://example.net/foo/index.html")?; /// url.path_segments_mut().map_err(|_| "cannot be base")? /// .pop().push("img").push("2/100%.png"); /// assert_eq!(url.as_str(), "http://example.net/foo/img/2%2F100%25.png"); /// # Ok(()) /// # } /// # run().unwrap(); /// ``` #[derive(Debug)] pub struct PathSegmentsMut<'a> { url: &'a mut Url, after_first_slash: usize, after_path: String, old_after_path_position: u32, } // Not re-exported outside the crate pub fn new(url: &mut Url) -> PathSegmentsMut<'_> { let after_path = url.take_after_path(); let old_after_path_position = to_u32(url.serialization.len()).unwrap(); // Special urls always have a non empty path if SchemeType::from(url.scheme()).is_special() { debug_assert!(url.byte_at(url.path_start) == b'/'); } else { debug_assert!( url.serialization.len() == url.path_start as usize || url.byte_at(url.path_start) == b'/' ); } PathSegmentsMut { after_first_slash: url.path_start as usize + "/".len(), url, old_after_path_position, after_path, } } impl<'a> Drop for PathSegmentsMut<'a> { fn drop(&mut self) { self.url .restore_after_path(self.old_after_path_position, &self.after_path) } } impl<'a> PathSegmentsMut<'a> { /// Remove all segments in the path, leaving the minimal `url.path() == "/"`. /// /// Returns `&mut Self` so that method calls can be chained. /// /// Example: /// /// ```rust /// use url::Url; /// # use std::error::Error; /// /// # fn run() -> Result<(), Box> { /// let mut url = Url::parse("https://github.com/servo/rust-url/")?; /// url.path_segments_mut().map_err(|_| "cannot be base")? 
/// .clear().push("logout"); /// assert_eq!(url.as_str(), "https://github.com/logout"); /// # Ok(()) /// # } /// # run().unwrap(); /// ``` pub fn clear(&mut self) -> &mut Self { self.url.serialization.truncate(self.after_first_slash); self } /// Remove the last segment of this URL’s path if it is empty, /// except if these was only one segment to begin with. /// /// In other words, remove one path trailing slash, if any, /// unless it is also the initial slash (so this does nothing if `url.path() == "/")`. /// /// Returns `&mut Self` so that method calls can be chained. /// /// Example: /// /// ```rust /// use url::Url; /// # use std::error::Error; /// /// # fn run() -> Result<(), Box> { /// let mut url = Url::parse("https://github.com/servo/rust-url/")?; /// url.path_segments_mut().map_err(|_| "cannot be base")? /// .push("pulls"); /// assert_eq!(url.as_str(), "https://github.com/servo/rust-url//pulls"); /// /// let mut url = Url::parse("https://github.com/servo/rust-url/")?; /// url.path_segments_mut().map_err(|_| "cannot be base")? /// .pop_if_empty().push("pulls"); /// assert_eq!(url.as_str(), "https://github.com/servo/rust-url/pulls"); /// # Ok(()) /// # } /// # run().unwrap(); /// ``` pub fn pop_if_empty(&mut self) -> &mut Self { if self.after_first_slash >= self.url.serialization.len() { return self; } if self.url.serialization[self.after_first_slash..].ends_with('/') { self.url.serialization.pop(); } self } /// Remove the last segment of this URL’s path. /// /// If the path only has one segment, make it empty such that `url.path() == "/"`. /// /// Returns `&mut Self` so that method calls can be chained. pub fn pop(&mut self) -> &mut Self { if self.after_first_slash >= self.url.serialization.len() { return self; } let last_slash = self.url.serialization[self.after_first_slash..] .rfind('/') .unwrap_or(0); self.url .serialization .truncate(self.after_first_slash + last_slash); self } /// Append the given segment at the end of this URL’s path. 
/// /// See the documentation for `.extend()`. /// /// Returns `&mut Self` so that method calls can be chained. pub fn push(&mut self, segment: &str) -> &mut Self { self.extend(Some(segment)) } /// Append each segment from the given iterator at the end of this URL’s path. /// /// Each segment is percent-encoded like in `Url::parse` or `Url::join`, /// except that `%` and `/` characters are also encoded (to `%25` and `%2F`). /// This is unlike `Url::parse` where `%` is left as-is in case some of the input /// is already percent-encoded, and `/` denotes a path segment separator.) /// /// Note that, in addition to slashes between new segments, /// this always adds a slash between the existing path and the new segments /// *except* if the existing path is `"/"`. /// If the previous last segment was empty (if the path had a trailing slash) /// the path after `.extend()` will contain two consecutive slashes. /// If that is undesired, call `.pop_if_empty()` first. /// /// To obtain a behavior similar to `Url::join`, call `.pop()` unconditionally first. /// /// Returns `&mut Self` so that method calls can be chained. /// /// Example: /// /// ```rust /// use url::Url; /// # use std::error::Error; /// /// # fn run() -> Result<(), Box> { /// let mut url = Url::parse("https://github.com/")?; /// let org = "servo"; /// let repo = "rust-url"; /// let issue_number = "188"; /// url.path_segments_mut().map_err(|_| "cannot be base")? /// .extend(&[org, repo, "issues", issue_number]); /// assert_eq!(url.as_str(), "https://github.com/servo/rust-url/issues/188"); /// # Ok(()) /// # } /// # run().unwrap(); /// ``` /// /// In order to make sure that parsing the serialization of an URL gives the same URL, /// a segment is ignored if it is `"."` or `".."`: /// /// ```rust /// use url::Url; /// # use std::error::Error; /// /// # fn run() -> Result<(), Box> { /// let mut url = Url::parse("https://github.com/servo")?; /// url.path_segments_mut().map_err(|_| "cannot be base")? 
/// .extend(&["..", "rust-url", ".", "pulls"]); /// assert_eq!(url.as_str(), "https://github.com/servo/rust-url/pulls"); /// # Ok(()) /// # } /// # run().unwrap(); /// ``` pub fn extend(&mut self, segments: I) -> &mut Self where I: IntoIterator, I::Item: AsRef, { let scheme_type = SchemeType::from(self.url.scheme()); let path_start = self.url.path_start as usize; self.url.mutate(|parser| { parser.context = parser::Context::PathSegmentSetter; for segment in segments { let segment = segment.as_ref(); if matches!(segment, "." | "..") { continue; } if parser.serialization.len() > path_start + 1 // Non special url's path might still be empty || parser.serialization.len() == path_start { parser.serialization.push('/'); } let mut has_host = true; // FIXME account for this? parser.parse_path( scheme_type, &mut has_host, path_start, parser::Input::new_no_trim(segment), ); } }); self } } url-2.5.2/src/quirks.rs000064400000000000000000000247071046102023000131210ustar 00000000000000// Copyright 2016 The rust-url developers. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! Getters and setters for URL components implemented per //! //! Unless you need to be interoperable with web browsers, //! you probably want to use `Url` method instead. use crate::parser::{default_port, Context, Input, Parser, SchemeType}; use crate::{Host, ParseError, Position, Url}; /// Internal components / offsets of a URL. 
///
/// Each offset is a byte index into the URL's serialization:
///
/// ```text
/// https://user:pass@example.com:1234/foo/bar?baz#quux
///      |      |     |          |^^^^|       |   |
///      |      |     |          ||   |       |   `----- fragment_start
///      |      |     |          ||   |       `--------- query_start
///      |      |     |          ||   `----------------- path_start
///      |      |     |          |`--------------------- port
///      |      |     |          `---------------------- host_end
///      |      |     `--------------------------------- host_start
///      |      `--------------------------------------- username_end
///      `---------------------------------------------- scheme_end
/// ```
#[derive(Copy, Clone)]
#[cfg(feature = "expose_internals")]
pub struct InternalComponents {
    /// Offset of the `:` that terminates the scheme.
    pub scheme_end: u32,
    /// Offset just past the username.
    pub username_end: u32,
    /// Offset of the first byte of the host.
    pub host_start: u32,
    /// Offset just past the host.
    pub host_end: u32,
    /// Port number, when the URL stores one.
    pub port: Option<u16>,
    /// Offset of the first byte of the path.
    pub path_start: u32,
    /// Offset of the `?` that introduces the query, if any.
    pub query_start: Option<u32>,
    /// Offset of the `#` that introduces the fragment, if any.
    pub fragment_start: Option<u32>,
}

/// Internal component / parsed offsets of the URL.
///
/// This can be useful for implementing efficient serialization
/// for the URL.
#[cfg(feature = "expose_internals")]
pub fn internal_components(url: &Url) -> InternalComponents {
    InternalComponents {
        scheme_end: url.scheme_end,
        username_end: url.username_end,
        host_start: url.host_start,
        host_end: url.host_end,
        port: url.port,
        path_start: url.path_start,
        query_start: url.query_start,
        fragment_start: url.fragment_start,
    }
}

/// Parses `domain` as a host and returns the resulting domain string.
///
/// Returns an empty string when parsing fails or the host is not a domain
/// (for example an IP address).
pub fn domain_to_ascii(domain: &str) -> String {
    match Host::parse(domain) {
        Ok(Host::Domain(domain)) => domain,
        _ => String::new(),
    }
}

/// Parses `domain` as a host and returns its Unicode form.
///
/// Conversion errors reported by `idna::domain_to_unicode` are ignored.
/// Returns an empty string when parsing fails or the host is not a domain.
pub fn domain_to_unicode(domain: &str) -> String {
    match Host::parse(domain) {
        Ok(Host::Domain(ref domain)) => {
            let (unicode, _errors) = idna::domain_to_unicode(domain);
            unicode
        }
        _ => String::new(),
    }
}

/// Getter for the `href` attribute of the WHATWG `URL` interface.
pub fn href(url: &Url) -> &str {
    url.as_str()
}

/// Setter for the `href` attribute of the WHATWG `URL` interface.
///
/// On a parse error `url` is left untouched and the error is returned.
pub fn set_href(url: &mut Url, value: &str) -> Result<(), ParseError> {
    *url = Url::parse(value)?;
    Ok(())
}

/// Getter for the `origin` attribute of the WHATWG `URL` interface.
pub fn origin(url: &Url) -> String {
    url.origin().ascii_serialization()
}

/// Getter for the `protocol` attribute of the WHATWG `URL` interface.
///
/// The returned slice is the scheme followed by its `:` delimiter.
#[inline]
pub fn protocol(url: &Url) -> &str {
    &url.as_str()[..url.scheme().len() + ":".len()]
}

/// Setter for the `protocol` attribute of the WHATWG `URL` interface.
#[allow(clippy::result_unit_err)] pub fn set_protocol(url: &mut Url, mut new_protocol: &str) -> Result<(), ()> { // The scheme state in the spec ignores everything after the first `:`, // but `set_scheme` errors if there is more. if let Some(position) = new_protocol.find(':') { new_protocol = &new_protocol[..position]; } url.set_scheme(new_protocol) } /// Getter for #[inline] pub fn username(url: &Url) -> &str { url.username() } /// Setter for #[allow(clippy::result_unit_err)] pub fn set_username(url: &mut Url, new_username: &str) -> Result<(), ()> { url.set_username(new_username) } /// Getter for #[inline] pub fn password(url: &Url) -> &str { url.password().unwrap_or("") } /// Setter for #[allow(clippy::result_unit_err)] pub fn set_password(url: &mut Url, new_password: &str) -> Result<(), ()> { url.set_password(if new_password.is_empty() { None } else { Some(new_password) }) } /// Getter for #[inline] pub fn host(url: &Url) -> &str { &url[Position::BeforeHost..Position::AfterPort] } /// Setter for #[allow(clippy::result_unit_err)] pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> { // If context object’s url’s cannot-be-a-base-URL flag is set, then return. 
if url.cannot_be_a_base() { return Err(()); } // Host parsing rules are strict, // We don't want to trim the input let input = Input::new_no_trim(new_host); let host; let opt_port; { let scheme = url.scheme(); let scheme_type = SchemeType::from(scheme); if scheme_type == SchemeType::File && new_host.is_empty() { url.set_host_internal(Host::Domain(String::new()), None); return Ok(()); } if let Ok((h, remaining)) = Parser::parse_host(input, scheme_type) { host = h; opt_port = if let Some(remaining) = remaining.split_prefix(':') { if remaining.is_empty() { None } else { Parser::parse_port(remaining, || default_port(scheme), Context::Setter) .ok() .map(|(port, _remaining)| port) } } else { None }; } else { return Err(()); } } // Make sure we won't set an empty host to a url with a username or a port if host == Host::Domain("".to_string()) && (!username(url).is_empty() || matches!(opt_port, Some(Some(_))) || url.port().is_some()) { return Err(()); } url.set_host_internal(host, opt_port); Ok(()) } /// Getter for #[inline] pub fn hostname(url: &Url) -> &str { url.host_str().unwrap_or("") } /// Setter for #[allow(clippy::result_unit_err)] pub fn set_hostname(url: &mut Url, new_hostname: &str) -> Result<(), ()> { if url.cannot_be_a_base() { return Err(()); } // Host parsing rules are strict we don't want to trim the input let input = Input::new_no_trim(new_hostname); let scheme_type = SchemeType::from(url.scheme()); if scheme_type == SchemeType::File && new_hostname.is_empty() { url.set_host_internal(Host::Domain(String::new()), None); return Ok(()); } if let Ok((host, _remaining)) = Parser::parse_host(input, scheme_type) { if let Host::Domain(h) = &host { if h.is_empty() { // Empty host on special not file url if SchemeType::from(url.scheme()) == SchemeType::SpecialNotFile // Port with an empty host ||!port(url).is_empty() // Empty host that includes credentials || !url.username().is_empty() || !url.password().unwrap_or("").is_empty() { return Err(()); } } } 
url.set_host_internal(host, None); Ok(()) } else { Err(()) } } /// Getter for #[inline] pub fn port(url: &Url) -> &str { &url[Position::BeforePort..Position::AfterPort] } /// Setter for #[allow(clippy::result_unit_err)] pub fn set_port(url: &mut Url, new_port: &str) -> Result<(), ()> { let result; { // has_host implies !cannot_be_a_base let scheme = url.scheme(); if !url.has_host() || url.host() == Some(Host::Domain("")) || scheme == "file" { return Err(()); } result = Parser::parse_port( Input::new_no_trim(new_port), || default_port(scheme), Context::Setter, ) } if let Ok((new_port, _remaining)) = result { url.set_port_internal(new_port); Ok(()) } else { Err(()) } } /// Getter for #[inline] pub fn pathname(url: &Url) -> &str { url.path() } /// Setter for pub fn set_pathname(url: &mut Url, new_pathname: &str) { if url.cannot_be_a_base() { return; } if new_pathname.starts_with('/') || (SchemeType::from(url.scheme()).is_special() // \ is a segment delimiter for 'special' URLs" && new_pathname.starts_with('\\')) { url.set_path(new_pathname) } else if SchemeType::from(url.scheme()).is_special() || !new_pathname.is_empty() || !url.has_host() { let mut path_to_set = String::from("/"); path_to_set.push_str(new_pathname); url.set_path(&path_to_set) } else { url.set_path(new_pathname) } } /// Getter for pub fn search(url: &Url) -> &str { trim(&url[Position::AfterPath..Position::AfterQuery]) } /// Setter for pub fn set_search(url: &mut Url, new_search: &str) { url.set_query(match new_search { "" => None, _ if new_search.starts_with('?') => Some(&new_search[1..]), _ => Some(new_search), }) } /// Getter for pub fn hash(url: &Url) -> &str { trim(&url[Position::AfterQuery..]) } /// Setter for pub fn set_hash(url: &mut Url, new_hash: &str) { url.set_fragment(match new_hash { // If the given value is the empty string, // then set context object’s url’s fragment to null and return. "" => None, // Let input be the given value with a single leading U+0023 (#) removed, if any. 
_ if new_hash.starts_with('#') => Some(&new_hash[1..]), _ => Some(new_hash), }) } fn trim(s: &str) -> &str { if s.len() == 1 { "" } else { s } } url-2.5.2/src/slicing.rs000064400000000000000000000161571046102023000132330ustar 00000000000000// Copyright 2016 The rust-url developers. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. use crate::Url; use std::ops::{Index, Range, RangeFrom, RangeFull, RangeTo}; impl Index for Url { type Output = str; fn index(&self, _: RangeFull) -> &str { &self.serialization } } impl Index> for Url { type Output = str; fn index(&self, range: RangeFrom) -> &str { &self.serialization[self.index(range.start)..] } } impl Index> for Url { type Output = str; fn index(&self, range: RangeTo) -> &str { &self.serialization[..self.index(range.end)] } } impl Index> for Url { type Output = str; fn index(&self, range: Range) -> &str { &self.serialization[self.index(range.start)..self.index(range.end)] } } // Counts how many base-10 digits are required to represent n in the given base fn count_digits(n: u16) -> usize { match n { 0..=9 => 1, 10..=99 => 2, 100..=999 => 3, 1000..=9999 => 4, 10000..=65535 => 5, } } #[test] fn test_count_digits() { assert_eq!(count_digits(0), 1); assert_eq!(count_digits(1), 1); assert_eq!(count_digits(9), 1); assert_eq!(count_digits(10), 2); assert_eq!(count_digits(99), 2); assert_eq!(count_digits(100), 3); assert_eq!(count_digits(9999), 4); assert_eq!(count_digits(65535), 5); } /// Indicates a position within a URL based on its components. 
///
/// A range of positions can be used for slicing `Url`:
///
/// ```rust
/// # use url::{Url, Position};
/// # fn something(some_url: Url) {
/// let serialization: &str = &some_url[..];
/// let serialization_without_fragment: &str = &some_url[..Position::AfterQuery];
/// let authority: &str = &some_url[Position::BeforeUsername..Position::AfterPort];
/// let data_url_payload: &str = &some_url[Position::BeforePath..Position::AfterQuery];
/// let scheme_relative: &str = &some_url[Position::BeforeUsername..];
/// # }
/// ```
///
/// In a pseudo-grammar (where `[`…`]?` makes a sub-sequence optional),
/// URL components and delimiters that separate them are:
///
/// ```notrust
/// url =
///     scheme ":"
///     [ "//" [ username [ ":" password ]? "@" ]? host [ ":" port ]? ]?
///     path [ "?" query ]? [ "#" fragment ]?
/// ```
///
/// When a given component is not present,
/// its "before" and "after" positions are the same
/// (so that `&some_url[BeforeFoo..AfterFoo]` is the empty string)
/// and component ordering is preserved
/// (so that a missing query "is between" a path and a fragment).
///
/// The end of a component and the start of the next are either the same or separated
/// by a delimiter.
/// (Note that the initial `/` of a path is considered part of the path here, not a delimiter.)
/// For example, `&url[..BeforeFragment]` would include a `#` delimiter (if present in `url`),
/// so `&url[..AfterQuery]` might be desired instead.
///
/// `BeforeScheme` and `AfterFragment` are always the start and end of the entire URL,
/// so `&url[BeforeScheme..X]` is the same as `&url[..X]`
/// and `&url[X..AfterFragment]` is the same as `&url[X..]`.
#[derive(Copy, Clone, Debug)]
pub enum Position {
    BeforeScheme,
    AfterScheme,
    BeforeUsername,
    AfterUsername,
    BeforePassword,
    AfterPassword,
    BeforeHost,
    AfterHost,
    BeforePort,
    AfterPort,
    BeforePath,
    AfterPath,
    BeforeQuery,
    AfterQuery,
    BeforeFragment,
    AfterFragment,
}

impl Url {
    /// Converts a `Position` into a byte offset within `self.serialization`.
    #[inline]
    fn index(&self, position: Position) -> usize {
        // Offset used whenever every later component is absent.
        let end = self.serialization.len();
        match position {
            Position::BeforeScheme => 0,

            Position::AfterScheme => self.scheme_end as usize,

            Position::BeforeUsername => {
                let scheme_end = self.scheme_end as usize;
                if !self.has_authority() {
                    // Without an authority only the `:` separates scheme and path.
                    debug_assert!(self.byte_at(self.scheme_end) == b':');
                    debug_assert!(self.scheme_end + ":".len() as u32 == self.username_end);
                    scheme_end + ":".len()
                } else {
                    scheme_end + "://".len()
                }
            }

            Position::AfterUsername => self.username_end as usize,

            Position::BeforePassword => {
                let username_end = self.username_end as usize;
                if self.has_authority() && self.byte_at(self.username_end) == b':' {
                    // Skip the `:` between username and password.
                    username_end + ":".len()
                } else {
                    debug_assert!(self.username_end == self.host_start);
                    username_end
                }
            }

            Position::AfterPassword => {
                let host_start = self.host_start as usize;
                if self.has_authority() && self.byte_at(self.username_end) == b':' {
                    // The password ends just before the `@` that precedes the host.
                    debug_assert!(self.byte_at(self.host_start - "@".len() as u32) == b'@');
                    host_start - "@".len()
                } else {
                    debug_assert!(self.username_end == self.host_start);
                    host_start
                }
            }

            Position::BeforeHost => self.host_start as usize,

            Position::AfterHost => self.host_end as usize,

            Position::BeforePort => match self.port {
                Some(_) => {
                    debug_assert!(self.byte_at(self.host_end) == b':');
                    self.host_end as usize + ":".len()
                }
                None => self.host_end as usize,
            },

            Position::AfterPort => match self.port {
                Some(port) => {
                    debug_assert!(self.byte_at(self.host_end) == b':');
                    self.host_end as usize + ":".len() + count_digits(port)
                }
                None => self.host_end as usize,
            },

            Position::BeforePath => self.path_start as usize,

            // The path ends where the query starts, else where the fragment starts.
            Position::AfterPath => self
                .query_start
                .or(self.fragment_start)
                .map_or(end, |offset| offset as usize),

            Position::BeforeQuery => match self.query_start {
                Some(q) => {
                    debug_assert!(self.byte_at(q) == b'?');
                    q as usize + "?".len()
                }
                None => self.fragment_start.map_or(end, |f| f as usize),
            },

            Position::AfterQuery => self.fragment_start.map_or(end, |f| f as usize),

            Position::BeforeFragment => {
                if let Some(f) = self.fragment_start {
                    debug_assert!(self.byte_at(f) == b'#');
                    f as usize + "#".len()
                } else {
                    end
                }
            }

            Position::AfterFragment => end,
        }
    }
}
url-2.5.2/tests/expected_failures.txt000064400000000000000000000032631046102023000160360ustar 00000000000000 against against <\\/localhost//pig> against against against against against against against against against against against against against against against against against against set hostname to set hostname to set hostname to set hostname to <> set pathname to <> set port to set href to set pathname to <\\\\> set pathname to set pathname to set pathname to set pathname to set pathname to set pathname to

url-2.5.2/tests/setters_tests.json000064400000000000000000002215741046102023000154170ustar 00000000000000{ "comment": [ "# Pulled from https://github.com/web-platform-tests/wpt/blob/befe66343e5f21dc464c8c772c6d20695936714f/url/resources/setters_tests.json", "## Tests for setters of https://url.spec.whatwg.org/#urlutils-members", "", "This file contains a JSON object.", "Other than 'comment', each key is an attribute of the `URL` interface", "defined in WHATWG’s URL Standard.", "The values are arrays of test case objects for that attribute.", "", "To run a test case for the attribute `attr`:", "", "* Create a new `URL` object with the value for the 'href' key", " the constructor single parameter. (Without a base URL.)", " This must not throw.", "* Set the attribute `attr` to (invoke its setter with)", " with the value of for 'new_value' key.", "* The value for the 'expected' key is another object.", " For each `key` / `value` pair of that object,", " get the attribute `key` (invoke its getter).", " The returned string must be equal to `value`.", "", "Note: the 'href' setter is already covered by urltestdata.json." ], "protocol": [ { "comment": "The empty string is not a valid scheme. 
Setter leaves the URL unchanged.", "href": "a://example.net", "new_value": "", "expected": { "href": "a://example.net", "protocol": "a:" } }, { "href": "a://example.net", "new_value": "b", "expected": { "href": "b://example.net", "protocol": "b:" } }, { "href": "javascript:alert(1)", "new_value": "defuse", "expected": { "href": "defuse:alert(1)", "protocol": "defuse:" } }, { "comment": "Upper-case ASCII is lower-cased", "href": "a://example.net", "new_value": "B", "expected": { "href": "b://example.net", "protocol": "b:" } }, { "comment": "Non-ASCII is rejected", "href": "a://example.net", "new_value": "é", "expected": { "href": "a://example.net", "protocol": "a:" } }, { "comment": "No leading digit", "href": "a://example.net", "new_value": "0b", "expected": { "href": "a://example.net", "protocol": "a:" } }, { "comment": "No leading punctuation", "href": "a://example.net", "new_value": "+b", "expected": { "href": "a://example.net", "protocol": "a:" } }, { "href": "a://example.net", "new_value": "bC0+-.", "expected": { "href": "bc0+-.://example.net", "protocol": "bc0+-.:" } }, { "comment": "Only some punctuation is acceptable", "href": "a://example.net", "new_value": "b,c", "expected": { "href": "a://example.net", "protocol": "a:" } }, { "comment": "Non-ASCII is rejected", "href": "a://example.net", "new_value": "bé", "expected": { "href": "a://example.net", "protocol": "a:" } }, { "comment": "Can’t switch from URL containing username/password/port to file", "href": "http://test@example.net", "new_value": "file", "expected": { "href": "http://test@example.net/", "protocol": "http:" } }, { "href": "https://example.net:1234", "new_value": "file", "expected": { "href": "https://example.net:1234/", "protocol": "https:" } }, { "href": "wss://x:x@example.net:1234", "new_value": "file", "expected": { "href": "wss://x:x@example.net:1234/", "protocol": "wss:" } }, { "comment": "Can’t switch from file URL with no host", "href": "file://localhost/", "new_value": "http", 
"expected": { "href": "file:///", "protocol": "file:" } }, { "href": "file:///test", "new_value": "https", "expected": { "href": "file:///test", "protocol": "file:" } }, { "href": "file:", "new_value": "wss", "expected": { "href": "file:///", "protocol": "file:" } }, { "comment": "Can’t switch from special scheme to non-special", "href": "http://example.net", "new_value": "b", "expected": { "href": "http://example.net/", "protocol": "http:" } }, { "href": "file://hi/path", "new_value": "s", "expected": { "href": "file://hi/path", "protocol": "file:" } }, { "href": "https://example.net", "new_value": "s", "expected": { "href": "https://example.net/", "protocol": "https:" } }, { "href": "ftp://example.net", "new_value": "test", "expected": { "href": "ftp://example.net/", "protocol": "ftp:" } }, { "comment": "Cannot-be-a-base URL doesn’t have a host, but URL in a special scheme must.", "href": "mailto:me@example.net", "new_value": "http", "expected": { "href": "mailto:me@example.net", "protocol": "mailto:" } }, { "comment": "Can’t switch from non-special scheme to special", "href": "ssh://me@example.net", "new_value": "http", "expected": { "href": "ssh://me@example.net", "protocol": "ssh:" } }, { "href": "ssh://me@example.net", "new_value": "https", "expected": { "href": "ssh://me@example.net", "protocol": "ssh:" } }, { "href": "ssh://me@example.net", "new_value": "file", "expected": { "href": "ssh://me@example.net", "protocol": "ssh:" } }, { "href": "ssh://example.net", "new_value": "file", "expected": { "href": "ssh://example.net", "protocol": "ssh:" } }, { "href": "nonsense:///test", "new_value": "https", "expected": { "href": "nonsense:///test", "protocol": "nonsense:" } }, { "comment": "Stuff after the first ':' is ignored", "href": "http://example.net", "new_value": "https:foo : bar", "expected": { "href": "https://example.net/", "protocol": "https:" } }, { "comment": "Stuff after the first ':' is ignored", "href": "data:text/html,

Test", "new_value": "view-source+data:foo : bar", "expected": { "href": "view-source+data:text/html,

Test", "protocol": "view-source+data:" } }, { "comment": "Port is set to null if it is the default for new scheme.", "href": "http://foo.com:443/", "new_value": "https", "expected": { "href": "https://foo.com/", "protocol": "https:", "port": "" } }, { "comment": "Tab and newline are stripped", "href": "http://test/", "new_value": "h\u000D\u000Att\u0009ps", "expected": { "href": "https://test/", "protocol": "https:", "port": "" } }, { "href": "http://test/", "new_value": "https\u000D", "expected": { "href": "https://test/", "protocol": "https:" } }, { "comment": "Non-tab/newline C0 controls result in no-op", "href": "http://test/", "new_value": "https\u0000", "expected": { "href": "http://test/", "protocol": "http:" } }, { "href": "http://test/", "new_value": "https\u000C", "expected": { "href": "http://test/", "protocol": "http:" } }, { "href": "http://test/", "new_value": "https\u000E", "expected": { "href": "http://test/", "protocol": "http:" } }, { "href": "http://test/", "new_value": "https\u0020", "expected": { "href": "http://test/", "protocol": "http:" } } ], "username": [ { "comment": "No host means no username", "href": "file:///home/you/index.html", "new_value": "me", "expected": { "href": "file:///home/you/index.html", "username": "" } }, { "comment": "No host means no username", "href": "unix:/run/foo.socket", "new_value": "me", "expected": { "href": "unix:/run/foo.socket", "username": "" } }, { "comment": "Cannot-be-a-base means no username", "href": "mailto:you@example.net", "new_value": "me", "expected": { "href": "mailto:you@example.net", "username": "" } }, { "href": "javascript:alert(1)", "new_value": "wario", "expected": { "href": "javascript:alert(1)", "username": "" } }, { "href": "http://example.net", "new_value": "me", "expected": { "href": "http://me@example.net/", "username": "me" } }, { "href": "http://:secret@example.net", "new_value": "me", "expected": { "href": "http://me:secret@example.net/", "username": "me" } }, { "href": 
"http://me@example.net", "new_value": "", "expected": { "href": "http://example.net/", "username": "" } }, { "href": "http://me:secret@example.net", "new_value": "", "expected": { "href": "http://:secret@example.net/", "username": "" } }, { "comment": "UTF-8 percent encoding with the userinfo encode set.", "href": "http://example.net", "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", "expected": { "href": "http://%00%01%09%0A%0D%1F%20!%22%23$%&'()*+,-.%2F09%3A%3B%3C%3D%3E%3F%40AZ%5B%5C%5D%5E_%60az%7B%7C%7D~%7F%C2%80%C2%81%C3%89%C3%A9@example.net/", "username": "%00%01%09%0A%0D%1F%20!%22%23$%&'()*+,-.%2F09%3A%3B%3C%3D%3E%3F%40AZ%5B%5C%5D%5E_%60az%7B%7C%7D~%7F%C2%80%C2%81%C3%89%C3%A9" } }, { "comment": "Bytes already percent-encoded are left as-is.", "href": "http://example.net", "new_value": "%c3%89té", "expected": { "href": "http://%c3%89t%C3%A9@example.net/", "username": "%c3%89t%C3%A9" } }, { "href": "sc:///", "new_value": "x", "expected": { "href": "sc:///", "username": "" } }, { "href": "javascript://x/", "new_value": "wario", "expected": { "href": "javascript://wario@x/", "username": "wario" } }, { "href": "file://test/", "new_value": "test", "expected": { "href": "file://test/", "username": "" } } ], "password": [ { "comment": "No host means no password", "href": "file:///home/me/index.html", "new_value": "secret", "expected": { "href": "file:///home/me/index.html", "password": "" } }, { "comment": "No host means no password", "href": "unix:/run/foo.socket", "new_value": "secret", "expected": { "href": "unix:/run/foo.socket", "password": "" } }, { "comment": "Cannot-be-a-base means no password", "href": "mailto:me@example.net", "new_value": "secret", "expected": { "href": "mailto:me@example.net", "password": "" } }, { "href": "http://example.net", "new_value": "secret", "expected": { "href": "http://:secret@example.net/", "password": "secret" } }, { "href": "http://me@example.net", "new_value": "secret", 
"expected": { "href": "http://me:secret@example.net/", "password": "secret" } }, { "href": "http://:secret@example.net", "new_value": "", "expected": { "href": "http://example.net/", "password": "" } }, { "href": "http://me:secret@example.net", "new_value": "", "expected": { "href": "http://me@example.net/", "password": "" } }, { "comment": "UTF-8 percent encoding with the userinfo encode set.", "href": "http://example.net", "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", "expected": { "href": "http://:%00%01%09%0A%0D%1F%20!%22%23$%&'()*+,-.%2F09%3A%3B%3C%3D%3E%3F%40AZ%5B%5C%5D%5E_%60az%7B%7C%7D~%7F%C2%80%C2%81%C3%89%C3%A9@example.net/", "password": "%00%01%09%0A%0D%1F%20!%22%23$%&'()*+,-.%2F09%3A%3B%3C%3D%3E%3F%40AZ%5B%5C%5D%5E_%60az%7B%7C%7D~%7F%C2%80%C2%81%C3%89%C3%A9" } }, { "comment": "Bytes already percent-encoded are left as-is.", "href": "http://example.net", "new_value": "%c3%89té", "expected": { "href": "http://:%c3%89t%C3%A9@example.net/", "password": "%c3%89t%C3%A9" } }, { "href": "sc:///", "new_value": "x", "expected": { "href": "sc:///", "password": "" } }, { "href": "javascript://x/", "new_value": "bowser", "expected": { "href": "javascript://:bowser@x/", "password": "bowser" } }, { "href": "file://test/", "new_value": "test", "expected": { "href": "file://test/", "password": "" } } ], "host": [ { "comment": "Non-special scheme", "href": "sc://x/", "new_value": "\u0000", "expected": { "href": "sc://x/", "host": "x", "hostname": "x" } }, { "href": "sc://x/", "new_value": "\u0009", "expected": { "href": "sc:///", "host": "", "hostname": "" } }, { "href": "sc://x/", "new_value": "\u000A", "expected": { "href": "sc:///", "host": "", "hostname": "" } }, { "href": "sc://x/", "new_value": "\u000D", "expected": { "href": "sc:///", "host": "", "hostname": "" } }, { "href": "sc://x/", "new_value": " ", "expected": { "href": "sc://x/", "host": "x", "hostname": "x" } }, { "href": "sc://x/", "new_value": "#", 
"expected": { "href": "sc:///", "host": "", "hostname": "" } }, { "href": "sc://x/", "new_value": "/", "expected": { "href": "sc:///", "host": "", "hostname": "" } }, { "href": "sc://x/", "new_value": "?", "expected": { "href": "sc:///", "host": "", "hostname": "" } }, { "href": "sc://x/", "new_value": "@", "expected": { "href": "sc://x/", "host": "x", "hostname": "x" } }, { "href": "sc://x/", "new_value": "ß", "expected": { "href": "sc://%C3%9F/", "host": "%C3%9F", "hostname": "%C3%9F" } }, { "comment": "IDNA Nontransitional_Processing", "href": "https://x/", "new_value": "ß", "expected": { "href": "https://xn--zca/", "host": "xn--zca", "hostname": "xn--zca" } }, { "comment": "Cannot-be-a-base means no host", "href": "mailto:me@example.net", "new_value": "example.com", "expected": { "href": "mailto:me@example.net", "host": "" } }, { "comment": "Cannot-be-a-base means no host", "href": "data:text/plain,Stuff", "new_value": "example.net", "expected": { "href": "data:text/plain,Stuff", "host": "" } }, { "href": "http://example.net", "new_value": "example.com:8080", "expected": { "href": "http://example.com:8080/", "host": "example.com:8080", "hostname": "example.com", "port": "8080" } }, { "comment": "Port number is unchanged if not specified in the new value", "href": "http://example.net:8080", "new_value": "example.com", "expected": { "href": "http://example.com:8080/", "host": "example.com:8080", "hostname": "example.com", "port": "8080" } }, { "comment": "Port number is unchanged if not specified", "href": "http://example.net:8080", "new_value": "example.com:", "expected": { "href": "http://example.com:8080/", "host": "example.com:8080", "hostname": "example.com", "port": "8080" } }, { "comment": "The empty host is not valid for special schemes", "href": "http://example.net", "new_value": "", "expected": { "href": "http://example.net/", "host": "example.net" } }, { "comment": "The empty host is OK for non-special schemes", "href": 
"view-source+http://example.net/foo", "new_value": "", "expected": { "href": "view-source+http:///foo", "host": "" } }, { "comment": "Path-only URLs can gain a host", "href": "a:/foo", "new_value": "example.net", "expected": { "href": "a://example.net/foo", "host": "example.net" } }, { "comment": "IPv4 address syntax is normalized", "href": "http://example.net", "new_value": "0x7F000001:8080", "expected": { "href": "http://127.0.0.1:8080/", "host": "127.0.0.1:8080", "hostname": "127.0.0.1", "port": "8080" } }, { "comment": "IPv6 address syntax is normalized", "href": "http://example.net", "new_value": "[::0:01]:2", "expected": { "href": "http://[::1]:2/", "host": "[::1]:2", "hostname": "[::1]", "port": "2" } }, { "comment": "IPv6 literal address with port, crbug.com/1012416", "href": "http://example.net", "new_value": "[2001:db8::2]:4002", "expected": { "href": "http://[2001:db8::2]:4002/", "host": "[2001:db8::2]:4002", "hostname": "[2001:db8::2]", "port": "4002" } }, { "comment": "Default port number is removed", "href": "http://example.net", "new_value": "example.com:80", "expected": { "href": "http://example.com/", "host": "example.com", "hostname": "example.com", "port": "" } }, { "comment": "Default port number is removed", "href": "https://example.net", "new_value": "example.com:443", "expected": { "href": "https://example.com/", "host": "example.com", "hostname": "example.com", "port": "" } }, { "comment": "Default port number is only removed for the relevant scheme", "href": "https://example.net", "new_value": "example.com:80", "expected": { "href": "https://example.com:80/", "host": "example.com:80", "hostname": "example.com", "port": "80" } }, { "comment": "Port number is removed if new port is scheme default and existing URL has a non-default port", "href": "http://example.net:8080", "new_value": "example.com:80", "expected": { "href": "http://example.com/", "host": "example.com", "hostname": "example.com", "port": "" } }, { "comment": "Stuff after a / 
delimiter is ignored", "href": "http://example.net/path", "new_value": "example.com/stuff", "expected": { "href": "http://example.com/path", "host": "example.com", "hostname": "example.com", "port": "" } }, { "comment": "Stuff after a / delimiter is ignored", "href": "http://example.net/path", "new_value": "example.com:8080/stuff", "expected": { "href": "http://example.com:8080/path", "host": "example.com:8080", "hostname": "example.com", "port": "8080" } }, { "comment": "Stuff after a ? delimiter is ignored", "href": "http://example.net/path", "new_value": "example.com?stuff", "expected": { "href": "http://example.com/path", "host": "example.com", "hostname": "example.com", "port": "" } }, { "comment": "Stuff after a ? delimiter is ignored", "href": "http://example.net/path", "new_value": "example.com:8080?stuff", "expected": { "href": "http://example.com:8080/path", "host": "example.com:8080", "hostname": "example.com", "port": "8080" } }, { "comment": "Stuff after a # delimiter is ignored", "href": "http://example.net/path", "new_value": "example.com#stuff", "expected": { "href": "http://example.com/path", "host": "example.com", "hostname": "example.com", "port": "" } }, { "comment": "Stuff after a # delimiter is ignored", "href": "http://example.net/path", "new_value": "example.com:8080#stuff", "expected": { "href": "http://example.com:8080/path", "host": "example.com:8080", "hostname": "example.com", "port": "8080" } }, { "comment": "Stuff after a \\ delimiter is ignored for special schemes", "href": "http://example.net/path", "new_value": "example.com\\stuff", "expected": { "href": "http://example.com/path", "host": "example.com", "hostname": "example.com", "port": "" } }, { "comment": "Stuff after a \\ delimiter is ignored for special schemes", "href": "http://example.net/path", "new_value": "example.com:8080\\stuff", "expected": { "href": "http://example.com:8080/path", "host": "example.com:8080", "hostname": "example.com", "port": "8080" } }, { "comment": 
"\\ is not a delimiter for non-special schemes, but still forbidden in hosts", "href": "view-source+http://example.net/path", "new_value": "example.com\\stuff", "expected": { "href": "view-source+http://example.net/path", "host": "example.net", "hostname": "example.net", "port": "" } }, { "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error", "href": "view-source+http://example.net/path", "new_value": "example.com:8080stuff2", "expected": { "href": "view-source+http://example.com:8080/path", "host": "example.com:8080", "hostname": "example.com", "port": "8080" } }, { "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error", "href": "http://example.net/path", "new_value": "example.com:8080stuff2", "expected": { "href": "http://example.com:8080/path", "host": "example.com:8080", "hostname": "example.com", "port": "8080" } }, { "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error", "href": "http://example.net/path", "new_value": "example.com:8080+2", "expected": { "href": "http://example.com:8080/path", "host": "example.com:8080", "hostname": "example.com", "port": "8080" } }, { "comment": "Port numbers are 16 bit integers", "href": "http://example.net/path", "new_value": "example.com:65535", "expected": { "href": "http://example.com:65535/path", "host": "example.com:65535", "hostname": "example.com", "port": "65535" } }, { "comment": "Port numbers are 16 bit integers, overflowing is an error. 
Hostname is still set, though.", "href": "http://example.net/path", "new_value": "example.com:65536", "expected": { "href": "http://example.com/path", "host": "example.com", "hostname": "example.com", "port": "" } }, { "comment": "Broken IPv6", "href": "http://example.net/", "new_value": "[google.com]", "expected": { "href": "http://example.net/", "host": "example.net", "hostname": "example.net" } }, { "href": "http://example.net/", "new_value": "[::1.2.3.4x]", "expected": { "href": "http://example.net/", "host": "example.net", "hostname": "example.net" } }, { "href": "http://example.net/", "new_value": "[::1.2.3.]", "expected": { "href": "http://example.net/", "host": "example.net", "hostname": "example.net" } }, { "href": "http://example.net/", "new_value": "[::1.2.]", "expected": { "href": "http://example.net/", "host": "example.net", "hostname": "example.net" } }, { "href": "http://example.net/", "new_value": "[::1.]", "expected": { "href": "http://example.net/", "host": "example.net", "hostname": "example.net" } }, { "href": "file://y/", "new_value": "x:123", "expected": { "href": "file://y/", "host": "y", "hostname": "y", "port": "" } }, { "href": "file://y/", "new_value": "loc%41lhost", "expected": { "href": "file:///", "host": "", "hostname": "", "port": "" } }, { "href": "file://hi/x", "new_value": "", "expected": { "href": "file:///x", "host": "", "hostname": "", "port": "" } }, { "href": "sc://test@test/", "new_value": "", "expected": { "href": "sc://test@test/", "host": "test", "hostname": "test", "username": "test" } }, { "href": "sc://test:12/", "new_value": "", "expected": { "href": "sc://test:12/", "host": "test:12", "hostname": "test", "port": "12" } }, { "comment": "Leading / is not stripped", "href": "http://example.com/", "new_value": "///bad.com", "expected": { "href": "http://example.com/", "host": "example.com", "hostname": "example.com" } }, { "comment": "Leading / is not stripped", "href": "sc://example.com/", "new_value": "///bad.com", 
"expected": { "href": "sc:///", "host": "", "hostname": "" } }, { "href": "https://example.com/", "new_value": "a%C2%ADb", "expected": { "href": "https://ab/", "host": "ab", "hostname": "ab" } }, { "href": "https://example.com/", "new_value": "\u00AD", "expected": { "href": "https://example.com/", "host": "example.com", "hostname": "example.com" } }, { "href": "https://example.com/", "new_value": "%C2%AD", "expected": { "href": "https://example.com/", "host": "example.com", "hostname": "example.com" } }, { "href": "https://example.com/", "new_value": "xn--", "expected": { "href": "https://example.com/", "host": "example.com", "hostname": "example.com" } } ], "hostname": [ { "comment": "Non-special scheme", "href": "sc://x/", "new_value": "\u0000", "expected": { "href": "sc://x/", "host": "x", "hostname": "x" } }, { "href": "sc://x/", "new_value": "\u0009", "expected": { "href": "sc:///", "host": "", "hostname": "" } }, { "href": "sc://x/", "new_value": "\u000A", "expected": { "href": "sc:///", "host": "", "hostname": "" } }, { "href": "sc://x/", "new_value": "\u000D", "expected": { "href": "sc:///", "host": "", "hostname": "" } }, { "href": "sc://x/", "new_value": " ", "expected": { "href": "sc://x/", "host": "x", "hostname": "x" } }, { "href": "sc://x/", "new_value": "#", "expected": { "href": "sc:///", "host": "", "hostname": "" } }, { "href": "sc://x/", "new_value": "/", "expected": { "href": "sc:///", "host": "", "hostname": "" } }, { "href": "sc://x/", "new_value": "?", "expected": { "href": "sc:///", "host": "", "hostname": "" } }, { "href": "sc://x/", "new_value": "@", "expected": { "href": "sc://x/", "host": "x", "hostname": "x" } }, { "comment": "Cannot-be-a-base means no host", "href": "mailto:me@example.net", "new_value": "example.com", "expected": { "href": "mailto:me@example.net", "host": "" } }, { "comment": "Cannot-be-a-base means no host", "href": "data:text/plain,Stuff", "new_value": "example.net", "expected": { "href": "data:text/plain,Stuff", 
"host": "" } }, { "href": "http://example.net:8080", "new_value": "example.com", "expected": { "href": "http://example.com:8080/", "host": "example.com:8080", "hostname": "example.com", "port": "8080" } }, { "comment": "The empty host is not valid for special schemes", "href": "http://example.net", "new_value": "", "expected": { "href": "http://example.net/", "host": "example.net" } }, { "comment": "The empty host is OK for non-special schemes", "href": "view-source+http://example.net/foo", "new_value": "", "expected": { "href": "view-source+http:///foo", "host": "" } }, { "comment": "Path-only URLs can gain a host", "href": "a:/foo", "new_value": "example.net", "expected": { "href": "a://example.net/foo", "host": "example.net" } }, { "comment": "IPv4 address syntax is normalized", "href": "http://example.net:8080", "new_value": "0x7F000001", "expected": { "href": "http://127.0.0.1:8080/", "host": "127.0.0.1:8080", "hostname": "127.0.0.1", "port": "8080" } }, { "comment": "IPv6 address syntax is normalized", "href": "http://example.net", "new_value": "[::0:01]", "expected": { "href": "http://[::1]/", "host": "[::1]", "hostname": "[::1]", "port": "" } }, { "comment": ": delimiter invalidates entire value", "href": "http://example.net/path", "new_value": "example.com:8080", "expected": { "href": "http://example.net/path", "host": "example.net", "hostname": "example.net", "port": "" } }, { "comment": ": delimiter invalidates entire value", "href": "http://example.net:8080/path", "new_value": "example.com:", "expected": { "href": "http://example.net:8080/path", "host": "example.net:8080", "hostname": "example.net", "port": "8080" } }, { "comment": "Stuff after a / delimiter is ignored", "href": "http://example.net/path", "new_value": "example.com/stuff", "expected": { "href": "http://example.com/path", "host": "example.com", "hostname": "example.com", "port": "" } }, { "comment": "Stuff after a ? 
delimiter is ignored", "href": "http://example.net/path", "new_value": "example.com?stuff", "expected": { "href": "http://example.com/path", "host": "example.com", "hostname": "example.com", "port": "" } }, { "comment": "Stuff after a # delimiter is ignored", "href": "http://example.net/path", "new_value": "example.com#stuff", "expected": { "href": "http://example.com/path", "host": "example.com", "hostname": "example.com", "port": "" } }, { "comment": "Stuff after a \\ delimiter is ignored for special schemes", "href": "http://example.net/path", "new_value": "example.com\\stuff", "expected": { "href": "http://example.com/path", "host": "example.com", "hostname": "example.com", "port": "" } }, { "comment": "\\ is not a delimiter for non-special schemes, but still forbidden in hosts", "href": "view-source+http://example.net/path", "new_value": "example.com\\stuff", "expected": { "href": "view-source+http://example.net/path", "host": "example.net", "hostname": "example.net", "port": "" } }, { "comment": "Broken IPv6", "href": "http://example.net/", "new_value": "[google.com]", "expected": { "href": "http://example.net/", "host": "example.net", "hostname": "example.net" } }, { "href": "http://example.net/", "new_value": "[::1.2.3.4x]", "expected": { "href": "http://example.net/", "host": "example.net", "hostname": "example.net" } }, { "href": "http://example.net/", "new_value": "[::1.2.3.]", "expected": { "href": "http://example.net/", "host": "example.net", "hostname": "example.net" } }, { "href": "http://example.net/", "new_value": "[::1.2.]", "expected": { "href": "http://example.net/", "host": "example.net", "hostname": "example.net" } }, { "href": "http://example.net/", "new_value": "[::1.]", "expected": { "href": "http://example.net/", "host": "example.net", "hostname": "example.net" } }, { "href": "file://y/", "new_value": "x:123", "expected": { "href": "file://y/", "host": "y", "hostname": "y", "port": "" } }, { "href": "file://y/", "new_value": "loc%41lhost", 
"expected": { "href": "file:///", "host": "", "hostname": "", "port": "" } }, { "href": "file://hi/x", "new_value": "", "expected": { "href": "file:///x", "host": "", "hostname": "", "port": "" } }, { "href": "sc://test@test/", "new_value": "", "expected": { "href": "sc://test@test/", "host": "test", "hostname": "test", "username": "test" } }, { "href": "sc://test:12/", "new_value": "", "expected": { "href": "sc://test:12/", "host": "test:12", "hostname": "test", "port": "12" } }, { "comment": "Drop /. from path", "href": "non-spec:/.//p", "new_value": "h", "expected": { "href": "non-spec://h//p", "host": "h", "hostname": "h", "pathname": "//p" } }, { "href": "non-spec:/.//p", "new_value": "", "expected": { "href": "non-spec:////p", "host": "", "hostname": "", "pathname": "//p" } }, { "comment": "Leading / is not stripped", "href": "http://example.com/", "new_value": "///bad.com", "expected": { "href": "http://example.com/", "host": "example.com", "hostname": "example.com" } }, { "comment": "Leading / is not stripped", "href": "sc://example.com/", "new_value": "///bad.com", "expected": { "href": "sc:///", "host": "", "hostname": "" } }, { "href": "https://example.com/", "new_value": "a%C2%ADb", "expected": { "href": "https://ab/", "host": "ab", "hostname": "ab" } }, { "href": "https://example.com/", "new_value": "\u00AD", "expected": { "href": "https://example.com/", "host": "example.com", "hostname": "example.com" } }, { "href": "https://example.com/", "new_value": "%C2%AD", "expected": { "href": "https://example.com/", "host": "example.com", "hostname": "example.com" } }, { "href": "https://example.com/", "new_value": "xn--", "expected": { "href": "https://example.com/", "host": "example.com", "hostname": "example.com" } } ], "port": [ { "href": "http://example.net", "new_value": "8080", "expected": { "href": "http://example.net:8080/", "host": "example.net:8080", "hostname": "example.net", "port": "8080" } }, { "comment": "Port number is removed if empty is the 
new value", "href": "http://example.net:8080", "new_value": "", "expected": { "href": "http://example.net/", "host": "example.net", "hostname": "example.net", "port": "" } }, { "comment": "Default port number is removed", "href": "http://example.net:8080", "new_value": "80", "expected": { "href": "http://example.net/", "host": "example.net", "hostname": "example.net", "port": "" } }, { "comment": "Default port number is removed", "href": "https://example.net:4433", "new_value": "443", "expected": { "href": "https://example.net/", "host": "example.net", "hostname": "example.net", "port": "" } }, { "comment": "Default port number is only removed for the relevant scheme", "href": "https://example.net", "new_value": "80", "expected": { "href": "https://example.net:80/", "host": "example.net:80", "hostname": "example.net", "port": "80" } }, { "comment": "Stuff after a / delimiter is ignored", "href": "http://example.net/path", "new_value": "8080/stuff", "expected": { "href": "http://example.net:8080/path", "host": "example.net:8080", "hostname": "example.net", "port": "8080" } }, { "comment": "Stuff after a ? 
delimiter is ignored", "href": "http://example.net/path", "new_value": "8080?stuff", "expected": { "href": "http://example.net:8080/path", "host": "example.net:8080", "hostname": "example.net", "port": "8080" } }, { "comment": "Stuff after a # delimiter is ignored", "href": "http://example.net/path", "new_value": "8080#stuff", "expected": { "href": "http://example.net:8080/path", "host": "example.net:8080", "hostname": "example.net", "port": "8080" } }, { "comment": "Stuff after a \\ delimiter is ignored for special schemes", "href": "http://example.net/path", "new_value": "8080\\stuff", "expected": { "href": "http://example.net:8080/path", "host": "example.net:8080", "hostname": "example.net", "port": "8080" } }, { "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error", "href": "view-source+http://example.net/path", "new_value": "8080stuff2", "expected": { "href": "view-source+http://example.net:8080/path", "host": "example.net:8080", "hostname": "example.net", "port": "8080" } }, { "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error", "href": "http://example.net/path", "new_value": "8080stuff2", "expected": { "href": "http://example.net:8080/path", "host": "example.net:8080", "hostname": "example.net", "port": "8080" } }, { "comment": "Anything other than ASCII digit stops the port parser in a setter but is not an error", "href": "http://example.net/path", "new_value": "8080+2", "expected": { "href": "http://example.net:8080/path", "host": "example.net:8080", "hostname": "example.net", "port": "8080" } }, { "comment": "Port numbers are 16 bit integers", "href": "http://example.net/path", "new_value": "65535", "expected": { "href": "http://example.net:65535/path", "host": "example.net:65535", "hostname": "example.net", "port": "65535" } }, { "comment": "Port numbers are 16 bit integers, overflowing is an error", "href": "http://example.net:8080/path", "new_value": "65536", 
"expected": { "href": "http://example.net:8080/path", "host": "example.net:8080", "hostname": "example.net", "port": "8080" } }, { "comment": "Setting port to a string that doesn't parse as a number", "href": "http://example.net:8080/path", "new_value": "randomstring", "expected": { "href": "http://example.net:8080/path", "host": "example.net:8080", "hostname": "example.net", "port": "8080" } }, { "comment": "Port numbers are 16 bit integers, overflowing is an error", "href": "non-special://example.net:8080/path", "new_value": "65536", "expected": { "href": "non-special://example.net:8080/path", "host": "example.net:8080", "hostname": "example.net", "port": "8080" } }, { "href": "file://test/", "new_value": "12", "expected": { "href": "file://test/", "port": "" } }, { "href": "file://localhost/", "new_value": "12", "expected": { "href": "file:///", "port": "" } }, { "href": "non-base:value", "new_value": "12", "expected": { "href": "non-base:value", "port": "" } }, { "href": "sc:///", "new_value": "12", "expected": { "href": "sc:///", "port": "" } }, { "href": "sc://x/", "new_value": "12", "expected": { "href": "sc://x:12/", "port": "12" } }, { "href": "javascript://x/", "new_value": "12", "expected": { "href": "javascript://x:12/", "port": "12" } }, { "comment": "Leading u0009 on special scheme", "href": "https://domain.com:443", "new_value": "\u00098080", "expected": { "port": "8080" } }, { "comment": "Leading u0009 on non-special scheme", "href": "wpt++://domain.com:443", "new_value": "\u00098080", "expected": { "port": "8080" } }, { "comment": "Should use all ascii prefixed characters as port", "href": "https://www.google.com:4343", "new_value": "4wpt", "expected": { "port": "4" } } ], "pathname": [ { "comment": "Opaque paths cannot be set", "href": "mailto:me@example.net", "new_value": "/foo", "expected": { "href": "mailto:me@example.net", "pathname": "me@example.net" } }, { "href": "data:original", "new_value": "new value", "expected": { "href": 
"data:original", "pathname": "original" } }, { "href": "sc:original", "new_value": "new value", "expected": { "href": "sc:original", "pathname": "original" } }, { "comment": "Special URLs cannot have their paths erased", "href": "file:///some/path", "new_value": "", "expected": { "href": "file:///", "pathname": "/" } }, { "comment": "Non-special URLs can have their paths erased", "href": "foo://somehost/some/path", "new_value": "", "expected": { "href": "foo://somehost", "pathname": "" } }, { "comment": "Non-special URLs with an empty host can have their paths erased", "href": "foo:///some/path", "new_value": "", "expected": { "href": "foo://", "pathname": "" } }, { "comment": "Path-only URLs cannot have their paths erased", "href": "foo:/some/path", "new_value": "", "expected": { "href": "foo:/", "pathname": "/" } }, { "comment": "Path-only URLs always have an initial slash", "href": "foo:/some/path", "new_value": "test", "expected": { "href": "foo:/test", "pathname": "/test" } }, { "href": "unix:/run/foo.socket?timeout=10", "new_value": "/var/log/../run/bar.socket", "expected": { "href": "unix:/var/run/bar.socket?timeout=10", "pathname": "/var/run/bar.socket" } }, { "href": "https://example.net#nav", "new_value": "home", "expected": { "href": "https://example.net/home#nav", "pathname": "/home" } }, { "href": "https://example.net#nav", "new_value": "../home", "expected": { "href": "https://example.net/home#nav", "pathname": "/home" } }, { "comment": "\\ is a segment delimiter for 'special' URLs", "href": "http://example.net/home?lang=fr#nav", "new_value": "\\a\\%2E\\b\\%2e.\\c", "expected": { "href": "http://example.net/a/c?lang=fr#nav", "pathname": "/a/c" } }, { "comment": "\\ is *not* a segment delimiter for non-'special' URLs", "href": "view-source+http://example.net/home?lang=fr#nav", "new_value": "\\a\\%2E\\b\\%2e.\\c", "expected": { "href": "view-source+http://example.net/\\a\\%2E\\b\\%2e.\\c?lang=fr#nav", "pathname": "/\\a\\%2E\\b\\%2e.\\c" } }, { 
"comment": "UTF-8 percent encoding with the default encode set. Tabs and newlines are removed.", "href": "a:/", "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", "expected": { "href": "a:/%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E%3F@AZ[\\]^_%60az%7B|%7D~%7F%C2%80%C2%81%C3%89%C3%A9", "pathname": "/%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E%3F@AZ[\\]^_%60az%7B|%7D~%7F%C2%80%C2%81%C3%89%C3%A9" } }, { "comment": "Bytes already percent-encoded are left as-is, including %2E outside dotted segments.", "href": "http://example.net", "new_value": "%2e%2E%c3%89té", "expected": { "href": "http://example.net/%2e%2E%c3%89t%C3%A9", "pathname": "/%2e%2E%c3%89t%C3%A9" } }, { "comment": "? needs to be encoded", "href": "http://example.net", "new_value": "?", "expected": { "href": "http://example.net/%3F", "pathname": "/%3F" } }, { "comment": "# needs to be encoded", "href": "http://example.net", "new_value": "#", "expected": { "href": "http://example.net/%23", "pathname": "/%23" } }, { "comment": "? needs to be encoded, non-special scheme", "href": "sc://example.net", "new_value": "?", "expected": { "href": "sc://example.net/%3F", "pathname": "/%3F" } }, { "comment": "# needs to be encoded, non-special scheme", "href": "sc://example.net", "new_value": "#", "expected": { "href": "sc://example.net/%23", "pathname": "/%23" } }, { "comment": "? 
doesn't mess up encoding", "href": "http://example.net", "new_value": "/?é", "expected": { "href": "http://example.net/%3F%C3%A9", "pathname": "/%3F%C3%A9" } }, { "comment": "# doesn't mess up encoding", "href": "http://example.net", "new_value": "/#é", "expected": { "href": "http://example.net/%23%C3%A9", "pathname": "/%23%C3%A9" } }, { "comment": "File URLs and (back)slashes", "href": "file://monkey/", "new_value": "\\\\", "expected": { "href": "file://monkey//", "pathname": "//" } }, { "comment": "File URLs and (back)slashes", "href": "file:///unicorn", "new_value": "//\\/", "expected": { "href": "file://////", "pathname": "////" } }, { "comment": "File URLs and (back)slashes", "href": "file:///unicorn", "new_value": "//monkey/..//", "expected": { "href": "file://///", "pathname": "///" } }, { "comment": "Serialize /. in path", "href": "non-spec:/", "new_value": "/.//p", "expected": { "href": "non-spec:/.//p", "pathname": "//p" } }, { "href": "non-spec:/", "new_value": "/..//p", "expected": { "href": "non-spec:/.//p", "pathname": "//p" } }, { "href": "non-spec:/", "new_value": "//p", "expected": { "href": "non-spec:/.//p", "pathname": "//p" } }, { "comment": "Drop /. 
from path", "href": "non-spec:/.//", "new_value": "p", "expected": { "href": "non-spec:/p", "pathname": "/p" } }, { "comment": "Non-special URLs with non-opaque paths percent-encode U+0020", "href": "data:/nospace", "new_value": "space ", "expected": { "href": "data:/space%20", "pathname": "/space%20" } }, { "href": "sc:/nospace", "new_value": "space ", "expected": { "href": "sc:/space%20", "pathname": "/space%20" } }, { "comment": "Trailing space should be encoded", "href": "http://example.net", "new_value": " ", "expected": { "href": "http://example.net/%20", "pathname": "/%20" } }, { "comment": "Trailing C0 control should be encoded", "href": "http://example.net", "new_value": "\u0000", "expected": { "href": "http://example.net/%00", "pathname": "/%00" } } ], "search": [ { "href": "https://example.net#nav", "new_value": "lang=fr", "expected": { "href": "https://example.net/?lang=fr#nav", "search": "?lang=fr" } }, { "href": "https://example.net?lang=en-US#nav", "new_value": "lang=fr", "expected": { "href": "https://example.net/?lang=fr#nav", "search": "?lang=fr" } }, { "href": "https://example.net?lang=en-US#nav", "new_value": "?lang=fr", "expected": { "href": "https://example.net/?lang=fr#nav", "search": "?lang=fr" } }, { "href": "https://example.net?lang=en-US#nav", "new_value": "??lang=fr", "expected": { "href": "https://example.net/??lang=fr#nav", "search": "??lang=fr" } }, { "href": "https://example.net?lang=en-US#nav", "new_value": "?", "expected": { "href": "https://example.net/?#nav", "search": "" } }, { "href": "https://example.net?lang=en-US#nav", "new_value": "", "expected": { "href": "https://example.net/#nav", "search": "" } }, { "href": "https://example.net?lang=en-US", "new_value": "", "expected": { "href": "https://example.net/", "search": "" } }, { "href": "https://example.net", "new_value": "", "expected": { "href": "https://example.net/", "search": "" } }, { "comment": "UTF-8 percent encoding with the query encode set. 
Tabs and newlines are removed.", "href": "a:/", "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", "expected": { "href": "a:/?%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9", "search": "?%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9" } }, { "comment": "Bytes already percent-encoded are left as-is", "href": "http://example.net", "new_value": "%c3%89té", "expected": { "href": "http://example.net/?%c3%89t%C3%A9", "search": "?%c3%89t%C3%A9" } }, { "comment": "Drop trailing spaces from trailing opaque paths", "href": "data:space ?query", "new_value": "", "expected": { "href": "data:space", "pathname": "space", "search": "" } }, { "href": "sc:space ?query", "new_value": "", "expected": { "href": "sc:space", "pathname": "space", "search": "" } }, { "comment": "Do not drop trailing spaces from non-trailing opaque paths", "href": "data:space ?query#fragment", "new_value": "", "expected": { "href": "data:space #fragment", "search": "" } }, { "href": "sc:space ?query#fragment", "new_value": "", "expected": { "href": "sc:space #fragment", "search": "" } }, { "comment": "Trailing space should be encoded", "href": "http://example.net", "new_value": " ", "expected": { "href": "http://example.net/?%20", "search": "?%20" } }, { "comment": "Trailing C0 control should be encoded", "href": "http://example.net", "new_value": "\u0000", "expected": { "href": "http://example.net/?%00", "search": "?%00" } } ], "hash": [ { "href": "https://example.net", "new_value": "main", "expected": { "href": "https://example.net/#main", "hash": "#main" } }, { "href": "https://example.net#nav", "new_value": "main", "expected": { "href": "https://example.net/#main", "hash": "#main" } }, { "href": "https://example.net?lang=en-US", "new_value": "##nav", "expected": { "href": "https://example.net/?lang=en-US##nav", "hash": "##nav" } }, { "href": 
"https://example.net?lang=en-US#nav", "new_value": "#main", "expected": { "href": "https://example.net/?lang=en-US#main", "hash": "#main" } }, { "href": "https://example.net?lang=en-US#nav", "new_value": "#", "expected": { "href": "https://example.net/?lang=en-US#", "hash": "" } }, { "href": "https://example.net?lang=en-US#nav", "new_value": "", "expected": { "href": "https://example.net/?lang=en-US", "hash": "" } }, { "href": "http://example.net", "new_value": "#foo bar", "expected": { "href": "http://example.net/#foo%20bar", "hash": "#foo%20bar" } }, { "href": "http://example.net", "new_value": "#foo\"bar", "expected": { "href": "http://example.net/#foo%22bar", "hash": "#foo%22bar" } }, { "href": "http://example.net", "new_value": "#foo<bar", "expected": { "href": "http://example.net/#foo%3Cbar", "hash": "#foo%3Cbar" } }, { "href": "http://example.net", "new_value": "#foo>bar", "expected": { "href": "http://example.net/#foo%3Ebar", "hash": "#foo%3Ebar" } }, { "href": "http://example.net", "new_value": "#foo`bar", "expected": { "href": "http://example.net/#foo%60bar", "hash": "#foo%60bar" } }, { "comment": "Simple percent-encoding; tabs and newlines are removed", "href": "a:/", "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", "expected": { "href": "a:/#%00%01%1F%20!%22#$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_%60az{|}~%7F%C2%80%C2%81%C3%89%C3%A9", "hash": "#%00%01%1F%20!%22#$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_%60az{|}~%7F%C2%80%C2%81%C3%89%C3%A9" } }, { "comment": "Percent-encode NULLs in fragment", "href": "http://example.net", "new_value": "a\u0000b", "expected": { "href": "http://example.net/#a%00b", "hash": "#a%00b" } }, { "comment": "Percent-encode NULLs in fragment", "href": "non-spec:/", "new_value": "a\u0000b", "expected": { "href": "non-spec:/#a%00b", "hash": "#a%00b" } }, { "comment": "Bytes already percent-encoded are left as-is", "href": "http://example.net", "new_value": "%c3%89té", "expected": { "href": "http://example.net/#%c3%89t%C3%A9", "hash": "#%c3%89t%C3%A9" } }, { "href": "javascript:alert(1)", "new_value": "castle", "expected": { "href":
"javascript:alert(1)#castle", "hash": "#castle" } }, { "comment": "Drop trailing spaces from trailing opaque paths", "href": "data:space #fragment", "new_value": "", "expected": { "href": "data:space", "pathname": "space", "hash": "" } }, { "href": "sc:space #fragment", "new_value": "", "expected": { "href": "sc:space", "pathname": "space", "hash": "" } }, { "comment": "Do not drop trailing spaces from non-trailing opaque paths", "href": "data:space ?query#fragment", "new_value": "", "expected": { "href": "data:space ?query", "hash": "" } }, { "href": "sc:space ?query#fragment", "new_value": "", "expected": { "href": "sc:space ?query", "hash": "" } }, { "comment": "Trailing space should be encoded", "href": "http://example.net", "new_value": " ", "expected": { "href": "http://example.net/#%20", "hash": "#%20" } }, { "comment": "Trailing C0 control should be encoded", "href": "http://example.net", "new_value": "\u0000", "expected": { "href": "http://example.net/#%00", "hash": "#%00" } } ], "href": [ { "href": "file:///var/log/system.log", "new_value": "http://0300.168.0xF0", "expected": { "href": "http://192.168.0.240/", "protocol": "http:" } } ] } url-2.5.2/tests/unit.rs000064400000000000000000001233321046102023000131270ustar 00000000000000// Copyright 2013-2014 The rust-url developers. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! 
Unit tests use std::borrow::Cow; use std::cell::{Cell, RefCell}; use std::net::{Ipv4Addr, Ipv6Addr}; #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] use std::path::{Path, PathBuf}; use url::{form_urlencoded, Host, Origin, Url}; // https://rustwasm.github.io/wasm-bindgen/wasm-bindgen-test/usage.html #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] use wasm_bindgen_test::{wasm_bindgen_test as test, wasm_bindgen_test_configure}; #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] wasm_bindgen_test_configure!(run_in_browser); #[test] fn size() { use std::mem::size_of; assert_eq!(size_of::(), size_of::>()); } #[test] fn test_relative() { let base: Url = "sc://%C3%B1".parse().unwrap(); let url = base.join("/resources/testharness.js").unwrap(); assert_eq!(url.as_str(), "sc://%C3%B1/resources/testharness.js"); } #[test] fn test_relative_empty() { let base: Url = "sc://%C3%B1".parse().unwrap(); let url = base.join("").unwrap(); assert_eq!(url.as_str(), "sc://%C3%B1"); } #[test] fn test_strip_trailing_spaces_from_opaque_path() { let mut url: Url = "data:space ?query".parse().unwrap(); url.set_query(None); assert_eq!(url.as_str(), "data:space"); let mut url: Url = "data:space #hash".parse().unwrap(); url.set_fragment(None); assert_eq!(url.as_str(), "data:space"); } #[test] fn test_set_empty_host() { let mut base: Url = "moz://foo:bar@servo/baz".parse().unwrap(); base.set_username("").unwrap(); assert_eq!(base.as_str(), "moz://:bar@servo/baz"); base.set_host(None).unwrap(); assert_eq!(base.as_str(), "moz:/baz"); base.set_host(Some("servo")).unwrap(); assert_eq!(base.as_str(), "moz://servo/baz"); let mut base: Url = "file://server/share/foo/bar".parse().unwrap(); base.set_host(None).unwrap(); assert_eq!(base.as_str(), "file:///share/foo/bar"); let mut base: Url = "file://server/share/foo/bar".parse().unwrap(); base.set_host(Some("foo")).unwrap(); assert_eq!(base.as_str(), "file://foo/share/foo/bar"); } #[test] fn 
test_set_empty_username_and_password() { let mut base: Url = "moz://foo:bar@servo/baz".parse().unwrap(); base.set_username("").unwrap(); assert_eq!(base.as_str(), "moz://:bar@servo/baz"); base.set_password(Some("")).unwrap(); assert_eq!(base.as_str(), "moz://servo/baz"); base.set_password(None).unwrap(); assert_eq!(base.as_str(), "moz://servo/baz"); } #[test] fn test_set_empty_password() { let mut base: Url = "moz://foo:bar@servo/baz".parse().unwrap(); base.set_password(Some("")).unwrap(); assert_eq!(base.as_str(), "moz://foo@servo/baz"); base.set_password(None).unwrap(); assert_eq!(base.as_str(), "moz://foo@servo/baz"); } #[test] fn test_set_empty_hostname() { use url::quirks; let mut base: Url = "moz://foo@servo/baz".parse().unwrap(); assert!( quirks::set_hostname(&mut base, "").is_err(), "setting an empty hostname to a url with a username should fail" ); base = "moz://:pass@servo/baz".parse().unwrap(); assert!( quirks::set_hostname(&mut base, "").is_err(), "setting an empty hostname to a url with a password should fail" ); base = "moz://servo/baz".parse().unwrap(); quirks::set_hostname(&mut base, "").unwrap(); assert_eq!(base.as_str(), "moz:///baz"); } #[test] fn test_set_empty_query() { let mut base: Url = "moz://example.com/path?query".parse().unwrap(); base.set_query(Some("")); assert_eq!(base.as_str(), "moz://example.com/path?"); base.set_query(None); assert_eq!(base.as_str(), "moz://example.com/path"); } #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] macro_rules! 
assert_from_file_path { ($path: expr) => { assert_from_file_path!($path, $path) }; ($path: expr, $url_path: expr) => {{ let url = Url::from_file_path(Path::new($path)).unwrap(); assert_eq!(url.host(), None); assert_eq!(url.path(), $url_path); assert_eq!(url.to_file_path(), Ok(PathBuf::from($path))); }}; } #[test] #[cfg(any(unix, windows))] fn new_file_paths() { if cfg!(unix) { assert_eq!(Url::from_file_path(Path::new("relative")), Err(())); assert_eq!(Url::from_file_path(Path::new("../relative")), Err(())); } if cfg!(windows) { assert_eq!(Url::from_file_path(Path::new("relative")), Err(())); assert_eq!(Url::from_file_path(Path::new(r"..\relative")), Err(())); assert_eq!(Url::from_file_path(Path::new(r"\drive-relative")), Err(())); assert_eq!(Url::from_file_path(Path::new(r"\\ucn\")), Err(())); } if cfg!(unix) { assert_from_file_path!("/foo/bar"); assert_from_file_path!("/foo/ba\0r", "/foo/ba%00r"); assert_from_file_path!("/foo/ba%00r", "/foo/ba%2500r"); assert_from_file_path!("/foo/ba\\r", "/foo/ba%5Cr"); } } #[test] #[cfg(unix)] fn new_path_bad_utf8() { use std::ffi::OsStr; use std::os::unix::prelude::*; let url = Url::from_file_path(Path::new(OsStr::from_bytes(b"/foo/ba\x80r"))).unwrap(); let os_str = OsStr::from_bytes(b"/foo/ba\x80r"); assert_eq!(url.to_file_path(), Ok(PathBuf::from(os_str))); } #[test] #[cfg(windows)] fn new_path_windows_fun() { assert_from_file_path!(r"C:\foo\bar", "/C:/foo/bar"); assert_from_file_path!("C:\\foo\\ba\0r", "/C:/foo/ba%00r"); // Invalid UTF-8 assert!(Url::parse("file:///C:/foo/ba%80r") .unwrap() .to_file_path() .is_err()); // test windows canonicalized path let path = PathBuf::from(r"\\?\C:\foo\bar"); assert!(Url::from_file_path(path).is_ok()); // Percent-encoded drive letter let url = Url::parse("file:///C%3A/foo/bar").unwrap(); assert_eq!(url.to_file_path(), Ok(PathBuf::from(r"C:\foo\bar"))); } #[test] #[cfg(any(unix, windows))] fn new_directory_paths() { if cfg!(unix) { 
assert_eq!(Url::from_directory_path(Path::new("relative")), Err(()));
        assert_eq!(Url::from_directory_path(Path::new("../relative")), Err(()));

        let url = Url::from_directory_path(Path::new("/foo/bar")).unwrap();
        assert_eq!(url.host(), None);
        assert_eq!(url.path(), "/foo/bar/");
    }
    if cfg!(windows) {
        assert_eq!(Url::from_directory_path(Path::new("relative")), Err(()));
        assert_eq!(Url::from_directory_path(Path::new(r"..\relative")), Err(()));
        assert_eq!(
            Url::from_directory_path(Path::new(r"\drive-relative")),
            Err(())
        );
        assert_eq!(Url::from_directory_path(Path::new(r"\\ucn\")), Err(()));

        let url = Url::from_directory_path(Path::new(r"C:\foo\bar")).unwrap();
        assert_eq!(url.host(), None);
        assert_eq!(url.path(), "/C:/foo/bar/");
    }
}

/// A pushed segment containing `\` is percent-encoded for the `http` URL and
/// kept verbatim for the `thing` URL.
#[test]
fn path_backslash_fun() {
    // The turbofish names the `FromStr` target type; without it `parse::()`
    // does not parse.
    let mut special_url = "http://foobar.com".parse::<Url>().unwrap();
    special_url.path_segments_mut().unwrap().push("foo\\bar");
    assert_eq!(special_url.as_str(), "http://foobar.com/foo%5Cbar");

    let mut nonspecial_url = "thing://foobar.com".parse::<Url>().unwrap();
    nonspecial_url.path_segments_mut().unwrap().push("foo\\bar");
    assert_eq!(nonspecial_url.as_str(), "thing://foobar.com/foo\\bar");
}

/// `str::parse::<Url>()` accepts an absolute URL.
#[test]
fn from_str() {
    assert!("http://testing.com/this".parse::<Url>().is_ok());
}

/// `Url::parse_with_params` appends the given pairs after the existing query.
#[test]
fn parse_with_params() {
    let url = Url::parse_with_params(
        "http://testing.com/this?dont=clobberme",
        &[("lang", "rust")],
    )
    .unwrap();

    assert_eq!(
        url.as_str(),
        "http://testing.com/this?dont=clobberme&lang=rust"
    );
}

/// `file:` URLs written without slashes get an absolute path; `..` collapses to `/`.
#[test]
fn issue_124() {
    let url: Url = "file:a".parse().unwrap();
    assert_eq!(url.path(), "/a");
    let url: Url = "file:...".parse().unwrap();
    assert_eq!(url.path(), "/...");
    let url: Url = "file:..".parse().unwrap();
    assert_eq!(url.path(), "/");
}

/// Equal URLs must also produce equal hashes.
#[test]
fn test_equality() {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    // Asserts `a == b` and that both feed the same hash into `DefaultHasher`.
    fn check_eq(a: &Url, b: &Url) {
        assert_eq!(a, b);

        let mut h1 = DefaultHasher::new();
        a.hash(&mut h1);
        let mut h2 = DefaultHasher::new();
        b.hash(&mut h2);
        assert_eq!(h1.finish(),
h2.finish());
    }

    // Parses `s` and checks that the result equals (and hashes like) itself.
    fn url(s: &str) -> Url {
        let rv = s.parse().unwrap();
        check_eq(&rv, &rv);
        rv
    }

    // Doesn't care if default port is given.
    let a: Url = url("https://example.com/");
    let b: Url = url("https://example.com:443/");
    check_eq(&a, &b);

    // Different ports
    let a: Url = url("http://example.com/");
    let b: Url = url("http://example.com:8080/");
    assert!(a != b, "{:?} != {:?}", a, b);

    // Different scheme
    let a: Url = url("http://example.com/");
    let b: Url = url("https://example.com/");
    assert_ne!(a, b);

    // Different host
    let a: Url = url("http://foo.com/");
    let b: Url = url("http://bar.com/");
    assert_ne!(a, b);

    // Missing path, automatically substituted. Semantically the same.
    let a: Url = url("http://foo.com");
    let b: Url = url("http://foo.com/");
    check_eq(&a, &b);
}

/// Host parsing: domains, IPv4 (including hex/octal and single-number forms),
/// IPv6, and a set of malformed IPv4-like hosts that must be rejected.
#[test]
fn host() {
    // Parses `input` and compares its host against the expected value.
    fn assert_host(input: &str, host: Host<&str>) {
        assert_eq!(Url::parse(input).unwrap().host(), Some(host));
    }
    assert_host("http://www.mozilla.org", Host::Domain("www.mozilla.org"));
    assert_host(
        "http://1.35.33.49",
        Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49)),
    );
    assert_host(
        "http://[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]",
        Host::Ipv6(Ipv6Addr::new(
            0x2001, 0x0db8, 0x85a3, 0x08d3, 0x1319, 0x8a2e, 0x0370, 0x7344,
        )),
    );
    assert_host(
        "http://[::]",
        Host::Ipv6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 0)),
    );
    assert_host(
        "http://[::1]",
        Host::Ipv6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 1)),
    );
    assert_host(
        "http://0x1.0X23.0x21.061",
        Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49)),
    );
    assert_host("http://0x1232131", Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49)));
    assert_host("http://111", Host::Ipv4(Ipv4Addr::new(0, 0, 0, 111)));
    assert!(Url::parse("http://1.35.+33.49").is_err());
    assert!(Url::parse("http://2..2.3").is_err());
    assert!(Url::parse("http://42.0x1232131").is_err());
    assert!(Url::parse("http://192.168.0.257").is_err());

    assert_eq!(Host::Domain("foo"), Host::Domain("foo").to_owned());
    assert_ne!(Host::Domain("foo"), Host::Domain("bar").to_owned());
}

/// IPv6 hosts must not be serialized with embedded IPv4 notation.
#[test]
fn host_serialization() {
    // libstd’s
`Display for Ipv6Addr` serializes 0:0:0:0:0:0:_:_ and 0:0:0:0:0:ffff:_:_ // using IPv4-like syntax, as suggested in https://tools.ietf.org/html/rfc5952#section-4 // but https://url.spec.whatwg.org/#concept-ipv6-serializer specifies not to. // Not [::0.0.0.2] / [::ffff:0.0.0.2] assert_eq!( Url::parse("http://[0::2]").unwrap().host_str(), Some("[::2]") ); assert_eq!( Url::parse("http://[0::ffff:0:2]").unwrap().host_str(), Some("[::ffff:0:2]") ); } #[test] fn test_idna() { assert!("http://goșu.ro".parse::().is_ok()); assert_eq!( Url::parse("http://☃.net/").unwrap().host(), Some(Host::Domain("xn--n3h.net")) ); assert!("https://r2---sn-huoa-cvhl.googlevideo.com/crossdomain.xml" .parse::() .is_ok()); } #[test] fn test_serialization() { let data = [ ("http://example.com/", "http://example.com/"), ("http://addslash.com", "http://addslash.com/"), ("http://@emptyuser.com/", "http://emptyuser.com/"), ("http://:@emptypass.com/", "http://emptypass.com/"), ("http://user@user.com/", "http://user@user.com/"), ( "http://user:pass@userpass.com/", "http://user:pass@userpass.com/", ), ( "http://slashquery.com/path/?q=something", "http://slashquery.com/path/?q=something", ), ( "http://noslashquery.com/path?q=something", "http://noslashquery.com/path?q=something", ), ]; for &(input, result) in &data { let url = Url::parse(input).unwrap(); assert_eq!(url.as_str(), result); } } #[test] fn test_form_urlencoded() { let pairs: &[(Cow<'_, str>, Cow<'_, str>)] = &[ ("foo".into(), "é&".into()), ("bar".into(), "".into()), ("foo".into(), "#".into()), ]; let encoded = form_urlencoded::Serializer::new(String::new()) .extend_pairs(pairs) .finish(); assert_eq!(encoded, "foo=%C3%A9%26&bar=&foo=%23"); assert_eq!( form_urlencoded::parse(encoded.as_bytes()).collect::>(), pairs.to_vec() ); } #[test] fn test_form_serialize() { let encoded = form_urlencoded::Serializer::new(String::new()) .append_pair("foo", "é&") .append_pair("bar", "") .append_pair("foo", "#") .append_key_only("json") .finish(); 
assert_eq!(encoded, "foo=%C3%A9%26&bar=&foo=%23&json");
}

/// The encoding override is applied to keys and values before they are written.
#[test]
fn form_urlencoded_encoding_override() {
    let encoded = form_urlencoded::Serializer::new(String::new())
        .encoding_override(Some(&|s| s.as_bytes().to_ascii_uppercase().into()))
        .append_pair("foo", "bar")
        .append_key_only("xml")
        .finish();
    assert_eq!(encoded, "FOO=BAR&XML");
}

#[test]
/// https://github.com/servo/rust-url/issues/61
fn issue_61() {
    let mut url = Url::parse("http://mozilla.org").unwrap();
    url.set_scheme("https").unwrap();
    // No explicit port is stored; the known default follows the new scheme.
    assert_eq!(url.port(), None);
    assert_eq!(url.port_or_known_default(), Some(443));
    url.check_invariants().unwrap();
}

#[test]
#[cfg(any(unix, target_os = "redox", target_os = "wasi"))]
#[cfg(not(windows))]
/// https://github.com/servo/rust-url/issues/197
fn issue_197() {
    let mut url = Url::from_file_path("/").expect("Failed to parse path");
    url.check_invariants().unwrap();
    assert_eq!(
        url,
        Url::parse("file:///").expect("Failed to parse path + protocol")
    );
    url.path_segments_mut()
        .expect("path_segments_mut")
        .pop_if_empty();
}

/// The result is discarded: the test only requires that the calls complete.
#[test]
fn issue_241() {
    Url::parse("mailto:").unwrap().cannot_be_a_base();
}

#[test]
/// https://github.com/servo/rust-url/issues/222
fn append_trailing_slash() {
    let mut url: Url = "http://localhost:6767/foo/bar?a=b".parse().unwrap();
    url.check_invariants().unwrap();
    // Pushing an empty segment adds the trailing slash before the query.
    url.path_segments_mut().unwrap().push("");
    url.check_invariants().unwrap();
    assert_eq!(url.to_string(), "http://localhost:6767/foo/bar/?a=b");
}

#[test]
/// https://github.com/servo/rust-url/issues/227
fn extend_query_pairs_then_mutate() {
    let mut url: Url = "http://localhost:6767/foo/bar".parse().unwrap();
    url.query_pairs_mut()
        .extend_pairs(vec![("auth", "my-token")]);
    url.check_invariants().unwrap();
    assert_eq!(
        url.to_string(),
        "http://localhost:6767/foo/bar?auth=my-token"
    );
    // Changing the path afterwards must leave the query in place.
    url.path_segments_mut().unwrap().push("some_other_path");
    url.check_invariants().unwrap();
    assert_eq!(
        url.to_string(),
        "http://localhost:6767/foo/bar/some_other_path?auth=my-token"
    );
}

#[test]
///
https://github.com/servo/rust-url/issues/222 fn append_empty_segment_then_mutate() { let mut url: Url = "http://localhost:6767/foo/bar?a=b".parse().unwrap(); url.check_invariants().unwrap(); url.path_segments_mut().unwrap().push("").pop(); url.check_invariants().unwrap(); assert_eq!(url.to_string(), "http://localhost:6767/foo/bar?a=b"); } #[test] /// https://github.com/servo/rust-url/issues/243 fn test_set_host() { let mut url = Url::parse("https://example.net/hello").unwrap(); url.set_host(Some("foo.com")).unwrap(); assert_eq!(url.as_str(), "https://foo.com/hello"); assert!(url.set_host(None).is_err()); assert_eq!(url.as_str(), "https://foo.com/hello"); assert!(url.set_host(Some("")).is_err()); assert_eq!(url.as_str(), "https://foo.com/hello"); let mut url = Url::parse("foobar://example.net/hello").unwrap(); url.set_host(None).unwrap(); assert_eq!(url.as_str(), "foobar:/hello"); let mut url = Url::parse("foo://ș").unwrap(); assert_eq!(url.as_str(), "foo://%C8%99"); url.set_host(Some("goșu.ro")).unwrap(); assert_eq!(url.as_str(), "foo://go%C8%99u.ro"); } #[test] // https://github.com/servo/rust-url/issues/166 fn test_leading_dots() { assert_eq!( Host::parse(".org").unwrap(), Host::Domain(".org".to_owned()) ); assert_eq!(Url::parse("file://./foo").unwrap().domain(), Some(".")); } #[test] /// https://github.com/servo/rust-url/issues/302 fn test_origin_hash() { use std::collections::hash_map::DefaultHasher; use std::hash::{Hash, Hasher}; fn hash(value: &T) -> u64 { let mut hasher = DefaultHasher::new(); value.hash(&mut hasher); hasher.finish() } let origin = &Url::parse("http://example.net/").unwrap().origin(); let origins_to_compare = [ Url::parse("http://example.net:80/").unwrap().origin(), Url::parse("http://example.net:81/").unwrap().origin(), Url::parse("http://example.net").unwrap().origin(), Url::parse("http://example.net/hello").unwrap().origin(), Url::parse("https://example.net").unwrap().origin(), Url::parse("ftp://example.net").unwrap().origin(), 
Url::parse("file://example.net").unwrap().origin(),
        Url::parse("http://user@example.net/").unwrap().origin(),
        Url::parse("http://user:pass@example.net/")
            .unwrap()
            .origin(),
    ];

    // Equal origins must hash equally; unequal ones are expected to differ.
    for other in &origins_to_compare {
        if origin == other {
            assert_eq!(hash(origin), hash(other));
        } else {
            assert_ne!(hash(origin), hash(other));
        }
    }

    // Each of these origins hashes differently, even when built from the same URL.
    let opaque_origin = Url::parse("file://example.net").unwrap().origin();
    let same_opaque_origin = Url::parse("file://example.net").unwrap().origin();
    let other_opaque_origin = Url::parse("file://other").unwrap().origin();

    assert_ne!(hash(&opaque_origin), hash(&same_opaque_origin));
    assert_ne!(hash(&opaque_origin), hash(&other_opaque_origin));
}

/// A `blob:` URL wrapping an http URL has the same origin as the http URL.
#[test]
fn test_origin_blob_equality() {
    let plain = Url::parse("http://example.net/").unwrap().origin();
    let wrapped = Url::parse("blob:http://example.net/").unwrap().origin();

    assert_eq!(&plain, &wrapped);
}

/// Neither a fresh opaque origin nor the origin of a malformed blob URL is a tuple.
#[test]
fn test_origin_opaque() {
    let fresh = Origin::new_opaque();
    assert!(!fresh.is_tuple());

    let malformed_blob = Url::parse("blob:malformed//").unwrap().origin();
    assert!(!malformed_blob.is_tuple())
}

/// Unicode serialization of origins, and its agreement with the ASCII form
/// for origins built from ASCII-only URLs.
#[test]
fn test_origin_unicode_serialization() {
    let cases = [
        ("http://😅.com", "http://😅.com"),
        ("ftp://😅:🙂@🙂.com", "ftp://🙂.com"),
        ("https://user@😅.com", "https://😅.com"),
        ("http://😅.🙂:40", "http://😅.🙂:40"),
    ];
    for &(input, expected) in &cases {
        let serialized = Url::parse(input).unwrap().origin().unicode_serialization();
        assert_eq!(serialized, *expected);
    }

    let ascii_origins = [
        Url::parse("http://example.net/").unwrap().origin(),
        Url::parse("http://example.net:80/").unwrap().origin(),
        Url::parse("http://example.net:81/").unwrap().origin(),
        Url::parse("http://example.net").unwrap().origin(),
        Url::parse("http://example.net/hello").unwrap().origin(),
        Url::parse("https://example.net").unwrap().origin(),
        Url::parse("ftp://example.net").unwrap().origin(),
        Url::parse("file://example.net").unwrap().origin(),
        Url::parse("http://user@example.net/").unwrap().origin(),
Url::parse("http://user:pass@example.net/")
            .unwrap()
            .origin(),
        Url::parse("http://127.0.0.1").unwrap().origin(),
    ];
    for ascii_origin in &ascii_origins {
        let ascii = ascii_origin.ascii_serialization();
        let unicode = ascii_origin.unicode_serialization();
        assert_eq!(ascii, unicode);
    }
}

/// `socket_addrs` uses the explicit port, else the scheme default, else the
/// port returned by the fallback closure.
#[test]
#[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
fn test_socket_addrs() {
    use std::net::ToSocketAddrs;

    let cases = [
        ("https://127.0.0.1/", "127.0.0.1", 443),
        ("https://127.0.0.1:9742/", "127.0.0.1", 9742),
        ("custom-protocol://127.0.0.1:9742/", "127.0.0.1", 9742),
        ("custom-protocol://127.0.0.1/", "127.0.0.1", 9743),
        ("https://[::1]/", "::1", 443),
        ("https://[::1]:9742/", "::1", 9742),
        ("custom-protocol://[::1]:9742/", "::1", 9742),
        ("custom-protocol://[::1]/", "::1", 9743),
        ("https://localhost/", "localhost", 443),
        ("https://localhost:9742/", "localhost", 9742),
        ("custom-protocol://localhost:9742/", "localhost", 9742),
        ("custom-protocol://localhost/", "localhost", 9743),
    ];

    for (input, host, port) in &cases {
        let parsed = url::Url::parse(input).unwrap();
        // Fallback port, supplied only for the custom scheme.
        let resolved = parsed
            .socket_addrs(|| match parsed.scheme() {
                "custom-protocol" => Some(9743),
                _ => None,
            })
            .unwrap();
        let expected = (*host, *port).to_socket_addrs().unwrap().next();
        assert_eq!(Some(resolved[0]), expected);
    }
}

/// A cannot-be-a-base URL has no path segments and rejects host changes.
#[test]
fn test_no_base_url() {
    let mut mailto = Url::parse("mailto:test@example.net").unwrap();

    assert!(mailto.cannot_be_a_base());
    assert!(mailto.path_segments().is_none());
    assert!(mailto.path_segments_mut().is_err());
    assert!(mailto.set_host(Some("foo")).is_err());
    assert!(mailto
        .set_ip_host("127.0.0.1".parse().unwrap())
        .is_err());

    mailto.set_path("/foo");
    assert_eq!(mailto.path(), "%2Ffoo");
}

/// `domain()` is `None` for an IP host and for a `mailto:` URL, `Some` for a domain name.
#[test]
fn test_domain() {
    let ip_host = Url::parse("https://127.0.0.1/").unwrap();
    assert_eq!(ip_host.domain(), None);

    let mailto = Url::parse("mailto:test@example.net").unwrap();
    assert_eq!(mailto.domain(), None);

    let named = Url::parse("https://example.com/").unwrap();
    assert_eq!(named.domain(), Some("example.com"));
}

#[test]
fn test_query() {
    // Query followed by a fragment.
    let with_fragment = Url::parse("https://example.com/products?page=2#fragment").unwrap();
    assert_eq!(with_fragment.query(), Some("page=2"));
    assert_eq!(
        with_fragment.query_pairs().next(),
        Some((Cow::Borrowed("page"), Cow::Borrowed("2")))
    );

    // No query at all.
    let no_query = Url::parse("https://example.com/products").unwrap();
    assert!(no_query.query().is_none());
    assert_eq!(no_query.query_pairs().count(), 0);

    // The raw query is percent-encoded; the pairs iterator decodes it.
    let non_ascii = Url::parse("https://example.com/?country=español").unwrap();
    assert_eq!(non_ascii.query(), Some("country=espa%C3%B1ol"));
    assert_eq!(
        non_ascii.query_pairs().next(),
        Some((Cow::Borrowed("country"), Cow::Borrowed("español")))
    );

    // Two pairs, yielded in order.
    let two_pairs = Url::parse("https://example.com/products?page=2&sort=desc").unwrap();
    assert_eq!(two_pairs.query(), Some("page=2&sort=desc"));
    let mut pairs = two_pairs.query_pairs();
    // `pairs` is still used after `count()`, so the iterator is not consumed here.
    assert_eq!(pairs.count(), 2);
    assert_eq!(
        pairs.next(),
        Some((Cow::Borrowed("page"), Cow::Borrowed("2")))
    );
    assert_eq!(
        pairs.next(),
        Some((Cow::Borrowed("sort"), Cow::Borrowed("desc")))
    );
}

/// `fragment()` returns the text after `#`, or `None` when there is none.
#[test]
fn test_fragment() {
    let with_fragment = Url::parse("https://example.com/#fragment").unwrap();
    assert_eq!(with_fragment.fragment(), Some("fragment"));

    let without_fragment = Url::parse("https://example.com/").unwrap();
    assert_eq!(without_fragment.fragment(), None);
}

/// `set_ip_host` accepts IPv4 and IPv6 addresses; IPv6 is bracketed in `host_str`.
#[test]
fn test_set_ip_host() {
    let mut target = Url::parse("http://example.com").unwrap();

    target.set_ip_host("127.0.0.1".parse().unwrap()).unwrap();
    assert_eq!(target.host_str(), Some("127.0.0.1"));

    target.set_ip_host("::1".parse().unwrap()).unwrap();
    assert_eq!(target.host_str(), Some("[::1]"));
}

/// `quirks::set_href` rejects an unparsable string and, on success, makes the
/// URL equal to the parsed input.
#[test]
fn test_set_href() {
    use url::quirks::set_href;

    let mut target = Url::parse("https://existing.url").unwrap();

    assert!(set_href(&mut target, "mal//formed").is_err());

    assert!(set_href(
        &mut target,
        "https://user:pass@domain.com:9742/path/file.ext?key=val&key2=val2#fragment"
    )
    .is_ok());
    let expected =
        Url::parse("https://user:pass@domain.com:9742/path/file.ext?key=val&key2=val2#fragment")
            .unwrap();
    assert_eq!(target, expected);
}

/// Each row is `(input, expected domain_to_ascii, expected domain_to_unicode)`.
#[test]
fn test_domain_encoding_quirks() {
    use url::quirks::{domain_to_ascii, domain_to_unicode};

    let data = [
("http://example.com", "", ""),
        ("😅.🙂", "xn--j28h.xn--938h", "😅.🙂"),
        ("example.com", "example.com", "example.com"),
        ("mailto:test@example.net", "", ""),
    ];

    for url in &data {
        assert_eq!(domain_to_ascii(url.0), url.1);
        assert_eq!(domain_to_unicode(url.0), url.2);
    }
}

/// Checks the component offsets reported by `quirks::internal_components`.
#[cfg(feature = "expose_internals")]
#[test]
fn test_expose_internals() {
    use url::quirks::internal_components;
    use url::quirks::InternalComponents;

    let url = Url::parse("https://example.com/path/file.ext?key=val&key2=val2#fragment").unwrap();
    let InternalComponents {
        scheme_end,
        username_end,
        host_start,
        host_end,
        port,
        path_start,
        query_start,
        fragment_start,
    } = internal_components(&url);

    assert_eq!(scheme_end, 5);
    assert_eq!(username_end, 8);
    assert_eq!(host_start, 8);
    assert_eq!(host_end, 19);
    assert_eq!(port, None);
    assert_eq!(path_start, 19);
    assert_eq!(query_start, Some(33));
    assert_eq!(fragment_start, Some(51));
}

/// Windows UNC paths become `file://host/...` URLs and convert back.
#[test]
#[cfg(windows)]
fn test_windows_unc_path() {
    let url = Url::from_file_path(Path::new(r"\\host\share\path\file.txt")).unwrap();
    assert_eq!(url.as_str(), "file://host/share/path/file.txt");

    let url = Url::from_file_path(Path::new(r"\\höst\share\path\file.txt")).unwrap();
    assert_eq!(url.as_str(), "file://xn--hst-sna/share/path/file.txt");

    let url = Url::from_file_path(Path::new(r"\\192.168.0.1\share\path\file.txt")).unwrap();
    assert_eq!(url.host(), Some(Host::Ipv4(Ipv4Addr::new(192, 168, 0, 1))));

    let path = url.to_file_path().unwrap();
    assert_eq!(path.to_str(), Some(r"\\192.168.0.1\share\path\file.txt"));

    // Another way to write these:
    let url = Url::from_file_path(Path::new(r"\\?\UNC\host\share\path\file.txt")).unwrap();
    assert_eq!(url.as_str(), "file://host/share/path/file.txt");

    // Paths starting with "\\.\" (Local Device Paths) are intentionally not supported.
let url = Url::from_file_path(Path::new(r"\\.\some\path\file.txt"));
    assert!(url.is_err());
}

/// The callback receives the violation found while parsing, and the violation
/// exposes the same text through `description()` and `to_string()`.
#[test]
fn test_syntax_violation_callback() {
    use url::SyntaxViolation::*;

    let seen = Cell::new(None);
    let parsed = Url::options()
        .syntax_violation_callback(Some(&|v| seen.set(Some(v))))
        .parse("http:////mozilla.org:42")
        .unwrap();
    assert_eq!(parsed.port(), Some(42));

    let reported = seen.take().unwrap();
    assert_eq!(reported, ExpectedDoubleSlash);
    assert_eq!(reported.description(), "expected //");
    assert_eq!(reported.to_string(), "expected //");
}

/// One named closure is passed as the callback for two separate parses.
#[test]
fn test_syntax_violation_callback_lifetimes() {
    use url::SyntaxViolation::*;

    let seen = Cell::new(None);
    let record = |s| seen.set(Some(s));

    let first = Url::options()
        .syntax_violation_callback(Some(&record))
        .parse("http:////mozilla.org:42")
        .unwrap();
    assert_eq!(first.port(), Some(42));
    assert_eq!(seen.take(), Some(ExpectedDoubleSlash));

    let second = Url::options()
        .syntax_violation_callback(Some(&record))
        .parse("http://mozilla.org\\path")
        .unwrap();
    assert_eq!(second.path(), "/path");
    assert_eq!(seen.take(), Some(Backslash));
}

/// Each row is `(input, expected violation, expected description)`.
#[test]
fn test_syntax_violation_callback_types() {
    use url::SyntaxViolation::*;

    let data = [
        ("http://mozilla.org/\\foo", Backslash, "backslash"),
        (" http://mozilla.org", C0SpaceIgnored, "leading or trailing control or space character are ignored in URLs"),
        ("http://user:pass@mozilla.org", EmbeddedCredentials, "embedding authentication information (username or password) in an URL is not recommended"),
        ("http:///mozilla.org", ExpectedDoubleSlash, "expected //"),
        ("file:/foo.txt", ExpectedFileDoubleSlash, "expected // after file:"),
        ("file://mozilla.org/c:/file.txt", FileWithHostAndWindowsDrive, "file: with host and Windows drive letter"),
        ("http://mozilla.org/^", NonUrlCodePoint, "non-URL code point"),
        ("http://mozilla.org/#\x000", NullInFragment, "NULL characters are ignored in URL fragment identifiers"),
        ("http://mozilla.org/%1", PercentDecode, "expected 2 hex digits after %"),
        ("http://mozilla.org\t/foo",
TabOrNewlineIgnored, "tabs or newlines are ignored in URLs"),
        ("http://user@:pass@mozilla.org", UnencodedAtSign, "unencoded @ sign in username or password")
    ];

    for test_case in &data {
        let violation = Cell::new(None);
        Url::options()
            .syntax_violation_callback(Some(&|v| violation.set(Some(v))))
            .parse(test_case.0)
            .unwrap();

        let v = violation.take();
        assert_eq!(v, Some(test_case.1));
        assert_eq!(v.unwrap().description(), test_case.2);
        assert_eq!(v.unwrap().to_string(), test_case.2);
    }
}

/// One options value is used for two parses (the second against a base URL);
/// the violations from both are collected in order.
#[test]
fn test_options_reuse() {
    use url::SyntaxViolation::*;
    let violations = RefCell::new(Vec::new());
    let vfn = |v| violations.borrow_mut().push(v);

    let options = Url::options().syntax_violation_callback(Some(&vfn));
    let url = options.parse("http:////mozilla.org").unwrap();

    let options = options.base_url(Some(&url));
    let url = options.parse("/sub\\path").unwrap();
    assert_eq!(url.as_str(), "http://mozilla.org/sub/path");
    assert_eq!(*violations.borrow(), vec![ExpectedDoubleSlash, Backslash]);
}

/// https://github.com/servo/rust-url/issues/505
#[cfg(windows)]
#[test]
fn test_url_from_file_path() {
    use std::path::PathBuf;
    use url::Url;

    let p = PathBuf::from("c:///");
    let u = Url::from_file_path(p).unwrap();
    let path = u.to_file_path().unwrap();
    assert_eq!("C:\\", path.to_str().unwrap());
}

/// https://github.com/servo/rust-url/issues/505
#[cfg(any(unix, target_os = "redox", target_os = "wasi"))]
#[cfg(not(windows))]
#[test]
fn test_url_from_file_path() {
    use std::path::PathBuf;
    use url::Url;

    let p = PathBuf::from("/c:/");
    let u = Url::from_file_path(p).unwrap();
    let path = u.to_file_path().unwrap();
    assert_eq!("/c:/", path.to_str().unwrap());
}

/// `set_path` with a relative value on this URL yields a path starting with `/`.
#[test]
fn test_non_special_path() {
    let mut db_url = url::Url::parse("postgres://postgres@localhost/").unwrap();
    assert_eq!(db_url.as_str(), "postgres://postgres@localhost/");
    db_url.set_path("diesel_foo");
    assert_eq!(db_url.as_str(), "postgres://postgres@localhost/diesel_foo");
    assert_eq!(db_url.path(), "/diesel_foo");
}

#[test]
fn
test_non_special_path2() {
    let mut db_url = url::Url::parse("postgres://postgres@localhost/").unwrap();
    assert_eq!(db_url.as_str(), "postgres://postgres@localhost/");
    // An empty path removes the trailing slash from the serialization.
    db_url.set_path("");
    assert_eq!(db_url.path(), "");
    assert_eq!(db_url.as_str(), "postgres://postgres@localhost");
    db_url.set_path("foo");
    assert_eq!(db_url.path(), "/foo");
    assert_eq!(db_url.as_str(), "postgres://postgres@localhost/foo");
    db_url.set_path("/bar");
    assert_eq!(db_url.path(), "/bar");
    assert_eq!(db_url.as_str(), "postgres://postgres@localhost/bar");
}

/// Setting `/` and then `/foo` keeps the path and serialization in step.
#[test]
fn test_non_special_path3() {
    let mut db_url = url::Url::parse("postgres://postgres@localhost/").unwrap();
    assert_eq!(db_url.as_str(), "postgres://postgres@localhost/");
    db_url.set_path("/");
    assert_eq!(db_url.as_str(), "postgres://postgres@localhost/");
    assert_eq!(db_url.path(), "/");
    db_url.set_path("/foo");
    assert_eq!(db_url.as_str(), "postgres://postgres@localhost/foo");
    assert_eq!(db_url.path(), "/foo");
}

/// Switching this URL's scheme to `file` fails and leaves the URL unchanged.
#[test]
fn test_set_scheme_to_file_with_host() {
    let mut url: Url = "http://localhost:6767/foo/bar".parse().unwrap();
    let result = url.set_scheme("file");
    assert_eq!(url.to_string(), "http://localhost:6767/foo/bar");
    assert_eq!(result, Err(()));
}

/// Both calls must succeed; nothing else is asserted.
#[test]
fn no_panic() {
    let mut url = Url::parse("arhttpsps:/.//eom/dae.com/\\\\t\\:").unwrap();
    url::quirks::set_hostname(&mut url, "//eom/datcom/\\\\t\\://eom/data.cs").unwrap();
}

/// The serialization of this URL must parse back to an equal URL.
#[test]
fn test_null_host_with_leading_empty_path_segment() {
    // since Note in item 3 of URL serializing in the URL Standard
    // https://url.spec.whatwg.org/#url-serializing
    let url = Url::parse("m:/.//\\").unwrap();
    let encoded = url.as_str();
    let reparsed = Url::parse(encoded).unwrap();
    assert_eq!(reparsed, url);
}

/// `pop_if_empty` followed by `pop` on this URL must complete.
#[test]
fn pop_if_empty_in_bounds() {
    let mut url = Url::parse("m://").unwrap();
    let mut segments = url.path_segments_mut().unwrap();
    segments.pop_if_empty();
    segments.pop();
}

/// Indexing a `Url` with `Position` ranges returns the matching component text.
#[test]
fn test_slicing() {
    use url::Position::*;

    // Expected text of every slice; omitted fields default to "".
    #[derive(Default)]
    struct ExpectedSlices<'a> {
        full: &'a str,
scheme: &'a str,
        username: &'a str,
        password: &'a str,
        host: &'a str,
        port: &'a str,
        path: &'a str,
        query: &'a str,
        fragment: &'a str,
    }

    let cases = [
        // Every component present.
        ExpectedSlices {
            full: "https://user:pass@domain.com:9742/path/file.ext?key=val&key2=val2#fragment",
            scheme: "https",
            username: "user",
            password: "pass",
            host: "domain.com",
            port: "9742",
            path: "/path/file.ext",
            query: "key=val&key2=val2",
            fragment: "fragment",
        },
        // No credentials and no query.
        ExpectedSlices {
            full: "https://domain.com:9742/path/file.ext#fragment",
            scheme: "https",
            host: "domain.com",
            port: "9742",
            path: "/path/file.ext",
            fragment: "fragment",
            ..Default::default()
        },
        // No credentials, query or fragment.
        ExpectedSlices {
            full: "https://domain.com:9742/path/file.ext",
            scheme: "https",
            host: "domain.com",
            port: "9742",
            path: "/path/file.ext",
            ..Default::default()
        },
        // Scheme and path only.
        ExpectedSlices {
            full: "blob:blob-info",
            scheme: "blob",
            path: "blob-info",
            ..Default::default()
        },
    ];

    for expected in &cases {
        let parsed = Url::parse(expected.full).unwrap();

        assert_eq!(&parsed[..], expected.full);
        assert_eq!(&parsed[BeforeScheme..AfterScheme], expected.scheme);
        assert_eq!(&parsed[BeforeUsername..AfterUsername], expected.username);
        assert_eq!(&parsed[BeforePassword..AfterPassword], expected.password);
        assert_eq!(&parsed[BeforeHost..AfterHost], expected.host);
        assert_eq!(&parsed[BeforePort..AfterPort], expected.port);
        assert_eq!(&parsed[BeforePath..AfterPath], expected.path);
        assert_eq!(&parsed[BeforeQuery..AfterQuery], expected.query);
        assert_eq!(&parsed[BeforeFragment..AfterFragment], expected.fragment);
        assert_eq!(&parsed[..AfterFragment], expected.full);
    }
}

/// Each row is `(base, target, expected relative reference)`.
#[test]
fn test_make_relative() {
    let tests = [
        ("http://127.0.0.1:8080/test", "http://127.0.0.1:8080/test", ""),
        ("http://127.0.0.1:8080/test", "http://127.0.0.1:8080/test/", "test/"),
        ("http://127.0.0.1:8080/test/", "http://127.0.0.1:8080/test", "../test"),
        ("http://127.0.0.1:8080/", "http://127.0.0.1:8080/?foo=bar#123", "?foo=bar#123"),
        (
"http://127.0.0.1:8080/", "http://127.0.0.1:8080/test/video", "test/video"),
        ("http://127.0.0.1:8080/test", "http://127.0.0.1:8080/test/video", "test/video"),
        ("http://127.0.0.1:8080/test/", "http://127.0.0.1:8080/test/video", "video"),
        ("http://127.0.0.1:8080/test", "http://127.0.0.1:8080/test2/video", "test2/video"),
        ("http://127.0.0.1:8080/test/", "http://127.0.0.1:8080/test2/video", "../test2/video"),
        ("http://127.0.0.1:8080/test/bla", "http://127.0.0.1:8080/test2/video", "../test2/video"),
        ("http://127.0.0.1:8080/test/bla/", "http://127.0.0.1:8080/test2/video", "../../test2/video"),
        ("http://127.0.0.1:8080/test/?foo=bar#123", "http://127.0.0.1:8080/test/video", "video"),
        ("http://127.0.0.1:8080/test/", "http://127.0.0.1:8080/test/video?baz=meh#456", "video?baz=meh#456"),
        ("http://127.0.0.1:8080/test", "http://127.0.0.1:8080/test?baz=meh#456", "?baz=meh#456"),
        ("http://127.0.0.1:8080/test/", "http://127.0.0.1:8080/test?baz=meh#456", "../test?baz=meh#456"),
        ("http://127.0.0.1:8080/test/", "http://127.0.0.1:8080/test/?baz=meh#456", "?baz=meh#456"),
        ("http://127.0.0.1:8080/test/?foo=bar#123", "http://127.0.0.1:8080/test/video?baz=meh#456", "video?baz=meh#456"),
        ("http://127.0.0.1:8080/file.txt", "http://127.0.0.1:8080/test/file.txt", "test/file.txt"),
        ("http://127.0.0.1:8080/not_equal.txt", "http://127.0.0.1:8080/test/file.txt", "test/file.txt"),
    ];

    for (base, uri, relative) in &tests {
        let from = url::Url::parse(base).unwrap();
        let to = url::Url::parse(uri).unwrap();

        // The computed relative reference must match the expectation...
        let computed = from.make_relative(&to).unwrap();
        assert_eq!(
            computed, *relative,
            "base: {}, uri: {}, relative: {}",
            base, uri, relative
        );

        // ...and joining the expected reference onto the base must give the target back.
        let rejoined = from.join(relative).unwrap();
        assert_eq!(
            rejoined.as_str(),
            *uri,
            "base: {}, uri: {}, relative: {}",
            base,
            uri,
            relative
        );
    }

    let error_tests = [
        ("http://127.0.0.1:8080/", "https://127.0.0.1:8080/test/"),
        ("http://127.0.0.1:8080/", "http://127.0.0.1:8081/test/"),
("http://127.0.0.1:8080/", "http://127.0.0.2:8080/test/"),
        ("mailto:a@example.com", "mailto:b@example.com"),
    ];

    // For these pairs `make_relative` must return `None`.
    for (base, uri) in &error_tests {
        let from = url::Url::parse(base).unwrap();
        let to = url::Url::parse(uri).unwrap();
        let computed = from.make_relative(&to);
        assert_eq!(computed, None, "base: {}, uri: {}", base, uri);
    }
}

/// `has_authority()` is false for the `mailto:` and `unix:/...` URLs and true
/// for the `file:///...` and `http://...` URLs.
#[test]
fn test_has_authority() {
    let mailto = Url::parse("mailto:joe@example.com").unwrap();
    assert!(!mailto.has_authority());

    let unix_socket = Url::parse("unix:/run/foo.socket").unwrap();
    assert!(!unix_socket.has_authority());

    let file = Url::parse("file:///tmp/foo").unwrap();
    assert!(file.has_authority());

    let http = Url::parse("http://example.com/tmp/foo").unwrap();
    assert!(http.has_authority());
}

/// Expected `authority()` strings for a range of URLs.
#[test]
fn test_authority() {
    let mailto = Url::parse("mailto:joe@example.com").unwrap();
    assert_eq!(mailto.authority(), "");

    let unix_socket = Url::parse("unix:/run/foo.socket").unwrap();
    assert_eq!(unix_socket.authority(), "");

    let file = Url::parse("file:///tmp/foo").unwrap();
    assert_eq!(file.authority(), "");

    let http = Url::parse("http://example.com/tmp/foo").unwrap();
    assert_eq!(http.authority(), "example.com");

    // Port 21 does not appear in the authority of the `ftp` URL.
    let ftp_default_port = Url::parse("ftp://127.0.0.1:21/").unwrap();
    assert_eq!(ftp_default_port.authority(), "127.0.0.1");

    let ftp_custom_port = Url::parse("ftp://user@127.0.0.1:2121/").unwrap();
    assert_eq!(ftp_custom_port.authority(), "user@127.0.0.1:2121");

    // Empty username and password leave only the host.
    let empty_credentials = Url::parse("https://:@example.com/").unwrap();
    assert_eq!(empty_credentials.authority(), "example.com");

    let password_only = Url::parse("https://:password@[::1]:8080/").unwrap();
    assert_eq!(password_only.authority(), ":password@[::1]:8080");

    let gopher = Url::parse("gopher://user:@àlex.example.com:70").unwrap();
    assert_eq!(gopher.authority(), "user@%C3%A0lex.example.com:70");

    let irc = Url::parse("irc://àlex:àlex@àlex.рф.example.com:6667/foo").unwrap();
    assert_eq!(
        irc.authority(),
        "%C3%A0lex:%C3%A0lex@%C3%A0lex.%D1%80%D1%84.example.com:6667"
    );

    let url = Url::parse("https://àlex:àlex@àlex.рф.example.com:443/foo").unwrap();
    assert_eq!(
        url.authority(),
"%C3%A0lex:%C3%A0lex@xn--lex-8ka.xn--p1ai.example.com" ); } #[test] /// https://github.com/servo/rust-url/issues/838 fn test_file_with_drive() { let s1 = "fIlE:p:?../"; let url = url::Url::parse(s1).unwrap(); assert_eq!(url.to_string(), "file:///p:?../"); assert_eq!(url.path(), "/p:"); let testcases = [ ("a", "file:///p:/a"), ("", "file:///p:?../"), ("?x", "file:///p:?x"), (".", "file:///p:/"), ("..", "file:///p:/"), ("../", "file:///p:/"), ]; for case in &testcases { let url2 = url::Url::join(&url, case.0).unwrap(); assert_eq!(url2.to_string(), case.1); } } #[test] /// Similar to test_file_with_drive, but with a path /// that could be confused for a drive. fn test_file_with_drive_and_path() { let s1 = "fIlE:p:/x|?../"; let url = url::Url::parse(s1).unwrap(); assert_eq!(url.to_string(), "file:///p:/x|?../"); assert_eq!(url.path(), "/p:/x|"); let s2 = "a"; let url2 = url::Url::join(&url, s2).unwrap(); assert_eq!(url2.to_string(), "file:///p:/a"); } #[test] fn issue_864() { let mut url = url::Url::parse("file://").unwrap(); dbg!(&url); url.set_path("x"); dbg!(&url); } url-2.5.2/tests/urltestdata.json000064400000000000000000006157761046102023000150530ustar 00000000000000[ "# Pulled from https://github.com/web-platform-tests/wpt/blob/befe66343e5f21dc464c8c772c6d20695936714f/url/resources/urltestdata.json", { "input": "http://example\t.\norg", "base": "http://example.org/foo/bar", "href": "http://example.org/", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://user:pass@foo:21/bar;par?b#c", "base": "http://example.org/foo/bar", "href": "http://user:pass@foo:21/bar;par?b#c", "origin": "http://foo:21", "protocol": "http:", "username": "user", "password": "pass", "host": "foo:21", "hostname": "foo", "port": "21", "pathname": "/bar;par", "search": "?b", "hash": "#c" }, { "input": "https://test:@test", "base": null, 
"href": "https://test@test/", "origin": "https://test", "protocol": "https:", "username": "test", "password": "", "host": "test", "hostname": "test", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "https://:@test", "base": null, "href": "https://test/", "origin": "https://test", "protocol": "https:", "username": "", "password": "", "host": "test", "hostname": "test", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "non-special://test:@test/x", "base": null, "href": "non-special://test@test/x", "origin": "null", "protocol": "non-special:", "username": "test", "password": "", "host": "test", "hostname": "test", "port": "", "pathname": "/x", "search": "", "hash": "" }, { "input": "non-special://:@test/x", "base": null, "href": "non-special://test/x", "origin": "null", "protocol": "non-special:", "username": "", "password": "", "host": "test", "hostname": "test", "port": "", "pathname": "/x", "search": "", "hash": "" }, { "input": "http:foo.com", "base": "http://example.org/foo/bar", "href": "http://example.org/foo/foo.com", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/foo/foo.com", "search": "", "hash": "" }, { "input": "\t :foo.com \n", "base": "http://example.org/foo/bar", "href": "http://example.org/foo/:foo.com", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/foo/:foo.com", "search": "", "hash": "" }, { "input": " foo.com ", "base": "http://example.org/foo/bar", "href": "http://example.org/foo/foo.com", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/foo/foo.com", "search": "", "hash": "" }, { "input": "a:\t foo.com", "base": "http://example.org/foo/bar", "href": "a: foo.com", "origin": 
"null", "protocol": "a:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": " foo.com", "search": "", "hash": "" }, { "input": "http://f:21/ b ? d # e ", "base": "http://example.org/foo/bar", "href": "http://f:21/%20b%20?%20d%20#%20e", "origin": "http://f:21", "protocol": "http:", "username": "", "password": "", "host": "f:21", "hostname": "f", "port": "21", "pathname": "/%20b%20", "search": "?%20d%20", "hash": "#%20e" }, { "input": "lolscheme:x x#x x", "base": null, "href": "lolscheme:x x#x%20x", "protocol": "lolscheme:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "x x", "search": "", "hash": "#x%20x" }, { "input": "http://f:/c", "base": "http://example.org/foo/bar", "href": "http://f/c", "origin": "http://f", "protocol": "http:", "username": "", "password": "", "host": "f", "hostname": "f", "port": "", "pathname": "/c", "search": "", "hash": "" }, { "input": "http://f:0/c", "base": "http://example.org/foo/bar", "href": "http://f:0/c", "origin": "http://f:0", "protocol": "http:", "username": "", "password": "", "host": "f:0", "hostname": "f", "port": "0", "pathname": "/c", "search": "", "hash": "" }, { "input": "http://f:00000000000000/c", "base": "http://example.org/foo/bar", "href": "http://f:0/c", "origin": "http://f:0", "protocol": "http:", "username": "", "password": "", "host": "f:0", "hostname": "f", "port": "0", "pathname": "/c", "search": "", "hash": "" }, { "input": "http://f:00000000000000000000080/c", "base": "http://example.org/foo/bar", "href": "http://f/c", "origin": "http://f", "protocol": "http:", "username": "", "password": "", "host": "f", "hostname": "f", "port": "", "pathname": "/c", "search": "", "hash": "" }, { "input": "http://f:b/c", "base": "http://example.org/foo/bar", "failure": true }, { "input": "http://f: /c", "base": "http://example.org/foo/bar", "failure": true }, { "input": "http://f:\n/c", "base": "http://example.org/foo/bar", "href": "http://f/c", 
"origin": "http://f", "protocol": "http:", "username": "", "password": "", "host": "f", "hostname": "f", "port": "", "pathname": "/c", "search": "", "hash": "" }, { "input": "http://f:fifty-two/c", "base": "http://example.org/foo/bar", "failure": true }, { "input": "http://f:999999/c", "base": "http://example.org/foo/bar", "failure": true }, { "input": "non-special://f:999999/c", "base": "http://example.org/foo/bar", "failure": true }, { "input": "http://f: 21 / b ? d # e ", "base": "http://example.org/foo/bar", "failure": true }, { "input": "", "base": "http://example.org/foo/bar", "href": "http://example.org/foo/bar", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/foo/bar", "search": "", "hash": "" }, { "input": " \t", "base": "http://example.org/foo/bar", "href": "http://example.org/foo/bar", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/foo/bar", "search": "", "hash": "" }, { "input": ":foo.com/", "base": "http://example.org/foo/bar", "href": "http://example.org/foo/:foo.com/", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/foo/:foo.com/", "search": "", "hash": "" }, { "input": ":foo.com\\", "base": "http://example.org/foo/bar", "href": "http://example.org/foo/:foo.com/", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/foo/:foo.com/", "search": "", "hash": "" }, { "input": ":", "base": "http://example.org/foo/bar", "href": "http://example.org/foo/:", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": 
"/foo/:", "search": "", "hash": "" }, { "input": ":a", "base": "http://example.org/foo/bar", "href": "http://example.org/foo/:a", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/foo/:a", "search": "", "hash": "" }, { "input": ":/", "base": "http://example.org/foo/bar", "href": "http://example.org/foo/:/", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/foo/:/", "search": "", "hash": "" }, { "input": ":\\", "base": "http://example.org/foo/bar", "href": "http://example.org/foo/:/", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/foo/:/", "search": "", "hash": "" }, { "input": ":#", "base": "http://example.org/foo/bar", "href": "http://example.org/foo/:#", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/foo/:", "search": "", "hash": "" }, { "input": "#", "base": "http://example.org/foo/bar", "href": "http://example.org/foo/bar#", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/foo/bar", "search": "", "hash": "" }, { "input": "#/", "base": "http://example.org/foo/bar", "href": "http://example.org/foo/bar#/", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/foo/bar", "search": "", "hash": "#/" }, { "input": "#\\", "base": "http://example.org/foo/bar", "href": "http://example.org/foo/bar#\\", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", 
"hostname": "example.org", "port": "", "pathname": "/foo/bar", "search": "", "hash": "#\\" }, { "input": "#;?", "base": "http://example.org/foo/bar", "href": "http://example.org/foo/bar#;?", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/foo/bar", "search": "", "hash": "#;?" }, { "input": "?", "base": "http://example.org/foo/bar", "href": "http://example.org/foo/bar?", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/foo/bar", "search": "", "hash": "" }, { "input": "/", "base": "http://example.org/foo/bar", "href": "http://example.org/", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": ":23", "base": "http://example.org/foo/bar", "href": "http://example.org/foo/:23", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/foo/:23", "search": "", "hash": "" }, { "input": "/:23", "base": "http://example.org/foo/bar", "href": "http://example.org/:23", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/:23", "search": "", "hash": "" }, { "input": "\\x", "base": "http://example.org/foo/bar", "href": "http://example.org/x", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/x", "search": "", "hash": "" }, { "input": "\\\\x\\hello", "base": "http://example.org/foo/bar", "href": "http://x/hello", "origin": "http://x", "protocol": "http:", "username": "", "password": "", "host": 
"x", "hostname": "x", "port": "", "pathname": "/hello", "search": "", "hash": "" }, { "input": "::", "base": "http://example.org/foo/bar", "href": "http://example.org/foo/::", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/foo/::", "search": "", "hash": "" }, { "input": "::23", "base": "http://example.org/foo/bar", "href": "http://example.org/foo/::23", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/foo/::23", "search": "", "hash": "" }, { "input": "foo://", "base": "http://example.org/foo/bar", "href": "foo://", "origin": "null", "protocol": "foo:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "", "search": "", "hash": "" }, { "input": "http://a:b@c:29/d", "base": "http://example.org/foo/bar", "href": "http://a:b@c:29/d", "origin": "http://c:29", "protocol": "http:", "username": "a", "password": "b", "host": "c:29", "hostname": "c", "port": "29", "pathname": "/d", "search": "", "hash": "" }, { "input": "http::@c:29", "base": "http://example.org/foo/bar", "href": "http://example.org/foo/:@c:29", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/foo/:@c:29", "search": "", "hash": "" }, { "input": "http://&a:foo(b]c@d:2/", "base": "http://example.org/foo/bar", "href": "http://&a:foo(b%5Dc@d:2/", "origin": "http://d:2", "protocol": "http:", "username": "&a", "password": "foo(b%5Dc", "host": "d:2", "hostname": "d", "port": "2", "pathname": "/", "search": "", "hash": "" }, { "input": "http://::@c@d:2", "base": "http://example.org/foo/bar", "href": "http://:%3A%40c@d:2/", "origin": "http://d:2", "protocol": "http:", "username": "", "password": "%3A%40c", "host": "d:2", "hostname": "d", "port": "2", 
"pathname": "/", "search": "", "hash": "" }, { "input": "http://foo.com:b@d/", "base": "http://example.org/foo/bar", "href": "http://foo.com:b@d/", "origin": "http://d", "protocol": "http:", "username": "foo.com", "password": "b", "host": "d", "hostname": "d", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://foo.com/\\@", "base": "http://example.org/foo/bar", "href": "http://foo.com//@", "origin": "http://foo.com", "protocol": "http:", "username": "", "password": "", "host": "foo.com", "hostname": "foo.com", "port": "", "pathname": "//@", "search": "", "hash": "" }, { "input": "http:\\\\foo.com\\", "base": "http://example.org/foo/bar", "href": "http://foo.com/", "origin": "http://foo.com", "protocol": "http:", "username": "", "password": "", "host": "foo.com", "hostname": "foo.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http:\\\\a\\b:c\\d@foo.com\\", "base": "http://example.org/foo/bar", "href": "http://a/b:c/d@foo.com/", "origin": "http://a", "protocol": "http:", "username": "", "password": "", "host": "a", "hostname": "a", "port": "", "pathname": "/b:c/d@foo.com/", "search": "", "hash": "" }, { "input": "foo:/", "base": "http://example.org/foo/bar", "href": "foo:/", "origin": "null", "protocol": "foo:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "foo:/bar.com/", "base": "http://example.org/foo/bar", "href": "foo:/bar.com/", "origin": "null", "protocol": "foo:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/bar.com/", "search": "", "hash": "" }, { "input": "foo://///////", "base": "http://example.org/foo/bar", "href": "foo://///////", "origin": "null", "protocol": "foo:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "///////", "search": "", "hash": "" }, { "input": "foo://///////bar.com/", "base": "http://example.org/foo/bar", "href": 
"foo://///////bar.com/", "origin": "null", "protocol": "foo:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "///////bar.com/", "search": "", "hash": "" }, { "input": "foo:////://///", "base": "http://example.org/foo/bar", "href": "foo:////://///", "origin": "null", "protocol": "foo:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "//://///", "search": "", "hash": "" }, { "input": "c:/foo", "base": "http://example.org/foo/bar", "href": "c:/foo", "origin": "null", "protocol": "c:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/foo", "search": "", "hash": "" }, { "input": "//foo/bar", "base": "http://example.org/foo/bar", "href": "http://foo/bar", "origin": "http://foo", "protocol": "http:", "username": "", "password": "", "host": "foo", "hostname": "foo", "port": "", "pathname": "/bar", "search": "", "hash": "" }, { "input": "http://foo/path;a??e#f#g", "base": "http://example.org/foo/bar", "href": "http://foo/path;a??e#f#g", "origin": "http://foo", "protocol": "http:", "username": "", "password": "", "host": "foo", "hostname": "foo", "port": "", "pathname": "/path;a", "search": "??e", "hash": "#f#g" }, { "input": "http://foo/abcd?efgh?ijkl", "base": "http://example.org/foo/bar", "href": "http://foo/abcd?efgh?ijkl", "origin": "http://foo", "protocol": "http:", "username": "", "password": "", "host": "foo", "hostname": "foo", "port": "", "pathname": "/abcd", "search": "?efgh?ijkl", "hash": "" }, { "input": "http://foo/abcd#foo?bar", "base": "http://example.org/foo/bar", "href": "http://foo/abcd#foo?bar", "origin": "http://foo", "protocol": "http:", "username": "", "password": "", "host": "foo", "hostname": "foo", "port": "", "pathname": "/abcd", "search": "", "hash": "#foo?bar" }, { "input": "[61:24:74]:98", "base": "http://example.org/foo/bar", "href": "http://example.org/foo/[61:24:74]:98", "origin": "http://example.org", "protocol": "http:", 
"username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/foo/[61:24:74]:98", "search": "", "hash": "" }, { "input": "http:[61:27]/:foo", "base": "http://example.org/foo/bar", "href": "http://example.org/foo/[61:27]/:foo", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/foo/[61:27]/:foo", "search": "", "hash": "" }, { "input": "http://[1::2]:3:4", "base": "http://example.org/foo/bar", "failure": true }, { "input": "http://2001::1", "base": "http://example.org/foo/bar", "failure": true }, { "input": "http://2001::1]", "base": "http://example.org/foo/bar", "failure": true }, { "input": "http://2001::1]:80", "base": "http://example.org/foo/bar", "failure": true }, { "input": "http://[2001::1]", "base": "http://example.org/foo/bar", "href": "http://[2001::1]/", "origin": "http://[2001::1]", "protocol": "http:", "username": "", "password": "", "host": "[2001::1]", "hostname": "[2001::1]", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://[::127.0.0.1]", "base": "http://example.org/foo/bar", "href": "http://[::7f00:1]/", "origin": "http://[::7f00:1]", "protocol": "http:", "username": "", "password": "", "host": "[::7f00:1]", "hostname": "[::7f00:1]", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://[::127.0.0.1.]", "base": "http://example.org/foo/bar", "failure": true }, { "input": "http://[0:0:0:0:0:0:13.1.68.3]", "base": "http://example.org/foo/bar", "href": "http://[::d01:4403]/", "origin": "http://[::d01:4403]", "protocol": "http:", "username": "", "password": "", "host": "[::d01:4403]", "hostname": "[::d01:4403]", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://[2001::1]:80", "base": "http://example.org/foo/bar", "href": "http://[2001::1]/", "origin": "http://[2001::1]", "protocol": "http:", "username": "", "password": "", 
"host": "[2001::1]", "hostname": "[2001::1]", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http:/example.com/", "base": "http://example.org/foo/bar", "href": "http://example.org/example.com/", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/example.com/", "search": "", "hash": "" }, { "input": "ftp:/example.com/", "base": "http://example.org/foo/bar", "href": "ftp://example.com/", "origin": "ftp://example.com", "protocol": "ftp:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "https:/example.com/", "base": "http://example.org/foo/bar", "href": "https://example.com/", "origin": "https://example.com", "protocol": "https:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "madeupscheme:/example.com/", "base": "http://example.org/foo/bar", "href": "madeupscheme:/example.com/", "origin": "null", "protocol": "madeupscheme:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/example.com/", "search": "", "hash": "" }, { "input": "file:/example.com/", "base": "http://example.org/foo/bar", "href": "file:///example.com/", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/example.com/", "search": "", "hash": "" }, { "input": "file://example:1/", "base": null, "failure": true }, { "input": "file://example:test/", "base": null, "failure": true }, { "input": "file://example%/", "base": null, "failure": true }, { "input": "file://[example]/", "base": null, "failure": true }, { "input": "ftps:/example.com/", "base": "http://example.org/foo/bar", "href": "ftps:/example.com/", "origin": "null", "protocol": "ftps:", "username": "", "password": "", "host": 
"", "hostname": "", "port": "", "pathname": "/example.com/", "search": "", "hash": "" }, { "input": "gopher:/example.com/", "base": "http://example.org/foo/bar", "href": "gopher:/example.com/", "origin": "null", "protocol": "gopher:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/example.com/", "search": "", "hash": "" }, { "input": "ws:/example.com/", "base": "http://example.org/foo/bar", "href": "ws://example.com/", "origin": "ws://example.com", "protocol": "ws:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "wss:/example.com/", "base": "http://example.org/foo/bar", "href": "wss://example.com/", "origin": "wss://example.com", "protocol": "wss:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "data:/example.com/", "base": "http://example.org/foo/bar", "href": "data:/example.com/", "origin": "null", "protocol": "data:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/example.com/", "search": "", "hash": "" }, { "input": "javascript:/example.com/", "base": "http://example.org/foo/bar", "href": "javascript:/example.com/", "origin": "null", "protocol": "javascript:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/example.com/", "search": "", "hash": "" }, { "input": "mailto:/example.com/", "base": "http://example.org/foo/bar", "href": "mailto:/example.com/", "origin": "null", "protocol": "mailto:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/example.com/", "search": "", "hash": "" }, { "input": "http:example.com/", "base": "http://example.org/foo/bar", "href": "http://example.org/foo/example.com/", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", 
"hostname": "example.org", "port": "", "pathname": "/foo/example.com/", "search": "", "hash": "" }, { "input": "ftp:example.com/", "base": "http://example.org/foo/bar", "href": "ftp://example.com/", "origin": "ftp://example.com", "protocol": "ftp:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "https:example.com/", "base": "http://example.org/foo/bar", "href": "https://example.com/", "origin": "https://example.com", "protocol": "https:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "madeupscheme:example.com/", "base": "http://example.org/foo/bar", "href": "madeupscheme:example.com/", "origin": "null", "protocol": "madeupscheme:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "example.com/", "search": "", "hash": "" }, { "input": "ftps:example.com/", "base": "http://example.org/foo/bar", "href": "ftps:example.com/", "origin": "null", "protocol": "ftps:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "example.com/", "search": "", "hash": "" }, { "input": "gopher:example.com/", "base": "http://example.org/foo/bar", "href": "gopher:example.com/", "origin": "null", "protocol": "gopher:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "example.com/", "search": "", "hash": "" }, { "input": "ws:example.com/", "base": "http://example.org/foo/bar", "href": "ws://example.com/", "origin": "ws://example.com", "protocol": "ws:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "wss:example.com/", "base": "http://example.org/foo/bar", "href": "wss://example.com/", "origin": "wss://example.com", "protocol": "wss:", "username": "", "password": "", "host": 
"example.com", "hostname": "example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "data:example.com/", "base": "http://example.org/foo/bar", "href": "data:example.com/", "origin": "null", "protocol": "data:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "example.com/", "search": "", "hash": "" }, { "input": "javascript:example.com/", "base": "http://example.org/foo/bar", "href": "javascript:example.com/", "origin": "null", "protocol": "javascript:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "example.com/", "search": "", "hash": "" }, { "input": "mailto:example.com/", "base": "http://example.org/foo/bar", "href": "mailto:example.com/", "origin": "null", "protocol": "mailto:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "example.com/", "search": "", "hash": "" }, { "input": "/a/b/c", "base": "http://example.org/foo/bar", "href": "http://example.org/a/b/c", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/a/b/c", "search": "", "hash": "" }, { "input": "/a/ /c", "base": "http://example.org/foo/bar", "href": "http://example.org/a/%20/c", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/a/%20/c", "search": "", "hash": "" }, { "input": "/a%2fc", "base": "http://example.org/foo/bar", "href": "http://example.org/a%2fc", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/a%2fc", "search": "", "hash": "" }, { "input": "/a/%2f/c", "base": "http://example.org/foo/bar", "href": "http://example.org/a/%2f/c", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": 
"example.org", "hostname": "example.org", "port": "", "pathname": "/a/%2f/c", "search": "", "hash": "" }, { "input": "#β", "base": "http://example.org/foo/bar", "href": "http://example.org/foo/bar#%CE%B2", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/foo/bar", "search": "", "hash": "#%CE%B2" }, { "input": "data:text/html,test#test", "base": "http://example.org/foo/bar", "href": "data:text/html,test#test", "origin": "null", "protocol": "data:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "text/html,test", "search": "", "hash": "#test" }, { "input": "tel:1234567890", "base": "http://example.org/foo/bar", "href": "tel:1234567890", "origin": "null", "protocol": "tel:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "1234567890", "search": "", "hash": "" }, "# Based on https://felixfbecker.github.io/whatwg-url-custom-host-repro/", { "input": "ssh://example.com/foo/bar.git", "base": "http://example.org/", "href": "ssh://example.com/foo/bar.git", "origin": "null", "protocol": "ssh:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/foo/bar.git", "search": "", "hash": "" }, "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/file.html", { "input": "file:c:\\foo\\bar.html", "base": "file:///tmp/mock/path", "href": "file:///c:/foo/bar.html", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/c:/foo/bar.html", "search": "", "hash": "" }, { "input": " File:c|////foo\\bar.html", "base": "file:///tmp/mock/path", "href": "file:///c:////foo/bar.html", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/c:////foo/bar.html", "search": "", "hash": "" }, { "input": "C|/foo/bar", "base": "file:///tmp/mock/path", 
"href": "file:///C:/foo/bar", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/C:/foo/bar", "search": "", "hash": "" }, { "input": "/C|\\foo\\bar", "base": "file:///tmp/mock/path", "href": "file:///C:/foo/bar", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/C:/foo/bar", "search": "", "hash": "" }, { "input": "//C|/foo/bar", "base": "file:///tmp/mock/path", "href": "file:///C:/foo/bar", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/C:/foo/bar", "search": "", "hash": "" }, { "input": "//server/file", "base": "file:///tmp/mock/path", "href": "file://server/file", "protocol": "file:", "username": "", "password": "", "host": "server", "hostname": "server", "port": "", "pathname": "/file", "search": "", "hash": "" }, { "input": "\\\\server\\file", "base": "file:///tmp/mock/path", "href": "file://server/file", "protocol": "file:", "username": "", "password": "", "host": "server", "hostname": "server", "port": "", "pathname": "/file", "search": "", "hash": "" }, { "input": "/\\server/file", "base": "file:///tmp/mock/path", "href": "file://server/file", "protocol": "file:", "username": "", "password": "", "host": "server", "hostname": "server", "port": "", "pathname": "/file", "search": "", "hash": "" }, { "input": "file:///foo/bar.txt", "base": "file:///tmp/mock/path", "href": "file:///foo/bar.txt", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/foo/bar.txt", "search": "", "hash": "" }, { "input": "file:///home/me", "base": "file:///tmp/mock/path", "href": "file:///home/me", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/home/me", "search": "", "hash": "" }, { "input": "//", "base": "file:///tmp/mock/path", "href": "file:///", "protocol": "file:", "username": "", "password": "", 
"host": "", "hostname": "", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "///", "base": "file:///tmp/mock/path", "href": "file:///", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "///test", "base": "file:///tmp/mock/path", "href": "file:///test", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/test", "search": "", "hash": "" }, { "input": "file://test", "base": "file:///tmp/mock/path", "href": "file://test/", "protocol": "file:", "username": "", "password": "", "host": "test", "hostname": "test", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "file://localhost", "base": "file:///tmp/mock/path", "href": "file:///", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "file://localhost/", "base": "file:///tmp/mock/path", "href": "file:///", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "file://localhost/test", "base": "file:///tmp/mock/path", "href": "file:///test", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/test", "search": "", "hash": "" }, { "input": "test", "base": "file:///tmp/mock/path", "href": "file:///tmp/mock/test", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/tmp/mock/test", "search": "", "hash": "" }, { "input": "file:test", "base": "file:///tmp/mock/path", "href": "file:///tmp/mock/test", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/tmp/mock/test", "search": "", "hash": "" }, "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/script-tests/path.js", { 
"input": "http://example.com/././foo", "base": null, "href": "http://example.com/foo", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/foo", "search": "", "hash": "" }, { "input": "http://example.com/./.foo", "base": null, "href": "http://example.com/.foo", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/.foo", "search": "", "hash": "" }, { "input": "http://example.com/foo/.", "base": null, "href": "http://example.com/foo/", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/foo/", "search": "", "hash": "" }, { "input": "http://example.com/foo/./", "base": null, "href": "http://example.com/foo/", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/foo/", "search": "", "hash": "" }, { "input": "http://example.com/foo/bar/..", "base": null, "href": "http://example.com/foo/", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/foo/", "search": "", "hash": "" }, { "input": "http://example.com/foo/bar/../", "base": null, "href": "http://example.com/foo/", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/foo/", "search": "", "hash": "" }, { "input": "http://example.com/foo/..bar", "base": null, "href": "http://example.com/foo/..bar", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/foo/..bar", "search": "", 
"hash": "" }, { "input": "http://example.com/foo/bar/../ton", "base": null, "href": "http://example.com/foo/ton", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/foo/ton", "search": "", "hash": "" }, { "input": "http://example.com/foo/bar/../ton/../../a", "base": null, "href": "http://example.com/a", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/a", "search": "", "hash": "" }, { "input": "http://example.com/foo/../../..", "base": null, "href": "http://example.com/", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://example.com/foo/../../../ton", "base": null, "href": "http://example.com/ton", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/ton", "search": "", "hash": "" }, { "input": "http://example.com/foo/%2e", "base": null, "href": "http://example.com/foo/", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/foo/", "search": "", "hash": "" }, { "input": "http://example.com/foo/%2e%2", "base": null, "href": "http://example.com/foo/%2e%2", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/foo/%2e%2", "search": "", "hash": "" }, { "input": "http://example.com/foo/%2e./%2e%2e/.%2e/%2e.bar", "base": null, "href": "http://example.com/%2e.bar", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", 
"hostname": "example.com", "port": "", "pathname": "/%2e.bar", "search": "", "hash": "" }, { "input": "http://example.com////../..", "base": null, "href": "http://example.com//", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "//", "search": "", "hash": "" }, { "input": "http://example.com/foo/bar//../..", "base": null, "href": "http://example.com/foo/", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/foo/", "search": "", "hash": "" }, { "input": "http://example.com/foo/bar//..", "base": null, "href": "http://example.com/foo/bar/", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/foo/bar/", "search": "", "hash": "" }, { "input": "http://example.com/foo", "base": null, "href": "http://example.com/foo", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/foo", "search": "", "hash": "" }, { "input": "http://example.com/%20foo", "base": null, "href": "http://example.com/%20foo", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/%20foo", "search": "", "hash": "" }, { "input": "http://example.com/foo%", "base": null, "href": "http://example.com/foo%", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/foo%", "search": "", "hash": "" }, { "input": "http://example.com/foo%2", "base": null, "href": "http://example.com/foo%2", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": 
"example.com", "hostname": "example.com", "port": "", "pathname": "/foo%2", "search": "", "hash": "" }, { "input": "http://example.com/foo%2zbar", "base": null, "href": "http://example.com/foo%2zbar", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/foo%2zbar", "search": "", "hash": "" }, { "input": "http://example.com/foo%2©zbar", "base": null, "href": "http://example.com/foo%2%C3%82%C2%A9zbar", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/foo%2%C3%82%C2%A9zbar", "search": "", "hash": "" }, { "input": "http://example.com/foo%41%7a", "base": null, "href": "http://example.com/foo%41%7a", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/foo%41%7a", "search": "", "hash": "" }, { "input": "http://example.com/foo\t\u0091%91", "base": null, "href": "http://example.com/foo%C2%91%91", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/foo%C2%91%91", "search": "", "hash": "" }, { "input": "http://example.com/foo%00%51", "base": null, "href": "http://example.com/foo%00%51", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/foo%00%51", "search": "", "hash": "" }, { "input": "http://example.com/(%28:%3A%29)", "base": null, "href": "http://example.com/(%28:%3A%29)", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/(%28:%3A%29)", "search": "", "hash": "" }, { "input": "http://example.com/%3A%3a%3C%3c", "base": null, 
"href": "http://example.com/%3A%3a%3C%3c", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/%3A%3a%3C%3c", "search": "", "hash": "" }, { "input": "http://example.com/foo\tbar", "base": null, "href": "http://example.com/foobar", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/foobar", "search": "", "hash": "" }, { "input": "http://example.com\\\\foo\\\\bar", "base": null, "href": "http://example.com//foo//bar", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "//foo//bar", "search": "", "hash": "" }, { "input": "http://example.com/%7Ffp3%3Eju%3Dduvgw%3Dd", "base": null, "href": "http://example.com/%7Ffp3%3Eju%3Dduvgw%3Dd", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/%7Ffp3%3Eju%3Dduvgw%3Dd", "search": "", "hash": "" }, { "input": "http://example.com/@asdf%40", "base": null, "href": "http://example.com/@asdf%40", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/@asdf%40", "search": "", "hash": "" }, { "input": "http://example.com/你好你好", "base": null, "href": "http://example.com/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD", "search": "", "hash": "" }, { "input": "http://example.com/‥/foo", "base": null, "href": "http://example.com/%E2%80%A5/foo", "origin": "http://example.com", "protocol": "http:", "username": "", "password": 
"", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/%E2%80%A5/foo", "search": "", "hash": "" }, { "input": "http://example.com//foo", "base": null, "href": "http://example.com/%EF%BB%BF/foo", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/%EF%BB%BF/foo", "search": "", "hash": "" }, { "input": "http://example.com/‮/foo/‭/bar", "base": null, "href": "http://example.com/%E2%80%AE/foo/%E2%80%AD/bar", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/%E2%80%AE/foo/%E2%80%AD/bar", "search": "", "hash": "" }, "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/script-tests/relative.js", { "input": "http://www.google.com/foo?bar=baz#", "base": null, "href": "http://www.google.com/foo?bar=baz#", "origin": "http://www.google.com", "protocol": "http:", "username": "", "password": "", "host": "www.google.com", "hostname": "www.google.com", "port": "", "pathname": "/foo", "search": "?bar=baz", "hash": "" }, { "input": "http://www.google.com/foo?bar=baz# »", "base": null, "href": "http://www.google.com/foo?bar=baz#%20%C2%BB", "origin": "http://www.google.com", "protocol": "http:", "username": "", "password": "", "host": "www.google.com", "hostname": "www.google.com", "port": "", "pathname": "/foo", "search": "?bar=baz", "hash": "#%20%C2%BB" }, { "input": "data:test# »", "base": null, "href": "data:test#%20%C2%BB", "origin": "null", "protocol": "data:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "test", "search": "", "hash": "#%20%C2%BB" }, { "input": "http://www.google.com", "base": null, "href": "http://www.google.com/", "origin": "http://www.google.com", "protocol": "http:", "username": "", "password": "", "host": "www.google.com", "hostname": "www.google.com", "port": 
"", "pathname": "/", "search": "", "hash": "" }, { "input": "http://192.0x00A80001", "base": null, "href": "http://192.168.0.1/", "origin": "http://192.168.0.1", "protocol": "http:", "username": "", "password": "", "host": "192.168.0.1", "hostname": "192.168.0.1", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://www/foo%2Ehtml", "base": null, "href": "http://www/foo%2Ehtml", "origin": "http://www", "protocol": "http:", "username": "", "password": "", "host": "www", "hostname": "www", "port": "", "pathname": "/foo%2Ehtml", "search": "", "hash": "" }, { "input": "http://www/foo/%2E/html", "base": null, "href": "http://www/foo/html", "origin": "http://www", "protocol": "http:", "username": "", "password": "", "host": "www", "hostname": "www", "port": "", "pathname": "/foo/html", "search": "", "hash": "" }, { "input": "http://user:pass@/", "base": null, "failure": true }, { "input": "http://%25DOMAIN:foobar@foodomain.com/", "base": null, "href": "http://%25DOMAIN:foobar@foodomain.com/", "origin": "http://foodomain.com", "protocol": "http:", "username": "%25DOMAIN", "password": "foobar", "host": "foodomain.com", "hostname": "foodomain.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http:\\\\www.google.com\\foo", "base": null, "href": "http://www.google.com/foo", "origin": "http://www.google.com", "protocol": "http:", "username": "", "password": "", "host": "www.google.com", "hostname": "www.google.com", "port": "", "pathname": "/foo", "search": "", "hash": "" }, { "input": "http://foo:80/", "base": null, "href": "http://foo/", "origin": "http://foo", "protocol": "http:", "username": "", "password": "", "host": "foo", "hostname": "foo", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://foo:81/", "base": null, "href": "http://foo:81/", "origin": "http://foo:81", "protocol": "http:", "username": "", "password": "", "host": "foo:81", "hostname": "foo", "port": "81", "pathname": "/", "search": 
"", "hash": "" }, { "input": "httpa://foo:80/", "base": null, "href": "httpa://foo:80/", "origin": "null", "protocol": "httpa:", "username": "", "password": "", "host": "foo:80", "hostname": "foo", "port": "80", "pathname": "/", "search": "", "hash": "" }, { "input": "http://foo:-80/", "base": null, "failure": true }, { "input": "https://foo:443/", "base": null, "href": "https://foo/", "origin": "https://foo", "protocol": "https:", "username": "", "password": "", "host": "foo", "hostname": "foo", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "https://foo:80/", "base": null, "href": "https://foo:80/", "origin": "https://foo:80", "protocol": "https:", "username": "", "password": "", "host": "foo:80", "hostname": "foo", "port": "80", "pathname": "/", "search": "", "hash": "" }, { "input": "ftp://foo:21/", "base": null, "href": "ftp://foo/", "origin": "ftp://foo", "protocol": "ftp:", "username": "", "password": "", "host": "foo", "hostname": "foo", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "ftp://foo:80/", "base": null, "href": "ftp://foo:80/", "origin": "ftp://foo:80", "protocol": "ftp:", "username": "", "password": "", "host": "foo:80", "hostname": "foo", "port": "80", "pathname": "/", "search": "", "hash": "" }, { "input": "gopher://foo:70/", "base": null, "href": "gopher://foo:70/", "origin": "null", "protocol": "gopher:", "username": "", "password": "", "host": "foo:70", "hostname": "foo", "port": "70", "pathname": "/", "search": "", "hash": "" }, { "input": "gopher://foo:443/", "base": null, "href": "gopher://foo:443/", "origin": "null", "protocol": "gopher:", "username": "", "password": "", "host": "foo:443", "hostname": "foo", "port": "443", "pathname": "/", "search": "", "hash": "" }, { "input": "ws://foo:80/", "base": null, "href": "ws://foo/", "origin": "ws://foo", "protocol": "ws:", "username": "", "password": "", "host": "foo", "hostname": "foo", "port": "", "pathname": "/", "search": "", "hash": "" }, { 
"input": "ws://foo:81/", "base": null, "href": "ws://foo:81/", "origin": "ws://foo:81", "protocol": "ws:", "username": "", "password": "", "host": "foo:81", "hostname": "foo", "port": "81", "pathname": "/", "search": "", "hash": "" }, { "input": "ws://foo:443/", "base": null, "href": "ws://foo:443/", "origin": "ws://foo:443", "protocol": "ws:", "username": "", "password": "", "host": "foo:443", "hostname": "foo", "port": "443", "pathname": "/", "search": "", "hash": "" }, { "input": "ws://foo:815/", "base": null, "href": "ws://foo:815/", "origin": "ws://foo:815", "protocol": "ws:", "username": "", "password": "", "host": "foo:815", "hostname": "foo", "port": "815", "pathname": "/", "search": "", "hash": "" }, { "input": "wss://foo:80/", "base": null, "href": "wss://foo:80/", "origin": "wss://foo:80", "protocol": "wss:", "username": "", "password": "", "host": "foo:80", "hostname": "foo", "port": "80", "pathname": "/", "search": "", "hash": "" }, { "input": "wss://foo:81/", "base": null, "href": "wss://foo:81/", "origin": "wss://foo:81", "protocol": "wss:", "username": "", "password": "", "host": "foo:81", "hostname": "foo", "port": "81", "pathname": "/", "search": "", "hash": "" }, { "input": "wss://foo:443/", "base": null, "href": "wss://foo/", "origin": "wss://foo", "protocol": "wss:", "username": "", "password": "", "host": "foo", "hostname": "foo", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "wss://foo:815/", "base": null, "href": "wss://foo:815/", "origin": "wss://foo:815", "protocol": "wss:", "username": "", "password": "", "host": "foo:815", "hostname": "foo", "port": "815", "pathname": "/", "search": "", "hash": "" }, { "input": "http:/example.com/", "base": null, "href": "http://example.com/", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "ftp:/example.com/", "base": null, 
"href": "ftp://example.com/", "origin": "ftp://example.com", "protocol": "ftp:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "https:/example.com/", "base": null, "href": "https://example.com/", "origin": "https://example.com", "protocol": "https:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "madeupscheme:/example.com/", "base": null, "href": "madeupscheme:/example.com/", "origin": "null", "protocol": "madeupscheme:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/example.com/", "search": "", "hash": "" }, { "input": "file:/example.com/", "base": null, "href": "file:///example.com/", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/example.com/", "search": "", "hash": "" }, { "input": "ftps:/example.com/", "base": null, "href": "ftps:/example.com/", "origin": "null", "protocol": "ftps:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/example.com/", "search": "", "hash": "" }, { "input": "gopher:/example.com/", "base": null, "href": "gopher:/example.com/", "origin": "null", "protocol": "gopher:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/example.com/", "search": "", "hash": "" }, { "input": "ws:/example.com/", "base": null, "href": "ws://example.com/", "origin": "ws://example.com", "protocol": "ws:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "wss:/example.com/", "base": null, "href": "wss://example.com/", "origin": "wss://example.com", "protocol": "wss:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/", "search": 
"", "hash": "" }, { "input": "data:/example.com/", "base": null, "href": "data:/example.com/", "origin": "null", "protocol": "data:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/example.com/", "search": "", "hash": "" }, { "input": "javascript:/example.com/", "base": null, "href": "javascript:/example.com/", "origin": "null", "protocol": "javascript:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/example.com/", "search": "", "hash": "" }, { "input": "mailto:/example.com/", "base": null, "href": "mailto:/example.com/", "origin": "null", "protocol": "mailto:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/example.com/", "search": "", "hash": "" }, { "input": "http:example.com/", "base": null, "href": "http://example.com/", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "ftp:example.com/", "base": null, "href": "ftp://example.com/", "origin": "ftp://example.com", "protocol": "ftp:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "https:example.com/", "base": null, "href": "https://example.com/", "origin": "https://example.com", "protocol": "https:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "madeupscheme:example.com/", "base": null, "href": "madeupscheme:example.com/", "origin": "null", "protocol": "madeupscheme:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "example.com/", "search": "", "hash": "" }, { "input": "ftps:example.com/", "base": null, "href": "ftps:example.com/", "origin": "null", "protocol": "ftps:", "username": "", "password": "", 
"host": "", "hostname": "", "port": "", "pathname": "example.com/", "search": "", "hash": "" }, { "input": "gopher:example.com/", "base": null, "href": "gopher:example.com/", "origin": "null", "protocol": "gopher:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "example.com/", "search": "", "hash": "" }, { "input": "ws:example.com/", "base": null, "href": "ws://example.com/", "origin": "ws://example.com", "protocol": "ws:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "wss:example.com/", "base": null, "href": "wss://example.com/", "origin": "wss://example.com", "protocol": "wss:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "data:example.com/", "base": null, "href": "data:example.com/", "origin": "null", "protocol": "data:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "example.com/", "search": "", "hash": "" }, { "input": "javascript:example.com/", "base": null, "href": "javascript:example.com/", "origin": "null", "protocol": "javascript:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "example.com/", "search": "", "hash": "" }, { "input": "mailto:example.com/", "base": null, "href": "mailto:example.com/", "origin": "null", "protocol": "mailto:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "example.com/", "search": "", "hash": "" }, "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/segments-userinfo-vs-host.html", { "input": "http:@www.example.com", "base": null, "href": "http://www.example.com/", "origin": "http://www.example.com", "protocol": "http:", "username": "", "password": "", "host": "www.example.com", "hostname": "www.example.com", "port": "", "pathname": "/", "search": "", 
"hash": "" }, { "input": "http:/@www.example.com", "base": null, "href": "http://www.example.com/", "origin": "http://www.example.com", "protocol": "http:", "username": "", "password": "", "host": "www.example.com", "hostname": "www.example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://@www.example.com", "base": null, "href": "http://www.example.com/", "origin": "http://www.example.com", "protocol": "http:", "username": "", "password": "", "host": "www.example.com", "hostname": "www.example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http:a:b@www.example.com", "base": null, "href": "http://a:b@www.example.com/", "origin": "http://www.example.com", "protocol": "http:", "username": "a", "password": "b", "host": "www.example.com", "hostname": "www.example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http:/a:b@www.example.com", "base": null, "href": "http://a:b@www.example.com/", "origin": "http://www.example.com", "protocol": "http:", "username": "a", "password": "b", "host": "www.example.com", "hostname": "www.example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://a:b@www.example.com", "base": null, "href": "http://a:b@www.example.com/", "origin": "http://www.example.com", "protocol": "http:", "username": "a", "password": "b", "host": "www.example.com", "hostname": "www.example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://@pple.com", "base": null, "href": "http://pple.com/", "origin": "http://pple.com", "protocol": "http:", "username": "", "password": "", "host": "pple.com", "hostname": "pple.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http::b@www.example.com", "base": null, "href": "http://:b@www.example.com/", "origin": "http://www.example.com", "protocol": "http:", "username": "", "password": "b", "host": "www.example.com", "hostname": "www.example.com", "port": "", 
"pathname": "/", "search": "", "hash": "" }, { "input": "http:/:b@www.example.com", "base": null, "href": "http://:b@www.example.com/", "origin": "http://www.example.com", "protocol": "http:", "username": "", "password": "b", "host": "www.example.com", "hostname": "www.example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://:b@www.example.com", "base": null, "href": "http://:b@www.example.com/", "origin": "http://www.example.com", "protocol": "http:", "username": "", "password": "b", "host": "www.example.com", "hostname": "www.example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http:/:@/www.example.com", "base": null, "failure": true, "relativeTo": "non-opaque-path-base" }, { "input": "http://user@/www.example.com", "base": null, "failure": true }, { "input": "http:@/www.example.com", "base": null, "failure": true, "relativeTo": "non-opaque-path-base" }, { "input": "http:/@/www.example.com", "base": null, "failure": true, "relativeTo": "non-opaque-path-base" }, { "input": "http://@/www.example.com", "base": null, "failure": true }, { "input": "https:@/www.example.com", "base": null, "failure": true, "relativeTo": "non-opaque-path-base" }, { "input": "http:a:b@/www.example.com", "base": null, "failure": true, "relativeTo": "non-opaque-path-base" }, { "input": "http:/a:b@/www.example.com", "base": null, "failure": true, "relativeTo": "non-opaque-path-base" }, { "input": "http://a:b@/www.example.com", "base": null, "failure": true }, { "input": "http::@/www.example.com", "base": null, "failure": true, "relativeTo": "non-opaque-path-base" }, { "input": "http:a:@www.example.com", "base": null, "href": "http://a@www.example.com/", "origin": "http://www.example.com", "protocol": "http:", "username": "a", "password": "", "host": "www.example.com", "hostname": "www.example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http:/a:@www.example.com", "base": null, "href": 
"http://a@www.example.com/", "origin": "http://www.example.com", "protocol": "http:", "username": "a", "password": "", "host": "www.example.com", "hostname": "www.example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://a:@www.example.com", "base": null, "href": "http://a@www.example.com/", "origin": "http://www.example.com", "protocol": "http:", "username": "a", "password": "", "host": "www.example.com", "hostname": "www.example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://www.@pple.com", "base": null, "href": "http://www.@pple.com/", "origin": "http://pple.com", "protocol": "http:", "username": "www.", "password": "", "host": "pple.com", "hostname": "pple.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http:@:www.example.com", "base": null, "failure": true, "relativeTo": "non-opaque-path-base" }, { "input": "http:/@:www.example.com", "base": null, "failure": true, "relativeTo": "non-opaque-path-base" }, { "input": "http://@:www.example.com", "base": null, "failure": true }, { "input": "http://:@www.example.com", "base": null, "href": "http://www.example.com/", "origin": "http://www.example.com", "protocol": "http:", "username": "", "password": "", "host": "www.example.com", "hostname": "www.example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, "# Others", { "input": "/", "base": "http://www.example.com/test", "href": "http://www.example.com/", "origin": "http://www.example.com", "protocol": "http:", "username": "", "password": "", "host": "www.example.com", "hostname": "www.example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "/test.txt", "base": "http://www.example.com/test", "href": "http://www.example.com/test.txt", "origin": "http://www.example.com", "protocol": "http:", "username": "", "password": "", "host": "www.example.com", "hostname": "www.example.com", "port": "", "pathname": "/test.txt", "search": "", "hash": 
"" }, { "input": ".", "base": "http://www.example.com/test", "href": "http://www.example.com/", "origin": "http://www.example.com", "protocol": "http:", "username": "", "password": "", "host": "www.example.com", "hostname": "www.example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "..", "base": "http://www.example.com/test", "href": "http://www.example.com/", "origin": "http://www.example.com", "protocol": "http:", "username": "", "password": "", "host": "www.example.com", "hostname": "www.example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "test.txt", "base": "http://www.example.com/test", "href": "http://www.example.com/test.txt", "origin": "http://www.example.com", "protocol": "http:", "username": "", "password": "", "host": "www.example.com", "hostname": "www.example.com", "port": "", "pathname": "/test.txt", "search": "", "hash": "" }, { "input": "./test.txt", "base": "http://www.example.com/test", "href": "http://www.example.com/test.txt", "origin": "http://www.example.com", "protocol": "http:", "username": "", "password": "", "host": "www.example.com", "hostname": "www.example.com", "port": "", "pathname": "/test.txt", "search": "", "hash": "" }, { "input": "../test.txt", "base": "http://www.example.com/test", "href": "http://www.example.com/test.txt", "origin": "http://www.example.com", "protocol": "http:", "username": "", "password": "", "host": "www.example.com", "hostname": "www.example.com", "port": "", "pathname": "/test.txt", "search": "", "hash": "" }, { "input": "../aaa/test.txt", "base": "http://www.example.com/test", "href": "http://www.example.com/aaa/test.txt", "origin": "http://www.example.com", "protocol": "http:", "username": "", "password": "", "host": "www.example.com", "hostname": "www.example.com", "port": "", "pathname": "/aaa/test.txt", "search": "", "hash": "" }, { "input": "../../test.txt", "base": "http://www.example.com/test", "href": "http://www.example.com/test.txt", 
"origin": "http://www.example.com", "protocol": "http:", "username": "", "password": "", "host": "www.example.com", "hostname": "www.example.com", "port": "", "pathname": "/test.txt", "search": "", "hash": "" }, { "input": "中/test.txt", "base": "http://www.example.com/test", "href": "http://www.example.com/%E4%B8%AD/test.txt", "origin": "http://www.example.com", "protocol": "http:", "username": "", "password": "", "host": "www.example.com", "hostname": "www.example.com", "port": "", "pathname": "/%E4%B8%AD/test.txt", "search": "", "hash": "" }, { "input": "http://www.example2.com", "base": "http://www.example.com/test", "href": "http://www.example2.com/", "origin": "http://www.example2.com", "protocol": "http:", "username": "", "password": "", "host": "www.example2.com", "hostname": "www.example2.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "//www.example2.com", "base": "http://www.example.com/test", "href": "http://www.example2.com/", "origin": "http://www.example2.com", "protocol": "http:", "username": "", "password": "", "host": "www.example2.com", "hostname": "www.example2.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "file:...", "base": "http://www.example.com/test", "href": "file:///...", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/...", "search": "", "hash": "" }, { "input": "file:..", "base": "http://www.example.com/test", "href": "file:///", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "file:a", "base": "http://www.example.com/test", "href": "file:///a", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/a", "search": "", "hash": "" }, "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/host.html", "Basic canonicalization, uppercase should be converted to 
lowercase", { "input": "http://ExAmPlE.CoM", "base": "http://other.com/", "href": "http://example.com/", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://example example.com", "base": "http://other.com/", "failure": true }, { "input": "http://Goo%20 goo%7C|.com", "base": "http://other.com/", "failure": true }, { "input": "http://[]", "base": "http://other.com/", "failure": true }, { "input": "http://[:]", "base": "http://other.com/", "failure": true }, "U+3000 is mapped to U+0020 (space) which is disallowed", { "input": "http://GOO\u00a0\u3000goo.com", "base": "http://other.com/", "failure": true }, "Other types of space (no-break, zero-width, zero-width-no-break) are name-prepped away to nothing. U+200B, U+2060, and U+FEFF, are ignored", { "input": "http://GOO\u200b\u2060\ufeffgoo.com", "base": "http://other.com/", "href": "http://googoo.com/", "origin": "http://googoo.com", "protocol": "http:", "username": "", "password": "", "host": "googoo.com", "hostname": "googoo.com", "port": "", "pathname": "/", "search": "", "hash": "" }, "Leading and trailing C0 control or space", { "input": "\u0000\u001b\u0004\u0012 http://example.com/\u001f \u000d ", "base": null, "href": "http://example.com/", "origin": "http://example.com", "protocol": "http:", "username": "", "password": "", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/", "search": "", "hash": "" }, "Ideographic full stop (full-width period for Chinese, etc.) should be treated as a dot. 
U+3002 is mapped to U+002E (dot)", { "input": "http://www.foo。bar.com", "base": "http://other.com/", "href": "http://www.foo.bar.com/", "origin": "http://www.foo.bar.com", "protocol": "http:", "username": "", "password": "", "host": "www.foo.bar.com", "hostname": "www.foo.bar.com", "port": "", "pathname": "/", "search": "", "hash": "" }, "Invalid unicode characters should fail... U+FDD0 is disallowed; %ef%b7%90 is U+FDD0", { "input": "http://\ufdd0zyx.com", "base": "http://other.com/", "failure": true }, "This is the same as previous but escaped", { "input": "http://%ef%b7%90zyx.com", "base": "http://other.com/", "failure": true }, "U+FFFD", { "input": "https://\ufffd", "base": null, "failure": true }, { "input": "https://%EF%BF%BD", "base": null, "failure": true }, { "input": "https://x/\ufffd?\ufffd#\ufffd", "base": null, "href": "https://x/%EF%BF%BD?%EF%BF%BD#%EF%BF%BD", "origin": "https://x", "protocol": "https:", "username": "", "password": "", "host": "x", "hostname": "x", "port": "", "pathname": "/%EF%BF%BD", "search": "?%EF%BF%BD", "hash": "#%EF%BF%BD" }, "Domain is ASCII, but a label is invalid IDNA", { "input": "http://a.b.c.xn--pokxncvks", "base": null, "failure": true }, { "input": "http://10.0.0.xn--pokxncvks", "base": null, "failure": true }, "IDNA labels should be matched case-insensitively", { "input": "http://a.b.c.XN--pokxncvks", "base": null, "failure": true }, { "input": "http://a.b.c.Xn--pokxncvks", "base": null, "failure": true }, { "input": "http://10.0.0.XN--pokxncvks", "base": null, "failure": true }, { "input": "http://10.0.0.xN--pokxncvks", "base": null, "failure": true }, "Test name prepping, fullwidth input should be converted to ASCII and NOT IDN-ized. 
This is 'Go' in fullwidth UTF-8/UTF-16.", { "input": "http://Go.com", "base": "http://other.com/", "href": "http://go.com/", "origin": "http://go.com", "protocol": "http:", "username": "", "password": "", "host": "go.com", "hostname": "go.com", "port": "", "pathname": "/", "search": "", "hash": "" }, "URL spec forbids the following. https://www.w3.org/Bugs/Public/show_bug.cgi?id=24257", { "input": "http://%41.com", "base": "http://other.com/", "failure": true }, { "input": "http://%ef%bc%85%ef%bc%94%ef%bc%91.com", "base": "http://other.com/", "failure": true }, "...%00 in fullwidth should fail (also as escaped UTF-8 input)", { "input": "http://%00.com", "base": "http://other.com/", "failure": true }, { "input": "http://%ef%bc%85%ef%bc%90%ef%bc%90.com", "base": "http://other.com/", "failure": true }, "Basic IDN support, UTF-8 and UTF-16 input should be converted to IDN", { "input": "http://你好你好", "base": "http://other.com/", "href": "http://xn--6qqa088eba/", "origin": "http://xn--6qqa088eba", "protocol": "http:", "username": "", "password": "", "host": "xn--6qqa088eba", "hostname": "xn--6qqa088eba", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "https://faß.ExAmPlE/", "base": null, "href": "https://xn--fa-hia.example/", "origin": "https://xn--fa-hia.example", "protocol": "https:", "username": "", "password": "", "host": "xn--fa-hia.example", "hostname": "xn--fa-hia.example", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "sc://faß.ExAmPlE/", "base": null, "href": "sc://fa%C3%9F.ExAmPlE/", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "fa%C3%9F.ExAmPlE", "hostname": "fa%C3%9F.ExAmPlE", "port": "", "pathname": "/", "search": "", "hash": "" }, "Invalid escaped characters should fail and the percents should be escaped. 
https://www.w3.org/Bugs/Public/show_bug.cgi?id=24191", { "input": "http://%zz%66%a.com", "base": "http://other.com/", "failure": true }, "If we get an invalid character that has been escaped.", { "input": "http://%25", "base": "http://other.com/", "failure": true }, { "input": "http://hello%00", "base": "http://other.com/", "failure": true }, "Escaped numbers should be treated like IP addresses if they are.", { "input": "http://%30%78%63%30%2e%30%32%35%30.01", "base": "http://other.com/", "href": "http://192.168.0.1/", "origin": "http://192.168.0.1", "protocol": "http:", "username": "", "password": "", "host": "192.168.0.1", "hostname": "192.168.0.1", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://%30%78%63%30%2e%30%32%35%30.01%2e", "base": "http://other.com/", "href": "http://192.168.0.1/", "origin": "http://192.168.0.1", "protocol": "http:", "username": "", "password": "", "host": "192.168.0.1", "hostname": "192.168.0.1", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://192.168.0.257", "base": "http://other.com/", "failure": true }, "Invalid escaping in hosts causes failure", { "input": "http://%3g%78%63%30%2e%30%32%35%30%2E.01", "base": "http://other.com/", "failure": true }, "A space in a host causes failure", { "input": "http://192.168.0.1 hello", "base": "http://other.com/", "failure": true }, { "input": "https://x x:12", "base": null, "failure": true }, "Fullwidth and escaped UTF-8 fullwidth should still be treated as IP", { "input": "http://0Xc0.0250.01", "base": "http://other.com/", "href": "http://192.168.0.1/", "origin": "http://192.168.0.1", "protocol": "http:", "username": "", "password": "", "host": "192.168.0.1", "hostname": "192.168.0.1", "port": "", "pathname": "/", "search": "", "hash": "" }, "Domains with empty labels", { "input": "http://./", "base": null, "href": "http://./", "origin": "http://.", "protocol": "http:", "username": "", "password": "", "host": ".", "hostname": ".", "port": 
"", "pathname": "/", "search": "", "hash": "" }, { "input": "http://../", "base": null, "href": "http://../", "origin": "http://..", "protocol": "http:", "username": "", "password": "", "host": "..", "hostname": "..", "port": "", "pathname": "/", "search": "", "hash": "" }, "Non-special domains with empty labels", { "input": "h://.", "base": null, "href": "h://.", "origin": "null", "protocol": "h:", "username": "", "password": "", "host": ".", "hostname": ".", "port": "", "pathname": "", "search": "", "hash": "" }, "Broken IPv6", { "input": "http://[www.google.com]/", "base": null, "failure": true }, { "input": "http://[google.com]", "base": "http://other.com/", "failure": true }, { "input": "http://[::1.2.3.4x]", "base": "http://other.com/", "failure": true }, { "input": "http://[::1.2.3.]", "base": "http://other.com/", "failure": true }, { "input": "http://[::1.2.]", "base": "http://other.com/", "failure": true }, { "input": "http://[::.1.2]", "base": "http://other.com/", "failure": true }, { "input": "http://[::1.]", "base": "http://other.com/", "failure": true }, { "input": "http://[::.1]", "base": "http://other.com/", "failure": true }, { "input": "http://[::%31]", "base": "http://other.com/", "failure": true }, { "input": "http://%5B::1]", "base": "http://other.com/", "failure": true }, "Misc Unicode", { "input": "http://foo:💩@example.com/bar", "base": "http://other.com/", "href": "http://foo:%F0%9F%92%A9@example.com/bar", "origin": "http://example.com", "protocol": "http:", "username": "foo", "password": "%F0%9F%92%A9", "host": "example.com", "hostname": "example.com", "port": "", "pathname": "/bar", "search": "", "hash": "" }, "# resolving a fragment against any scheme succeeds", { "input": "#", "base": "test:test", "href": "test:test#", "origin": "null", "protocol": "test:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "test", "search": "", "hash": "" }, { "input": "#x", "base": "mailto:x@x.com", "href": 
"mailto:x@x.com#x", "origin": "null", "protocol": "mailto:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "x@x.com", "search": "", "hash": "#x" }, { "input": "#x", "base": "data:,", "href": "data:,#x", "origin": "null", "protocol": "data:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": ",", "search": "", "hash": "#x" }, { "input": "#x", "base": "about:blank", "href": "about:blank#x", "origin": "null", "protocol": "about:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "blank", "search": "", "hash": "#x" }, { "input": "#x:y", "base": "about:blank", "href": "about:blank#x:y", "origin": "null", "protocol": "about:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "blank", "search": "", "hash": "#x:y" }, { "input": "#", "base": "test:test?test", "href": "test:test?test#", "origin": "null", "protocol": "test:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "test", "search": "?test", "hash": "" }, "# multiple @ in authority state", { "input": "https://@test@test@example:800/", "base": "http://doesnotmatter/", "href": "https://%40test%40test@example:800/", "origin": "https://example:800", "protocol": "https:", "username": "%40test%40test", "password": "", "host": "example:800", "hostname": "example", "port": "800", "pathname": "/", "search": "", "hash": "" }, { "input": "https://@@@example", "base": "http://doesnotmatter/", "href": "https://%40%40@example/", "origin": "https://example", "protocol": "https:", "username": "%40%40", "password": "", "host": "example", "hostname": "example", "port": "", "pathname": "/", "search": "", "hash": "" }, "non-az-09 characters", { "input": "http://`{}:`{}@h/`{}?`{}", "base": "http://doesnotmatter/", "href": "http://%60%7B%7D:%60%7B%7D@h/%60%7B%7D?`{}", "origin": "http://h", "protocol": "http:", "username": "%60%7B%7D", "password": "%60%7B%7D", 
"host": "h", "hostname": "h", "port": "", "pathname": "/%60%7B%7D", "search": "?`{}", "hash": "" }, "byte is ' and url is special", { "input": "http://host/?'", "base": null, "href": "http://host/?%27", "origin": "http://host", "protocol": "http:", "username": "", "password": "", "host": "host", "hostname": "host", "port": "", "pathname": "/", "search": "?%27", "hash": "" }, { "input": "notspecial://host/?'", "base": null, "href": "notspecial://host/?'", "origin": "null", "protocol": "notspecial:", "username": "", "password": "", "host": "host", "hostname": "host", "port": "", "pathname": "/", "search": "?'", "hash": "" }, "# Credentials in base", { "input": "/some/path", "base": "http://user@example.org/smth", "href": "http://user@example.org/some/path", "origin": "http://example.org", "protocol": "http:", "username": "user", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/some/path", "search": "", "hash": "" }, { "input": "", "base": "http://user:pass@example.org:21/smth", "href": "http://user:pass@example.org:21/smth", "origin": "http://example.org:21", "protocol": "http:", "username": "user", "password": "pass", "host": "example.org:21", "hostname": "example.org", "port": "21", "pathname": "/smth", "search": "", "hash": "" }, { "input": "/some/path", "base": "http://user:pass@example.org:21/smth", "href": "http://user:pass@example.org:21/some/path", "origin": "http://example.org:21", "protocol": "http:", "username": "user", "password": "pass", "host": "example.org:21", "hostname": "example.org", "port": "21", "pathname": "/some/path", "search": "", "hash": "" }, "# a set of tests designed by zcorpan for relative URLs with unknown schemes", { "input": "i", "base": "sc:sd", "failure": true }, { "input": "i", "base": "sc:sd/sd", "failure": true }, { "input": "i", "base": "sc:/pa/pa", "href": "sc:/pa/i", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "", "hostname": "", "port": "", 
"pathname": "/pa/i", "search": "", "hash": "" }, { "input": "i", "base": "sc://ho/pa", "href": "sc://ho/i", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "ho", "hostname": "ho", "port": "", "pathname": "/i", "search": "", "hash": "" }, { "input": "i", "base": "sc:///pa/pa", "href": "sc:///pa/i", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/pa/i", "search": "", "hash": "" }, { "input": "../i", "base": "sc:sd", "failure": true }, { "input": "../i", "base": "sc:sd/sd", "failure": true }, { "input": "../i", "base": "sc:/pa/pa", "href": "sc:/i", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/i", "search": "", "hash": "" }, { "input": "../i", "base": "sc://ho/pa", "href": "sc://ho/i", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "ho", "hostname": "ho", "port": "", "pathname": "/i", "search": "", "hash": "" }, { "input": "../i", "base": "sc:///pa/pa", "href": "sc:///i", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/i", "search": "", "hash": "" }, { "input": "/i", "base": "sc:sd", "failure": true }, { "input": "/i", "base": "sc:sd/sd", "failure": true }, { "input": "/i", "base": "sc:/pa/pa", "href": "sc:/i", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/i", "search": "", "hash": "" }, { "input": "/i", "base": "sc://ho/pa", "href": "sc://ho/i", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "ho", "hostname": "ho", "port": "", "pathname": "/i", "search": "", "hash": "" }, { "input": "/i", "base": "sc:///pa/pa", "href": "sc:///i", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/i", "search": "", "hash": "" }, 
{ "input": "?i", "base": "sc:sd", "failure": true }, { "input": "?i", "base": "sc:sd/sd", "failure": true }, { "input": "?i", "base": "sc:/pa/pa", "href": "sc:/pa/pa?i", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/pa/pa", "search": "?i", "hash": "" }, { "input": "?i", "base": "sc://ho/pa", "href": "sc://ho/pa?i", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "ho", "hostname": "ho", "port": "", "pathname": "/pa", "search": "?i", "hash": "" }, { "input": "?i", "base": "sc:///pa/pa", "href": "sc:///pa/pa?i", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/pa/pa", "search": "?i", "hash": "" }, { "input": "#i", "base": "sc:sd", "href": "sc:sd#i", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "sd", "search": "", "hash": "#i" }, { "input": "#i", "base": "sc:sd/sd", "href": "sc:sd/sd#i", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "sd/sd", "search": "", "hash": "#i" }, { "input": "#i", "base": "sc:/pa/pa", "href": "sc:/pa/pa#i", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/pa/pa", "search": "", "hash": "#i" }, { "input": "#i", "base": "sc://ho/pa", "href": "sc://ho/pa#i", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "ho", "hostname": "ho", "port": "", "pathname": "/pa", "search": "", "hash": "#i" }, { "input": "#i", "base": "sc:///pa/pa", "href": "sc:///pa/pa#i", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/pa/pa", "search": "", "hash": "#i" }, "# make sure that relative URL logic works on known typically non-relative schemes too", { "input": "about:/../", 
"base": null, "href": "about:/", "origin": "null", "protocol": "about:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "data:/../", "base": null, "href": "data:/", "origin": "null", "protocol": "data:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "javascript:/../", "base": null, "href": "javascript:/", "origin": "null", "protocol": "javascript:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "mailto:/../", "base": null, "href": "mailto:/", "origin": "null", "protocol": "mailto:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/", "search": "", "hash": "" }, "# unknown schemes and their hosts", { "input": "sc://ñ.test/", "base": null, "href": "sc://%C3%B1.test/", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "%C3%B1.test", "hostname": "%C3%B1.test", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "sc://%/", "base": null, "href": "sc://%/", "protocol": "sc:", "username": "", "password": "", "host": "%", "hostname": "%", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "sc://@/", "base": null, "failure": true }, { "input": "sc://te@s:t@/", "base": null, "failure": true }, { "input": "sc://:/", "base": null, "failure": true }, { "input": "sc://:12/", "base": null, "failure": true }, { "input": "x", "base": "sc://ñ", "href": "sc://%C3%B1/x", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "%C3%B1", "hostname": "%C3%B1", "port": "", "pathname": "/x", "search": "", "hash": "" }, "# unknown schemes and backslashes", { "input": "sc:\\../", "base": null, "href": "sc:\\../", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": 
"\\../", "search": "", "hash": "" }, "# unknown scheme with path looking like a password", { "input": "sc::a@example.net", "base": null, "href": "sc::a@example.net", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": ":a@example.net", "search": "", "hash": "" }, "# unknown scheme with bogus percent-encoding", { "input": "wow:%NBD", "base": null, "href": "wow:%NBD", "origin": "null", "protocol": "wow:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "%NBD", "search": "", "hash": "" }, { "input": "wow:%1G", "base": null, "href": "wow:%1G", "origin": "null", "protocol": "wow:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "%1G", "search": "", "hash": "" }, "# unknown scheme with non-URL characters", { "input": "wow:\uFFFF", "base": null, "href": "wow:%EF%BF%BF", "origin": "null", "protocol": "wow:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "%EF%BF%BF", "search": "", "hash": "" }, "Forbidden host code points", { "input": "sc://a\u0000b/", "base": null, "failure": true }, { "input": "sc://a b/", "base": null, "failure": true }, { "input": "sc://ab", "base": null, "failure": true }, { "input": "sc://a[b/", "base": null, "failure": true }, { "input": "sc://a\\b/", "base": null, "failure": true }, { "input": "sc://a]b/", "base": null, "failure": true }, { "input": "sc://a^b", "base": null, "failure": true }, { "input": "sc://a|b/", "base": null, "failure": true }, "Forbidden host codepoints: tabs and newlines are removed during preprocessing", { "input": "foo://ho\u0009st/", "base": null, "hash": "", "host": "host", "hostname": "host", "href":"foo://host/", "password": "", "pathname": "/", "port":"", "protocol": "foo:", "search": "", "username": "" }, { "input": "foo://ho\u000Ast/", "base": null, "hash": "", "host": "host", "hostname": "host", "href":"foo://host/", "password": "", 
"pathname": "/", "port":"", "protocol": "foo:", "search": "", "username": "" }, { "input": "foo://ho\u000Dst/", "base": null, "hash": "", "host": "host", "hostname": "host", "href":"foo://host/", "password": "", "pathname": "/", "port":"", "protocol": "foo:", "search": "", "username": "" }, "Forbidden domain code-points", { "input": "http://a\u0000b/", "base": null, "failure": true }, { "input": "http://a\u0001b/", "base": null, "failure": true }, { "input": "http://a\u0002b/", "base": null, "failure": true }, { "input": "http://a\u0003b/", "base": null, "failure": true }, { "input": "http://a\u0004b/", "base": null, "failure": true }, { "input": "http://a\u0005b/", "base": null, "failure": true }, { "input": "http://a\u0006b/", "base": null, "failure": true }, { "input": "http://a\u0007b/", "base": null, "failure": true }, { "input": "http://a\u0008b/", "base": null, "failure": true }, { "input": "http://a\u000Bb/", "base": null, "failure": true }, { "input": "http://a\u000Cb/", "base": null, "failure": true }, { "input": "http://a\u000Eb/", "base": null, "failure": true }, { "input": "http://a\u000Fb/", "base": null, "failure": true }, { "input": "http://a\u0010b/", "base": null, "failure": true }, { "input": "http://a\u0011b/", "base": null, "failure": true }, { "input": "http://a\u0012b/", "base": null, "failure": true }, { "input": "http://a\u0013b/", "base": null, "failure": true }, { "input": "http://a\u0014b/", "base": null, "failure": true }, { "input": "http://a\u0015b/", "base": null, "failure": true }, { "input": "http://a\u0016b/", "base": null, "failure": true }, { "input": "http://a\u0017b/", "base": null, "failure": true }, { "input": "http://a\u0018b/", "base": null, "failure": true }, { "input": "http://a\u0019b/", "base": null, "failure": true }, { "input": "http://a\u001Ab/", "base": null, "failure": true }, { "input": "http://a\u001Bb/", "base": null, "failure": true }, { "input": "http://a\u001Cb/", "base": null, "failure": true }, { "input": 
"http://a\u001Db/", "base": null, "failure": true }, { "input": "http://a\u001Eb/", "base": null, "failure": true }, { "input": "http://a\u001Fb/", "base": null, "failure": true }, { "input": "http://a b/", "base": null, "failure": true }, { "input": "http://a%b/", "base": null, "failure": true }, { "input": "http://ab", "base": null, "failure": true }, { "input": "http://a[b/", "base": null, "failure": true }, { "input": "http://a]b/", "base": null, "failure": true }, { "input": "http://a^b", "base": null, "failure": true }, { "input": "http://a|b/", "base": null, "failure": true }, { "input": "http://a\u007Fb/", "base": null, "failure": true }, "Forbidden domain codepoints: tabs and newlines are removed during preprocessing", { "input": "http://ho\u0009st/", "base": null, "hash": "", "host": "host", "hostname": "host", "href":"http://host/", "password": "", "pathname": "/", "port":"", "protocol": "http:", "search": "", "username": "" }, { "input": "http://ho\u000Ast/", "base": null, "hash": "", "host": "host", "hostname": "host", "href":"http://host/", "password": "", "pathname": "/", "port":"", "protocol": "http:", "search": "", "username": "" }, { "input": "http://ho\u000Dst/", "base": null, "hash": "", "host": "host", "hostname": "host", "href":"http://host/", "password": "", "pathname": "/", "port":"", "protocol": "http:", "search": "", "username": "" }, "Encoded forbidden domain codepoints in special URLs", { "input": "http://ho%00st/", "base": null, "failure": true }, { "input": "http://ho%01st/", "base": null, "failure": true }, { "input": "http://ho%02st/", "base": null, "failure": true }, { "input": "http://ho%03st/", "base": null, "failure": true }, { "input": "http://ho%04st/", "base": null, "failure": true }, { "input": "http://ho%05st/", "base": null, "failure": true }, { "input": "http://ho%06st/", "base": null, "failure": true }, { "input": "http://ho%07st/", "base": null, "failure": true }, { "input": "http://ho%08st/", "base": null, "failure": 
true }, { "input": "http://ho%09st/", "base": null, "failure": true }, { "input": "http://ho%0Ast/", "base": null, "failure": true }, { "input": "http://ho%0Bst/", "base": null, "failure": true }, { "input": "http://ho%0Cst/", "base": null, "failure": true }, { "input": "http://ho%0Dst/", "base": null, "failure": true }, { "input": "http://ho%0Est/", "base": null, "failure": true }, { "input": "http://ho%0Fst/", "base": null, "failure": true }, { "input": "http://ho%10st/", "base": null, "failure": true }, { "input": "http://ho%11st/", "base": null, "failure": true }, { "input": "http://ho%12st/", "base": null, "failure": true }, { "input": "http://ho%13st/", "base": null, "failure": true }, { "input": "http://ho%14st/", "base": null, "failure": true }, { "input": "http://ho%15st/", "base": null, "failure": true }, { "input": "http://ho%16st/", "base": null, "failure": true }, { "input": "http://ho%17st/", "base": null, "failure": true }, { "input": "http://ho%18st/", "base": null, "failure": true }, { "input": "http://ho%19st/", "base": null, "failure": true }, { "input": "http://ho%1Ast/", "base": null, "failure": true }, { "input": "http://ho%1Bst/", "base": null, "failure": true }, { "input": "http://ho%1Cst/", "base": null, "failure": true }, { "input": "http://ho%1Dst/", "base": null, "failure": true }, { "input": "http://ho%1Est/", "base": null, "failure": true }, { "input": "http://ho%1Fst/", "base": null, "failure": true }, { "input": "http://ho%20st/", "base": null, "failure": true }, { "input": "http://ho%23st/", "base": null, "failure": true }, { "input": "http://ho%25st/", "base": null, "failure": true }, { "input": "http://ho%2Fst/", "base": null, "failure": true }, { "input": "http://ho%3Ast/", "base": null, "failure": true }, { "input": "http://ho%3Cst/", "base": null, "failure": true }, { "input": "http://ho%3Est/", "base": null, "failure": true }, { "input": "http://ho%3Fst/", "base": null, "failure": true }, { "input": "http://ho%40st/", "base": 
null, "failure": true }, { "input": "http://ho%5Bst/", "base": null, "failure": true }, { "input": "http://ho%5Cst/", "base": null, "failure": true }, { "input": "http://ho%5Dst/", "base": null, "failure": true }, { "input": "http://ho%7Cst/", "base": null, "failure": true }, { "input": "http://ho%7Fst/", "base": null, "failure": true }, "Allowed host/domain code points", { "input": "http://!\"$&'()*+,-.;=_`{}~/", "base": null, "href": "http://!\"$&'()*+,-.;=_`{}~/", "origin": "http://!\"$&'()*+,-.;=_`{}~", "protocol": "http:", "username": "", "password": "", "host": "!\"$&'()*+,-.;=_`{}~", "hostname": "!\"$&'()*+,-.;=_`{}~", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "sc://\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u000B\u000C\u000E\u000F\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F\u007F!\"$%&'()*+,-.;=_`{}~/", "base": null, "href": "sc://%01%02%03%04%05%06%07%08%0B%0C%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%7F!\"$%&'()*+,-.;=_`{}~/", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "%01%02%03%04%05%06%07%08%0B%0C%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%7F!\"$%&'()*+,-.;=_`{}~", "hostname": "%01%02%03%04%05%06%07%08%0B%0C%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%7F!\"$%&'()*+,-.;=_`{}~", "port": "", "pathname": "/", "search": "", "hash": "" }, "# Hosts and percent-encoding", { "input": "ftp://example.com%80/", "base": null, "failure": true }, { "input": "ftp://example.com%A0/", "base": null, "failure": true }, { "input": "https://example.com%80/", "base": null, "failure": true }, { "input": "https://example.com%A0/", "base": null, "failure": true }, { "input": "ftp://%e2%98%83", "base": null, "href": "ftp://xn--n3h/", "origin": "ftp://xn--n3h", "protocol": "ftp:", "username": "", "password": "", "host": "xn--n3h", "hostname": "xn--n3h", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": 
"https://%e2%98%83", "base": null, "href": "https://xn--n3h/", "origin": "https://xn--n3h", "protocol": "https:", "username": "", "password": "", "host": "xn--n3h", "hostname": "xn--n3h", "port": "", "pathname": "/", "search": "", "hash": "" }, "# tests from jsdom/whatwg-url designed for code coverage", { "input": "http://127.0.0.1:10100/relative_import.html", "base": null, "href": "http://127.0.0.1:10100/relative_import.html", "origin": "http://127.0.0.1:10100", "protocol": "http:", "username": "", "password": "", "host": "127.0.0.1:10100", "hostname": "127.0.0.1", "port": "10100", "pathname": "/relative_import.html", "search": "", "hash": "" }, { "input": "http://facebook.com/?foo=%7B%22abc%22", "base": null, "href": "http://facebook.com/?foo=%7B%22abc%22", "origin": "http://facebook.com", "protocol": "http:", "username": "", "password": "", "host": "facebook.com", "hostname": "facebook.com", "port": "", "pathname": "/", "search": "?foo=%7B%22abc%22", "hash": "" }, { "input": "https://localhost:3000/jqueryui@1.2.3", "base": null, "href": "https://localhost:3000/jqueryui@1.2.3", "origin": "https://localhost:3000", "protocol": "https:", "username": "", "password": "", "host": "localhost:3000", "hostname": "localhost", "port": "3000", "pathname": "/jqueryui@1.2.3", "search": "", "hash": "" }, "# tab/LF/CR", { "input": "h\tt\nt\rp://h\to\ns\rt:9\t0\n0\r0/p\ta\nt\rh?q\tu\ne\rry#f\tr\na\rg", "base": null, "href": "http://host:9000/path?query#frag", "origin": "http://host:9000", "protocol": "http:", "username": "", "password": "", "host": "host:9000", "hostname": "host", "port": "9000", "pathname": "/path", "search": "?query", "hash": "#frag" }, "# Stringification of URL.searchParams", { "input": "?a=b&c=d", "base": "http://example.org/foo/bar", "href": "http://example.org/foo/bar?a=b&c=d", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/foo/bar", "search": 
"?a=b&c=d", "searchParams": "a=b&c=d", "hash": "" }, { "input": "??a=b&c=d", "base": "http://example.org/foo/bar", "href": "http://example.org/foo/bar??a=b&c=d", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/foo/bar", "search": "??a=b&c=d", "searchParams": "%3Fa=b&c=d", "hash": "" }, "# Scheme only", { "input": "http:", "base": "http://example.org/foo/bar", "href": "http://example.org/foo/bar", "origin": "http://example.org", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/foo/bar", "search": "", "searchParams": "", "hash": "" }, { "input": "http:", "base": "https://example.org/foo/bar", "failure": true }, { "input": "sc:", "base": "https://example.org/foo/bar", "href": "sc:", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "", "search": "", "searchParams": "", "hash": "" }, "# Percent encoding of fragments", { "input": "http://foo.bar/baz?qux#foo\bbar", "base": null, "href": "http://foo.bar/baz?qux#foo%08bar", "origin": "http://foo.bar", "protocol": "http:", "username": "", "password": "", "host": "foo.bar", "hostname": "foo.bar", "port": "", "pathname": "/baz", "search": "?qux", "searchParams": "qux=", "hash": "#foo%08bar" }, { "input": "http://foo.bar/baz?qux#foo\"bar", "base": null, "href": "http://foo.bar/baz?qux#foo%22bar", "origin": "http://foo.bar", "protocol": "http:", "username": "", "password": "", "host": "foo.bar", "hostname": "foo.bar", "port": "", "pathname": "/baz", "search": "?qux", "searchParams": "qux=", "hash": "#foo%22bar" }, { "input": "http://foo.bar/baz?qux#foobar", "base": null, "href": "http://foo.bar/baz?qux#foo%3Ebar", "origin": "http://foo.bar", "protocol": "http:", "username": "", "password": "", "host": "foo.bar", "hostname": "foo.bar", "port": "", "pathname": "/baz", 
"search": "?qux", "searchParams": "qux=", "hash": "#foo%3Ebar" }, { "input": "http://foo.bar/baz?qux#foo`bar", "base": null, "href": "http://foo.bar/baz?qux#foo%60bar", "origin": "http://foo.bar", "protocol": "http:", "username": "", "password": "", "host": "foo.bar", "hostname": "foo.bar", "port": "", "pathname": "/baz", "search": "?qux", "searchParams": "qux=", "hash": "#foo%60bar" }, "# IPv4 parsing (via https://github.com/nodejs/node/pull/10317)", { "input": "http://1.2.3.4/", "base": "http://other.com/", "href": "http://1.2.3.4/", "origin": "http://1.2.3.4", "protocol": "http:", "username": "", "password": "", "host": "1.2.3.4", "hostname": "1.2.3.4", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://1.2.3.4./", "base": "http://other.com/", "href": "http://1.2.3.4/", "origin": "http://1.2.3.4", "protocol": "http:", "username": "", "password": "", "host": "1.2.3.4", "hostname": "1.2.3.4", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://192.168.257", "base": "http://other.com/", "href": "http://192.168.1.1/", "origin": "http://192.168.1.1", "protocol": "http:", "username": "", "password": "", "host": "192.168.1.1", "hostname": "192.168.1.1", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://192.168.257.", "base": "http://other.com/", "href": "http://192.168.1.1/", "origin": "http://192.168.1.1", "protocol": "http:", "username": "", "password": "", "host": "192.168.1.1", "hostname": "192.168.1.1", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://192.168.257.com", "base": "http://other.com/", "href": "http://192.168.257.com/", "origin": "http://192.168.257.com", "protocol": "http:", "username": "", "password": "", "host": "192.168.257.com", "hostname": "192.168.257.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://256", "base": "http://other.com/", "href": "http://0.0.1.0/", "origin": "http://0.0.1.0", "protocol": "http:", 
"username": "", "password": "", "host": "0.0.1.0", "hostname": "0.0.1.0", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://256.com", "base": "http://other.com/", "href": "http://256.com/", "origin": "http://256.com", "protocol": "http:", "username": "", "password": "", "host": "256.com", "hostname": "256.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://999999999", "base": "http://other.com/", "href": "http://59.154.201.255/", "origin": "http://59.154.201.255", "protocol": "http:", "username": "", "password": "", "host": "59.154.201.255", "hostname": "59.154.201.255", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://999999999.", "base": "http://other.com/", "href": "http://59.154.201.255/", "origin": "http://59.154.201.255", "protocol": "http:", "username": "", "password": "", "host": "59.154.201.255", "hostname": "59.154.201.255", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://999999999.com", "base": "http://other.com/", "href": "http://999999999.com/", "origin": "http://999999999.com", "protocol": "http:", "username": "", "password": "", "host": "999999999.com", "hostname": "999999999.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://10000000000", "base": "http://other.com/", "failure": true }, { "input": "http://10000000000.com", "base": "http://other.com/", "href": "http://10000000000.com/", "origin": "http://10000000000.com", "protocol": "http:", "username": "", "password": "", "host": "10000000000.com", "hostname": "10000000000.com", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://4294967295", "base": "http://other.com/", "href": "http://255.255.255.255/", "origin": "http://255.255.255.255", "protocol": "http:", "username": "", "password": "", "host": "255.255.255.255", "hostname": "255.255.255.255", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://4294967296", 
"base": "http://other.com/", "failure": true }, { "input": "http://0xffffffff", "base": "http://other.com/", "href": "http://255.255.255.255/", "origin": "http://255.255.255.255", "protocol": "http:", "username": "", "password": "", "host": "255.255.255.255", "hostname": "255.255.255.255", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://0xffffffff1", "base": "http://other.com/", "failure": true }, { "input": "http://256.256.256.256", "base": "http://other.com/", "failure": true }, { "input": "https://0x.0x.0", "base": null, "href": "https://0.0.0.0/", "origin": "https://0.0.0.0", "protocol": "https:", "username": "", "password": "", "host": "0.0.0.0", "hostname": "0.0.0.0", "port": "", "pathname": "/", "search": "", "hash": "" }, "More IPv4 parsing (via https://github.com/jsdom/whatwg-url/issues/92)", { "input": "https://0x100000000/test", "base": null, "failure": true }, { "input": "https://256.0.0.1/test", "base": null, "failure": true }, "# file URLs containing percent-encoded Windows drive letters (shouldn't work)", { "input": "file:///C%3A/", "base": null, "href": "file:///C%3A/", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/C%3A/", "search": "", "hash": "" }, { "input": "file:///C%7C/", "base": null, "href": "file:///C%7C/", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/C%7C/", "search": "", "hash": "" }, { "input": "file://%43%3A", "base": null, "failure": true }, { "input": "file://%43%7C", "base": null, "failure": true }, { "input": "file://%43|", "base": null, "failure": true }, { "input": "file://C%7C", "base": null, "failure": true }, { "input": "file://%43%7C/", "base": null, "failure": true }, { "input": "https://%43%7C/", "base": null, "failure": true }, { "input": "asdf://%43|/", "base": null, "failure": true }, { "input": "asdf://%43%7C/", "base": null, "href": "asdf://%43%7C/", "origin": "null", 
"protocol": "asdf:", "username": "", "password": "", "host": "%43%7C", "hostname": "%43%7C", "port": "", "pathname": "/", "search": "", "hash": "" }, "# file URLs relative to other file URLs (via https://github.com/jsdom/whatwg-url/pull/60)", { "input": "pix/submit.gif", "base": "file:///C:/Users/Domenic/Dropbox/GitHub/tmpvar/jsdom/test/level2/html/files/anchor.html", "href": "file:///C:/Users/Domenic/Dropbox/GitHub/tmpvar/jsdom/test/level2/html/files/pix/submit.gif", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/C:/Users/Domenic/Dropbox/GitHub/tmpvar/jsdom/test/level2/html/files/pix/submit.gif", "search": "", "hash": "" }, { "input": "..", "base": "file:///C:/", "href": "file:///C:/", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/C:/", "search": "", "hash": "" }, { "input": "..", "base": "file:///", "href": "file:///", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/", "search": "", "hash": "" }, "# More file URL tests by zcorpan and annevk", { "input": "/", "base": "file:///C:/a/b", "href": "file:///C:/", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/C:/", "search": "", "hash": "" }, { "input": "/", "base": "file://h/C:/a/b", "href": "file://h/C:/", "protocol": "file:", "username": "", "password": "", "host": "h", "hostname": "h", "port": "", "pathname": "/C:/", "search": "", "hash": "" }, { "input": "/", "base": "file://h/a/b", "href": "file://h/", "protocol": "file:", "username": "", "password": "", "host": "h", "hostname": "h", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "//d:", "base": "file:///C:/a/b", "href": "file:///d:", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/d:", "search": "", "hash": "" }, { "input": "//d:/..", "base": 
"file:///C:/a/b", "href": "file:///d:/", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/d:/", "search": "", "hash": "" }, { "input": "..", "base": "file:///ab:/", "href": "file:///", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "..", "base": "file:///1:/", "href": "file:///", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "", "base": "file:///test?test#test", "href": "file:///test?test", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/test", "search": "?test", "hash": "" }, { "input": "file:", "base": "file:///test?test#test", "href": "file:///test?test", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/test", "search": "?test", "hash": "" }, { "input": "?x", "base": "file:///test?test#test", "href": "file:///test?x", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/test", "search": "?x", "hash": "" }, { "input": "file:?x", "base": "file:///test?test#test", "href": "file:///test?x", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/test", "search": "?x", "hash": "" }, { "input": "#x", "base": "file:///test?test#test", "href": "file:///test?test#x", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/test", "search": "?test", "hash": "#x" }, { "input": "file:#x", "base": "file:///test?test#test", "href": "file:///test?test#x", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/test", "search": "?test", "hash": "#x" }, "# File URLs and many (back)slashes", { "input": 
"file:\\\\//", "base": null, "href": "file:////", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "//", "search": "", "hash": "" }, { "input": "file:\\\\\\\\", "base": null, "href": "file:////", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "//", "search": "", "hash": "" }, { "input": "file:\\\\\\\\?fox", "base": null, "href": "file:////?fox", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "//", "search": "?fox", "hash": "" }, { "input": "file:\\\\\\\\#guppy", "base": null, "href": "file:////#guppy", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "//", "search": "", "hash": "#guppy" }, { "input": "file://spider///", "base": null, "href": "file://spider///", "protocol": "file:", "username": "", "password": "", "host": "spider", "hostname": "spider", "port": "", "pathname": "///", "search": "", "hash": "" }, { "input": "file:\\\\localhost//", "base": null, "href": "file:////", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "//", "search": "", "hash": "" }, { "input": "file:///localhost//cat", "base": null, "href": "file:///localhost//cat", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/localhost//cat", "search": "", "hash": "" }, { "input": "file://\\/localhost//cat", "base": null, "href": "file:////localhost//cat", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "//localhost//cat", "search": "", "hash": "" }, { "input": "file://localhost//a//../..//", "base": null, "href": "file://///", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "///", "search": "", "hash": "" }, { "input": "/////mouse", "base": 
"file:///elephant", "href": "file://///mouse", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "///mouse", "search": "", "hash": "" }, { "input": "\\//pig", "base": "file://lion/", "href": "file:///pig", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/pig", "search": "", "hash": "" }, { "input": "\\/localhost//pig", "base": "file://lion/", "href": "file:////pig", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "//pig", "search": "", "hash": "" }, { "input": "//localhost//pig", "base": "file://lion/", "href": "file:////pig", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "//pig", "search": "", "hash": "" }, { "input": "/..//localhost//pig", "base": "file://lion/", "href": "file://lion//localhost//pig", "protocol": "file:", "username": "", "password": "", "host": "lion", "hostname": "lion", "port": "", "pathname": "//localhost//pig", "search": "", "hash": "" }, { "input": "file://", "base": "file://ape/", "href": "file:///", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/", "search": "", "hash": "" }, "# File URLs with non-empty hosts", { "input": "/rooibos", "base": "file://tea/", "href": "file://tea/rooibos", "protocol": "file:", "username": "", "password": "", "host": "tea", "hostname": "tea", "port": "", "pathname": "/rooibos", "search": "", "hash": "" }, { "input": "/?chai", "base": "file://tea/", "href": "file://tea/?chai", "protocol": "file:", "username": "", "password": "", "host": "tea", "hostname": "tea", "port": "", "pathname": "/", "search": "?chai", "hash": "" }, "# Windows drive letter handling with the 'file:' base URL", { "input": "C|", "base": "file://host/dir/file", "href": "file://host/C:", "protocol": "file:", "username": "", "password": "", "host": "host", 
"hostname": "host", "port": "", "pathname": "/C:", "search": "", "hash": "" }, { "input": "C|", "base": "file://host/D:/dir1/dir2/file", "href": "file://host/C:", "protocol": "file:", "username": "", "password": "", "host": "host", "hostname": "host", "port": "", "pathname": "/C:", "search": "", "hash": "" }, { "input": "C|#", "base": "file://host/dir/file", "href": "file://host/C:#", "protocol": "file:", "username": "", "password": "", "host": "host", "hostname": "host", "port": "", "pathname": "/C:", "search": "", "hash": "" }, { "input": "C|?", "base": "file://host/dir/file", "href": "file://host/C:?", "protocol": "file:", "username": "", "password": "", "host": "host", "hostname": "host", "port": "", "pathname": "/C:", "search": "", "hash": "" }, { "input": "C|/", "base": "file://host/dir/file", "href": "file://host/C:/", "protocol": "file:", "username": "", "password": "", "host": "host", "hostname": "host", "port": "", "pathname": "/C:/", "search": "", "hash": "" }, { "input": "C|\n/", "base": "file://host/dir/file", "href": "file://host/C:/", "protocol": "file:", "username": "", "password": "", "host": "host", "hostname": "host", "port": "", "pathname": "/C:/", "search": "", "hash": "" }, { "input": "C|\\", "base": "file://host/dir/file", "href": "file://host/C:/", "protocol": "file:", "username": "", "password": "", "host": "host", "hostname": "host", "port": "", "pathname": "/C:/", "search": "", "hash": "" }, { "input": "C", "base": "file://host/dir/file", "href": "file://host/dir/C", "protocol": "file:", "username": "", "password": "", "host": "host", "hostname": "host", "port": "", "pathname": "/dir/C", "search": "", "hash": "" }, { "input": "C|a", "base": "file://host/dir/file", "href": "file://host/dir/C|a", "protocol": "file:", "username": "", "password": "", "host": "host", "hostname": "host", "port": "", "pathname": "/dir/C|a", "search": "", "hash": "" }, "# Windows drive letter quirk in the file slash state", { "input": "/c:/foo/bar", "base": 
"file:///c:/baz/qux", "href": "file:///c:/foo/bar", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/c:/foo/bar", "search": "", "hash": "" }, { "input": "/c|/foo/bar", "base": "file:///c:/baz/qux", "href": "file:///c:/foo/bar", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/c:/foo/bar", "search": "", "hash": "" }, { "input": "file:\\c:\\foo\\bar", "base": "file:///c:/baz/qux", "href": "file:///c:/foo/bar", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/c:/foo/bar", "search": "", "hash": "" }, { "input": "/c:/foo/bar", "base": "file://host/path", "href": "file://host/c:/foo/bar", "protocol": "file:", "username": "", "password": "", "host": "host", "hostname": "host", "port": "", "pathname": "/c:/foo/bar", "search": "", "hash": "" }, "# Do not drop the host in the presence of a drive letter", { "input": "file://example.net/C:/", "base": null, "href": "file://example.net/C:/", "protocol": "file:", "username": "", "password": "", "host": "example.net", "hostname": "example.net", "port": "", "pathname": "/C:/", "search": "", "hash": "" }, { "input": "file://1.2.3.4/C:/", "base": null, "href": "file://1.2.3.4/C:/", "protocol": "file:", "username": "", "password": "", "host": "1.2.3.4", "hostname": "1.2.3.4", "port": "", "pathname": "/C:/", "search": "", "hash": "" }, { "input": "file://[1::8]/C:/", "base": null, "href": "file://[1::8]/C:/", "protocol": "file:", "username": "", "password": "", "host": "[1::8]", "hostname": "[1::8]", "port": "", "pathname": "/C:/", "search": "", "hash": "" }, "# Copy the host from the base URL in the following cases", { "input": "C|/", "base": "file://host/", "href": "file://host/C:/", "protocol": "file:", "username": "", "password": "", "host": "host", "hostname": "host", "port": "", "pathname": "/C:/", "search": "", "hash": "" }, { "input": "/C:/", "base": 
"file://host/", "href": "file://host/C:/", "protocol": "file:", "username": "", "password": "", "host": "host", "hostname": "host", "port": "", "pathname": "/C:/", "search": "", "hash": "" }, { "input": "file:C:/", "base": "file://host/", "href": "file://host/C:/", "protocol": "file:", "username": "", "password": "", "host": "host", "hostname": "host", "port": "", "pathname": "/C:/", "search": "", "hash": "" }, { "input": "file:/C:/", "base": "file://host/", "href": "file://host/C:/", "protocol": "file:", "username": "", "password": "", "host": "host", "hostname": "host", "port": "", "pathname": "/C:/", "search": "", "hash": "" }, "# Copy the empty host from the input in the following cases", { "input": "//C:/", "base": "file://host/", "href": "file:///C:/", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/C:/", "search": "", "hash": "" }, { "input": "file://C:/", "base": "file://host/", "href": "file:///C:/", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/C:/", "search": "", "hash": "" }, { "input": "///C:/", "base": "file://host/", "href": "file:///C:/", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/C:/", "search": "", "hash": "" }, { "input": "file:///C:/", "base": "file://host/", "href": "file:///C:/", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/C:/", "search": "", "hash": "" }, "# Windows drive letter quirk (no host)", { "input": "file:/C|/", "base": null, "href": "file:///C:/", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/C:/", "search": "", "hash": "" }, { "input": "file://C|/", "base": null, "href": "file:///C:/", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/C:/", "search": "", "hash": "" }, "# 
file URLs without base URL by Rimas Misevičius", { "input": "file:", "base": null, "href": "file:///", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "file:?q=v", "base": null, "href": "file:///?q=v", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/", "search": "?q=v", "hash": "" }, { "input": "file:#frag", "base": null, "href": "file:///#frag", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/", "search": "", "hash": "#frag" }, "# file: drive letter cases from https://crbug.com/1078698", { "input": "file:///Y:", "base": null, "href": "file:///Y:", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/Y:", "search": "", "hash": "" }, { "input": "file:///Y:/", "base": null, "href": "file:///Y:/", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/Y:/", "search": "", "hash": "" }, { "input": "file:///./Y", "base": null, "href": "file:///Y", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/Y", "search": "", "hash": "" }, { "input": "file:///./Y:", "base": null, "href": "file:///Y:", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/Y:", "search": "", "hash": "" }, { "input": "\\\\\\.\\Y:", "base": null, "failure": true, "relativeTo": "non-opaque-path-base" }, "# file: drive letter cases from https://crbug.com/1078698 but lowercased", { "input": "file:///y:", "base": null, "href": "file:///y:", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/y:", "search": "", "hash": "" }, { "input": "file:///y:/", "base": null, "href": "file:///y:/", "protocol": "file:", "username": "", 
"password": "", "host": "", "hostname": "", "port": "", "pathname": "/y:/", "search": "", "hash": "" }, { "input": "file:///./y", "base": null, "href": "file:///y", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/y", "search": "", "hash": "" }, { "input": "file:///./y:", "base": null, "href": "file:///y:", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/y:", "search": "", "hash": "" }, { "input": "\\\\\\.\\y:", "base": null, "failure": true, "relativeTo": "non-opaque-path-base" }, "# Additional file URL tests for (https://github.com/whatwg/url/issues/405)", { "input": "file://localhost//a//../..//foo", "base": null, "href": "file://///foo", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "///foo", "search": "", "hash": "" }, { "input": "file://localhost////foo", "base": null, "href": "file://////foo", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "////foo", "search": "", "hash": "" }, { "input": "file:////foo", "base": null, "href": "file:////foo", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "//foo", "search": "", "hash": "" }, { "input": "file:///one/two", "base": "file:///", "href": "file:///one/two", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/one/two", "search": "", "hash": "" }, { "input": "file:////one/two", "base": "file:///", "href": "file:////one/two", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "//one/two", "search": "", "hash": "" }, { "input": "//one/two", "base": "file:///", "href": "file://one/two", "protocol": "file:", "username": "", "password": "", "host": "one", "hostname": "one", "port": "", "pathname": "/two", "search": "", "hash": "" 
}, { "input": "///one/two", "base": "file:///", "href": "file:///one/two", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/one/two", "search": "", "hash": "" }, { "input": "////one/two", "base": "file:///", "href": "file:////one/two", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "//one/two", "search": "", "hash": "" }, { "input": "file:///.//", "base": "file:////", "href": "file:////", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "//", "search": "", "hash": "" }, "File URL tests for https://github.com/whatwg/url/issues/549", { "input": "file:.//p", "base": null, "href": "file:////p", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "//p", "search": "", "hash": "" }, { "input": "file:/.//p", "base": null, "href": "file:////p", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "//p", "search": "", "hash": "" }, "# IPv6 tests", { "input": "http://[1:0::]", "base": "http://example.net/", "href": "http://[1::]/", "origin": "http://[1::]", "protocol": "http:", "username": "", "password": "", "host": "[1::]", "hostname": "[1::]", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://[0:1:2:3:4:5:6:7:8]", "base": "http://example.net/", "failure": true }, { "input": "https://[0::0::0]", "base": null, "failure": true }, { "input": "https://[0:.0]", "base": null, "failure": true }, { "input": "https://[0:0:]", "base": null, "failure": true }, { "input": "https://[0:1:2:3:4:5:6:7.0.0.0.1]", "base": null, "failure": true }, { "input": "https://[0:1.00.0.0.0]", "base": null, "failure": true }, { "input": "https://[0:1.290.0.0.0]", "base": null, "failure": true }, { "input": "https://[0:1.23.23]", "base": null, "failure": true }, "# Empty host", { "input": "http://?", 
"base": null, "failure": true }, { "input": "http://#", "base": null, "failure": true }, "Port overflow (2^32 + 81)", { "input": "http://f:4294967377/c", "base": "http://example.org/", "failure": true }, "Port overflow (2^64 + 81)", { "input": "http://f:18446744073709551697/c", "base": "http://example.org/", "failure": true }, "Port overflow (2^128 + 81)", { "input": "http://f:340282366920938463463374607431768211537/c", "base": "http://example.org/", "failure": true }, "# Non-special-URL path tests", { "input": "sc://ñ", "base": null, "href": "sc://%C3%B1", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "%C3%B1", "hostname": "%C3%B1", "port": "", "pathname": "", "search": "", "hash": "" }, { "input": "sc://ñ?x", "base": null, "href": "sc://%C3%B1?x", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "%C3%B1", "hostname": "%C3%B1", "port": "", "pathname": "", "search": "?x", "hash": "" }, { "input": "sc://ñ#x", "base": null, "href": "sc://%C3%B1#x", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "%C3%B1", "hostname": "%C3%B1", "port": "", "pathname": "", "search": "", "hash": "#x" }, { "input": "#x", "base": "sc://ñ", "href": "sc://%C3%B1#x", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "%C3%B1", "hostname": "%C3%B1", "port": "", "pathname": "", "search": "", "hash": "#x" }, { "input": "?x", "base": "sc://ñ", "href": "sc://%C3%B1?x", "origin": "null", "protocol": "sc:", "username": "", "password": "", "host": "%C3%B1", "hostname": "%C3%B1", "port": "", "pathname": "", "search": "?x", "hash": "" }, { "input": "sc://?", "base": null, "href": "sc://?", "protocol": "sc:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "", "search": "", "hash": "" }, { "input": "sc://#", "base": null, "href": "sc://#", "protocol": "sc:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "", "search": 
"", "hash": "" }, { "input": "///", "base": "sc://x/", "href": "sc:///", "protocol": "sc:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "////", "base": "sc://x/", "href": "sc:////", "protocol": "sc:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "//", "search": "", "hash": "" }, { "input": "////x/", "base": "sc://x/", "href": "sc:////x/", "protocol": "sc:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "//x/", "search": "", "hash": "" }, { "input": "tftp://foobar.com/someconfig;mode=netascii", "base": null, "href": "tftp://foobar.com/someconfig;mode=netascii", "origin": "null", "protocol": "tftp:", "username": "", "password": "", "host": "foobar.com", "hostname": "foobar.com", "port": "", "pathname": "/someconfig;mode=netascii", "search": "", "hash": "" }, { "input": "telnet://user:pass@foobar.com:23/", "base": null, "href": "telnet://user:pass@foobar.com:23/", "origin": "null", "protocol": "telnet:", "username": "user", "password": "pass", "host": "foobar.com:23", "hostname": "foobar.com", "port": "23", "pathname": "/", "search": "", "hash": "" }, { "input": "ut2004://10.10.10.10:7777/Index.ut2", "base": null, "href": "ut2004://10.10.10.10:7777/Index.ut2", "origin": "null", "protocol": "ut2004:", "username": "", "password": "", "host": "10.10.10.10:7777", "hostname": "10.10.10.10", "port": "7777", "pathname": "/Index.ut2", "search": "", "hash": "" }, { "input": "redis://foo:bar@somehost:6379/0?baz=bam&qux=baz", "base": null, "href": "redis://foo:bar@somehost:6379/0?baz=bam&qux=baz", "origin": "null", "protocol": "redis:", "username": "foo", "password": "bar", "host": "somehost:6379", "hostname": "somehost", "port": "6379", "pathname": "/0", "search": "?baz=bam&qux=baz", "hash": "" }, { "input": "rsync://foo@host:911/sup", "base": null, "href": "rsync://foo@host:911/sup", "origin": "null", "protocol": 
"rsync:", "username": "foo", "password": "", "host": "host:911", "hostname": "host", "port": "911", "pathname": "/sup", "search": "", "hash": "" }, { "input": "git://github.com/foo/bar.git", "base": null, "href": "git://github.com/foo/bar.git", "origin": "null", "protocol": "git:", "username": "", "password": "", "host": "github.com", "hostname": "github.com", "port": "", "pathname": "/foo/bar.git", "search": "", "hash": "" }, { "input": "irc://myserver.com:6999/channel?passwd", "base": null, "href": "irc://myserver.com:6999/channel?passwd", "origin": "null", "protocol": "irc:", "username": "", "password": "", "host": "myserver.com:6999", "hostname": "myserver.com", "port": "6999", "pathname": "/channel", "search": "?passwd", "hash": "" }, { "input": "dns://fw.example.org:9999/foo.bar.org?type=TXT", "base": null, "href": "dns://fw.example.org:9999/foo.bar.org?type=TXT", "origin": "null", "protocol": "dns:", "username": "", "password": "", "host": "fw.example.org:9999", "hostname": "fw.example.org", "port": "9999", "pathname": "/foo.bar.org", "search": "?type=TXT", "hash": "" }, { "input": "ldap://localhost:389/ou=People,o=JNDITutorial", "base": null, "href": "ldap://localhost:389/ou=People,o=JNDITutorial", "origin": "null", "protocol": "ldap:", "username": "", "password": "", "host": "localhost:389", "hostname": "localhost", "port": "389", "pathname": "/ou=People,o=JNDITutorial", "search": "", "hash": "" }, { "input": "git+https://github.com/foo/bar", "base": null, "href": "git+https://github.com/foo/bar", "origin": "null", "protocol": "git+https:", "username": "", "password": "", "host": "github.com", "hostname": "github.com", "port": "", "pathname": "/foo/bar", "search": "", "hash": "" }, { "input": "urn:ietf:rfc:2648", "base": null, "href": "urn:ietf:rfc:2648", "origin": "null", "protocol": "urn:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "ietf:rfc:2648", "search": "", "hash": "" }, { "input": 
"tag:joe@example.org,2001:foo/bar", "base": null, "href": "tag:joe@example.org,2001:foo/bar", "origin": "null", "protocol": "tag:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "joe@example.org,2001:foo/bar", "search": "", "hash": "" }, "Serialize /. in path", { "input": "non-spec:/.//", "base": null, "href": "non-spec:/.//", "protocol": "non-spec:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "//", "search": "", "hash": "" }, { "input": "non-spec:/..//", "base": null, "href": "non-spec:/.//", "protocol": "non-spec:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "//", "search": "", "hash": "" }, { "input": "non-spec:/a/..//", "base": null, "href": "non-spec:/.//", "protocol": "non-spec:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "//", "search": "", "hash": "" }, { "input": "non-spec:/.//path", "base": null, "href": "non-spec:/.//path", "protocol": "non-spec:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "//path", "search": "", "hash": "" }, { "input": "non-spec:/..//path", "base": null, "href": "non-spec:/.//path", "protocol": "non-spec:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "//path", "search": "", "hash": "" }, { "input": "non-spec:/a/..//path", "base": null, "href": "non-spec:/.//path", "protocol": "non-spec:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "//path", "search": "", "hash": "" }, { "input": "/.//path", "base": "non-spec:/p", "href": "non-spec:/.//path", "protocol": "non-spec:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "//path", "search": "", "hash": "" }, { "input": "/..//path", "base": "non-spec:/p", "href": "non-spec:/.//path", "protocol": "non-spec:", "username": "", "password": "", "host": "", "hostname": "", "port": "", 
"pathname": "//path", "search": "", "hash": "" }, { "input": "..//path", "base": "non-spec:/p", "href": "non-spec:/.//path", "protocol": "non-spec:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "//path", "search": "", "hash": "" }, { "input": "a/..//path", "base": "non-spec:/p", "href": "non-spec:/.//path", "protocol": "non-spec:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "//path", "search": "", "hash": "" }, { "input": "", "base": "non-spec:/..//p", "href": "non-spec:/.//p", "protocol": "non-spec:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "//p", "search": "", "hash": "" }, { "input": "path", "base": "non-spec:/..//p", "href": "non-spec:/.//path", "protocol": "non-spec:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "//path", "search": "", "hash": "" }, "Do not serialize /. in path", { "input": "../path", "base": "non-spec:/.//p", "href": "non-spec:/path", "protocol": "non-spec:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/path", "search": "", "hash": "" }, "# percent encoded hosts in non-special-URLs", { "input": "non-special://%E2%80%A0/", "base": null, "href": "non-special://%E2%80%A0/", "protocol": "non-special:", "username": "", "password": "", "host": "%E2%80%A0", "hostname": "%E2%80%A0", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "non-special://H%4fSt/path", "base": null, "href": "non-special://H%4fSt/path", "protocol": "non-special:", "username": "", "password": "", "host": "H%4fSt", "hostname": "H%4fSt", "port": "", "pathname": "/path", "search": "", "hash": "" }, "# IPv6 in non-special-URLs", { "input": "non-special://[1:2:0:0:5:0:0:0]/", "base": null, "href": "non-special://[1:2:0:0:5::]/", "protocol": "non-special:", "username": "", "password": "", "host": "[1:2:0:0:5::]", "hostname": "[1:2:0:0:5::]", "port": "", 
"pathname": "/", "search": "", "hash": "" }, { "input": "non-special://[1:2:0:0:0:0:0:3]/", "base": null, "href": "non-special://[1:2::3]/", "protocol": "non-special:", "username": "", "password": "", "host": "[1:2::3]", "hostname": "[1:2::3]", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "non-special://[1:2::3]:80/", "base": null, "href": "non-special://[1:2::3]:80/", "protocol": "non-special:", "username": "", "password": "", "host": "[1:2::3]:80", "hostname": "[1:2::3]", "port": "80", "pathname": "/", "search": "", "hash": "" }, { "input": "non-special://[:80/", "base": null, "failure": true }, { "input": "blob:https://example.com:443/", "base": null, "href": "blob:https://example.com:443/", "origin": "https://example.com", "protocol": "blob:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "https://example.com:443/", "search": "", "hash": "" }, { "input": "blob:http://example.org:88/", "base": null, "href": "blob:http://example.org:88/", "origin": "http://example.org:88", "protocol": "blob:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "http://example.org:88/", "search": "", "hash": "" }, { "input": "blob:d3958f5c-0777-0845-9dcf-2cb28783acaf", "base": null, "href": "blob:d3958f5c-0777-0845-9dcf-2cb28783acaf", "origin": "null", "protocol": "blob:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "d3958f5c-0777-0845-9dcf-2cb28783acaf", "search": "", "hash": "" }, { "input": "blob:", "base": null, "href": "blob:", "origin": "null", "protocol": "blob:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "", "search": "", "hash": "" }, "blob: in blob:", { "input": "blob:blob:", "base": null, "href": "blob:blob:", "origin": "null", "protocol": "blob:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "blob:", "search": "", "hash": "" }, { "input": 
"blob:blob:https://example.org/", "base": null, "href": "blob:blob:https://example.org/", "origin": "null", "protocol": "blob:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "blob:https://example.org/", "search": "", "hash": "" }, "Non-http(s): in blob:", { "input": "blob:about:blank", "base": null, "href": "blob:about:blank", "origin": "null", "protocol": "blob:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "about:blank", "search": "", "hash": "" }, { "input": "blob:file://host/path", "base": null, "href": "blob:file://host/path", "origin": "null", "protocol": "blob:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "file://host/path", "search": "", "hash": "" }, { "input": "blob:ftp://host/path", "base": null, "href": "blob:ftp://host/path", "origin": "null", "protocol": "blob:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "ftp://host/path", "search": "", "hash": "" }, { "input": "blob:ws://example.org/", "base": null, "href": "blob:ws://example.org/", "origin": "null", "protocol": "blob:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "ws://example.org/", "search": "", "hash": "" }, { "input": "blob:wss://example.org/", "base": null, "href": "blob:wss://example.org/", "origin": "null", "protocol": "blob:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "wss://example.org/", "search": "", "hash": "" }, "Percent-encoded http: in blob:", { "input": "blob:http%3a//example.org/", "base": null, "href": "blob:http%3a//example.org/", "origin": "null", "protocol": "blob:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "http%3a//example.org/", "search": "", "hash": "" }, "Invalid IPv4 radix digits", { "input": "http://0x7f.0.0.0x7g", "base": null, "href": "http://0x7f.0.0.0x7g/", "protocol": "http:", 
"username": "", "password": "", "host": "0x7f.0.0.0x7g", "hostname": "0x7f.0.0.0x7g", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://0X7F.0.0.0X7G", "base": null, "href": "http://0x7f.0.0.0x7g/", "protocol": "http:", "username": "", "password": "", "host": "0x7f.0.0.0x7g", "hostname": "0x7f.0.0.0x7g", "port": "", "pathname": "/", "search": "", "hash": "" }, "Invalid IPv4 portion of IPv6 address", { "input": "http://[::127.0.0.0.1]", "base": null, "failure": true }, "Uncompressed IPv6 addresses with 0", { "input": "http://[0:1:0:1:0:1:0:1]", "base": null, "href": "http://[0:1:0:1:0:1:0:1]/", "protocol": "http:", "username": "", "password": "", "host": "[0:1:0:1:0:1:0:1]", "hostname": "[0:1:0:1:0:1:0:1]", "port": "", "pathname": "/", "search": "", "hash": "" }, { "input": "http://[1:0:1:0:1:0:1:0]", "base": null, "href": "http://[1:0:1:0:1:0:1:0]/", "protocol": "http:", "username": "", "password": "", "host": "[1:0:1:0:1:0:1:0]", "hostname": "[1:0:1:0:1:0:1:0]", "port": "", "pathname": "/", "search": "", "hash": "" }, "Percent-encoded query and fragment", { "input": "http://example.org/test?\u0022", "base": null, "href": "http://example.org/test?%22", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/test", "search": "?%22", "hash": "" }, { "input": "http://example.org/test?\u0023", "base": null, "href": "http://example.org/test?#", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/test", "search": "", "hash": "" }, { "input": "http://example.org/test?\u003C", "base": null, "href": "http://example.org/test?%3C", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/test", "search": "?%3C", "hash": "" }, { "input": "http://example.org/test?\u003E", "base": null, "href": "http://example.org/test?%3E", 
"protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/test", "search": "?%3E", "hash": "" }, { "input": "http://example.org/test?\u2323", "base": null, "href": "http://example.org/test?%E2%8C%A3", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/test", "search": "?%E2%8C%A3", "hash": "" }, { "input": "http://example.org/test?%23%23", "base": null, "href": "http://example.org/test?%23%23", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/test", "search": "?%23%23", "hash": "" }, { "input": "http://example.org/test?%GH", "base": null, "href": "http://example.org/test?%GH", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/test", "search": "?%GH", "hash": "" }, { "input": "http://example.org/test?a#%EF", "base": null, "href": "http://example.org/test?a#%EF", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/test", "search": "?a", "hash": "#%EF" }, { "input": "http://example.org/test?a#%GH", "base": null, "href": "http://example.org/test?a#%GH", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/test", "search": "?a", "hash": "#%GH" }, "URLs that require a non-about:blank base. 
(Also serve as invalid base tests.)", { "input": "a", "base": null, "failure": true, "relativeTo": "non-opaque-path-base" }, { "input": "a/", "base": null, "failure": true, "relativeTo": "non-opaque-path-base" }, { "input": "a//", "base": null, "failure": true, "relativeTo": "non-opaque-path-base" }, "Bases that don't fail to parse but fail to be bases", { "input": "test-a-colon.html", "base": "a:", "failure": true }, { "input": "test-a-colon-b.html", "base": "a:b", "failure": true }, "Other base URL tests, that must succeed", { "input": "test-a-colon-slash.html", "base": "a:/", "href": "a:/test-a-colon-slash.html", "protocol": "a:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/test-a-colon-slash.html", "search": "", "hash": "" }, { "input": "test-a-colon-slash-slash.html", "base": "a://", "href": "a:///test-a-colon-slash-slash.html", "protocol": "a:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/test-a-colon-slash-slash.html", "search": "", "hash": "" }, { "input": "test-a-colon-slash-b.html", "base": "a:/b", "href": "a:/test-a-colon-slash-b.html", "protocol": "a:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/test-a-colon-slash-b.html", "search": "", "hash": "" }, { "input": "test-a-colon-slash-slash-b.html", "base": "a://b", "href": "a://b/test-a-colon-slash-slash-b.html", "protocol": "a:", "username": "", "password": "", "host": "b", "hostname": "b", "port": "", "pathname": "/test-a-colon-slash-slash-b.html", "search": "", "hash": "" }, "Null code point in fragment", { "input": "http://example.org/test?a#b\u0000c", "base": null, "href": "http://example.org/test?a#b%00c", "protocol": "http:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/test", "search": "?a", "hash": "#b%00c" }, { "input": "non-spec://example.org/test?a#b\u0000c", "base": null, "href": 
"non-spec://example.org/test?a#b%00c", "protocol": "non-spec:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/test", "search": "?a", "hash": "#b%00c" }, { "input": "non-spec:/test?a#b\u0000c", "base": null, "href": "non-spec:/test?a#b%00c", "protocol": "non-spec:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/test", "search": "?a", "hash": "#b%00c" }, "First scheme char - not allowed: https://github.com/whatwg/url/issues/464", { "input": "10.0.0.7:8080/foo.html", "base": "file:///some/dir/bar.html", "href": "file:///some/dir/10.0.0.7:8080/foo.html", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/some/dir/10.0.0.7:8080/foo.html", "search": "", "hash": "" }, "Subsequent scheme chars - not allowed", { "input": "a!@$*=/foo.html", "base": "file:///some/dir/bar.html", "href": "file:///some/dir/a!@$*=/foo.html", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/some/dir/a!@$*=/foo.html", "search": "", "hash": "" }, "First and subsequent scheme chars - allowed", { "input": "a1234567890-+.:foo/bar", "base": "http://example.com/dir/file", "href": "a1234567890-+.:foo/bar", "protocol": "a1234567890-+.:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "foo/bar", "search": "", "hash": "" }, "IDNA ignored code points in file URLs hosts", { "input": "file://a\u00ADb/p", "base": null, "href": "file://ab/p", "protocol": "file:", "username": "", "password": "", "host": "ab", "hostname": "ab", "port": "", "pathname": "/p", "search": "", "hash": "" }, { "input": "file://a%C2%ADb/p", "base": null, "href": "file://ab/p", "protocol": "file:", "username": "", "password": "", "host": "ab", "hostname": "ab", "port": "", "pathname": "/p", "search": "", "hash": "" }, "IDNA hostnames which get mapped to 'localhost'", { "input": 
"file://loC𝐀𝐋𝐇𝐨𝐬𝐭/usr/bin", "base": null, "href": "file:///usr/bin", "protocol": "file:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/usr/bin", "search": "", "hash": "" }, "Empty host after the domain to ASCII", { "input": "file://\u00ad/p", "base": null, "failure": true }, { "input": "file://%C2%AD/p", "base": null, "failure": true }, { "input": "file://xn--/p", "base": null, "failure": true }, "https://bugzilla.mozilla.org/show_bug.cgi?id=1647058", { "input": "#link", "base": "https://example.org/##link", "href": "https://example.org/#link", "protocol": "https:", "username": "", "password": "", "host": "example.org", "hostname": "example.org", "port": "", "pathname": "/", "search": "", "hash": "#link" }, "UTF-8 percent-encode of C0 control percent-encode set and supersets", { "input": "non-special:cannot-be-a-base-url-\u0000\u0001\u001F\u001E\u007E\u007F\u0080", "base": null, "hash": "", "host": "", "hostname": "", "href": "non-special:cannot-be-a-base-url-%00%01%1F%1E~%7F%C2%80", "origin": "null", "password": "", "pathname": "cannot-be-a-base-url-%00%01%1F%1E~%7F%C2%80", "port": "", "protocol": "non-special:", "search": "", "username": "" }, { "input": "https://www.example.com/path{\u007Fpath.html?query'\u007F=query#fragment<\u007Ffragment", "base": null, "hash": "#fragment%3C%7Ffragment", "host": "www.example.com", "hostname": "www.example.com", "href": "https://www.example.com/path%7B%7Fpath.html?query%27%7F=query#fragment%3C%7Ffragment", "origin": "https://www.example.com", "password": "", "pathname": "/path%7B%7Fpath.html", "port": "", "protocol": "https:", "search": "?query%27%7F=query", "username": "" }, { "input": "https://user:pass[\u007F@foo/bar", "base": "http://example.org", "hash": "", "host": "foo", "hostname": "foo", "href": "https://user:pass%5B%7F@foo/bar", "origin": "https://foo", "password": "pass%5B%7F", "pathname": "/bar", "port": "", "protocol": "https:", "search": "", "username": "user" }, "Tests 
for the distinct percent-encode sets", { "input": "foo:// !\"$%&'()*+,-.;<=>@[\\]^_`{|}~@host/", "base": null, "hash": "", "host": "host", "hostname": "host", "href": "foo://%20!%22$%&'()*+,-.%3B%3C%3D%3E%40%5B%5C%5D%5E_%60%7B%7C%7D~@host/", "origin": "null", "password": "", "pathname": "/", "port":"", "protocol": "foo:", "search": "", "username": "%20!%22$%&'()*+,-.%3B%3C%3D%3E%40%5B%5C%5D%5E_%60%7B%7C%7D~" }, { "input": "wss:// !\"$%&'()*+,-.;<=>@[]^_`{|}~@host/", "base": null, "hash": "", "host": "host", "hostname": "host", "href": "wss://%20!%22$%&'()*+,-.%3B%3C%3D%3E%40%5B%5D%5E_%60%7B%7C%7D~@host/", "origin": "wss://host", "password": "", "pathname": "/", "port":"", "protocol": "wss:", "search": "", "username": "%20!%22$%&'()*+,-.%3B%3C%3D%3E%40%5B%5D%5E_%60%7B%7C%7D~" }, { "input": "foo://joe: !\"$%&'()*+,-.:;<=>@[\\]^_`{|}~@host/", "base": null, "hash": "", "host": "host", "hostname": "host", "href": "foo://joe:%20!%22$%&'()*+,-.%3A%3B%3C%3D%3E%40%5B%5C%5D%5E_%60%7B%7C%7D~@host/", "origin": "null", "password": "%20!%22$%&'()*+,-.%3A%3B%3C%3D%3E%40%5B%5C%5D%5E_%60%7B%7C%7D~", "pathname": "/", "port":"", "protocol": "foo:", "search": "", "username": "joe" }, { "input": "wss://joe: !\"$%&'()*+,-.:;<=>@[]^_`{|}~@host/", "base": null, "hash": "", "host": "host", "hostname": "host", "href": "wss://joe:%20!%22$%&'()*+,-.%3A%3B%3C%3D%3E%40%5B%5D%5E_%60%7B%7C%7D~@host/", "origin": "wss://host", "password": "%20!%22$%&'()*+,-.%3A%3B%3C%3D%3E%40%5B%5D%5E_%60%7B%7C%7D~", "pathname": "/", "port":"", "protocol": "wss:", "search": "", "username": "joe" }, { "input": "foo://!\"$%&'()*+,-.;=_`{}~/", "base": null, "hash": "", "host": "!\"$%&'()*+,-.;=_`{}~", "hostname": "!\"$%&'()*+,-.;=_`{}~", "href":"foo://!\"$%&'()*+,-.;=_`{}~/", "origin": "null", "password": "", "pathname": "/", "port":"", "protocol": "foo:", "search": "", "username": "" }, { "input": "wss://!\"$&'()*+,-.;=_`{}~/", "base": null, "hash": "", "host": "!\"$&'()*+,-.;=_`{}~", "hostname": 
"!\"$&'()*+,-.;=_`{}~", "href":"wss://!\"$&'()*+,-.;=_`{}~/", "origin": "wss://!\"$&'()*+,-.;=_`{}~", "password": "", "pathname": "/", "port":"", "protocol": "wss:", "search": "", "username": "" }, { "input": "foo://host/ !\"$%&'()*+,-./:;<=>@[\\]^_`{|}~", "base": null, "hash": "", "host": "host", "hostname": "host", "href": "foo://host/%20!%22$%&'()*+,-./:;%3C=%3E@[\\]^_%60%7B|%7D~", "origin": "null", "password": "", "pathname": "/%20!%22$%&'()*+,-./:;%3C=%3E@[\\]^_%60%7B|%7D~", "port":"", "protocol": "foo:", "search": "", "username": "" }, { "input": "wss://host/ !\"$%&'()*+,-./:;<=>@[\\]^_`{|}~", "base": null, "hash": "", "host": "host", "hostname": "host", "href": "wss://host/%20!%22$%&'()*+,-./:;%3C=%3E@[/]^_%60%7B|%7D~", "origin": "wss://host", "password": "", "pathname": "/%20!%22$%&'()*+,-./:;%3C=%3E@[/]^_%60%7B|%7D~", "port":"", "protocol": "wss:", "search": "", "username": "" }, { "input": "foo://host/dir/? !\"$%&'()*+,-./:;<=>?@[\\]^_`{|}~", "base": null, "hash": "", "host": "host", "hostname": "host", "href": "foo://host/dir/?%20!%22$%&'()*+,-./:;%3C=%3E?@[\\]^_`{|}~", "origin": "null", "password": "", "pathname": "/dir/", "port":"", "protocol": "foo:", "search": "?%20!%22$%&'()*+,-./:;%3C=%3E?@[\\]^_`{|}~", "username": "" }, { "input": "wss://host/dir/? 
!\"$%&'()*+,-./:;<=>?@[\\]^_`{|}~", "base": null, "hash": "", "host": "host", "hostname": "host", "href": "wss://host/dir/?%20!%22$%&%27()*+,-./:;%3C=%3E?@[\\]^_`{|}~", "origin": "wss://host", "password": "", "pathname": "/dir/", "port":"", "protocol": "wss:", "search": "?%20!%22$%&%27()*+,-./:;%3C=%3E?@[\\]^_`{|}~", "username": "" }, { "input": "foo://host/dir/# !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", "base": null, "hash": "#%20!%22#$%&'()*+,-./:;%3C=%3E?@[\\]^_%60{|}~", "host": "host", "hostname": "host", "href": "foo://host/dir/#%20!%22#$%&'()*+,-./:;%3C=%3E?@[\\]^_%60{|}~", "origin": "null", "password": "", "pathname": "/dir/", "port":"", "protocol": "foo:", "search": "", "username": "" }, { "input": "wss://host/dir/# !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", "base": null, "hash": "#%20!%22#$%&'()*+,-./:;%3C=%3E?@[\\]^_%60{|}~", "host": "host", "hostname": "host", "href": "wss://host/dir/#%20!%22#$%&'()*+,-./:;%3C=%3E?@[\\]^_%60{|}~", "origin": "wss://host", "password": "", "pathname": "/dir/", "port":"", "protocol": "wss:", "search": "", "username": "" }, "Ensure that input schemes are not ignored when resolving non-special URLs", { "input": "abc:rootless", "base": "abc://host/path", "hash": "", "host": "", "hostname": "", "href":"abc:rootless", "password": "", "pathname": "rootless", "port":"", "protocol": "abc:", "search": "", "username": "" }, { "input": "abc:rootless", "base": "abc:/path", "hash": "", "host": "", "hostname": "", "href":"abc:rootless", "password": "", "pathname": "rootless", "port":"", "protocol": "abc:", "search": "", "username": "" }, { "input": "abc:rootless", "base": "abc:path", "hash": "", "host": "", "hostname": "", "href":"abc:rootless", "password": "", "pathname": "rootless", "port":"", "protocol": "abc:", "search": "", "username": "" }, { "input": "abc:/rooted", "base": "abc://host/path", "hash": "", "host": "", "hostname": "", "href":"abc:/rooted", "password": "", "pathname": "/rooted", "port":"", "protocol": "abc:", "search": "", 
"username": "" }, "Empty query and fragment with blank should throw an error", { "input": "#", "base": null, "failure": true, "relativeTo": "any-base" }, { "input": "?", "base": null, "failure": true, "relativeTo": "non-opaque-path-base" }, "Last component looks like a number, but not valid IPv4", { "input": "http://1.2.3.4.5", "base": "http://other.com/", "failure": true }, { "input": "http://1.2.3.4.5.", "base": "http://other.com/", "failure": true }, { "input": "http://0..0x300/", "base": null, "failure": true }, { "input": "http://0..0x300./", "base": null, "failure": true }, { "input": "http://256.256.256.256.256", "base": "http://other.com/", "failure": true }, { "input": "http://256.256.256.256.256.", "base": "http://other.com/", "failure": true }, { "input": "http://1.2.3.08", "base": null, "failure": true }, { "input": "http://1.2.3.08.", "base": null, "failure": true }, { "input": "http://1.2.3.09", "base": null, "failure": true }, { "input": "http://09.2.3.4", "base": null, "failure": true }, { "input": "http://09.2.3.4.", "base": null, "failure": true }, { "input": "http://01.2.3.4.5", "base": null, "failure": true }, { "input": "http://01.2.3.4.5.", "base": null, "failure": true }, { "input": "http://0x100.2.3.4", "base": null, "failure": true }, { "input": "http://0x100.2.3.4.", "base": null, "failure": true }, { "input": "http://0x1.2.3.4.5", "base": null, "failure": true }, { "input": "http://0x1.2.3.4.5.", "base": null, "failure": true }, { "input": "http://foo.1.2.3.4", "base": null, "failure": true }, { "input": "http://foo.1.2.3.4.", "base": null, "failure": true }, { "input": "http://foo.2.3.4", "base": null, "failure": true }, { "input": "http://foo.2.3.4.", "base": null, "failure": true }, { "input": "http://foo.09", "base": null, "failure": true }, { "input": "http://foo.09.", "base": null, "failure": true }, { "input": "http://foo.0x4", "base": null, "failure": true }, { "input": "http://foo.0x4.", "base": null, "failure": true }, { 
"input": "http://foo.09..", "base": null, "hash": "", "host": "foo.09..", "hostname": "foo.09..", "href":"http://foo.09../", "password": "", "pathname": "/", "port":"", "protocol": "http:", "search": "", "username": "" }, { "input": "http://0999999999999999999/", "base": null, "failure": true }, { "input": "http://foo.0x", "base": null, "failure": true }, { "input": "http://foo.0XFfFfFfFfFfFfFfFfFfAcE123", "base": null, "failure": true }, { "input": "http://💩.123/", "base": null, "failure": true }, "U+0000 and U+FFFF in various places", { "input": "https://\u0000y", "base": null, "failure": true }, { "input": "https://x/\u0000y", "base": null, "hash": "", "host": "x", "hostname": "x", "href": "https://x/%00y", "password": "", "pathname": "/%00y", "port": "", "protocol": "https:", "search": "", "username": "" }, { "input": "https://x/?\u0000y", "base": null, "hash": "", "host": "x", "hostname": "x", "href": "https://x/?%00y", "password": "", "pathname": "/", "port": "", "protocol": "https:", "search": "?%00y", "username": "" }, { "input": "https://x/?#\u0000y", "base": null, "hash": "#%00y", "host": "x", "hostname": "x", "href": "https://x/?#%00y", "password": "", "pathname": "/", "port": "", "protocol": "https:", "search": "", "username": "" }, { "input": "https://\uFFFFy", "base": null, "failure": true }, { "input": "https://x/\uFFFFy", "base": null, "hash": "", "host": "x", "hostname": "x", "href": "https://x/%EF%BF%BFy", "password": "", "pathname": "/%EF%BF%BFy", "port": "", "protocol": "https:", "search": "", "username": "" }, { "input": "https://x/?\uFFFFy", "base": null, "hash": "", "host": "x", "hostname": "x", "href": "https://x/?%EF%BF%BFy", "password": "", "pathname": "/", "port": "", "protocol": "https:", "search": "?%EF%BF%BFy", "username": "" }, { "input": "https://x/?#\uFFFFy", "base": null, "hash": "#%EF%BF%BFy", "host": "x", "hostname": "x", "href": "https://x/?#%EF%BF%BFy", "password": "", "pathname": "/", "port": "", "protocol": "https:", 
"search": "", "username": "" }, { "input": "non-special:\u0000y", "base": null, "hash": "", "host": "", "hostname": "", "href": "non-special:%00y", "password": "", "pathname": "%00y", "port": "", "protocol": "non-special:", "search": "", "username": "" }, { "input": "non-special:x/\u0000y", "base": null, "hash": "", "host": "", "hostname": "", "href": "non-special:x/%00y", "password": "", "pathname": "x/%00y", "port": "", "protocol": "non-special:", "search": "", "username": "" }, { "input": "non-special:x/?\u0000y", "base": null, "hash": "", "host": "", "hostname": "", "href": "non-special:x/?%00y", "password": "", "pathname": "x/", "port": "", "protocol": "non-special:", "search": "?%00y", "username": "" }, { "input": "non-special:x/?#\u0000y", "base": null, "hash": "#%00y", "host": "", "hostname": "", "href": "non-special:x/?#%00y", "password": "", "pathname": "x/", "port": "", "protocol": "non-special:", "search": "", "username": "" }, { "input": "non-special:\uFFFFy", "base": null, "hash": "", "host": "", "hostname": "", "href": "non-special:%EF%BF%BFy", "password": "", "pathname": "%EF%BF%BFy", "port": "", "protocol": "non-special:", "search": "", "username": "" }, { "input": "non-special:x/\uFFFFy", "base": null, "hash": "", "host": "", "hostname": "", "href": "non-special:x/%EF%BF%BFy", "password": "", "pathname": "x/%EF%BF%BFy", "port": "", "protocol": "non-special:", "search": "", "username": "" }, { "input": "non-special:x/?\uFFFFy", "base": null, "hash": "", "host": "", "hostname": "", "href": "non-special:x/?%EF%BF%BFy", "password": "", "pathname": "x/", "port": "", "protocol": "non-special:", "search": "?%EF%BF%BFy", "username": "" }, { "input": "non-special:x/?#\uFFFFy", "base": null, "hash": "#%EF%BF%BFy", "host": "", "hostname": "", "href": "non-special:x/?#%EF%BF%BFy", "password": "", "pathname": "x/", "port": "", "protocol": "non-special:", "search": "", "username": "" }, { "input": "", "base": null, "failure": true, "relativeTo": 
"non-opaque-path-base" }, { "input": "https://example.com/\"quoted\"", "base": null, "hash": "", "host": "example.com", "hostname": "example.com", "href": "https://example.com/%22quoted%22", "origin": "https://example.com", "password": "", "pathname": "/%22quoted%22", "port": "", "protocol": "https:", "search": "", "username": "" }, { "input": "https://a%C2%ADb/", "base": null, "hash": "", "host": "ab", "hostname": "ab", "href": "https://ab/", "origin": "https://ab", "password": "", "pathname": "/", "port": "", "protocol": "https:", "search": "", "username": "" }, { "comment": "Empty host after domain to ASCII", "input": "https://\u00AD/", "base": null, "failure": true }, { "input": "https://%C2%AD/", "base": null, "failure": true }, { "input": "https://xn--/", "base": null, "failure": true }, "Non-special schemes that some implementations might incorrectly treat as special", { "input": "data://example.com:8080/pathname?search#hash", "base": null, "href": "data://example.com:8080/pathname?search#hash", "origin": "null", "protocol": "data:", "username": "", "password": "", "host": "example.com:8080", "hostname": "example.com", "port": "8080", "pathname": "/pathname", "search": "?search", "hash": "#hash" }, { "input": "data:///test", "base": null, "href": "data:///test", "origin": "null", "protocol": "data:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/test", "search": "", "hash": "" }, { "input": "data://test/a/../b", "base": null, "href": "data://test/b", "origin": "null", "protocol": "data:", "username": "", "password": "", "host": "test", "hostname": "test", "port": "", "pathname": "/b", "search": "", "hash": "" }, { "input": "data://:443", "base": null, "failure": true }, { "input": "data://test:test", "base": null, "failure": true }, { "input": "data://[:1]", "base": null, "failure": true }, { "input": "javascript://example.com:8080/pathname?search#hash", "base": null, "href": 
"javascript://example.com:8080/pathname?search#hash", "origin": "null", "protocol": "javascript:", "username": "", "password": "", "host": "example.com:8080", "hostname": "example.com", "port": "8080", "pathname": "/pathname", "search": "?search", "hash": "#hash" }, { "input": "javascript:///test", "base": null, "href": "javascript:///test", "origin": "null", "protocol": "javascript:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/test", "search": "", "hash": "" }, { "input": "javascript://test/a/../b", "base": null, "href": "javascript://test/b", "origin": "null", "protocol": "javascript:", "username": "", "password": "", "host": "test", "hostname": "test", "port": "", "pathname": "/b", "search": "", "hash": "" }, { "input": "javascript://:443", "base": null, "failure": true }, { "input": "javascript://test:test", "base": null, "failure": true }, { "input": "javascript://[:1]", "base": null, "failure": true }, { "input": "mailto://example.com:8080/pathname?search#hash", "base": null, "href": "mailto://example.com:8080/pathname?search#hash", "origin": "null", "protocol": "mailto:", "username": "", "password": "", "host": "example.com:8080", "hostname": "example.com", "port": "8080", "pathname": "/pathname", "search": "?search", "hash": "#hash" }, { "input": "mailto:///test", "base": null, "href": "mailto:///test", "origin": "null", "protocol": "mailto:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/test", "search": "", "hash": "" }, { "input": "mailto://test/a/../b", "base": null, "href": "mailto://test/b", "origin": "null", "protocol": "mailto:", "username": "", "password": "", "host": "test", "hostname": "test", "port": "", "pathname": "/b", "search": "", "hash": "" }, { "input": "mailto://:443", "base": null, "failure": true }, { "input": "mailto://test:test", "base": null, "failure": true }, { "input": "mailto://[:1]", "base": null, "failure": true }, { "input": 
"intent://example.com:8080/pathname?search#hash", "base": null, "href": "intent://example.com:8080/pathname?search#hash", "origin": "null", "protocol": "intent:", "username": "", "password": "", "host": "example.com:8080", "hostname": "example.com", "port": "8080", "pathname": "/pathname", "search": "?search", "hash": "#hash" }, { "input": "intent:///test", "base": null, "href": "intent:///test", "origin": "null", "protocol": "intent:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/test", "search": "", "hash": "" }, { "input": "intent://test/a/../b", "base": null, "href": "intent://test/b", "origin": "null", "protocol": "intent:", "username": "", "password": "", "host": "test", "hostname": "test", "port": "", "pathname": "/b", "search": "", "hash": "" }, { "input": "intent://:443", "base": null, "failure": true }, { "input": "intent://test:test", "base": null, "failure": true }, { "input": "intent://[:1]", "base": null, "failure": true }, { "input": "urn://example.com:8080/pathname?search#hash", "base": null, "href": "urn://example.com:8080/pathname?search#hash", "origin": "null", "protocol": "urn:", "username": "", "password": "", "host": "example.com:8080", "hostname": "example.com", "port": "8080", "pathname": "/pathname", "search": "?search", "hash": "#hash" }, { "input": "urn:///test", "base": null, "href": "urn:///test", "origin": "null", "protocol": "urn:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/test", "search": "", "hash": "" }, { "input": "urn://test/a/../b", "base": null, "href": "urn://test/b", "origin": "null", "protocol": "urn:", "username": "", "password": "", "host": "test", "hostname": "test", "port": "", "pathname": "/b", "search": "", "hash": "" }, { "input": "urn://:443", "base": null, "failure": true }, { "input": "urn://test:test", "base": null, "failure": true }, { "input": "urn://[:1]", "base": null, "failure": true }, { "input": 
"turn://example.com:8080/pathname?search#hash", "base": null, "href": "turn://example.com:8080/pathname?search#hash", "origin": "null", "protocol": "turn:", "username": "", "password": "", "host": "example.com:8080", "hostname": "example.com", "port": "8080", "pathname": "/pathname", "search": "?search", "hash": "#hash" }, { "input": "turn:///test", "base": null, "href": "turn:///test", "origin": "null", "protocol": "turn:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/test", "search": "", "hash": "" }, { "input": "turn://test/a/../b", "base": null, "href": "turn://test/b", "origin": "null", "protocol": "turn:", "username": "", "password": "", "host": "test", "hostname": "test", "port": "", "pathname": "/b", "search": "", "hash": "" }, { "input": "turn://:443", "base": null, "failure": true }, { "input": "turn://test:test", "base": null, "failure": true }, { "input": "turn://[:1]", "base": null, "failure": true }, { "input": "stun://example.com:8080/pathname?search#hash", "base": null, "href": "stun://example.com:8080/pathname?search#hash", "origin": "null", "protocol": "stun:", "username": "", "password": "", "host": "example.com:8080", "hostname": "example.com", "port": "8080", "pathname": "/pathname", "search": "?search", "hash": "#hash" }, { "input": "stun:///test", "base": null, "href": "stun:///test", "origin": "null", "protocol": "stun:", "username": "", "password": "", "host": "", "hostname": "", "port": "", "pathname": "/test", "search": "", "hash": "" }, { "input": "stun://test/a/../b", "base": null, "href": "stun://test/b", "origin": "null", "protocol": "stun:", "username": "", "password": "", "host": "test", "hostname": "test", "port": "", "pathname": "/b", "search": "", "hash": "" }, { "input": "stun://:443", "base": null, "failure": true }, { "input": "stun://test:test", "base": null, "failure": true }, { "input": "stun://[:1]", "base": null, "failure": true }, { "input": "w://x:0", "base": null, "href": 
"w://x:0", "origin": "null", "protocol": "w:", "username": "", "password": "", "host": "x:0", "hostname": "x", "port": "0", "pathname": "", "search": "", "hash": "" }, { "input": "west://x:0", "base": null, "href": "west://x:0", "origin": "null", "protocol": "west:", "username": "", "password": "", "host": "x:0", "hostname": "x", "port": "0", "pathname": "", "search": "", "hash": "" } ] url-2.5.2/tests/wpt.rs000064400000000000000000000345331046102023000127660ustar 00000000000000// Copyright 2013-2014 The rust-url developers. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! Data-driven tests imported from web-platform-tests use serde_json::Value; use std::collections::HashMap; use std::fmt::Write; #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] use std::sync::Mutex; use url::Url; // https://rustwasm.github.io/wasm-bindgen/wasm-bindgen-test/usage.html #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] use wasm_bindgen_test::{console_log, wasm_bindgen_test, wasm_bindgen_test_configure}; #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] wasm_bindgen_test_configure!(run_in_browser); // wpt has its own test driver, but we shoe-horn this into wasm_bindgen_test // which will discard stdout and stderr. So, we make println! go to // console.log(), so we see failures that do not result in panics. #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] static PRINT_BUF: Mutex> = Mutex::new(None); #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] macro_rules! print { ($($arg:tt)*) => { let v = format!($($arg)*); { let mut buf = PRINT_BUF.lock().unwrap(); if let Some(buf) = buf.as_mut() { buf.push_str(&v); } else { *buf = Some(v); } } }; } #[cfg(all(target_arch = "wasm32", target_os = "unknown"))] macro_rules! 
println { () => { let buf = PRINT_BUF.lock().unwrap().take(); match buf { Some(buf) => console_log!("{buf}"), None => console_log!(""), } }; ($($arg:tt)*) => { let buf = PRINT_BUF.lock().unwrap().take(); match buf { Some(buf) => { let v = format!($($arg)*); console_log!("{buf}{v}"); }, None => console_log!($($arg)*), } } } #[derive(Debug, serde::Deserialize)] struct UrlTest { input: String, base: Option, #[serde(flatten)] result: UrlTestResult, } #[derive(Debug, serde::Deserialize)] #[serde(untagged)] #[allow(clippy::large_enum_variant)] enum UrlTestResult { Ok(UrlTestOk), Fail(UrlTestFail), } #[derive(Debug, serde::Deserialize)] struct UrlTestOk { href: String, protocol: String, username: String, password: String, host: String, hostname: String, port: String, pathname: String, search: String, hash: String, } #[derive(Debug, serde::Deserialize)] struct UrlTestFail { failure: bool, } #[derive(Debug, serde::Deserialize)] struct SetterTest { href: String, new_value: String, expected: SetterTestExpected, } #[derive(Debug, serde::Deserialize)] struct SetterTestExpected { href: Option, protocol: Option, username: Option, password: Option, host: Option, hostname: Option, port: Option, pathname: Option, search: Option, hash: Option, } #[cfg_attr(all(target_arch = "wasm32", target_os = "unknown"), wasm_bindgen_test)] fn main() { let mut filter = None; let mut args = std::env::args().skip(1); while filter.is_none() { if let Some(arg) = args.next() { if arg == "--test-threads" { args.next(); continue; } filter = Some(arg); } else { break; } } let mut expected_failures = include_str!("expected_failures.txt") .lines() .collect::>(); let mut errors = vec![]; // Copied from https://github.com/web-platform-tests/wpt/blob/master/url/ let url_json: Vec = serde_json::from_str(include_str!("urltestdata.json")) .expect("JSON parse error in urltestdata.json"); let url_tests = url_json .into_iter() .filter(|val| val.is_object()) .map(|val| serde_json::from_value::(val).expect("parsing 
failed")) .collect::>(); let setter_json: HashMap = serde_json::from_str(include_str!("setters_tests.json")) .expect("JSON parse error in setters_tests.json"); let setter_tests = setter_json .into_iter() .filter(|(k, _)| k != "comment") .map(|(k, v)| { let test = serde_json::from_value::>(v).expect("parsing failed"); (k, test) }) .collect::>(); for url_test in url_tests { let mut name = format!("<{}>", url_test.input.escape_default()); if let Some(base) = &url_test.base { write!(&mut name, " against <{}>", base.escape_default()).unwrap(); } if should_skip(&name, filter.as_deref()) { continue; } print!("{} ... ", name); let res = run_url_test(url_test); report(name, res, &mut errors, &mut expected_failures); } for (kind, tests) in setter_tests { for test in tests { let name = format!( "<{}> set {} to <{}>", test.href.escape_default(), kind, test.new_value.escape_default() ); if should_skip(&name, filter.as_deref()) { continue; } print!("{} ... ", name); let res = run_setter_test(&kind, test); report(name, res, &mut errors, &mut expected_failures); } } println!(); println!("===================="); println!(); if !errors.is_empty() { println!("errors:"); println!(); for (name, err) in errors { println!(" name: {}", name); println!(" err: {}", err); println!(); } std::process::exit(1); } else { println!("all tests passed"); } if !expected_failures.is_empty() && filter.is_none() { println!(); println!("===================="); println!(); println!("tests were expected to fail but did not run:"); println!(); for name in expected_failures { println!(" {}", name); } println!(); println!("if these tests were removed, update expected_failures.txt"); println!(); std::process::exit(1); } } fn should_skip(name: &str, filter: Option<&str>) -> bool { match filter { Some(filter) => !name.contains(filter), None => false, } } fn report( name: String, res: Result<(), String>, errors: &mut Vec<(String, String)>, expected_failures: &mut Vec<&str>, ) { let expected_failure = 
expected_failures.contains(&&*name); expected_failures.retain(|&s| s != &*name); match res { Ok(()) => { if expected_failure { println!("🟠 (unexpected success)"); errors.push((name, "unexpected success".to_string())); } else { println!("✅"); } } Err(err) => { if expected_failure { println!("✅ (expected fail)"); } else { println!("❌"); errors.push((name, err)); } } } } fn run_url_test( UrlTest { base, input, result, }: UrlTest, ) -> Result<(), String> { let base = match base { Some(base) => { let base = Url::parse(&base).map_err(|e| format!("errored while parsing base: {}", e))?; Some(base) } None => None, }; let res = Url::options() .base_url(base.as_ref()) .parse(&input) .map_err(|e| format!("errored while parsing input: {}", e)); match result { UrlTestResult::Ok(ok) => check_url_ok(res, ok), UrlTestResult::Fail(fail) => { assert!(fail.failure); if res.is_ok() { return Err("expected failure, but parsed successfully".to_string()); } Ok(()) } } } fn check_url_ok(res: Result, ok: UrlTestOk) -> Result<(), String> { let url = match res { Ok(url) => url, Err(err) => { return Err(format!("expected success, but errored: {:?}", err)); } }; let href = url::quirks::href(&url); if href != ok.href { return Err(format!("expected href {:?}, but got {:?}", ok.href, href)); } let protocol = url::quirks::protocol(&url); if protocol != ok.protocol { return Err(format!( "expected protocol {:?}, but got {:?}", ok.protocol, protocol )); } let username = url::quirks::username(&url); if username != ok.username { return Err(format!( "expected username {:?}, but got {:?}", ok.username, username )); } let password = url::quirks::password(&url); if password != ok.password { return Err(format!( "expected password {:?}, but got {:?}", ok.password, password )); } let host = url::quirks::host(&url); if host != ok.host { return Err(format!("expected host {:?}, but got {:?}", ok.host, host)); } let hostname = url::quirks::hostname(&url); if hostname != ok.hostname { return Err(format!( "expected 
hostname {:?}, but got {:?}", ok.hostname, hostname )); } let port = url::quirks::port(&url); if port != ok.port { return Err(format!("expected port {:?}, but got {:?}", ok.port, port)); } let pathname = url::quirks::pathname(&url); if pathname != ok.pathname { return Err(format!( "expected pathname {:?}, but got {:?}", ok.pathname, pathname )); } let search = url::quirks::search(&url); if search != ok.search { return Err(format!( "expected search {:?}, but got {:?}", ok.search, search )); } let hash = url::quirks::hash(&url); if hash != ok.hash { return Err(format!("expected hash {:?}, but got {:?}", ok.hash, hash)); } Ok(()) } fn run_setter_test( kind: &str, SetterTest { href, new_value, expected, }: SetterTest, ) -> Result<(), String> { let mut url = Url::parse(&href).map_err(|e| format!("errored while parsing href: {}", e))?; match kind { "protocol" => { url::quirks::set_protocol(&mut url, &new_value).ok(); } "username" => { url::quirks::set_username(&mut url, &new_value).ok(); } "password" => { url::quirks::set_password(&mut url, &new_value).ok(); } "host" => { url::quirks::set_host(&mut url, &new_value).ok(); } "hostname" => { url::quirks::set_hostname(&mut url, &new_value).ok(); } "port" => { url::quirks::set_port(&mut url, &new_value).ok(); } "pathname" => url::quirks::set_pathname(&mut url, &new_value), "search" => url::quirks::set_search(&mut url, &new_value), "hash" => url::quirks::set_hash(&mut url, &new_value), _ => { return Err(format!("unknown setter kind: {:?}", kind)); } } if let Some(expected_href) = expected.href { let href = url::quirks::href(&url); if href != expected_href { return Err(format!( "expected href {:?}, but got {:?}", expected_href, href )); } } if let Some(expected_protocol) = expected.protocol { let protocol = url::quirks::protocol(&url); if protocol != expected_protocol { return Err(format!( "expected protocol {:?}, but got {:?}", expected_protocol, protocol )); } } if let Some(expected_username) = expected.username { let 
username = url::quirks::username(&url); if username != expected_username { return Err(format!( "expected username {:?}, but got {:?}", expected_username, username )); } } if let Some(expected_password) = expected.password { let password = url::quirks::password(&url); if password != expected_password { return Err(format!( "expected password {:?}, but got {:?}", expected_password, password )); } } if let Some(expected_host) = expected.host { let host = url::quirks::host(&url); if host != expected_host { return Err(format!( "expected host {:?}, but got {:?}", expected_host, host )); } } if let Some(expected_hostname) = expected.hostname { let hostname = url::quirks::hostname(&url); if hostname != expected_hostname { return Err(format!( "expected hostname {:?}, but got {:?}", expected_hostname, hostname )); } } if let Some(expected_port) = expected.port { let port = url::quirks::port(&url); if port != expected_port { return Err(format!( "expected port {:?}, but got {:?}", expected_port, port )); } } if let Some(expected_pathname) = expected.pathname { let pathname = url::quirks::pathname(&url); if pathname != expected_pathname { return Err(format!( "expected pathname {:?}, but got {:?}", expected_pathname, pathname )); } } if let Some(expected_search) = expected.search { let search = url::quirks::search(&url); if search != expected_search { return Err(format!( "expected search {:?}, but got {:?}", expected_search, search )); } } if let Some(expected_hash) = expected.hash { let hash = url::quirks::hash(&url); if hash != expected_hash { return Err(format!( "expected hash {:?}, but got {:?}", expected_hash, hash )); } } Ok(()) }