publicsuffix-1.5.2/.gitignore010064400017500000144000000000221335165715700144360ustar0000000000000000target Cargo.lock publicsuffix-1.5.2/.travis.yml010064400017500000144000000001341335165715700145630ustar0000000000000000language: rust rust: - nightly - beta - stable script: - cargo test - cargo doc --no-deps publicsuffix-1.5.2/Cargo.toml.orig010064400017500000144000000012231335165765500153440ustar0000000000000000[package] name = "publicsuffix" description = "Robust domain name parsing and RFC compliant email address validation" version = "1.5.2" license = "MIT/Apache-2.0" repository = "https://github.com/rushmorem/publicsuffix" documentation = "https://docs.rs/publicsuffix" readme = "README.md" keywords = ["tld", "gtld", "cctld", "domain", "psl"] authors = ["rushmorem "] [features] default = ["remote_list"] remote_list = ["native-tls"] [dependencies] error-chain = "0.12" idna = "0.1" regex = "1.0" url = "1.7" lazy_static = "1.0" [dependencies.native-tls] version = "0.2" optional = true [dev_dependencies] rspec = "=1.0.0-beta.3" publicsuffix-1.5.2/Cargo.toml0000644000000024150000000000000116030ustar00# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g. crates.io) dependencies # # If you believe there's an error in this file please file an # issue against the rust-lang/cargo repository. 
If you're # editing this file be aware that the upstream Cargo.toml # will likely look very different (and much more reasonable) [package] name = "publicsuffix" version = "1.5.2" authors = ["rushmorem "] description = "Robust domain name parsing and RFC compliant email address validation" documentation = "https://docs.rs/publicsuffix" readme = "README.md" keywords = ["tld", "gtld", "cctld", "domain", "psl"] license = "MIT/Apache-2.0" repository = "https://github.com/rushmorem/publicsuffix" [dependencies.error-chain] version = "0.12" [dependencies.idna] version = "0.1" [dependencies.lazy_static] version = "1.0" [dependencies.native-tls] version = "0.2" optional = true [dependencies.regex] version = "1.0" [dependencies.url] version = "1.7" [dev-dependencies.rspec] version = "=1.0.0-beta.3" [features] default = ["remote_list"] remote_list = ["native-tls"] publicsuffix-1.5.2/LICENSE010064400017500000144000000020621335165715700134610ustar0000000000000000MIT License Copyright (c) 2016 Rushmore Mushambi Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. publicsuffix-1.5.2/LICENSE-APACHE010064400017500000144000000251371335165715700144100ustar0000000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). 
"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. publicsuffix-1.5.2/README.md010064400017500000144000000120621335165715700137340ustar0000000000000000# Robust domain name parsing and RFC compliant email address validation [![Build Status](https://travis-ci.org/rushmorem/publicsuffix.svg?branch=master)](https://travis-ci.org/rushmorem/publicsuffix) [![Latest Version](https://img.shields.io/crates/v/publicsuffix.svg)](https://crates.io/crates/publicsuffix) [![Docs](https://docs.rs/publicsuffix/badge.svg)](https://docs.rs/publicsuffix) This library uses Mozilla's [Public Suffix List](https://publicsuffix.org) to reliably parse domain names and email addresses in [Rust](https://www.rust-lang.org). Though parsing domain names is its primary goal, it also fully exposes the list allowing you to use convenient methods like `list.all()` to get all known domain extensions or `list.icann()` to get only ICANN extensions. 
If all you need is to check whether a domain is syntactically correct and do not need to utilise the list you can just use `Domain::has_valid_syntax` method. This method will reliably tell you if a domain has valid syntax whether or not it is an internationalised domain name (IDN). It also checks the length restrictions for each label, total number of labels and full length of domain name. This crate doesn't cache the public suffix list for you. If you want to use this crate in a long running application and want to make use of the public suffix list, I highly recommend you use the [psl](https://github.com/rushmorem/psl) crate which does this for you. ## Setting Up Add this crate to your `Cargo.toml`: ```toml [dependencies.publicsuffix] version = "1.5" # This crate exposes the methods `List::fetch` and `List::from_url` as a # feature named "remote_list". This feature is on by default. If you have # the public suffix list on your local filesystem or you would like # to fetch this list on your own you can disable this feature and build # the list using `List::from_path` or `List::from_reader` respectively. # # To disable, uncomment the line below: # default-features = false ``` ## Examples ```rust extern crate publicsuffix; use publicsuffix::List; // Fetch the list from the official URL, let list = List::fetch()?; // from your own URL let list = List::from_url("https://example.com/path/to/public_suffix_list.dat")?; // or from a local file. You can download the list from // "https://publicsuffix.org/list/public_suffix_list.dat". 
let list = List::from_path("/path/to/public_suffix_list.dat")?; // Using the list you can find out the root domain // or extension of any given domain name let domain = list.parse_domain("www.example.com")?; assert_eq!(domain.root(), Some("example.com")); assert_eq!(domain.suffix(), Some("com")); let domain = list.parse_domain("www.食狮.中国")?; assert_eq!(domain.root(), Some("食狮.中国")); assert_eq!(domain.suffix(), Some("中国")); let domain = list.parse_domain("www.xn--85x722f.xn--55qx5d.cn")?; assert_eq!(domain.root(), Some("xn--85x722f.xn--55qx5d.cn")); assert_eq!(domain.suffix(), Some("xn--55qx5d.cn")); let domain = list.parse_domain("a.b.example.uk.com")?; assert_eq!(domain.root(), Some("example.uk.com")); assert_eq!(domain.suffix(), Some("uk.com")); let name = list.parse_dns_name("_tcp.example.com.")?; assert_eq!(name.domain().and_then(|domain| domain.root()), Some("example.com")); assert_eq!(name.domain().and_then(|domain| domain.suffix()), Some("com")); // You can also find out if this is an ICANN domain assert!(!domain.is_icann()); // or a private one assert!(domain.is_private()); // In any case if the domain's suffix is in the list // then this is definitely a registrable domain name assert!(domain.has_known_suffix()); ``` ## Use Cases For those who work with domain names the use cases of this library are plenty. [publicsuffix.org/learn](https://publicsuffix.org/learn/) lists quite a few. For the sake of brevity, I'm not going to repeat them here. I work for a domain registrar so we make good use of this library. Here are some of the ways this library can be used:- * Validating domain names. This one is probably obvious. If [Domain::has_known_suffix](https://docs.rs/publicsuffix/*/publicsuffix/struct.Domain.html#method.has_known_suffix) returns `true` you can be absolutely sure this is a valid domain name. A regular expression is simply not robust enough. * Validating email addresses. 
You can utilise this library to validate email addresses in a robust and reliable manner before resorting to more expensive (DNS checks) or less convenient (sending confirmation emails) ways. * Blacklisting or whitelisting domain names and email addresses. You can't just blindly do this without knowing the actual registrable domain name otherwise you risk being too restrictive or too lenient. Bad news either way... * Extracting the registrable part of a domain name so you can check whether the domain is registered or not. * Storing details about a domain name in a DBMS using the registrable part of a domain name as the primary key. * Like my company, a registrar or similar organisation can draft their own list of domain extensions they support, following the same specs as the original list, and then use this library to check whether a requested domain name is actually supported. publicsuffix-1.5.2/default.nix010064400017500000144000000002301335165715700146130ustar0000000000000000with import {}; stdenv.mkDerivation { name = "publicsuffix"; OPENSSL_DIR = "${openssl.dev}"; OPENSSL_LIB_DIR = "${openssl.out}/lib"; } publicsuffix-1.5.2/src/errors.rs010064400017500000144000000016451335165715700151330ustar0000000000000000//! Errors returned by this library #[cfg(feature = "remote_list")] use std::net::TcpStream; error_chain! 
{ foreign_links { Io(::std::io::Error); Url(::url::ParseError); Tls(::native_tls::Error) #[cfg(feature = "remote_list")]; Handshake(::native_tls::HandshakeError) #[cfg(feature = "remote_list")]; } errors { UnsupportedScheme { } InvalidList { } NoHost { } InvalidHost { } InvalidEmail { } InvalidRule(t: String) { description("invalid rule") display("invalid rule: '{}'", t) } InvalidDomain(t: String) { description("invalid domain") display("invalid domain: '{}'", t) } Uts46(t: ::idna::uts46::Errors) { description("UTS #46 processing failed") display("UTS #46 processing error: '{:?}'", t) } } } publicsuffix-1.5.2/src/lib.rs010064400017500000144000000607171335165715700143720ustar0000000000000000//! Robust domain name parsing using the Public Suffix List //! //! This library allows you to easily and accurately parse any given domain name. //! //! ## Examples //! //! ```rust,norun //! extern crate publicsuffix; //! //! use publicsuffix::List; //! # use publicsuffix::Result; //! //! # fn examples() -> Result<()> { //! // Fetch the list from the official URL, //! # #[cfg(feature = "remote_list")] //! let list = List::fetch()?; //! //! // from your own URL //! # #[cfg(feature = "remote_list")] //! let list = List::from_url("https://example.com/path/to/public_suffix_list.dat")?; //! //! // or from a local file. //! let list = List::from_path("/path/to/public_suffix_list.dat")?; //! //! // Using the list you can find out the root domain //! // or extension of any given domain name //! let domain = list.parse_domain("www.example.com")?; //! assert_eq!(domain.root(), Some("example.com")); //! assert_eq!(domain.suffix(), Some("com")); //! //! let domain = list.parse_domain("www.食狮.中国")?; //! assert_eq!(domain.root(), Some("食狮.中国")); //! assert_eq!(domain.suffix(), Some("中国")); //! //! let domain = list.parse_domain("www.xn--85x722f.xn--55qx5d.cn")?; //! assert_eq!(domain.root(), Some("xn--85x722f.xn--55qx5d.cn")); //! assert_eq!(domain.suffix(), Some("xn--55qx5d.cn")); //! //! 
let domain = list.parse_domain("a.b.example.uk.com")?; //! assert_eq!(domain.root(), Some("example.uk.com")); //! assert_eq!(domain.suffix(), Some("uk.com")); //! //! let name = list.parse_dns_name("_tcp.example.com.")?; //! assert_eq!(name.domain().and_then(|domain| domain.root()), Some("example.com")); //! assert_eq!(name.domain().and_then(|domain| domain.suffix()), Some("com")); //! //! // You can also find out if this is an ICANN domain //! assert!(!domain.is_icann()); //! //! // or a private one //! assert!(domain.is_private()); //! //! // In any case if the domain's suffix is in the list //! // then this is definitely a registrable domain name //! assert!(domain.has_known_suffix()); //! # Ok(()) //! # } //! # fn main() {} //! ``` #![recursion_limit = "1024"] #[macro_use] extern crate error_chain; #[cfg(feature = "remote_list")] extern crate native_tls; #[macro_use] extern crate lazy_static; extern crate regex; extern crate idna; extern crate url; pub mod errors; #[cfg(feature = "remote_list")] #[cfg(test)] mod tests; use std::fs::File; use std::path::Path; #[cfg(feature = "remote_list")] use std::time::Duration; #[cfg(feature = "remote_list")] use std::net::TcpStream; use std::io::Read; #[cfg(feature = "remote_list")] use std::io::Write; use std::collections::HashMap; use std::net::IpAddr; use std::str::FromStr; use std::fmt; pub use errors::{Result, Error}; use regex::RegexSet; use errors::{ErrorKind, ResultExt}; #[cfg(feature = "remote_list")] use native_tls::TlsConnector; use idna::{domain_to_unicode, uts46}; use url::Url; /// The official URL of the list pub const LIST_URL: &'static str = "https://publicsuffix.org/list/public_suffix_list.dat"; #[derive(Debug, PartialEq, Eq, Hash)] struct Suffix { rule: String, typ: Type, } /// Stores the public suffix list /// /// You can use the methods, `fetch`, `from_url` or `from_path` to build the list. 
/// If you are using this in a long running server it's recommended you use either /// `fetch` or `from_url` to download updates at least once a week. #[derive(Debug)] pub struct List { rules: HashMap>, } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] enum Type { Icann, Private, } /// Holds information about a particular domain /// /// This is created by `List::parse_domain`. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Domain { full: String, typ: Option, suffix: Option, registrable: Option, } /// Holds information about a particular host /// /// This is created by `List::parse_host`. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Host { Ip(IpAddr), Domain(Domain), } /// Holds information about a particular DNS name /// /// This is created by `List::parse_dns_name`. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct DnsName { name: String, domain: Option, } lazy_static! { // Regex for matching domain name labels static ref LABEL: RegexSet = { let exprs = vec![ // can be any combination of alphanumeric characters r"^[[:alnum:]]+$", // or it can start with an alphanumeric character // then optionally be followed by any combination of // alphanumeric characters and dashes before finally // ending with an alphanumeric character r"^[[:alnum:]]+[[:alnum:]-]*[[:alnum:]]+$", ]; RegexSet::new(exprs).unwrap() }; // Regex for matching the local-part of an // email address static ref LOCAL: RegexSet = { // these characters can be anywhere in the expression let global = r#"[[:alnum:]!#$%&'*+/=?^_`{|}~-]"#; // non-ascii characters (can also be unquoted) let non_ascii = r#"[^\x00-\x7F]"#; // the pattern to match let quoted = r#"["(),\\:;<>@\[\]. ]"#; // combined regex let combined = format!(r#"({}*{}*)"#, global, non_ascii); let exprs = vec![ // can be any combination of allowed characters format!(r#"^{}+$"#, combined), // can be any combination of allowed characters // separated by a . 
in between format!(r#"^({0}+[.]?{0}+)+$"#, combined), // can be a quoted string with allowed plus // additional characters format!(r#"^"({}*{}*)*"$"#, combined, quoted), ]; RegexSet::new(exprs).unwrap() }; } /// Converts a type into a Url object pub trait IntoUrl { fn into_url(self) -> Result; } impl IntoUrl for Url { fn into_url(self) -> Result { Ok(self) } } impl<'a> IntoUrl for &'a str { fn into_url(self) -> Result { Ok(Url::parse(self)?) } } impl<'a> IntoUrl for &'a String { fn into_url(self) -> Result { Ok(Url::parse(self)?) } } impl IntoUrl for String { fn into_url(self) -> Result { Ok(Url::parse(&self)?) } } #[cfg(feature = "remote_list")] fn request(u: U) -> Result { let url = u.into_url()?; let addr = url.with_default_port(|_| Err(()))?; let host = match url.host_str() { Some(host) => host, None => { return Err(ErrorKind::NoHost.into()); } }; let data = format!("GET {} HTTP/1.0\r\nHost: {}\r\n\r\n", url.path(), host); let stream = TcpStream::connect(addr)?; let timeout = Duration::from_secs(2); stream.set_read_timeout(Some(timeout))?; stream.set_write_timeout(Some(timeout))?; let mut res = String::new(); match url.scheme() { scheme if scheme == "https" => { let connector = TlsConnector::builder().build()?; let mut stream = connector.connect(host, stream)?; stream.write_all(data.as_bytes())?; stream.read_to_string(&mut res)?; } scheme if scheme == "http" => { let mut stream = stream; stream.write_all(data.as_bytes())?; stream.read_to_string(&mut res)?; } _ => { return Err(ErrorKind::UnsupportedScheme.into()); } } Ok(res) } impl List { fn append(&mut self, rule: &str, typ: Type) -> Result<()> { rule.rsplit('.').next() .ok_or(ErrorKind::InvalidRule(rule.into()).into()) .and_then(|tld| { if tld.is_empty() { return Err(ErrorKind::InvalidRule(rule.into()).into()); } Ok(tld)}) .and_then(|tld| { self.rules.entry(tld.into()).or_insert(Vec::new()) .push(Suffix { rule: rule.into(), typ: typ, }); Ok(()) }) } fn build(res: &str) -> Result { let mut typ = None; let mut 
list = List::empty(); for line in res.lines() { match line { line if line.contains("BEGIN ICANN DOMAINS") => { typ = Some(Type::Icann); } line if line.contains("BEGIN PRIVATE DOMAINS") => { typ = Some(Type::Private); } line if line.starts_with("//") => { continue; } line => { match typ { Some(typ) => { let rule = match line.split_whitespace().next() { Some(rule) => rule, None => continue, }; list.append(rule, typ)?; } None => { continue; } } } } } if list.rules.is_empty() || list.all().is_empty() { return Err(ErrorKind::InvalidList.into()); } Ok(list) } /// Creates an empty List without any rules /// /// Sometimes all you want is to do syntax checks. If you don't really care whether /// the domain has a known suffix or not you can just create an empty list and use /// that to parse domain names and email addresses. pub fn empty() -> List { List { rules: HashMap::new(), } } /// Pull the list from a URL #[cfg(feature = "remote_list")] pub fn from_url(url: U) -> Result { request(url).and_then(Self::from_string) } /// Fetch the list from a local file pub fn from_path>(path: P) -> Result { File::open(path) .map_err(|err| ErrorKind::Io(err).into()) .and_then(|mut data| { let mut res = String::new(); data.read_to_string(&mut res)?; Ok(res) }) .and_then(Self::from_string) } /// Build the list from the result of anything that implements `std::io::Read` /// /// If you don't already have your list on the filesystem but want to use your /// own library to fetch the list you can use this method so you don't have to /// save it first. pub fn from_reader(mut reader: R) -> Result { let mut res = String::new(); reader.read_to_string(&mut res)?; Self::build(&res) } /// Build the list from a string /// /// The list doesn't always have to come from a file. You can maintain your own /// list, say in a DBMS. You can then pull it at runtime and build the list from /// the resulting String. 
pub fn from_string(string: String) -> Result { Self::from_str(&string) } /// Build the list from a str /// /// The list doesn't always have to come from a file. You can maintain your own /// list, say in a DBMS. You can then pull it at runtime and build the list from /// the resulting str. pub fn from_str(string: &str) -> Result { Self::build(string) } /// Pull the list from the official URL #[cfg(feature = "remote_list")] pub fn fetch() -> Result { let github = "https://raw.githubusercontent.com/publicsuffix/list/master/public_suffix_list.dat"; Self::from_url(LIST_URL) // Fallback to the Github repo if the official link // is down for some reason. .or_else(|_| Self::from_url(github)) } fn find_type(&self, typ: Type) -> Vec<&str> { self.rules.values() .fold(Vec::new(), |mut res, ref suffices| { for suffix in *suffices { if suffix.typ == typ { res.push(&suffix.rule); } } res }) } /// Gets a list of all ICANN domain suffices pub fn icann(&self) -> Vec<&str> { self.find_type(Type::Icann) } /// Gets a list of all private domain suffices pub fn private(&self) -> Vec<&str> { self.find_type(Type::Private) } /// Gets a list of all domain suffices pub fn all(&self) -> Vec<&str> { self.rules.values() .fold(Vec::new(), |mut res, ref suffices| { for suffix in *suffices { res.push(&suffix.rule); } res }) } /// Parses a domain using the list pub fn parse_domain(&self, domain: &str) -> Result { Domain::parse(domain, self, true) } /// Parses a host using the list /// /// A host, for the purposes of this library, is either /// an IP address or a domain name. 
pub fn parse_host(&self, host: &str) -> Result { Host::parse(host, self) } /// Extracts Host from a URL pub fn parse_url(&self, url: U) -> Result { let url = url.into_url()?; match url.scheme() { "mailto" => { match url.host_str() { Some(host) => self.parse_email(&format!("{}@{}", url.username(), host)), None => Err(ErrorKind::InvalidEmail.into()), } } _ => { match url.host_str() { Some(host) => self.parse_host(host), None => Err(ErrorKind::NoHost.into()), } } } } /// Extracts Host from an email address /// /// This method can also be used, simply to validate an email address. /// If it returns an error, the email address is not valid. // https://en.wikipedia.org/wiki/Email_address#Syntax // https://en.wikipedia.org/wiki/International_email#Email_addresses // http://girders.org/blog/2013/01/31/dont-rfc-validate-email-addresses/ // https://html.spec.whatwg.org/multipage/forms.html#valid-e-mail-address // https://hackernoon.com/the-100-correct-way-to-validate-email-addresses-7c4818f24643#.pgcir4z3e // http://haacked.com/archive/2007/08/21/i-knew-how-to-validate-an-email-address-until-i.aspx/ // https://tools.ietf.org/html/rfc6530#section-10.1 // http://rumkin.com/software/email/rules.php pub fn parse_email(&self, address: &str) -> Result { let mut parts = address.rsplitn(2, "@"); let host = match parts.next() { Some(host) => host, None => { return Err(ErrorKind::InvalidEmail.into()); } }; let local = match parts.next() { Some(local) => local, None => { return Err(ErrorKind::InvalidEmail.into()); } }; if local.chars().count() > 64 || address.chars().count() > 254 || (!local.starts_with('"') && local.contains("..")) || !LOCAL.is_match(local) { return Err(ErrorKind::InvalidEmail.into()); } self.parse_host(host) } /// Parses any arbitrary string /// /// Effectively this means that the string is either a URL, an email address or a host. 
pub fn parse_str(&self, string: &str) -> Result { if string.contains("://") { self.parse_url(string) } else if string.contains("@") { self.parse_email(string) } else { self.parse_host(string) } } /// Parses any arbitrary string that can be used as a key in a DNS database pub fn parse_dns_name(&self, name: &str) -> Result { let mut dns_name = DnsName { name: Domain::to_ascii(name).chain_err(|| { ErrorKind::InvalidDomain(name.into()) })?, domain: None, }; if let Ok(mut domain) = Domain::parse(name, self, false) { if let Some(root) = domain.root().map(|root| root.to_string()) { if Domain::has_valid_syntax(&root) { domain.full = root; dns_name.domain = Some(domain); } } } Ok(dns_name) } } impl Host { fn parse(mut host: &str, list: &List) -> Result { if let Ok(domain) = Domain::parse(host, list, true) { return Ok(Host::Domain(domain)); } if host.starts_with("[") && !host.starts_with("[[") && host.ends_with("]") && !host.ends_with("]]") { host = host .trim_left_matches("[") .trim_right_matches("]"); }; if let Ok(ip) = IpAddr::from_str(host) { return Ok(Host::Ip(ip)); } Err(ErrorKind::InvalidHost.into()) } /// A convenient method to simply check if a host is an IP address pub fn is_ip(&self) -> bool { if let &Host::Ip(_) = self { return true; } false } /// A convenient method to simply check if a host is a domain name pub fn is_domain(&self) -> bool { if let &Host::Domain(_) = self { return true; } false } } impl fmt::Display for Host { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { &Host::Ip(ref ip) => write!(f, "{}", ip), &Host::Domain(ref domain) => write!(f, "{}", domain), } } } impl Domain { /// Check if a domain has valid syntax // https://en.wikipedia.org/wiki/Domain_name#Domain_name_syntax // http://blog.sacaluta.com/2011/12/dns-domain-names-253-or-255-bytesoctets.html // https://blogs.msdn.microsoft.com/oldnewthing/20120412-00/?p=7873/ pub fn has_valid_syntax(domain: &str) -> bool { // we are explicitly checking for this here before calling 
`domain_to_ascii` // because `domain_to_ascii` strips of leading dots so we won't be able to // check for this later if domain.starts_with('.') { return false; } // let's convert the domain to ascii early on so we can validate // internationalised domain names as well let domain = match Self::to_ascii(domain) { Ok(domain) => { domain } Err(_) => { return false; } }; let mut labels: Vec<&str> = domain.split('.').collect(); // strip of the first dot from a domain to support fully qualified domain names if domain.ends_with(".") { labels.pop(); } // a domain must not have more than 127 labels if labels.len() > 127 { return false; } labels.reverse(); for (i, label) in labels.iter().enumerate() { // the tld must not be a number if i == 0 && label.parse::().is_ok() { return false; } // any label must only contain allowed characters if !LABEL.is_match(label) { return false; } } true } fn find_possible_matches<'a>(domain: &str, list: &'a List) -> Result> { let tld = match domain.rsplit('.').next() { Some(tld) => { if tld.is_empty() { return Ok(Vec::new()); } tld }, None => { return Ok(Vec::new()); }, }; let candidates = match list.rules.get(tld) { Some(candidates) => candidates, None => { return Ok(Vec::new()); }, }; let candidates = candidates.iter() .fold(Vec::new(), |mut res, ref suffix| { res.push(*suffix); res }); Ok(candidates) } fn assemble(input: &str, s_len: usize) -> String { let domain = input.to_lowercase(); let d_labels: Vec<&str> = domain .trim_right_matches('.') .split('.').rev().collect(); (&d_labels[..s_len]).iter().rev() .map(|part| *part) .collect::>() .join(".") } fn find_match(input: &str, domain: &str, candidates: Vec<&Suffix>) -> Result { let d_labels: Vec<&str> = domain.split('.').rev().collect(); let mut registrable = None; let mut suffix = None; let mut typ = None; let mut num_labels = 0; let no_possible_matches_found = candidates.is_empty(); for candidate in candidates { let s_labels: Vec<&str> = candidate.rule.split('.').rev().collect(); if 
s_labels.len() > d_labels.len() { continue; } for (i, label) in s_labels.iter().enumerate() { if *label == d_labels[i] || *label == "*" || label.trim_left_matches('!') == d_labels[i] { if i == s_labels.len()-1 { if s_labels.len() >= num_labels { num_labels = s_labels.len(); typ = Some(candidate.typ); let s_len = if label.starts_with("!") { s_labels.len()-1 } else { s_labels.len() }; suffix = Some(Self::assemble(input, s_len)); if d_labels.len() > s_len { let root = Self::assemble(input, s_len+1); registrable = Some(root); } else { registrable = None; } } } } else { break; } } } if suffix.is_none() && d_labels.len() > 0 && no_possible_matches_found { suffix = Some(Self::assemble(input, 1)); registrable = if d_labels.len() > 1 { Some(Self::assemble(input, 2)) } else { None }; } Ok(Domain { full: input.to_string(), typ: typ, suffix: suffix, registrable: registrable, }) } fn to_ascii(domain: &str) -> Result { let result = uts46::to_ascii(domain, uts46::Flags { use_std3_ascii_rules: false, transitional_processing: true, verify_dns_length: true, }); result.map_err(|error| ErrorKind::Uts46(error).into()) } fn parse(domain: &str, list: &List, check_syntax: bool) -> Result { if check_syntax && !Self::has_valid_syntax(domain) { return Err(ErrorKind::InvalidDomain(domain.into()).into()); } let input = domain.trim_right_matches('.'); let (domain, res) = domain_to_unicode(input); if let Err(errors) = res { return Err(ErrorKind::Uts46(errors).into()); } Self::find_possible_matches(&domain, list) .and_then(|res| Self::find_match(input, &domain, res)) } /// Gets the root domain portion if any pub fn root(&self) -> Option<&str> { match self.registrable { Some(ref registrable) => Some(registrable), None => None, } } /// Gets the suffix if any pub fn suffix(&self) -> Option<&str> { match self.suffix { Some(ref suffix) => Some(suffix), None => None, } } /// Whether the domain has a private suffix pub fn is_private(&self) -> bool { match self.typ { Some(typ) => match typ { Type::Icann 
=> false, Type::Private => true, }, None => false, } } /// Whether the domain has an ICANN suffix pub fn is_icann(&self) -> bool { match self.typ { Some(typ) => match typ { Type::Icann => true, Type::Private => false, }, None => false, } } /// Whether this domain's suffix is in the list /// /// If it is, this is definately a valid domain. If it's not /// chances are very high that this isn't a valid domain name, /// however, it might simply be because the suffix is new and /// it hasn't been added to the list yet. /// /// If you want to validate a domain name, use this as a quick /// check but fall back to a DNS lookup if it returns false. pub fn has_known_suffix(&self) -> bool { self.typ.is_some() } } impl fmt::Display for Domain { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{}", self.full.trim_right_matches(".").to_lowercase()) } } impl DnsName { /// Extracts the root domain from a DNS name, if any pub fn domain(&self) -> Option<&Domain> { self.domain.as_ref() } } impl fmt::Display for DnsName { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { self.name.fmt(f) } } publicsuffix-1.5.2/src/tests.rs010064400017500000144000000333021335165715700147540ustar0000000000000000extern crate rspec; use {List, request}; use errors::ErrorKind; use self::rspec::context::rdescribe; #[test] fn list_behaviour() { let list = List::fetch().unwrap(); rdescribe("the list", |ctx| { ctx.it("should not be empty", || { assert!(!list.all().is_empty()); }); ctx.it("should have ICANN domains", || { assert!(!list.icann().is_empty()); }); ctx.it("should have private domains", || { assert!(!list.private().is_empty()); }); ctx.it("should have at least 1000 domains", || { assert!(list.all().len() > 1000); }); }); rdescribe("the official test", |_| { let tests = "https://raw.githubusercontent.com/publicsuffix/list/master/tests/tests.txt"; let body = request(tests).unwrap(); let mut parse = false; for (i, line) in body.lines().enumerate() { match line { line if 
line.trim().is_empty() => { parse = true; continue; } line if line.starts_with("//") => { continue; } line => { if !parse { continue; } let mut test = line.split_whitespace().peekable(); if test.peek().is_none() { continue; } let input = match test.next() { Some("null") => "", Some(res) => res, None => { panic!(format!("line {} of the test file doesn't seem to be valid", i)); }, }; let (expected_root, expected_suffix) = match test.next() { Some("null") => (None, None), Some(root) => { let suffix = { let parts: Vec<&str> = root.split('.').rev().collect(); (&parts[..parts.len()-1]).iter().rev() .map(|part| *part) .collect::>() .join(".") }; (Some(root.to_string()), Some(suffix.to_string())) }, None => { panic!(format!("line {} of the test file doesn't seem to be valid", i)); }, }; let (found_root, found_suffix) = match list.parse_domain(input) { Ok(domain) => { let found_root = match domain.root() { Some(found) => Some(found.to_string()), None => None, }; let found_suffix = match domain.suffix() { Some(found) => Some(found.to_string()), None => None, }; (found_root, found_suffix) }, Err(_) => (None, None), }; if expected_root != found_root || (expected_root.is_some() && expected_suffix != found_suffix) { let msg = format!("\n\nGiven `{}`:\nWe expected root domain to be `{:?}` and suffix be `{:?}`\nBut instead, we have `{:?}` as root domain and `{:?}` as suffix.\nWe are on line {} of `test_psl.txt`.\n\n", input, expected_root, expected_suffix, found_root, found_suffix, i+1); panic!(msg); } } } } }); rdescribe("a domain", |ctx| { ctx.it("should allow fully qualified domain names", || { assert!(list.parse_domain("example.com.").is_ok()); }); ctx.it("should not allow more than 1 trailing dot", || { assert!(list.parse_domain("example.com..").is_err()); match *list.parse_domain("example.com..").unwrap_err().kind() { ErrorKind::InvalidDomain(ref domain) => assert_eq!(domain, "example.com.."), _ => assert!(false), } }); ctx.it("should allow a single label with a single 
trailing dot", || { assert!(list.parse_domain("com.").is_ok()); }); ctx.it("should always have a suffix for single-label domains", || { let domains = vec![ // real TLDs "com", "saarland", "museum.", // non-existant TLDs "localhost", "madeup", "with-dot.", ]; for domain in domains { let res = list.parse_domain(domain).unwrap(); assert_eq!(res.suffix(), Some(domain.trim_right_matches('.'))); assert!(res.root().is_none()); } }); ctx.it("should have the same result with or without the trailing dot", || { assert_eq!(list.parse_domain("com.").unwrap(), list.parse_domain("com").unwrap()); }); ctx.it("should not have empty labels", || { assert!(list.parse_domain("exa..mple.com").is_err()); }); ctx.it("should not contain spaces", || { assert!(list.parse_domain("exa mple.com").is_err()); }); ctx.it("should not start with a dash", || { assert!(list.parse_domain("-example.com").is_err()); }); ctx.it("should not end with a dash", || { assert!(list.parse_domain("example-.com").is_err()); }); ctx.it("should not contain /", || { assert!(list.parse_domain("exa/mple.com").is_err()); }); ctx.it("should not have a label > 63 characters", || { let mut too_long_domain = String::from("a"); for _ in 0..64 { too_long_domain.push_str("a"); } too_long_domain.push_str(".com"); assert!(list.parse_domain(&too_long_domain).is_err()); }); ctx.it("should not be an IPv4 address", || { assert!(list.parse_domain("127.38.53.247").is_err()); }); ctx.it("should not be an IPv6 address", || { assert!(list.parse_domain("fd79:cdcb:38cc:9dd:f686:e06d:32f3:c123").is_err()); }); ctx.it("should allow numbers only labels that are not the tld", || { assert!(list.parse_domain("127.com").is_ok()); }); ctx.it("should not have more than 127 labels", || { let mut too_many_labels_domain = String::from("a"); for _ in 0..126 { too_many_labels_domain.push_str(".a"); } too_many_labels_domain.push_str(".com"); assert!(list.parse_domain(&too_many_labels_domain).is_err()); }); ctx.it("should not have more than 253 
characters", || { let mut too_many_chars_domain = String::from("aaaaa"); for _ in 0..50 { too_many_chars_domain.push_str(".aaaaaa"); } too_many_chars_domain.push_str(".com"); assert!(list.parse_domain(&too_many_chars_domain).is_err()); }); }); rdescribe("a DNS name", |ctx| { ctx.it("should allow extended characters", || { let names = vec![ "_tcp.example.com.", "_telnet._tcp.example.com.", "*.example.com.", "ex!mple.com.", ]; for name in names { println!("{} should be valid", name); assert!(list.parse_dns_name(name).is_ok()); } }); ctx.it("should allow extracting the correct domain name where possible", || { let names = vec![ ("_tcp.example.com.", "example.com"), ("_telnet._tcp.example.com.", "example.com"), ("*.example.com.", "example.com"), ]; for (name, domain) in names { println!("{}'s root domain should be {}", name, domain); let name = list.parse_dns_name(name).unwrap(); let root = name.domain().unwrap().root(); assert_eq!(root, Some(domain)); } }); ctx.it("should not extract any domain where not possible", || { let names = vec![ "_tcp.com.", "_telnet._tcp.com.", "*.com.", "ex!mple.com.", ]; for name in names { println!("{} should not have any root domain", name); let name = list.parse_dns_name(name).unwrap(); assert!(name.domain().is_none()); } }); ctx.it("should not allow more than 1 trailing dot", || { assert!(list.parse_dns_name("example.com..").is_err()); match *list.parse_dns_name("example.com..").unwrap_err().kind() { ErrorKind::InvalidDomain(ref domain) => assert_eq!(domain, "example.com.."), _ => assert!(false), } }); }); rdescribe("a host", |ctx| { ctx.it("can be an IPv4 address", || { assert!(list.parse_host("127.38.53.247").is_ok()); }); ctx.it("can be an IPv6 address", || { assert!(list.parse_host("fd79:cdcb:38cc:9dd:f686:e06d:32f3:c123").is_ok()); }); ctx.it("can be a domain name", || { assert!(list.parse_host("example.com").is_ok()); }); ctx.it("cannot be neither an IP address nor a domain name", || { assert!(list.parse_host("23.56").is_err()); 
}); ctx.it("an IPv4 address should parse into an IP object", || { assert!(list.parse_host("127.38.53.247").unwrap().is_ip()); }); ctx.it("an IPv6 address should parse into an IP object", || { assert!(list.parse_host("fd79:cdcb:38cc:9dd:f686:e06d:32f3:c123").unwrap().is_ip()); }); ctx.it("a domain name should parse into a domain object", || { assert!(list.parse_host("example.com").unwrap().is_domain()); }); ctx.it("can be parsed from a URL with a domain as hostname", || { assert!(list.parse_url("https://publicsuffix.org/list/").unwrap().is_domain()); }); ctx.it("can be parsed from a URL with an IP address as hostname", || { assert!(list.parse_url("https://127.38.53.247:8080/list/").unwrap().is_ip()); }); ctx.it("can be parsed from a URL using `parse_str`", || { assert!(list.parse_str("https://127.38.53.247:8080/list/").unwrap().is_ip()); }); ctx.it("can be parsed from a non-URL using `parse_str`", || { assert!(list.parse_str("example.com").unwrap().is_domain()); }); }); rdescribe("a parsed email", |ctx| { ctx.it("should allow valid email addresses", || { let emails = vec![ "prettyandsimple@example.com", "very.common@example.com", "disposable.style.email.with+symbol@example.com", "other.email-with-dash@example.com", "x@example.com", "example-indeed@strange-example.com", "#!$%&'*+-/=?^_`{}|~@example.org", "example@s.solutions", "user@[fd79:cdcb:38cc:9dd:f686:e06d:32f3:c123]", r#""Abc\@def"@example.com"#, r#""Fred Bloggs"@example.com"#, r#""Joe\\Blow"@example.com"#, r#""Abc@def"@example.com"#, r#"customer/department=shipping@example.com"#, "$A12345@example.com", "!def!xyz%abc@example.com", "_somename@example.com", ]; for email in emails { println!("{} should be valid", email); assert!(list.parse_email(email).is_ok()); } }); ctx.it("should reject invalid email addresses", || { let emails = vec![ "Abc.example.com", "A@b@c@example.com", r#"a"b(c)d,e:f;gi[j\k]l@example.com"#, r#""just"not"right@example.com"#, r#"this is"not\allowed@example.com"#, r#"this\ 
still\"not\\allowed@example.com"#, "1234567890123456789012345678901234567890123456789012345678901234+x@example.com", "john..doe@example.com", "john.doe@example..com", " prettyandsimple@example.com", "prettyandsimple@example.com ", ]; for email in emails { println!("{} should not be valid", email); assert!(list.parse_email(email).is_err()); } }); ctx.it("should allow parsing emails as str", || { assert!(list.parse_str("prettyandsimple@example.com").unwrap().is_domain()); }); ctx.it("should allow parsing emails as URL", || { assert!(list.parse_url("mailto://prettyandsimple@example.com").unwrap().is_domain()); }); ctx.it("should allow parsing IDN email addresses", || { let emails = vec![ r#"Pelé@example.com"#, r#"δοκιμή@παράδειγμα.δοκιμή"#, r#"我買@屋企.香港"#, r#"甲斐@黒川.日本"#, r#"чебурашка@ящик-с-апельсинами.рф"#, r#"संपर्क@डाटामेल.भारत"#, r#"用户@例子.广告"#, ]; for email in emails { println!("{} should be valid", email); assert!(list.parse_email(email).is_ok()); } }); }); }