rxml_validation-0.11.0/.cargo_vcs_info.json0000644000000001550000000000100143100ustar { "git": { "sha1": "69f8838f3f779cf514916c364ee3db0686cfb787" }, "path_in_vcs": "rxml_validation" }rxml_validation-0.11.0/COPYING000064400000000000000000000020431046102023000141310ustar 00000000000000Copyright (c) 2021 Jonas Schäfer Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. rxml_validation-0.11.0/Cargo.toml0000644000000015430000000000100123100ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. 
[package] edition = "2018" name = "rxml_validation" version = "0.11.0" authors = ["Jonas Schäfer "] description = "Plumbing crate for rxml and rxml_proc crates." readme = "README.md" keywords = ["xml"] license = "MIT" repository = "https://codeberg.org/jssfr/rxml" [dependencies.compact_str] version = "^0.7" optional = true [features] default = ["std"] std = ["compact_str"] rxml_validation-0.11.0/Cargo.toml.orig000064400000000000000000000006201046102023000157640ustar 00000000000000[package] name = "rxml_validation" version = "0.11.0" authors = ["Jonas Schäfer "] license = "MIT" edition = "2018" description = "Plumbing crate for rxml and rxml_proc crates." repository = "https://codeberg.org/jssfr/rxml" keywords = ["xml"] readme = true [dependencies] compact_str = { version = "^0.7", optional = true } [features] std = ["compact_str"] default = ["std"] rxml_validation-0.11.0/README.md000064400000000000000000000012241046102023000143550ustar 00000000000000# `rxml_validation` — Utilities for validating strings against specific XML productions This crate is an infrastructure/plumbing crate for the `rxml` and `rxml_proc` crates. It provides utility functions to validate that strings conform to the CData, Name and NCName grammars described in the XML 1.0 specification. [![crate badge](https://img.shields.io/crates/v/rxml_validation.svg)](https://crates.io/crates/rxml_validation) [![docs badge](https://docs.rs/rxml_validation/badge.svg)](https://docs.rs/rxml_validation/) Please see the [rxml](https://crates.io/crates/rxml) and [rxml_proc](https://crates.io/crates/rxml_proc) crates for more information. rxml_validation-0.11.0/src/lib.rs000064400000000000000000000174331046102023000150120ustar 00000000000000#![deny(missing_docs)] #![cfg_attr(not(feature = "std"), no_std)] #![cfg_attr(docsrs, feature(doc_cfg))] /*! 
# Strongly-typed strings for use with XML 1.0 documents

This crate defines various string- and str-like types which represent pieces
of text as they may occur in XML documents. These types are checked to contain
only text which conforms to the respective grammar in the XML specifications.

This makes it possible to carry the information about checks which already
took place in the parser over to the application, avoiding the need to execute
the same checks multiple times.

This is a supplementary crate for [`rxml`](https://docs.rs/rxml). It is
factored out of the main crate to support
[`rxml_proc`](https://docs.rs/rxml_proc), a crate of macros which allow
compile-time validation and typing of XML strings. All types defined in this
crate are re-exported in `rxml`; if you depend on `rxml`, you can use the
types from there directly.

If the `std` feature is *not* enabled (it is enabled by default), this crate
can be used in `no_std` environments.

## Type Overview

- [`Name`] and [`NameStr`] represent the `Name` production and can be used
  for element and attribute names before namespace prefix expansion.
- [`NcName`] and [`NcNameStr`] represent the `Name` production but without a
  colon inside; they are used for localnames after prefix expansion and to
  carry the prefixes themselves.

## Construction

In general, values are constructed using the [`std::convert::TryInto`]
trait, from other string types or `str`. Supported source types are:

* [`String`] (copies)
* [`compact_str::CompactString`] (moves)
* [`str`] (copies for all types except the slice types)

**Note:** If the `compact_str` feature is *not* enabled, all string types use
the normal [`std::string::String`] type instead.

In addition, converting from [`NcName`] to [`Name`] is possible without extra
checking and is thus possible through `.into()` (and likewise for the
corresponding str types). The inverse directions are only available through
`try_into`.

## When to use rxml_validation vs. rxml?

You should use this crate (`rxml_validation`) whenever you only need to validate strings against rules present in XML, without actually parsing or serialising XML data. In that case, this crate is a much lighter choice and it can be used in `no_std` environments. */ use core::fmt; mod strings; pub mod selectors; use selectors::CharSelector; #[doc(inline)] #[cfg(feature = "std")] #[cfg_attr(docsrs, doc(cfg(feature = "std")))] pub use strings::{CompactString, Name, NcName}; #[doc(inline)] pub use strings::{NameStr, NcNameStr}; /** Error condition from validating an XML string. */ #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Error { /// A Name or NCName was empty. EmptyName, /// An invalid character was encountered. /// /// This variant contains the character as data. InvalidChar(char), /// One side of the colon in a name was empty. /// /// The contents are implementation details. EmptyNamePart, /// More than one colon encountered in a name. /// /// The contents are implementation details. 
MultiColonName, /// Local name does not conform to Name production (invalid start char) InvalidLocalName, } impl fmt::Display for Error { fn fmt<'f>(&self, f: &'f mut fmt::Formatter) -> fmt::Result { match self { Self::EmptyName => f.write_str("Name and NCName must not be empty"), Self::InvalidChar(c) => write!(f, "character U+{:04x} is not allowed", *c as u32), Self::EmptyNamePart => f.write_str("empty string on one side of the colon"), Self::MultiColonName => f.write_str("more than one colon"), Self::InvalidLocalName => f.write_str("local name is invalid"), } } } #[cfg(feature = "std")] impl std::error::Error for Error {} /** Check whether a str is a valid XML 1.0 Name # Example ```rust use rxml_validation::{validate_name, Error}; assert!(validate_name("foobar").is_ok()); assert!(validate_name("foo:bar").is_ok()); assert!(matches!(validate_name("foo bar"), Err(Error::InvalidChar(' ')))); assert!(matches!(validate_name(""), Err(Error::EmptyName))); */ pub fn validate_name(s: &str) -> Result<(), Error> { let mut chars = s.chars(); match chars.next() { // must have at least one char None => return Err(Error::EmptyName), Some(c) => { if !selectors::CLASS_XML_NAMESTART.select(c) { return Err(Error::InvalidChar(c)); } } } for ch in chars { if !selectors::CLASS_XML_NAME.select(ch) { return Err(Error::InvalidChar(ch)); } } Ok(()) } /** Check whether a str is a valid XML 1.0 Name, without colons. 
# Example ```rust use rxml_validation::{validate_ncname, Error}; assert!(validate_ncname("foobar").is_ok()); assert!(matches!(validate_ncname("foo:bar"), Err(Error::InvalidChar(':')))); assert!(matches!(validate_ncname(""), Err(Error::EmptyName))); */ pub fn validate_ncname(s: &str) -> Result<(), Error> { let mut chars = s.chars(); match chars.next() { // must have at least one char None => return Err(Error::EmptyName), Some(c) => { if !selectors::CLASS_XML_NAMESTART.select(c) || c == ':' { return Err(Error::InvalidChar(c)); } } } for ch in chars { if !selectors::CLASS_XML_NAME.select(ch) || ch == ':' { return Err(Error::InvalidChar(ch)); } } Ok(()) } /** Check whether a str is valid XML 1.0 CData. There exists no specific string type for CData, because it is almost identical to Rust strings and encoding even a validated CData string into XML requires extra steps because it may contain characters which need escaping when written into an XML document. # Example ```rust use rxml_validation::{validate_cdata, Error}; assert!(validate_cdata("foo bar baz ").is_ok()); assert!(matches!(validate_cdata("\x01"), Err(Error::InvalidChar('\x01')))); */ pub fn validate_cdata(s: &str) -> Result<(), Error> { let s = s.as_bytes(); for i in 0..s.len() { let b = s[i]; if b < 0x09 || b == 0x0b || b == 0x0c || (b >= 0x0e && b <= 0x1f) { return Err(Error::InvalidChar(b.into())); } if b == 0xbe || b == 0xbf { if i >= 2 && s[i - 2] == 0xef && s[i - 1] == 0xbf { // U+FFFE or U+FFFF let bit = (b & 0x01) as u32; // SAFETY: we are passing only 16 bits and the upper // nibble is set to all ones, so this is within the bounds // of a unicode code point and not a surrogate. 
let ch = unsafe { char::from_u32_unchecked(0xfffe | bit) }; return Err(Error::InvalidChar(ch)); } } } Ok(()) } #[cfg(test)] mod tests { use super::*; #[test] fn test_cdata_smoketest() { assert!(validate_cdata("foo bar baz http://").is_ok()); assert!(validate_cdata("\u{ffff}").is_err()); } #[test] fn test_name_smoketest() { assert!(validate_name("foobar").is_ok()); assert!(validate_name("foo:bar").is_ok()); assert!(validate_name("").is_err()); assert!(validate_name("foo bar baz http://").is_err()); assert!(validate_name("\u{ffff}").is_err()); } #[test] fn test_ncname_smoketest() { assert!(validate_ncname("foobar").is_ok()); assert!(validate_ncname("foo:bar").is_err()); assert!(validate_ncname("").is_err()); assert!(validate_ncname("foo bar baz http://").is_err()); assert!(validate_ncname("\u{ffff}").is_err()); } #[test] fn test_validate_cdata_is_equivalent_to_nonchar_class() { let mut buf = String::with_capacity(4); for cp in 0x0..=0x10ffffu32 { if let Some(ch) = std::char::from_u32(cp) { buf.clear(); buf.push(ch); if selectors::CLASS_XML_NONCHAR.select(ch) { match validate_cdata(&buf) { Err(Error::InvalidChar(v)) => { assert_eq!(v, ch); } other => panic!("validate_cdata accepts {:?} (ch={:?}) which is rejected by CLASS_XML_NONCHAR: {:?}", buf, ch, other), } } else { match validate_cdata(&buf) { Ok(()) => (), other => panic!("validate_cdata rejects {:?} (ch={:?}) which is accepted by CLASS_XML_NONCHAR: {:?}", buf, ch, other), } } } } } } rxml_validation-0.11.0/src/selectors.rs000064400000000000000000000121261046102023000162410ustar 00000000000000/*! # Constants and Utilities for matching ranges of codepoints The contents of this module are implementation details of `rxml`, `rxml_validation` and `rxml_proc` and should not be relied upon. 
*/ use core::fmt; /** # Predicate trait for matching chars */ pub trait CharSelector { /// Return true if the given char is selected by the selector fn select(&self, c: char) -> bool; } /// Selects all chars #[derive(Debug, Copy, Clone)] pub struct AllChars(); impl CharSelector for char { fn select(&self, c: char) -> bool { *self == c } } impl CharSelector for &'_ [char] { fn select(&self, c: char) -> bool { for r in self.iter() { if *r == c { return true; } } false } } impl CharSelector for AllChars { fn select(&self, _c: char) -> bool { return true; } } // start to end (incl., because some of our edge points are not valid chars // in rust) /// Selects all chars from a range (including both ends) pub struct CodepointRange(pub char, pub char); /// Valid codepoints for character data (XML 1.0 § 2.2) pub const VALID_XML_CDATA_RANGES: &'static [CodepointRange] = &[ CodepointRange('\x09', '\x0a'), CodepointRange('\x0d', '\x0d'), CodepointRange('\u{0020}', '\u{d7ff}'), CodepointRange('\u{e000}', '\u{fffd}'), CodepointRange('\u{10000}', '\u{10ffff}'), ]; /// Invalid codepoints for character data (XML 1.0 § 2.2) /// /// Note that values which are not valid Rust characters are not included in /// these ranges. 
pub const INVALID_XML_CDATA_RANGES: &'static [CodepointRange] = &[ CodepointRange('\x00', '\x08'), CodepointRange('\x0b', '\x0c'), CodepointRange('\x0e', '\x1f'), CodepointRange('\u{fffe}', '\u{ffff}'), ]; const VALID_XML_NAME_START_RANGES: &'static [CodepointRange] = &[ CodepointRange(':', ':'), CodepointRange('A', 'Z'), CodepointRange('_', '_'), CodepointRange('a', 'z'), CodepointRange('\u{c0}', '\u{d6}'), CodepointRange('\u{d8}', '\u{f6}'), CodepointRange('\u{f8}', '\u{2ff}'), CodepointRange('\u{370}', '\u{37d}'), CodepointRange('\u{37f}', '\u{1fff}'), CodepointRange('\u{200c}', '\u{200d}'), CodepointRange('\u{2070}', '\u{218f}'), CodepointRange('\u{2c00}', '\u{2fef}'), CodepointRange('\u{3001}', '\u{d7ff}'), CodepointRange('\u{f900}', '\u{fdcf}'), CodepointRange('\u{10000}', '\u{effff}'), ]; const VALID_XML_NAME_RANGES: &'static [CodepointRange] = &[ CodepointRange(':', ':'), CodepointRange('-', '-'), CodepointRange('.', '.'), CodepointRange('A', 'Z'), CodepointRange('_', '_'), CodepointRange('0', '9'), CodepointRange('a', 'z'), CodepointRange('\u{b7}', '\u{b7}'), CodepointRange('\u{c0}', '\u{d6}'), CodepointRange('\u{d8}', '\u{f6}'), CodepointRange('\u{f8}', '\u{2ff}'), CodepointRange('\u{300}', '\u{36f}'), CodepointRange('\u{370}', '\u{37d}'), CodepointRange('\u{37f}', '\u{1fff}'), CodepointRange('\u{200c}', '\u{200d}'), CodepointRange('\u{203f}', '\u{2040}'), CodepointRange('\u{2070}', '\u{218f}'), CodepointRange('\u{2c00}', '\u{2fef}'), CodepointRange('\u{3001}', '\u{d7ff}'), CodepointRange('\u{f900}', '\u{fdcf}'), CodepointRange('\u{10000}', '\u{effff}'), ]; impl CodepointRange { /// Returns true if `c` is a member of the range of codepoints represented /// by this object. 
pub fn contains(&self, c: char) -> bool { return (self.0 <= c) && (c <= self.1); } } /// Selects all chars from any of the contained ranges #[derive(Copy)] pub struct CodepointRanges(pub &'static [CodepointRange]); /// Valid non-first characters for an XML Name (XML 1.0 § 2.3 \[4a\]) pub static CLASS_XML_NAME: CodepointRanges = CodepointRanges(VALID_XML_NAME_RANGES); /// Valid first characters for an XML Name (XML 1.0 § 2.3 \[4\]) pub static CLASS_XML_NAMESTART: CodepointRanges = CodepointRanges(VALID_XML_NAME_START_RANGES); /// See [`INVALID_XML_CDATA_RANGES`] pub static CLASS_XML_NONCHAR: CodepointRanges = CodepointRanges(INVALID_XML_CDATA_RANGES); impl CharSelector for CodepointRange { fn select(&self, c: char) -> bool { self.contains(c) } } impl CharSelector for CodepointRanges { fn select(&self, c: char) -> bool { contained_in_ranges(c, self.0) } } /// Returns true if `c` is a member of any of the range of the given codepoint /// ranges. pub fn contained_in_ranges(c: char, rs: &[CodepointRange]) -> bool { for r in rs.iter() { if r.contains(c) { return true; } } false } impl fmt::Debug for CodepointRanges { fn fmt<'f>(&self, f: &'f mut fmt::Formatter) -> fmt::Result { write!(f, "CodepointRanges(<{} ranges>)", self.0.len()) } } impl Clone for CodepointRanges { fn clone(&self) -> Self { CodepointRanges(self.0) } } impl PartialEq for CodepointRanges { fn eq(&self, other: &CodepointRanges) -> bool { core::ptr::eq(&self.0, &other.0) } } #[cfg(test)] mod tests { use super::*; #[test] fn cdata_inclusion_and_exclusion_are_equivalent() { let excluder = CodepointRanges(INVALID_XML_CDATA_RANGES); let includer = CodepointRanges(VALID_XML_CDATA_RANGES); for cp in 0x0..=0x10ffffu32 { if let Some(ch) = core::char::from_u32(cp) { if !includer.select(ch) != excluder.select(ch) { panic!("INVALID_XML_CDATA_RANGES and VALID_XML_CDATA_RANGES have different opinions about U+{:x}", cp) } } } } } 
rxml_validation-0.11.0/src/strings.rs000064400000000000000000001153721046102023000157360ustar 00000000000000use core::borrow::Borrow; use core::cmp::{Ordering, PartialOrd}; use core::convert::{TryFrom, TryInto}; use core::fmt; use core::ops::{ Deref, Index, Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive, }; #[cfg(feature = "std")] use std::{ borrow::{Cow, ToOwned}, iter::FromIterator, ops::{Add, AddAssign, DerefMut}, }; /// Type alias to access the inner type used for [`Name`] and [`NcName`] /// irrespective of feature flags. /// /// This will be [`compact_str::CompactString`] if the `compact_str` feature /// flag is enabled and [`String`] otherwise. #[cfg(feature = "compact_str")] pub type CompactString = compact_str::CompactString; /// Type alias to access the inner type used for [`Name`] and [`NcName`] /// irrespective of feature flags. /// /// This will be `compact_str::CompactString` if the `compact_str` feature /// flag is enabled and [`String`] otherwise. #[cfg(all(not(feature = "compact_str"), feature = "std"))] pub type CompactString = String; use crate::selectors; use crate::selectors::CharSelector; use crate::Error; use crate::{validate_name, validate_ncname}; macro_rules! rxml_unsafe_str_construct_doc { ($name:ident, $other:ident) => { concat!( "Construct a `", stringify!($name), "` without enforcing anything\n", "\n", "# Safety\n", "\n", "The caller is responsible for ensuring that the passed [`", stringify!($other), "`] is in fact a valid `", stringify!($name), "`.\n", ) }; } macro_rules! rxml_safe_str_construct_doc { ($name:ident, $other:ident, $more:expr) => { concat!( "Converts a [`", stringify!($other), "`] to a `", stringify!($name), "`.\n", "\n", "If the given `", stringify!($other), "` does not conform to the restrictions imposed by `", stringify!($name), "`, an error is returned.\n", $more ) }; } macro_rules! 
rxml_split_at_example {
    ($borrowed:ty) => {
        concat!(
            "\n\n```\n",
            "# use std::convert::TryInto;\n",
            "# use rxml_validation::",
            stringify!($borrowed),
            ";\n",
            "let value: &",
            stringify!($borrowed),
            " = \"foobar\".try_into().unwrap();\n",
            "let (lhs, rhs) = value.split_at(3);\n",
            "assert_eq!(lhs, \"foo\");\n",
            "assert_eq!(rhs, \"bar\");\n",
            "```\n",
        )
    };
}

// Doctest for `make_ascii_lowercase`. The example needs the owned type, so
// without the `std` feature the macro expands to an empty string instead.
#[cfg(feature = "std")]
macro_rules! rxml_make_ascii_lowercase_example {
    ($owned:ty, $borrowed:ty) => {
        concat!(
            "\n\n# Example\n\n```\n",
            "# use std::convert::TryInto;\n",
            "# use rxml_validation::{",
            stringify!($borrowed),
            ", ",
            stringify!($owned),
            "};\n",
            "let mut owned: ",
            stringify!($owned),
            " = \"FÖöBar\".try_into().unwrap();\n",
            "let borrowed: &mut ",
            stringify!($borrowed),
            " = &mut owned;\n",
            "borrowed.make_ascii_lowercase();\n",
            "assert_eq!(borrowed, \"fÖöbar\");\n",
            "```\n",
        )
    };
}

#[cfg(not(feature = "std"))]
macro_rules! rxml_make_ascii_lowercase_example {
    ($owned:ty, $borrowed:ty) => {
        ""
    };
}

// Doctest for `make_ascii_uppercase`; see the lowercase variant for the
// reason behind the `std` split.
#[cfg(feature = "std")]
macro_rules! rxml_make_ascii_uppercase_example {
    ($owned:ty, $borrowed:ty) => {
        concat!(
            "\n\n# Example\n\n```\n",
            "# use std::convert::TryInto;\n",
            "# use rxml_validation::{",
            stringify!($borrowed),
            ", ",
            stringify!($owned),
            "};\n",
            "let mut owned: ",
            stringify!($owned),
            " = \"FÖöBar\".try_into().unwrap();\n",
            "let borrowed: &mut ",
            stringify!($borrowed),
            " = &mut owned;\n",
            "borrowed.make_ascii_uppercase();\n",
            "assert_eq!(borrowed, \"FÖöBAR\");\n",
            "```\n",
        )
    };
}

#[cfg(not(feature = "std"))]
macro_rules! rxml_make_ascii_uppercase_example {
    ($owned:ty, $borrowed:ty) => {
        ""
    };
}

// `# Panics` section for `split_off(at)`. Both halves are verified after the
// split: an empty half, or a half whose first char is not a valid start
// character for the type, makes `split_off` panic.
#[cfg(feature = "std")]
macro_rules! rxml_split_off_panic_on_empty {
    () => {
        concat!(
            "\n",
            "# Panics\n",
            "\n",
            "If `at` is 0 or equal to the length, as the empty ",
            "string is not valid, or if the split-off part does not start ",
            "with a character which is valid at the start of this type. ",
            "The panics of the underlying string's `split_off` apply, too.\n",
        )
    };
}

// Selects the `# Panics` text by type name; both supported types share it.
#[cfg(feature = "std")]
macro_rules! rxml_split_off_panics {
    (NcName) => {
        rxml_split_off_panic_on_empty!()
    };
    (Name) => {
        rxml_split_off_panic_on_empty!()
    };
}

#[cfg(feature = "std")]
macro_rules!
rxml_split_off_example { ($ty:ident) => { concat!( "\n", "```\n", "# use std::convert::TryInto;\n", "# use rxml_validation::", stringify!($ty), ";\n", "let mut value: ", stringify!($ty), " = \"foobar\".try_into().unwrap();\n", "let rhs: ", stringify!($ty), " = value.split_off(3);\n", "assert_eq!(value, \"foo\");\n", "assert_eq!(rhs, \"bar\");\n", "```\n", ) }; } #[cfg(feature = "std")] macro_rules! rxml_insert_str_example { ($owned:ident, $borrowed:ident) => { concat!( "\n", "```\n", "# use std::convert::TryInto;\n", "# use rxml_validation::{", stringify!($owned), ", ", stringify!($borrowed), "};\n", "let mut value: ", stringify!($owned), " = \"foobaz\".try_into().unwrap();\n", "let to_insert: &", stringify!($borrowed), " = \"bar\".try_into().unwrap();\n", "value.insert_str(3, to_insert);\n", "assert_eq!(value, \"foobarbaz\");\n", "```\n", ) }; } #[cfg(feature = "std")] macro_rules! rxml_push_str_example { ($owned:ident, $borrowed:ident) => { concat!( "\n", "```\n", "# use std::convert::TryInto;\n", "# use rxml_validation::{", stringify!($owned), ", ", stringify!($borrowed), "};\n", "let mut value: ", stringify!($owned), " = \"foobar\".try_into().unwrap();\n", "let to_append: &", stringify!($borrowed), " = \"baz\".try_into().unwrap();\n", "value.push_str(to_append);\n", "assert_eq!(value, \"foobarbaz\");\n", "```\n", ) }; } #[cfg(all(not(feature = "compact_str"), feature = "std"))] macro_rules! rxml_non_compact_str_only_note { (CompactString) => { "\n# Note\nThis function is only available *without* the `compact_str` feature!\n" }; ($other:ident) => { "" }; } #[cfg(feature = "std")] macro_rules! rxml_custom_string_type { ( $(#[$outer:meta])* pub struct $name:ident($string:ident) use $check:ident => $borrowed:ident; ) => { $(#[$outer])* #[derive(Debug, Clone, PartialEq, Eq, Hash, Ord)] #[repr(transparent)] pub struct $name($string); impl $name { /// Extract the inner string and return it. 
pub fn into_inner(self) -> $string { self.0 } /// Obtain a reference to the inner string slice. pub fn as_str(&self) -> &str { self.0.as_str() } /// Return the capacity, in bytes. pub fn capacity(&self) -> usize { self.0.capacity() } /// Inserts a slice into this typed string at a byte position. /// /// This is an *O(n)* operation as it requires copying every /// element into the buffer. /// /// # Panics /// /// Panics if `idx` is larger than the string's length, or if it /// does not lie on a `char` boundary. /// /// # Example /// #[doc = rxml_insert_str_example!($name, $borrowed)] pub fn insert_str(&mut self, idx: usize, string: &$borrowed) { // CORRECTNESS: as $borrowed is a valid sub-slice of $name, // it is also valid at the start of $name -> we can insert it // at arbitrary positions. self.0.insert_str(idx, &string.0); } /// Return the length of this string in bytes. pub fn len(&self) -> usize { self.0.len() } /// Reserve capacity for at least `additional` bytes more than the /// current length. pub fn reserve(&mut self, additional: usize) { self.0.reserve(additional) } /// Reserve capacity for at least `additional` bytes more than the current /// length. /// /// Unlike `reserve`, this will not over-allocate, ever. // ↓ CompactString does not have this. #[doc = rxml_non_compact_str_only_note!($string)] #[cfg(not(feature = "compact_str"))] pub fn reserve_exact(&mut self, additional: usize) { self.0.reserve_exact(additional) } /// Shrink the capacity of this string with a lower bound. pub fn shrink_to(&mut self, min_capacity: usize) { self.0.shrink_to(min_capacity) } /// Shrink the capacity of this string to match its length. 
pub fn shrink_to_fit(&mut self) { self.0.shrink_to_fit() } #[doc = rxml_unsafe_str_construct_doc!($name, str)] pub unsafe fn from_str_unchecked>(s: T) -> Self { Self(s.as_ref().into()) } #[doc = rxml_unsafe_str_construct_doc!($name, String)] pub unsafe fn from_string_unchecked>(s: T) -> Self { Self(s.into().into()) } #[cfg(feature = "compact_str")] #[allow(dead_code)] unsafe fn from_auto_unchecked(s: CompactString) -> Self { Self(s.into()) } #[cfg(not(feature = "compact_str"))] #[allow(dead_code)] unsafe fn from_auto_unchecked(s: String) -> Self { Self(s.into()) } #[doc = rxml_unsafe_str_construct_doc!($name, CompactString)] #[cfg(feature = "compact_str")] #[cfg_attr(docsrs, doc(cfg(feature = "compact_str")))] pub unsafe fn from_compact_str_unchecked>(s: T) -> Self { Self(s.into().into()) } unsafe fn from_native_unchecked(s: $string) -> Self { Self(s) } /// Appends a slice to this typed string. /// /// This is an *O(n)* operation as it requires copying every /// element into the buffer. /// /// # Example /// #[doc = rxml_push_str_example!($name, $borrowed)] // CORRECTNESS: // For NcName and Name it holds that concatenations of these types // are correct. pub fn push_str(&mut self, v: &$borrowed) { self.0.push_str(&v.0) } } impl Deref for $name { type Target = $borrowed; fn deref(&self) -> &Self::Target { // SAFETY: $borrowed is assumed to use the same check; this is // enforced by using the pair macro. unsafe { $borrowed::from_str_unchecked(&self.0) } } } impl DerefMut for $name { fn deref_mut(&mut self) -> &mut Self::Target { // SAFETY: $borrowed is assumed to use the same check; this is // enforced by using the pair macro. unsafe { $borrowed::from_str_unchecked_mut(&mut self.0) } } } impl Borrow<$string> for $name { fn borrow(&self) -> &$string { &self.0 } } impl Borrow<$borrowed> for $name { fn borrow(&self) -> &$borrowed { // SAFETY: $borrowed is assumed to use the same check; this is // enforced by using the pair macro. 
unsafe { $borrowed::from_str_unchecked(&self.0) } } } impl Borrow for $name { fn borrow(&self) -> &str { &self.0 } } impl AsRef<$string> for $name { fn as_ref(&self) -> &$string { &self.0 } } impl AsRef<$borrowed> for $name { fn as_ref(&self) -> &$borrowed { // SAFETY: $borrowed is assumed to use the same check; this is // enforced by using the pair macro. unsafe { $borrowed::from_str_unchecked(&self.0) } } } impl AsRef for $name { fn as_ref(&self) -> &str { &self.0 } } impl PartialEq for $name { fn eq(&self, other: &str) -> bool { self.0 == other } } // following the example of std::string::String, we define PartialEq // against the slice and the base type. impl PartialEq<$name> for str { fn eq(&self, other: &$name) -> bool { other.0 == self } } impl PartialEq<&str> for $name { fn eq(&self, other: &&str) -> bool { &self.0 == other } } impl PartialEq<$name> for &str { fn eq(&self, other: &$name) -> bool { other.0 == *self } } impl PartialEq<$borrowed> for $name { fn eq(&self, other: &$borrowed) -> bool { self.0 == &other.0 } } impl PartialEq<$name> for $borrowed { fn eq(&self, other: &$name) -> bool { other.0 == &self.0 } } impl PartialEq<&$borrowed> for $name { fn eq(&self, other: &&$borrowed) -> bool { self.0 == &other.0 } } impl PartialEq<$name> for &$borrowed { fn eq(&self, other: &$name) -> bool { other.0 == &self.0 } } impl PartialOrd<$name> for $name { fn partial_cmp(&self, other: &$name) -> Option { self.0.partial_cmp(&other.0) } } impl From<$name> for String { fn from(other: $name) -> Self { other.0.into() } } #[cfg(feature = "compact_str")] #[cfg_attr(docsrs, doc(cfg(feature = "compact_str")))] impl From<$name> for CompactString { fn from(other: $name) -> Self { other.0.into() } } impl<'x> From<$name> for Cow<'x, $borrowed> { fn from(other: $name) -> Self { Self::Owned(other) } } impl<'x> From> for $name { fn from(other: Cow<'x, $borrowed>) -> Self { other.into_owned() } } #[cfg(feature = "compact_str")] #[cfg_attr(docsrs, doc(cfg(feature = 
"compact_str")))] impl TryFrom for $name { type Error = Error; #[doc = rxml_safe_str_construct_doc!($name, CompactString, "")] fn try_from(other: CompactString) -> Result { $check(&other)?; Ok($name(other.into())) } } impl TryFrom for $name { type Error = Error; #[doc = rxml_safe_str_construct_doc!($name, String, "")] fn try_from(other: String) -> Result { $check(&other)?; Ok($name(other.into())) } } impl TryFrom<&str> for $name { type Error = Error; #[doc = rxml_safe_str_construct_doc!($name, str, "")] fn try_from(other: &str) -> Result { $check(other)?; Ok($name(other.into())) } } impl fmt::Display for $name { fn fmt<'f>(&self, f: &'f mut fmt::Formatter) -> fmt::Result { f.write_str(&self.0 as &str) } } // The impls below here are inspired by the list of trait impls of // String. // CORRECTNESS: // For NcName and Name it holds that concatenations of these types are // correct. // // That is because while they may have constraints on the characters // at the beginning, the characters allowed in the remainder of the // respective string types are a superset of those allowed as first // character. impl Add<&$borrowed> for $name { type Output = $name; fn add(mut self, rhs: &$borrowed) -> Self::Output { self += rhs; self } } impl AddAssign<&$borrowed> for $name { fn add_assign(&mut self, rhs: &$borrowed) { self.0.push_str(&rhs.0) } } impl<'a> Extend<&'a $borrowed> for $name { fn extend>(&mut self, iter: I) { self.0.extend(iter.into_iter().map(|x| &x.0)) } } impl Extend> for $name { fn extend>>(&mut self, iter: I) { for item in iter { self.add_assign(&item); } } } impl<'a> Extend> for $name { fn extend>>(&mut self, iter: I) { for item in iter { self.add_assign(&item); } } } impl Extend<$name> for $name { fn extend>(&mut self, iter: I) { self.0.extend(iter.into_iter().map(|x| x.0)) } } impl<'x> FromIterator<&'x $borrowed> for $name { fn from_iter>(iter: I) -> Self { // SAFETY: see note above impl Add<&$borrowed>. 
unsafe { Self::from_native_unchecked( <$string>::from_iter(iter.into_iter().map(|x| &x.0)) ) } } } impl FromIterator> for $name { fn from_iter>>(iter: I) -> Self { let mut buf = <$string>::with_capacity(0); for item in iter { buf.push_str(&item.0); } unsafe { Self::from_native_unchecked(buf) } } } impl<'x> FromIterator> for $name { fn from_iter>>(iter: I) -> Self { let mut buf = <$string>::with_capacity(0); for item in iter { buf.push_str(&item.0); } unsafe { Self::from_native_unchecked(buf) } } } } } macro_rules! rxml_custom_str_type { ( $(#[$outer:meta])* pub struct $name:ident(str) use $check:ident => $owned:ident; ) => { $(#[$outer])* #[derive(Debug, Hash, PartialEq, Eq, Ord)] #[repr(transparent)] pub struct $name(str); impl $name { #[doc = rxml_safe_str_construct_doc!($name, str, "")] pub fn from_str<'x>(s: &'x str) -> Result<&'x Self, Error> { s.try_into() } /// Access the underlying str. /// /// This is mostly provided for use in const functions. pub const fn as_str(&self) -> &str { &self.0 } #[doc = rxml_unsafe_str_construct_doc!($name, str)] pub const unsafe fn from_str_unchecked<'x>(s: &'x str) -> &'x Self { core::mem::transmute(s) } #[doc = rxml_unsafe_str_construct_doc!($name, str)] pub unsafe fn from_str_unchecked_mut<'x>(s: &'x mut str) -> &'x mut Self { core::mem::transmute(s) } /// Replace A-Z with a-z, in-place. /// /// Non-ASCII characters remain unchanged. #[doc = rxml_make_ascii_lowercase_example!($owned, $name)] pub fn make_ascii_lowercase(&mut self) { self.0.make_ascii_lowercase() } /// Replace a-z with A-Z, in-place. /// /// Non-ASCII characters remain unchanged. 
#[doc = rxml_make_ascii_uppercase_example!($owned, $name)] pub fn make_ascii_uppercase(&mut self) { self.0.make_ascii_uppercase() } } impl Deref for $name { type Target = str; fn deref(&self) -> &Self::Target { &self.0 } } impl AsRef for $name { fn as_ref(&self) -> &str { &self.0 } } impl AsRef<$name> for &$name { fn as_ref(&self) -> &$name { &self } } impl PartialEq for $name { fn eq(&self, other: &str) -> bool { &self.0 == other } } impl PartialEq<$name> for str { fn eq(&self, other: &$name) -> bool { self == &other.0 } } impl PartialOrd<$name> for $name { fn partial_cmp(&self, other: &$name) -> Option { self.0.partial_cmp(&other.0) } } #[cfg(feature = "std")] #[cfg_attr(docsrs, doc(cfg(feature = "std")))] impl ToOwned for $name { type Owned = $owned; fn to_owned(&self) ->Self::Owned { self.into() } } #[cfg(feature = "std")] #[cfg_attr(docsrs, doc(cfg(feature = "std")))] impl From<&$name> for $owned { fn from(other: &$name) -> Self { // SAFETY: $owned is assumed to use the same check; this is // enforced by using the pair macro. unsafe { $owned::from_str_unchecked(&other.0) } } } impl<'x> TryFrom<&'x str> for &'x $name { type Error = Error; fn try_from(other: &'x str) -> Result { $check(other)?; // SAFETY: the content check is executed right above and we're // transmuting &str into a repr(transparent) of &str. Ok(unsafe { core::mem::transmute(other) } ) } } impl fmt::Display for $name { fn fmt<'f>(&self, f: &'f mut fmt::Formatter) -> fmt::Result { f.write_str(&self.0) } } } } macro_rules! rxml_index_impl { ($ty:ty, $selcode:expr, $borrowed:ty, $rangety:ty) => { impl Index<$rangety> for $ty { type Output = $borrowed; fn index(&self, index: $rangety) -> &$borrowed { let tmp = &self.0[index]; let firstchar = tmp.chars().next(); if !($selcode(firstchar)) { panic!(concat!("slice is not a valid ", stringify!($borrowed))); } // SAFETY: please please point $selcode at the right stuff, kthxbai. unsafe { <$borrowed>::from_str_unchecked(tmp) } } } }; } macro_rules! 
rxml_splitting_impls { ($ty:ident => $firstsel:path => $borrowed:ident) => { rxml_splitting_impls!($ty => (|firstchar: Option| firstchar.map(|x| $firstsel.select(x)).unwrap_or(false)) => $borrowed); }; ($ty:ident => $selcode:expr => $borrowed:ident) => { #[cfg(feature = "std")] #[cfg_attr(docsrs, doc(cfg(feature = "std")))] rxml_index_impl!($ty, $selcode, $borrowed, Range); #[cfg(feature = "std")] #[cfg_attr(docsrs, doc(cfg(feature = "std")))] rxml_index_impl!($ty, $selcode, $borrowed, RangeFrom); #[cfg(feature = "std")] #[cfg_attr(docsrs, doc(cfg(feature = "std")))] rxml_index_impl!($ty, $selcode, $borrowed, RangeFull); #[cfg(feature = "std")] #[cfg_attr(docsrs, doc(cfg(feature = "std")))] rxml_index_impl!($ty, $selcode, $borrowed, RangeInclusive); #[cfg(feature = "std")] #[cfg_attr(docsrs, doc(cfg(feature = "std")))] rxml_index_impl!($ty, $selcode, $borrowed, RangeTo); #[cfg(feature = "std")] #[cfg_attr(docsrs, doc(cfg(feature = "std")))] rxml_index_impl!($ty, $selcode, $borrowed, RangeToInclusive); rxml_index_impl!($borrowed, $selcode, $borrowed, Range); rxml_index_impl!($borrowed, $selcode, $borrowed, RangeFrom); rxml_index_impl!($borrowed, $selcode, $borrowed, RangeFull); rxml_index_impl!($borrowed, $selcode, $borrowed, RangeInclusive); rxml_index_impl!($borrowed, $selcode, $borrowed, RangeTo); rxml_index_impl!($borrowed, $selcode, $borrowed, RangeToInclusive); #[cfg(feature = "std")] #[cfg_attr(docsrs, doc(cfg(feature = "std")))] impl $ty { /// Splits the string into two at the given byte index. #[doc = rxml_split_off_panics!($ty)] /// /// # Example /// #[doc = rxml_split_off_example!($ty)] pub fn split_off(&mut self, at: usize) -> Self { let other = self.0.split_off(at); if !<$borrowed>::verify(&other) || !<$borrowed>::verify(&self.0) { panic!(concat!("split string is not a valid ", stringify!($ty))); } // SAFETY: please please point $selcode at the right stuff, kthxbai. 
unsafe { <$ty>::from_str_unchecked(other) } } } impl $borrowed { fn verify(s: &str) -> bool { let firstchar = s.chars().next(); return $selcode(firstchar); } /// Divide one string slice into two at an index. /// /// # Panics /// /// Panics if `mid` is not on a UTF-8 code point boundary, or if /// it is past the end of the last code point of the slice, or /// if either resulting part is not a valid slice of this type. /// /// # Example #[doc = rxml_split_at_example!($borrowed)] pub fn split_at(&self, mid: usize) -> (&Self, &Self) { let (a, b) = self.0.split_at(mid); if !Self::verify(a) || !Self::verify(b) { panic!(concat!("split_at result is not a valid ", stringify!($borrowed))); } // SAFETY: please please point $selcode at the right stuff, kthxbai. unsafe { ( Self::from_str_unchecked(a), Self::from_str_unchecked(b), ) } } /// Divide one mutable string slice into two at an index. /// /// # Panics /// /// Panics if `mid` is not on a UTF-8 code point boundary, or if /// it is past the end of the last code point of the slice, or /// if either resulting part is not a valid slice of this type. pub fn split_at_mut(&mut self, mid: usize) -> (&mut Self, &mut Self) { let (a, b) = self.0.split_at_mut(mid); if !Self::verify(a) || !Self::verify(b) { panic!(concat!("split_at_mut result is not a valid ", stringify!($borrowed))); } // SAFETY: please please point $selcode at the right stuff, kthxbai. unsafe { ( Self::from_str_unchecked_mut(a), Self::from_str_unchecked_mut(b), ) } } } } } macro_rules! rxml_custom_string_type_pair { ( $(#[$ownedmeta:meta])* pub struct $owned:ident($string:ident) use $check:ident; $(#[$borrowedmeta:meta])* pub struct $borrowed:ident(str); ) => { #[cfg(feature = "std")] rxml_custom_string_type!{ $(#[$ownedmeta])* #[cfg_attr(docsrs, doc(cfg(feature = "std")))] pub struct $owned($string) use $check => $borrowed; } rxml_custom_str_type!{ $(#[$borrowedmeta])* pub struct $borrowed(str) use $check => $owned; } } } rxml_custom_string_type_pair! 
{
    // Declares the owned `Name` and the borrowed `NameStr`; the pair macro
    // hands `validate_name` to both as their validation function.

    /// String which conforms to the Name production of XML 1.0.
    ///
    /// [`Name`] corresponds to a (restricted) [`String`]. For a [`str`]-like type
    /// with the same restrictions, see [`NameStr`].
    ///
    /// If using the `rxml` crate and the `macros` feature of the `rxml` crate
    /// is enabled, `&NameStr` can be created from a string literal at compile
    /// time using the `rxml::xml_name` macro.
    ///
    /// Since [`Name`] (indirectly) derefs to [`str`], all (non-mutable)
    /// methods from [`str`] are available.
    ///
    /// # Formal definition
    ///
    /// The data inside [`Name`] (and [`NameStr`]) is guaranteed to conform to
    /// the `Name` production of the below grammar, quoted from
    /// [XML 1.0 § 2.3](https://www.w3.org/TR/REC-xml/#NT-NameStartChar):
    ///
    /// ```text
    /// [4]  NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6]
    ///                        | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D]
    ///                        | [#x37F-#x1FFF] | [#x200C-#x200D]
    ///                        | [#x2070-#x218F] | [#x2C00-#x2FEF]
    ///                        | [#x3001-#xD7FF] | [#xF900-#xFDCF]
    ///                        | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
    /// [4a] NameChar      ::= NameStartChar | "-" | "." | [0-9] | #xB7
    ///                        | [#x0300-#x036F] | [#x203F-#x2040]
    /// [5]  Name          ::= NameStartChar (NameChar)*
    /// ```
    pub struct Name(CompactString) use validate_name;

    /// str which conforms to the Name production of XML 1.0.
    ///
    /// [`NameStr`] corresponds to a (restricted) [`str`]. For a [`String`]-like
    /// type with the same restrictions as well as the formal definition of those
    /// restrictions, see [`Name`].
    ///
    /// If using the `rxml` crate and the `macros` feature of the `rxml` crate
    /// is enabled, `&NameStr` can be created from a string literal at compile
    /// time using the `rxml::xml_name` macro.
    ///
    /// Since [`NameStr`] derefs to [`str`], all (non-mutable) methods from
    /// [`str`] are available.
    pub struct NameStr(str);
}

// Slicing and splitting support for the pair declared above; the arguments
// of this invocation follow.
rxml_splitting_impls!
{
    Name => selectors::CLASS_XML_NAMESTART => NameStr
}

#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
impl Name {
    /// Split the name at a colon, if it exists.
    ///
    /// If the name contains no colon, the function returns `(None, self)`.
    /// If the name contains exactly one colon, the function returns the part
    /// before the colon (the prefix) in the first return value and the part
    /// following the colon (the suffix) as second return value.
    ///
    /// If neither of the two cases apply or the string on either side of the
    /// colon is empty, an error is returned.
    ///
    /// This function optimizes the split (compared to operating on a borrowed
    /// [`NameStr`] and then cloning the returned parts) by avoiding
    /// unnecessary copying.
    ///
    /// # Errors
    ///
    /// - [`Error::EmptyNamePart`] if the first colon is the first or the
    ///   last character of the name.
    /// - [`Error::MultiColonName`] if the name contains more than one colon.
    /// - [`Error::InvalidLocalName`] if the part after the colon does not
    ///   begin with a name start character.
    ///
    /// # Example
    ///
    /// ```
    /// # use rxml_validation::Name;
    /// # use std::convert::TryFrom;
    /// // with prefix:
    /// let name = Name::try_from("foo:bar").unwrap();
    /// let (prefix, local) = name.split_name().unwrap();
    /// assert_eq!(prefix.unwrap(), "foo");
    /// assert_eq!(local, "bar");
    ///
    /// // without prefix:
    /// let name = Name::try_from("no-prefix").unwrap();
    /// let (prefix, local) = name.split_name().unwrap();
    /// assert!(prefix.is_none());
    /// assert_eq!(local, "no-prefix");
    /// ```
    pub fn split_name(self) -> Result<(Option<NcName>, NcName), Error> {
        let mut name = self.0;
        let colon_pos = match name.find(':') {
            // SAFETY: a valid Name which contains no colon is a valid NcName.
            None => return Ok((None, unsafe { NcName::from_auto_unchecked(name) })),
            Some(pos) => pos,
        };
        // A Name is never empty, so `len() - 1` cannot underflow.
        if colon_pos == 0 || colon_pos == name.len() - 1 {
            return Err(Error::EmptyNamePart);
        }

        // After this, `name` still ends with the colon; it is popped below
        // once the local name has been accepted.
        let localname = name.split_off(colon_pos + 1);
        let mut prefix = name;

        if localname.contains(':') {
            // Namespaces in XML 1.0 (Third Edition) namespace-well-formed criterium 1
            return Err(Error::MultiColonName);
        }
        // The colon is not the last character (checked above), so the local
        // name has a first character.
        if !selectors::CLASS_XML_NAMESTART.select(localname.chars().next().unwrap()) {
            // Namespaces in XML 1.0 (Third Edition) NcName production
            return Err(Error::InvalidLocalName);
        }

        prefix.pop();
        // do not shrink to fit here -- the prefix will be used when the element
        // is finalized to put it on the stack for quick validation of the
        // token.

        debug_assert!(!prefix.is_empty());
        debug_assert!(!localname.is_empty());
        // SAFETY: both parts are non-empty, free of colons and begin with a
        // name start character (the prefix inherits the first character of
        // the Name, the local name was checked above); all remaining
        // characters were already valid name characters of the original Name.
        Ok((
            Some(unsafe { NcName::from_auto_unchecked(prefix) }),
            unsafe { NcName::from_auto_unchecked(localname) },
        ))
    }
}

impl NameStr {
    /// Create an owned copy of the string as [`Name`].
    ///
    /// This operation is also available as implementation of the `Into`
    /// trait.
    #[cfg(feature = "std")]
    #[cfg_attr(docsrs, doc(cfg(feature = "std")))]
    pub fn to_name(&self) -> Name {
        self.into()
    }

    /// Split the name at a colon, if it exists.
    ///
    /// If the name contains no colon, the function returns `(None, self)`.
    /// If the name contains exactly one colon, the function returns the part
    /// before the colon (the prefix) in the first return value and the part
    /// following the colon (the suffix) as second return value.
    ///
    /// If neither of the two cases apply or the string on either side of the
    /// colon is empty, an error is returned.
    ///
    /// # Errors
    ///
    /// - [`Error::EmptyNamePart`] if the first colon is the first or the
    ///   last character of the name.
    /// - [`Error::MultiColonName`] if the name contains more than one colon.
    /// - [`Error::InvalidLocalName`] if the part after the colon does not
    ///   begin with a name start character.
    ///
    /// # Example
    ///
    /// ```
    /// # use rxml_validation::NameStr;
    /// # use std::convert::TryInto;
    /// // with prefix:
    /// let name: &NameStr = "foo:bar".try_into().unwrap();
    /// let (prefix, local) = name.split_name().unwrap();
    /// assert_eq!(prefix.unwrap(), "foo");
    /// assert_eq!(local, "bar");
    ///
    /// // without prefix:
    /// let name: &NameStr = "no-prefix".try_into().unwrap();
    /// let (prefix, local) = name.split_name().unwrap();
    /// assert!(prefix.is_none());
    /// assert_eq!(local, "no-prefix");
    /// ```
    pub fn split_name(&self) -> Result<(Option<&'_ NcNameStr>, &'_ NcNameStr), Error> {
        let name = &self.0;
        let colon_pos = match name.find(':') {
            // SAFETY: a valid Name which contains no colon is a valid NcName.
            None => return Ok((None, unsafe { NcNameStr::from_str_unchecked(name) })),
            Some(pos) => pos,
        };
        // A Name is never empty, so `len() - 1` cannot underflow.
        if colon_pos == 0 || colon_pos == name.len() - 1 {
            return Err(Error::EmptyNamePart);
        }

        let (prefix, localname) = name.split_at(colon_pos);
        // Skip the colon itself (a single byte).
        let localname = &localname[1..];

        if localname.contains(':') {
            // Namespaces in XML 1.0 (Third Edition) namespace-well-formed criterium 1
            return Err(Error::MultiColonName);
        }
        // The colon is not the last character (checked above), so the local
        // name has a first character.
        if !selectors::CLASS_XML_NAMESTART.select(localname.chars().next().unwrap()) {
            // Namespaces in XML 1.0 (Third Edition) NcName production
            return Err(Error::InvalidLocalName);
        }

        debug_assert!(!prefix.is_empty());
        debug_assert!(!localname.is_empty());
        // SAFETY: both parts are non-empty, free of colons and begin with a
        // name start character (the prefix inherits the first character of
        // the Name, the local name was checked above); all remaining
        // characters were already valid name characters of the original Name.
        Ok((
            Some(unsafe { NcNameStr::from_str_unchecked(prefix) }),
            unsafe { NcNameStr::from_str_unchecked(localname) },
        ))
    }
}

#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
impl From<NcName> for Name {
    fn from(other: NcName) -> Self {
        other.into_name()
    }
}

impl<'x> From<&'x NcNameStr> for &'x NameStr {
    fn from(other: &'x NcNameStr) -> Self {
        other.as_namestr()
    }
}

rxml_custom_string_type_pair! {
    /// String which conforms to the NcName production of Namespaces in XML 1.0.
    ///
    /// [`NcName`] corresponds to a (restricted) [`String`]. For a [`str`]-like
    /// type with the same restrictions, see [`NcNameStr`].
/// /// If using the `rxml` crate and the `macros` feature of the `rxml` crate /// is enabled, `&NcNameStr` can be created from a string literal at /// compile time using the `rxml::xml_ncname` macro. /// /// Since [`NcName`] (indirectly) derefs to [`str`], all (non-mutable) /// methods from [`str`] are available. /// /// # Formal definition /// /// The data inside [`NcName`] (and [`NcNameStr`]) is guaranteed to conform to /// the `NcName` production of the below grammar, quoted from /// [Namespaces in XML 1.0 § 3](https://www.w3.org/TR/REC-xml-names/#NT-NcName): /// /// ```text /// [4] NcName ::= Name - (Char* ':' Char*) /* An XML Name, minus the ":" */ /// ``` pub struct NcName(CompactString) use validate_ncname; /// str which conforms to the NcName production of Namespaces in XML 1.0. /// /// [`NcNameStr`] corresponds to a (restricted) [`str`]. For a [`String`]-like /// type with the same restrictions as well as the formal definition of those /// restrictions, see [`NcName`]. /// /// If using the `rxml` crate and the `macros` feature of the `rxml` crate /// is enabled, `&NcNameStr` can be created from a string literal at /// compile time using the `rxml::xml_ncname` macro. /// /// Since [`NcNameStr`] derefs to [`str`], all (non-mutable) methods from /// [`str`] are available. pub struct NcNameStr(str); } rxml_splitting_impls! { NcName => selectors::CLASS_XML_NAMESTART => NcNameStr } #[cfg(feature = "std")] #[cfg_attr(docsrs, doc(cfg(feature = "std")))] impl NcName { /// Compose two [`NcName`] objects to one [`Name`], separating them with /// a colon. /// /// As an [`NcName`] is always a valid [`Name`], the composition of the /// two with a `:` as separator is also a valid [`Name`]. /// /// This is the inverse of [`Name::split_name()`]. 
/// /// # Example /// /// ``` /// # use rxml_validation::NcName; /// # use std::convert::TryFrom; /// let prefix = NcName::try_from("xmlns").unwrap(); /// let localname = NcName::try_from("stream").unwrap(); /// assert_eq!(prefix.add_suffix(&localname), "xmlns:stream"); /// ``` pub fn add_suffix(self, suffix: &NcNameStr) -> Name { let mut s: String = self.0.into(); s.reserve(suffix.len() + 1); s.push_str(":"); s.push_str(suffix); // SAFETY: NcName cannot contain a colon; Name is NcName with colons, // so we can concat two NcNames to a Name. unsafe { Name::from_string_unchecked(s) } } /// Convert the [`NcName`] into a [`Name`]. /// /// This operation is O(1). /// /// This operation is also available as implementation of the `Into` /// trait. pub fn into_name(self) -> Name { // SAFETY: NcName is a strict subset of Name unsafe { Name::from_auto_unchecked(self.0) } } } #[cfg(feature = "std")] #[cfg_attr(docsrs, doc(cfg(feature = "std")))] impl AsRef for NcName { fn as_ref(&self) -> &NameStr { >::as_ref(self).as_ref() } } #[cfg(feature = "std")] #[cfg_attr(docsrs, doc(cfg(feature = "std")))] impl Borrow for NcName { fn borrow(&self) -> &NameStr { >::borrow(self).borrow() } } impl NcNameStr { /// Create an owned copy of the string as [`NcName`]. /// /// This operation is also available as implementation of the `Into` /// trait. #[cfg(feature = "std")] #[cfg_attr(docsrs, doc(cfg(feature = "std")))] pub fn to_ncname(&self) -> NcName { self.into() } /// Create an owned copy of the string as [`Name`]. #[cfg(feature = "std")] #[cfg_attr(docsrs, doc(cfg(feature = "std")))] pub fn to_name(&self) -> Name { self.to_ncname().into() } /// Access the string as [`NameStr`]. /// /// This operation is O(1), as Names are a strict superset of NcNames. pub fn as_namestr<'x>(&'x self) -> &'x NameStr { // SAFETY: NcName is a strict subset of Name unsafe { NameStr::from_str_unchecked(&self.0) } } /// Compose two [`NcName`] objects to one [`Name`], separating them with /// a colon. 
///
    /// As an [`NcName`] is always a valid [`Name`], the composition of the
    /// two with a `:` as separator is also a valid [`Name`].
    ///
    /// This is the inverse of [`NameStr::split_name()`].
    ///
    /// # Example
    ///
    /// ```
    /// # use rxml_validation::NcNameStr;
    /// # use std::convert::TryFrom;
    /// let prefix = <&NcNameStr>::try_from("xmlns").unwrap();
    /// let localname = <&NcNameStr>::try_from("stream").unwrap();
    /// assert_eq!(prefix.with_suffix(localname), "xmlns:stream");
    /// ```
    #[cfg(feature = "std")]
    #[cfg_attr(docsrs, doc(cfg(feature = "std")))]
    pub fn with_suffix(&self, suffix: &NcNameStr) -> Name {
        // Allocate once: prefix + separator + suffix.
        let mut s = String::with_capacity(self.len() + 1 + suffix.len());
        s.push_str(self);
        s.push(':');
        s.push_str(suffix);
        // SAFETY: NcName cannot contain a colon; Name is NcName with colons,
        // so we can concat two NcNames to a Name.
        unsafe { Name::from_string_unchecked(s) }
    }
}

impl AsRef<NameStr> for NcNameStr {
    fn as_ref(&self) -> &NameStr {
        // SAFETY: NameStr rules are a superset of the NcNameStr rules.
        unsafe { NameStr::from_str_unchecked(&self.0) }
    }
}

impl Borrow<NameStr> for NcNameStr {
    fn borrow(&self) -> &NameStr {
        self.as_ref()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn split_name_on_namestr_with_valid_name() {
        let nm: &NameStr = "foo:bar".try_into().unwrap();
        let (prefix, localname) = nm.split_name().unwrap();
        assert_eq!(prefix.unwrap(), "foo");
        assert_eq!(localname, "bar");
    }

    #[test]
    fn split_name_on_namestr_with_prefixless_name() {
        let nm: &NameStr = "bar".try_into().unwrap();
        let (prefix, localname) = nm.split_name().unwrap();
        assert_eq!(prefix, None);
        assert_eq!(localname, "bar");
    }

    #[test]
    fn split_name_on_namestr_rejects_localname_with_non_namestart_first_char() {
        let nm: &NameStr = "foo:-bar".try_into().unwrap();
        let result = nm.split_name();
        assert!(matches!(result.err().unwrap(), Error::InvalidLocalName,));
    }

    #[test]
    #[cfg(feature = "std")]
    fn split_name_on_name_with_valid_name() {
        let nm: Name = "foo:bar".try_into().unwrap();
        let (prefix, localname) = nm.split_name().unwrap();
        assert_eq!(prefix.unwrap(), "foo");
        assert_eq!(localname, "bar");
    }

    #[test]
    #[cfg(feature = "std")]
    fn split_name_on_name_with_prefixless_name() {
        let nm: Name = "bar".try_into().unwrap();
        let (prefix, localname) = nm.split_name().unwrap();
        assert_eq!(prefix, None);
        assert_eq!(localname, "bar");
    }

    #[test]
    #[cfg(feature = "std")]
    fn split_name_on_name_rejects_localname_with_non_namestart_first_char() {
        let nm: Name = "foo:-bar".try_into().unwrap();
        let result = nm.split_name();
        assert!(matches!(result.err().unwrap(), Error::InvalidLocalName,));
    }

    #[test]
    fn split_namestr_on_name_with_valid_name() {
        let nm: &NameStr = "foo:bar".try_into().unwrap();
        let (prefix, localname) = nm.split_name().unwrap();
        assert_eq!(prefix.unwrap(), "foo");
        assert_eq!(localname, "bar");
    }

    #[test]
    fn split_namestr_on_name_with_prefixless_name() {
        let nm: &NameStr = "bar".try_into().unwrap();
        let (prefix, localname) = nm.split_name().unwrap();
        assert_eq!(prefix, None);
        assert_eq!(localname, "bar");
    }

    #[test]
    fn split_namestr_on_name_rejects_localname_with_non_namestart_first_char() {
        let nm: &NameStr = "foo:-bar".try_into().unwrap();
        let result = nm.split_name();
        assert!(matches!(result.err().unwrap(), Error::InvalidLocalName,));
    }

    #[test]
    #[should_panic(expected = "slice is not a valid NameStr")]
    fn namestr_slice_panics_on_non_name_start() {
        let x: &NameStr = "foo-bar".try_into().unwrap();
        let _: &NameStr = &x[3..];
    }

    #[test]
    #[should_panic(expected = "slice is not a valid NameStr")]
    #[cfg(feature = "std")]
    fn name_slice_panics_on_non_name_start() {
        let x: Name = "foo-bar".try_into().unwrap();
        let _: &NameStr = &x[3..];
    }

    #[test]
    #[should_panic(expected = "split string is not a valid Name")]
    #[cfg(feature = "std")]
    fn name_split_off_refuses_empty_lhs() {
        let mut x: Name = "foobar".try_into().unwrap();
        x.split_off(0);
    }

    #[test]
    #[should_panic(expected = "split string is not a valid Name")]
    #[cfg(feature = "std")]
    fn name_split_off_refuses_empty_rhs() {
        let mut x: Name = "foobar".try_into().unwrap();
        x.split_off(6);
    }

    #[test]
    #[should_panic(expected = "slice is not a valid NcNameStr")]
    fn ncnamestr_slice_panics_on_non_name_start() {
        let x: &NcNameStr = "foo-bar".try_into().unwrap();
        let _: &NcNameStr = &x[3..];
    }

    #[test]
    #[should_panic(expected = "slice is not a valid NcNameStr")]
    #[cfg(feature = "std")]
    fn ncname_slice_panics_on_non_name_start() {
        let x: NcName = "foo-bar".try_into().unwrap();
        let _: &NcNameStr = &x[3..];
    }

    #[test]
    fn ncname_refuses_empty_slice() {
        match <&str as TryInto<&NcNameStr>>::try_into("") {
            Err(_) => (),
            other => panic!("unexpected result: {:?}", other),
        }
    }

    #[test]
    #[cfg(feature = "std")]
    fn ncnamestr_with_suffix_joins_with_colon() {
        let prefix: &NcNameStr = "xmlns".try_into().unwrap();
        let localname: &NcNameStr = "stream".try_into().unwrap();
        assert_eq!(prefix.with_suffix(localname), "xmlns:stream");
    }
}