tag.
fn make_parser() -> html::Parser
{
html::parse_fragment(
RcDom::default(),
html::ParseOpts::default(),
QualName::new(None, ns!(html), local_name!("div")),
vec![],
)
}
}
/// Reports whether `attr`, when it appears on `element`, holds a URL.
///
/// `href` and `src` count on every element. The remaining attributes only
/// count on the specific elements they are paired with below.
fn is_url_attr(element: &str, attr: &str) -> bool {
    match (element, attr) {
        // Recognised no matter which element carries them.
        (_, "href") | (_, "src") => true,
        // Recognised only in combination with a particular element.
        ("form", "action")
        | ("object", "data")
        | ("button", "formaction")
        | ("input", "formaction")
        | ("a", "ping")
        | ("video", "poster") => true,
        _ => false,
    }
}
/// Reports whether `element` is one of the SVG element names listed below.
///
/// The comparison is exact and case-sensitive.
fn is_svg_tag(element: &str) -> bool {
    // Element names taken from https://svgwg.org/svg2-draft/eltindex.html
    const SVG_ELEMENTS: &[&str] = &[
        "a", "animate", "animateMotion", "animateTransform", "circle", "clipPath",
        "defs", "desc", "discard", "ellipse",
        "feBlend", "feColorMatrix", "feComponentTransfer", "feComposite",
        "feConvolveMatrix", "feDiffuseLighting", "feDisplacementMap", "feDistantLight",
        "feDropShadow", "feFlood", "feFuncA", "feFuncB", "feFuncG", "feFuncR",
        "feGaussianBlur", "feImage", "feMerge", "feMergeNode", "feMorphology",
        "feOffset", "fePointLight", "feSpecularLighting", "feSpotLight", "feTile",
        "feTurbulence", "filter", "foreignObject",
        "g", "image", "line", "linearGradient",
        "marker", "mask", "metadata", "mpath",
        "path", "pattern", "polygon", "polyline",
        "radialGradient", "rect",
        "script", "set", "stop", "style", "svg", "switch", "symbol",
        "text", "textPath", "title", "tspan",
        "use", "view",
    ];
    SVG_ELEMENTS.contains(&element)
}
/// Reports whether `element` is one of the MathML element names listed below.
///
/// The comparison is exact and case-sensitive.
fn is_mathml_tag(element: &str) -> bool {
    // NOTE(review): the comment that used to sit here pointed at the SVG 2
    // element index (https://svgwg.org/svg2-draft/eltindex.html), which looks
    // like a copy-paste from `is_svg_tag`. Confirm the intended MathML source.
    const MATHML_ELEMENTS: &[&str] = &[
        "abs", "and", "annotation", "annotation-xml", "apply", "approx",
        "arccos", "arccosh", "arccot", "arccoth", "arccsc", "arccsch",
        "arcsec", "arcsech", "arcsin", "arcsinh", "arctan", "arctanh", "arg",
        "bind", "bvar",
        "card", "cartesianproduct", "cbytes", "ceiling", "cerror", "ci", "cn",
        "codomain", "complexes", "compose", "condition", "conjugate",
        "cos", "cosh", "cot", "coth", "cs", "csc", "csch", "csymbol", "curl",
        "declare", "degree", "determinant", "diff", "divergence", "divide",
        "domain", "domainofapplication",
        "emptyset", "eq", "equivalent", "eulergamma", "exists", "exp", "exponentiale",
        "factorial", "factorof", "false", "floor", "fn", "forall",
        "gcd", "geq", "grad", "gt",
        "ident", "image", "imaginary", "imaginaryi", "implies", "in", "infinity",
        "int", "integers", "intersect", "interval", "inverse",
        "lambda", "laplacian", "lcm", "leq", "limit", "list", "ln", "log",
        "logbase", "lowlimit", "lt",
        "maction", "maligngroup", "malignmark", "math", "matrix", "matrixrow",
        "max", "mean", "median", "menclose", "merror", "mfenced", "mfrac", "mglyph",
        "mi", "min", "minus", "mlabeledtr", "mlongdiv", "mmultiscripts", "mn", "mo",
        "mode", "moment", "momentabout", "mover", "mpadded", "mphantom",
        "mprescripts", "mroot", "mrow", "ms", "mscarries", "mscarry", "msgroup",
        "msline", "mspace", "msqrt", "msrow", "mstack", "mstyle", "msub", "msubsup",
        "msup", "mtable", "mtd", "mtext", "mtr", "munder", "munderover",
        "naturalnumbers", "neq", "none", "not", "notanumber", "notin",
        "notprsubset", "notsubset",
        "or", "otherwise", "outerproduct",
        "partialdiff", "pi", "piece", "piecewise", "plus", "power", "primes",
        "product", "prsubset",
        "quotient",
        "rationals", "real", "reals", "reln", "rem", "root",
        "scalarproduct", "sdev", "sec", "sech", "selector", "semantics", "sep",
        "set", "setdiff", "share", "sin", "sinh", "span", "subset", "sum",
        "tan", "tanh", "tendsto", "times", "transpose", "true",
        "union", "uplimit",
        "variance", "vector", "vectorproduct",
        "xor",
    ];
    MATHML_ELEMENTS.contains(&element)
}
/// Reports whether `url` is a relative URL.
///
/// A URL counts as relative exactly when `Url::parse` rejects it with
/// `RelativeUrlWithoutBase`. A successful parse, or any other parse error,
/// yields `false`.
fn is_url_relative(url: &str) -> bool {
    match Url::parse(url) {
        Err(url::ParseError::RelativeUrlWithoutBase) => true,
        _ => false,
    }
}
/// Policy for [relative URLs], that is, URLs that do not specify the scheme in full.
///
/// This policy kicks in, if set, for any attribute named `src` or `href`,
/// as well as the `data` attribute of an `object` tag.
///
/// [relative URLs]: struct.Builder.html#method.url_relative
///
/// # Examples
///
/// ## `Deny`
///
/// * `` is a file-relative URL, and will be removed
/// * `` is a domain-relative URL, and will be removed
/// * `` is a scheme-relative URL, and will be removed
/// * `` is an absolute URL, and will be kept
///
/// ## `PassThrough`
///
/// No changes will be made to any URLs, except if a disallowed scheme is used.
///
/// ## `RewriteWithBase`
///
/// If the base is set to `http://notriddle.com/some-directory/some-file`
///
/// * `` will be rewritten to ``
/// * `` will be rewritten to ``
/// * `` will be rewritten to ``
/// * `` is an absolute URL, so it will be kept as-is
///
/// ## `Custom`
///
/// Pass the relative URL to a function.
/// If it returns `Some(string)`, then that one gets used.
/// Otherwise, it will remove the attribute (like `Deny` does).
///
/// use std::borrow::Cow;
/// fn is_absolute_path(url: &str) -> bool {
/// let u = url.as_bytes();
/// // `//a/b/c` is "protocol-relative", meaning "a" is a hostname
/// // `/a/b/c` is an absolute path, and what we want to do stuff to.
/// u.get(0) == Some(&b'/') && u.get(1) != Some(&b'/')
/// }
/// fn evaluate(url: &str) -> Option> {
/// if is_absolute_path(url) {
/// Some(Cow::Owned(String::from("/root") + url))
/// } else {
/// Some(Cow::Borrowed(url))
/// }
/// }
/// fn main() {
/// let a = ammonia::Builder::new()
/// .url_relative(ammonia::UrlRelative::Custom(Box::new(evaluate)))
/// .clean("fixedpassedskipped")
/// .to_string();
/// assert_eq!(a, "fixedpassedskipped");
/// }
///
/// This function is only applied to relative URLs.
/// To filter all of the URLs,
/// use the not-yet-implemented Content Security Policy.
pub enum UrlRelative {
/// Relative URLs will be completely stripped from the document.
Deny,
/// Relative URLs will be passed through unchanged.
PassThrough,
/// Relative URLs will be changed into absolute URLs, based on this base URL.
RewriteWithBase(Url),
/// Rewrite URLs with a custom function.
Custom(Box),
// Do not allow the user to exhaustively match on UrlRelative,
// because we may add new items to it later.
#[doc(hidden)]
__NonExhaustive,
}
impl fmt::Debug for UrlRelative {
    /// Writes the variant's qualified name. `RewriteWithBase` additionally
    /// shows its base URL in parentheses, and `Custom` does not show the
    /// boxed evaluator.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            UrlRelative::Deny => f.write_str("UrlRelative::Deny"),
            UrlRelative::PassThrough => f.write_str("UrlRelative::PassThrough"),
            UrlRelative::RewriteWithBase(base) => {
                write!(f, "UrlRelative::RewriteWithBase({})", base)
            }
            UrlRelative::Custom(_) => f.write_str("UrlRelative::Custom"),
            // Hidden marker variant; formatting it is treated as a bug.
            UrlRelative::__NonExhaustive => unreachable!(),
        }
    }
}
/// Types that implement this trait can be used to convert a relative URL into an absolute URL.
///
/// This evaluator is only called when the URL is relative; absolute URLs are not evaluated.
///
/// See [`url_relative`][url_relative] for more details.
///
/// [url_relative]: struct.Builder.html#method.url_relative
pub trait UrlRelativeEvaluate: Send + Sync {
    /// Return `None` to remove the attribute. Return `Some(str)` to replace it with a new string.
    ///
    /// The returned value may borrow from the input URL (`Cow::Borrowed`) or
    /// own a freshly built string (`Cow::Owned`).
    fn evaluate<'a>(&self, _: &'a str) -> Option<Cow<'a, str>>;
}

/// Blanket implementation: any thread-safe function or closure of the shape
/// `Fn(&str) -> Option<Cow<str>>` can be used as an evaluator directly.
impl<T> UrlRelativeEvaluate for T
where
    T: Fn(&str) -> Option<Cow<'_, str>> + Send + Sync,
{
    fn evaluate<'a>(&self, url: &'a str) -> Option<Cow<'a, str>> {
        self(url)
    }
}
impl fmt::Debug for dyn AttributeFilter {
    /// Writes the fixed label `AttributeFilter`; nothing about the concrete
    /// filter behind the trait object is shown.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "AttributeFilter")
    }
}
/// Types that implement this trait can be used to remove or rewrite arbitrary attributes.
///
/// See [`attribute_filter`][attribute_filter] for more details.
///
/// [attribute_filter]: struct.Builder.html#method.attribute_filter
pub trait AttributeFilter: Send + Sync {
    /// Return `None` to remove the attribute. Return `Some(str)` to replace it with a new string.
    ///
    /// The blanket implementation names the three arguments, in order,
    /// `element`, `attribute` and `value`. The result may borrow from the
    /// value argument.
    fn filter<'a>(&self, _: &str, _: &str, _: &'a str) -> Option<Cow<'a, str>>;
}

/// Blanket implementation: any thread-safe `'static` function or closure of
/// the shape `Fn(&str, &str, &str) -> Option<Cow<str>>` can be used as a
/// filter directly.
impl<T> AttributeFilter for T
where
    T: for<'a> Fn(&str, &str, &'a str) -> Option<Cow<'a, str>> + Send + Sync + 'static,
{
    fn filter<'a>(&self, element: &str, attribute: &str, value: &'a str) -> Option<Cow<'a, str>> {
        self(element, attribute, value)
    }
}
/// A sanitized HTML document.
///
/// The `Document` type is an opaque struct representing an HTML fragment that was sanitized by
/// `ammonia`. It can be converted to a [`String`] or written to a [`Write`] instance. This allows
/// users to avoid buffering the serialized representation to a [`String`] when desired.
///
/// This type is opaque to insulate the caller from breaking changes in the `html5ever` interface.
///
/// Note that this type wraps an `html5ever` DOM tree. `ammonia` does not support streaming, so
/// the complete fragment needs to be stored in memory during processing. Currently, `Document`
/// is backed by an [`html5ever::rcdom::Node`] object.
///
/// [`String`]: https://doc.rust-lang.org/nightly/std/string/struct.String.html
/// [`Write`]: https://doc.rust-lang.org/nightly/std/io/trait.Write.html
/// [`html5ever::rcdom::Node`]: ../markup5ever/rcdom/struct.Node.html
///
/// # Examples
///
///     use ammonia::Builder;
///
///     let input = "This is an Ammonia example.";
///     let output = "This is an Ammonia example.";
///
///     let document = Builder::new()
///         .clean(input);
///     assert_eq!(document.to_string(), output);
// The single field is the `RcDom` tree holding the sanitized fragment.
pub struct Document(RcDom);
impl Document {
    /// Serializes a `Document` instance to a `String`.
    ///
    /// This method returns a [`String`] with the sanitized HTML. This is the simplest way to use
    /// `ammonia`.
    ///
    /// [`String`]: https://doc.rust-lang.org/nightly/std/string/struct.String.html
    ///
    /// # Panics
    ///
    /// Panics if writing to the in-memory buffer fails or if the serialized bytes are not
    /// valid UTF-8.
    ///
    /// # Examples
    ///
    ///     use ammonia::Builder;
    ///
    ///     let input = "Some HTML here";
    ///     let output = "Some HTML here";
    ///
    ///     let document = Builder::new()
    ///         .clean(input);
    ///     assert_eq!(document.to_string(), output);
    pub fn to_string(&self) -> String {
        // Reuse the writer-based serialization so both entry points share one code path.
        let mut ret_val = Vec::new();
        self.write_to(&mut ret_val)
            .expect("Writing to a string shouldn't fail (except on OOM)");
        String::from_utf8(ret_val).expect("html5ever only supports UTF8")
    }

    /// Serializes a `Document` instance to a writer.
    ///
    /// This method writes the sanitized HTML to a [`Write`] instance, avoiding a buffering step.
    ///
    /// To avoid consuming the writer, a mutable reference can be passed, like in the example below.
    ///
    /// Note that the in-memory representation of `Document` is larger than the serialized
    /// `String`.
    ///
    /// [`Write`]: https://doc.rust-lang.org/nightly/std/io/trait.Write.html
    ///
    /// # Errors
    ///
    /// Returns whatever error the underlying `serialize` call reports.
    ///
    /// # Examples
    ///
    ///     use ammonia::Builder;
    ///
    ///     let input = "Some HTML here";
    ///     let expected = b"Some HTML here";
    ///
    ///     let document = Builder::new()
    ///         .clean(input);
    ///
    ///     let mut sanitized = Vec::new();
    ///     document.write_to(&mut sanitized)
    ///         .expect("Writing to a string should not fail (except on OOM)");
    ///     assert_eq!(sanitized, expected);
    pub fn write_to<W>(&self, writer: W) -> io::Result<()>
    where
        W: io::Write,
    {
        let opts = Self::serialize_opts();
        // Only the first child of the document node is serialized. The index
        // panics if the document node has no children.
        // NOTE(review): presumably the fragment parser always produces that
        // root child; confirm against `make_parser`.
        let inner: SerializableHandle = self.0.document.children.borrow()[0].clone().into();
        serialize(writer, &inner, opts)
    }

    /// Exposes the `Document` instance as an [`html5ever::rcdom::Handle`][h].
    ///
    /// This method returns the inner object backing the `Document` instance. This allows
    /// making further changes to the DOM without introducing redundant serialization and
    /// parsing.
    ///
    /// Note that this method should be considered unstable and sits outside of the semver
    /// stability guarantees. It may change, break, or go away at any time, either because
    /// of `html5ever` changes or `ammonia` implementation changes.
    ///
    /// [h]: ../markup5ever/rcdom/type.Handle.html
    ///
    /// For this method to be accessible, a `cfg` flag is required. The easiest way is to
    /// use the `RUSTFLAGS` environment variable:
    ///
    /// ```text
    /// RUSTFLAGS='--cfg ammonia_unstable' cargo build
    /// ```
    ///
    /// on Unix-like platforms, or
    ///
    /// ```text
    /// set RUSTFLAGS=--cfg ammonia_unstable
    /// cargo build
    /// ```
    ///
    /// on Windows.
    ///
    /// This requirement also applies to crates that transitively depend on crates that use
    /// this flag.
    ///
    /// # Examples
    ///
    ///     use ammonia::Builder;
    ///     use maplit::hashset;
    ///     use html5ever::serialize::{serialize, SerializeOpts};
    ///
    ///     # use std::error::Error;
    ///     # fn do_main() -> Result<(), Box<dyn Error>> {
    ///     let input = "<a>one link</a> and <a>one more</a>";
    ///     let expected = "<a>one more</a> and <a>one link</a>";
    ///
    ///     let document = Builder::new()
    ///         .link_rel(None)
    ///         .clean(input);
    ///
    ///     let mut node = document.to_dom_node();
    ///     node.children.borrow_mut().reverse();
    ///
    ///     let mut buf = Vec::new();
    ///     serialize(&mut buf, &node, SerializeOpts::default())?;
    ///     let output = String::from_utf8(buf)?;
    ///
    ///     assert_eq!(output, expected);
    ///     # Ok(())
    ///     # }
    ///     # fn main() { do_main().unwrap() }
    #[cfg(ammonia_unstable)]
    pub fn to_dom_node(&self) -> Handle {
        // Hands out a clone of the handle to the same first child that the
        // serialization methods use.
        self.0.document.children.borrow()[0].clone()
    }

    /// Options shared by every serialization of a `Document`; currently the
    /// `SerializeOpts` defaults.
    fn serialize_opts() -> SerializeOpts {
        SerializeOpts::default()
    }
}
impl Clone for Document {
    /// Produces an independent copy by serializing this document to a string
    /// and parsing that string again with a fresh fragment parser.
    fn clone(&self) -> Self {
        let serialized = self.to_string();
        let reparsed = Builder::make_parser().one(serialized.as_str());
        Document(reparsed)
    }
}
impl fmt::Display for Document {
    /// Writes the serialized HTML of the document, exactly as `to_string`
    /// returns it. Width and precision flags on the formatter are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let html = self.to_string();
        f.write_str(&html)
    }
}
impl fmt::Debug for Document {
    /// Writes the serialized HTML wrapped as `Document(...)`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let html = self.to_string();
        f.write_str("Document(")?;
        f.write_str(&html)?;
        f.write_str(")")
    }
}
impl From for String {
fn from(document: Document) -> Self {
document.to_string()
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn deeply_nested_whitelisted() {
clean(&"".repeat(60_000));
}
#[test]
fn deeply_nested_blacklisted() {
clean(&"".repeat(60_000));
}
#[test]
fn deeply_nested_alternating() {
clean(&"".repeat(35_000));
}
#[test]
fn included_angles() {
let fragment = "1 < 2";
let result = clean(fragment);
assert_eq!(result, "1 < 2");
}
#[test]
fn remove_script() {
let fragment = "an example";
let result = clean(fragment);
assert_eq!(result, "an example");
}
#[test]
fn ignore_link() {
let fragment = "a good example";
let expected = "a \
good example";
let result = clean(fragment);
assert_eq!(result, expected);
}
#[test]
fn remove_unsafe_link() {
let fragment = "an evil example";
let result = clean(fragment);
assert_eq!(
result,
"an evil example"
);
}
#[test]
fn remove_js_link() {
let fragment = "an evil example";
let result = clean(fragment);
assert_eq!(result, "an evil example");
}
#[test]
fn tag_rebalance() {
let fragment = "AWESOME!";
let result = clean(fragment);
assert_eq!(result, "AWESOME!");
}
#[test]
fn allow_url_relative() {
let fragment = "Test";
let result = Builder::new()
.url_relative(UrlRelative::PassThrough)
.clean(fragment)
.to_string();
assert_eq!(
result,
"Test"
);
}
#[test]
fn rewrite_url_relative() {
let fragment = "Test";
let result = Builder::new()
.url_relative(UrlRelative::RewriteWithBase(
Url::parse("http://example.com/").unwrap(),
))
.clean(fragment)
.to_string();
assert_eq!(
result,
"Test"
);
}
#[test]
fn rewrite_url_relative_with_invalid_url() {
// Reduced from https://github.com/Bauke/ammonia-crash-test
let fragment = r##"test"##;
let result = Builder::new()
.url_relative(UrlRelative::RewriteWithBase(
Url::parse("http://example.com/").unwrap(),
))
.clean(fragment)
.to_string();
assert_eq!(result, r##"test"##);
}
#[test]
fn attribute_filter_nop() {
let fragment = "Test";
let result = Builder::new()
.attribute_filter(|elem, attr, value| {
assert_eq!("a", elem);
assert!(
match (attr, value) {
("href", "test") => true,
("rel", "noopener noreferrer") => true,
_ => false,
},
"{}",
value.to_string()
);
Some(value.into())
})
.clean(fragment)
.to_string();
assert_eq!(
result,
"Test"
);
}
#[test]
fn attribute_filter_drop() {
let fragment = "Test
";
let result = Builder::new()
.attribute_filter(|elem, attr, value| {
assert_eq!("img", elem);
match (attr, value) {
("src", "imgtest") => None,
("alt", "test") => Some(value.into()),
_ => panic!("unexpected"),
}
})
.clean(fragment)
.to_string();
assert_eq!(result, r#"Test
"#);
}
#[test]
fn url_filter_absolute() {
let fragment = "Test
";
let result = Builder::new()
.attribute_filter(|elem, attr, value| {
assert_eq!("img", elem);
match (attr, value) {
("src", "imgtest") => {
Some(format!("https://example.com/images/{}", value).into())
}
("alt", "test") => None,
_ => panic!("unexpected"),
}
})
.url_relative(UrlRelative::RewriteWithBase(
Url::parse("http://wrong.invalid/").unwrap(),
))
.clean(fragment)
.to_string();
assert_eq!(
result,
r#"Test
"#
);
}
#[test]
fn url_filter_relative() {
let fragment = "Test
";
let result = Builder::new()
.attribute_filter(|elem, attr, value| {
assert_eq!("img", elem);
match (attr, value) {
("src", "imgtest") => Some("rewrite".into()),
("alt", "test") => Some("altalt".into()),
_ => panic!("unexpected"),
}
})
.url_relative(UrlRelative::RewriteWithBase(
Url::parse("https://example.com/base/#").unwrap(),
))
.clean(fragment)
.to_string();
assert_eq!(
result,
r#"Test
"#
);
}
#[test]
fn rewrite_url_relative_no_rel() {
let fragment = "Test";
let result = Builder::new()
.url_relative(UrlRelative::RewriteWithBase(
Url::parse("http://example.com/").unwrap(),
))
.link_rel(None)
.clean(fragment)
.to_string();
assert_eq!(result, "Test");
}
#[test]
fn deny_url_relative() {
let fragment = "Test";
let result = Builder::new()
.url_relative(UrlRelative::Deny)
.clean(fragment)
.to_string();
assert_eq!(result, "Test");
}
#[test]
fn replace_rel() {
let fragment = "Test";
let result = Builder::new()
.url_relative(UrlRelative::PassThrough)
.clean(fragment)
.to_string();
assert_eq!(
result,
"Test"
);
}
#[test]
fn consider_rel_still_banned() {
let fragment = "Test";
let result = Builder::new()
.url_relative(UrlRelative::PassThrough)
.link_rel(None)
.clean(fragment)
.to_string();
assert_eq!(result, "Test");
}
#[test]
fn object_data() {
let fragment = "Test\
M";
let expected = r#"TestM"#;
let result = Builder::new()
.tags(hashset!["span", "object"])
.generic_attributes(hashset!["data"])
.clean(fragment)
.to_string();
assert_eq!(result, expected);
}
#[test]
fn remove_attributes() {
let fragment = "";
let result = Builder::new().clean(fragment);
assert_eq!(
result.to_string(),
""
);
}
#[test]
fn quotes_in_attrs() {
let fragment = "contents";
let result = clean(fragment);
assert_eq!(result, "contents");
}
#[test]
#[should_panic]
fn panic_if_rel_is_allowed_and_replaced_generic() {
Builder::new()
.link_rel(Some("noopener noreferrer"))
.generic_attributes(hashset!["rel"])
.clean("something");
}
#[test]
#[should_panic]
fn panic_if_rel_is_allowed_and_replaced_a() {
Builder::new()
.link_rel(Some("noopener noreferrer"))
.tag_attributes(hashmap![
"a" => hashset!["rel"],
])
.clean("something");
}
#[test]
fn no_panic_if_rel_is_allowed_and_replaced_span() {
Builder::new()
.link_rel(Some("noopener noreferrer"))
.tag_attributes(hashmap![
"span" => hashset!["rel"],
])
.clean("s");
}
#[test]
fn no_panic_if_rel_is_allowed_and_not_replaced_generic() {
Builder::new()
.link_rel(None)
.generic_attributes(hashset!["rel"])
.clean("s");
}
#[test]
fn no_panic_if_rel_is_allowed_and_not_replaced_a() {
Builder::new()
.link_rel(None)
.tag_attributes(hashmap![
"a" => hashset!["rel"],
])
.clean("s");
}
#[test]
fn dont_close_void_elements() {
let fragment = "
";
let result = clean(fragment);
assert_eq!(result.to_string(), "
");
}
#[should_panic]
#[test]
fn panic_on_allowed_classes_tag_attributes() {
let fragment = "Hey
";
Builder::new()
.link_rel(None)
.tag_attributes(hashmap![
"p" => hashset!["class"],
"a" => hashset!["class"],
])
.allowed_classes(hashmap![
"p" => hashset!["foo", "bar"],
"a" => hashset!["baz"],
])
.clean(fragment);
}
#[should_panic]
#[test]
fn panic_on_allowed_classes_generic_attributes() {
let fragment = "Hey
";
Builder::new()
.link_rel(None)
.generic_attributes(hashset!["class", "href", "some-foo"])
.allowed_classes(hashmap![
"p" => hashset!["foo", "bar"],
"a" => hashset!["baz"],
])
.clean(fragment);
}
#[test]
fn remove_non_allowed_classes() {
let fragment = "Hey
";
let result = Builder::new()
.link_rel(None)
.allowed_classes(hashmap![
"p" => hashset!["foo", "bar"],
"a" => hashset!["baz"],
])
.clean(fragment);
assert_eq!(
result.to_string(),
"Hey
"
);
}
#[test]
fn remove_non_allowed_classes_with_tag_class() {
let fragment = "Hey
";
let result = Builder::new()
.link_rel(None)
.tag_attributes(hashmap![
"div" => hashset!["class"],
])
.allowed_classes(hashmap![
"p" => hashset!["foo", "bar"],
"a" => hashset!["baz"],
])
.clean(fragment);
assert_eq!(
result.to_string(),
"Hey
"
);
}
#[test]
fn remove_non_allowed_attributes_with_tag_attribute_values() {
let fragment = "";
let result = Builder::new()
.tag_attribute_values(hashmap![
"p" => hashmap![
"data-label" => hashset!["bar"],
],
])
.tag_attributes(hashmap![
"p" => hashset!["name"],
])
.clean(fragment);
assert_eq!(result.to_string(), "",);
}
#[test]
fn keep_allowed_attributes_with_tag_attribute_values() {
let fragment = "";
let result = Builder::new()
.tag_attribute_values(hashmap![
"p" => hashmap![
"data-label" => hashset!["bar"],
],
])
.tag_attributes(hashmap![
"p" => hashset!["name"],
])
.clean(fragment);
assert_eq!(
result.to_string(),
"",
);
}
#[test]
fn tag_attribute_values_case_insensitive() {
let fragment = "";
let result = Builder::new()
.tags(hashset!["input"])
.tag_attribute_values(hashmap![
"input" => hashmap![
"type" => hashset!["checkbox"],
],
])
.tag_attributes(hashmap![
"input" => hashset!["name"],
])
.clean(fragment);
assert_eq!(result.to_string(), "",);
}
#[test]
fn set_tag_attribute_values() {
let fragment = "Link";
let result = Builder::new()
.link_rel(None)
.add_tag_attributes("a", &["target"])
.set_tag_attribute_value("a", "target", "_blank")
.clean(fragment);
assert_eq!(
result.to_string(),
"Link",
);
}
#[test]
fn update_existing_set_tag_attribute_values() {
let fragment = "Link";
let result = Builder::new()
.link_rel(None)
.add_tag_attributes("a", &["target"])
.set_tag_attribute_value("a", "target", "_blank")
.clean(fragment);
assert_eq!(
result.to_string(),
"Link",
);
}
#[test]
fn unwhitelisted_set_tag_attribute_values() {
let fragment = "hi";
let result = Builder::new()
.set_tag_attribute_value("my-elem", "my-attr", "val")
.clean(fragment);
assert_eq!(result.to_string(), "hi",);
}
#[test]
fn remove_entity_link() {
let fragment = "Click me!";
let result = clean(fragment);
assert_eq!(
result.to_string(),
"Click me!"
);
}
#[test]
fn remove_relative_url_evaluate() {
    // Exercises `UrlRelative::Custom` with an evaluator that prefixes
    // absolute paths with `/root`, drops URLs starting with `ba`, and passes
    // everything else through.
    // NOTE(review): the fixture strings below contain no markup, so no URL
    // ever reaches the evaluator; they look like they lost their `<a href>`
    // tags. Confirm against the upstream test before relying on it.
    fn is_absolute_path(url: &str) -> bool {
        // `//a/b/c` is "protocol-relative", meaning "a" is a hostname
        // `/a/b/c` is an absolute path, and what we want to do stuff to.
        url.starts_with('/') && !url.starts_with("//")
    }
    fn is_banned(url: &str) -> bool {
        url.starts_with("ba")
    }
    fn evaluate(url: &str) -> Option<Cow<'_, str>> {
        if is_absolute_path(url) {
            Some(Cow::Owned(String::from("/root") + url))
        } else if is_banned(url) {
            None
        } else {
            Some(Cow::Borrowed(url))
        }
    }
    let a = Builder::new()
        .url_relative(UrlRelative::Custom(Box::new(evaluate)))
        .clean("bannedfixedpassedskipped")
        .to_string();
    assert_eq!(a, "bannedfixedpassedskipped");
}
#[test]
fn remove_relative_url_evaluate_b() {
    // Same evaluator as `remove_relative_url_evaluate`, run over a fixture
    // whose labels are all "banned".
    // NOTE(review): the fixture strings below contain no markup, so no URL
    // ever reaches the evaluator; they look like they lost their `<a href>`
    // tags. Confirm against the upstream test before relying on it.
    fn is_absolute_path(url: &str) -> bool {
        // `//a/b/c` is "protocol-relative", meaning "a" is a hostname
        // `/a/b/c` is an absolute path, and what we want to do stuff to.
        url.starts_with('/') && !url.starts_with("//")
    }
    fn is_banned(url: &str) -> bool {
        url.starts_with("ba")
    }
    fn evaluate(url: &str) -> Option<Cow<'_, str>> {
        if is_absolute_path(url) {
            Some(Cow::Owned(String::from("/root") + url))
        } else if is_banned(url) {
            None
        } else {
            Some(Cow::Borrowed(url))
        }
    }
    let a = Builder::new()
        .url_relative(UrlRelative::Custom(Box::new(evaluate)))
        .clean("bannedbannedbanned")
        .to_string();
    assert_eq!(a, "bannedbannedbanned");
}
#[test]
fn remove_relative_url_evaluate_c() {
    // Don't run on absolute URLs.
    // The evaluator would replace any URL it is given with "invalid", so an
    // unchanged result shows it was not called.
    // NOTE(review): the fixture below contains no markup at all; it looks
    // like it lost an `<a href>` with an absolute URL. Confirm upstream.
    fn evaluate(_: &str) -> Option<Cow<'_, str>> {
        Some(Cow::Owned(String::from("invalid")))
    }
    let a = Builder::new()
        .url_relative(UrlRelative::Custom(Box::new(evaluate)))
        .clean("google")
        .to_string();
    assert_eq!(
        a,
        "google"
    );
}
#[test]
fn clean_children_of_bad_element() {
let fragment = "ab";
let result = Builder::new().clean(fragment);
assert_eq!(result.to_string(), "ab");
}
#[test]
fn reader_input() {
let fragment = b"an example";
let result = Builder::new().clean_from_reader(&fragment[..]);
assert!(result.is_ok());
assert_eq!(result.unwrap().to_string(), "an example");
}
#[test]
fn reader_non_utf8() {
let fragment = b"non-utf8 \xF0\x90\x80string";
let result = Builder::new().clean_from_reader(&fragment[..]);
assert!(result.is_ok());
assert_eq!(result.unwrap().to_string(), "non-utf8 \u{fffd}string");
}
#[test]
fn display_impl() {
let fragment = r#"a link"#;
let result = Builder::new().link_rel(None).clean(fragment);
assert_eq!(format!("{}", result), "a link");
}
#[test]
fn debug_impl() {
let fragment = r#"a link"#;
let result = Builder::new().link_rel(None).clean(fragment);
assert_eq!(format!("{:?}", result), "Document(a link)");
}
#[cfg(ammonia_unstable)]
#[test]
fn to_dom_node() {
let fragment = r#"a link"#;
let result = Builder::new().link_rel(None).clean(fragment);
let _node = result.to_dom_node();
}
#[test]
fn string_from_document() {
let fragment = r#"a link"#;
let result = String::from(Builder::new().link_rel(None).clean(fragment));
assert_eq!(format!("{}", result), "a link");
}
// Compile-time check: only accepts values whose type is `Sync`.
fn require_sync<T: Sync>(_: T) {}
// Compile-time check: only accepts values whose type is `Send`.
fn require_send<T: Send>(_: T) {}
#[test]
fn require_sync_and_send() {
require_sync(Builder::new());
require_send(Builder::new());
}
#[test]
fn id_prefixed() {
let fragment = "";
let result = String::from(
Builder::new()
.tag_attributes(hashmap![
"a" => hashset!["id"],
])
.id_prefix(Some("prefix-"))
.clean(fragment),
);
assert_eq!(
result.to_string(),
""
);
}
#[test]
fn id_already_prefixed() {
let fragment = "";
let result = String::from(
Builder::new()
.tag_attributes(hashmap![
"a" => hashset!["id"],
])
.id_prefix(Some("prefix-"))
.clean(fragment),
);
assert_eq!(
result.to_string(),
""
);
}
#[test]
fn clean_content_tags() {
let fragment = "";
let result = String::from(
Builder::new()
.clean_content_tags(hashset!["script"])
.clean(fragment),
);
assert_eq!(result.to_string(), "");
}
#[test]
fn only_clean_content_tags() {
let fragment = "This isstill here!
";
let result = String::from(
Builder::new()
.clean_content_tags(hashset!["script"])
.clean(fragment),
);
assert_eq!(result.to_string(), "This isstill here!
");
}
#[test]
fn clean_removed_default_tag() {
let fragment = "This isstill here!
";
let result = String::from(
Builder::new()
.rm_tags(hashset!["a"])
.rm_tag_attributes("a", hashset!["href", "hreflang"])
.clean_content_tags(hashset!["script"])
.clean(fragment),
);
assert_eq!(result.to_string(), "This isstill here!
");
}
#[test]
#[should_panic]
fn panic_on_clean_content_tag_attribute() {
Builder::new()
.rm_tags(std::iter::once("a"))
.clean_content_tags(hashset!["a"])
.clean("");
}
#[test]
#[should_panic]
fn panic_on_clean_content_tag() {
Builder::new().clean_content_tags(hashset!["a"]).clean("");
}
#[test]
fn clean_text_test() {
assert_eq!(
clean_text(" is