markup5ever_rcdom-0.1.0/Cargo.toml.orig
[package]
name = "markup5ever_rcdom"
version = "0.1.0"
authors = [ "The html5ever Project Developers" ]
license = "MIT / Apache-2.0"
repository = "https://github.com/servo/html5ever"
description = "Basic, unsupported DOM structure for use by tests in html5ever/xml5ever"
readme = "README.md"
documentation = "https://docs.rs/markup5ever_rcdom"
categories = [ "parser-implementations", "web-programming" ]
edition = "2018"
[lib]
path = "lib.rs"
[dependencies]
tendril = "0.4"
html5ever = { version = "0.25", path = "../html5ever" }
markup5ever = { version = "0.10", path = "../markup5ever" }
xml5ever = { version = "0.16", path = "../xml5ever" }
[dev-dependencies]
serde_json = "1.0"
rustc-test = "0.3"
[[test]]
name = "html-tokenizer"
harness = false
[[test]]
name = "html-tree-builder"
harness = false
[[test]]
name = "xml-tree-builder"
harness = false
[[test]]
name = "xml-tokenizer"
harness = false
markup5ever_rcdom-0.1.0/Cargo.toml
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
edition = "2018"
name = "markup5ever_rcdom"
version = "0.1.0"
authors = ["The html5ever Project Developers"]
description = "Basic, unsupported DOM structure for use by tests in html5ever/xml5ever"
documentation = "https://docs.rs/markup5ever_rcdom"
readme = "README.md"
categories = ["parser-implementations", "web-programming"]
license = "MIT / Apache-2.0"
repository = "https://github.com/servo/html5ever"
[lib]
path = "lib.rs"
[[test]]
name = "html-tokenizer"
harness = false
[[test]]
name = "html-tree-builder"
harness = false
[[test]]
name = "xml-tree-builder"
harness = false
[[test]]
name = "xml-tokenizer"
harness = false
[dependencies.html5ever]
version = "0.25"
[dependencies.markup5ever]
version = "0.10"
[dependencies.tendril]
version = "0.4"
[dependencies.xml5ever]
version = "0.16"
[dev-dependencies.rustc-test]
version = "0.3"
[dev-dependencies.serde_json]
version = "1.0"
markup5ever_rcdom-0.1.0/data/test/ignore
markup5ever_rcdom-0.1.0/examples/hello_xml.rs
#!/usr/bin/env run-cargo-script
//! This is a regular crate doc comment, but it also contains a partial
//! Cargo manifest. Note the use of a *fenced* code block, and the
//! `cargo` "language".
//!
//! ```cargo
//! [dependencies]
//! xml5ever = "0.2.0"
//! tendril = "0.1.3"
//! ```
extern crate markup5ever_rcdom as rcdom;
extern crate xml5ever;
use std::default::Default;
use rcdom::{NodeData, RcDom};
use xml5ever::driver::parse_document;
use xml5ever::tendril::TendrilSink;
use xml5ever::tree_builder::TreeSink;
fn main() {
// To parse a string into a tree of nodes, we need to invoke
// `parse_document` and supply it with a TreeSink implementation (RcDom).
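    // `one()` (from `TendrilSink`) feeds the entire input as a single chunk and
    // returns the finished sink output, here the populated RcDom.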
    let dom: RcDom = parse_document(RcDom::default(), Default::default()).one("<hello>XML</hello>");
// Do some processing
let doc = &dom.document;
let hello_node = &doc.children.borrow()[0];
let hello_tag = &*dom.elem_name(hello_node).local;
let text_node = &hello_node.children.borrow()[0];
let xml = {
let mut xml = String::new();
match &text_node.data {
&NodeData::Text { ref contents } => {
xml.push_str(&contents.borrow());
},
_ => {},
};
xml
};
println!("{:?} {:?}!", hello_tag, xml);
}
markup5ever_rcdom-0.1.0/examples/html2html.rs
// Copyright 2014-2017 The html5ever Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! Parse and re-serialize an HTML5 document.
//!
//! This is meant to produce the exact same output (ignoring stderr) as
//!
//! java -classpath htmlparser-1.4.jar nu.validator.htmlparser.tools.HTML2HTML
//!
//! where htmlparser-1.4.jar comes from http://about.validator.nu/htmlparser/
extern crate html5ever;
extern crate markup5ever_rcdom as rcdom;
use std::default::Default;
use std::io::{self, Write};
use html5ever::driver::ParseOpts;
use html5ever::tendril::TendrilSink;
use html5ever::tree_builder::TreeBuilderOpts;
use html5ever::{parse_document, serialize};
use rcdom::{RcDom, SerializableHandle};
fn main() {
let opts = ParseOpts {
tree_builder: TreeBuilderOpts {
drop_doctype: true,
..Default::default()
},
..Default::default()
};
let stdin = io::stdin();
let dom = parse_document(RcDom::default(), opts)
.from_utf8()
.read_from(&mut stdin.lock())
.unwrap();
// The validator.nu HTML2HTML always prints a doctype at the very beginning.
io::stdout()
.write_all(b"\n")
.ok()
.expect("writing DOCTYPE failed");
let document: SerializableHandle = dom.document.clone().into();
serialize(&mut io::stdout(), &document, Default::default())
.ok()
.expect("serialization failed");
}
markup5ever_rcdom-0.1.0/examples/print-rcdom.rs
// Copyright 2014-2017 The html5ever Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#[macro_use]
extern crate html5ever;
extern crate markup5ever_rcdom as rcdom;
use std::default::Default;
use std::io;
use std::iter::repeat;
use std::string::String;
use html5ever::parse_document;
use html5ever::tendril::TendrilSink;
use rcdom::{Handle, NodeData, RcDom};
// This is not proper HTML serialization, of course.
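// For input like `<p class="x">hi</p>` the dump produced below looks roughly like:
//
//   #Document
//       <html>
//           <head>
//           <body>
//               <p class="x">
//                   #text: hi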
fn walk(indent: usize, handle: &Handle) {
let node = handle;
// FIXME: don't allocate
print!("{}", repeat(" ").take(indent).collect::());
match node.data {
NodeData::Document => println!("#Document"),
NodeData::Doctype {
ref name,
ref public_id,
ref system_id,
        } => println!("<!DOCTYPE {} \"{}\" \"{}\">", name, public_id, system_id),
NodeData::Text { ref contents } => {
println!("#text: {}", escape_default(&contents.borrow()))
},
        NodeData::Comment { ref contents } => println!("<!-- {} -->", escape_default(contents)),
NodeData::Element {
ref name,
ref attrs,
..
} => {
assert!(name.ns == ns!(html));
print!("<{}", name.local);
for attr in attrs.borrow().iter() {
assert!(attr.name.ns == ns!());
print!(" {}=\"{}\"", attr.name.local, attr.value);
}
println!(">");
},
NodeData::ProcessingInstruction { .. } => unreachable!(),
}
for child in node.children.borrow().iter() {
walk(indent + 4, child);
}
}
// FIXME: Copy of str::escape_default from std, which is currently unstable
pub fn escape_default(s: &str) -> String {
s.chars().flat_map(|c| c.escape_default()).collect()
}
fn main() {
let stdin = io::stdin();
let dom = parse_document(RcDom::default(), Default::default())
.from_utf8()
.read_from(&mut stdin.lock())
.unwrap();
walk(0, &dom.document);
if !dom.errors.is_empty() {
println!("\nParse errors:");
for err in dom.errors.iter() {
println!(" {}", err);
}
}
}
markup5ever_rcdom-0.1.0/examples/xml_tree_printer.rs
#!/usr/bin/env run-cargo-script
//! This is a regular crate doc comment, but it also contains a partial
//! Cargo manifest. Note the use of a *fenced* code block, and the
//! `cargo` "language".
//!
//! ```cargo
//! [dependencies]
//! xml5ever = "0.2.0"
//! tendril = "0.1.3"
//! ```
extern crate markup5ever_rcdom as rcdom;
extern crate xml5ever;
use std::default::Default;
use std::io;
use std::string::String;
use rcdom::{Handle, NodeData, RcDom};
use xml5ever::driver::parse_document;
use xml5ever::tendril::TendrilSink;
fn walk(prefix: &str, handle: &Handle) {
let node = handle;
print!("{}", prefix);
match node.data {
NodeData::Document => println!("#document"),
NodeData::Text { ref contents } => println!("#text {}", escape_default(&contents.borrow())),
NodeData::Element { ref name, .. } => {
println!("{}", name.local);
},
_ => {},
}
let new_indent = {
let mut temp = String::new();
temp.push_str(prefix);
temp.push_str(" ");
temp
};
for child in node
.children
.borrow()
.iter()
.filter(|child| match child.data {
NodeData::Text { .. } | NodeData::Element { .. } => true,
_ => false,
})
{
walk(&new_indent, child);
}
}
pub fn escape_default(s: &str) -> String {
s.chars().flat_map(|c| c.escape_default()).collect()
}
fn main() {
let stdin = io::stdin();
// To parse XML into a tree form, we need a TreeSink
    // luckily xml5ever comes with a static RC-backed tree representation.
let dom: RcDom = parse_document(RcDom::default(), Default::default())
.from_utf8()
.read_from(&mut stdin.lock())
.unwrap();
// Execute our visualizer on RcDom
walk("", &dom.document);
}
markup5ever_rcdom-0.1.0/lib.rs
// Copyright 2014-2017 The html5ever Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! A simple reference-counted DOM.
//!
//! This is sufficient as a static parse tree, but don't build a
//! web browser using it. :)
//!
//! A DOM is a [tree structure] with ordered children that can be represented in an XML-like
//! format. For example, the following graph
//!
//! ```text
//! div
//! +- "text node"
//! +- span
//! ```
//! in HTML would be serialized as
//!
//! ```html
//! <div>
//!   text node
//!   <span></span>
//! </div>
//! ```
//!
//! See the [document object model article on wikipedia][dom wiki] for more information.
//!
//! This implementation stores the information associated with each node once, and then hands out
//! refs to children. The nodes themselves are reference-counted to avoid copying - you can create
//! a new ref and then a node will outlive the document. Nodes own their children, but only have
//! weak references to their parents.
//!
//! [tree structure]: https://en.wikipedia.org/wiki/Tree_(data_structure)
//! [dom wiki]: https://en.wikipedia.org/wiki/Document_Object_Model
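//!
//! A minimal sketch of typical use together with the `html5ever` parser (the same
//! pattern the examples in this crate follow):
//!
//! ```rust
//! use html5ever::parse_document;
//! use html5ever::tendril::TendrilSink;
//! use markup5ever_rcdom::{NodeData, RcDom};
//!
//! let dom: RcDom = parse_document(RcDom::default(), Default::default())
//!     .one("<title>Hello</title>");
//! // The document node owns strong references to its children.
//! for child in dom.document.children.borrow().iter() {
//!     if let NodeData::Element { ref name, .. } = child.data {
//!         println!("root element: {}", name.local);
//!     }
//! }
//! ```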
extern crate markup5ever;
extern crate tendril;
use std::borrow::Cow;
use std::cell::{Cell, RefCell};
use std::collections::HashSet;
use std::default::Default;
use std::fmt;
use std::io;
use std::mem;
use std::rc::{Rc, Weak};
use tendril::StrTendril;
use markup5ever::interface::tree_builder;
use markup5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
use markup5ever::serialize::TraversalScope;
use markup5ever::serialize::TraversalScope::{ChildrenOnly, IncludeNode};
use markup5ever::serialize::{Serialize, Serializer};
use markup5ever::Attribute;
use markup5ever::ExpandedName;
use markup5ever::QualName;
/// The different kinds of nodes in the DOM.
#[derive(Debug)]
pub enum NodeData {
    /// The `Document` itself - the root node of an HTML document.
Document,
/// A `DOCTYPE` with name, public id, and system id. See
/// [document type declaration on wikipedia][dtd wiki].
///
/// [dtd wiki]: https://en.wikipedia.org/wiki/Document_type_declaration
Doctype {
name: StrTendril,
public_id: StrTendril,
system_id: StrTendril,
},
/// A text node.
    Text { contents: RefCell<StrTendril> },
/// A comment.
Comment { contents: StrTendril },
/// An element with attributes.
Element {
name: QualName,
        attrs: RefCell<Vec<Attribute>>,
        /// For HTML \<template\> elements, the [template contents].
///
/// [template contents]: https://html.spec.whatwg.org/multipage/#template-contents
        template_contents: Option<Handle>,
/// Whether the node is a [HTML integration point].
///
/// [HTML integration point]: https://html.spec.whatwg.org/multipage/#html-integration-point
mathml_annotation_xml_integration_point: bool,
},
/// A Processing instruction.
ProcessingInstruction {
target: StrTendril,
contents: StrTendril,
},
}
/// A DOM node.
pub struct Node {
/// Parent node.
    pub parent: Cell<Option<WeakHandle>>,
"#);
// FIXME: test serialization of qualified tag/attribute names that can't be
// parsed from HTML
test!(attr_ns_1, r#""#);
test!(attr_ns_2, r#""#);
test!(attr_ns_3, r#""#);
test!(attr_ns_4, r#""#);
test_no_parse!(malformed_tokens, r#"foo
"#);
#[test]
fn doctype() {
    let dom = parse_document(RcDom::default(), ParseOpts::default()).one("<!doctype html>");
    dom.document.children.borrow_mut().truncate(1); // Remove <html>
let mut result = vec![];
let document: SerializableHandle = dom.document.clone().into();
serialize(&mut result, &document, Default::default()).unwrap();
    assert_eq!(String::from_utf8(result).unwrap(), "<!DOCTYPE html>");
}
#[test]
fn deep_tree() {
let parser = parse_fragment(
RcDom::default(),
ParseOpts::default(),
QualName::new(None, ns!(html), local_name!("div")),
vec![],
);
    let src = String::from("<b>".repeat(60_000)); // repeated open tags build a deeply nested fragment
let dom = parser.one(src);
let opts = SerializeOpts::default();
let mut ret_val = Vec::new();
let document: SerializableHandle = dom.document.clone().into();
serialize(&mut ret_val, &document, opts)
.expect("Writing to a string shouldn't fail (expect on OOM)");
}
markup5ever_rcdom-0.1.0/tests/html-tokenizer.rs
// Copyright 2014-2017 The html5ever Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
mod foreach_html5lib_test;
use foreach_html5lib_test::foreach_html5lib_test;
use html5ever::tendril::*;
use html5ever::tokenizer::states::{Plaintext, RawData, Rawtext, Rcdata};
use html5ever::tokenizer::BufferQueue;
use html5ever::tokenizer::{CharacterTokens, EOFToken, NullCharacterToken, ParseError};
use html5ever::tokenizer::{CommentToken, DoctypeToken, TagToken, Token};
use html5ever::tokenizer::{Doctype, EndTag, StartTag, Tag};
use html5ever::tokenizer::{TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts};
use html5ever::{namespace_url, ns, Attribute, LocalName, QualName};
use rustc_test::{DynTestFn, DynTestName, TestDesc, TestDescAndFn};
use serde_json::{Map, Value};
use std::borrow::Cow::Borrowed;
use std::default::Default;
use std::ffi::OsStr;
use std::io::Read;
use std::mem::replace;
use std::path::Path;
use std::{char, env};
// Return all ways of splitting the string into at most n
// possibly-empty pieces.
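// For illustration, splits("ab", 2) yields
// [["", "ab"], ["a", "b"], ["ab", ""], ["ab"]] (as tendrils).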
fn splits(s: &str, n: usize) -> Vec<Vec<StrTendril>> {
if n == 1 {
return vec![vec![s.to_tendril()]];
}
    let mut points: Vec<usize> = s.char_indices().map(|(n, _)| n).collect();
points.push(s.len());
// do this with iterators?
let mut out = vec![];
for p in points.into_iter() {
let y = &s[p..];
for mut x in splits(&s[..p], n - 1).into_iter() {
x.push(y.to_tendril());
out.push(x);
}
}
out.extend(splits(s, n - 1).into_iter());
out
}
struct TokenLogger {
    tokens: Vec<Token>,
current_str: StrTendril,
exact_errors: bool,
}
impl TokenLogger {
fn new(exact_errors: bool) -> TokenLogger {
TokenLogger {
tokens: vec![],
current_str: StrTendril::new(),
exact_errors: exact_errors,
}
}
// Push anything other than character tokens
fn push(&mut self, token: Token) {
self.finish_str();
self.tokens.push(token);
}
fn finish_str(&mut self) {
if self.current_str.len() > 0 {
let s = replace(&mut self.current_str, StrTendril::new());
self.tokens.push(CharacterTokens(s));
}
}
    fn get_tokens(mut self) -> Vec<Token> {
self.finish_str();
self.tokens
}
}
impl TokenSink for TokenLogger {
type Handle = ();
fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
match token {
CharacterTokens(b) => {
self.current_str.push_slice(&b);
},
NullCharacterToken => {
self.current_str.push_char('\0');
},
ParseError(_) => {
if self.exact_errors {
self.push(ParseError(Borrowed("")));
}
},
TagToken(mut t) => {
// The spec seems to indicate that one can emit
// erroneous end tags with attrs, but the test
// cases don't contain them.
match t.kind {
EndTag => {
t.self_closing = false;
t.attrs = vec![];
},
_ => t.attrs.sort_by(|a1, a2| a1.name.cmp(&a2.name)),
}
self.push(TagToken(t));
},
EOFToken => (),
_ => self.push(token),
}
TokenSinkResult::Continue
}
}
fn tokenize(input: Vec<StrTendril>, opts: TokenizerOpts) -> Vec<Token> {
let sink = TokenLogger::new(opts.exact_errors);
let mut tok = Tokenizer::new(sink, opts);
let mut buffer = BufferQueue::new();
for chunk in input.into_iter() {
buffer.push_back(chunk);
let _ = tok.feed(&mut buffer);
}
let _ = tok.feed(&mut buffer);
tok.end();
tok.sink.get_tokens()
}
trait JsonExt: Sized {
fn get_str(&self) -> String;
fn get_tendril(&self) -> StrTendril;
    fn get_nullable_tendril(&self) -> Option<StrTendril>;
fn get_bool(&self) -> bool;
    fn get_obj<'t>(&'t self) -> &'t Map<String, Value>;
    fn get_list<'t>(&'t self) -> &'t Vec<Value>;
fn find<'t>(&'t self, key: &str) -> &'t Self;
}
impl JsonExt for Value {
fn get_str(&self) -> String {
match *self {
Value::String(ref s) => s.to_string(),
_ => panic!("Value::get_str: not a String"),
}
}
fn get_tendril(&self) -> StrTendril {
match *self {
Value::String(ref s) => s.to_tendril(),
_ => panic!("Value::get_tendril: not a String"),
}
}
    fn get_nullable_tendril(&self) -> Option<StrTendril> {
match *self {
Value::Null => None,
Value::String(ref s) => Some(s.to_tendril()),
_ => panic!("Value::get_nullable_tendril: not a String"),
}
}
fn get_bool(&self) -> bool {
match *self {
Value::Bool(b) => b,
_ => panic!("Value::get_bool: not a Bool"),
}
}
    fn get_obj<'t>(&'t self) -> &'t Map<String, Value> {
match *self {
Value::Object(ref m) => &*m,
_ => panic!("Value::get_obj: not an Object"),
}
}
    fn get_list<'t>(&'t self) -> &'t Vec<Value> {
match *self {
Value::Array(ref m) => m,
_ => panic!("Value::get_list: not an Array"),
}
}
fn find<'t>(&'t self, key: &str) -> &'t Value {
self.get_obj().get(&key.to_string()).unwrap()
}
}
// Parse a JSON object (other than "ParseError") to a token.
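// The html5lib-tests encoding is an array whose first element names the token kind,
// e.g. ["StartTag", "a", {"href": "x"}], ["Comment", "hi"] or
// ["DOCTYPE", "html", null, null, true].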
fn json_to_token(js: &Value) -> Token {
let parts = js.get_list();
// Collect refs here so we don't have to use "ref" in all the patterns below.
let args: Vec<&Value> = parts[1..].iter().collect();
match &*parts[0].get_str() {
"DOCTYPE" => DoctypeToken(Doctype {
name: args[0].get_nullable_tendril(),
public_id: args[1].get_nullable_tendril(),
system_id: args[2].get_nullable_tendril(),
force_quirks: !args[3].get_bool(),
}),
"StartTag" => TagToken(Tag {
kind: StartTag,
name: LocalName::from(&*args[0].get_str()),
attrs: args[1]
.get_obj()
.iter()
.map(|(k, v)| Attribute {
name: QualName::new(None, ns!(), LocalName::from(&**k)),
value: v.get_tendril(),
})
.collect(),
self_closing: match args.get(2) {
Some(b) => b.get_bool(),
None => false,
},
}),
"EndTag" => TagToken(Tag {
kind: EndTag,
name: LocalName::from(&*args[0].get_str()),
attrs: vec![],
self_closing: false,
}),
"Comment" => CommentToken(args[0].get_tendril()),
"Character" => CharacterTokens(args[0].get_tendril()),
// We don't need to produce NullCharacterToken because
// the TokenLogger will convert them to CharacterTokens.
_ => panic!("don't understand token {:?}", parts),
}
}
// Parse the "output" field of the test case into a vector of tokens.
fn json_to_tokens(js: &Value, exact_errors: bool) -> Vec<Token> {
// Use a TokenLogger so that we combine character tokens separated
// by an ignored error.
let mut sink = TokenLogger::new(exact_errors);
for tok in js.get_list().iter() {
assert_eq!(
match *tok {
Value::String(ref s) if &s[..] == "ParseError" => {
sink.process_token(ParseError(Borrowed("")), 0)
},
_ => sink.process_token(json_to_token(tok), 0),
},
TokenSinkResult::Continue
);
}
sink.get_tokens()
}
// Undo the escaping in "doubleEscaped" tests.
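// For example, the six characters `\u0041` in a test string decode to "A"; escapes
// naming lone surrogates make this return None.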
fn unescape(s: &str) -> Option<String> {
let mut out = String::with_capacity(s.len());
let mut it = s.chars().peekable();
loop {
match it.next() {
None => return Some(out),
Some('\\') => {
if it.peek() != Some(&'u') {
panic!("can't understand escape");
}
drop(it.next());
let hex: String = it.by_ref().take(4).collect();
match u32::from_str_radix(&hex, 16).ok().and_then(char::from_u32) {
// Some of the tests use lone surrogates, but we have no
// way to represent them in the UTF-8 input to our parser.
// Since these can only come from script, we will catch
// them there.
None => return None,
Some(c) => out.push(c),
}
},
Some(c) => out.push(c),
}
}
}
fn unescape_json(js: &Value) -> Value {
match *js {
// unwrap is OK here because the spec'd *output* of the tokenizer never
// contains a lone surrogate.
Value::String(ref s) => Value::String(unescape(&s).unwrap()),
Value::Array(ref xs) => Value::Array(xs.iter().map(unescape_json).collect()),
Value::Object(ref obj) => {
let mut new_obj = Map::new();
for (k, v) in obj.iter() {
new_obj.insert(k.clone(), unescape_json(v));
}
Value::Object(new_obj)
},
_ => js.clone(),
}
}
fn mk_test(desc: String, input: String, expect: Value, opts: TokenizerOpts) -> TestDescAndFn {
TestDescAndFn {
desc: TestDesc::new(DynTestName(desc)),
testfn: DynTestFn(Box::new(move || {
// Split up the input at different points to test incremental tokenization.
let insplits = splits(&input, 3);
for input in insplits.into_iter() {
// Clone 'input' so we have it for the failure message.
// Also clone opts. If we don't, we get the wrong
// result but the compiler doesn't catch it!
// Possibly mozilla/rust#12223.
let output = tokenize(input.clone(), opts.clone());
let expect_toks = json_to_tokens(&expect, opts.exact_errors);
if output != expect_toks {
panic!(
"\ninput: {:?}\ngot: {:?}\nexpected: {:?}",
input, output, expect
);
}
}
})),
}
}
fn mk_tests(tests: &mut Vec<TestDescAndFn>, filename: &str, js: &Value) {
let obj = js.get_obj();
let mut input = js.find("input").get_str();
let mut expect = js.find("output").clone();
let desc = format!("tok: {}: {}", filename, js.find("description").get_str());
// "Double-escaped" tests require additional processing of
// the input and output.
if obj
.get(&"doubleEscaped".to_string())
.map_or(false, |j| j.get_bool())
{
match unescape(&input) {
None => return,
Some(i) => input = i,
}
expect = unescape_json(&expect);
}
// Some tests have a last start tag name.
let start_tag = obj.get(&"lastStartTag".to_string()).map(|s| s.get_str());
// Some tests want to start in a state other than Data.
let state_overrides = match obj.get(&"initialStates".to_string()) {
Some(&Value::Array(ref xs)) => xs
.iter()
.map(|s| {
Some(match &s.get_str()[..] {
"PLAINTEXT state" => Plaintext,
"RAWTEXT state" => RawData(Rawtext),
"RCDATA state" => RawData(Rcdata),
s => panic!("don't know state {}", s),
})
})
.collect(),
None => vec![None],
_ => panic!("don't understand initialStates value"),
};
// Build the tests.
for state in state_overrides.into_iter() {
for &exact_errors in [false, true].iter() {
let mut newdesc = desc.clone();
match state {
Some(s) => newdesc = format!("{} (in state {:?})", newdesc, s),
None => (),
};
if exact_errors {
newdesc = format!("{} (exact errors)", newdesc);
}
tests.push(mk_test(
newdesc,
input.clone(),
expect.clone(),
TokenizerOpts {
exact_errors: exact_errors,
initial_state: state,
last_start_tag_name: start_tag.clone(),
// Not discarding a BOM is what the test suite expects; see
// https://github.com/html5lib/html5lib-tests/issues/2
discard_bom: false,
..Default::default()
},
));
}
}
}
fn tests(src_dir: &Path) -> Vec<TestDescAndFn> {
let mut tests = vec![];
foreach_html5lib_test(
src_dir,
"tokenizer",
OsStr::new("test"),
|path, mut file| {
let mut s = String::new();
file.read_to_string(&mut s)
.ok()
.expect("file reading error");
let js: Value = serde_json::from_str(&s).ok().expect("json parse error");
match js.get_obj().get(&"tests".to_string()) {
Some(&Value::Array(ref lst)) => {
for test in lst.iter() {
mk_tests(
&mut tests,
path.file_name().unwrap().to_str().unwrap(),
test,
);
}
},
// xmlViolation.test doesn't follow this format.
_ => (),
}
},
);
tests
}
fn main() {
let args: Vec<_> = env::args().collect();
rustc_test::test_main(&args, tests(Path::new(env!("CARGO_MANIFEST_DIR"))));
}
markup5ever_rcdom-0.1.0/tests/html-tree-builder.rs
// Copyright 2014-2017 The html5ever Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
extern crate markup5ever_rcdom as rcdom;
extern crate rustc_test as test;
#[macro_use]
extern crate html5ever;
mod foreach_html5lib_test;
use foreach_html5lib_test::foreach_html5lib_test;
use std::collections::{HashMap, HashSet};
use std::default::Default;
use std::ffi::OsStr;
use std::io::BufRead;
use std::iter::repeat;
use std::mem::replace;
use std::path::Path;
use std::{env, fs, io};
use test::{DynTestName, TestDesc, TestDescAndFn, TestFn};
use html5ever::tendril::{StrTendril, TendrilSink};
use html5ever::{parse_document, parse_fragment, ParseOpts};
use html5ever::{LocalName, QualName};
use rcdom::{Handle, NodeData, RcDom};
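// Each ".dat" file holds tests in the html5lib tree-construction format: sections
// introduced by "#key" lines, roughly
//
//   #data
//   <p>One
//   #errors
//   ...
//   #document
//   | <html>
//   |   <head>
//   |   <body>
//   |     <p>
//   |       "One"
//
// parse_tests() gathers each test into a HashMap keyed by those section names.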
fn parse_tests<It: Iterator<Item = String>>(mut lines: It) -> Vec<HashMap<String, String>> {
let mut tests = vec![];
let mut test = HashMap::new();
    let mut key: Option<String> = None;
let mut val = String::new();
macro_rules! finish_val ( () => (
match key.take() {
None => (),
Some(key) => {
assert!(test.insert(key, replace(&mut val, String::new())).is_none());
}
}
));
macro_rules! finish_test ( () => (
if !test.is_empty() {
tests.push(replace(&mut test, HashMap::new()));
}
));
loop {
match lines.next() {
None => break,
Some(line) => {
if line.starts_with("#") {
finish_val!();
if line == "#data" {
finish_test!();
}
key = Some(line[1..].to_string());
} else {
val.push_str(&line);
val.push('\n');
}
},
}
}
finish_val!();
finish_test!();
tests
}
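// Dump a node in the html5lib "#document" notation used by the expected output:
// one "|"-prefixed line per node, two extra spaces of indent per level, e.g.
//
//   | <div>
//   |   class="foo"
//   |   "text child"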
fn serialize(buf: &mut String, indent: usize, handle: Handle) {
buf.push_str("|");
buf.push_str(&repeat(" ").take(indent).collect::());
let node = handle;
match node.data {
NodeData::Document => panic!("should not reach Document"),
NodeData::Doctype {
ref name,
ref public_id,
ref system_id,
} => {
buf.push_str("\n");
},
NodeData::Text { ref contents } => {
buf.push_str("\"");
buf.push_str(&contents.borrow());
buf.push_str("\"\n");
},
NodeData::Comment { ref contents } => {
buf.push_str("\n");
},
NodeData::Element {
ref name,
ref attrs,
..
} => {
buf.push_str("<");
match name.ns {
ns!(svg) => buf.push_str("svg "),
ns!(mathml) => buf.push_str("math "),
_ => (),
}
buf.push_str(&*name.local);
buf.push_str(">\n");
let mut attrs = attrs.borrow().clone();
attrs.sort_by(|x, y| x.name.local.cmp(&y.name.local));
// FIXME: sort by UTF-16 code unit
for attr in attrs.into_iter() {
buf.push_str("|");
buf.push_str(&repeat(" ").take(indent + 2).collect::());
match attr.name.ns {
ns!(xlink) => buf.push_str("xlink "),
ns!(xml) => buf.push_str("xml "),
ns!(xmlns) => buf.push_str("xmlns "),
_ => (),
}
buf.push_str(&format!("{}=\"{}\"\n", attr.name.local, attr.value));
}
},
NodeData::ProcessingInstruction { .. } => unreachable!(),
}
for child in node.children.borrow().iter() {
serialize(buf, indent + 2, child.clone());
}
if let NodeData::Element {
template_contents: Some(ref content),
..
} = node.data
{
buf.push_str("|");
buf.push_str(&repeat(" ").take(indent + 2).collect::());
buf.push_str("content\n");
for child in content.children.borrow().iter() {
serialize(buf, indent + 4, child.clone());
}
}
}
fn make_test(
    tests: &mut Vec<TestDescAndFn>,
    ignores: &HashSet<String>,
filename: &str,
idx: usize,
    fields: HashMap<String, String>,
) {
let scripting_flags = &[false, true];
let scripting_flags = if fields.contains_key("script-off") {
&scripting_flags[0..1]
} else if fields.contains_key("script-on") {
&scripting_flags[1..2]
} else {
&scripting_flags[0..2]
};
let name = format!("tb: {}-{}", filename, idx);
for scripting_enabled in scripting_flags {
let test = make_test_desc_with_scripting_flag(ignores, &name, &fields, *scripting_enabled);
tests.push(test);
}
}
fn make_test_desc_with_scripting_flag(
    ignores: &HashSet<String>,
name: &str,
    fields: &HashMap<String, String>,
scripting_enabled: bool,
) -> TestDescAndFn {
let get_field = |key| {
let field = fields.get(key).expect("missing field");
field.trim_end_matches('\n').to_string()
};
let mut data = fields.get("data").expect("missing data").to_string();
data.pop();
let expected = get_field("document");
let context = fields
.get("document-fragment")
.map(|field| context_name(field.trim_end_matches('\n')));
let ignore = ignores.contains(name);
let mut name = name.to_owned();
if scripting_enabled {
name.push_str(" (scripting enabled)");
} else {
name.push_str(" (scripting disabled)");
};
let mut opts: ParseOpts = Default::default();
opts.tree_builder.scripting_enabled = scripting_enabled;
TestDescAndFn {
desc: TestDesc {
ignore: ignore,
..TestDesc::new(DynTestName(name))
},
testfn: TestFn::dyn_test_fn(move || {
// Do this here because Tendril isn't Send.
let data = StrTendril::from_slice(&data);
let mut result = String::new();
match context {
None => {
let dom = parse_document(RcDom::default(), opts).one(data.clone());
for child in dom.document.children.borrow().iter() {
serialize(&mut result, 1, child.clone());
}
},
Some(ref context) => {
let dom = parse_fragment(RcDom::default(), opts, context.clone(), vec![])
.one(data.clone());
// fragment case: serialize children of the html element
// rather than children of the document
let doc = &dom.document;
let root = &doc.children.borrow()[0];
for child in root.children.borrow().iter() {
serialize(&mut result, 1, child.clone());
}
},
};
let len = result.len();
result.truncate(len - 1); // drop the trailing newline
if result != expected {
panic!(
"\ninput: {}\ngot:\n{}\nexpected:\n{}\n",
data, result, expected
);
}
}),
}
}
fn context_name(context: &str) -> QualName {
if context.starts_with("svg ") {
QualName::new(None, ns!(svg), LocalName::from(&context[4..]))
} else if context.starts_with("math ") {
QualName::new(None, ns!(mathml), LocalName::from(&context[5..]))
} else {
QualName::new(None, ns!(html), LocalName::from(context))
}
}
fn tests(src_dir: &Path, ignores: &HashSet<String>) -> Vec<TestDescAndFn> {
let mut tests = vec![];
foreach_html5lib_test(
src_dir,
"tree-construction",
OsStr::new("dat"),
|path, file| {
let buf = io::BufReader::new(file);
let lines = buf.lines().map(|res| res.ok().expect("couldn't read"));
let data = parse_tests(lines);
for (i, test) in data.into_iter().enumerate() {
make_test(
&mut tests,
ignores,
path.file_name().unwrap().to_str().unwrap(),
i,
test,
);
}
},
);
tests
}
fn main() {
let args: Vec<_> = env::args().collect();
let src_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
let mut ignores = HashSet::new();
{
let f = fs::File::open(&src_dir.join("data/test/ignore")).unwrap();
let r = io::BufReader::new(f);
for ln in r.lines() {
ignores.insert(ln.unwrap().trim_end().to_string());
}
}
test::test_main(&args, tests(src_dir, &ignores));
}
markup5ever_rcdom-0.1.0/tests/html-tree-sink.rs
use html5ever::driver;
use html5ever::tendril::stream::TendrilSink;
use html5ever::tendril::StrTendril;
use html5ever::ExpandedName;
use html5ever::QualName;
use markup5ever::interface::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
use markup5ever::{local_name, namespace_url, ns, Attribute};
use markup5ever_rcdom::{Handle, RcDom};
use std::borrow::Cow;
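/// A `TreeSink` that delegates every operation to an inner `RcDom` while also
/// recording, for each element created, the line number most recently reported by
/// the parser via `set_current_line`.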
pub struct LineCountingDOM {
pub line_vec: Vec<(QualName, u64)>,
pub current_line: u64,
pub rcdom: RcDom,
}
impl TreeSink for LineCountingDOM {
type Output = Self;
fn finish(self) -> Self {
self
}
type Handle = Handle;
fn parse_error(&mut self, msg: Cow<'static, str>) {
self.rcdom.parse_error(msg);
}
fn get_document(&mut self) -> Handle {
self.rcdom.get_document()
}
fn get_template_contents(&mut self, target: &Handle) -> Handle {
self.rcdom.get_template_contents(target)
}
fn set_quirks_mode(&mut self, mode: QuirksMode) {
self.rcdom.set_quirks_mode(mode)
}
fn same_node(&self, x: &Handle, y: &Handle) -> bool {
self.rcdom.same_node(x, y)
}
fn elem_name<'a>(&'a self, target: &'a Handle) -> ExpandedName<'a> {
self.rcdom.elem_name(target)
}
fn create_element(
&mut self,
name: QualName,
        attrs: Vec<Attribute>,
flags: ElementFlags,
) -> Handle {
self.line_vec.push((name.clone(), self.current_line));
self.rcdom.create_element(name, attrs, flags)
}
fn create_comment(&mut self, text: StrTendril) -> Handle {
self.rcdom.create_comment(text)
}
fn create_pi(&mut self, target: StrTendril, content: StrTendril) -> Handle {
self.rcdom.create_pi(target, content)
}
    fn append(&mut self, parent: &Handle, child: NodeOrText<Handle>) {
self.rcdom.append(parent, child)
}
    fn append_before_sibling(&mut self, sibling: &Handle, child: NodeOrText<Handle>) {
self.rcdom.append_before_sibling(sibling, child)
}
fn append_based_on_parent_node(
&mut self,
element: &Handle,
prev_element: &Handle,
        child: NodeOrText<Handle>,
) {
self.rcdom
.append_based_on_parent_node(element, prev_element, child)
}
fn append_doctype_to_document(
&mut self,
name: StrTendril,
public_id: StrTendril,
system_id: StrTendril,
) {
self.rcdom
.append_doctype_to_document(name, public_id, system_id);
}
    fn add_attrs_if_missing(&mut self, target: &Handle, attrs: Vec<Attribute>) {
self.rcdom.add_attrs_if_missing(target, attrs);
}
fn remove_from_parent(&mut self, target: &Handle) {
self.rcdom.remove_from_parent(target);
}
fn reparent_children(&mut self, node: &Handle, new_parent: &Handle) {
self.rcdom.reparent_children(node, new_parent);
}
fn mark_script_already_started(&mut self, target: &Handle) {
self.rcdom.mark_script_already_started(target);
}
fn set_current_line(&mut self, line_number: u64) {
self.current_line = line_number;
}
}
#[test]
fn check_four_lines() {
// Input
let sink = LineCountingDOM {
line_vec: vec![],
current_line: 1,
rcdom: RcDom::default(),
};
let mut result_tok = driver::parse_document(sink, Default::default());
result_tok.process(StrTendril::from("\n"));
result_tok.process(StrTendril::from("\n"));
result_tok.process(StrTendril::from("\n"));
result_tok.process(StrTendril::from(""));
// Actual Output
let actual = result_tok.finish();
// Expected Output
let expected = vec![
(QualName::new(None, ns!(html), local_name!("html")), 1),
(QualName::new(None, ns!(html), local_name!("head")), 1),
(QualName::new(None, ns!(html), local_name!("body")), 1),
(QualName::new(None, ns!(html), local_name!("a")), 1),
(QualName::new(None, ns!(html), local_name!("b")), 3),
];
// Assertion
assert_eq!(actual.line_vec, expected);
}
markup5ever_rcdom-0.1.0/tests/util/find_tests.rs
// Copyright 2014-2017 The html5ever Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::ffi::OsStr;
use std::fs;
use std::path::Path;
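/// Invoke `mk` once for every file with extension `ext` found under
/// `src_dir/xml5lib-tests/<subdir>`.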
pub fn foreach_xml5lib_test<Mk>(
src_dir: &Path,
subdir: &'static str,
ext: &'static OsStr,
mut mk: Mk,
) where
Mk: FnMut(&Path, fs::File),
{
let mut test_dir_path = src_dir.to_path_buf();
test_dir_path.push("xml5lib-tests");
test_dir_path.push(subdir);
let test_files = fs::read_dir(&test_dir_path).unwrap();
for entry in test_files {
let path = entry.unwrap().path();
if path.extension() == Some(ext) {
let file = fs::File::open(&path).unwrap();
mk(&path, file);
}
}
}
markup5ever_rcdom-0.1.0/tests/xml-driver.rs
use markup5ever_rcdom::{RcDom, SerializableHandle};
use xml5ever::driver;
use xml5ever::serialize;
use xml5ever::tendril::TendrilSink;
#[test]
fn el_ns_serialize() {
assert_eq_serialization(
"Test",
driver::parse_document(RcDom::default(), Default::default())
.from_utf8()
.one("Test".as_bytes()),
);
}
#[test]
fn nested_ns_serialize() {
assert_eq_serialization("",
driver::parse_document(RcDom::default(), Default::default())
.from_utf8()
.one("".as_bytes()));
}
#[test]
fn def_ns_serialize() {
assert_eq_serialization(
"