rst_parser-0.4.0/.cargo_vcs_info.json0000644000000001121374755466000132720ustar { "git": { "sha1": "33e0418e0a23d0b592104d657cafa0978375cfa6" } } rst_parser-0.4.0/Cargo.toml0000644000000020431374755466000112750ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies # # If you believe there's an error in this file please file an # issue against the rust-lang/cargo repository. If you're # editing this file be aware that the upstream Cargo.toml # will likely look very different (and much more reasonable) [package] edition = "2018" name = "rst_parser" version = "0.4.0" authors = ["Philipp A. "] description = "a reStructuredText parser" homepage = "https://github.com/flying-sheep/rust-rst" documentation = "https://docs.rs/rst_parser" readme = "README.md" license = "MIT OR Apache-2.0" repository = "https://github.com/flying-sheep/rust-rst" [dependencies.document_tree] version = "0.4.0" [dependencies.failure] version = "0.1.6" [dependencies.pest] version = "2.1.2" [dependencies.pest_derive] version = "2.1.0" rst_parser-0.4.0/Cargo.toml.orig010064400017500001750000000007621374755427200147740ustar 00000000000000[package] name = 'rst_parser' version = '0.4.0' authors = ['Philipp A. '] edition = '2018' description = 'a reStructuredText parser' license = 'MIT OR Apache-2.0' readme = 'README.md' documentation = 'https://docs.rs/rst_parser' homepage = 'https://github.com/flying-sheep/rust-rst' repository = 'https://github.com/flying-sheep/rust-rst' [dependencies] document_tree = { path = '../document_tree', version = '0.4.0' } pest = '2.1.2' pest_derive = '2.1.0' failure = '0.1.6' rst_parser-0.4.0/README.md010064400017500001750000000004511360542466300133500ustar 00000000000000`rst_parser` ============ Part of the [`rst`][rst] crate family. Offers the functions `parse` and `parse_only`, which try to create a `document_tree::Document`. `parse` simplifies this document and resolves references before returning it. 
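A minimal usage sketch (not taken from the crate's own docs, shown here only to illustrate the two entry points; the snippet and its sample input are assumptions):

```rust
use rst_parser::{parse, parse_only};

fn main() -> Result<(), failure::Error> {
    let source = "Title\n=====\n\nHello *world*.\n";
    // `parse_only` builds the document tree and resolves sections,
    // but leaves references unresolved.
    let _raw = parse_only(source)?;
    // `parse` additionally simplifies the tree and resolves references.
    let _doc = parse(source)?;
    Ok(())
}
```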
[rst]: https://github.com/flying-sheep/rust-rst/#readme rst_parser-0.4.0/src/conversion/block.rs010064400017500001750000000222201374755412300165050ustar 00000000000000use failure::{Error,bail}; use pest::iterators::Pair; use document_tree::{ Element,HasChildren,ExtraAttributes, elements as e, element_categories as c, extra_attributes as a, attribute_types as at }; use crate::{ pest_rst::Rule, pair_ext_parse::PairExt, }; use super::{whitespace_normalize_name, inline::convert_inlines}; #[derive(PartialEq)] pub(super) enum TitleKind { Double(char), Single(char) } pub(super) enum TitleOrSsubel { Title(e::Title, TitleKind), Ssubel(c::StructuralSubElement), } pub(super) fn convert_ssubel(pair: Pair) -> Result, Error> { use self::TitleOrSsubel::*; Ok(Some(match pair.as_rule() { Rule::title => { let (t, k) = convert_title(pair)?; Title(t, k) }, //TODO: subtitle, decoration, docinfo Rule::EOI => return Ok(None), _ => Ssubel(convert_substructure(pair)?.into()), })) } fn convert_substructure(pair: Pair) -> Result { Ok(match pair.as_rule() { // todo: Topic, Sidebar, Transition // no section here, as it’s constructed from titles _ => convert_body_elem(pair)?.into(), }) } fn convert_body_elem(pair: Pair) -> Result { Ok(match pair.as_rule() { Rule::paragraph => convert_paragraph(pair)?.into(), Rule::target => convert_target(pair)?.into(), Rule::substitution_def => convert_substitution_def(pair)?.into(), Rule::admonition_gen => convert_admonition_gen(pair)?.into(), Rule::image => convert_image::(pair)?.into(), Rule::bullet_list => convert_bullet_list(pair)?.into(), Rule::literal_block => convert_literal_block(pair).into(), Rule::code_directive => convert_code_directive(pair).into(), Rule::raw_directive => convert_raw_directive(pair).into(), Rule::block_comment => convert_comment(pair).into(), rule => unimplemented!("unhandled rule {:?}", rule), }) } fn convert_title(pair: Pair) -> Result<(e::Title, TitleKind), Error> { let mut title: Option = None; let mut title_inlines: Option> = None; let mut adornment_char: Option = None; // title_double or title_single. 
Extract kind before consuming let inner_pair = pair.into_inner().next().unwrap(); let kind = inner_pair.as_rule(); for p in inner_pair.into_inner() { match p.as_rule() { Rule::line => { title = Some(p.as_str().to_owned()); title_inlines = Some(convert_inlines(p)?); }, Rule::adornments => adornment_char = Some(p.as_str().chars().next().expect("Empty adornment?")), rule => unimplemented!("Unexpected rule in title: {:?}", rule), }; } // now we encountered one line of text and one of adornments // TODO: emit error if the adornment line is too short (has to match title length) let mut elem = e::Title::with_children(title_inlines.expect("No text in title")); if let Some(title) = title { //TODO: slugify properly let slug = title.to_lowercase().replace("\n", "").replace(" ", "-"); elem.names_mut().push(at::NameToken(slug)); } let title_kind = match kind { Rule::title_double => TitleKind::Double(adornment_char.unwrap()), Rule::title_single => TitleKind::Single(adornment_char.unwrap()), _ => unreachable!(), }; Ok((elem, title_kind)) } fn convert_paragraph(pair: Pair) -> Result { Ok(e::Paragraph::with_children(convert_inlines(pair)?)) } fn convert_target(pair: Pair) -> Result { let mut elem: e::Target = Default::default(); elem.extra_mut().anonymous = false; for p in pair.into_inner() { match p.as_rule() { Rule::target_name_uq | Rule::target_name_qu => { elem.ids_mut().push(p.as_str().into()); elem.names_mut().push(p.as_str().into()); }, // TODO: also handle non-urls Rule::link_target => elem.extra_mut().refuri = Some(p.parse()?), rule => panic!("Unexpected rule in target: {:?}", rule), } } Ok(elem) } fn convert_substitution_def(pair: Pair) -> Result { let mut pairs = pair.into_inner(); let name = whitespace_normalize_name(pairs.next().unwrap().as_str()); // Rule::substitution_name let inner_pair = pairs.next().unwrap(); let inner: Vec = match inner_pair.as_rule() { Rule::replace => convert_replace(inner_pair)?, Rule::image => vec![convert_image::(inner_pair)?.into()], rule => panic!("Unknown substitution rule {:?}", rule), }; let mut subst_def = e::SubstitutionDefinition::with_children(inner); subst_def.names_mut().push(at::NameToken(name)); Ok(subst_def) } fn convert_replace(pair: Pair) -> Result, Error> { let mut pairs = pair.into_inner(); let paragraph = pairs.next().unwrap(); convert_inlines(paragraph) } fn convert_image(pair: Pair) -> Result where I: Element + ExtraAttributes { let mut pairs = pair.into_inner(); let mut image = I::with_extra(a::Image::new( pairs.next().unwrap().as_str().trim().parse()?, // line )); for opt in pairs { let mut opt_iter = opt.into_inner(); let opt_name = opt_iter.next().unwrap(); let opt_val = opt_iter.next().unwrap(); match opt_name.as_str() { "class" => image.classes_mut().push(opt_val.as_str().to_owned()), "name" => image.names_mut().push(opt_val.as_str().into()), "alt" => image.extra_mut().alt = Some(opt_val.as_str().to_owned()), "height" => image.extra_mut().height = Some(opt_val.parse()?), "width" => image.extra_mut().width = Some(opt_val.parse()?), "scale" => image.extra_mut().scale = Some(parse_scale(&opt_val)?), "align" => image.extra_mut().align = Some(opt_val.parse()?), "target" => image.extra_mut().target = Some(opt_val.parse()?), name => bail!("Unknown Image option {}", name), } } Ok(image) } fn parse_scale(pair: &Pair) -> Result { let input = if pair.as_str().chars().rev().next() == Some('%') { &pair.as_str()[..pair.as_str().len()-1] } else { pair.as_str() }; use pest::error::{Error,ErrorVariant}; Ok(input.parse().map_err(|e: std::num::ParseIntError| 
{ let var: ErrorVariant = ErrorVariant::CustomError { message: e.to_string() }; Error::new_from_span(var, pair.as_span()) })?) } fn convert_admonition_gen(pair: Pair) -> Result { let mut iter = pair.into_inner(); let typ = iter.next().unwrap().as_str(); // TODO: in reality it contains body elements. let children: Vec = iter.map(|p| e::Paragraph::with_children(vec![p.as_str().into()]).into()).collect(); Ok(match typ { "attention" => e::Attention::with_children(children).into(), "hint" => e::Hint::with_children(children).into(), "note" => e::Note::with_children(children).into(), "caution" => e::Caution::with_children(children).into(), "danger" => e::Danger::with_children(children).into(), "error" => e::Error::with_children(children).into(), "important" => e::Important::with_children(children).into(), "tip" => e::Tip::with_children(children).into(), "warning" => e::Warning::with_children(children).into(), typ => panic!("Unknown admontion type {}!", typ), }) } fn convert_bullet_list(pair: Pair) -> Result { Ok(e::BulletList::with_children(pair.into_inner().map(convert_bullet_item).collect::>()?)) } fn convert_bullet_item(pair: Pair) -> Result { let mut iter = pair.into_inner(); let mut children: Vec = vec![ convert_paragraph(iter.next().unwrap())?.into() ]; for p in iter { children.push(convert_body_elem(p)?); } Ok(e::ListItem::with_children(children)) } fn convert_literal_block(pair: Pair) -> e::LiteralBlock { convert_literal_lines(pair.into_inner().next().unwrap()) } fn convert_literal_lines(pair: Pair) -> e::LiteralBlock { let children = pair.into_inner().map(|l| match l.as_rule() { Rule::literal_line => l.as_str(), Rule::literal_line_blank => "\n", _ => unreachable!(), }.into()).collect(); return e::LiteralBlock::with_children(children); } fn convert_code_directive(pair: Pair) -> e::LiteralBlock { let mut iter = pair.into_inner(); let (lang, code) = match (iter.next().unwrap(), iter.next()) { (lang, Some(code)) => (Some(lang), code), (code, None) => (None, code), }; let mut code_block = convert_literal_lines(code); code_block.classes_mut().push("code".to_owned()); if let Some(lang) = lang { code_block.classes_mut().push(lang.as_str().to_owned()); }; code_block } fn convert_raw_directive(pair: Pair) -> e::Raw { let mut iter = pair.into_inner(); let format = iter.next().unwrap(); let block = iter.next().unwrap(); let children = block.into_inner().map(|l| match l.as_rule() { Rule::raw_line => l.as_str(), Rule::raw_line_blank => "\n", _ => unreachable!(), }.into()).collect(); let mut raw_block = e::Raw::with_children(children); raw_block.extra_mut().format.push(at::NameToken(format.as_str().to_owned())); raw_block } fn convert_comment(pair: Pair) -> e::Comment { let lines = pair.into_inner().map(|l| match l.as_rule() { Rule::comment_line_blank => "\n", Rule::comment_line => l.as_str(), _ => unreachable!(), }.into()).collect(); e::Comment::with_children(lines) } rst_parser-0.4.0/src/conversion/inline.rs010064400017500001750000000117221374531536400166760ustar 00000000000000use failure::Error; use pest::iterators::Pair; use document_tree::{ HasChildren, elements as e, url::Url, element_categories as c, extra_attributes as a, attribute_types as at, }; use crate::{ pest_rst::Rule, // pair_ext_parse::PairExt, }; use super::whitespace_normalize_name; pub fn convert_inline(pair: Pair) -> Result { Ok(match pair.as_rule() { Rule::str | Rule::str_nested => pair.as_str().into(), Rule::ws_newline => " ".to_owned().into(), Rule::reference => convert_reference(pair)?, Rule::substitution_name => 
convert_substitution_ref(pair)?.into(), Rule::emph => e::Emphasis::with_children(convert_inlines(pair)?).into(), Rule::strong => e::Strong::with_children(convert_inlines(pair)?).into(), Rule::literal => e::Literal::with_children(vec![pair.as_str().to_owned()]).into(), rule => unimplemented!("unknown rule {:?}", rule), }) } pub fn convert_inlines(pair: Pair) -> Result, Error> { pair.into_inner().map(convert_inline).collect() } fn convert_reference(pair: Pair) -> Result { let name; let refuri; let refid; let mut refname = vec![]; let mut children: Vec = vec![]; let concrete = pair.into_inner().next().unwrap(); match concrete.as_rule() { Rule::reference_target => { let rt_inner = concrete.into_inner().next().unwrap(); // reference_target_uq or target_name_qu match rt_inner.as_rule() { Rule::reference_target_uq => { refid = None; name = Some(rt_inner.as_str().into()); refuri = None; refname.push(rt_inner.as_str().into()); children.push(rt_inner.as_str().into()); }, Rule::reference_target_qu => { let (text, reference) = { let mut text = None; let mut reference = None; for inner in rt_inner.clone().into_inner() { match inner.as_rule() { Rule::reference_text => text = Some(inner), Rule::reference_bracketed => reference = Some(inner), _ => unreachable!() } } (text, reference) }; let trimmed_text = match (&text, &reference) { (Some(text), None) => text.as_str(), (_, Some(reference)) => { text .map(|text| text.as_str().trim_end_matches(|ch| " \n\r".contains(ch))) .filter(|text| !text.is_empty()) .unwrap_or_else(|| reference.clone().into_inner().next().unwrap().as_str()) } (None, None) => unreachable!() }; refid = None; name = Some(trimmed_text.into()); refuri = if let Some(reference) = reference { let inner = reference.into_inner().next().unwrap(); match inner.as_rule() { // The URL rules in our parser accept a narrow superset of // valid URLs, so we need to handle false positives. Rule::url => if let Ok(target) = Url::parse_absolute(inner.as_str()) { Some(target) } else if inner.as_str().ends_with('_') { // like target_name_qu (minus the final underscore) let full_str = inner.as_str(); refname.push(full_str[0..full_str.len() - 1].into()); None } else { // like relative_reference Some(Url::parse_relative(inner.as_str())?) }, Rule::target_name_qu => { refname.push(inner.as_str().into()); None }, Rule::relative_reference => { Some(Url::parse_relative(inner.as_str())?) 
}, _ => unreachable!() } } else { refname.push(trimmed_text.into()); None }; children.push(trimmed_text.into()); }, _ => unreachable!() } }, Rule::reference_explicit => unimplemented!("explicit reference"), Rule::reference_auto => { let rt_inner = concrete.into_inner().next().unwrap(); match rt_inner.as_rule() { Rule::url_auto => match Url::parse_absolute(rt_inner.as_str()) { Ok(target) => { refuri = Some(target); name = None; refid = None; children.push(rt_inner.as_str().into()); }, // if our parser got a URL wrong, return it as a string Err(_) => return Ok(rt_inner.as_str().into()) }, Rule::email => { let mailto_url = String::from("mailto:") + rt_inner.as_str(); match Url::parse_absolute(&mailto_url) { Ok(target) => { refuri = Some(target); name = None; refid = None; children.push(rt_inner.as_str().into()); }, // if our parser got a URL wrong, return it as a string Err(_) => return Ok(rt_inner.as_str().into()) } }, _ => unreachable!() } }, _ => unreachable!(), }; Ok(e::Reference::new( Default::default(), a::Reference { name, refuri, refid, refname }, children ).into()) } fn convert_substitution_ref(pair: Pair) -> Result { let name = whitespace_normalize_name(pair.as_str()); Ok(a::ExtraAttributes::with_extra( a::SubstitutionReference { refname: vec![at::NameToken(name)] } )) } rst_parser-0.4.0/src/conversion/tests.rs010064400017500001750000000035551360122544200165530ustar 00000000000000use document_tree::{ elements as e, element_categories as c, HasChildren, }; use crate::parse; fn ssubel_to_section(ssubel: &c::StructuralSubElement) -> &e::Section { match ssubel { c::StructuralSubElement::SubStructure(ref b) => match **b { c::SubStructure::Section(ref s) => s, ref c => panic!("Expected section, not {:?}", c), }, ref c => panic!("Expected SubStructure, not {:?}", c), } } const SECTIONS: &str = "\ Intro before first section title Level 1 ******* ------- Level 2 ------- Level 3 ======= L1 again ******** L3 again, skipping L2 ===================== "; #[test] fn convert_skipped_section() { let doctree = parse(SECTIONS).unwrap(); let lvl0 = doctree.children(); assert_eq!(lvl0.len(), 3, "Should be a paragraph and 2 sections: {:?}", lvl0); assert_eq!(lvl0[0], e::Paragraph::with_children(vec![ "Intro before first section title".to_owned().into() ]).into(), "The intro text should fit"); let lvl1a = ssubel_to_section(&lvl0[1]).children(); assert_eq!(lvl1a.len(), 2, "The 1st lvl1 section should have (a title and) a single lvl2 section as child: {:?}", lvl1a); //TODO: test title lvl1a[0] let lvl2 = ssubel_to_section(&lvl1a[1]).children(); assert_eq!(lvl2.len(), 2, "The lvl2 section should have (a title and) a single lvl3 section as child: {:?}", lvl2); //TODO: test title lvl2[0] let lvl3a = ssubel_to_section(&lvl2[1]).children(); assert_eq!(lvl3a.len(), 1, "The 1st lvl3 section should just a title: {:?}", lvl3a); //TODO: test title lvl3a[0] let lvl1b = ssubel_to_section(&lvl0[2]).children(); assert_eq!(lvl1b.len(), 2, "The 2nd lvl1 section should have (a title and) a single lvl2 section as child: {:?}", lvl1b); //TODO: test title lvl1b[0] let lvl3b = ssubel_to_section(&lvl1b[1]).children(); assert_eq!(lvl3b.len(), 1, "The 2nd lvl3 section should have just a title: {:?}", lvl3b); //TODO: test title lvl3b[0] } rst_parser-0.4.0/src/conversion.rs010064400017500001750000000057101360122542500154050ustar 00000000000000mod block; mod inline; #[cfg(test)] mod tests; use failure::Error; use pest::iterators::Pairs; use document_tree::{ Element,HasChildren, elements as e, element_categories as c, attribute_types 
as at, }; use crate::pest_rst::Rule; fn ssubel_to_section_unchecked_mut(ssubel: &mut c::StructuralSubElement) -> &mut e::Section { match ssubel { c::StructuralSubElement::SubStructure(ref mut b) => match **b { c::SubStructure::Section(ref mut s) => s, _ => unreachable!(), }, _ => unreachable!(), } } fn get_level<'tl>(toplevel: &'tl mut Vec, section_idxs: &[Option]) -> &'tl mut Vec { let mut level = toplevel; for maybe_i in section_idxs { if let Some(i) = *maybe_i { level = ssubel_to_section_unchecked_mut(&mut level[i]).children_mut(); } } level } pub fn convert_document(pairs: Pairs) -> Result { use self::block::TitleOrSsubel::*; let mut toplevel: Vec = vec![]; // The kinds of section titles encountered. // `section_idx[x]` has the kind `kinds[x]`, but `kinds` can be longer let mut kinds: Vec = vec![]; // Recursive indices into the tree, pointing at the active sections. // `None`s indicate skipped section levels: // toplevel[section_idxs.flatten()[0]].children[section_idxs.flatten()[1]]... let mut section_idxs: Vec> = vec![]; for pair in pairs { if let Some(ssubel) = block::convert_ssubel(pair)? { match ssubel { Title(title, kind) => { match kinds.iter().position(|k| k == &kind) { // Idx points to the level we want to add, // so idx-1 needs to be the last valid index. Some(idx) => { // If idx < len: Remove found section and all below section_idxs.truncate(idx); // If idx > len: Add None for skipped levels // TODO: test skipped levels while section_idxs.len() < idx { section_idxs.push(None) } }, None => kinds.push(kind), } let super_level = get_level(&mut toplevel, §ion_idxs); let slug = title.names().iter().next().map(|at::NameToken(name)| at::ID(name.to_owned())); let mut section = e::Section::with_children(vec![title.into()]); section.ids_mut().extend(slug.into_iter()); super_level.push(section.into()); section_idxs.push(Some(super_level.len() - 1)); }, Ssubel(elem) => get_level(&mut toplevel, §ion_idxs).push(elem), }} } Ok(e::Document::with_children(toplevel)) } /// Normalizes a name in terms of whitespace. Equivalent to docutils's /// `docutils.nodes.whitespace_normalize_name`. pub fn whitespace_normalize_name(name: &str) -> String { // Python's string.split() defines whitespace differently than Rust does. let split_iter = name.split( |ch: char| ch.is_whitespace() || (ch >= '\x1C' && ch <= '\x1F') ).filter(|split| !split.is_empty()); let mut ret = String::new(); for split in split_iter { if !ret.is_empty() { ret.push(' '); } ret.push_str(split); } ret } rst_parser-0.4.0/src/lib.rs010064400017500001750000000012561360122052400137620ustar 00000000000000pub mod token; mod conversion; mod simplify; mod pest_rst; mod pair_ext_parse; #[cfg(test)] pub mod tests; use failure::Error; use pest::Parser; use document_tree::Document; use self::pest_rst::{RstParser,Rule}; use self::conversion::convert_document; use self::simplify::resolve_references; /// Parse into a document tree and resolve sections, but not references. pub fn parse_only(source: &str) -> Result { let pairs = RstParser::parse(Rule::document, source)?; convert_document(pairs) } /// Parse into a document tree and resolve sections and references. 
pub fn parse(source: &str) -> Result { parse_only(source).map(resolve_references) } rst_parser-0.4.0/src/pair_ext_parse.rs010064400017500001750000000012701345022575400162310ustar 00000000000000use std::str::FromStr; use pest::Span; use pest::iterators::Pair; use pest::error::{Error,ErrorVariant}; pub trait PairExt where R: pest::RuleType { fn parse(&self) -> Result> where T: FromStr, E: ToString; } impl<'l, R> PairExt for Pair<'l, R> where R: pest::RuleType { fn parse(&self) -> Result> where T: FromStr, E: ToString { self.as_str().parse().map_err(|e| to_parse_error(self.as_span(), &e)) } } pub(crate) fn to_parse_error(span: Span, e: &E) -> Error where E: ToString, R: pest::RuleType { let var: ErrorVariant = ErrorVariant::CustomError { message: e.to_string() }; Error::new_from_span(var, span) } rst_parser-0.4.0/src/pest_rst.rs010064400017500001750000000002001340256727100150570ustar 00000000000000#![allow(clippy::redundant_closure)] use pest_derive::Parser; #[derive(Parser)] #[grammar = "rst.pest"] pub struct RstParser; rst_parser-0.4.0/src/rst.pest010064400017500001750000000563501374755412300144000ustar 00000000000000// Entry point: the document. // This grammar is aligned to the doctree names when possible. // It will however contain blocks, as we can’t parse sections: // Section headers define the hierarchy by their delimiters, // and pest only has one stack that we need for indentation. document = _{ SOI ~ blocks ~ EOI } blocks = _{ block ~ (blank_line* ~ block)* ~ blank_line? } block = _{ PEEK[..] ~ hanging_block } // This is the list of all block-level elements // They’re defined hanging, i.e. without the first PEEK[..] hanging_block = _{ substitution_def | image_directive | code_directive | raw_directive | admonition | admonition_gen | target | literal_block // Comments should be below the directives to try to match them first, but // above the title that will interpret ".." as a title marker. | block_comment | title | bullet_list | paragraph // TODO: implement all those things: // | block_quote // | verbatim // | doctest_block // | horizontal_rule // | table // | ordered_list // | plain } // Substitution definition. A block type substitution_def = { ".." ~ PUSH(" "+) ~ "|" ~ substitution_name ~ "|" ~ " "+ ~ inline_dirblock ~ DROP } substitution_name = { !" " ~ (!(" "|"|") ~ ANY)+ ~ (" "+ ~ (!(" "|"|") ~ ANY)+)* } inline_dirblock = _{ replace | image } // TODO: implement others // Target. A block type target = { target_qu | target_uq } target_uq = _{ ".. _" ~ target_name_uq ~ ":" ~ (" " ~ link_target)? ~ " "* ~ NEWLINE } target_qu = _{ ".. _`" ~ !"``" ~ target_name_qu ~ !"``:" ~ "`:" ~ (" " ~ link_target)? ~ " "* ~ NEWLINE } target_name_uq = { ( !("_"|":"|"`") ~ !NEWLINE ~ ANY )* } target_name_qu = { ( !(":"|"`"|"_>") ~ ANY )* } link_target = { nonspacechar+ } // Title. A block type title = { title_double | title_single } title_double = { PUSH(adornments) ~ NEWLINE ~ PEEK[..-1] ~ " "* ~ line ~ PEEK[..-1] ~ POP } title_single = { line ~ PEEK[..] ~ adornments ~ NEWLINE } // Bullet list. A block type. bullet_list = { bullet_item ~ (PEEK[..] ~ bullet_item)* } bullet_item = { bullet_marker ~ PUSH(" "+) ~ line ~ blank_line* ~ blist_body? ~ DROP } blist_body = _{ PEEK[..-1] ~ PUSH(" " ~ POP) ~ hanging_block ~ block* } // paragraph. A block type. paragraph = { inlines } // literal_block literal_block = { "::" ~ " "* ~ NEWLINE ~ blank_line+ ~ PUSH(" "+) ~ literal_lines ~ DROP } literal_lines = { literal_line ~ (literal_line_blank* ~ PEEK[..] 
~ literal_line)* } literal_line_blank = { " "* ~ NEWLINE } literal_line = { (!NEWLINE ~ ANY)+ ~ NEWLINE } /* Directives: http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#directives * .. name:: arguments ~ :options: ~ blank_line+ ~ content * Everything except for the first argument has to be indented */ // Directives with options can have these or specific ones: common_opt_name = { "class" | "name" } // Replace. A directive only usable in substitutions. replace = { ^"replace::" ~ " "* ~ paragraph } // Image. A directive. image_directive = _{ ".." ~ PUSH(" "+) ~ image ~ DROP } image = { ^"image::" ~ line ~ image_opt_block? } image_opt_block = _{ PEEK[..-1] ~ PUSH(" " ~ POP) ~ image_option } //TODO: merge with other directives? image_option = { ":" ~ image_opt_name ~ ":" ~ line } image_opt_name = { common_opt_name | "alt" | "height" | "width" | "scale" | "align" | "target" } // Code block. A directive that allows adding a language to a literal block code_directive = { ".." ~ PUSH(" "+) ~ "code" ~ "-block"? ~ "::" ~ (" "+ ~ source)? ~ NEWLINE ~ blank_line+ ~ PEEK[..-1] ~ PUSH(" " ~ POP) ~ literal_lines ~ DROP } source = { (!NEWLINE ~ ANY)+ } // Raw block. A directive raw_directive = { ".." ~ PUSH(" "+) ~ "raw::" ~ " "+ ~ raw_output_format ~ NEWLINE ~ blank_line+ ~ PEEK[..-1] ~ PUSH(" " ~ POP) ~ raw_block ~ DROP } raw_output_format = { (!NEWLINE ~ ANY)+ } raw_block = { raw_line ~ (raw_line_blank* ~ PEEK[..] ~ raw_line)* } raw_line_blank = { " "* ~ NEWLINE } raw_line = { (!NEWLINE ~ ANY)+ ~ NEWLINE } // Admonition. A directive. The generic one has a title admonition = { ".." ~ PUSH(" "+) ~ ^"admonition::" ~ line ~ blank_line* ~ admonition_content? ~ DROP } admonition_gen = { ".." ~ PUSH(" "+) ~ admonition_type ~ "::" ~ (blank_line | line) ~ blank_line* ~ admonition_content? ~ DROP } admonition_type = { ^"attention" | ^"caution" | ^"danger" | ^"error" | ^"hint" | ^"important" | ^"note" | ^"tip" | ^"warning" } admonition_content = _{ PEEK[..-1] ~ PUSH(" " ~ POP) ~ hanging_block ~ block* } //TODO: merge with other directives? // Comments. block_comment = { ".." ~ ( // Without title (" "* ~ NEWLINE)+ ~ PUSH(" "+) ~ comment_hanging ~ DROP // or with title | PUSH(" "+) ~ comment_line ~ (comment_line_blank* ~ PEEK[..-1] ~ PUSH(" " ~ POP) ~ comment_hanging)? ~ DROP // or empty | " "* ~ NEWLINE ) ~ (" "* ~ NEWLINE)* } comment_hanging = _{ comment_line ~ (comment_line_blank* ~ PEEK[..] ~ comment_line)* } comment_line_blank = { " "* ~ NEWLINE } comment_line = { (!NEWLINE ~ ANY)+ ~ NEWLINE } /* * inlines */ line = { !marker ~ inline+ ~ NEWLINE } blank_line = _{ !marker ~ !inline ~ " "* ~ NEWLINE } inlines = _{ !marker ~ inline+ ~ ( ( ws_newline ~ PEEK[..] ~ !marker ~ inline+ )+ ~ NEWLINE )? 
} ws_newline = { NEWLINE } inline = _{ inline_special | str } inline_special = _{ reference | substitution_ref | emph_outer | strong_outer | literal_outer // | ul_or_star_line // | space // | note_reference // | footnote // //| citation // | code // | application_depent // | entity // | escaped_char // | smart // | symbol } str = { (!(NEWLINE | inline_special) ~ ANY)+ } // simple formatting inline_nested = _{ inline_special | str_nested } str_nested = { word_nested ~ ( " "+ ~ word_nested)* } // TODO: allow ` in emph word_nested = _{ (!(NEWLINE | " " | inline_special | "*" | "`") ~ ANY)+ } emph_outer = _{ "*" ~ emph ~ "*" } emph = { (!("*"|" ") ~ inline_nested)+ ~ (" "+ ~ (!("*"|" ") ~ inline_nested)+)* } strong_outer = _{ "**" ~ strong ~ "**" } strong = { (!("*"|" ") ~ inline_nested)+ ~ (" "+ ~ (!("*"|" ") ~ inline_nested)+)* } literal_outer = _{ "``" ~ literal ~ "``" } literal = { (!"``" ~ ANY)+ } // inline links reference = { reference_target | reference_explicit | reference_auto } reference_target = { (reference_target_uq ~ "_" | reference_target_qu) ~ !(LETTER|NUMBER) } reference_target_uq = { (!("_"|":"|"`") ~ nonspacechar)+ } reference_target_qu = { ( !("`"? ~ "`_") ~ "`" ~ !"``" ) ~ reference_text? ~ ("<" ~ reference_bracketed ~ ">")? ~ ( "`" ~ !"``" ) ~ "_" } reference_text = { !"<" ~ ( !("`"|"<") ~ ANY )+ } reference_bracketed = { url | (target_name_qu ~ "_") | relative_reference } relative_reference = { (!("`"|">") ~ ANY)+ } reference_explicit = { reference_label ~ "(" ~ " "* ~ reference_source ~ " "* ~ (NEWLINE ~ PEEK[..])? ~ reference_title ~ " "* ~ ")" } reference_label = { "[" ~ !"^" ~ (!"]" ~ inline)* ~ "]" } reference_source = { reference_source_contents } reference_source_contents = _{ ( (!("("|")"|">") ~ nonspacechar)+ | "(" ~ reference_source_contents ~ ")" )* } reference_title = { ( reference_title_single | reference_title_double | "" ) } reference_title_single = { "'" ~ ( !("'" ~ " "+ ~ (")" | NEWLINE)) ~ ANY )* ~ "'" } reference_title_double = { "\"" ~ ( !("\"" ~ " "+ ~ (")" | NEWLINE)) ~ ANY )* ~ "\"" } // Emails can't end with punctuation, but URLs must use a separate rule. reference_auto = { url_auto | email } //reference_embedded = { "`" ~ reference_embedded_source ~ "<" ~ absolute_url_with_fragment ~ ">`_" ~ "_"? } //reference_embedded_source = { ( !("<"|":"|"`") ~ ( " " | nonspacechar | blank_line ) )* } substitution_ref = _{ "|" ~ substitution_name ~ "|" } /* URLs as defined by the WHATWG URL standard. */ url = { absolute_url_no_query ~ ("?" ~ url_unit*)? ~ ("#" ~ url_unit*)? } absolute_url_no_query = { ( special_url_scheme ~ ":" ~ scheme_relative_special_url ) | ( ^"file:" ~ scheme_relative_file_url ) | ( arbitrary_scheme ~ ":" ~ relative_url ) } scheme_relative_special_url = { "//" ~ host ~ (":" ~ url_port)? ~ path_absolute_url? } path_absolute_url = { "/" ~ path_relative_url } path_relative_url = { ( url_path_segment_unit* ~ "/" )* ~ url_path_segment_unit* } url_path_segment_unit = { !("/"|"?") ~ url_unit } url_port = { ASCII_DIGIT* } scheme_relative_file_url = { "//" ~ ( host ~ !("/:/"|"/|/") )? ~ path_absolute_url } relative_url = { ( "//" ~ host ~ (":" ~ url_port)? ~ path_absolute_url? ) | path_absolute_url | (!(arbitrary_scheme ~ ":") ~ path_relative_url) } // this is approximately a superset of valid hosts and opaque hosts host = { ( !(":"|"/"|"?"|"#") ~ url_unit)+ | ("["~(ASCII_HEX_DIGIT|"."|":")+~"]") } special_url_scheme = { ^"ftp" | (^"http" | ^"ws") ~ ^"s"? 
} /* doesn't include "file" */ arbitrary_scheme = { ASCII_ALPHA ~ ASCII_ALPHANUMERIC* } // taken at 2020-09-06 from https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml known_scheme = { "aaa"|"aaas"|"about"|"acap"|"acct"|"acd"|"acr"|"adiumxtra"|"adt"|"afp"|"afs"|"aim"|"amss"|"android"|"appdata"|"apt"|"ark"|"attachment"|"aw"| "barion"|"beshare"|"bitcoin"|"bitcoincash"|"blob"|"bolo"|"browserext"|"cabal"|"calculator"|"callto"|"cap"|"cast"|"casts"|"chrome"| "chrome-extension"|"cid"|"coap"|"coap+tcp"|"coap+ws"|"coaps"|"coaps+tcp"|"coaps+ws"|"com-eventbrite-attendee"|"content"|"conti"|"crid"|"cvs"| "dab"|"dat"|"data"|"dav"|"diaspora"|"dict"|"did"|"dis"|"dlna-playcontainer"|"dlna-playsingle"|"dns"|"dntp"|"doi"|"dpp"|"drm"|"drop"|"dtmi"| "dtn"|"dvb"|"dweb"|"ed2k"|"elsi"|"ens"|"ethereum"|"example"|"facetime"|"fax"|"feed"|"feedready"|"file"|"filesystem"|"finger"| "first-run-pen-experience"|"fish"|"fm"|"ftp"|"fuchsia-pkg"|"geo"|"gg"|"git"|"gizmoproject"|"go"|"gopher"|"graph"|"gtalk"|"h323"|"ham"|"hcap"| "hcp"|"http"|"https"|"hxxp"|"hxxps"|"hydrazone"|"hyper"|"iax"|"icap"|"icon"|"im"|"imap"|"info"|"iotdisco"|"ipfs"|"ipn"|"ipns"|"ipp"|"ipps"| "irc"|"irc6"|"ircs"|"iris"|"iris.beep"|"iris.lwz"|"iris.xpc"|"iris.xpcs"|"isostore"|"itms"|"jabber"|"jar"|"jms"|"keyparc"|"lastfm"|"lbry"| "ldap"|"ldaps"|"leaptofrogans"|"lorawan"|"lvlt"|"magnet"|"mailserver"|"mailto"|"maps"|"market"|"matrix"|"message"|"microsoft.windows.camera"| "microsoft.windows.camera.multipicker"|"microsoft.windows.camera.picker"|"mid"|"mms"|"modem"|"mongodb"|"moz"|"ms-access"| "ms-browser-extension"|"ms-calculator"|"ms-drive-to"|"ms-enrollment"|"ms-excel"|"ms-eyecontrolspeech"|"ms-gamebarservices"| "ms-gamingoverlay"|"ms-getoffice"|"ms-help"|"ms-infopath"|"ms-inputapp"|"ms-lockscreencomponent-config"|"ms-media-stream-id"| "ms-mixedrealitycapture"|"ms-mobileplans"|"ms-officeapp"|"ms-people"|"ms-project"|"ms-powerpoint"|"ms-publisher"|"ms-restoretabcompanion"| "ms-screenclip"|"ms-screensketch"|"ms-search"|"ms-search-repair"|"ms-secondary-screen-controller"|"ms-secondary-screen-setup"|"ms-settings"| "ms-settings-airplanemode"|"ms-settings-bluetooth"|"ms-settings-camera"|"ms-settings-cellular"|"ms-settings-cloudstorage"| "ms-settings-connectabledevices"|"ms-settings-displays-topology"|"ms-settings-emailandaccounts"|"ms-settings-language"| "ms-settings-location"|"ms-settings-lock"|"ms-settings-nfctransactions"|"ms-settings-notifications"|"ms-settings-power"| "ms-settings-privacy"|"ms-settings-proximity"|"ms-settings-screenrotation"|"ms-settings-wifi"|"ms-settings-workplace"|"ms-spd"| "ms-sttoverlay"|"ms-transit-to"|"ms-useractivityset"|"ms-virtualtouchpad"|"ms-visio"|"ms-walk-to"|"ms-whiteboard"|"ms-whiteboard-cmd"| "ms-word"|"msnim"|"msrp"|"msrps"|"mss"|"mtqp"|"mumble"|"mupdate"|"mvn"|"news"|"nfs"|"ni"|"nih"|"nntp"|"notes"|"ocf"|"oid"|"onenote"| "onenote-cmd"|"opaquelocktoken"|"openpgp4fpr"|"otpauth"|"pack"|"palm"|"paparazzi"|"payment"|"payto"|"pkcs11"|"platform"|"pop"|"pres"| "prospero"|"proxy"|"pwid"|"psyc"|"pttp"|"qb"|"query"|"quic-transport"|"redis"|"rediss"|"reload"|"res"|"resource"|"rmi"|"rsync"|"rtmfp"| "rtmp"|"rtsp"|"rtsps"|"rtspu"|"secondlife"|"service"|"session"|"sftp"|"sgn"|"shttp"|"sieve"|"simpleledger"|"sip"|"sips"|"skype"|"smb"|"sms"| "smtp"|"snews"|"snmp"|"soap.beep"|"soap.beeps"|"soldat"|"spiffe"|"spotify"|"ssb"|"ssh"|"steam"|"stun"|"stuns"|"submit"|"swh"|"svn"|"tag"| "teamspeak"|"tel"|"teliaeid"|"telnet"|"tftp"|"things"|"thismessage"|"tip"|"tn3270"|"tool"|"turn"|"turns"|"tv"|"udp"|"unreal"|"upt"|"urn"| 
"ut2004"|"v-event"|"vemmi"|"ventrilo"|"videotex"|"vnc"|"view-source"|"vscode"|"vscode-insiders"|"vsls"|"wais"|"webcal"|"wifi"|"wpid"|"ws"| "wss"|"wtai"|"wyciwyg"|"xcon"|"xcon-userid"|"xfire"|"xmlrpc.beep"|"xmlrpc.beeps"|"xmpp"|"xri"|"ymsgr"|"z39.50"|"z39.50r"|"z39.50s" } url_unit = { ASCII_ALPHANUMERIC | "!"|"$"|"&"|"'"|"("|")"|"*"|"+"|","|"-"|"."|"/"|":"|";"|"="|"?"|"@"|"_"|"~" | (!(SURROGATE|NONCHARACTER_CODE_POINT) ~ '\u{A0}'..'\u{10FFFD}') | ("%" ~ ASCII_HEX_DIGIT{2}) } /* * Rules for URLs that don't end in punctuation. * This is a modification of the rules above to incorporate the docutils rules * for the final character in an auto URL and for the character after it. * The patterns used here to emulate the behavior of docutils' regex are taken * from . */ url_auto = { ( absolute_url_no_query ~ ("?" ~ url_unit*)? ~ "#" ~ url_units_auto ) | ( absolute_url_no_query ~ "?" ~ url_units_auto ) | ( special_url_scheme ~ "://" ~ host ~ (":" ~ url_port)? ~ path_absolute_url_auto ) | ( special_url_scheme ~ "://" ~ host ~ ":" ~ url_port ~ &follows_auto_url ) | ( special_url_scheme ~ "://" ~ ( domain_host_auto | "["~(ASCII_HEX_DIGIT|"."|":")+~"]" ~ &follows_auto_url ) ) | ( ^"file://" ~ ( host ~ !("/:/"|"/|/") )? ~ path_absolute_url_auto ) | ( known_scheme ~ ":" ~ relative_url_auto ) } domain_host_auto = { ( !(":"|"/"|"?"|"#") ~ url_unit ~ url_units_auto ) | ( !(":"|"/"|"?"|"#") ~ url_unit ~ &">" ) | ( (ASCII_ALPHANUMERIC|"_"|"~"|"*"|"/"|"="|"+") ~ &follows_auto_url ) } path_absolute_url_auto = { "/" ~ path_relative_url_auto } path_relative_url_auto = { prua1 | prua2 | &follows_auto_url } prua1 = { ( url_path_segment_unit ~ prua1 ) | ( "/" ~ path_relative_url_auto ) } prua2 = { ( url_path_segment_unit ~ prua2 ) | ( (ASCII_ALPHANUMERIC|"_"|"~"|"*"|"="|"+") ~ &follows_auto_url ) } relative_url_auto = { ( "//" ~ host ~ (":" ~ url_port)? ~ path_absolute_url_auto ) | ( "//" ~ host ~ ":" ~ url_port ~ &follows_auto_url ) | ( "//" ~ ( domain_host_auto | "["~(ASCII_HEX_DIGIT|"."|":")+~"]" ~ &follows_auto_url ) ) | path_absolute_url_auto | // (prua1|prua2) is path_relative_url_auto minus the &follows_auto_url case (!(known_scheme ~ ":") ~ (prua1 | prua2)) } url_units_auto = { ( url_unit ~ url_units_auto ) | ( url_unit ~ &">" ~ &follows_auto_url ) | ( (ASCII_ALPHANUMERIC|"_"|"~"|"*"|"/"|"="|"+") ~ &follows_auto_url ) } follows_auto_url = { EOI|"\x00"|WHITE_SPACE|">"|"\u{201A}"|"\u{201E}"| (!(CONNECTOR_PUNCTUATION|OPEN_PUNCTUATION|"#"|"%"|"&"|"*"|"@") ~ PUNCTUATION) } /* Rules for emails as defined by the HTML standard */ email = { ( email_atext | "." )+ ~ "@" ~ email_label ~ ( "." ~ email_label )* } email_atext = { ASCII_ALPHANUMERIC|"!"|"#"|"$"|"%"|"&"|"'"|"/"|"="|"?"|"^"|"_"|"`"|"{"|"|"|"}"|"~" } email_label = { ASCII_ALPHANUMERIC ~ ( !("-"+ ~ !ASCII_ALPHANUMERIC) ~ (ASCII_ALPHANUMERIC|"-") ){0,62} } /* * character classes */ bullet_marker = _{ "+" | "*" | "-" } adornments = { // recommended "="+ | "-"+ | "`"+ | ":"+ | "."+ | "'"+ | "\""+ | "~"+ | "^"+ | "_"+ | "*"+ | "+"+ | "#"+ | // parentheses "("+ | ")"+ | "["+ | "]"+ | "{"+ | "}"+ | // punctuation ","+ | ";"+ | "!"+ | "?"+ | // operators "&"+ | "|"+ | "/"+ | "%"+ | "<"+ | ">"+ | // misc "$"+ | "@"+ | "\\"+ } nonspacechar = _{ !(" " | NEWLINE) ~ ANY } /* * lookaheads. 
do not use in another position */ marker = _{ (bullet_marker | "..") ~ " " } //################################################################################# // doctest_block = { (doctest_line+ ~ (!(">" | blank_line) ~ line)*)+ } // block_quote_raw = { ":" ~ blank_line ~ NEWLINE ~ nonblank_indented_line+ } // block_quote_chunk = { // !"::" ~ ":" ~ blank_line ~ // NEWLINE ~ // blank_line* ~ // nonblank_indented_line+ // } // block_quote = { block_quote_chunk+ } // horizontal_rule = { // ( "=" ~ sp ~ "=" ~ sp ~ "=" ~ (sp ~ "=")* // | "-" ~ sp ~ "-" ~ sp ~ "-" ~ (sp ~ "-")* // | "*" ~ sp ~ "*" ~ sp ~ "*" ~ (sp ~ "*")* // | "^" ~ sp ~ "^" ~ sp ~ "^" ~ (sp ~ "^")* // | "~" ~ sp ~ "~" ~ sp ~ "~" ~ (sp ~ "~")* // | "_" ~ sp ~ "_" ~ sp ~ "_" ~ (sp ~ "_")* // ) ~ // sp ~ NEWLINE ~ blank_line+ // } // table = { grid_table | header_less_grid_table | simple_table } // simple_table = { "NotImplemented" ~ "simple_table" } // grid_table = { grid_table_header ~ grid_table_header_sep ~ grid_table_body+ } // header_less_grid_table = { grid_table_sep ~ grid_table_body+ } // grid_table_header = { sp ~ "+" ~ ( "-"+ ~ "+" )+ ~ blank_line ~ grid_table_row+ } // grid_table_body = { ( grid_table_row ~ grid_table_sep )+ } // grid_table_row = { sp ~ "|" ~ sp ~ ( table_cell ~ sp ~ "|" )+ ~ blank_line } // table_cell = { ( ":" | ">" | "<" | "/" | "-" | spacechar | escaped_char | alphanumeric )+ } // grid_table_header_sep = { sp ~ "+" ~ ( "="+ ~ "+" )+ ~ blank_line } // grid_table_sep = { sp ~ "+" ~ ( "-"+ ~ "+" )+ ~ blank_line } // bullet = { !horizontal_rule ~ ("+" | "*" | "-") ~ spacechar+ } // bullet_list = { &bullet ~ (list_tight | list_loose) } // list_tight = { list_item_tight+ ~ blank_line* ~ !(bullet | enumerator | def_marker) } // list_loose = { ( list_item ~ blank_line* )+ } // list_item = { (bullet | enumerator | def_marker) ~ list_block ~ list_continuation_block* } // list_item_tight = { // (bullet | enumerator | def_marker) ~ // list_block ~ // (!blank_line ~ list_continuation_block)* ~ // !list_continuation_block // } // list_block = { !blank_line ~ line ~ list_block_line* } // list_continuation_block = { blank_line* ~ ( indent ~ list_block )+ } // enumerator = { (ASCII_DIGIT+ | "#"+) ~ "." ~ spacechar+ } // ordered_list = { &enumerator ~ (list_tight | list_loose) } // list_block_line = { // !blank_line ~ // !( (indent? ~ (bullet | enumerator)) | def_marker ) ~ // !horizontal_rule ~ // optionally_indented_line // } // space = _{ spacechar+ } // str = { normal_char+ ~ str_chunk* } // str_chunk = _{ (normal_char | "_"+ ~ &alphanumeric)+ } // escaped_char = { "\\" ~ !NEWLINE ~ ("-" | "\\" | "`" | "|" | "*" | "_" | "{" | "}" | "[" | "]" | "(" | ")" | "#" | "+" | "." | "!" 
| ">" | "<") } // entity = { hex_entity | dec_entity | char_entity } // endline = _{ line_break | terminal_endline | normal_endline } // normal_endline = _{ sp ~ NEWLINE ~ !(blank_line | ">" | line ~ ("="+ | "-"+) ~ NEWLINE) } // terminal_endline = _{ sp ~ NEWLINE ~ EOI } // line_break = _{ " " ~ normal_endline } // symbol = { special_char } // application_depent = { !("`_" | "``_") ~ "`" ~ !"``" ~ target_name_qu ~ "`" ~ !("``" | "_") } // // This keeps the parser from getting bogged down on long strings of "*" or "_", // // or strings of "*" or "_" with space on each side: // ul_or_star_line = { ul_line | star_line } // star_line = { "****" ~ "*"* | spacechar ~ "*"+ ~ &spacechar } // ul_line = { "____" ~ "_"* | spacechar ~ "_"+ ~ &spacechar } // empty_title = { "" } // ticks_2 = { "``" ~ !"`" } // code = { ticks_2 ~ ( (!"`" ~ nonspacechar)+ | "_" | !ticks_2 ~ "`" | !(sp ~ ticks_2) ~ (spacechar | NEWLINE ~ !blank_line) )+ ~ ticks_2 } // quoted = { // "\"" ~ (!"\"" ~ ANY)* ~ "\"" | // "'" ~ (!"'" ~ ANY)* ~ "'" // } // spacechar = _{ " " | "\t" } // sp = _{ spacechar* } // spnl = _{ sp ~ (NEWLINE ~ sp)? } // special_char = _{ "~" | "*" | "_" | "`" | "&" | "[" | "]" | "(" | ")" | "<" | "!" | "#" | "\\" | "\"" | "'" | extended_special_char } // normal_char = _{ !( special_char | spacechar | NEWLINE ) ~ ANY } // alphanumeric = { // ASCII_ALPHANUMERIC | // "\u{200}" | "\u{201}" | "\u{202}" | "\u{203}" | "\u{204}" | "\u{205}" | "\u{206}" | "\u{207}" | // "\u{210}" | "\u{211}" | "\u{212}" | "\u{213}" | "\u{214}" | "\u{215}" | "\u{216}" | "\u{217}" | // "\u{220}" | "\u{221}" | "\u{222}" | "\u{223}" | "\u{224}" | "\u{225}" | "\u{226}" | "\u{227}" | // "\u{230}" | "\u{231}" | "\u{232}" | "\u{233}" | "\u{234}" | "\u{235}" | "\u{236}" | "\u{237}" | // "\u{240}" | "\u{241}" | "\u{242}" | "\u{243}" | "\u{244}" | "\u{245}" | "\u{246}" | "\u{247}" | // "\u{250}" | "\u{251}" | "\u{252}" | "\u{253}" | "\u{254}" | "\u{255}" | "\u{256}" | "\u{257}" | // "\u{260}" | "\u{261}" | "\u{262}" | "\u{263}" | "\u{264}" | "\u{265}" | "\u{266}" | "\u{267}" | // "\u{270}" | "\u{271}" | "\u{272}" | "\u{273}" | "\u{274}" | "\u{275}" | "\u{276}" | "\u{277}" | // "\u{300}" | "\u{301}" | "\u{302}" | "\u{303}" | "\u{304}" | "\u{305}" | "\u{306}" | "\u{307}" | // "\u{310}" | "\u{311}" | "\u{312}" | "\u{313}" | "\u{314}" | "\u{315}" | "\u{316}" | "\u{317}" | // "\u{320}" | "\u{321}" | "\u{322}" | "\u{323}" | "\u{324}" | "\u{325}" | "\u{326}" | "\u{327}" | // "\u{330}" | "\u{331}" | "\u{332}" | "\u{333}" | "\u{334}" | "\u{335}" | "\u{336}" | "\u{337}" | // "\u{340}" | "\u{341}" | "\u{342}" | "\u{343}" | "\u{344}" | "\u{345}" | "\u{346}" | "\u{347}" | // "\u{350}" | "\u{351}" | "\u{352}" | "\u{353}" | "\u{354}" | "\u{355}" | "\u{356}" | "\u{357}" | // "\u{360}" | "\u{361}" | "\u{362}" | "\u{363}" | "\u{364}" | "\u{365}" | "\u{366}" | "\u{367}" | // "\u{370}" | "\u{371}" | "\u{372}" | "\u{373}" | "\u{374}" | "\u{375}" | "\u{376}" | "\u{377}" // } // hex_entity = { "&#" ~ ("X"|"x") ~ ('0'..'9' | 'a'..'f' | 'A'..'F')+ ~ ";" } // dec_entity = { "&#" ~ ASCII_DIGIT+ ~ ";" } // char_entity = { "&" ~ ASCII_ALPHANUMERIC+ ~ ";" } // indent = _{ "\t" | " " } // indented_line = { indent ~ line } // optionally_indented_line = { indent? ~ line } // doctest_line = { ">>> " ~ raw_line } // line = _{ raw_line } // raw_line = _{ (!NEWLINE ~ ANY)* ~ NEWLINE | (!EOI ~ ANY)+ ~ EOI } // // Syntax extensions // extended_special_char = { // //&{ extension(EXT_SMART) } ~ // ("." 
| "-" | "\"" | "'") | // //&{ extension(EXT_NOTES) } ~ // "^" // } // smart = { // //&{ extension(EXT_SMART) } ~ // ( ellipsis | dash | single_quoted | double_quoted | apostrophe ) // } // apostrophe = { "'" } // ellipsis = { "..." | ". . ." } // dash = { em_dash | en_dash } // en_dash = { "-" ~ &ASCII_DIGIT } // em_dash = { "---" | "--" } // single_quote_start = { "'" ~ !(spacechar | NEWLINE) } // single_quote_end = { "'" ~ !alphanumeric } // single_quoted = { single_quote_start ~ ( !single_quote_end ~ inline )+ ~ single_quote_end } // double_quote_start = { "\"" } // double_quote_end = { "\"" } // double_quoted = { double_quote_start ~ ( !double_quote_end ~ inline )+ ~ double_quote_end } // footnote = { "[#" ~ (!"]" ~ inline)+ ~ "]_" } // definition = { // &( (!defmark ~ nonspacechar ~ raw_line) ~ blank_line? ~ defmark) ~ // d_list_title+ ~ // (def_tight | def_loose) // } // d_list_title = { !defmark ~ &nonspacechar ~ (!endline ~ inline)+ ~ sp ~ NEWLINE } // def_tight = { &defmark ~ list_tight } // def_loose = { blank_line ~ &defmark ~ list_loose } // defmark = { (":" | "~") ~ spacechar+ } // def_marker = { // //&{ extension(EXT_DLISTS) } ~ // defmark // } rst_parser-0.4.0/src/simplify.rs010064400017500001750000000474561374531536400151040ustar 00000000000000/* http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#hyperlink-targets Links can have internal or external targets. In the source, targets look like: .. targetname1: .. targetname2: some paragraph or list item or so or: .. targetname1: .. targetname2: https://link There’s also anonymous links and targets without names. TODO: continue documenting how it’s done via https://repo.or.cz/docutils.git/blob/HEAD:/docutils/docutils/transforms/references.py */ use std::collections::HashMap; use document_tree::{ url::Url, Document, HasChildren, attribute_types::NameToken, elements::{self as e, Element}, element_categories as c, extra_attributes::ExtraAttributes, }; #[derive(Debug)] enum NamedTargetType { NumberedFootnote(usize), LabeledFootnote(usize), Citation, InternalLink, ExternalLink(Url), IndirectLink(NameToken), SectionTitle, } impl NamedTargetType { fn is_implicit_target(&self) -> bool { match self { NamedTargetType::SectionTitle => true, _ => false, } } } #[derive(Clone, Debug)] struct Substitution { content: Vec, /// If true and the sibling before the reference is a text node, /// the text node gets right-trimmed. ltrim: bool, /// Same as `ltrim` with the sibling after the reference. rtrim: bool, } #[derive(Default, Debug)] struct TargetsCollected { named_targets: HashMap, substitutions: HashMap, normalized_substitutions: HashMap, } impl TargetsCollected { fn target_url<'t>(self: &'t TargetsCollected, refname: &[NameToken]) -> Option<&'t Url> { // TODO: Check if the target would expand circularly if refname.len() != 1 { panic!("Expected exactly one name in a reference."); } let name = refname[0].clone(); match self.named_targets.get(&name)? 
{ NamedTargetType::ExternalLink(url) => Some(url), _ => unimplemented!(), } } fn substitution<'t>(self: &'t TargetsCollected, refname: &[NameToken]) -> Option<&'t Substitution> { // TODO: Check if the substitution would expand circularly if refname.len() != 1 { panic!("Expected exactly one name in a substitution reference."); } let name = refname[0].clone(); self.substitutions.get(&name).or_else(|| { self.normalized_substitutions.get(&name.0.to_lowercase()) }) } } trait ResolvableRefs { fn populate_targets(&self, refs: &mut TargetsCollected); fn resolve_refs(self, refs: &TargetsCollected) -> Vec where Self: Sized; } pub fn resolve_references(mut doc: Document) -> Document { let mut references: TargetsCollected = Default::default(); for c in doc.children() { c.populate_targets(&mut references); } let new: Vec<_> = doc.children_mut().drain(..).flat_map(|c| c.resolve_refs(&references)).collect(); Document::with_children(new) } fn sub_pop(parent: &P, refs: &mut TargetsCollected) where P: HasChildren, C: ResolvableRefs { for c in parent.children() { c.populate_targets(refs); } } fn sub_res(mut parent: P, refs: &TargetsCollected) -> P where P: e::Element + HasChildren, C: ResolvableRefs { let new: Vec<_> = parent.children_mut().drain(..).flat_map(|c| c.resolve_refs(refs)).collect(); parent.children_mut().extend(new); parent } fn sub_sub_pop(parent: &P, refs: &mut TargetsCollected) where P: HasChildren, C1: HasChildren, C2: ResolvableRefs { for c in parent.children() { sub_pop(c, refs); } } fn sub_sub_res(mut parent: P, refs: &TargetsCollected) -> P where P: e::Element + HasChildren, C1: e::Element + HasChildren, C2: ResolvableRefs { let new: Vec<_> = parent.children_mut().drain(..).map(|c| sub_res(c, refs)).collect(); parent.children_mut().extend(new); parent } impl ResolvableRefs for c::StructuralSubElement { fn populate_targets(&self, refs: &mut TargetsCollected) { use c::StructuralSubElement::*; match self { Title(e) => sub_pop(&**e, refs), Subtitle(e) => sub_pop(&**e, refs), Decoration(e) => sub_pop(&**e, refs), Docinfo(e) => sub_pop(&**e, refs), SubStructure(e) => e.populate_targets(refs), } } fn resolve_refs(self, refs: &TargetsCollected) -> Vec { use c::StructuralSubElement::*; vec![match self { Title(e) => sub_res(*e, refs).into(), Subtitle(e) => sub_res(*e, refs).into(), Decoration(e) => sub_res(*e, refs).into(), Docinfo(e) => sub_res(*e, refs).into(), SubStructure(e) => return e.resolve_refs(refs).drain(..).map(Into::into).collect(), }] } } impl ResolvableRefs for c::SubStructure { fn populate_targets(&self, refs: &mut TargetsCollected) { use c::SubStructure::*; match self { Topic(e) => sub_pop(&**e, refs), Sidebar(e) => sub_pop(&**e, refs), Transition(_) => {}, Section(e) => sub_pop(&**e, refs), BodyElement(e) => e.populate_targets(refs), } } fn resolve_refs(self, refs: &TargetsCollected) -> Vec { use c::SubStructure::*; vec![match self { Topic(e) => sub_res(*e, refs).into(), Sidebar(e) => sub_res(*e, refs).into(), Transition(e) => Transition(e), Section(e) => sub_res(*e, refs).into(), BodyElement(e) => return e.resolve_refs(refs).drain(..).map(Into::into).collect(), }] } } impl ResolvableRefs for c::BodyElement { fn populate_targets(&self, refs: &mut TargetsCollected) { use c::BodyElement::*; match self { Paragraph(e) => sub_pop(&**e, refs), LiteralBlock(e) => sub_pop(&**e, refs), DoctestBlock(e) => sub_pop(&**e, refs), MathBlock(_) => {}, Rubric(e) => sub_pop(&**e, refs), SubstitutionDefinition(e) => { let subst = Substitution { content: e.children().clone(), ltrim: e.extra().ltrim, 
rtrim: e.extra().rtrim }; for name in e.names() { if refs.substitutions.contains_key(name) { // TODO: Duplicate substitution name (level 3 system message). } // Intentionally overriding any previous values. refs.substitutions.insert(name.clone(), subst.clone()); refs.normalized_substitutions.insert(name.0.to_lowercase(), subst.clone()); } }, Comment(_) => {}, Pending(_) => { unimplemented!(); }, Target(e) => { if let Some(uri) = &e.extra().refuri { for name in e.names() { refs.named_targets.insert(name.clone(), NamedTargetType::ExternalLink(uri.clone())); } } // TODO: as is, people can only refer to the target directly containing the URL. // add refid and refnames to some HashMap and follow those later. }, Raw(_) => {}, Image(_) => {}, Compound(e) => sub_pop(&**e, refs), Container(e) => sub_pop(&**e, refs), BulletList(e) => sub_sub_pop(&**e, refs), EnumeratedList(e) => sub_sub_pop(&**e, refs), DefinitionList(e) => sub_sub_pop(&**e, refs), FieldList(e) => sub_sub_pop(&**e, refs), OptionList(e) => sub_sub_pop(&**e, refs), LineBlock(e) => sub_pop(&**e, refs), BlockQuote(e) => sub_pop(&**e, refs), Admonition(e) => sub_pop(&**e, refs), Attention(e) => sub_pop(&**e, refs), Hint(e) => sub_pop(&**e, refs), Note(e) => sub_pop(&**e, refs), Caution(e) => sub_pop(&**e, refs), Danger(e) => sub_pop(&**e, refs), Error(e) => sub_pop(&**e, refs), Important(e) => sub_pop(&**e, refs), Tip(e) => sub_pop(&**e, refs), Warning(e) => sub_pop(&**e, refs), Footnote(e) => sub_pop(&**e, refs), Citation(e) => sub_pop(&**e, refs), SystemMessage(e) => sub_pop(&**e, refs), Figure(e) => sub_pop(&**e, refs), Table(e) => sub_pop(&**e, refs) } } fn resolve_refs(self, refs: &TargetsCollected) -> Vec { use c::BodyElement::*; vec![match self { Paragraph(e) => sub_res(*e, refs).into(), LiteralBlock(e) => sub_res(*e, refs).into(), DoctestBlock(e) => sub_res(*e, refs).into(), MathBlock(e) => MathBlock(e), Rubric(e) => sub_res(*e, refs).into(), SubstitutionDefinition(_) => return vec![], Comment(e) => Comment(e), Pending(e) => Pending(e), Target(e) => Target(e), Raw(e) => Raw(e), Image(e) => Image(e), Compound(e) => sub_res(*e, refs).into(), Container(e) => sub_res(*e, refs).into(), BulletList(e) => sub_sub_res(*e, refs).into(), EnumeratedList(e) => sub_sub_res(*e, refs).into(), DefinitionList(e) => sub_sub_res(*e, refs).into(), FieldList(e) => sub_sub_res(*e, refs).into(), OptionList(e) => sub_sub_res(*e, refs).into(), LineBlock(e) => sub_res(*e, refs).into(), BlockQuote(e) => sub_res(*e, refs).into(), Admonition(e) => sub_res(*e, refs).into(), Attention(e) => sub_res(*e, refs).into(), Hint(e) => sub_res(*e, refs).into(), Note(e) => sub_res(*e, refs).into(), Caution(e) => sub_res(*e, refs).into(), Danger(e) => sub_res(*e, refs).into(), Error(e) => sub_res(*e, refs).into(), Important(e) => sub_res(*e, refs).into(), Tip(e) => sub_res(*e, refs).into(), Warning(e) => sub_res(*e, refs).into(), Footnote(e) => sub_res(*e, refs).into(), Citation(e) => sub_res(*e, refs).into(), SystemMessage(e) => sub_res(*e, refs).into(), Figure(e) => sub_res(*e, refs).into(), Table(e) => sub_res(*e, refs).into() }] } } impl ResolvableRefs for c::BibliographicElement { fn populate_targets(&self, refs: &mut TargetsCollected) { use c::BibliographicElement::*; match self { Author(e) => sub_pop(&**e, refs), Authors(e) => sub_pop(&**e, refs), Organization(e) => sub_pop(&**e, refs), Address(e) => sub_pop(&**e, refs), Contact(e) => sub_pop(&**e, refs), Version(e) => sub_pop(&**e, refs), Revision(e) => sub_pop(&**e, refs), Status(e) => sub_pop(&**e, refs), 
Date(e) => sub_pop(&**e, refs), Copyright(e) => sub_pop(&**e, refs), Field(e) => sub_pop(&**e, refs), } } fn resolve_refs(self, refs: &TargetsCollected) -> Vec { use c::BibliographicElement::*; vec![match self { Author(e) => sub_res(*e, refs).into(), Authors(e) => sub_res(*e, refs).into(), Organization(e) => sub_res(*e, refs).into(), Address(e) => sub_res(*e, refs).into(), Contact(e) => sub_res(*e, refs).into(), Version(e) => sub_res(*e, refs).into(), Revision(e) => sub_res(*e, refs).into(), Status(e) => sub_res(*e, refs).into(), Date(e) => sub_res(*e, refs).into(), Copyright(e) => sub_res(*e, refs).into(), Field(e) => sub_res(*e, refs).into(), }] } } impl ResolvableRefs for c::TextOrInlineElement { fn populate_targets(&self, refs: &mut TargetsCollected) { use c::TextOrInlineElement::*; match self { String(_) => {}, Emphasis(e) => sub_pop(&**e, refs), Strong(e) => sub_pop(&**e, refs), Literal(_) => {}, Reference(e) => sub_pop(&**e, refs), FootnoteReference(e) => sub_pop(&**e, refs), CitationReference(e) => sub_pop(&**e, refs), SubstitutionReference(e) => sub_pop(&**e, refs), TitleReference(e) => sub_pop(&**e, refs), Abbreviation(e) => sub_pop(&**e, refs), Acronym(e) => sub_pop(&**e, refs), Superscript(e) => sub_pop(&**e, refs), Subscript(e) => sub_pop(&**e, refs), Inline(e) => sub_pop(&**e, refs), Problematic(e) => sub_pop(&**e, refs), Generated(e) => sub_pop(&**e, refs), Math(_) => {}, TargetInline(_) => { unimplemented!(); }, RawInline(_) => {}, ImageInline(_) => {} } } fn resolve_refs(self, refs: &TargetsCollected) -> Vec { use c::TextOrInlineElement::*; vec![match self { String(e) => String(e), Emphasis(e) => sub_res(*e, refs).into(), Strong(e) => sub_res(*e, refs).into(), Literal(e) => Literal(e), Reference(mut e) => { if e.extra().refuri.is_none() { if let Some(uri) = refs.target_url(&e.extra().refname) { e.extra_mut().refuri = Some(uri.clone()); } } (*e).into() }, FootnoteReference(e) => sub_res(*e, refs).into(), CitationReference(e) => sub_res(*e, refs).into(), SubstitutionReference(e) => match refs.substitution(&e.extra().refname) { Some(Substitution {content, ltrim, rtrim}) => { // (level 3 system message). // TODO: ltrim and rtrim. if *ltrim || *rtrim { dbg!(content, ltrim, rtrim); } return content.clone() }, None => { // Undefined substitution name (level 3 system message). // TODO: This replaces the reference by a Problematic node. // The corresponding SystemMessage node should go in a generated // section with class "system-messages" at the end of the document. use document_tree::Problematic; let mut replacement: Box = Box::new(Default::default()); replacement.children_mut().push( c::TextOrInlineElement::String(Box::new(format!("|{}|", e.extra().refname[0].0))) ); // TODO: Create an ID for replacement for the system_message to reference. // TODO: replacement.refid pointing to the system_message. 
Problematic(replacement) } }, TitleReference(e) => sub_res(*e, refs).into(), Abbreviation(e) => sub_res(*e, refs).into(), Acronym(e) => sub_res(*e, refs).into(), Superscript(e) => sub_res(*e, refs).into(), Subscript(e) => sub_res(*e, refs).into(), Inline(e) => sub_res(*e, refs).into(), Problematic(e) => sub_res(*e, refs).into(), Generated(e) => sub_res(*e, refs).into(), Math(e) => Math(e), TargetInline(e) => TargetInline(e), RawInline(e) => RawInline(e), ImageInline(e) => ImageInline(e) }] } } impl ResolvableRefs for c::AuthorInfo { fn populate_targets(&self, refs: &mut TargetsCollected) { use c::AuthorInfo::*; match self { Author(e) => sub_pop(&**e, refs), Organization(e) => sub_pop(&**e, refs), Address(e) => sub_pop(&**e, refs), Contact(e) => sub_pop(&**e, refs), } } fn resolve_refs(self, refs: &TargetsCollected) -> Vec { use c::AuthorInfo::*; vec![match self { Author(e) => sub_res(*e, refs).into(), Organization(e) => sub_res(*e, refs).into(), Address(e) => sub_res(*e, refs).into(), Contact(e) => sub_res(*e, refs).into(), }] } } impl ResolvableRefs for c::DecorationElement { fn populate_targets(&self, refs: &mut TargetsCollected) { use c::DecorationElement::*; match self { Header(e) => sub_pop(&**e, refs), Footer(e) => sub_pop(&**e, refs), } } fn resolve_refs(self, refs: &TargetsCollected) -> Vec { use c::DecorationElement::*; vec![match self { Header(e) => sub_res(*e, refs).into(), Footer(e) => sub_res(*e, refs).into(), }] } } impl ResolvableRefs for c::SubTopic { fn populate_targets(&self, refs: &mut TargetsCollected) { use c::SubTopic::*; match self { Title(e) => sub_pop(&**e, refs), BodyElement(e) => e.populate_targets(refs), } } fn resolve_refs(self, refs: &TargetsCollected) -> Vec { use c::SubTopic::*; match self { Title(e) => vec![sub_res(*e, refs).into()], BodyElement(e) => e.resolve_refs(refs).drain(..).map(Into::into).collect(), } } } impl ResolvableRefs for c::SubSidebar { fn populate_targets(&self, refs: &mut TargetsCollected) { use c::SubSidebar::*; match self { Topic(e) => sub_pop(&**e, refs), Title(e) => sub_pop(&**e, refs), Subtitle(e) => sub_pop(&**e, refs), BodyElement(e) => e.populate_targets(refs), } } fn resolve_refs(self, refs: &TargetsCollected) -> Vec { use c::SubSidebar::*; vec![match self { Topic(e) => sub_res(*e, refs).into(), Title(e) => sub_res(*e, refs).into(), Subtitle(e) => sub_res(*e, refs).into(), BodyElement(e) => return e.resolve_refs(refs).drain(..).map(Into::into).collect(), }] } } impl ResolvableRefs for c::SubDLItem { fn populate_targets(&self, refs: &mut TargetsCollected) { use c::SubDLItem::*; match self { Term(e) => sub_pop(&**e, refs), Classifier(e) => sub_pop(&**e, refs), Definition(e) => sub_pop(&**e, refs), } } fn resolve_refs(self, refs: &TargetsCollected) -> Vec { use c::SubDLItem::*; vec![match self { Term(e) => sub_res(*e, refs).into(), Classifier(e) => sub_res(*e, refs).into(), Definition(e) => sub_res(*e, refs).into(), }] } } impl ResolvableRefs for c::SubField { fn populate_targets(&self, refs: &mut TargetsCollected) { use c::SubField::*; match self { FieldName(e) => sub_pop(&**e, refs), FieldBody(e) => sub_pop(&**e, refs), } } fn resolve_refs(self, refs: &TargetsCollected) -> Vec { use c::SubField::*; vec![match self { FieldName(e) => sub_res(*e, refs).into(), FieldBody(e) => sub_res(*e, refs).into(), }] } } impl ResolvableRefs for c::SubOptionListItem { fn populate_targets(&self, refs: &mut TargetsCollected) { use c::SubOptionListItem::*; match self { OptionGroup(e) => sub_sub_pop(&**e, refs), Description(e) => sub_pop(&**e, refs), 
impl ResolvableRefs for c::SubOptionListItem {
    fn populate_targets(&self, refs: &mut TargetsCollected) {
        use c::SubOptionListItem::*;
        match self {
            OptionGroup(e) => sub_sub_pop(&**e, refs),
            Description(e) => sub_pop(&**e, refs),
        }
    }
    fn resolve_refs(self, refs: &TargetsCollected) -> Vec<Self> {
        use c::SubOptionListItem::*;
        vec![match self {
            OptionGroup(e) => sub_sub_res(*e, refs).into(),
            Description(e) => sub_res(*e, refs).into(),
        }]
    }
}

impl ResolvableRefs for c::SubOption {
    fn populate_targets(&self, _: &mut TargetsCollected) {}
    fn resolve_refs(self, _: &TargetsCollected) -> Vec<Self> {
        vec![self]
    }
}

impl ResolvableRefs for c::SubLineBlock {
    fn populate_targets(&self, refs: &mut TargetsCollected) {
        use c::SubLineBlock::*;
        match self {
            LineBlock(e) => sub_pop(&**e, refs),
            Line(e) => sub_pop(&**e, refs),
        }
    }
    fn resolve_refs(self, refs: &TargetsCollected) -> Vec<Self> {
        use c::SubLineBlock::*;
        vec![match self {
            LineBlock(e) => sub_res(*e, refs).into(),
            Line(e) => sub_res(*e, refs).into(),
        }]
    }
}

impl ResolvableRefs for c::SubBlockQuote {
    fn populate_targets(&self, refs: &mut TargetsCollected) {
        use c::SubBlockQuote::*;
        match self {
            Attribution(e) => sub_pop(&**e, refs),
            BodyElement(e) => e.populate_targets(refs),
        }
    }
    fn resolve_refs(self, refs: &TargetsCollected) -> Vec<Self> {
        use c::SubBlockQuote::*;
        match self {
            Attribution(e) => vec![sub_res(*e, refs).into()],
            BodyElement(e) => e.resolve_refs(refs).drain(..).map(Into::into).collect(),
        }
    }
}

impl ResolvableRefs for c::SubFootnote {
    fn populate_targets(&self, refs: &mut TargetsCollected) {
        use c::SubFootnote::*;
        match self {
            Label(e) => sub_pop(&**e, refs),
            BodyElement(e) => e.populate_targets(refs),
        }
    }
    fn resolve_refs(self, refs: &TargetsCollected) -> Vec<Self> {
        use c::SubFootnote::*;
        match self {
            Label(e) => vec![sub_res(*e, refs).into()],
            BodyElement(e) => e.resolve_refs(refs).drain(..).map(Into::into).collect(),
        }
    }
}

impl ResolvableRefs for c::SubFigure {
    fn populate_targets(&self, refs: &mut TargetsCollected) {
        use c::SubFigure::*;
        match self {
            Caption(e) => sub_pop(&**e, refs),
            Legend(e) => sub_pop(&**e, refs),
            BodyElement(e) => e.populate_targets(refs),
        }
    }
    fn resolve_refs(self, refs: &TargetsCollected) -> Vec<Self> {
        use c::SubFigure::*;
        vec![match self {
            Caption(e) => sub_res(*e, refs).into(),
            Legend(e) => sub_res(*e, refs).into(),
            BodyElement(e) => return e.resolve_refs(refs).drain(..).map(Into::into).collect(),
        }]
    }
}

impl ResolvableRefs for c::SubTable {
    fn populate_targets(&self, refs: &mut TargetsCollected) {
        use c::SubTable::*;
        match self {
            Title(e) => sub_pop(&**e, refs),
            TableGroup(e) => sub_pop(&**e, refs),
        }
    }
    fn resolve_refs(self, refs: &TargetsCollected) -> Vec<Self> {
        use c::SubTable::*;
        vec![match self {
            Title(e) => sub_res(*e, refs).into(),
            TableGroup(e) => sub_res(*e, refs).into(),
        }]
    }
}

impl ResolvableRefs for c::SubTableGroup {
    fn populate_targets(&self, refs: &mut TargetsCollected) {
        use c::SubTableGroup::*;
        match self {
            TableColspec(_) => { unimplemented!(); },
            TableHead(e) => {
                for c in e.children() {
                    sub_sub_pop(c, refs);
                }
            },
            TableBody(e) => {
                for c in e.children() {
                    sub_sub_pop(c, refs);
                }
            },
        }
    }
    fn resolve_refs(self, refs: &TargetsCollected) -> Vec<Self> {
        use c::SubTableGroup::*;
        vec![match self {
            TableColspec(e) => TableColspec(e),
            TableHead(mut e) => {
                let new: Vec<_> = e.children_mut().drain(..).map(|c| sub_sub_res(c, refs)).collect();
                e.children_mut().extend(new);
                TableHead(e)
            },
            TableBody(mut e) => {
                let new: Vec<_> = e.children_mut().drain(..).map(|c| sub_sub_res(c, refs)).collect();
                e.children_mut().extend(new);
                TableBody(e)
            },
        }]
    }
}
rst_parser-0.4.0/src/tests.rs010064400017500001750000000161301374755412300143730ustar 00000000000000
use pest::consumes_to;
use pest::parses_to;

use crate::pest_rst::{RstParser, Rule};

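// Each test below drives the pest grammar directly: `parses_to!` feeds `input`
// to the given `rule` and asserts the resulting token tree, where every token
// carries the `(start, end)` byte offsets of its span in the input.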
{ parser: RstParser, input: "line\n", rule: Rule::paragraph, tokens: [ paragraph(0, 4, [ str(0, 4) ]) ] }; } #[test] fn emph_only() { parses_to! { parser: RstParser, input: "*emphasis*", rule: Rule::emph_outer, tokens: [ emph(1, 9, [str_nested(1, 9)]) ] }; } #[test] fn emph() { parses_to! { parser: RstParser, input: "line *with markup*\n", rule: Rule::paragraph, tokens: [ paragraph(0, 18, [ str(0, 5), emph(6, 17, [str_nested(6, 17)]), ]) ] }; } #[test] fn title() { parses_to! { parser: RstParser, input: "\ Title ===== ", rule: Rule::title, tokens: [ title(0, 12, [ title_single(0, 12, [ line(0, 6, [ str(0, 5) ]), adornments(6, 11), ]) ]) ] }; } #[test] fn title_overline() { parses_to! { parser: RstParser, input: "\ ----- Title ----- ", rule: Rule::title, tokens: [ title(0, 17, [ title_double(0, 17, [ adornments(0, 5), line(6, 12, [ str(6, 11) ]), ]) ]) ] }; } #[allow(clippy::cognitive_complexity)] #[test] fn two_targets() { parses_to! { parser: RstParser, input: "\ .. _a: http://example.com .. _`b_`: https://example.org ", rule: Rule::document, tokens: [ target(0, 26, [ target_name_uq(4, 5), link_target(7, 25), ]), target(26, 56, [ target_name_qu(31, 33), link_target(36, 55), ]), ] }; } #[test] fn inline_code_literal_with_underscore() { parses_to! { parser: RstParser, input: "``NAME_WITH_UNDERSCORE``", rule: Rule::inline, tokens: [ literal(2, 22), ] }; } #[allow(clippy::cognitive_complexity)] #[test] fn admonitions() { parses_to! { parser: RstParser, input: "\ .. note:: Just next line .. admonition:: In line title Next line .. danger:: Just this line ", rule: Rule::document, tokens: [ admonition_gen(0, 27, [ admonition_type(3, 7), paragraph(13, 27, [ str(13, 27) ]), ]), admonition(28, 71, [ line(43, 58, [ str(43, 57) ]), paragraph(62, 71, [ str(62, 71) ]), ]), admonition_gen(73, 100, [ admonition_type(76, 82), line(84, 100, [ str(84, 99) ]), ]), ] }; } #[allow(clippy::cognitive_complexity)] #[test] fn literal_block() { parses_to! { parser: RstParser, input: "\ :: print('x') # second line The end ", rule: Rule::document, tokens: [ literal_block(0, 36, [ literal_lines(7, 36, [ literal_line(7, 18), literal_line_blank(18, 19), literal_line(22, 36), ]), ]), paragraph(37, 44, [ str(37, 44) ]), ] }; } #[allow(clippy::cognitive_complexity)] #[test] fn code_directive() { parses_to! { parser: RstParser, input: "\ .. code:: Single line .. code-block:: python print('x') # second line The end ", rule: Rule::document, tokens: [ code_directive(0, 26, [ literal_lines(14, 26, [ literal_line(14, 26) ]), ]), code_directive(27, 83, [ source(43, 49), literal_lines(54, 83, [ literal_line(54, 65), literal_line_blank(65, 66), literal_line(69, 83), ]), ]), paragraph(84, 91, [ str(84, 91) ]), ] }; } #[allow(clippy::cognitive_complexity)] #[test] fn raw() { parses_to! { parser: RstParser, input: "\ .. raw:: html hello world .. raw:: html hello
world

   parse
this The end ", rule: Rule::document, tokens: [ raw_directive(0, 43, [ raw_output_format(9, 13), raw_block(18, 43, [ raw_line(18, 43), ]), ]), raw_directive(44, 100, [ raw_output_format(53, 57), raw_block(62, 100, [ raw_line(62, 79), raw_line_blank(79, 80), raw_line(83, 100), ]), ]), paragraph(101, 108, [ str(101, 108) ]), ] }; } #[allow(clippy::cognitive_complexity)] #[test] fn comments() { parses_to! { parser: RstParser, input: "\ .. This is a comment .. This as well. .. _so: is this! .. [and] this! .. this:: too! .. |even| this:: ! .. With a title.. and a blank line... followed by a non-blank line and another one. .. .. Comments can also be run-in like this ", rule: Rule::document, tokens: [ block_comment(0, 22, [ comment_line(3, 21), ]), block_comment(22, 43, [ comment_line(28, 42), ]), block_comment(43, 63, [ comment_line(49, 63), ]), block_comment(63, 81, [ comment_line(69, 81), ]), block_comment(81, 99, [ comment_line(87, 99), ]), block_comment(99, 121, [ comment_line(105, 121), ]), block_comment(121, 216, [ comment_line(124, 139), comment_line_blank(139, 140), comment_line(143, 163), comment_line(166, 195), comment_line_blank(195, 196), comment_line(199, 216), ]), block_comment(216, 219), block_comment(219, 263, [ comment_line(222, 243), comment_line(246, 263), ]), ] }; } #[allow(clippy::cognitive_complexity)] #[test] fn substitutions() { parses_to! { parser: RstParser, input: "\ A |subst| in-line .. |subst| replace:: substitution .. |subst2| replace:: it can also be hanging ", rule: Rule::document, tokens: [ paragraph(0, 17, [ str(0, 2), substitution_name(3, 8), str(9, 17), ]), substitution_def(19, 52, [ substitution_name(23, 28), replace(30, 52, [ paragraph(40, 52, [str(40, 52)]) ]), ]), substitution_def(53, 101, [ substitution_name(57, 63), replace(65, 101, [ paragraph(75, 101, [ str(75, 86), ws_newline(86, 87), str(88, 100), ]) ]), ]), ] }; } #[test] fn substitution_in_literal() { parses_to! { parser: RstParser, input: "Just ``|code|``, really ``*code* |only|``", rule: Rule::document, tokens: [ paragraph(0, 41, [ str(0, 5), literal(7, 13), str(15, 24), literal(26, 39), ]), ] }; } #[allow(clippy::cognitive_complexity)] #[test] fn substitution_image() { parses_to! { parser: RstParser, input: "\ .. |subst| image:: thing.png :target: foo.html ", rule: Rule::document, tokens: [ substitution_def(0, 50, [ substitution_name(4, 9), image(11, 50, [ line(18, 29, [ str(18, 28) ]), image_option(32, 50, [ image_opt_name(33, 39), line(40, 50, [ str(40, 49) ]), ]), ]), ]), ] }; } // TODO: test images #[allow(clippy::cognitive_complexity)] #[test] fn nested_lists() { parses_to! 
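// (A standalone image test would presumably mirror `substitution_image` above:
// feed an `.. image::` block plus options to `Rule::document` and assert the
// `image` / `image_option` spans. No offsets are sketched here because they
// depend on the exact input chosen.)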
{ parser: RstParser, input: "\ paragraph - item 1 - item 2 more text more text 2 more text 3 - nested item 1 - nested item 2 - nested item 3 ", rule: Rule::document, tokens: [ paragraph(0, 9, [ str(0, 9) ]), bullet_list(11, 131, [ bullet_item(11, 21, [ line(14, 21, [ str(14, 20) ]), ]), bullet_item(21, 131, [ line(24, 31, [ str(24, 30) ]), paragraph(34, 74, [ str(34, 43), ws_newline(43, 44), str(47, 58), ws_newline(58, 59), str(62, 73), ]), bullet_list(77, 131, [ bullet_item( 77, 93, [ line( 79, 93, [str( 79, 92)]) ]), bullet_item( 96, 112, [ line( 98, 112, [str( 98, 111)]) ]), bullet_item(115, 131, [ line(117, 131, [str(117, 130)]) ]), ]), ]), ]), ] } } rst_parser-0.4.0/src/token.rs010064400017500001750000000014471337336124200143470ustar 00000000000000//http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#bullet-lists // *, +, -, •, ‣, ⁃ pub enum BulletListType { Ast, Plus, Minus, Bullet, TriBullet, HyphenBullet } // 1, A, a, I, i pub enum EnumListChar { Arabic, AlphaUpper, AlphaLower, RomanUpper, RomanLower, Auto } // 1., (1), 1) pub enum EnumListType { Period, ParenEnclosed, Paren } // ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~ pub enum AdornmentChar { Bang, DQuote, Hash, Dollar, Percent, Amp, SQuote, LParen, RParen, Ast, Plus, Comma, Minus, Period, Slash, Colon, Semicolon, Less, Eq, More, Question, At, LBrack, Backslash, RBrack, Caret, Underscore, Backtick, LBrace, Pipe, RBrace, Tilde, } // [1], [#], [*], [#foo] pub enum FootnoteType { Numbered(usize), AutoNumber, AutoSymbol, AutoNamed(String) }