comrak-0.29.0/.cargo_vcs_info.json0000644000000001360000000000100124000ustar { "git": { "sha1": "c36419327a09bfbe1326baf34db23f2c3f744f6f" }, "path_in_vcs": "" }comrak-0.29.0/.editorconfig000064400000000000000000000002741046102023000136500ustar 00000000000000root = true [*] end_of_line = lf charset = utf-8 trim_trailing_whitespace = true insert_final_newline = true indent_style = space indent_size = 4 [*.md] trim_trailing_whitespace = false comrak-0.29.0/.gitattributes000064400000000000000000000001121046102023000140550ustar 00000000000000src/scanners.rs linguist-generated src/scanners.re linguist-language=Rust comrak-0.29.0/.github/FUNDING.yml000064400000000000000000000010361046102023000143450ustar 00000000000000# These are supported funding model platforms github: [kivikakk] # ko_fi: # Replace with a single Ko-fi username # tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel # community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry # liberapay: # Replace with a single Liberapay username # issuehunt: # Replace with a single IssueHunt username # otechie: # Replace with a single Otechie username # custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] comrak-0.29.0/.github/dependabot.yml000064400000000000000000000001641046102023000153610ustar 00000000000000version: 2 updates: - package-ecosystem: "github-actions" directory: "/" schedule: interval: weekly comrak-0.29.0/.github/workflows/automerge.yml000064400000000000000000000004361046102023000173030ustar 00000000000000name: "Bot auto-{approve,merge}" on: workflow_dispatch: pull_request_target: permissions: pull-requests: write contents: write jobs: dependabot: uses: yettoapp/actions/.github/workflows/automerge_dependabot.yml@main secrets: inherit with: automerge: true comrak-0.29.0/.github/workflows/benchmarks.yml000064400000000000000000000017751046102023000174370ustar 00000000000000name: benchmarks on: pull_request: types: 
- opened - reopened issue_comment: types: - created jobs: run_benchmarks: runs-on: ubuntu-latest permissions: pull-requests: write # run either when pull request is opened or when comment body (only on pr) is /run-bench if: (github.event_name == 'pull_request') || ((github.event.issue.pull_request != null) && github.event.comment.body == '/run-bench') steps: - uses: actions/checkout@v4 with: submodules: true - name: Setup Rust toolchain uses: dtolnay/rust-toolchain@stable - name: Install hyperfine run: cargo install hyperfine - name: Install cmake run: sudo apt-get update && sudo apt-get install cmake -y - name: Build Binaries run: make binaries - name: Run Benchmarks run: make bench-all - name: Post result comment uses: mshick/add-pr-comment@v2 with: message-path: bench-output.mdcomrak-0.29.0/.github/workflows/msrv.yml000064400000000000000000000003111046102023000162720ustar 00000000000000name: Ensure declared MSRV is tested on: [push, pull_request] jobs: ensure_msrv: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - run: script/check-msrv-matches-workflow comrak-0.29.0/.github/workflows/release.yml000064400000000000000000000122361046102023000167340ustar 00000000000000name: Release and publish on: workflow_dispatch: pull_request_target: types: - closed permissions: contents: write pull-requests: write jobs: prepare: if: ${{ github.event_name == 'workflow_dispatch' }} runs-on: ubuntu-latest env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} outputs: version: ${{ steps.version-label.outputs.version }} steps: - uses: actions/checkout@v4 with: submodules: true - name: Configure Git run: | git config --local user.email "actions@github.com" git config --local user.name "Actions Auto Build" - name: Get current version id: version-label run: | VERSION=$(grep version Cargo.toml | head -n 1 | cut -d'"' -f2) echo "version=${VERSION}" >> $GITHUB_OUTPUT - name: Get previous version id: previous-version-label run: | PREVIOUS_VERSION=$(gh api "/repos/${{ github.repository 
}}/tags?per_page=1" | jq -r '.[] | .name?') echo "previous_version=${PREVIOUS_VERSION}" >> $GITHUB_OUTPUT - name: Generate Release Notes id: generate-release-notes run: | generate() { gh api \ --method POST \ -H "Accept: application/vnd.github+json" \ -H "X-GitHub-Api-Version: 2022-11-28" \ /repos/${{ github.repository }}/releases/generate-notes \ -f tag_name='v${{ steps.version-label.outputs.version }}' \ -f previous_tag='v${{ steps.previous-version-label.outputs.previous_version }}' \ | jq -r ".body" } echo "changelog<> $GITHUB_OUTPUT - name: Update changelog.txt run: | echo "# [v${{ steps.version-label.outputs.version }}] - `date +%d-%m-%Y`" >> changelog.txt.tmp echo "${{steps.generate-release-notes.outputs.changelog}}" >> changelog.txt.tmp echo '-n' >> changelog.txt cat changelog.txt >> changelog.txt.tmp mv changelog.txt.tmp changelog.txt - name: Update README run: | cargo run --example update-readme - name: Commit Changelog and README run: git add -f changelog.txt README.md - name: Create Pull Request id: cpr uses: peter-evans/create-pull-request@v7 with: commit-message: "[skip test] update changelog" title: "[skip test] Release v${{ steps.version-label.outputs.version }}" body: > This is an automated PR to build the latest changelog. Upon merging, a new release will be created and published to crates.io.
Due to security considerations, PRs created by GitHub Actions cannot be merged automatically. Please review the changes and merge the PR.
If you require the test suites to run, you can close the PR and reopen it to trigger those workflows. delete-branch: true labels: release branch: "release/v${{ steps.version-label.outputs.version }}" - name: Enable Pull Request Automerge uses: peter-evans/enable-pull-request-automerge@v3 with: token: ${{ secrets.GITHUB_TOKEN }} pull-request-number: ${{ steps.cpr.outputs.pull-request-number }} release: if: ${{ (github.event.pull_request.merged == true) && (contains(github.event.pull_request.labels.*.name, 'release')) }} runs-on: ubuntu-latest env: CRATES_IO_TOKEN: ${{ secrets.CRATES_IO_TOKEN }} GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - uses: actions/checkout@v4 with: submodules: recursive - name: Configure Git run: | git config --local user.email "actions@github.com" git config --local user.name "Actions Auto Build" - name: Get current version id: version-label run: | VERSION=$(grep version Cargo.toml | head -n 1 | cut -d'"' -f2) echo "version=${VERSION}" >> $GITHUB_OUTPUT - name: Create tag run: | git tag -a v${{ steps.version-label.outputs.version }} -m "Release v${{ steps.version-label.outputs.version }}" git push origin --tags - name: Login to Crates.io run: cargo login ${CRATES_IO_TOKEN} - name: Publish GitHub release run: | gh release create v${{ steps.version-label.outputs.version }} --generate-notes - name: Publish crate run: cargo publish comrak-0.29.0/.github/workflows/rust.yml000064400000000000000000000053101046102023000163040ustar 00000000000000name: Rust on: [push, pull_request] env: MSRV: 1.62.1 jobs: build_lib_test: runs-on: ubuntu-latest strategy: matrix: rust: - nightly - beta - stable - $MSRV steps: - uses: actions/checkout@v4 with: submodules: true - name: Obtain Rust run: rustup override set ${{ matrix.rust }} - name: Build library run: cargo build --verbose --lib - name: Build examples run: cargo build --verbose --lib --examples - name: Run unit tests run: cargo test --verbose - name: "Run README sample (TODO: update me)" run: cargo run 
--example sample build_bin_spec: runs-on: ubuntu-latest strategy: matrix: rust: - nightly - beta - stable - $MSRV steps: - uses: actions/checkout@v4 with: submodules: true - name: Obtain Rust run: rustup override set ${{ matrix.rust }} - name: Build binary run: cargo build --verbose --bin comrak --release - name: Run spec tests run: script/cibuild build_wasm: runs-on: ubuntu-latest strategy: matrix: rust: - nightly - beta - stable - $MSRV steps: - uses: actions/checkout@v4 with: submodules: true - name: Obtain Rust run: rustup override set ${{ matrix.rust }} - name: Setup for wasm run: rustup target add wasm32-unknown-unknown - name: Build run: cargo build --verbose --target wasm32-unknown-unknown - name: Build examples run: cargo build --verbose --target wasm32-unknown-unknown --examples no_features_build_test: runs-on: ubuntu-latest strategy: matrix: rust: - nightly - beta - stable - $MSRV steps: - uses: actions/checkout@v4 with: submodules: true - name: Obtain Rust run: rustup override set ${{ matrix.rust }} - name: Build and test with no features run: cargo test --no-default-features --tests lockfile: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: submodules: true - name: Obtain Rust run: rustup override set $MSRV - name: Build and test with no features run: cargo +stable build --locked --release --all-features clippy_format: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: submodules: true - name: Obtain Rust run: rustup override set $MSRV - name: Check clippy run: rustup component add clippy && cargo clippy - name: Check formatting run: rustup component add rustfmt && cargo fmt -- --check comrak-0.29.0/.gitignore000064400000000000000000000002101046102023000131510ustar 00000000000000target comrak-* .vscode .idea vendor/comrak vendor/progit benches/cmark-gfm benches/comrak-* benches/pulldown-cmark benches/markdown-it comrak-0.29.0/.gitmodules000064400000000000000000000007141046102023000133470ustar 00000000000000[submodule 
"vendor/cmark-gfm"] path = vendor/cmark-gfm url = https://github.com/kivikakk/cmark-gfm.git [submodule "vendor/pulldown-cmark"] path = vendor/pulldown-cmark url = https://github.com/raphlinus/pulldown-cmark.git [submodule "vendor/markdown-it"] path = vendor/markdown-it url = https://github.com/rlidwka/markdown-it.rs.git [submodule "vendor/commonmark-spec"] path = vendor/commonmark-spec url = https://github.com/commonmark/commonmark-spec comrak-0.29.0/CODE_OF_CONDUCT.md000064400000000000000000000012531046102023000137700ustar 00000000000000# Code of Conduct We are sentient. To be sentient is to be limited. In our limitation, we make choices that are unwise or are flawed. If we make unwise choices because of our limitation, we cannot judge others for the same reason. So, we cannot judge, thus we forgive. This project and its results are intended as: a place of learning, a place of understanding, a place of teaching, a place of sharing, a place of creators creating the tools for other creators to create complicated things elegantly. Be well, Creator. Be well and create. --- Based on the [Creator's Code v2](https://github.com/Xe/creators-code). Please read the link for more information. comrak-0.29.0/COPYING000064400000000000000000000214321046102023000122250ustar 00000000000000Copyright (c) 2017–2024, Asherah Connor and Comrak contributors All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ----- cmark-gfm derived from https://github.com/github/cmark Copyright (c) 2014, John MacFarlane All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ----- houdini.h, houdini_href_e.c, houdini_html_e.c, houdini_html_u.c derive from https://github.com/vmg/houdini (with some modifications) Copyright (C) 2012 Vicent Martí Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ----- buffer.h, buffer.c, chunk.h are derived from code (C) 2012 Github, Inc. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ----- utf8.h and utf8.c are derived from utf8proc (<http://www.public-software-group.org/utf8proc>), (C) 2009 Public Software Group e. V., Berlin, Germany. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ----- The normalization code in normalize.py was derived from the markdowntest project, Copyright 2013 Karl Dubost: The MIT License (MIT) Copyright (c) 2013 Karl Dubost Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ----- The CommonMark spec (test/spec.txt) is Copyright (C) 2014-15 John MacFarlane Released under the Creative Commons CC-BY-SA 4.0 license: <http://creativecommons.org/licenses/by-sa/4.0/>. ----- The test software in test/ is Copyright (c) 2014, John MacFarlane All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. comrak-0.29.0/Cargo.lock0000644000000754210000000000100103640ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
version = 3 [[package]] name = "adler" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "aho-corasick" version = "0.7.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" dependencies = [ "memchr", ] [[package]] name = "arbitrary" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110" dependencies = [ "derive_arbitrary", ] [[package]] name = "autocfg" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "base64" version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" [[package]] name = "bincode" version = "1.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" dependencies = [ "serde", ] [[package]] name = "bit-set" version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" dependencies = [ "bit-vec", ] [[package]] name = "bit-vec" version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" [[package]] name = "bitflags" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "caseless" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"808dab3318747be122cb31d36de18d4d1c81277a76f8332a02b81a3d73463d7f" dependencies = [ "regex", "unicode-normalization", ] [[package]] name = "cc" version = "1.0.78" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a20104e2335ce8a659d6dd92a51a767a0c062599c73b343fd152cb401e828c3d" [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" version = "4.0.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7db700bc935f9e43e88d00b0850dae18a63773cfbec6d8e070fccf7fef89a39" dependencies = [ "bitflags", "clap_derive", "clap_lex", "is-terminal", "once_cell", "strsim", "termcolor", "terminal_size", ] [[package]] name = "clap_derive" version = "4.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0177313f9f02afc995627906bbd8967e2be069f5261954222dac78290c2b9014" dependencies = [ "heck", "proc-macro-error", "proc-macro2", "quote", "syn 1.0.107", ] [[package]] name = "clap_lex" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0d4198f73e42b4936b35b5bb248d81d2b595ecb170da0bac7655c54eedfa8da8" dependencies = [ "os_str_bytes", ] [[package]] name = "comrak" version = "0.29.0" dependencies = [ "arbitrary", "caseless", "clap", "derive_builder", "emojis", "entities", "memchr", "ntest", "once_cell", "regex", "shell-words", "slug", "syntect", "toml 0.7.3", "typed-arena", "unicode_categories", "xdg", ] [[package]] name = "crc32fast" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" dependencies = [ "cfg-if", ] [[package]] name = "darling" version = "0.20.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "54e36fcd13ed84ffdfda6f5be89b31287cbb80c439841fe69e04841435464391" 
dependencies = [ "darling_core", "darling_macro", ] [[package]] name = "darling_core" version = "0.20.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c2cf1c23a687a1feeb728783b993c4e1ad83d99f351801977dd809b48d0a70f" dependencies = [ "fnv", "ident_case", "proc-macro2", "quote", "strsim", "syn 2.0.60", ] [[package]] name = "darling_macro" version = "0.20.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a668eda54683121533a393014d8692171709ff57a7d61f187b6e782719f8933f" dependencies = [ "darling_core", "quote", "syn 2.0.60", ] [[package]] name = "derive_arbitrary" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611" dependencies = [ "proc-macro2", "quote", "syn 2.0.60", ] [[package]] name = "derive_builder" version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0350b5cb0331628a5916d6c5c0b72e97393b8b6b03b47a9284f4e7f5a405ffd7" dependencies = [ "derive_builder_macro", ] [[package]] name = "derive_builder_core" version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d48cda787f839151732d396ac69e3473923d54312c070ee21e9effcaa8ca0b1d" dependencies = [ "darling", "proc-macro2", "quote", "syn 2.0.60", ] [[package]] name = "derive_builder_macro" version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b" dependencies = [ "derive_builder_core", "syn 2.0.60", ] [[package]] name = "deunicode" version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71dbf1bf89c23e9cd1baf5e654f622872655f195b36588dc9dc38f7eda30758c" dependencies = [ "deunicode 1.4.4", ] [[package]] name = "deunicode" version = "1.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"322ef0094744e63628e6f0eb2295517f79276a5b342a4c2ff3042566ca181d4e" [[package]] name = "emojis" version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f619a926616ae7149a0d82610b051134a0d6c4ae2962d990c06c847a445c5d9" dependencies = [ "phf", ] [[package]] name = "entities" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5320ae4c3782150d900b79807611a59a99fc9a1d61d686faafc24b93fc8d7ca" [[package]] name = "errno" version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" dependencies = [ "libc", "windows-sys 0.52.0", ] [[package]] name = "fancy-regex" version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d6b8560a05112eb52f04b00e5d3790c0dd75d9d980eb8a122fb23b92a623ccf" dependencies = [ "bit-set", "regex", ] [[package]] name = "flate2" version = "1.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8a2db397cb1c8772f31494cb8917e48cd1e64f0fa7efac59fbd741a0a8ce841" dependencies = [ "crc32fast", "miniz_oxide", ] [[package]] name = "fnv" version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "hashbrown" version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" [[package]] name = "heck" version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" [[package]] name = "ident_case" version = "1.0.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] name = "indexmap" version = "1.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" dependencies = [ "autocfg", "hashbrown", ] [[package]] name = "io-lifetimes" version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" dependencies = [ "hermit-abi", "libc", "windows-sys 0.48.0", ] [[package]] name = "is-terminal" version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8687c819457e979cc940d09cb16e42a1bf70aa6b60a549de6d3a62a0ee90c69e" dependencies = [ "hermit-abi", "io-lifetimes", "rustix 0.36.17", "windows-sys 0.45.0", ] [[package]] name = "itoa" version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fad582f4b9e86b6caa621cabeb0963332d92eea04729ab12892c2533951e6440" [[package]] name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" [[package]] name = "line-wrap" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f30344350a2a51da54c1d53be93fade8a237e545dbcc4bdbe635413f2117cab9" dependencies = [ "safemem", ] [[package]] name = "linked-hash-map" version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" [[package]] name = "linux-raw-sys" version = "0.1.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" [[package]] name = "linux-raw-sys" version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" [[package]] name = "memchr" version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" [[package]] name = "miniz_oxide" version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa" dependencies = [ "adler", ] [[package]] name = "ntest" version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41cd16a2e6992865367e7ca50cd6953d09daaed93641421168733a1274afadd6" dependencies = [ "ntest_test_cases", "ntest_timeout", ] [[package]] name = "ntest_test_cases" version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "197eff6c12b80ff5de6173e438fa3c1340a9e708118c1626e690f65aee1e5332" dependencies = [ "proc-macro2", "quote", "syn 1.0.107", ] [[package]] name = "ntest_timeout" version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef492b5cf80f90c050b287e747228a1fa6517e9d754f364b5a7e0e038e49a25f" dependencies = [ "proc-macro-crate", "proc-macro2", "quote", "syn 1.0.107", ] [[package]] name = "once_cell" version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "onig" version = "6.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8c4b31c8722ad9171c6d77d3557db078cab2bd50afcc9d09c8b315c59df8ca4f" dependencies = [ "bitflags", "libc", "once_cell", "onig_sys", ] [[package]] name = "onig_sys" version = 
"69.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b829e3d7e9cc74c7e315ee8edb185bf4190da5acde74afd7fc59c35b1f086e7" dependencies = [ "cc", "pkg-config", ] [[package]] name = "os_str_bytes" version = "6.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" [[package]] name = "phf" version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "928c6535de93548188ef63bb7c4036bd415cd8f36ad25af44b9789b2ee72a48c" dependencies = [ "phf_shared", ] [[package]] name = "phf_shared" version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1fb5f6f826b772a8d4c0394209441e7d37cbbb967ae9c7e0e8134365c9ee676" dependencies = [ "siphasher", ] [[package]] name = "pkg-config" version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" [[package]] name = "plist" version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd39bc6cdc9355ad1dc5eeedefee696bb35c34caf21768741e81826c0bbd7225" dependencies = [ "base64", "indexmap", "line-wrap", "serde", "time", "xml-rs", ] [[package]] name = "proc-macro-crate" version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eda0fc3b0fb7c975631757e14d9049da17374063edb6ebbcbc54d880d4fe94e9" dependencies = [ "once_cell", "thiserror", "toml 0.5.10", ] [[package]] name = "proc-macro-error" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" dependencies = [ "proc-macro-error-attr", "proc-macro2", "quote", "syn 1.0.107", "version_check", ] [[package]] name = "proc-macro-error-attr" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" dependencies = [ "proc-macro2", "quote", "version_check", ] [[package]] name = "proc-macro2" version = "1.0.81" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d1597b0c024618f09a9c3b8655b7e430397a36d23fdafec26d6965e9eec3eba" dependencies = [ "unicode-ident", ] [[package]] name = "quote" version = "1.0.36" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" dependencies = [ "proc-macro2", ] [[package]] name = "regex" version = "1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286" dependencies = [ "aho-corasick", "memchr", "regex-syntax", ] [[package]] name = "regex-syntax" version = "0.6.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848" [[package]] name = "rustix" version = "0.36.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "305efbd14fde4139eb501df5f136994bb520b033fa9fbdce287507dc23b8c7ed" dependencies = [ "bitflags", "errno", "io-lifetimes", "libc", "linux-raw-sys 0.1.4", "windows-sys 0.45.0", ] [[package]] name = "rustix" version = "0.37.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea8ca367a3a01fe35e6943c400addf443c0f57670e6ec51196f71a4b8762dd2" dependencies = [ "bitflags", "errno", "io-lifetimes", "libc", "linux-raw-sys 0.3.8", "windows-sys 0.48.0", ] [[package]] name = "ryu" version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b4b9743ed687d4b4bcedf9ff5eaa7398495ae14e61cba0a295704edbc7decde" [[package]] name = "safemem" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"ef703b7cb59335eae2eb93ceb664c0eb7ea6bf567079d843e09420219668e072" [[package]] name = "same-file" version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" dependencies = [ "winapi-util", ] [[package]] name = "serde" version = "1.0.152" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb" [[package]] name = "serde_derive" version = "1.0.152" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e" dependencies = [ "proc-macro2", "quote", "syn 1.0.107", ] [[package]] name = "serde_json" version = "1.0.91" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877c235533714907a8c2464236f5c4b2a17262ef1bd71f38f35ea592c8da6883" dependencies = [ "itoa", "ryu", "serde", ] [[package]] name = "serde_spanned" version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0efd8caf556a6cebd3b285caf480045fcc1ac04f6bd786b09a6f11af30c4fcf4" dependencies = [ "serde", ] [[package]] name = "shell-words" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde" [[package]] name = "siphasher" version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" [[package]] name = "slug" version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3bc762e6a4b6c6fcaade73e77f9ebc6991b676f88bb2358bddb56560f073373" dependencies = [ "deunicode 0.4.5", ] [[package]] name = "strsim" version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "syn" version = "1.0.107" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "syn" version = "2.0.60" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "909518bc7b1c9b779f1bbf07f2929d35af9f0f37e47c6e9ef7f9dddc1e1821f3" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "syntect" version = "5.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c6c454c27d9d7d9a84c7803aaa3c50cd088d2906fe3c6e42da3209aa623576a8" dependencies = [ "bincode", "bitflags", "fancy-regex", "flate2", "fnv", "lazy_static", "once_cell", "onig", "plist", "regex-syntax", "serde", "serde_derive", "serde_json", "thiserror", "walkdir", "yaml-rust", ] [[package]] name = "termcolor" version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" dependencies = [ "winapi-util", ] [[package]] name = "terminal_size" version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e6bf6f19e9f8ed8d4048dc22981458ebcf406d67e94cd422e5ecd73d63b3237" dependencies = [ "rustix 0.37.27", "windows-sys 0.48.0", ] [[package]] name = "thiserror" version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f" dependencies = [ "proc-macro2", "quote", "syn 1.0.107", ] [[package]] name = "time" version = "0.3.17" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "a561bf4617eebd33bca6434b988f39ed798e527f51a1e797d0ee4f61c0a38376" dependencies = [ "itoa", "serde", "time-core", "time-macros", ] [[package]] name = "time-core" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd" [[package]] name = "time-macros" version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d967f99f534ca7e495c575c62638eebc2898a8c84c119b89e250477bc4ba16b2" dependencies = [ "time-core", ] [[package]] name = "tinyvec" version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" dependencies = [ "tinyvec_macros", ] [[package]] name = "tinyvec_macros" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "toml" version = "0.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1333c76748e868a4d9d1017b5ab53171dfd095f70c712fdb4653a406547f598f" dependencies = [ "serde", ] [[package]] name = "toml" version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b403acf6f2bb0859c93c7f0d967cb4a75a7ac552100f9322faf64dc047669b21" dependencies = [ "serde", "serde_spanned", "toml_datetime", "toml_edit", ] [[package]] name = "toml_datetime" version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3ab8ed2edee10b50132aed5f331333428b011c99402b5a534154ed15746f9622" dependencies = [ "serde", ] [[package]] name = "toml_edit" version = "0.19.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08de71aa0d6e348f070457f85af8bd566e2bc452156a423ddf22861b3a953fae" dependencies = [ "indexmap", "serde", "serde_spanned", 
"toml_datetime", "winnow", ] [[package]] name = "typed-arena" version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" [[package]] name = "unicode-ident" version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" [[package]] name = "unicode-normalization" version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" dependencies = [ "tinyvec", ] [[package]] name = "unicode_categories" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" [[package]] name = "version_check" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "walkdir" version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" dependencies = [ "same-file", "winapi", "winapi-util", ] [[package]] name = "winapi" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" dependencies = [ "winapi-i686-pc-windows-gnu", "winapi-x86_64-pc-windows-gnu", ] [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" dependencies = [ "winapi", ] 
[[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows-sys" version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" dependencies = [ "windows-targets 0.42.2", ] [[package]] name = "windows-sys" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" dependencies = [ "windows-targets 0.48.5", ] [[package]] name = "windows-sys" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ "windows-targets 0.52.4", ] [[package]] name = "windows-targets" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" dependencies = [ "windows_aarch64_gnullvm 0.42.2", "windows_aarch64_msvc 0.42.2", "windows_i686_gnu 0.42.2", "windows_i686_msvc 0.42.2", "windows_x86_64_gnu 0.42.2", "windows_x86_64_gnullvm 0.42.2", "windows_x86_64_msvc 0.42.2", ] [[package]] name = "windows-targets" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" dependencies = [ "windows_aarch64_gnullvm 0.48.5", "windows_aarch64_msvc 0.48.5", "windows_i686_gnu 0.48.5", "windows_i686_msvc 0.48.5", "windows_x86_64_gnu 0.48.5", "windows_x86_64_gnullvm 0.48.5", "windows_x86_64_msvc 0.48.5", ] [[package]] name = "windows-targets" version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" 
dependencies = [ "windows_aarch64_gnullvm 0.52.4", "windows_aarch64_msvc 0.52.4", "windows_i686_gnu 0.52.4", "windows_i686_msvc 0.52.4", "windows_x86_64_gnu 0.52.4", "windows_x86_64_gnullvm 0.52.4", "windows_x86_64_msvc 0.52.4", ] [[package]] name = "windows_aarch64_gnullvm" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" [[package]] name = "windows_aarch64_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" [[package]] name = "windows_aarch64_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" [[package]] name = "windows_i686_gnu" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" [[package]] name = "windows_i686_gnu" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" [[package]] name = "windows_i686_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" [[package]] name = "windows_i686_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" [[package]] name = "windows_x86_64_gnu" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" [[package]] name = "windows_x86_64_gnu" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" [[package]] name = "windows_x86_64_gnullvm" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" [[package]] name = "windows_x86_64_msvc" version = "0.42.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" [[package]] name = "windows_x86_64_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" [[package]] name = "winnow" version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da01e24d23aeb852fb05609f2701ce4da9f73d58857239ed3853667cf178f204" dependencies = [ "memchr", ] [[package]] name = "xdg" version = "2.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "213b7324336b53d2414b2db8537e56544d981803139155afa84f76eeebb7a546" [[package]] name = "xml-rs" version = "0.8.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "52839dc911083a8ef63efa4d039d1f58b5e409f923e44c80828f206f66e5541c" [[package]] name = "yaml-rust" version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" dependencies = [ "linked-hash-map", ] comrak-0.29.0/Cargo.toml0000644000000072220000000000100104010ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. 
[package] edition = "2018" rust-version = "1.62.1" name = "comrak" version = "0.29.0" authors = ["Asherah Connor "] build = false exclude = [ "/hooks/*", "/script/*", "/vendor/*", "/.travis.yml", "/Makefile", "/spec_out.txt", ] autobins = false autoexamples = false autotests = false autobenches = false description = "A 100% CommonMark-compatible GitHub Flavored Markdown parser and formatter" homepage = "https://github.com/kivikakk/comrak" documentation = "https://docs.rs/comrak" readme = "README.md" keywords = [ "markdown", "commonmark", ] categories = [ "text-processing", "parsing", "command-line-utilities", ] license = "BSD-2-Clause" repository = "https://github.com/kivikakk/comrak" resolver = "2" [package.metadata.docs.rs] all-features = true rustdoc-args = [ "--cfg", "docsrs", ] [profile.release] lto = true [lib] name = "comrak" path = "src/lib.rs" [[bin]] name = "comrak" path = "src/main.rs" doc = false required-features = [ "cli", "syntect", ] [[example]] name = "custom_headings" path = "examples/custom_headings.rs" [[example]] name = "headers" path = "examples/headers.rs" [[example]] name = "iterator_replace" path = "examples/iterator_replace.rs" [[example]] name = "s-expr" path = "examples/s-expr.rs" [[example]] name = "sample" path = "examples/sample.rs" [[example]] name = "syntax_highlighter" path = "examples/syntax_highlighter.rs" [[example]] name = "syntect" path = "examples/syntect.rs" [[example]] name = "traverse_demo" path = "examples/traverse_demo.rs" [[example]] name = "update-readme" path = "examples/update-readme.rs" [[bench]] name = "progit" path = "benches/progit.rs" [dependencies.arbitrary] version = "1" features = ["derive"] optional = true [dependencies.caseless] version = "0.2.1" [dependencies.derive_builder] version = "0.20.0" [dependencies.emojis] version = "0.6.2" optional = true [dependencies.entities] version = "1.0.1" [dependencies.memchr] version = "2" [dependencies.once_cell] version = "1.19.0" [dependencies.regex] version = "1" 
[dependencies.shell-words] version = "1.0" optional = true [dependencies.slug] version = "0.1.4" [dependencies.typed-arena] version = "2.0.2" [dependencies.unicode_categories] version = "0.1.1" [dev-dependencies.ntest] version = "0.9" [dev-dependencies.toml] version = "0.7.3" [features] cli = [ "clap", "shell-words", "xdg", ] default = [ "cli", "syntect", ] shortcodes = ["emojis"] [target.'cfg(all(not(windows), not(target_arch="wasm32")))'.dependencies.xdg] version = "^2.5" optional = true [target.'cfg(not(target_arch="wasm32"))'.dependencies.clap] version = "4.0" features = [ "derive", "string", "wrap_help", ] optional = true [target.'cfg(not(target_arch="wasm32"))'.dependencies.syntect] version = "5.0" features = [ "default-themes", "default-syntaxes", "html", "regex-onig", ] optional = true default-features = false [target.'cfg(target_arch="wasm32")'.dependencies.clap] version = "4.0.32" features = [ "derive", "string", ] optional = true [target.'cfg(target_arch="wasm32")'.dependencies.syntect] version = "5.0" features = ["default-fancy"] optional = true default-features = false comrak-0.29.0/Cargo.toml.orig000064400000000000000000000037721046102023000140700ustar 00000000000000[package] name = "comrak" version = "0.29.0" authors = ["Asherah Connor "] rust-version = "1.62.1" description = "A 100% CommonMark-compatible GitHub Flavored Markdown parser and formatter" documentation = "https://docs.rs/comrak" homepage = "https://github.com/kivikakk/comrak" repository = "https://github.com/kivikakk/comrak" readme = "README.md" keywords = ["markdown", "commonmark"] license = "BSD-2-Clause" categories = ["text-processing", "parsing", "command-line-utilities"] exclude = [ "/hooks/*", "/script/*", "/vendor/*", "/.travis.yml", "/Makefile", "/spec_out.txt", ] resolver = "2" edition = "2018" [package.metadata.docs.rs] all-features = true rustdoc-args = ["--cfg", "docsrs"] [profile.release] lto = true [[bin]] name = "comrak" required-features = ["cli", "syntect"] doc = false 
[dependencies] typed-arena = "2.0.2" regex = "1" once_cell = "1.19.0" entities = "1.0.1" unicode_categories = "0.1.1" memchr = "2" shell-words = { version = "1.0", optional = true } slug = "0.1.4" emojis = { version = "0.6.2", optional = true } arbitrary = { version = "1", optional = true, features = ["derive"] } derive_builder = "0.20.0" caseless = "0.2.1" [dev-dependencies] ntest = "0.9" toml = "0.7.3" [features] default = ["cli", "syntect"] cli = ["clap", "shell-words", "xdg"] shortcodes = ["emojis"] [target.'cfg(all(not(windows), not(target_arch="wasm32")))'.dependencies] xdg = { version = "^2.5", optional = true } [target.'cfg(target_arch="wasm32")'.dependencies] syntect = { version = "5.0", optional = true, default-features = false, features = [ "default-fancy", ] } clap = { version = "4.0.32", optional = true, features = ["derive", "string"] } [target.'cfg(not(target_arch="wasm32"))'.dependencies] syntect = { version = "5.0", optional = true, default-features = false, features = [ "default-themes", "default-syntaxes", "html", "regex-onig", ] } clap = { version = "4.0", optional = true, features = [ "derive", "string", "wrap_help", ] } comrak-0.29.0/README.md000064400000000000000000000333141046102023000124530ustar 00000000000000# [Comrak](https://github.com/kivikakk/comrak) [![Build status](https://github.com/kivikakk/comrak/actions/workflows/rust.yml/badge.svg)](https://github.com/kivikakk/comrak/actions/workflows/rust.yml) [![CommonMark: 652/652](https://img.shields.io/badge/commonmark-652%2F652-brightgreen.svg)](https://github.com/commonmark/commonmark-spec/blob/9103e341a973013013bb1a80e13567007c5cef6f/spec.txt) [![GFM: 670/670](https://img.shields.io/badge/gfm-670%2F670-brightgreen.svg)](https://github.com/kivikakk/cmark-gfm/blob/2f13eeedfe9906c72a1843b03552550af7bee29a/test/spec.txt) [![crates.io version](https://img.shields.io/crates/v/comrak.svg)](https://crates.io/crates/comrak) [![docs.rs](https://docs.rs/comrak/badge.svg)](https://docs.rs/comrak) 
Rust port of [github's `cmark-gfm`](https://github.com/github/cmark-gfm). Compliant with [CommonMark 0.31.2](https://spec.commonmark.org/0.31.2/) in default mode. GFM support synced with release `0.29.0.gfm.13`. ## Installation Specify it as a requirement in `Cargo.toml`: ``` toml [dependencies] comrak = "0.29" ``` Comrak's library supports Rust 1.62.1+. ### CLI - Anywhere with a Rust toolchain: - `cargo install comrak` - Many Unix distributions: - `pacman -S comrak` - `brew install comrak` - `dnf install comrak` - `nix run nixpkgs#comrak` You can also find builds I've published in [GitHub Releases](https://github.com/kivikakk/comrak/releases), but they're limited to machines I have access to at the time of making them\! [webinstall.dev](https://webinstall.dev/comrak/) offers `curl | shell`-style installation of the latest of these for your OS. ## Usage
Click to expand the CLI --help output. ``` console $ comrak --help ``` ``` A 100% CommonMark-compatible GitHub Flavored Markdown parser and formatter Usage: comrak [OPTIONS] [FILE]... Arguments: [FILE]... CommonMark file(s) to parse; or standard input if none passed Options: -c, --config-file <PATH> Path to config file containing command-line arguments, or 'none' [default: /home/runner/.config/comrak/config] -i, --inplace To perform an in-place formatting --hardbreaks Treat newlines as hard line breaks --smart Use smart punctuation --github-pre-lang Use GitHub-style <pre lang> for code blocks

      --full-info-string
          Enable full info strings for code blocks

      --gfm
          Enable GitHub-flavored markdown extensions: strikethrough, tagfilter, table, autolink, and
          tasklist. Also enables --github-pre-lang and --gfm-quirks

      --gfm-quirks
          Enables GFM-style quirks in output HTML, such as not nesting <strong> tags, which
          otherwise breaks CommonMark compatibility

      --relaxed-tasklist-character
          Enable relaxing which character is allowed in a tasklists

      --relaxed-autolinks
          Enable relaxing of autolink parsing, allow links to be recognized when in brackets and
          allow all url schemes

      --default-info-string <INFO>
          Default value for fenced code block's info strings if none is given

      --unsafe
          Allow raw HTML and dangerous URLs

      --gemojis
          Translate gemojis into UTF-8 characters

      --escape
          Escape raw HTML instead of clobbering it

      --escaped-char-spans
          Wrap escaped characters in span tags

  -e, --extension <EXTENSION>
          Specify extension name(s) to use
          
          Multiple extensions can be delimited with ",", e.g. --extension strikethrough,table
          
          [possible values: strikethrough, tagfilter, table, autolink, tasklist, superscript,
          footnotes, description-lists, multiline-block-quotes, math-dollars, math-code,
          wikilinks-title-after-pipe, wikilinks-title-before-pipe, underline, spoiler, greentext]

  -t, --to <FORMAT>
          Specify output format
          
          [default: html]
          [possible values: html, xml, commonmark]

  -o, --output <FILE>
          Write output to FILE instead of stdout

      --width <WIDTH>
          Specify wrap width (0 = nowrap)
          
          [default: 0]

      --header-ids <PREFIX>
          Use the Comrak header IDs extension, with the given ID prefix

      --front-matter-delimiter <DELIMITER>
          Ignore front-matter that starts and ends with the given string

      --syntax-highlighting <THEME>
          Syntax highlighting for codefence blocks. Choose a theme or 'none' for disabling
          
          [default: base16-ocean.dark]

      --list-style <LIST_STYLE>
          Specify bullet character for lists (-, +, *) in CommonMark output
          
          [default: dash]
          [possible values: dash, plus, star]

      --sourcepos
          Include source position attribute in HTML and XML output

      --experimental-inline-sourcepos
          Include inline sourcepos in HTML output, which is known to have issues

      --ignore-setext
          Ignore setext headers

      --ignore-empty-links
          Ignore empty links

  -h, --help
          Print help information (use `-h` for a summary)

  -V, --version
          Print version information

By default, Comrak will attempt to read command-line options from a config file specified by
--config-file. This behaviour can be disabled by passing --config-file none. It is not an error if
the file does not exist.
```

And there's a Rust interface. You can use `comrak::markdown_to_html` directly: ``` rust use comrak::{markdown_to_html, Options}; assert_eq!(markdown_to_html("Hello, **世界**!", &Options::default()), "

<p>Hello, <strong>世界</strong>!</p>

\n"); ``` Or you can parse the input into an AST yourself, manipulate it, and then use your desired formatter: ``` rust use comrak::nodes::NodeValue; use comrak::{format_html, parse_document, Arena, Options}; fn replace_text(document: &str, orig_string: &str, replacement: &str) -> String { // The returned nodes are created in the supplied Arena, and are bound by its lifetime. let arena = Arena::new(); // Parse the document into a root `AstNode` let root = parse_document(&arena, document, &Options::default()); // Iterate over all the descendants of root. for node in root.descendants() { if let NodeValue::Text(ref mut text) = node.data.borrow_mut().value { // If the node is a text node, perform the string replacement. *text = text.replace(orig_string, replacement); } } let mut html = vec![]; format_html(root, &Options::default(), &mut html).unwrap(); String::from_utf8(html).unwrap() } fn main() { let doc = "This is my input.\n\n1. Also [my](#) input.\n2. Certainly *my* input.\n"; let orig = "my"; let repl = "your"; let html = replace_text(&doc, &orig, &repl); println!("{}", html); // Output: // //

    // <p>This is your input.</p>
    // <ol>
    // <li>Also <a href="#">your</a> input.</li>
    // <li>Certainly <em>your</em> input.</li>
    // </ol>
} ``` For a slightly more real-world example, see how I [generate my GitHub user README](https://github.com/kivikakk/kivikakk) from a base document with embedded YAML, which itself has embedded Markdown, or [check out some of Comrak's dependents on crates.io](https://crates.io/crates/comrak/reverse_dependencies) or [on GitHub](https://github.com/kivikakk/comrak/network/dependents). ## Security As with [`cmark`](https://github.com/commonmark/cmark) and [`cmark-gfm`](https://github.com/github/cmark-gfm#security), Comrak will scrub raw HTML and potentially dangerous links. This change was introduced in Comrak 0.4.0 in support of a safe-by-default posture, and later adopted by our contemporaries. :) To allow these, use the `unsafe_` option (or `--unsafe` with the command line program). If doing so, we recommend the use of a sanitisation library like [`ammonia`](https://github.com/notriddle/ammonia) configured specific to your needs. ## Extensions Comrak supports the five extensions to CommonMark defined in the [GitHub Flavored Markdown Spec](https://github.github.com/gfm/): - [Tables](https://github.github.com/gfm/#tables-extension-) - [Task list items](https://github.github.com/gfm/#task-list-items-extension-) - [Strikethrough](https://github.github.com/gfm/#strikethrough-extension-) - [Autolinks](https://github.github.com/gfm/#autolinks-extension-) - [Disallowed Raw HTML](https://github.github.com/gfm/#disallowed-raw-html-extension-) Comrak additionally supports its own extensions, which are yet to be specced out (PRs welcome\!): - Superscript - Header IDs - Footnotes - Description lists - Front matter - Multi-line blockquotes - Math - Emoji shortcodes - Wikilinks - Underline - Spoiler text - "Greentext" By default none are enabled; they are individually enabled with each parse by setting the appropriate values in the [`ExtensionOptions` struct](https://docs.rs/comrak/latest/comrak/struct.ExtensionOptions.html). 
## Plugins ### Fenced code block syntax highlighting You can provide your own syntax highlighting engine. Create an implementation of the `SyntaxHighlighterAdapter` trait, and then provide an instance of such adapter to `Plugins.render.codefence_syntax_highlighter`. For formatting a Markdown document with plugins, use the `markdown_to_html_with_plugins` function, which accepts your plugins object as a parameter. See the `syntax_highlighter.rs` and `syntect.rs` examples for more details. #### Syntect [`syntect`](https://github.com/trishume/syntect) is a syntax highlighting library for Rust. By default, `comrak` offers a plugin for it. In order to utilize it, create an instance of `plugins::syntect::SyntectAdapter` and use it in your `Plugins` option. ## Related projects Comrak's design goal is to model the upstream [`cmark-gfm`](https://github.com/github/cmark-gfm) as closely as possible in terms of code structure. The upside of this is that a change in `cmark-gfm` has a very predictable change in Comrak. Likewise, any bug in `cmark-gfm` is likely to be reproduced in Comrak. This could be considered a pro or a con, depending on your use case. The downside, of course, is that the code often diverges from idiomatic Rust, especially in the AST's extensive use of `RefCell`, and while contributors have made it as fast as possible, it simply won't be as fast as some other CommonMark parsers depending on your use-case. Here are some other projects to consider: - [Raph Levien](https://github.com/raphlinus)'s [`pulldown-cmark`](https://github.com/google/pulldown-cmark). It's very fast, uses a novel parsing algorithm, and doesn't construct an AST (but you can use it to make one if you want). `cargo doc` uses this, as do many other projects in the ecosystem. - [markdown-rs](https://github.com/wooorm/markdown-rs) (1.x) looks worth watching. - Know of another library? Please open a PR to add it\! 
As far as I know, Comrak is the only library to implement all of the [GitHub Flavored Markdown extensions](https://github.github.com/gfm) rigorously. ## Benchmarking You'll need to [install hyperfine](https://github.com/sharkdp/hyperfine#installation), and CMake if you want to compare against `cmark-gfm`. If you want to just run the benchmark for the `comrak` binary itself, run: ``` bash make bench-comrak ``` This will build Comrak in release mode, and run benchmark on it. You will see the time measurements as reported by hyperfine in the console. The `Makefile` also provides a way to run benchmarks for `comrak` current state (with your changes), `comrak` main branch, [`cmark-gfm`](https://github.com/github/cmark-gfm), [`pulldown-cmark`](https://github.com/raphlinus/pulldown-cmark) and [`markdown-it.rs`](https://github.com/rlidwka/markdown-it.rs). You'll need CMake, and ensure [submodules are prepared](https://stackoverflow.com/a/10168693/499609). ``` bash make bench-all ``` This will build and run benchmarks across all, and report the time taken by each as well as relative time. ## Contributing Contributions are **highly encouraged**; if you'd like to assist, consider checking out the [`good first issue` label](https://github.com/kivikakk/comrak/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22)\! I'm happy to help provide direction and guidance throughout, even if (especially if\!) you're new to Rust or open source. Where possible I practice [Optimistic Merging](http://hintjens.com/blog:106) as described by Peter Hintjens. Please keep the [code of conduct](CODE_OF_CONDUCT.md) in mind too. Thank you to Comrak's many contributors for PRs and issues opened\! ### Code Contributors [![Small chart showing Comrak contributors.](https://opencollective.com/comrak/contributors.svg?width=890&button=false)](https://github.com/kivikakk/comrak/graphs/contributors) ### Financial Contributors Become a financial contributor and help sustain Comrak's development. 
I'm self-employed --- open-source software relies on the collective. - [GitHub Sponsors](https://github.com/sponsors/kivikakk) ## Contact Asherah Connor \ ## Legal Copyright (c) 2017–2024, Asherah Connor and Comrak contributors. Licensed under the [2-Clause BSD License](https://opensource.org/licenses/BSD-2-Clause). `cmark` itself is copyright (c) 2014, John MacFarlane. See [COPYING](COPYING) for all the details. comrak-0.29.0/RELEASE_CHECKLIST.md000064400000000000000000000005331046102023000141640ustar 00000000000000* Bump version in `Cargo.toml`. * Did `tests::exercise_full_api` change? If so, it's a semver-breaking change. * Run https://github.com/kivikakk/comrak/actions/workflows/release.yml. * Inspect the created PR, make any changes, and merge when ready. * This will automatically create a new git tag, GitHub release, and publish to crates.io. comrak-0.29.0/benches/bench.sh000075500000000000000000000004131046102023000142130ustar 00000000000000#! /bin/bash PROG=$1 ROOTDIR=$(git rev-parse --show-toplevel) for lang in ar az be ca cs de en eo es es-ni fa fi fr hi hu id it ja ko mk nl no-nb pl pt-br ro ru sr th tr uk vi zh zh-tw; do \ cat $ROOTDIR/vendor/progit/$lang/*/*.markdown | $PROG > /dev/null donecomrak-0.29.0/benches/progit.rs000064400000000000000000000011141046102023000144460ustar 00000000000000#![feature(test)] extern crate test; use comrak::{format_html, parse_document, Arena, Options}; use test::Bencher; #[bench] fn bench_progit(b: &mut Bencher) { use std::fs::File; use std::io::Read; let mut file = File::open("script/progit.md").unwrap(); let mut s = String::with_capacity(524288); file.read_to_string(&mut s).unwrap(); b.iter(|| { let arena = Arena::new(); let root = parse_document(&arena, &s, &Options::default()); let mut output = vec![]; format_html(root, &Options::default(), &mut output).unwrap() }); } comrak-0.29.0/changelog.txt000064400000000000000000000525331046102023000136700ustar 00000000000000# [v0.29.0] - 10-10-2024 * Add support for
backslash escape in wikilinks by @digitalmoksha in https://github.com/kivikakk/comrak/pull/471 Diff: https://github.com/kivikakk/comrak/compare/v0.28.0...v0.29.0 # [v0.28.0] - 05-09-2024 * Add a render option to render the image as
by @JmPotato in https://github.com/kivikakk/comrak/pull/458 * Fix edge cases for relaxed-autolink option by @digitalmoksha in https://github.com/kivikakk/comrak/pull/461 Diff: https://github.com/kivikakk/comrak/compare/v0.27.0...v0.28.0 # [v0.27.0] - 19-08-2024 * Track line offsets for better accuracy of inline sourcepos by @digitalmoksha in https://github.com/kivikakk/comrak/pull/453 * Add experimental-inline-sourcepos to cli options by @digitalmoksha in https://github.com/kivikakk/comrak/pull/455 Diff: https://github.com/kivikakk/comrak/compare/v0.26.0...v0.27.0 # [v0.26.0] - 12-07-2024 * Restore inline sourcepos as experimental. by @kivikakk in https://github.com/kivikakk/comrak/pull/444 * This is needed by some downstream users, so we re-introduce it, with a clearly labelled option. Diff: https://github.com/kivikakk/comrak/compare/v0.25.0...v0.26.0 # [v0.25.0] - 12-07-2024 * Discord-flavored Markdown by @Meow and @liamwhite in https://github.com/kivikakk/comrak/pull/421 * Three new extensions and two render options are added: * `extension.underline` adds support for `__underlined__` text. * `extension.spoiler` adds support for `||spoiler||` text. * `extension.greentext` adds support for image board-style `>greentext`, which isn't transformed into a blockquote. * `render.ignore_setext` disables parsing setext-style headings. * `render.ignore_empty_links` causes links with no text (like `[](xyz)`) to remain in the text as-is. * nodes: add From impls for AstNode. by @kivikakk in https://github.com/kivikakk/comrak/pull/424 * Back by popular demand: `AstNode::from(NodeValue)`. * Also added is `AstNode::from(Ast)`, if you have sourcepos. * AST validation by @yannham in https://github.com/kivikakk/comrak/pull/425 * The AST is validated when formatting a document as CommonMark in debug builds. * Address autolink edge cases. by @kivikakk in https://github.com/kivikakk/comrak/pull/426 * Autolinks had many edge cases where output differed from upstream `cmark-gfm`. 
These have been fixed by following upstream's parser design closely. * shortcodes: capture all known aliases. by @kivikakk in https://github.com/kivikakk/comrak/pull/427 * We didn't parse shortcodes containing numbers or `+`. We do now. * Support both upstream CommonMark and GFM's differences in the base spec. by @kivikakk in https://github.com/kivikakk/comrak/pull/428 * GFM modifies even base CommonMark output somewhat. We now support and validate against both. * cm: count ol items from start of each list. by @kivikakk in https://github.com/kivikakk/comrak/pull/429 * Ordered list item numbers are normalised on formatting back to CommonMark. * arena_tree: panic if iterator invalidation causes trouble. by @kivikakk in https://github.com/kivikakk/comrak/pull/437 * `arena_tree` would silently stop iteration when trying to proceed from a child that had lost its parent. It now panics instead, as the old behaviour is incorrect and impossible to notice. * broken reflink callback updates & big cleanup. by @kivikakk in https://github.com/kivikakk/comrak/pull/438 * The broken reference link callback has been moved into `ParseOptions` (which now takes a lifetime, meaning `Options` does too). * The callback now takes a struct containing both the normalised reference, and the original text, and the return value has changed from a 2-tuple to a struct for clarity. * `parse_document_with_broken_link_callback` has been marked deprecated. * Inline sourcepos fixes. by @kivikakk in https://github.com/kivikakk/comrak/pull/439 * Inline sourcepos was provided on a best-effort basis, but there are multiple correctness issues which can't be fixed without significant work. * Inline sourcepos is no longer reported in HTML output. It remains in the AST and in XML output, but it is not reliable. See the PR for details. * Link sourcepos is slightly better than it was when it spans multiple lines. 
## New Contributors * @liamwhite made their first contribution in https://github.com/kivikakk/comrak/pull/421 * @yannham made their first contribution in https://github.com/kivikakk/comrak/pull/425 Diff: https://github.com/kivikakk/comrak/compare/v0.24.1...v0.25.0 # [v0.24.1] - 19-05-2024 * Add GH_TOKEN to release workflow by @digitalmoksha in https://github.com/kivikakk/comrak/pull/418 Diff: https://github.com/kivikakk/comrak/compare/v0.24.0...v0.24.1 # [v0.24.0] - 19-05-2024 * Miscellany. by @kivikakk in https://github.com/kivikakk/comrak/pull/387 * Add automation to release new crates by @gjtorikian in https://github.com/kivikakk/comrak/pull/374 * build(deps): bump emojis from 0.5.2 to 0.6.2 by @dependabot in https://github.com/kivikakk/comrak/pull/393 * build(deps): bump arbitrary from 1.3.0 to 1.3.2 by @dependabot in https://github.com/kivikakk/comrak/pull/394 * build(deps): bump actions/checkout from 3 to 4 by @dependabot in https://github.com/kivikakk/comrak/pull/389 * build(deps): bump once_cell from 1.17.0 to 1.19.0 by @dependabot in https://github.com/kivikakk/comrak/pull/390 * build(deps): bump xdg from 2.4.1 to 2.5.2 by @dependabot in https://github.com/kivikakk/comrak/pull/391 * build(deps): bump derive_builder from 0.12.0 to 0.20.0 by @dependabot in https://github.com/kivikakk/comrak/pull/392 * build(deps): bump memchr from 2.5.0 to 2.7.2 by @dependabot in https://github.com/kivikakk/comrak/pull/396 * build(deps): bump ntest from 0.9.0 to 0.9.2 by @dependabot in https://github.com/kivikakk/comrak/pull/397 * build(deps): bump typed-arena from 2.0.1 to 2.0.2 by @dependabot in https://github.com/kivikakk/comrak/pull/398 * Update automerge.yml by @gjtorikian in https://github.com/kivikakk/comrak/pull/401 * build(deps): bump clap from 4.0.32 to 4.5.4 by @dependabot in https://github.com/kivikakk/comrak/pull/400 * build(deps): bump regex from 1.7.0 to 1.10.4 by @dependabot in https://github.com/kivikakk/comrak/pull/402 * Fix release workflows by @gjtorikian 
in https://github.com/kivikakk/comrak/pull/395 * workflows: check MSRV in CI. by @kivikakk in https://github.com/kivikakk/comrak/pull/406 * Add support for wikilinks format by @digitalmoksha in https://github.com/kivikakk/comrak/pull/407 * Autolink should ignore wikilinks by @digitalmoksha in https://github.com/kivikakk/comrak/pull/413 * Bump version to 0.24.0 by @digitalmoksha in https://github.com/kivikakk/comrak/pull/415 Diff: https://github.com/kivikakk/comrak/compare/0.23.0...v0.24.0 # [0.23.0] * add traverse() demo example by @kaesluder in https://github.com/kivikakk/comrak/pull/370 * Avoid backslashes before a new block. by @jneem in https://github.com/kivikakk/comrak/pull/373 * Expand traverse and descendants documentation: Issue #369 by @kaesluder in https://github.com/kivikakk/comrak/pull/375 * Feat/inplace: add new parameter `--inplace` (`-i`) for in-place formatting by @bioinformatist in https://github.com/kivikakk/comrak/pull/377 * Change `relaxed-autolinks` to allow any url scheme by @digitalmoksha in https://github.com/kivikakk/comrak/pull/380 * Fix sourcepos for setext headers by @digitalmoksha in https://github.com/kivikakk/comrak/pull/381 * Add iterative search/replace example to examples and README.md by @kaesluder in https://github.com/kivikakk/comrak/pull/383 * un-Nix in CI. 
by @kivikakk in https://github.com/kivikakk/comrak/pull/384 * Return brackets in autolinks behavior back to cmark-gfm by @digitalmoksha in https://github.com/kivikakk/comrak/pull/386 # [0.22.0] * Fix broken docs link in README by @ohakutsu in https://github.com/kivikakk/comrak/pull/364 * Make non public nodes public by @mfontanini in https://github.com/kivikakk/comrak/pull/363 * cargo update -p rustix --precise 0.36.17 by @kivikakk in https://github.com/kivikakk/comrak/pull/368 * Add render option to wrap escaped chars in span by @digitalmoksha in https://github.com/kivikakk/comrak/pull/367 * Add math support by @digitalmoksha in https://github.com/kivikakk/comrak/pull/366 # [0.21.0] * Add a multiline blockquote extension by @digitalmoksha in https://github.com/kivikakk/comrak/pull/359 # [0.20.0] * build(deps): bump rustix from 0.36.11 to 0.36.16 in /fuzz by @dependabot in https://github.com/kivikakk/comrak/pull/346 * Use Nix for CI. by @charlottia in https://github.com/kivikakk/comrak/pull/338 * Allow for Syntect to simply generate CSS classes by @gjtorikian in https://github.com/kivikakk/comrak/pull/347 # [0.19.0] * Simplify anchorize() by @kornelski in https://github.com/kivikakk/comrak/pull/297 * Use footnote name for reference id by @digitalmoksha in https://github.com/kivikakk/comrak/pull/300 * Escape footnote name by @digitalmoksha in https://github.com/kivikakk/comrak/pull/308 * Add in-doc labels for public facing features by @CosmicHorrorDev in https://github.com/kivikakk/comrak/pull/304 * build(deps): bump xml-rs from 0.8.4 to 0.8.14 by @dependabot in https://github.com/kivikakk/comrak/pull/312 * Handle footnote names that have been parsed into multiple nodes by @digitalmoksha in https://github.com/kivikakk/comrak/pull/311 * Sync with cmark-gfm-0.29.0.gfm.3 by @digitalmoksha in https://github.com/kivikakk/comrak/pull/313 * Sync with cmark-gfm-0.29.0.gfm.4 by @digitalmoksha in https://github.com/kivikakk/comrak/pull/314 * Sync with cmark-gfm-0.29.0.gfm.5 
by @digitalmoksha in https://github.com/kivikakk/comrak/pull/315 * Fix backslash in a link issue by @vpetrigo in https://github.com/kivikakk/comrak/pull/317 * Sync with cmark-gfm-0.29.0.gfm.7 by @digitalmoksha in https://github.com/kivikakk/comrak/pull/318 * Rename `ComrakFoo` types to just `Foo` for easier usage by @tgross35 in https://github.com/kivikakk/comrak/pull/320 * Make `ComrakExtensionOptions` non-exhaustive by @CosmicHorrorDev in https://github.com/kivikakk/comrak/pull/305 * Add builder derive and non_exhaustive for option structs by @YJDoc2 in https://github.com/kivikakk/comrak/pull/292 * add PartialEq and Eq derive for Ast and its components by @YJDoc2 in https://github.com/kivikakk/comrak/pull/322 * Sync with cmark-gfm-0.29.0.gfm.11 by @digitalmoksha in https://github.com/kivikakk/comrak/pull/319 * Fix autolink detection inside wiki style link brackets by @digitalmoksha in https://github.com/kivikakk/comrak/pull/325 * Add CI for running benchmarks by @YJDoc2 in https://github.com/kivikakk/comrak/pull/326 * Make adapters Send + Sync by @lucperkins in https://github.com/kivikakk/comrak/pull/337 * docs: fix-up broken docs.rs link by @silverjam in https://github.com/kivikakk/comrak/pull/341 * Use github/cmark-gfm submodule by @digitalmoksha in https://github.com/kivikakk/comrak/pull/344 * Sync with cmark-gfm-0.29.0.gfm.12 by @digitalmoksha in https://github.com/kivikakk/comrak/pull/343 * Sync with cmark-gfm-0.29.0.gfm.13 by @digitalmoksha in https://github.com/kivikakk/comrak/pull/345 # [0.18.0] * Improve performance of bundled plugins, and streaming I/O by @kivikakk in https://github.com/kivikakk/comrak/pull/288 * Implement Default for enums without using #[default] attribute by @silverpill in https://github.com/kivikakk/comrak/pull/293 * XML and sourcepos support by @kivikakk in https://github.com/kivikakk/comrak/pull/232 * Add a quadratic fuzzer by @philipturnbull in https://github.com/kivikakk/comrak/pull/295 # [0.17.1] * Fix some panics found by 
trivial fuzzing. Missed from the 0.17.0 changelog: * Add footnote attributes that mirror cmark-gfm by @digitalmoksha in https://github.com/kivikakk/comrak/pull/273 * Add support for full_info_string render option by @digitalmoksha in https://github.com/kivikakk/comrak/pull/276 * chore: improve debug performance by @conradludgate in https://github.com/kivikakk/comrak/pull/283 # [0.17.0] This contains some breaking changes from an API point of view, but output is largely unchanged. Spec compliance is improved, and benchmark runtime is over 20% faster. * SECURITY: GHSA-8hqf-xjwp-p67v / Quadratic runtime when parsing Markdown (GHSL-2023-047) * * A variety of quadratic runtime issues that could lead to DoS were reported and addressed. * We replaced pest with an re2c-based scanner. * SECURITY: GHSA-xxmq-4vph-956w / Excessive output when parsing Markdown (GHSL-2023-048) * * Reference output is limited to 100Kb. * SECURITY: GHSA-5r3x-p7xx-x6q5 / Attacker controlled data in AST nodes is not validated (GHSL-2023-049) * * AST nodes no longer store raw `Vec`s, and instead store `String`s. * Various API points were cleaned up. * Comrak now targets Rust 2018. Many thanks to @philipturnbull and @darakian of the GitHub Security Lab for bringing these issues to my attention and detailing the reproduction steps for each case. 
# [0.16.0] * Track which symbol was used to mark task item as checked by @felipesere in https://github.com/kivikakk/comrak/pull/252 * improve tagfilter performance by @fiji-flo in https://github.com/kivikakk/comrak/pull/256 * [ShortCode] Add support for gemojis via shortcodes extension by @eklipse2k8 in https://github.com/kivikakk/comrak/pull/260 * "mod three rule" fix by @kivikakk in https://github.com/kivikakk/comrak/pull/262 * Add `shortcodes` to the README by @gjtorikian in https://github.com/kivikakk/comrak/pull/263 * Cargo.toml: remove timebomb by @kivikakk in https://github.com/kivikakk/comrak/pull/264 * Add custom heading adapter by @lucperkins in https://github.com/kivikakk/comrak/pull/266 * Keep track of "^" symbol when within footnotes by @gjtorikian in https://github.com/kivikakk/comrak/pull/274 # [0.15.0] * table: fix start_line of Table itself by @kivikakk in https://github.com/kivikakk/comrak/pull/231 * Rename header file to match c libname by @gjtorikian in https://github.com/kivikakk/comrak/pull/233 * Change the name of the ifdef by @gjtorikian in https://github.com/kivikakk/comrak/pull/234 * Add `comrak_set_parse_option_smart` by @gjtorikian in https://github.com/kivikakk/comrak/pull/235 * Allow `c_char` options to be NULL by @gjtorikian in https://github.com/kivikakk/comrak/pull/237 * Replace `lazy_static` dependency with `once_cell` by @Turbo87 in https://github.com/kivikakk/comrak/pull/238 * Make `comrak --help` readable on my terminal by @mgeisler in https://github.com/kivikakk/comrak/pull/242 * c-api: fix CI build by @kivikakk in https://github.com/kivikakk/comrak/pull/240 * Bump versions of some dependencies by @helmet91 in https://github.com/kivikakk/comrak/pull/243 * Adding functionality to build SyntectAdapters with custom themes, syntax sets, etc. 
by @ArvinSKushwaha in https://github.com/kivikakk/comrak/pull/239 * Make shell-words and xdg dependencies optional by @silverpill in https://github.com/kivikakk/comrak/pull/245 * Bump clap version to 4.0 and switch to the Derive API by @tranzystorek-io in https://github.com/kivikakk/comrak/pull/248 * c-api: remove by @kivikakk in https://github.com/kivikakk/comrak/pull/249 # [0.14.0] * Add C FFI, allowing Comrak to be used from other languages. (#171, Garen Torikian) * Fix line wrapping in CommonMark output. (#228, Edward Loveall) * Add option to specify character used for unordered list bullets in CommonMark output. (#229, Edward Loveall) # [0.13.2] * Fix Windows build. # [0.13.1] * Support compiling for WASM. (#222, Ben Wishoshavich) * Replace deprecated twoway dependency. (#224) # [0.13.0] * SECURITY: Bump regex to 1.5.5. (#221, Dependabot) * Drop unneeded YAML dependency from Syntect. (#199, Chris Wong) * Match newline handling in code inlines to upstream, and improve test failure reporting. (#210, Michael Anderson) * Make all node value fields public. (#216, Evan Schwartz) * Line break handling adjustments. (#214, Michael Anderson) * Disable control characters in link definitions. (#219, Michael Anderson) # [0.12.1] * Only load syntax and theme sets once, on Syntect plugin instantiation. (#197) * Match syntax highlighting language names more loosely. (#198) # [0.12.0] * Add pluggable syntax highlighting, and default implementation with syntect. (Daniel Simon, #194) # [0.11.0] * Allow short URLs even with non-empty path. (#191, Bernard Teo) * Expose NodeCode struct in AST. (#192, Vojtech Kral) # [0.10.1] * SECURITY: it was possible to smuggle unsafe URLs --- like `javascript:` ones --- even without using the "unsafe" mode of operation. Thanks to Sam Sanoop (snoopysecurity) for reporting. * Recognise tables without a preceding newline. (#183) # [0.10.0] * 0.9.1 was a semver-breaking change. * Add -o/--output CLI option. 
(#177) # [0.9.1] * SECURITY: we were matching unsafe URL prefixes, such as `data:` or `javascript:`, in a case-sensitive manner. This meant prefixes like `Data:` were untouched. Please upgrade as soon as possible. (Kouhei Morita) * Add support for ignoring front matter. (#170, Eitan Mosenkis.) # [0.9.0] * 0.8.2 was a semver-breaking change, so we're now bumping to 0.9.0. Some tests have been added to catch this in future. * Allow image/ prefix on data URIs. (#169, Daniel Sorichetti) # [0.8.2] * Fix some lint issues. (#152, Caleb Maclennan) * Build benchmarks separately to tests. (#154) * Add support for a config file for CLI use. (#157, with thanks to AJ ONeal.) # [0.8.1] * Add escape option to escape raw HTML instead of clobbering it. (#150, Ryan Westlund) # [0.8.0] * 0.7.1 was a semver-breaking change. This is now 0.8.0. # [0.7.1] * Reduce list item indentation in line with spec. (#135, Casey Rodarmor) * Split uber-struct ComrakOptions into substructures. * Refactor HTML formatter escaping. (#140, Donough Liu) * Don't render

inside

tags. (#145) # [0.7.0] * Supporting stable and newer again, since dependencies keep breaking for 1.27.0. (#134) # [0.6.2] * Exclude unneeded files from crate. (#120, Igor Gnatenko) * Bump the twoway dependency. (#121, Igor Gnatenko) # [0.6.1] * Add --gfm flag to CLI to enable all GitHub Flavored Markdown extensions and options. (#118, James R Miller) # [0.6.0] * Add TaskItem variant to NodeValue. (#115, Élisabeth Henry) # [0.5.1] * Support building on Rust versions back to 1.27.0. (#114) # [0.5.0] * Update API so that footnote reference and definition identifiers match. (#110, Élisabeth Henry) * Update to CommonMark spec 0.29. (#112) # [0.4.4] * Add From impl to AstNode. (#105, Sunjay Varma) # [0.4.3] * Add a Default derive and Ast::new to make ASTs programmatically constructible. (#101, Sunjay Varma and #102) # [0.4.2] * Add a callback to fill in broken reference links, per pulldown_cmark's Parser::new_with_broken_link_callback. (#100, Sunjay Varma) * Update to latest spec. (#99) # [0.4.1] * Fix a bug in anchor generation; it should now be on par with GitHub's. (#97, Clifford T. Matthews) * Expose anchor generation for use in library consumers. (#94, Clifford T. Matthews) # [0.4.0] * Invert default-false `safe` flag to default-false `unsafe_` flag. If you were not enabling safe mode before, you'll need to enable unsafe mode now. # [0.3.1] * Keep up-to-date with the spec. # [0.3.0] * Significant test coverage and code clean up. (#82, #83, Brian Anderson) * Description list support. (#86, Ayose Cazorla) * Example use of comrak to convert CommonMark documents into S-expressions. (#86, Ayose Cazorla) * Footnotes are now enabled via an extension option, not a flag of its own. (#87) * Extend `cmark-gfm` compatibility to include all extension and regression tests. (#87) # [0.2.14] * Speed enhancements. (#76, Brian Anderson) * Target latest spec; bring comrak closer into line with cmark. (#81, Brian Anderson and Ashe Connor) # [0.2.13] * Speed enhancements.
(#75, Shaquille Johnson) # [0.2.12] * Add safety options per the reference C implementation. (#67) # [0.2.11] * Expose Arena type so users don't need to bring it in themselves (#66, Vincent Prouillet). # [0.2.10] * Bring up to date with latest spec. * Fix parsing of tables nested in other block elements (#61, Brian Anderson). * Protect against stack smashing in inline processors and CommonMark and HTML formatters (#63, Brian Anderson). # [0.2.9] * Fix a corner case in the ATX header parser (#53, Brian Anderson). * Fix grammar for scanning table marker rows (#55, Brian Anderson). * Add smart punctuation (#57). # [0.2.8] * Add `default-info-string` argument/option to specify a default language in fenced code blocks. (Thanks to @steveklabnik for the suggestion.) # [0.2.7] * Use [`pest`](https://github.com/pest-parser/pest) instead of regexes for lexing. # [0.2.6] * Fixed a bug where back-to-back emphases would not be processed correctly. (#45; thanks to @SSJohns for the report.) # [0.2.5] * Fixed a bug where an exclamation mark "!" followed by a footnote would be eaten by the parser. # [0.2.4] * Added footnotes support. # [0.2.3] * Added header IDs extension. # [0.2.2] * Fix for pathological reference link parsing. # [0.2.1] * Speed optimisations. # [0.2.0] * The formatters no longer produce Strings themselves; you must specify an output stream. * Speed up whitespace normalisation. # [0.1.9] * Multibyte character fix for autolink (#35, Shaquille Johnson). * Resolve panics with tables in awkward situations (#36). # [0.1.8] * Fix possible DoS in link parsing (#33, Demi Obenour).
comrak-0.29.0/examples/custom_headings.rs000064400000000000000000000034771046102023000165430ustar 00000000000000use comrak::{
    adapters::{HeadingAdapter, HeadingMeta},
    markdown_to_html_with_plugins,
    nodes::Sourcepos,
    Options, Plugins,
};
use std::io::{self, Write};

/// Renders a handful of sample documents through `CustomHeadingAdapter` and
/// prints the resulting HTML, the last one with `render.sourcepos` enabled.
fn main() {
    let adapter = CustomHeadingAdapter;
    let mut options = Options::default();
    let mut plugins = Plugins::default();
    plugins.render.heading_adapter = Some(&adapter);

    print_html(
        "Some text.\n\n## Please hide me from search\n\nSome other text",
        &options,
        &plugins,
    );
    print_html(
        "Some text.\n\n### Here is some `code`\n\nSome other text",
        &options,
        &plugins,
    );
    print_html(
        "Some text.\n\n### Here is some **bold** text and some *italicized* text\n\nSome other text",
        &options,
        &plugins,
    );

    // Turn on source positions so the adapter receives `Some(sourcepos)`.
    options.render.sourcepos = true;
    print_html("# Here is a [link](/)", &options, &plugins);
}

/// Heading adapter that emits headings carrying a slugified `id` and a
/// `data-search-include` flag.
struct CustomHeadingAdapter;

impl HeadingAdapter for CustomHeadingAdapter {
    /// Writes the opening heading tag.
    ///
    /// The `id` is the slug of the heading content; `data-search-include` is
    /// `false` when the content contains the substring "hide". When a source
    /// position is supplied it is emitted as `data-sourcepos`.
    fn enter(
        &self,
        output: &mut dyn Write,
        heading: &HeadingMeta,
        sourcepos: Option<Sourcepos>,
    ) -> io::Result<()> {
        let id = slug::slugify(&heading.content);
        let search_include = !heading.content.contains("hide");

        write!(output, "<h{}", heading.level)?;

        if let Some(sourcepos) = sourcepos {
            write!(output, " data-sourcepos=\"{}\"", sourcepos)?;
        }

        write!(
            output,
            " id=\"{}\" data-search-include=\"{}\">",
            id, search_include
        )
    }

    /// Writes the closing heading tag matching the heading's level.
    fn exit(&self, output: &mut dyn Write, heading: &HeadingMeta) -> io::Result<()> {
        write!(output, "</h{}>", heading.level)
    }
}

/// Converts `document` to HTML with the given options and plugins and prints it.
fn print_html(document: &str, options: &Options, plugins: &Plugins) {
    let html = markdown_to_html_with_plugins(document, options, plugins);
    println!("{}", html);
}
comrak-0.29.0/examples/headers.rs000064400000000000000000000027011046102023000147670ustar 00000000000000// Extract the document title by searching for a level-one header at the root level.
use comrak::{ nodes::{AstNode, NodeCode, NodeValue}, parse_document, Arena, Options, }; fn main() { println!("{:?}", get_document_title("# Hello\n")); println!("{:?}", get_document_title("## Hello\n")); println!("{:?}", get_document_title("# `hi` **there**\n")); } fn get_document_title(document: &str) -> String { let arena = Arena::new(); let root = parse_document(&arena, document, &Options::default()); for node in root.children() { let header = match node.data.clone().into_inner().value { NodeValue::Heading(c) => c, _ => continue, }; if header.level != 1 { continue; } let mut text = String::new(); collect_text(node, &mut text); // The input was already known good UTF-8 (document: &str) so comrak // guarantees the output will be too. return text; } "Untitled Document".to_string() } fn collect_text<'a>(node: &'a AstNode<'a>, output: &mut String) { match node.data.borrow().value { NodeValue::Text(ref literal) | NodeValue::Code(NodeCode { ref literal, .. }) => { output.push_str(literal) } NodeValue::LineBreak | NodeValue::SoftBreak => output.push(' '), _ => { for n in node.children() { collect_text(n, output); } } } } comrak-0.29.0/examples/iterator_replace.rs000064400000000000000000000032151046102023000167010ustar 00000000000000extern crate comrak; use comrak::nodes::NodeValue; use comrak::{format_html, parse_document, Arena, Options}; fn replace_text(document: &str, orig_string: &str, replacement: &str) -> String { // The returned nodes are created in the supplied Arena, and are bound by its lifetime. let arena = Arena::new(); // Parse the document into a root `AstNode` let root = parse_document(&arena, document, &Options::default()); // Iterate over all the descendants of root. for node in root.descendants() { if let NodeValue::Text(ref mut text) = node.data.borrow_mut().value { // If the node is a text node, replace `orig_string` with `replacement`. 
*text = text.replace(orig_string, replacement) } } let mut html = vec![]; format_html(root, &Options::default(), &mut html).unwrap(); String::from_utf8(html).unwrap() } fn main() { let doc = "This is my input.\n\n1. Also [my](#) input.\n2. Certainly *my* input.\n"; let orig = "my"; let repl = "your"; let html = replace_text(&doc, &orig, &repl); println!("{}", html); } #[cfg(test)] mod tests { use super::*; use ntest::{assert_false, assert_true}; #[test] fn sample_replace() { let doc = "Replace deeply nested *[foo](https://example.com)* with bar.\n\nReplace shallow foo with bar."; let orig = "foo"; let repl = "bar"; let html = replace_text(&doc, &orig, &repl); println!("{:?}", html); assert_false!(html.contains("foo")); assert_true!(html.contains("bar")); assert_true!(html.contains("( node: &'a AstNode<'a>, writer: &mut W, indent: usize, ) -> io::Result<()> { use NodeValue::*; macro_rules! try_node_inline { ($node:expr, $name:ident) => {{ if let $name(t) = $node { return write!(writer, concat!(stringify!($name), "({:?})"), t,); } }}; } match &node.data.borrow().value { Text(t) => write!(writer, "{:?}", t)?, value => { try_node_inline!(value, FootnoteDefinition); try_node_inline!(value, FootnoteReference); try_node_inline!(value, HtmlInline); if let Code(code) = value { return write!(writer, "Code({:?}, {})", code.literal, code.num_backticks); } let has_blocks = node.children().any(|c| c.data.borrow().value.block()); write!(writer, "({:?}", value)?; for child in node.children() { if has_blocks { write!(writer, "\n{1:0$}", indent + INDENT, " ")?; } else { write!(writer, " ")?; } iter_nodes(child, writer, indent + INDENT)?; } if indent == 0 { write!(writer, "\n)\n")?; } else if CLOSE_NEWLINE && has_blocks { write!(writer, "\n{1:0$})", indent, " ")?; } else { write!(writer, ")")?; } } } Ok(()) } fn dump(source: &str) -> io::Result<()> { let arena = Arena::new(); let extension = ExtensionOptionsBuilder::default() .strikethrough(true) .tagfilter(true) .table(true) 
.autolink(true) .tasklist(true) .superscript(true) .footnotes(true) .description_lists(true) .multiline_block_quotes(true) .math_dollars(true) .math_code(true) .wikilinks_title_after_pipe(true) .wikilinks_title_before_pipe(true) .build() .unwrap(); let opts = Options { extension, ..Options::default() }; let doc = parse_document(&arena, source, &opts); let mut output = BufWriter::new(io::stdout()); iter_nodes(doc, &mut output, 0) } fn main() -> Result<(), Box> { let mut args = env::args_os().skip(1).peekable(); let mut body = String::new(); if args.peek().is_none() { io::stdin().read_to_string(&mut body)?; dump(&body)?; } for filename in args { println!("{:?}", filename); body.clear(); File::open(&filename)?.read_to_string(&mut body)?; dump(&body)?; } Ok(()) } comrak-0.29.0/examples/sample.rs000064400000000000000000000033101046102023000146320ustar 00000000000000// Samples used in the README. Wanna make sure they work as advertised. fn small() { use comrak::{markdown_to_html, Options}; assert_eq!( markdown_to_html("Hello, **δΈ–η•Œ**!", &Options::default()), "

Hello, δΈ–η•Œ!

\n" ); } fn large() { use comrak::nodes::NodeValue; use comrak::{format_html, parse_document, Arena, Options}; fn replace_text(document: &str, orig_string: &str, replacement: &str) -> String { // The returned nodes are created in the supplied Arena, and are bound by its lifetime. let arena = Arena::new(); // Parse the document into a root `AstNode` let root = parse_document(&arena, document, &Options::default()); // Iterate over all the descendants of root. for node in root.descendants() { if let NodeValue::Text(ref mut text) = node.data.borrow_mut().value { // If the node is a text node, perform the string replacement. *text = text.replace(orig_string, replacement) } } let mut html = vec![]; format_html(root, &Options::default(), &mut html).unwrap(); String::from_utf8(html).unwrap() } fn main() { let doc = "This is my input.\n\n1. Also [my](#) input.\n2. Certainly *my* input.\n"; let orig = "my"; let repl = "your"; let html = replace_text(&doc, &orig, &repl); println!("{}", html); // Output: // //

This is your input.

//
    //
  1. Also your input.
  2. //
  3. Certainly your input.
  4. //
} main() } fn main() { small(); large(); } comrak-0.29.0/examples/syntax_highlighter.rs000064400000000000000000000035741046102023000172710ustar 00000000000000//! This example shows how to implement a syntax highlighter plugin. use comrak::adapters::SyntaxHighlighterAdapter; use comrak::{markdown_to_html_with_plugins, Options, Plugins}; use std::collections::HashMap; use std::io::{self, Write}; #[derive(Debug, Copy, Clone)] pub struct PotatoSyntaxAdapter { potato_size: i32, } impl PotatoSyntaxAdapter { pub fn new(potato_size: i32) -> Self { PotatoSyntaxAdapter { potato_size } } } impl SyntaxHighlighterAdapter for PotatoSyntaxAdapter { fn write_highlighted( &self, output: &mut dyn Write, lang: Option<&str>, code: &str, ) -> io::Result<()> { write!( output, "{}potato", lang.unwrap(), code, self.potato_size ) } fn write_pre_tag( &self, output: &mut dyn Write, attributes: HashMap, ) -> io::Result<()> { if attributes.contains_key("lang") { write!(output, "
", attributes["lang"])
        } else {
            output.write_all(b"
")
        }
    }

    fn write_code_tag(
        &self,
        output: &mut dyn Write,
        attributes: HashMap,
    ) -> io::Result<()> {
        if attributes.contains_key("class") {
            write!(output, "", attributes["class"])
        } else {
            output.write_all(b"")
        }
    }
}

/// Renders a small fenced code block through `PotatoSyntaxAdapter` and prints
/// the resulting HTML to stdout.
fn main() {
    let potato = PotatoSyntaxAdapter::new(42);

    // Register the adapter as the code-fence syntax highlighter.
    let mut plugins = Plugins::default();
    plugins.render.codefence_syntax_highlighter = Some(&potato);

    let markdown = concat!("```Rust\n", "fn main<'a>();\n", "```");
    let html = markdown_to_html_with_plugins(markdown, &Options::default(), &plugins);

    println!("{}", html);
}
comrak-0.29.0/examples/syntect.rs000064400000000000000000000013231046102023000150440ustar  00000000000000//! This example shows how to use the bundled syntect plugin.

use comrak::plugins::syntect::SyntectAdapterBuilder;
use comrak::{markdown_to_html_with_plugins, Options, Plugins};

/// Runs the demo twice: once with a builder configured via `.theme(..)` and
/// once with a builder configured via `.css()`.
fn main() {
    let themed = SyntectAdapterBuilder::new().theme("base16-ocean.dark");
    run_with(themed);

    let css_based = SyntectAdapterBuilder::new().css();
    run_with(css_based);
}

/// Builds an adapter from `builder`, installs it as the code-fence syntax
/// highlighter, renders a fixed sample document and prints the HTML.
fn run_with(builder: SyntectAdapterBuilder) {
    let highlighter = builder.build();

    let mut plugins = Plugins::default();
    plugins.render.codefence_syntax_highlighter = Some(&highlighter);

    let markdown = concat!("```Rust\n", "fn main<'a>();\n", "```");
    let html = markdown_to_html_with_plugins(markdown, &Options::default(), &plugins);

    println!("{}", html);
}
comrak-0.29.0/examples/traverse_demo.rs000064400000000000000000000041761046102023000162230ustar  00000000000000use comrak::{
    arena_tree::NodeEdge,
    nodes::{AstNode, NodeValue},
    parse_document, Arena, ComrakOptions,
};

// `node.traverse()` creates an iterator that will traverse
// the current node and all descendants in order.
// The iterator yields `NodeEdges`. `NodeEdges` can have the
// following values:
//
// `NodeEdge::Start(node)` Start of node.
// `NodeEdge::End(node)` End of node.
// `None` End of iterator at bottom of last branch.
//
// This example extracts plain text ignoring nested
// markup.

// Note: root can be any AstNode, not just document root.

/// Concatenates the contents of every `NodeValue::Text` node found at or
/// below `root`, in traversal order.
fn extract_text_traverse<'a>(root: &'a AstNode<'a>) -> String {
    let mut collected = String::new();

    for edge in root.traverse() {
        // Only `Start` edges are inspected, so each node is looked at once.
        let node = match edge {
            NodeEdge::Start(node) => node,
            _ => continue,
        };

        // Text nodes contribute their text; every other node kind is skipped.
        if let NodeValue::Text(ref chunk) = node.data.borrow().value {
            collected.push_str(chunk);
        }
    }

    collected
}

fn main() {
    let markdown_input = "Hello, *worl[d](https://example.com/)*";
    // Nested inline markup. Equivalent html should look like this:
    //"

Hello, world

println!("INPUT: {}", markdown_input); // setup parser let arena = Arena::new(); let options = ComrakOptions::default(); // parse document and return root. let root = parse_document(&arena, markdown_input, &options); // extract text and print println!("OUTPUT: {}", extract_text_traverse(root).as_str()) } #[cfg(test)] mod tests { // Import everything from the outer module to make it available for tests use super::*; #[test] fn extract_text_traverse_test() { let markdown_input = "Hello, *worl[d](https://example.com/)*"; let arena = Arena::new(); let options = ComrakOptions::default(); let root = parse_document(&arena, markdown_input, &options); assert_eq!("Hello, world", extract_text_traverse(root)); } } comrak-0.29.0/examples/update-readme.rs000064400000000000000000000064371046102023000161030ustar 00000000000000// Update the "comrak --help" text in Comrak's own README. use std::error::Error; use std::fmt::Write; use std::str; use toml::Table; use comrak::nodes::NodeValue; use comrak::{format_commonmark, parse_document, Arena, Options}; const DEPENDENCIES: &str = "[dependencies]\ncomrak = "; const HELP: &str = "$ comrak --help\n"; const HELP_START: &str = "A 100% CommonMark-compatible GitHub Flavored Markdown parser and formatter\n"; fn main() -> Result<(), Box> { let arena = Arena::new(); let readme = std::fs::read_to_string("README.md")?; let doc = parse_document(&arena, &readme, &Options::default()); let cargo_toml = std::fs::read_to_string("Cargo.toml")?.parse::()?; let msrv = cargo_toml["package"].as_table().unwrap()["rust-version"] .as_str() .unwrap(); let mut in_msrv = false; let mut next_block_is_help_body = false; for node in doc.descendants() { match node.data.borrow_mut().value { NodeValue::CodeBlock(ref mut ncb) => { // Look for the Cargo.toml example block. 
if ncb.info == "toml" && ncb.literal.starts_with(DEPENDENCIES) { let mut content = DEPENDENCIES.to_string(); let mut version_parts = comrak::version().split('.').collect::>(); version_parts.pop(); write!(content, "\"{}\"", version_parts.join(".")).unwrap(); ncb.literal = content; continue; } // Look for a console code block whose contents starts with the HELP string. // The *next* code block contains our help, minus the starting string. if ncb.info == "console" && ncb.literal.starts_with(HELP) { next_block_is_help_body = true; continue; } if next_block_is_help_body { next_block_is_help_body = false; assert!(ncb.info == "" && ncb.literal.starts_with(HELP_START)); let mut content = String::new(); let mut cmd = std::process::Command::new("cargo"); content.push_str( str::from_utf8( &cmd.args(["run", "--all-features", "--", "--help"]) .output() .unwrap() .stdout, ) .unwrap(), ); ncb.literal = content; continue; } } NodeValue::HtmlInline(ref mut s) => { if s == "" { in_msrv = true; } else if in_msrv && s == "" { in_msrv = false; } } NodeValue::Text(ref mut t) => { if in_msrv { std::mem::swap(t, &mut msrv.to_string()); } } _ => {} } } let mut options = Options::default(); options.render.prefer_fenced = true; let mut out = vec![]; format_commonmark(doc, &options, &mut out)?; std::fs::write("README.md", &out)?; Ok(()) } comrak-0.29.0/flake.lock000064400000000000000000000067471046102023000131420ustar 00000000000000{ "nodes": { "advisory-db": { "flake": false, "locked": { "lastModified": 1723840407, "narHash": "sha256-AZI593yLh4lcKJdAnnjyLMKUm5PMDpFy1APIYFURLyI=", "owner": "rustsec", "repo": "advisory-db", "rev": "201638b35a3e85b7794e84cc73f876d7a2b7ad51", "type": "github" }, "original": { "owner": "rustsec", "repo": "advisory-db", "type": "github" } }, "crane": { "inputs": { "nixpkgs": [ "nixpkgs" ] }, "locked": { "lastModified": 1722960479, "narHash": "sha256-NhCkJJQhD5GUib8zN9JrmYGMwt4lCRp6ZVNzIiYCl0Y=", "owner": "ipetkov", "repo": "crane", "rev": 
"4c6c77920b8d44cd6660c1621dea6b3fc4b4c4f4", "type": "github" }, "original": { "owner": "ipetkov", "repo": "crane", "type": "github" } }, "fenix": { "inputs": { "nixpkgs": [ "nixpkgs" ], "rust-analyzer-src": "rust-analyzer-src" }, "locked": { "lastModified": 1722493751, "narHash": "sha256-l7/yMehbrL5d4AI8E2hKtNlT50BlUAau4EKTgPg9KcY=", "owner": "nix-community", "repo": "fenix", "rev": "60ab4a085ef6ee40f2ef7921ca4061084dd8cf26", "type": "github" }, "original": { "owner": "nix-community", "ref": "monthly", "repo": "fenix", "type": "github" } }, "flake-utils": { "inputs": { "systems": "systems" }, "locked": { "lastModified": 1710146030, "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=", "owner": "numtide", "repo": "flake-utils", "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a", "type": "github" }, "original": { "owner": "numtide", "repo": "flake-utils", "type": "github" } }, "nixpkgs": { "locked": { "lastModified": 1723637854, "narHash": "sha256-med8+5DSWa2UnOqtdICndjDAEjxr5D7zaIiK4pn0Q7c=", "owner": "NixOS", "repo": "nixpkgs", "rev": "c3aa7b8938b17aebd2deecf7be0636000d62a2b9", "type": "github" }, "original": { "owner": "NixOS", "ref": "nixos-unstable", "repo": "nixpkgs", "type": "github" } }, "root": { "inputs": { "advisory-db": "advisory-db", "crane": "crane", "fenix": "fenix", "flake-utils": "flake-utils", "nixpkgs": "nixpkgs" } }, "rust-analyzer-src": { "flake": false, "locked": { "lastModified": 1722449213, "narHash": "sha256-1na4m2PNH99syz2g/WQ+Hr3RfY7k4H8NBnmkr5dFDXw=", "owner": "rust-lang", "repo": "rust-analyzer", "rev": "c8e41d95061543715b30880932ec3dc24c42d7ae", "type": "github" }, "original": { "owner": "rust-lang", "ref": "nightly", "repo": "rust-analyzer", "type": "github" } }, "systems": { "locked": { "lastModified": 1681028828, "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", "owner": "nix-systems", "repo": "default", "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", "type": "github" }, "original": { "owner": 
"nix-systems", "repo": "default", "type": "github" } } }, "root": "root", "version": 7 } comrak-0.29.0/flake.nix000064400000000000000000000057171046102023000130040ustar 00000000000000{ description = "comrak"; inputs = { nixpkgs.url = github:NixOS/nixpkgs/nixos-unstable; crane = { url = github:ipetkov/crane; inputs.nixpkgs.follows = "nixpkgs"; }; fenix = { url = github:nix-community/fenix/monthly; inputs.nixpkgs.follows = "nixpkgs"; }; flake-utils.url = github:numtide/flake-utils; advisory-db = { url = github:rustsec/advisory-db; flake = false; }; }; outputs = { self, nixpkgs, crane, fenix, flake-utils, advisory-db, ... }: flake-utils.lib.eachDefaultSystem (system: let pkgs = import nixpkgs {inherit system;}; inherit (pkgs) lib; craneLib = crane.mkLib pkgs; src = craneLib.cleanCargoSource (craneLib.path ./.); commonArgs = { inherit src; buildInputs = lib.optionals pkgs.stdenv.isDarwin [ pkgs.libiconv ]; }; toolchain = fenix.packages.${system}.complete; craneLibLLvmTools = craneLib.overrideToolchain (toolchain.withComponents [ "cargo" "llvm-tools" "rustc" ]); cargoArtifacts = craneLib.buildDepsOnly commonArgs; comrak = craneLib.buildPackage (commonArgs // { inherit cargoArtifacts; doCheck = false; }); in { checks = { inherit comrak; comrak-clippy = craneLib.cargoClippy (commonArgs // { inherit cargoArtifacts; # cargoClippyExtraArgs = "--lib --bins --examples --tests -- --deny warnings"; # XXX Not sure if we can fix all these and retain our current MSRV. 
cargoClippyExtraArgs = "--lib --bins --examples --tests"; }); comrak-doc = craneLib.cargoDoc (commonArgs // { inherit cargoArtifacts; }); comrak-fmt = craneLib.cargoFmt { inherit src; }; comrak-nextest = craneLib.cargoNextest (commonArgs // { inherit cargoArtifacts; partitions = 1; partitionType = "count"; }); } // lib.optionalAttrs (system == "x86_64-linux") { comrak-coverage = craneLib.cargoTarpaulin (commonArgs // { inherit cargoArtifacts; }); }; packages = { default = comrak; comrak-llvm-coverage = craneLibLLvmTools.cargoLlvmCov (commonArgs // { inherit cargoArtifacts; }); }; apps.default = flake-utils.lib.mkApp { drv = comrak; }; formatter = pkgs.alejandra; devShells.default = pkgs.mkShell { inputsFrom = builtins.attrValues self.checks.${system}; nativeBuildInputs = [ (toolchain.withComponents [ "cargo" "rustc" "rust-analyzer" ]) pkgs.cargo-fuzz pkgs.python3 ]; }; }); } comrak-0.29.0/rustfmt.toml000064400000000000000000000000271046102023000135700ustar 00000000000000reorder_imports = true comrak-0.29.0/src/adapters.rs000064400000000000000000000052451046102023000141360ustar 00000000000000//! Adapter traits for plugins. //! //! Each plugin has to implement one of the traits available in this module. use std::collections::HashMap; use std::io::{self, Write}; use crate::nodes::Sourcepos; /// Implement this adapter for creating a plugin for custom syntax highlighting of codefence blocks. pub trait SyntaxHighlighterAdapter: Send + Sync { /// Generates a syntax highlighted HTML output. /// /// lang: Name of the programming language (the info string of the codefence block after the initial "```" part). /// code: The source code to be syntax highlighted. fn write_highlighted( &self, output: &mut dyn Write, lang: Option<&str>, code: &str, ) -> io::Result<()>; /// Generates the opening `
` tag. Some syntax highlighter libraries might include their own
    /// `
` tag possibly with some HTML attribute pre-filled.
    ///
    /// `attributes`: A map of HTML attributes provided by comrak.
    fn write_pre_tag(
        &self,
        output: &mut dyn Write,
        // Attribute name -> attribute value.
        attributes: HashMap<String, String>,
    ) -> io::Result<()>;

    /// Generates the opening `<code>` tag. Some syntax highlighter libraries might include their own
    /// `<code>` tag possibly with some HTML attribute pre-filled.
    ///
    /// `attributes`: A map of HTML attributes provided by comrak.
    fn write_code_tag(
        &self,
        output: &mut dyn Write,
        // Attribute name -> attribute value.
        attributes: HashMap<String, String>,
    ) -> io::Result<()>;
}

/// The struct passed to the [`HeadingAdapter`] for custom heading implementations.
#[derive(Clone, Debug)]
pub struct HeadingMeta {
    /// The level of the heading; from 1 to 6 for ATX headings, 1 or 2 for setext headings.
    pub level: u8,

    /// The content of the heading as a "flattened" string—flattened in the sense that any
    /// `<strong>` or other tags are removed. In the Markdown heading `## This is **bold**`, for
    /// example, this would be the string `"This is bold"`.
    pub content: String,
}

/// Implement this adapter for creating a plugin for custom headings (`h1`, `h2`, etc.). The `enter`
/// method defines what's rendered prior the AST content of the heading while the `exit` method
/// defines what's rendered after it. Both methods provide access to a [`HeadingMeta`] struct and
/// leave the AST content of the heading unchanged.
pub trait HeadingAdapter: Send + Sync {
    /// Render the opening tag.
    fn enter(
        &self,
        output: &mut dyn Write,
        heading: &HeadingMeta,
        sourcepos: Option,
    ) -> io::Result<()>;

    /// Render the closing tag.
    fn exit(&self, output: &mut dyn Write, heading: &HeadingMeta) -> io::Result<()>;
}
comrak-0.29.0/src/arena_tree.rs000064400000000000000000000362101046102023000144340ustar  00000000000000/*!
  Included from .
  MIT license (per Cargo.toml).

A DOM-like tree data structure based on `&Node` references.

Any non-trivial tree involves reference cycles
(e.g. if a node has a first child, the parent of the child is that node).
To enable this, nodes need to live in an arena allocator
such as `arena::TypedArena` distributed with rustc (which is `#[unstable]` as of this writing)
or [`typed_arena::Arena`](https://crates.io/crates/typed-arena).

If you need mutability in the node’s `data`,
make it a cell (`Cell` or `RefCell`) or use cells inside of it.

*/

use std::cell::Cell;
use std::fmt;

/// A node inside a DOM-like tree.
///
/// Every link is an optional shared reference held in a `Cell`, so the links
/// can be rewired through `&self`.
pub struct Node<'a, T: 'a> {
    parent: Cell<Option<&'a Node<'a, T>>>,
    previous_sibling: Cell<Option<&'a Node<'a, T>>>,
    next_sibling: Cell<Option<&'a Node<'a, T>>>,
    first_child: Cell<Option<&'a Node<'a, T>>>,
    last_child: Cell<Option<&'a Node<'a, T>>>,

    /// The data held by the node.
    pub data: T,
}

/// A simple Debug implementation that prints the children as a tree, without
/// looping through the various interior pointer cycles.
impl<'a, T: 'a> fmt::Debug for Node<'a, T>
where
    T: fmt::Debug,
{
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        struct Children<'a, T>(Option<&'a Node<'a, T>>);
        impl fmt::Debug for Children<'_, T> {
            fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
                f.debug_list()
                    .entries(std::iter::successors(self.0, |child| {
                        child.next_sibling.get()
                    }))
                    .finish()
            }
        }

        let mut struct_fmt = f.debug_struct("Node");
        struct_fmt.field("data", &self.data);
        struct_fmt.field("children", &Children(self.first_child.get()));
        struct_fmt.finish()?;

        Ok(())
    }
}

impl<'a, T> Node<'a, T> {
    /// Create a new node from its associated data.
    ///
    /// The node starts with no parent, siblings or children.
    /// Typically, this node needs to be moved into an arena allocator
    /// before it can be used in a tree.
    pub fn new(data: T) -> Node<'a, T> {
        Node {
            parent: Cell::new(None),
            first_child: Cell::new(None),
            last_child: Cell::new(None),
            previous_sibling: Cell::new(None),
            next_sibling: Cell::new(None),
            data,
        }
    }

    /// Return a reference to the parent node, unless this node is the root of the tree.
    pub fn parent(&self) -> Option<&'a Node<'a, T>> {
        self.parent.get()
    }

    /// Return a reference to the first child of this node, unless it has no child.
    pub fn first_child(&self) -> Option<&'a Node<'a, T>> {
        self.first_child.get()
    }

    /// Return a reference to the last child of this node, unless it has no child.
    pub fn last_child(&self) -> Option<&'a Node<'a, T>> {
        self.last_child.get()
    }

    /// Return a reference to the previous sibling of this node, unless it is a first child.
    pub fn previous_sibling(&self) -> Option<&'a Node<'a, T>> {
        self.previous_sibling.get()
    }

    /// Return a reference to the next sibling of this node, unless it is a last child.
    pub fn next_sibling(&self) -> Option<&'a Node<'a, T>> {
        self.next_sibling.get()
    }

    /// Returns whether two references point to the same node.
    ///
    /// This compares addresses, not the nodes' `data`.
    pub fn same_node(&self, other: &Node<'a, T>) -> bool {
        std::ptr::eq(self, other)
    }

    /// Return an iterator of references to this node and its ancestors.
    ///
    /// Call `.next().unwrap()` once on the iterator to skip the node itself.
    pub fn ancestors(&'a self) -> Ancestors<'a, T> {
        Ancestors(Some(self))
    }

    /// Return an iterator of references to this node and the siblings before it.
    ///
    /// Call `.next().unwrap()` once on the iterator to skip the node itself.
    pub fn preceding_siblings(&'a self) -> PrecedingSiblings<'a, T> {
        PrecedingSiblings(Some(self))
    }

    /// Return an iterator of references to this node and the siblings after it.
    ///
    /// Call `.next().unwrap()` once on the iterator to skip the node itself.
    pub fn following_siblings(&'a self) -> FollowingSiblings<'a, T> {
        FollowingSiblings(Some(self))
    }

    /// Return an iterator of references to this node’s children.
    pub fn children(&'a self) -> Children<'a, T> {
        Children(self.first_child.get())
    }

    /// Return an iterator of references to this node’s children, in reverse order.
    pub fn reverse_children(&'a self) -> ReverseChildren<'a, T> {
        ReverseChildren(self.last_child.get())
    }

    /// Return an iterator of references to this `Node` and its descendants, in tree order.
    ///
    /// Parent nodes appear before the descendants.
    /// Call `.next().unwrap()` once on the iterator to skip the node itself.
    ///
    /// *Similar Functions:* Use `traverse()` or `reverse_traverse()` if you need
    /// the `NodeEdge` values associated with each `Node`.
    pub fn descendants(&'a self) -> Descendants<'a, T> {
        Descendants(self.traverse())
    }

    /// Return an iterator of references to `NodeEdge` enums for each `Node` and its descendants,
    /// in tree order.
    ///
    /// `NodeEdge` enums represent the `Start` or `End` of each node.
    ///
    /// *Similar Functions:* Use `descendants()` if you don't need `Start` and `End`.
    pub fn traverse(&'a self) -> Traverse<'a, T> {
        Traverse {
            root: self,
            next: Some(NodeEdge::Start(self)),
        }
    }

    /// Return an iterator of references to `NodeEdge` enums for each `Node` and its descendants,
    /// in *reverse* order.
    ///
    /// `NodeEdge` enums represent the `Start` or `End` of each node.
    ///
    /// *Similar Functions:* Use `descendants()` if you don't need `Start` and `End`.
    pub fn reverse_traverse(&'a self) -> ReverseTraverse<'a, T> {
        ReverseTraverse {
            root: self,
            next: Some(NodeEdge::End(self)),
        }
    }

    /// Detach a node from its parent and siblings. Children are not affected.
    pub fn detach(&self) {
        // `take()` clears this node's own links while handing back the old values.
        let parent = self.parent.take();
        let previous_sibling = self.previous_sibling.take();
        let next_sibling = self.next_sibling.take();

        // Close the gap on the "next" side; with no next sibling this node
        // was the parent's last child.
        if let Some(next_sibling) = next_sibling {
            next_sibling.previous_sibling.set(previous_sibling);
        } else if let Some(parent) = parent {
            parent.last_child.set(previous_sibling);
        }

        // Close the gap on the "previous" side; with no previous sibling this
        // node was the parent's first child.
        if let Some(previous_sibling) = previous_sibling {
            previous_sibling.next_sibling.set(next_sibling);
        } else if let Some(parent) = parent {
            parent.first_child.set(next_sibling);
        }
    }

    /// Append a new child to this node, after existing children.
    ///
    /// `new_child` is detached from its current position first.
    pub fn append(&'a self, new_child: &'a Node<'a, T>) {
        new_child.detach();
        new_child.parent.set(Some(self));
        if let Some(last_child) = self.last_child.take() {
            new_child.previous_sibling.set(Some(last_child));
            debug_assert!(last_child.next_sibling.get().is_none());
            last_child.next_sibling.set(Some(new_child));
        } else {
            // No last child means there must be no first child either.
            debug_assert!(self.first_child.get().is_none());
            self.first_child.set(Some(new_child));
        }
        self.last_child.set(Some(new_child));
    }

    /// Prepend a new child to this node, before existing children.
    ///
    /// `new_child` is detached from its current position first.
    pub fn prepend(&'a self, new_child: &'a Node<'a, T>) {
        new_child.detach();
        new_child.parent.set(Some(self));
        if let Some(first_child) = self.first_child.take() {
            debug_assert!(first_child.previous_sibling.get().is_none());
            first_child.previous_sibling.set(Some(new_child));
            new_child.next_sibling.set(Some(first_child));
        } else {
            // No first child means there must be no last child either.
            // (`first_child` was just taken, so asserting on it would be
            // vacuous; the meaningful invariant is on `last_child`.)
            debug_assert!(self.last_child.get().is_none());
            self.last_child.set(Some(new_child));
        }
        self.first_child.set(Some(new_child));
    }

    /// Insert a new sibling after this node.
    ///
    /// `new_sibling` is detached from its current position first and takes
    /// this node's parent.
    pub fn insert_after(&'a self, new_sibling: &'a Node<'a, T>) {
        new_sibling.detach();
        new_sibling.parent.set(self.parent.get());
        new_sibling.previous_sibling.set(Some(self));
        if let Some(next_sibling) = self.next_sibling.take() {
            debug_assert!(std::ptr::eq(
                next_sibling.previous_sibling.get().unwrap(),
                self
            ));
            next_sibling.previous_sibling.set(Some(new_sibling));
            new_sibling.next_sibling.set(Some(next_sibling));
        } else if let Some(parent) = self.parent.get() {
            // This node was the last child, so the new sibling becomes the last child.
            debug_assert!(std::ptr::eq(parent.last_child.get().unwrap(), self));
            parent.last_child.set(Some(new_sibling));
        }
        self.next_sibling.set(Some(new_sibling));
    }

    /// Insert a new sibling before this node.
    ///
    /// `new_sibling` is detached from its current position first and takes
    /// this node's parent.
    pub fn insert_before(&'a self, new_sibling: &'a Node<'a, T>) {
        new_sibling.detach();
        new_sibling.parent.set(self.parent.get());
        new_sibling.next_sibling.set(Some(self));
        if let Some(previous_sibling) = self.previous_sibling.take() {
            new_sibling.previous_sibling.set(Some(previous_sibling));
            debug_assert!(std::ptr::eq(
                previous_sibling.next_sibling.get().unwrap(),
                self
            ));
            previous_sibling.next_sibling.set(Some(new_sibling));
        } else if let Some(parent) = self.parent.get() {
            // This node was the first child, so the new sibling becomes the first child.
            debug_assert!(std::ptr::eq(parent.first_child.get().unwrap(), self));
            parent.first_child.set(Some(new_sibling));
        }
        self.previous_sibling.set(Some(new_sibling));
    }
}

// Generates a public iterator type `$name` that starts at an optional node
// and repeatedly follows the `$next` link of the node it last yielded.
macro_rules! axis_iterator {
    (#[$attr:meta] $name:ident : $next:ident) => {
        #[$attr]
        #[derive(Debug)]
        pub struct $name<'a, T: 'a>(Option<&'a Node<'a, T>>);

        impl<'a, T> Iterator for $name<'a, T> {
            type Item = &'a Node<'a, T>;

            fn next(&mut self) -> Option<&'a Node<'a, T>> {
                // Once the cursor is empty it stays empty.
                let current = self.0.take()?;
                // Advance the cursor before yielding the current node.
                self.0 = current.$next.get();
                Some(current)
            }
        }
    };
}

axis_iterator! {
    #[doc = "An iterator of references to the ancestors of a given node."]
    Ancestors: parent
}

axis_iterator! {
    #[doc = "An iterator of references to the siblings before a given node."]
    PrecedingSiblings: previous_sibling
}

axis_iterator! {
    #[doc = "An iterator of references to the siblings after a given node."]
    FollowingSiblings: next_sibling
}

axis_iterator! {
    #[doc = "An iterator of references to the children of a given node."]
    Children: next_sibling
}

axis_iterator! {
    #[doc = "An iterator of references to the children of a given node, in reverse order."]
    ReverseChildren: previous_sibling
}

/// An iterator of references to a given node and its descendants, in tree order.
///
/// This is a thin filter over `Traverse`: every `NodeEdge::Start` edge is
/// surfaced as the node it carries, and every `NodeEdge::End` edge is dropped.
#[derive(Debug)]
pub struct Descendants<'a, T: 'a>(Traverse<'a, T>);

impl<'a, T> Iterator for Descendants<'a, T> {
    type Item = &'a Node<'a, T>;

    fn next(&mut self) -> Option<&'a Node<'a, T>> {
        // Advance the underlying traversal until it produces a `Start` edge,
        // or return `None` once the traversal itself is finished.
        self.0.find_map(|edge| match edge {
            NodeEdge::Start(node) => Some(node),
            NodeEdge::End(_) => None,
        })
    }
}

/// An edge of the node graph returned by a traversal iterator.
#[derive(Debug, Clone)]
pub enum NodeEdge {
    /// Indicates that start of a node that has children.
    /// Yielded by `Traverse::next` before the node’s descendants.
    /// In HTML or XML, this corresponds to an opening tag like `
` Start(T), /// Indicates that end of a node that has children. /// Yielded by `Traverse::next` after the node’s descendants. /// In HTML or XML, this corresponds to a closing tag like `
` End(T), } macro_rules! traverse_iterator { (#[$attr:meta] $name:ident : $first_child:ident, $next_sibling:ident) => { #[$attr] #[derive(Debug)] pub struct $name<'a, T: 'a> { root: &'a Node<'a, T>, next: Option>>, } impl<'a, T> Iterator for $name<'a, T> { type Item = NodeEdge<&'a Node<'a, T>>; fn next(&mut self) -> Option>> { match self.next.take() { Some(item) => { self.next = match item { NodeEdge::Start(node) => match node.$first_child.get() { Some(child) => Some(NodeEdge::Start(child)), None => Some(NodeEdge::End(node)), }, NodeEdge::End(node) => { if node.same_node(self.root) { None } else { match node.$next_sibling.get() { Some(sibling) => Some(NodeEdge::Start(sibling)), None => match node.parent.get() { Some(parent) => Some(NodeEdge::End(parent)), None => panic!("tree modified during iteration"), }, } } } }; Some(item) } None => None, } } } }; } traverse_iterator! { #[doc = "An iterator of the start and end edges of a given node and its descendants, in tree order."] Traverse: first_child, next_sibling } traverse_iterator! 
{ #[doc = "An iterator of the start and end edges of a given node and its descendants, in reverse tree order."] ReverseTraverse: last_child, previous_sibling } #[test] fn it_works() { struct DropTracker<'a>(&'a Cell); impl<'a> Drop for DropTracker<'a> { fn drop(&mut self) { self.0.set(self.0.get() + 1); } } let drop_counter = Cell::new(0); { let mut new_counter = 0; let arena = typed_arena::Arena::new(); let mut new = || { new_counter += 1; arena.alloc(Node::new((new_counter, DropTracker(&drop_counter)))) }; let a = new(); // 1 a.append(new()); // 2 a.append(new()); // 3 a.prepend(new()); // 4 let b = new(); // 5 b.append(a); a.insert_before(new()); // 6 a.insert_before(new()); // 7 a.insert_after(new()); // 8 a.insert_after(new()); // 9 let c = new(); // 10 b.append(c); assert_eq!(drop_counter.get(), 0); c.previous_sibling.get().unwrap().detach(); assert_eq!(drop_counter.get(), 0); assert_eq!( b.descendants().map(|node| node.data.0).collect::>(), [5, 6, 7, 1, 4, 2, 3, 9, 10] ); } assert_eq!(drop_counter.get(), 10); } comrak-0.29.0/src/cm.rs000064400000000000000000000756051046102023000127410ustar 00000000000000use crate::ctype::{isalpha, isdigit, ispunct, isspace}; use crate::nodes::{ AstNode, ListDelimType, ListType, NodeCodeBlock, NodeHeading, NodeHtmlBlock, NodeLink, NodeMath, NodeTable, NodeValue, NodeWikiLink, }; use crate::nodes::{NodeList, TableAlignment}; #[cfg(feature = "shortcodes")] use crate::parser::shortcodes::NodeShortCode; use crate::parser::Options; use crate::scanners; use crate::strings::trim_start_match; use crate::{nodes, Plugins}; use std::cmp::max; use std::io::{self, Write}; /// Formats an AST as CommonMark, modified by the given options. pub fn format_document<'a>( root: &'a AstNode<'a>, options: &Options, output: &mut dyn Write, ) -> io::Result<()> { // Formatting an ill-formed AST might lead to invalid output. However, we don't want to pay for // validation in normal workflow. As a middleground, we validate the AST in debug builds. 
See // https://github.com/kivikakk/comrak/issues/371. #[cfg(debug_assertions)] root.validate().unwrap_or_else(|e| { panic!("The document to format is ill-formed: {:?}", e); }); format_document_with_plugins(root, options, output, &Plugins::default()) } /// Formats an AST as CommonMark, modified by the given options. Accepts custom plugins. pub fn format_document_with_plugins<'a>( root: &'a AstNode<'a>, options: &Options, output: &mut dyn Write, _plugins: &Plugins, ) -> io::Result<()> { let mut f = CommonMarkFormatter::new(root, options); f.format(root); if !f.v.is_empty() && f.v[f.v.len() - 1] != b'\n' { f.v.push(b'\n'); } output.write_all(&f.v)?; Ok(()) } struct CommonMarkFormatter<'a, 'o, 'c> { node: &'a AstNode<'a>, options: &'o Options<'c>, v: Vec, prefix: Vec, column: usize, need_cr: u8, last_breakable: usize, begin_line: bool, begin_content: bool, no_linebreaks: bool, in_tight_list_item: bool, custom_escape: Option, u8) -> bool>, footnote_ix: u32, ol_stack: Vec, } #[derive(PartialEq, Clone, Copy)] enum Escaping { Literal, Normal, Url, Title, } impl<'a, 'o, 'c> Write for CommonMarkFormatter<'a, 'o, 'c> { fn write(&mut self, buf: &[u8]) -> std::io::Result { self.output(buf, false, Escaping::Literal); Ok(buf.len()) } fn flush(&mut self) -> std::io::Result<()> { Ok(()) } } impl<'a, 'o, 'c> CommonMarkFormatter<'a, 'o, 'c> { fn new(node: &'a AstNode<'a>, options: &'o Options<'c>) -> Self { CommonMarkFormatter { node, options, v: vec![], prefix: vec![], column: 0, need_cr: 0, last_breakable: 0, begin_line: true, begin_content: true, no_linebreaks: false, in_tight_list_item: false, custom_escape: None, footnote_ix: 0, ol_stack: vec![], } } fn output(&mut self, buf: &[u8], wrap: bool, escaping: Escaping) { let wrap = wrap && !self.no_linebreaks; if self.in_tight_list_item && self.need_cr > 1 { self.need_cr = 1; } let mut k = self.v.len() as i32 - 1; while self.need_cr > 0 { if k < 0 || self.v[k as usize] == b'\n' { k -= 1; } else { self.v.push(b'\n'); if self.need_cr > 
1 { self.v.extend(&self.prefix); } } self.column = 0; self.last_breakable = 0; self.begin_line = true; self.begin_content = true; self.need_cr -= 1; } let mut i = 0; while i < buf.len() { if self.begin_line { self.v.extend(&self.prefix); self.column = self.prefix.len(); } if self.custom_escape.is_some() && self.custom_escape.unwrap()(self.node, buf[i]) { self.v.push(b'\\'); } let nextc = buf.get(i + 1); if buf[i] == b' ' && wrap { if !self.begin_line { let last_nonspace = self.v.len(); self.v.push(b' '); self.column += 1; self.begin_line = false; self.begin_content = false; while buf.get(i + 1) == Some(&(b' ')) { i += 1; } if !buf.get(i + 1).map_or(false, |&c| isdigit(c)) { self.last_breakable = last_nonspace; } } } else if escaping == Escaping::Literal { if buf[i] == b'\n' { self.v.push(b'\n'); self.column = 0; self.begin_line = true; self.begin_content = true; self.last_breakable = 0; } else { self.v.push(buf[i]); self.column += 1; self.begin_line = false; self.begin_content = self.begin_content && isdigit(buf[i]); } } else { self.outc(buf[i], escaping, nextc); self.begin_line = false; self.begin_content = self.begin_content && isdigit(buf[i]); } if self.options.render.width > 0 && self.column > self.options.render.width && !self.begin_line && self.last_breakable > 0 { let remainder = self.v[self.last_breakable + 1..].to_vec(); self.v.truncate(self.last_breakable); self.v.push(b'\n'); self.v.extend(&self.prefix); self.v.extend(&remainder); self.column = self.prefix.len() + remainder.len(); self.last_breakable = 0; self.begin_line = false; self.begin_content = false; } i += 1; } } fn outc(&mut self, c: u8, escaping: Escaping, nextc: Option<&u8>) { let follows_digit = !self.v.is_empty() && isdigit(self.v[self.v.len() - 1]); let nextc = nextc.map_or(0, |&c| c); let needs_escaping = c < 0x80 && escaping != Escaping::Literal && ((escaping == Escaping::Normal && (c < 0x20 || c == b'*' || c == b'_' || c == b'[' || c == b']' || c == b'#' || c == b'<' || c == b'>' || c == 
b'\\' || c == b'`' || c == b'!' || (c == b'&' && isalpha(nextc)) || (c == b'!' && nextc == 0x5b) || (self.begin_content && (c == b'-' || c == b'+' || c == b'=') && !follows_digit) || (self.begin_content && (c == b'.' || c == b')') && follows_digit && (nextc == 0 || isspace(nextc))))) || (escaping == Escaping::Url && (c == b'`' || c == b'<' || c == b'>' || isspace(c) || c == b'\\' || c == b')' || c == b'(')) || (escaping == Escaping::Title && (c == b'`' || c == b'<' || c == b'>' || c == b'"' || c == b'\\'))); if needs_escaping { if escaping == Escaping::Url && isspace(c) { write!(self.v, "%{:2X}", c).unwrap(); self.column += 3; } else if ispunct(c) { write!(self.v, "\\{}", c as char).unwrap(); self.column += 2; } else { let s = format!("&#{};", c); self.write_all(s.as_bytes()).unwrap(); self.column += s.len(); } } else { self.v.push(c); self.column += 1; } } fn cr(&mut self) { self.need_cr = max(self.need_cr, 1); } fn blankline(&mut self) { self.need_cr = max(self.need_cr, 2); } fn format(&mut self, node: &'a AstNode<'a>) { enum Phase { Pre, Post, } let mut stack = vec![(node, Phase::Pre)]; while let Some((node, phase)) = stack.pop() { match phase { Phase::Pre => { if self.format_node(node, true) { stack.push((node, Phase::Post)); for ch in node.reverse_children() { stack.push((ch, Phase::Pre)); } } } Phase::Post => { self.format_node(node, false); } } } } fn get_in_tight_list_item(&self, node: &'a AstNode<'a>) -> bool { let tmp = match nodes::containing_block(node) { Some(tmp) => tmp, None => return false, }; match tmp.data.borrow().value { NodeValue::Item(..) | NodeValue::TaskItem(..) => { if let NodeValue::List(ref nl) = tmp.parent().unwrap().data.borrow().value { return nl.tight; } return false; } _ => {} } let parent = match tmp.parent() { Some(parent) => parent, None => return false, }; match parent.data.borrow().value { NodeValue::Item(..) | NodeValue::TaskItem(..) 
=> { if let NodeValue::List(ref nl) = parent.parent().unwrap().data.borrow().value { return nl.tight; } } _ => {} } false } fn format_node(&mut self, node: &'a AstNode<'a>, entering: bool) -> bool { self.node = node; let allow_wrap = self.options.render.width > 0 && !self.options.render.hardbreaks; let parent_node = node.parent(); if entering { if parent_node.is_some() && matches!( parent_node.unwrap().data.borrow().value, NodeValue::Item(..) | NodeValue::TaskItem(..) ) { self.in_tight_list_item = self.get_in_tight_list_item(node); } } else if matches!(node.data.borrow().value, NodeValue::List(..)) { self.in_tight_list_item = parent_node.is_some() && matches!( parent_node.unwrap().data.borrow().value, NodeValue::Item(..) | NodeValue::TaskItem(..) ) && self.get_in_tight_list_item(node); } let next_is_block = node .next_sibling() .map_or(true, |next| next.data.borrow().value.block()); match node.data.borrow().value { NodeValue::Document => (), NodeValue::FrontMatter(ref fm) => self.format_front_matter(fm.as_bytes(), entering), NodeValue::BlockQuote => self.format_block_quote(entering), NodeValue::List(..) => self.format_list(node, entering), NodeValue::Item(..) => self.format_item(node, entering), NodeValue::DescriptionList => (), NodeValue::DescriptionItem(..) 
=> (), NodeValue::DescriptionTerm => (), NodeValue::DescriptionDetails => self.format_description_details(entering), NodeValue::Heading(ref nch) => self.format_heading(nch, entering), NodeValue::CodeBlock(ref ncb) => self.format_code_block(node, ncb, entering), NodeValue::HtmlBlock(ref nhb) => self.format_html_block(nhb, entering), NodeValue::ThematicBreak => self.format_thematic_break(entering), NodeValue::Paragraph => self.format_paragraph(entering), NodeValue::Text(ref literal) => { self.format_text(literal.as_bytes(), allow_wrap, entering) } NodeValue::LineBreak => self.format_line_break(entering, next_is_block), NodeValue::SoftBreak => self.format_soft_break(allow_wrap, entering), NodeValue::Code(ref code) => { self.format_code(code.literal.as_bytes(), allow_wrap, entering) } NodeValue::HtmlInline(ref literal) => { self.format_html_inline(literal.as_bytes(), entering) } NodeValue::Strong => { if parent_node.is_none() || !matches!(parent_node.unwrap().data.borrow().value, NodeValue::Strong) { self.format_strong(); } } NodeValue::Emph => self.format_emph(node), NodeValue::TaskItem(symbol) => self.format_task_item(symbol, node, entering), NodeValue::Strikethrough => self.format_strikethrough(), NodeValue::Superscript => self.format_superscript(), NodeValue::Link(ref nl) => return self.format_link(node, nl, entering), NodeValue::Image(ref nl) => self.format_image(nl, allow_wrap, entering), #[cfg(feature = "shortcodes")] NodeValue::ShortCode(ref ne) => self.format_shortcode(ne, entering), NodeValue::Table(..) => self.format_table(entering), NodeValue::TableRow(..) => self.format_table_row(entering), NodeValue::TableCell => self.format_table_cell(node, entering), NodeValue::FootnoteDefinition(ref nfd) => { self.format_footnote_definition(&nfd.name, entering) } NodeValue::FootnoteReference(ref nfr) => { self.format_footnote_reference(nfr.name.as_bytes(), entering) } NodeValue::MultilineBlockQuote(..) 
=> self.format_block_quote(entering), NodeValue::Escaped => { // noop - automatic escaping is already being done } NodeValue::Math(ref math) => self.format_math(math, allow_wrap, entering), NodeValue::WikiLink(ref nl) => return self.format_wikilink(nl, entering), NodeValue::Underline => self.format_underline(), NodeValue::SpoileredText => self.format_spoiler(), NodeValue::EscapedTag(ref net) => self.format_escaped_tag(net), }; true } fn format_front_matter(&mut self, front_matter: &[u8], entering: bool) { if entering { self.output(front_matter, false, Escaping::Literal); } } fn format_block_quote(&mut self, entering: bool) { if entering { write!(self, "> ").unwrap(); self.begin_content = true; write!(self.prefix, "> ").unwrap(); } else { let new_len = self.prefix.len() - 2; self.prefix.truncate(new_len); self.blankline(); } } fn format_list(&mut self, node: &'a AstNode<'a>, entering: bool) { let ol_start = match node.data.borrow().value { NodeValue::List(NodeList { list_type: ListType::Ordered, start, .. }) => Some(start), _ => None, }; if entering { if let Some(start) = ol_start { self.ol_stack.push(start); } } else { if ol_start.is_some() { self.ol_stack.pop(); } if match node.next_sibling() { Some(next_sibling) => matches!( next_sibling.data.borrow().value, NodeValue::CodeBlock(..) | NodeValue::List(..) ), _ => false, } { self.cr(); write!(self, "").unwrap(); self.blankline(); } } } fn format_item(&mut self, node: &'a AstNode<'a>, entering: bool) { let parent = match node.parent().unwrap().data.borrow().value { NodeValue::List(ref nl) => *nl, _ => unreachable!(), }; let mut listmarker = vec![]; let marker_width = if parent.list_type == ListType::Bullet { 2 } else { let last_stack = self.ol_stack.last_mut().unwrap(); let list_number = *last_stack; if entering { *last_stack += 1; } let list_delim = parent.delimiter; write!( listmarker, "{}{}{}", list_number, if list_delim == ListDelimType::Paren { ")" } else { "." 
}, if list_number < 10 { " " } else { " " } ) .unwrap(); listmarker.len() }; if entering { if parent.list_type == ListType::Bullet { let bullet = char::from(self.options.render.list_style as u8); write!(self, "{} ", bullet).unwrap(); } else { self.write_all(&listmarker).unwrap(); } self.begin_content = true; for _ in 0..marker_width { write!(self.prefix, " ").unwrap(); } } else { let new_len = self.prefix.len() - marker_width; self.prefix.truncate(new_len); self.cr(); } } fn format_description_details(&mut self, entering: bool) { if entering { write!(self, ": ").unwrap() } } fn format_heading(&mut self, nch: &NodeHeading, entering: bool) { if entering { for _ in 0..nch.level { write!(self, "#").unwrap(); } write!(self, " ").unwrap(); self.begin_content = true; self.no_linebreaks = true; } else { self.no_linebreaks = false; self.blankline(); } } fn format_code_block(&mut self, node: &'a AstNode<'a>, ncb: &NodeCodeBlock, entering: bool) { if entering { let first_in_list_item = node.previous_sibling().is_none() && match node.parent() { Some(parent) => { matches!( parent.data.borrow().value, NodeValue::Item(..) | NodeValue::TaskItem(..) 
) } _ => false, }; if !first_in_list_item { self.blankline(); } let info = ncb.info.as_bytes(); let literal = ncb.literal.as_bytes(); #[allow(clippy::len_zero)] if !(info.len() > 0 || literal.len() <= 2 || isspace(literal[0]) || first_in_list_item || self.options.render.prefer_fenced || isspace(literal[literal.len() - 1]) && isspace(literal[literal.len() - 2])) { write!(self, " ").unwrap(); write!(self.prefix, " ").unwrap(); self.write_all(literal).unwrap(); let new_len = self.prefix.len() - 4; self.prefix.truncate(new_len); } else { let fence_char = if info.contains(&b'`') { b'~' } else { b'`' }; let numticks = max(3, longest_char_sequence(literal, fence_char) + 1); for _ in 0..numticks { write!(self, "{}", fence_char as char).unwrap(); } if !info.is_empty() { write!(self, " ").unwrap(); self.write_all(info).unwrap(); } self.cr(); self.write_all(literal).unwrap(); self.cr(); for _ in 0..numticks { write!(self, "{}", fence_char as char).unwrap(); } } self.blankline(); } } fn format_html_block(&mut self, nhb: &NodeHtmlBlock, entering: bool) { if entering { self.blankline(); self.write_all(nhb.literal.as_bytes()).unwrap(); self.blankline(); } } fn format_thematic_break(&mut self, entering: bool) { if entering { self.blankline(); write!(self, "-----").unwrap(); self.blankline(); } } fn format_paragraph(&mut self, entering: bool) { if !entering { self.blankline(); } } fn format_text(&mut self, literal: &[u8], allow_wrap: bool, entering: bool) { if entering { self.output(literal, allow_wrap, Escaping::Normal); } } fn format_line_break(&mut self, entering: bool, next_is_block: bool) { if entering { if !self.options.render.hardbreaks && !next_is_block { // If the next element is a block, a backslash means a // literal backslash instead of a line break. In this case // we can just skip the line break since it's meaningless // before a block. 
write!(self, "\\").unwrap(); } self.cr(); } } fn format_soft_break(&mut self, allow_wrap: bool, entering: bool) { if entering { if !self.no_linebreaks && self.options.render.width == 0 && !self.options.render.hardbreaks { self.cr(); } else if self.options.render.hardbreaks { self.output(&[b'\n'], allow_wrap, Escaping::Literal); } else { self.output(&[b' '], allow_wrap, Escaping::Literal); } } } fn format_code(&mut self, literal: &[u8], allow_wrap: bool, entering: bool) { if entering { let numticks = shortest_unused_sequence(literal, b'`'); for _ in 0..numticks { write!(self, "`").unwrap(); } let all_space = literal .iter() .all(|&c| c == b' ' || c == b'\r' || c == b'\n'); let has_edge_space = literal[0] == b' ' || literal[literal.len() - 1] == b' '; let has_edge_backtick = literal[0] == b'`' || literal[literal.len() - 1] == b'`'; let pad = literal.is_empty() || has_edge_backtick || (!all_space && has_edge_space); if pad { write!(self, " ").unwrap(); } self.output(literal, allow_wrap, Escaping::Literal); if pad { write!(self, " ").unwrap(); } for _ in 0..numticks { write!(self, "`").unwrap(); } } } fn format_html_inline(&mut self, literal: &[u8], entering: bool) { if entering { self.write_all(literal).unwrap(); } } fn format_strong(&mut self) { write!(self, "**").unwrap(); } fn format_emph(&mut self, node: &'a AstNode<'a>) { let emph_delim = if match node.parent() { Some(parent) => matches!(parent.data.borrow().value, NodeValue::Emph), _ => false, } && node.next_sibling().is_none() && node.previous_sibling().is_none() { b'_' } else { b'*' }; self.write_all(&[emph_delim]).unwrap(); } fn format_task_item(&mut self, symbol: Option, node: &'a AstNode<'a>, entering: bool) { self.format_item(node, entering); if entering { write!(self, "[{}] ", symbol.unwrap_or(' ')).unwrap(); } } fn format_strikethrough(&mut self) { write!(self, "~").unwrap(); } fn format_superscript(&mut self) { write!(self, "^").unwrap(); } fn format_underline(&mut self) { write!(self, "__").unwrap(); } 
fn format_spoiler(&mut self) { write!(self, "||").unwrap(); } fn format_escaped_tag(&mut self, net: &String) { self.output(net.as_bytes(), false, Escaping::Literal); } fn format_link(&mut self, node: &'a AstNode<'a>, nl: &NodeLink, entering: bool) -> bool { if is_autolink(node, nl) { if entering { write!(self, "<{}>", trim_start_match(&nl.url, "mailto:")).unwrap(); return false; } } else if entering { write!(self, "[").unwrap(); } else { write!(self, "](").unwrap(); self.output(nl.url.as_bytes(), false, Escaping::Url); if !nl.title.is_empty() { write!(self, " \"").unwrap(); self.output(nl.title.as_bytes(), false, Escaping::Title); write!(self, "\"").unwrap(); } write!(self, ")").unwrap(); } true } fn format_wikilink(&mut self, nl: &NodeWikiLink, entering: bool) -> bool { if entering { write!(self, "[[").unwrap(); if self.options.extension.wikilinks_title_after_pipe { self.output(nl.url.as_bytes(), false, Escaping::Url); write!(self, "|").unwrap(); } } else { if self.options.extension.wikilinks_title_before_pipe { write!(self, "|").unwrap(); self.output(nl.url.as_bytes(), false, Escaping::Url); } write!(self, "]]").unwrap(); } true } fn format_image(&mut self, nl: &NodeLink, allow_wrap: bool, entering: bool) { if entering { write!(self, "![").unwrap(); } else { write!(self, "](").unwrap(); self.output(nl.url.as_bytes(), false, Escaping::Url); if !nl.title.is_empty() { self.output(&[b' ', b'"'], allow_wrap, Escaping::Literal); self.output(nl.title.as_bytes(), false, Escaping::Title); write!(self, "\"").unwrap(); } write!(self, ")").unwrap(); } } #[cfg(feature = "shortcodes")] fn format_shortcode(&mut self, ne: &NodeShortCode, entering: bool) { if entering { write!(self, ":").unwrap(); self.output(ne.code.as_bytes(), false, Escaping::Literal); write!(self, ":").unwrap(); } } fn format_table(&mut self, entering: bool) { if entering { self.custom_escape = Some(table_escape); } else { self.custom_escape = None; } self.blankline(); } fn format_table_row(&mut self, 
entering: bool) { if entering { self.cr(); write!(self, "|").unwrap(); } } fn format_table_cell(&mut self, node: &'a AstNode<'a>, entering: bool) { if entering { write!(self, " ").unwrap(); } else { write!(self, " |").unwrap(); let row = &node.parent().unwrap().data.borrow().value; let in_header = match *row { NodeValue::TableRow(header) => header, _ => panic!(), }; if in_header && node.next_sibling().is_none() { let table = &node.parent().unwrap().parent().unwrap().data.borrow().value; let alignments = match *table { NodeValue::Table(NodeTable { ref alignments, .. }) => alignments, _ => panic!(), }; self.cr(); write!(self, "|").unwrap(); for a in alignments { write!( self, " {} |", match *a { TableAlignment::Left => ":--", TableAlignment::Center => ":-:", TableAlignment::Right => "--:", TableAlignment::None => "---", } ) .unwrap(); } self.cr(); } } } fn format_footnote_definition(&mut self, name: &str, entering: bool) { if entering { self.footnote_ix += 1; writeln!(self, "[^{}]:", name).unwrap(); write!(self.prefix, " ").unwrap(); } else { let new_len = self.prefix.len() - 4; self.prefix.truncate(new_len); } } fn format_footnote_reference(&mut self, r: &[u8], entering: bool) { if entering { self.write_all(b"[^").unwrap(); self.write_all(r).unwrap(); self.write_all(b"]").unwrap(); } } fn format_math(&mut self, math: &NodeMath, allow_wrap: bool, entering: bool) { if entering { let literal = math.literal.as_bytes(); let start_fence = if math.dollar_math { if math.display_math { "$$" } else { "$" } } else { "$`" }; let end_fence = if start_fence == "$`" { "`$" } else { start_fence }; self.output(start_fence.as_bytes(), false, Escaping::Literal); self.output(literal, allow_wrap, Escaping::Literal); self.output(end_fence.as_bytes(), false, Escaping::Literal); } } } fn longest_char_sequence(literal: &[u8], ch: u8) -> usize { let mut longest = 0; let mut current = 0; for c in literal { if *c == ch { current += 1; } else { if current > longest { longest = current; } 
current = 0; } } if current > longest { longest = current; } longest } fn shortest_unused_sequence(literal: &[u8], f: u8) -> usize { let mut used = 1; let mut current = 0; for c in literal { if *c == f { current += 1; } else { if current > 0 { used |= 1 << current; } current = 0; } } if current > 0 { used |= 1 << current; } let mut i = 0; while used & 1 != 0 { used >>= 1; i += 1; } i } fn is_autolink<'a>(node: &'a AstNode<'a>, nl: &NodeLink) -> bool { if nl.url.is_empty() || scanners::scheme(nl.url.as_bytes()).is_none() { return false; } if !nl.title.is_empty() { return false; } let link_text = match node.first_child() { None => return false, Some(child) => match child.data.borrow().value { NodeValue::Text(ref t) => t.clone(), _ => return false, }, }; trim_start_match(&nl.url, "mailto:") == link_text } fn table_escape<'a>(node: &'a AstNode<'a>, c: u8) -> bool { match node.data.borrow().value { NodeValue::Table(..) | NodeValue::TableRow(..) | NodeValue::TableCell => false, _ => c == b'|', } } comrak-0.29.0/src/ctype.rs000064400000000000000000000027341046102023000134570ustar 00000000000000#[rustfmt::skip] const CMARK_CTYPE_CLASS: [u8; 256] = [ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, /* 1 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 3 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, /* 4 */ 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, /* 5 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, /* 6 */ 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, /* 7 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 0, /* 8 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* a */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* b */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* c */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* d */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* e */ 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]; pub fn isspace(ch: u8) -> bool { CMARK_CTYPE_CLASS[ch as usize] == 1 } pub fn ispunct(ch: u8) -> bool { CMARK_CTYPE_CLASS[ch as usize] == 2 } pub fn isdigit(ch: u8) -> bool { CMARK_CTYPE_CLASS[ch as usize] == 3 } pub fn isalpha(ch: u8) -> bool { CMARK_CTYPE_CLASS[ch as usize] == 4 } pub fn isalnum(ch: u8) -> bool { CMARK_CTYPE_CLASS[ch as usize] == 3 || CMARK_CTYPE_CLASS[ch as usize] == 4 } comrak-0.29.0/src/entity.rs000064400000000000000000000057141046102023000136500ustar 00000000000000use crate::ctype::isdigit; use entities::ENTITIES; use std::char; use std::cmp::min; use std::str; pub const ENTITY_MIN_LENGTH: usize = 2; pub const ENTITY_MAX_LENGTH: usize = 32; fn isxdigit(ch: &u8) -> bool { (*ch >= b'0' && *ch <= b'9') || (*ch >= b'a' && *ch <= b'f') || (*ch >= b'A' && *ch <= b'F') } pub fn unescape(text: &[u8]) -> Option<(Vec, usize)> { if text.len() >= 3 && text[0] == b'#' { let mut codepoint: u32 = 0; let mut i = 0; let num_digits = if isdigit(text[1]) { i = 1; while i < text.len() && isdigit(text[i]) { codepoint = (codepoint * 10) + (text[i] as u32 - '0' as u32); codepoint = min(codepoint, 0x11_0000); i += 1; } i - 1 } else if text[1] == b'x' || text[1] == b'X' { i = 2; while i < text.len() && isxdigit(&text[i]) { codepoint = (codepoint * 16) + ((text[i] as u32 | 32) % 39 - 9); codepoint = min(codepoint, 0x11_0000); i += 1; } i - 2 } else { 0 }; if i < text.len() && text[i] == b';' && (((text[1] == b'x' || text[1] == b'X') && (1..=6).contains(&num_digits)) || (1..=7).contains(&num_digits)) { if codepoint == 0 || (0xD800..=0xE000).contains(&codepoint) || codepoint >= 0x110000 { codepoint = 0xFFFD; } return Some(( char::from_u32(codepoint) .unwrap_or('\u{FFFD}') .to_string() .into_bytes(), i + 1, )); } } let size = min(text.len(), ENTITY_MAX_LENGTH); for i in ENTITY_MIN_LENGTH..size { if text[i] == b' ' { return None; } if text[i] == b';' { return 
lookup(&text[..i]).map(|e| (e.to_vec(), i + 1)); } } None } fn lookup(text: &[u8]) -> Option<&[u8]> { let entity_str = format!("&{};", unsafe { str::from_utf8_unchecked(text) }); let entity = ENTITIES.iter().find(|e| e.entity == entity_str); match entity { Some(e) => Some(e.characters.as_bytes()), None => None, } } pub fn unescape_html(src: &[u8]) -> Vec { let size = src.len(); let mut i = 0; let mut v = Vec::with_capacity(size); while i < size { let org = i; while i < size && src[i] != b'&' { i += 1; } if i > org { if org == 0 && i >= size { return src.to_vec(); } v.extend_from_slice(&src[org..i]); } if i >= size { return v; } i += 1; match unescape(&src[i..]) { Some((chs, size)) => { v.extend_from_slice(&chs); i += size; } None => v.push(b'&'), } } v } comrak-0.29.0/src/html.rs000064400000000000000000001340371046102023000133010ustar 00000000000000//! The HTML renderer for the CommonMark AST, as well as helper functions. use crate::ctype::isspace; use crate::nodes::{ AstNode, ListType, NodeCode, NodeFootnoteDefinition, NodeMath, NodeTable, NodeValue, TableAlignment, }; use crate::parser::{Options, Plugins}; use crate::scanners; use once_cell::sync::Lazy; use regex::Regex; use std::borrow::Cow; use std::cell::Cell; use std::collections::{HashMap, HashSet}; use std::io::{self, Write}; use std::str; use crate::adapters::HeadingMeta; /// Formats an AST as HTML, modified by the given options. pub fn format_document<'a>( root: &'a AstNode<'a>, options: &Options, output: &mut dyn Write, ) -> io::Result<()> { format_document_with_plugins(root, options, output, &Plugins::default()) } /// Formats an AST as HTML, modified by the given options. Accepts custom plugins. 
pub fn format_document_with_plugins<'a>( root: &'a AstNode<'a>, options: &Options, output: &mut dyn Write, plugins: &Plugins, ) -> io::Result<()> { let mut writer = WriteWithLast { output, last_was_lf: Cell::new(true), }; let mut f = HtmlFormatter::new(options, &mut writer, plugins); f.format(root, false)?; if f.footnote_ix > 0 { f.output.write_all(b"\n\n")?; } Ok(()) } struct WriteWithLast<'w> { output: &'w mut dyn Write, last_was_lf: Cell, } impl<'w> Write for WriteWithLast<'w> { fn flush(&mut self) -> io::Result<()> { self.output.flush() } fn write(&mut self, buf: &[u8]) -> io::Result { let l = buf.len(); if l > 0 { self.last_was_lf.set(buf[l - 1] == 10); } self.output.write(buf) } } /// Converts header strings to canonical, unique, but still human-readable, /// anchors. /// /// To guarantee uniqueness, an anchorizer keeps track of the anchors it has /// returned; use one per output file. /// /// ## Example /// /// ``` /// # use comrak::Anchorizer; /// let mut anchorizer = Anchorizer::new(); /// // First "stuff" is unsuffixed. /// assert_eq!("stuff".to_string(), anchorizer.anchorize("Stuff".to_string())); /// // Second "stuff" has "-1" appended to make it unique. /// assert_eq!("stuff-1".to_string(), anchorizer.anchorize("Stuff".to_string())); /// ``` #[derive(Debug, Default)] #[doc(hidden)] pub struct Anchorizer(HashSet); impl Anchorizer { /// Construct a new anchorizer. pub fn new() -> Self { Anchorizer(HashSet::new()) } /// Returns a String that has been converted into an anchor using the /// GFM algorithm, which involves changing spaces to dashes, removing /// problem characters and, if needed, adding a suffix to make the /// resultant anchor unique. 
/// /// ``` /// # use comrak::Anchorizer; /// let mut anchorizer = Anchorizer::new(); /// let source = "Ticks aren't in"; /// assert_eq!("ticks-arent-in".to_string(), anchorizer.anchorize(source.to_string())); /// ``` pub fn anchorize(&mut self, header: String) -> String { static REJECTED_CHARS: Lazy = Lazy::new(|| Regex::new(r"[^\p{L}\p{M}\p{N}\p{Pc} -]").unwrap()); let mut id = header.to_lowercase(); id = REJECTED_CHARS.replace_all(&id, "").replace(' ', "-"); let mut uniq = 0; id = loop { let anchor = if uniq == 0 { Cow::from(&id) } else { Cow::from(format!("{}-{}", id, uniq)) }; if !self.0.contains(&*anchor) { break anchor.into_owned(); } uniq += 1; }; self.0.insert(id.clone()); id } } struct HtmlFormatter<'o, 'c> { output: &'o mut WriteWithLast<'o>, options: &'o Options<'c>, anchorizer: Anchorizer, footnote_ix: u32, written_footnote_ix: u32, plugins: &'o Plugins<'o>, } #[rustfmt::skip] const NEEDS_ESCAPED : [bool; 256] = [ false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, 
false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, ]; fn tagfilter(literal: &[u8]) -> bool { static TAGFILTER_BLACKLIST: [&str; 9] = [ "title", "textarea", "style", "xmp", "iframe", "noembed", "noframes", "script", "plaintext", ]; if literal.len() < 3 || literal[0] != b'<' { return false; } let mut i = 1; if literal[i] == b'/' { i += 1; } let lc = unsafe { String::from_utf8_unchecked(literal[i..].to_vec()) }.to_lowercase(); for t in TAGFILTER_BLACKLIST.iter() { if lc.starts_with(t) { let j = i + t.len(); return isspace(literal[j]) || literal[j] == b'>' || (literal[j] == b'/' && literal.len() >= j + 2 && literal[j + 1] == b'>'); } } false } fn tagfilter_block(input: &[u8], o: &mut dyn Write) -> io::Result<()> { let size = input.len(); let mut i = 0; while i < size { let org = i; while i < size && input[i] != b'<' { i += 1; } if i > org { o.write_all(&input[org..i])?; } if i >= size { break; } if tagfilter(&input[i..]) { o.write_all(b"<")?; } else { o.write_all(b"<")?; } i += 1; } Ok(()) } fn dangerous_url(input: &[u8]) -> bool { scanners::dangerous_url(input).is_some() } /// Writes buffer to output, escaping anything that could be interpreted as an /// HTML tag. 
/// /// Namely: /// /// * U+0022 QUOTATION MARK " is rendered as " /// * U+0026 AMPERSAND & is rendered as & /// * U+003C LESS-THAN SIGN < is rendered as < /// * U+003E GREATER-THAN SIGN > is rendered as > /// * Everything else is passed through unchanged. /// /// Note that this is appropriate and sufficient for free text, but not for /// URLs in attributes. See escape_href. pub fn escape(output: &mut dyn Write, buffer: &[u8]) -> io::Result<()> { let mut offset = 0; for (i, &byte) in buffer.iter().enumerate() { if NEEDS_ESCAPED[byte as usize] { let esc: &[u8] = match byte { b'"' => b""", b'&' => b"&", b'<' => b"<", b'>' => b">", _ => unreachable!(), }; output.write_all(&buffer[offset..i])?; output.write_all(esc)?; offset = i + 1; } } output.write_all(&buffer[offset..])?; Ok(()) } /// Writes buffer to output, escaping in a manner appropriate for URLs in HTML /// attributes. /// /// Namely: /// /// * U+0026 AMPERSAND & is rendered as & /// * U+0027 APOSTROPHE ' is rendered as ' /// * Alphanumeric and a range of non-URL safe characters. /// /// The inclusion of characters like "%" in those which are not escaped is /// explained somewhat here: /// /// /// /// In other words, if a CommonMark user enters: /// /// ```markdown /// [hi](https://ddg.gg/?q=a%20b) /// ``` /// /// We assume they actually want the query string "?q=a%20b", a search for /// the string "a b", rather than "?q=a%2520b", a search for the literal /// string "a%20b". 
pub fn escape_href(output: &mut dyn Write, buffer: &[u8]) -> io::Result<()> { static HREF_SAFE: Lazy<[bool; 256]> = Lazy::new(|| { let mut a = [false; 256]; for &c in b"-_.+!*(),%#@?=;:/,+$~abcdefghijklmnopqrstuvwxyz".iter() { a[c as usize] = true; } for &c in b"ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789".iter() { a[c as usize] = true; } a }); let size = buffer.len(); let mut i = 0; while i < size { let org = i; while i < size && HREF_SAFE[buffer[i] as usize] { i += 1; } if i > org { output.write_all(&buffer[org..i])?; } if i >= size { break; } match buffer[i] as char { '&' => { output.write_all(b"&")?; } '\'' => { output.write_all(b"'")?; } _ => write!(output, "%{:02X}", buffer[i])?, } i += 1; } Ok(()) } /// Writes an opening HTML tag, using an iterator to enumerate the attributes. /// Note that attribute values are automatically escaped. pub fn write_opening_tag( output: &mut dyn Write, tag: &str, attributes: impl IntoIterator, ) -> io::Result<()> where Str: AsRef, { write!(output, "<{}", tag)?; for (attr, val) in attributes { write!(output, " {}=\"", attr.as_ref())?; escape(output, val.as_ref().as_bytes())?; output.write_all(b"\"")?; } output.write_all(b">")?; Ok(()) } impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { fn new( options: &'o Options<'c>, output: &'o mut WriteWithLast<'o>, plugins: &'o Plugins, ) -> Self { HtmlFormatter { options, output, anchorizer: Anchorizer::new(), footnote_ix: 0, written_footnote_ix: 0, plugins, } } fn cr(&mut self) -> io::Result<()> { if !self.output.last_was_lf.get() { self.output.write_all(b"\n")?; } Ok(()) } fn escape(&mut self, buffer: &[u8]) -> io::Result<()> { escape(&mut self.output, buffer) } fn escape_href(&mut self, buffer: &[u8]) -> io::Result<()> { escape_href(&mut self.output, buffer) } fn format<'a>(&mut self, node: &'a AstNode<'a>, plain: bool) -> io::Result<()> { // Traverse the AST iteratively using a work stack, with pre- and // post-child-traversal phases. 
During pre-order traversal render the // opening tags, then push the node back onto the stack for the // post-order traversal phase, then push the children in reverse order // onto the stack and begin rendering first child. enum Phase { Pre, Post, } let mut stack = vec![(node, plain, Phase::Pre)]; while let Some((node, plain, phase)) = stack.pop() { match phase { Phase::Pre => { let new_plain = if plain { match node.data.borrow().value { NodeValue::Text(ref literal) | NodeValue::Code(NodeCode { ref literal, .. }) | NodeValue::HtmlInline(ref literal) => { self.escape(literal.as_bytes())?; } NodeValue::LineBreak | NodeValue::SoftBreak => { self.output.write_all(b" ")?; } NodeValue::Math(NodeMath { ref literal, .. }) => { self.escape(literal.as_bytes())?; } _ => (), } plain } else { stack.push((node, false, Phase::Post)); self.format_node(node, true)? }; for ch in node.reverse_children() { stack.push((ch, new_plain, Phase::Pre)); } } Phase::Post => { debug_assert!(!plain); self.format_node(node, false)?; } } } Ok(()) } fn collect_text<'a>(node: &'a AstNode<'a>, output: &mut Vec) { match node.data.borrow().value { NodeValue::Text(ref literal) | NodeValue::Code(NodeCode { ref literal, .. }) => { output.extend_from_slice(literal.as_bytes()) } NodeValue::LineBreak | NodeValue::SoftBreak => output.push(b' '), NodeValue::Math(NodeMath { ref literal, .. 
}) => { output.extend_from_slice(literal.as_bytes()) } _ => { for n in node.children() { Self::collect_text(n, output); } } } } fn format_node<'a>(&mut self, node: &'a AstNode<'a>, entering: bool) -> io::Result { match node.data.borrow().value { NodeValue::Document => (), NodeValue::FrontMatter(_) => (), NodeValue::BlockQuote => { if entering { self.cr()?; self.output.write_all(b"\n")?; } else { self.cr()?; self.output.write_all(b"\n")?; } } NodeValue::List(ref nl) => { if entering { self.cr()?; if nl.list_type == ListType::Bullet { self.output.write_all(b"\n")?; } else if nl.start == 1 { self.output.write_all(b"\n")?; } else { self.output.write_all(b"", nl.start)?; } } else if nl.list_type == ListType::Bullet { self.output.write_all(b"\n")?; } else { self.output.write_all(b"\n")?; } } NodeValue::Item(..) => { if entering { self.cr()?; self.output.write_all(b"")?; } else { self.output.write_all(b"\n")?; } } NodeValue::DescriptionList => { if entering { self.cr()?; self.output.write_all(b"")?; } else { self.output.write_all(b"\n")?; } } NodeValue::DescriptionItem(..) 
=> (), NodeValue::DescriptionTerm => { if entering { self.output.write_all(b"")?; } else { self.output.write_all(b"\n")?; } } NodeValue::DescriptionDetails => { if entering { self.output.write_all(b"")?; } else { self.output.write_all(b"\n")?; } } NodeValue::Heading(ref nch) => match self.plugins.render.heading_adapter { None => { if entering { self.cr()?; write!(self.output, "")?; if let Some(ref prefix) = self.options.extension.header_ids { let mut text_content = Vec::with_capacity(20); Self::collect_text(node, &mut text_content); let mut id = String::from_utf8(text_content).unwrap(); id = self.anchorizer.anchorize(id); write!( self.output, "", id, prefix, id )?; } } else { writeln!(self.output, "", nch.level)?; } } Some(adapter) => { let mut text_content = Vec::with_capacity(20); Self::collect_text(node, &mut text_content); let content = String::from_utf8(text_content).unwrap(); let heading = HeadingMeta { level: nch.level, content, }; if entering { self.cr()?; adapter.enter( self.output, &heading, if self.options.render.sourcepos { Some(node.data.borrow().sourcepos) } else { None }, )?; } else { adapter.exit(self.output, &heading)?; } } }, NodeValue::CodeBlock(ref ncb) => { if entering { if ncb.info.eq("math") { self.render_math_code_block(node, &ncb.literal)?; } else { self.cr()?; let mut first_tag = 0; let mut pre_attributes: HashMap = HashMap::new(); let mut code_attributes: HashMap = HashMap::new(); let code_attr: String; let literal = &ncb.literal.as_bytes(); let info = &ncb.info.as_bytes(); if !info.is_empty() { while first_tag < info.len() && !isspace(info[first_tag]) { first_tag += 1; } let lang_str = str::from_utf8(&info[..first_tag]).unwrap(); let info_str = str::from_utf8(&info[first_tag..]).unwrap().trim(); if self.options.render.github_pre_lang { pre_attributes.insert(String::from("lang"), lang_str.to_string()); if self.options.render.full_info_string && !info_str.is_empty() { pre_attributes.insert( String::from("data-meta"), 
info_str.trim().to_string(), ); } } else { code_attr = format!("language-{}", lang_str); code_attributes.insert(String::from("class"), code_attr); if self.options.render.full_info_string && !info_str.is_empty() { code_attributes .insert(String::from("data-meta"), info_str.to_string()); } } } if self.options.render.sourcepos { let ast = node.data.borrow(); pre_attributes .insert("data-sourcepos".to_string(), ast.sourcepos.to_string()); } match self.plugins.render.codefence_syntax_highlighter { None => { write_opening_tag(self.output, "pre", pre_attributes)?; write_opening_tag(self.output, "code", code_attributes)?; self.escape(literal)?; self.output.write_all(b"
\n")? } Some(highlighter) => { highlighter.write_pre_tag(self.output, pre_attributes)?; highlighter.write_code_tag(self.output, code_attributes)?; highlighter.write_highlighted( self.output, match str::from_utf8(&info[..first_tag]) { Ok(lang) => Some(lang), Err(_) => None, }, &ncb.literal, )?; self.output.write_all(b"
\n")? } } } } } NodeValue::HtmlBlock(ref nhb) => { // No sourcepos. if entering { self.cr()?; let literal = nhb.literal.as_bytes(); if self.options.render.escape { self.escape(literal)?; } else if !self.options.render.unsafe_ { self.output.write_all(b"")?; } else if self.options.extension.tagfilter { tagfilter_block(literal, &mut self.output)?; } else { self.output.write_all(literal)?; } self.cr()?; } } NodeValue::ThematicBreak => { if entering { self.cr()?; self.output.write_all(b"\n")?; } } NodeValue::Paragraph => { let tight = match node .parent() .and_then(|n| n.parent()) .map(|n| n.data.borrow().value.clone()) { Some(NodeValue::List(nl)) => nl.tight, _ => false, }; let tight = tight || matches!( node.parent().map(|n| n.data.borrow().value.clone()), Some(NodeValue::DescriptionTerm) ); if !tight { if entering { self.cr()?; self.output.write_all(b"")?; } else { if let NodeValue::FootnoteDefinition(nfd) = &node.parent().unwrap().data.borrow().value { if node.next_sibling().is_none() { self.output.write_all(b" ")?; self.put_footnote_backref(nfd)?; } } self.output.write_all(b"

\n")?; } } } NodeValue::Text(ref literal) => { // Nowhere to put sourcepos. if entering { self.escape(literal.as_bytes())?; } } NodeValue::LineBreak => { // Unreliable sourcepos. if entering { self.output.write_all(b"\n")?; } } NodeValue::SoftBreak => { // Unreliable sourcepos. if entering { if self.options.render.hardbreaks { self.output.write_all(b"\n")?; } else { self.output.write_all(b"\n")?; } } } NodeValue::Code(NodeCode { ref literal, .. }) => { // Unreliable sourcepos. if entering { self.output.write_all(b"")?; self.escape(literal.as_bytes())?; self.output.write_all(b"")?; } } NodeValue::HtmlInline(ref literal) => { // No sourcepos. if entering { let literal = literal.as_bytes(); if self.options.render.escape { self.escape(literal)?; } else if !self.options.render.unsafe_ { self.output.write_all(b"")?; } else if self.options.extension.tagfilter && tagfilter(literal) { self.output.write_all(b"<")?; self.output.write_all(&literal[1..])?; } else { self.output.write_all(literal)?; } } } NodeValue::Strong => { // Unreliable sourcepos. let parent_node = node.parent(); if !self.options.render.gfm_quirks || (parent_node.is_none() || !matches!(parent_node.unwrap().data.borrow().value, NodeValue::Strong)) { if entering { self.output.write_all(b"")?; } else { self.output.write_all(b"
")?; } } } NodeValue::Emph => { // Unreliable sourcepos. if entering { self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } } NodeValue::Strikethrough => { // Unreliable sourcepos. if entering { self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } } NodeValue::Superscript => { // Unreliable sourcepos. if entering { self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } } NodeValue::Link(ref nl) => { // Unreliable sourcepos. let parent_node = node.parent(); if !self.options.parse.relaxed_autolinks || (parent_node.is_none() || !matches!( parent_node.unwrap().data.borrow().value, NodeValue::Link(..) )) { if entering { self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } } } NodeValue::Image(ref nl) => { // Unreliable sourcepos. if entering { if self.options.render.figure_with_caption { self.output.write_all(b"
")?; } self.output.write_all(b"")?; if self.options.render.figure_with_caption { if !nl.title.is_empty() { self.output.write_all(b"
")?; self.escape(nl.title.as_bytes())?; self.output.write_all(b"
")?; } self.output.write_all(b"
")?; } } } #[cfg(feature = "shortcodes")] NodeValue::ShortCode(ref nsc) => { // Nowhere to put sourcepos. if entering { self.output.write_all(nsc.emoji.as_bytes())?; } } NodeValue::Table(..) => { if entering { self.cr()?; self.output.write_all(b"\n")?; } else { if !node .last_child() .unwrap() .same_node(node.first_child().unwrap()) { self.cr()?; self.output.write_all(b"\n")?; } self.cr()?; self.output.write_all(b"
\n")?; } } NodeValue::TableRow(header) => { if entering { self.cr()?; if header { self.output.write_all(b"\n")?; } else if let Some(n) = node.previous_sibling() { if let NodeValue::TableRow(true) = n.data.borrow().value { self.output.write_all(b"\n")?; } } self.output.write_all(b"")?; } else { self.cr()?; self.output.write_all(b"")?; if header { self.cr()?; self.output.write_all(b"")?; } } } NodeValue::TableCell => { let row = &node.parent().unwrap().data.borrow().value; let in_header = match *row { NodeValue::TableRow(header) => header, _ => panic!(), }; let table = &node.parent().unwrap().parent().unwrap().data.borrow().value; let alignments = match *table { NodeValue::Table(NodeTable { ref alignments, .. }) => alignments, _ => panic!(), }; if entering { self.cr()?; if in_header { self.output.write_all(b" { self.output.write_all(b" align=\"left\"")?; } TableAlignment::Right => { self.output.write_all(b" align=\"right\"")?; } TableAlignment::Center => { self.output.write_all(b" align=\"center\"")?; } TableAlignment::None => (), } self.output.write_all(b">")?; } else if in_header { self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } } NodeValue::FootnoteDefinition(ref nfd) => { if entering { if self.footnote_ix == 0 { self.output.write_all(b"\n
    \n")?; } self.footnote_ix += 1; self.output.write_all(b"")?; } else { if self.put_footnote_backref(nfd)? { self.output.write_all(b"\n")?; } self.output.write_all(b"\n")?; } } NodeValue::FootnoteReference(ref nfr) => { // Unreliable sourcepos. if entering { let mut ref_id = format!("fnref-{}", nfr.name); if nfr.ref_num > 1 { ref_id = format!("{}-{}", ref_id, nfr.ref_num); } self.output.write_all(b"{}", nfr.ix)?; } } NodeValue::TaskItem(symbol) => { if entering { self.cr()?; self.output.write_all(b"")?; write!( self.output, " ", if symbol.is_some() { "checked=\"\" " } else { "" } )?; } else { self.output.write_all(b"\n")?; } } NodeValue::MultilineBlockQuote(_) => { if entering { self.cr()?; self.output.write_all(b"\n")?; } else { self.cr()?; self.output.write_all(b"\n")?; } } NodeValue::Escaped => { // Unreliable sourcepos. if self.options.render.escaped_char_spans { if entering { self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } } } NodeValue::Math(NodeMath { ref literal, display_math, dollar_math, .. }) => { if entering { self.render_math_inline(node, literal, display_math, dollar_math)?; } } NodeValue::WikiLink(ref nl) => { // Unreliable sourcepos. if entering { self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } } NodeValue::Underline => { // Unreliable sourcepos. if entering { self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } } NodeValue::SpoileredText => { // Unreliable sourcepos. if entering { self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } } NodeValue::EscapedTag(ref net) => { // Nowhere to put sourcepos. 
self.output.write_all(net.as_bytes())?; } } Ok(false) } fn render_sourcepos<'a>(&mut self, node: &'a AstNode<'a>) -> io::Result<()> { if self.options.render.sourcepos { let ast = node.data.borrow(); if ast.sourcepos.start.line > 0 { write!(self.output, " data-sourcepos=\"{}\"", ast.sourcepos)?; } } Ok(()) } fn put_footnote_backref(&mut self, nfd: &NodeFootnoteDefinition) -> io::Result { if self.written_footnote_ix >= self.footnote_ix { return Ok(false); } self.written_footnote_ix = self.footnote_ix; let mut ref_suffix = String::new(); let mut superscript = String::new(); for ref_num in 1..=nfd.total_references { if ref_num > 1 { ref_suffix = format!("-{}", ref_num); superscript = format!("{}", ref_num); write!(self.output, " ")?; } self.output.write_all(b"↩{}", ref_suffix, self.footnote_ix, ref_suffix, self.footnote_ix, ref_suffix, superscript )?; } Ok(true) } // Renders a math dollar inline, `$...$` and `$$...$$` using `` to be similar // to other renderers. fn render_math_inline<'a>( &mut self, node: &'a AstNode<'a>, literal: &String, display_math: bool, dollar_math: bool, ) -> io::Result<()> { let mut tag_attributes: Vec<(String, String)> = Vec::new(); let style_attr = if display_math { "display" } else { "inline" }; let tag: &str = if dollar_math { "span" } else { "code" }; tag_attributes.push((String::from("data-math-style"), String::from(style_attr))); // Unreliable sourcepos. if self.options.render.experimental_inline_sourcepos && self.options.render.sourcepos { let ast = node.data.borrow(); tag_attributes.push(("data-sourcepos".to_string(), ast.sourcepos.to_string())); } write_opening_tag(self.output, tag, tag_attributes)?; self.escape(literal.as_bytes())?; write!(self.output, "", tag)?; Ok(()) } // Renders a math code block, ```` ```math ```` using `
    `
        // Writes the opening markup and escaped contents of a math code block:
        // a `pre` opening tag, a `code` opening tag, then `literal` passed
        // through `self.escape`.
        //
        // * `node`    - the code block node; only its `sourcepos` is read, and
        //               only when `render.sourcepos` is enabled.
        // * `literal` - the block's contents.
        //
        // Any I/O error from the underlying writer is propagated with `?`.
        fn render_math_code_block<'a>(
            &mut self,
            node: &'a AstNode<'a>,
            literal: &String,
        ) -> io::Result<()> {
            // Make sure the block starts on its own line.
            self.cr()?;
    
            // use vectors to ensure attributes always written in the same order,
            // for testing stability
            let mut pre_attributes: Vec<(String, String)> = Vec::new();
            let mut code_attributes: Vec<(String, String)> = Vec::new();
            let lang_str = "math";
    
            // `github_pre_lang` decides which element carries the language and
            // the `data-math-style="display"` marker: `lang="math"` on `pre`,
            // or `class="language-math"` on `code`.
            if self.options.render.github_pre_lang {
                pre_attributes.push((String::from("lang"), lang_str.to_string()));
                pre_attributes.push((String::from("data-math-style"), String::from("display")));
            } else {
                let code_attr = format!("language-{}", lang_str);
                code_attributes.push((String::from("class"), code_attr));
                code_attributes.push((String::from("data-math-style"), String::from("display")));
            }
    
            // The source position, when requested, always goes on `pre`,
            // whichever element received the language attributes.
            if self.options.render.sourcepos {
                let ast = node.data.borrow();
                pre_attributes.push(("data-sourcepos".to_string(), ast.sourcepos.to_string()));
            }
    
            // `write_opening_tag` escapes the attribute values itself.
            write_opening_tag(self.output, "pre", pre_attributes)?;
            write_opening_tag(self.output, "code", code_attributes)?;
    
            self.escape(literal.as_bytes())?;
            // Trailing markup: this byte-string literal continues on the next line.
            self.output.write_all(b"
    \n")?; Ok(()) } } comrak-0.29.0/src/lib.rs000064400000000000000000000123311046102023000130730ustar 00000000000000//! A 100% [CommonMark](http://commonmark.org/) and [GFM](https://github.github.com/gfm/) //! compatible Markdown parser. //! //! Source repository and detailed `README` is at . //! //! You can use `comrak::markdown_to_html` directly: //! //! ``` //! use comrak::{markdown_to_html, Options}; //! assert_eq!(markdown_to_html("Hello, **δΈ–η•Œ**!", &Options::default()), //! "

    Hello, δΈ–η•Œ!

    \n"); //! ``` //! //! Or you can parse the input into an AST yourself, manipulate it, and then use your desired //! formatter: //! //! ``` //! use comrak::{Arena, parse_document, format_html, Options}; //! use comrak::nodes::{AstNode, NodeValue}; //! //! # fn main() { //! let arena = Arena::new(); //! //! let root = parse_document( //! &arena, //! "This is my input.\n\n1. Also [my](#) input.\n2. Certainly *my* input.\n", //! &Options::default()); //! //! for node in root.descendants() { //! if let NodeValue::Text(ref mut text) = node.data.borrow_mut().value { //! *text = text.replace("my", "your"); //! } //! } //! //! let mut html = vec![]; //! format_html(root, &Options::default(), &mut html).unwrap(); //! //! assert_eq!( //! String::from_utf8(html).unwrap(), //! "

    This is your input.

    \n\ //!
      \n\ //!
    1. Also your input.
    2. \n\ //!
    3. Certainly your input.
    4. \n\ //!
    \n"); //! # } //! ``` #![cfg_attr(docsrs, feature(doc_cfg))] #![deny( missing_docs, missing_debug_implementations, missing_copy_implementations, trivial_casts, trivial_numeric_casts, unstable_features, unused_import_braces )] #![allow( unknown_lints, clippy::doc_markdown, cyclomatic_complexity, clippy::bool_to_int_with_if, clippy::too_many_arguments )] use std::io::BufWriter; pub mod adapters; pub mod arena_tree; mod cm; mod ctype; mod entity; pub mod html; pub mod nodes; mod parser; pub mod plugins; mod scanners; mod strings; #[cfg(test)] mod tests; mod xml; pub use cm::format_document as format_commonmark; pub use cm::format_document_with_plugins as format_commonmark_with_plugins; pub use html::format_document as format_html; pub use html::format_document_with_plugins as format_html_with_plugins; #[doc(inline)] pub use html::Anchorizer; #[allow(deprecated)] pub use parser::parse_document_with_broken_link_callback; pub use parser::{ parse_document, BrokenLinkCallback, BrokenLinkReference, ExtensionOptions, ExtensionOptionsBuilder, ListStyleType, Options, ParseOptions, ParseOptionsBuilder, Plugins, PluginsBuilder, RenderOptions, RenderOptionsBuilder, RenderPlugins, RenderPluginsBuilder, ResolvedReference, }; pub use typed_arena::Arena; pub use xml::format_document as format_xml; pub use xml::format_document_with_plugins as format_xml_with_plugins; /// Legacy naming of [`ExtensionOptions`] pub type ComrakExtensionOptions = ExtensionOptions; /// Legacy naming of [`Options`] pub type ComrakOptions<'c> = Options<'c>; /// Legacy naming of [`ParseOptions`] pub type ComrakParseOptions<'c> = ParseOptions<'c>; /// Legacy naming of [`Plugins`] pub type ComrakPlugins<'a> = Plugins<'a>; /// Legacy naming of [`RenderOptions`] pub type ComrakRenderOptions = RenderOptions; /// Legacy naming of [`RenderPlugins`] pub type ComrakRenderPlugins<'a> = RenderPlugins<'a>; /// Render Markdown to HTML. /// /// See the documentation of the crate root for an example. 
pub fn markdown_to_html(md: &str, options: &Options) -> String { markdown_to_html_with_plugins(md, options, &Plugins::default()) } /// Render Markdown to HTML using plugins. /// /// See the documentation of the crate root for an example. pub fn markdown_to_html_with_plugins(md: &str, options: &Options, plugins: &Plugins) -> String { let arena = Arena::new(); let root = parse_document(&arena, md, options); let mut bw = BufWriter::new(Vec::new()); format_html_with_plugins(root, options, &mut bw, plugins).unwrap(); String::from_utf8(bw.into_inner().unwrap()).unwrap() } /// Return the version of the crate. pub fn version() -> &'static str { env!("CARGO_PKG_VERSION") } /// Render Markdown back to CommonMark. pub fn markdown_to_commonmark(md: &str, options: &Options) -> String { let arena = Arena::new(); let root = parse_document(&arena, md, options); let mut bw = BufWriter::new(Vec::new()); format_commonmark(root, options, &mut bw).unwrap(); String::from_utf8(bw.into_inner().unwrap()).unwrap() } /// Render Markdown to CommonMark XML. /// See . pub fn markdown_to_commonmark_xml(md: &str, options: &Options) -> String { markdown_to_commonmark_xml_with_plugins(md, options, &Plugins::default()) } /// Render Markdown to CommonMark XML using plugins. /// See . pub fn markdown_to_commonmark_xml_with_plugins( md: &str, options: &Options, plugins: &Plugins, ) -> String { let arena = Arena::new(); let root = parse_document(&arena, md, options); let mut bw = BufWriter::new(Vec::new()); format_xml_with_plugins(root, options, &mut bw, plugins).unwrap(); String::from_utf8(bw.into_inner().unwrap()).unwrap() } comrak-0.29.0/src/main.rs000064400000000000000000000272371046102023000132640ustar 00000000000000//! The `comrak` binary. 
use comrak::{ adapters::SyntaxHighlighterAdapter, plugins::syntect::SyntectAdapter, Arena, ExtensionOptionsBuilder, ListStyleType, Options, ParseOptionsBuilder, Plugins, RenderOptionsBuilder, }; use std::boxed::Box; use std::env; use std::error::Error; use std::fs; use std::io::{BufWriter, Read, Write}; use std::path::PathBuf; use std::process; use clap::{Parser, ValueEnum}; const EXIT_SUCCESS: i32 = 0; const EXIT_PARSE_CONFIG: i32 = 2; const EXIT_READ_INPUT: i32 = 3; const EXIT_CHECK_FILE_NUM: i32 = 4; #[derive(Debug, Parser)] #[command(about, author, version)] #[command(after_help = "\ By default, Comrak will attempt to read command-line options from a config file specified by \ --config-file. This behaviour can be disabled by passing --config-file none. It is not an error \ if the file does not exist.\ ")] struct Cli { /// CommonMark file(s) to parse; or standard input if none passed #[arg(value_name = "FILE")] files: Option>, /// Path to config file containing command-line arguments, or 'none' #[arg(short, long, value_name = "PATH", default_value = get_default_config_path())] config_file: String, /// To perform an in-place formatting #[arg(short, long, conflicts_with_all(["format", "output"]))] inplace: bool, /// Treat newlines as hard line breaks #[arg(long)] hardbreaks: bool, /// Use smart punctuation #[arg(long)] smart: bool, /// Use GitHub-style
     for code blocks
        #[arg(long)]
        github_pre_lang: bool,
    
        /// Enable full info strings for code blocks
        #[arg(long)]
        full_info_string: bool,
    
        /// Enable GitHub-flavored markdown extensions: strikethrough, tagfilter,
        /// table, autolink, and tasklist. Also enables --github-pre-lang and
        /// --gfm-quirks.
        #[arg(long)]
        gfm: bool,
    
        /// Enables GFM-style quirks in output HTML, such as not nesting 
        /// tags, which otherwise breaks CommonMark compatibility.
        #[arg(long)]
        gfm_quirks: bool,
    
        /// Enable relaxing which character is allowed in a tasklists.
        #[arg(long)]
        relaxed_tasklist_character: bool,
    
        /// Enable relaxing of autolink parsing, allow links to be recognized when in brackets
        /// and allow all url schemes
        #[arg(long)]
        relaxed_autolinks: bool,
    
        /// Default value for fenced code block's info strings if none is given
        #[arg(long, value_name = "INFO")]
        default_info_string: Option,
    
        /// Allow raw HTML and dangerous URLs
        #[arg(long = "unsafe")]
        unsafe_: bool,
    
        /// Translate gemojis into UTF-8 characters
        #[arg(long)]
        #[cfg(feature = "shortcodes")]
        gemojis: bool,
    
        /// Escape raw HTML instead of clobbering it
        #[arg(long)]
        escape: bool,
    
        /// Wrap escaped characters in span tags
        #[arg(long)]
        escaped_char_spans: bool,
    
        /// Specify extension name(s) to use
        ///
        /// Multiple extensions can be delimited with ",", e.g. --extension strikethrough,table
        #[arg(
            short,
            long = "extension",
            value_name = "EXTENSION",
            value_delimiter = ',',
            value_enum
        )]
        extensions: Vec,
    
        /// Specify output format
        #[arg(short = 't', long = "to", value_enum, default_value_t = Format::Html)]
        format: Format,
    
        /// Write output to FILE instead of stdout
        #[arg(short, long, value_name = "FILE")]
        output: Option,
    
        /// Specify wrap width (0 = nowrap)
        #[arg(long, default_value_t = 0)]
        width: usize,
    
        /// Use the Comrak header IDs extension, with the given ID prefix
        #[arg(long, value_name = "PREFIX")]
        header_ids: Option,
    
        /// Ignore front-matter that starts and ends with the given string
        #[arg(long, value_name = "DELIMITER", allow_hyphen_values = true)]
        front_matter_delimiter: Option,
    
        /// Syntax highlighting for codefence blocks. Choose a theme or 'none' for disabling.
        #[arg(long, value_name = "THEME", default_value = "base16-ocean.dark")]
        syntax_highlighting: String,
    
        /// Specify bullet character for lists (-, +, *) in CommonMark output
        #[arg(long, value_enum, default_value_t = ListStyle::Dash)]
        list_style: ListStyle,
    
        /// Include source position attribute in HTML and XML output
        #[arg(long)]
        sourcepos: bool,
    
        /// Include inline sourcepos in HTML output, which is known to have issues.
        #[arg(long)]
        experimental_inline_sourcepos: bool,
    
        /// Ignore setext headers
        #[arg(long)]
        ignore_setext: bool,
    
        /// Ignore empty links
        #[arg(long)]
        ignore_empty_links: bool,
    }
    
    // Output formats selectable on the command line via `-t` / `--to`.
    // `main` picks the matching `comrak::format_*_with_plugins` function for each variant.
    #[derive(Clone, Copy, Debug, ValueEnum)]
    enum Format {
        // HTML output; the default value of the `format` argument.
        Html,

        // XML output.
        Xml,

        // CommonMark output; spelled "commonmark" on the command line.
        #[value(name = "commonmark")]
        CommonMark,
    }
    
    // Extensions that can be requested by name with `--extension` (comma-delimited).
    // `main` checks membership of each variant in `cli.extensions` and forwards the result
    // to the `ExtensionOptionsBuilder` setter of the same name.
    #[derive(Clone, Copy, Debug, PartialEq, Eq, ValueEnum)]
    enum Extension {
        // The first five are also switched on by `cli.gfm`, regardless of `--extension`.
        Strikethrough,
        Tagfilter,
        Table,
        Autolink,
        Tasklist,
        // The remaining extensions are enabled only when listed explicitly.
        Superscript,
        Footnotes,
        DescriptionLists,
        MultilineBlockQuotes,
        MathDollars,
        MathCode,
        WikilinksTitleAfterPipe,
        WikilinksTitleBeforePipe,
        Underline,
        Spoiler,
        Greentext,
    }
    
    // Bullet character choices for the `list_style` argument (default: `Dash`).
    // Converted into the library's `ListStyleType` before being handed to the render options.
    #[derive(Clone, Copy, Debug, ValueEnum)]
    enum ListStyle {
        // `-`
        Dash,
        // `+`
        Plus,
        // `*`
        Star,
    }
    
    impl From for ListStyleType {
        fn from(style: ListStyle) -> Self {
            match style {
                ListStyle::Dash => Self::Dash,
                ListStyle::Plus => Self::Plus,
                ListStyle::Star => Self::Star,
            }
        }
    }
    
    /// Parses the command line, merging in extra arguments from the config file if present.
    ///
    /// The command line is parsed once to learn `config_file`.  If that is `"none"`, or the
    /// file cannot be read, the result of that first parse is returned unchanged.  Otherwise
    /// the file contents are split with shell quoting rules and appended after the real
    /// command-line arguments, and the combined argument list is parsed again.
    ///
    /// Exits the process with `EXIT_PARSE_CONFIG` if the config file cannot be split into
    /// words.
    fn cli_with_config() -> Cli {
        let cli = Cli::parse();
        let config_file_path = &cli.config_file;

        if config_file_path == "none" {
            return cli;
        }

        let config_args = match fs::read_to_string(config_file_path) {
            Ok(contents) => match shell_words::split(&contents) {
                Ok(args) => args,
                Err(e) => {
                    eprintln!("failed to parse {}: {}", config_file_path, e);
                    process::exit(EXIT_PARSE_CONFIG);
                }
            },
            // A missing or unreadable config file is not an error.
            Err(_) => return cli,
        };

        // Keep the process arguments as `OsString`s so that non-UTF-8 arguments are passed
        // through untouched and always stay ahead of the config-file arguments.
        Cli::parse_from(
            env::args_os().chain(config_args.into_iter().map(std::ffi::OsString::from)),
        )
    }
    
    fn main() -> Result<(), Box> {
        let cli = cli_with_config();
    
        if cli.inplace {
            if let Some(ref files) = cli.files {
                if files.len() != 1 {
                    eprintln!("cannot have more than 1 input file with in-place mode");
                    process::exit(EXIT_CHECK_FILE_NUM);
                }
            } else {
                eprintln!("no input file specified: cannot use standard input with in-place mode");
                process::exit(EXIT_CHECK_FILE_NUM);
            }
        }
    
        let exts = &cli.extensions;
    
        let mut extension = ExtensionOptionsBuilder::default();
        extension
            .strikethrough(exts.contains(&Extension::Strikethrough) || cli.gfm)
            .tagfilter(exts.contains(&Extension::Tagfilter) || cli.gfm)
            .table(exts.contains(&Extension::Table) || cli.gfm)
            .autolink(exts.contains(&Extension::Autolink) || cli.gfm)
            .tasklist(exts.contains(&Extension::Tasklist) || cli.gfm)
            .superscript(exts.contains(&Extension::Superscript))
            .header_ids(cli.header_ids)
            .footnotes(exts.contains(&Extension::Footnotes))
            .description_lists(exts.contains(&Extension::DescriptionLists))
            .multiline_block_quotes(exts.contains(&Extension::MultilineBlockQuotes))
            .math_dollars(exts.contains(&Extension::MathDollars))
            .math_code(exts.contains(&Extension::MathCode))
            .wikilinks_title_after_pipe(exts.contains(&Extension::WikilinksTitleAfterPipe))
            .wikilinks_title_before_pipe(exts.contains(&Extension::WikilinksTitleBeforePipe))
            .underline(exts.contains(&Extension::Underline))
            .spoiler(exts.contains(&Extension::Spoiler))
            .greentext(exts.contains(&Extension::Greentext))
            .front_matter_delimiter(cli.front_matter_delimiter);
    
        #[cfg(feature = "shortcodes")]
        {
            extension.shortcodes(cli.gemojis);
        }
    
        let extension = extension.build()?;
    
        let parse = ParseOptionsBuilder::default()
            .smart(cli.smart)
            .default_info_string(cli.default_info_string)
            .relaxed_tasklist_matching(cli.relaxed_tasklist_character)
            .relaxed_autolinks(cli.relaxed_autolinks)
            .build()?;
    
        let render = RenderOptionsBuilder::default()
            .hardbreaks(cli.hardbreaks)
            .github_pre_lang(cli.github_pre_lang || cli.gfm)
            .full_info_string(cli.full_info_string)
            .width(cli.width)
            .unsafe_(cli.unsafe_)
            .escape(cli.escape)
            .list_style(cli.list_style.into())
            .sourcepos(cli.sourcepos)
            .experimental_inline_sourcepos(cli.experimental_inline_sourcepos)
            .escaped_char_spans(cli.escaped_char_spans)
            .ignore_setext(cli.ignore_setext)
            .ignore_empty_links(cli.ignore_empty_links)
            .gfm_quirks(cli.gfm_quirks || cli.gfm)
            .build()?;
    
        let options = Options {
            extension,
            parse,
            render,
        };
    
        let syntax_highlighter: Option<&dyn SyntaxHighlighterAdapter>;
        let mut plugins: Plugins = Plugins::default();
        let adapter: SyntectAdapter;
    
        let theme = cli.syntax_highlighting;
        if theme.is_empty() || theme == "none" {
            syntax_highlighter = None;
        } else {
            adapter = SyntectAdapter::new(Some(&theme));
            syntax_highlighter = Some(&adapter);
        }
    
        let mut s: Vec = Vec::with_capacity(2048);
    
        match cli.files {
            None => {
                std::io::stdin().read_to_end(&mut s)?;
            }
            Some(ref fs) => {
                for f in fs {
                    match fs::File::open(f) {
                        Ok(mut io) => {
                            io.read_to_end(&mut s)?;
                        }
                        Err(e) => {
                            eprintln!("failed to read {}: {}", f.display(), e);
                            process::exit(EXIT_READ_INPUT);
                        }
                    }
                }
            }
        };
    
        let arena = Arena::new();
        let root = comrak::parse_document(&arena, &String::from_utf8(s)?, &options);
    
        let formatter = if cli.inplace {
            comrak::format_commonmark_with_plugins
        } else {
            match cli.format {
                Format::Html => {
                    plugins.render.codefence_syntax_highlighter = syntax_highlighter;
                    comrak::format_html_with_plugins
                }
                Format::Xml => comrak::format_xml_with_plugins,
                Format::CommonMark => comrak::format_commonmark_with_plugins,
            }
        };
    
        if let Some(output_filename) = cli.output {
            let mut bw = BufWriter::new(fs::File::create(output_filename)?);
            formatter(root, &options, &mut bw, &plugins)?;
            bw.flush()?;
        } else if cli.inplace {
            let output_filename = cli.files.unwrap().get(0).unwrap().clone();
            let mut bw = BufWriter::new(fs::File::create(output_filename)?);
            formatter(root, &options, &mut bw, &plugins)?;
            bw.flush()?;
        } else {
            let stdout = std::io::stdout();
            let mut bw = BufWriter::new(stdout.lock());
            formatter(root, &options, &mut bw, &plugins)?;
            bw.flush()?;
        };
    
        process::exit(EXIT_SUCCESS);
    }
    
    /// Returns the default config file location.
    ///
    /// Asks the XDG base-directory lookup for the `config` file under the `comrak` prefix.
    /// If any step fails, or the resulting path is not valid UTF-8, falls back to
    /// `comrak.config`.
    #[cfg(all(not(windows), not(target_arch = "wasm32")))]
    fn get_default_config_path() -> String {
        xdg::BaseDirectories::with_prefix("comrak")
            .ok()
            .and_then(|dirs| dirs.place_config_file("config").ok())
            .and_then(|path| path.to_str().map(String::from))
            .unwrap_or_else(|| String::from("comrak.config"))
    }
    /// On Windows or when compiling to wasm the default config file check is disabled:
    /// the sentinel value `"none"` is returned instead of a path.
    #[cfg(any(windows, target_arch = "wasm32"))]
    fn get_default_config_path() -> String {
        String::from("none")
    }
    comrak-0.29.0/src/nodes.rs000064400000000000000000000653751046102023000134550ustar  00000000000000//! The CommonMark AST.
    
    use crate::arena_tree::Node;
    use std::cell::RefCell;
    use std::convert::TryFrom;
    
    #[cfg(feature = "shortcodes")]
    pub use crate::parser::shortcodes::NodeShortCode;
    
    pub use crate::parser::math::NodeMath;
    pub use crate::parser::multiline_block_quote::NodeMultilineBlockQuote;
    
    /// The core AST node enum.
    #[derive(Debug, Clone, PartialEq, Eq)]
    pub enum NodeValue {
        /// The root of every CommonMark document.  Contains **blocks**.
        Document,
    
        /// Non-Markdown front matter.  Treated as an opaque blob.
        FrontMatter(String),
    
        /// **Block**. A [block quote](https://github.github.com/gfm/#block-quotes).  Contains other
        /// **blocks**.
        ///
        /// ``` md
        /// > A block quote.
        /// ```
        BlockQuote,
    
        /// **Block**.  A [list](https://github.github.com/gfm/#lists).  Contains
        /// [list items](https://github.github.com/gfm/#list-items).
        ///
        /// ``` md
        /// * An unordered list
        /// * Another item
        ///
        /// 1. An ordered list
        /// 2. Another item
        /// ```
        List(NodeList),
    
        /// **Block**.  A [list item](https://github.github.com/gfm/#list-items).  Contains other
        /// **blocks**.
        Item(NodeList),
    
        /// **Block**. A description list, enabled with `ext_description_lists` option.  Contains
        /// description items.
        ///
        /// It is required to put a blank line between terms and details.
        ///
        /// ``` md
        /// Term 1
        ///
        /// : Details 1
        ///
        /// Term 2
        ///
        /// : Details 2
        /// ```
        DescriptionList,
    
        /// *Block**. An item of a description list.  Contains a term and one details block.
        DescriptionItem(NodeDescriptionItem),
    
        /// **Block**. Term of an item in a definition list.
        DescriptionTerm,
    
        /// **Block**. Details of an item in a definition list.
        DescriptionDetails,
    
        /// **Block**. A code block; may be [fenced](https://github.github.com/gfm/#fenced-code-blocks)
        /// or [indented](https://github.github.com/gfm/#indented-code-blocks).  Contains raw text
        /// which is not parsed as Markdown, although is HTML escaped.
        CodeBlock(NodeCodeBlock),
    
        /// **Block**. A [HTML block](https://github.github.com/gfm/#html-blocks).  Contains raw text
        /// which is neither parsed as Markdown nor HTML escaped.
        HtmlBlock(NodeHtmlBlock),
    
        /// **Block**. A [paragraph](https://github.github.com/gfm/#paragraphs).  Contains **inlines**.
        Paragraph,
    
        /// **Block**. A heading; may be an [ATX heading](https://github.github.com/gfm/#atx-headings)
        /// or a [setext heading](https://github.github.com/gfm/#setext-headings). Contains
        /// **inlines**.
        Heading(NodeHeading),
    
        /// **Block**. A [thematic break](https://github.github.com/gfm/#thematic-breaks).  Has no
        /// children.
        ThematicBreak,
    
        /// **Block**. A footnote definition.  The `String` is the footnote's name.
        /// Contains other **blocks**.
        FootnoteDefinition(NodeFootnoteDefinition),
    
        /// **Block**. A [table](https://github.github.com/gfm/#tables-extension-) per the GFM spec.
        /// Contains table rows.
        Table(NodeTable),
    
        /// **Block**. A table row.  The `bool` represents whether the row is the header row or not.
        /// Contains table cells.
        TableRow(bool),
    
        /// **Block**.  A table cell.  Contains **inlines**.
        TableCell,
    
        /// **Inline**.  [Textual content](https://github.github.com/gfm/#textual-content).  All text
        /// in a document will be contained in a `Text` node.
        Text(String),
    
        /// **Block**. [Task list item](https://github.github.com/gfm/#task-list-items-extension-).
        /// The value is the symbol that was used in the brackets to mark a task item as checked, or
        /// None if the item is unchecked.
        TaskItem(Option),
    
        /// **Inline**.  A [soft line break](https://github.github.com/gfm/#soft-line-breaks).  If
        /// the `hardbreaks` option is set in `Options` during formatting, it will be formatted
        /// as a `LineBreak`.
        SoftBreak,
    
        /// **Inline**.  A [hard line break](https://github.github.com/gfm/#hard-line-breaks).
        LineBreak,
    
        /// **Inline**.  A [code span](https://github.github.com/gfm/#code-spans).
        Code(NodeCode),
    
        /// **Inline**.  [Raw HTML](https://github.github.com/gfm/#raw-html) contained inline.
        HtmlInline(String),
    
        /// **Inline**.  [Emphasized](https://github.github.com/gfm/#emphasis-and-strong-emphasis)
        /// text.
        Emph,
    
        /// **Inline**.  [Strong](https://github.github.com/gfm/#emphasis-and-strong-emphasis) text.
        Strong,
    
        /// **Inline**.  [Strikethrough](https://github.github.com/gfm/#strikethrough-extension-) text
        /// per the GFM spec.
        Strikethrough,
    
        /// **Inline**.  Superscript.  Enabled with `ext_superscript` option.
        Superscript,
    
        /// **Inline**.  A [link](https://github.github.com/gfm/#links) to some URL, with possible
        /// title.
        Link(NodeLink),
    
        /// **Inline**.  An [image](https://github.github.com/gfm/#images).
        Image(NodeLink),
    
        /// **Inline**.  A footnote reference.
        FootnoteReference(NodeFootnoteReference),
    
        #[cfg(feature = "shortcodes")]
        /// **Inline**. An Emoji character generated from a shortcode. Enable with feature "shortcodes".
        ShortCode(NodeShortCode),
    
        /// **Inline**. A math span. Contains raw text which is not parsed as Markdown.
        /// Dollar math or code math
        ///
        /// Inline math $1 + 2$ and $`1 + 2`$
        ///
        /// Display math $$1 + 2$$ and
        /// $$
        /// 1 + 2
        /// $$
        ///
        Math(NodeMath),
    
        /// **Block**. A [multiline block quote](https://github.github.com/gfm/#block-quotes).  Spans multiple
        /// lines and contains other **blocks**.
        ///
        /// ``` md
        /// >>>
        /// A paragraph.
        ///
        /// - item one
        /// - item two
        /// >>>
        /// ```
        MultilineBlockQuote(NodeMultilineBlockQuote),
    
        /// **Inline**.  A character that has been [escaped](https://github.github.com/gfm/#backslash-escapes)
        Escaped,
    
        /// **Inline**.  A wikilink to some URL.
        WikiLink(NodeWikiLink),
    
        /// **Inline**.  Underline. Enabled with `underline` option.
        Underline,
    
        /// **Inline**.  Spoilered text.  Enabled with `spoiler` option.
        SpoileredText,
    
        /// **Inline**. Text surrounded by escaped markup. Enabled with `spoiler` option.
        /// The `String` is the tag to be escaped.
        EscapedTag(String),
    }
    
    /// How the content of a single table cell is aligned.
    #[derive(Debug, Copy, Clone, PartialEq, Eq)]
    pub enum TableAlignment {
        /// No alignment was specified for the cell.
        None,

        /// Content is left-aligned.
        Left,

        /// Content is centered.
        Center,

        /// Content is right-aligned.
        Right,
    }

    impl TableAlignment {
        /// Name of this alignment as used in XML output, or `None` when the cell is unaligned.
        pub(crate) fn xml_name(&self) -> Option<&'static str> {
            let name = match self {
                Self::None => return None,
                Self::Left => "left",
                Self::Center => "center",
                Self::Right => "right",
            };
            Some(name)
        }
    }
    
    /// The metadata of a table
    #[derive(Debug, Default, Clone, PartialEq, Eq)]
    pub struct NodeTable {
        /// The table alignments
        pub alignments: Vec,
    
        /// Number of columns of the table
        pub num_columns: usize,
    
        /// Number of rows of the table
        pub num_rows: usize,
    
        /// Number of non-empty, non-autocompleted cells
        pub num_nonempty_cells: usize,
    }
    
    /// An inline [code span](https://github.github.com/gfm/#code-spans).
    #[derive(Debug, Clone, PartialEq, Eq)]
    pub struct NodeCode {
        /// The number of backticks (presumably the length of the delimiter run around the
        /// span; confirm against the inline parser).
        pub num_backticks: usize,

        /// The content of the inline code span.
        /// As the contents are not interpreted as Markdown at all,
        /// they are contained within this structure,
        /// rather than inserted into a child inline of any kind.
        pub literal: String,
    }
    
    /// The details of a link's destination, or an image's source.
    ///
    /// Carried by both the `Link` and `Image` variants of `NodeValue`.
    #[derive(Debug, Clone, PartialEq, Eq)]
    pub struct NodeLink {
        /// The URL for the link destination or image source.
        pub url: String,

        /// The title for the link or image.
        ///
        /// Note this field is used for the `title` attribute by the HTML formatter even for images;
        /// `alt` text is supplied in the image inline text.
        pub title: String,
    }
    
    /// The details of a wikilink's destination.
    ///
    /// Carried by the `WikiLink` variant of `NodeValue`.
    #[derive(Debug, Clone, PartialEq, Eq)]
    pub struct NodeWikiLink {
        /// The URL for the link destination.
        pub url: String,
    }
    
    /// The metadata of a list; the kind of list, the delimiter used and so on.
    #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
    pub struct NodeList {
        /// The kind of list (bullet (unordered) or ordered).
        pub list_type: ListType,
    
        /// Number of spaces before the list marker.
        pub marker_offset: usize,
    
        /// Number of characters between the start of the list marker and the item text (including the list marker(s)).
        pub padding: usize,
    
        /// For ordered lists, the ordinal the list starts at.
        pub start: usize,
    
        /// For ordered lists, the delimiter after each number.
        pub delimiter: ListDelimType,
    
        /// For bullet lists, the character used for each bullet.
        pub bullet_char: u8,
    
        /// Whether the list is [tight](https://github.github.com/gfm/#tight), i.e. whether the
        /// paragraphs are wrapped in `

    ` tags when formatted as HTML. pub tight: bool, } /// The metadata of a description list #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] pub struct NodeDescriptionItem { /// Number of spaces before the list marker. pub marker_offset: usize, /// Number of characters between the start of the list marker and the item text (including the list marker(s)). pub padding: usize, } /// The type of list. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] pub enum ListType { /// A bullet list, i.e. an unordered list. #[default] Bullet, /// An ordered list. Ordered, } /// The delimiter for ordered lists, i.e. the character which appears after each number. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] pub enum ListDelimType { /// A period character `.`. #[default] Period, /// A paren character `)`. Paren, } impl ListDelimType { pub(crate) fn xml_name(&self) -> &'static str { match *self { ListDelimType::Period => "period", ListDelimType::Paren => "paren", } } } /// The metadata and data of a code block (fenced or indented). #[derive(Default, Debug, Clone, PartialEq, Eq)] pub struct NodeCodeBlock { /// Whether the code block is fenced. pub fenced: bool, /// For fenced code blocks, the fence character itself (`` ` `` or `~`). pub fence_char: u8, /// For fenced code blocks, the length of the fence. pub fence_length: usize, /// For fenced code blocks, the indentation level of the code within the block. pub fence_offset: usize, /// For fenced code blocks, the [info string](https://github.github.com/gfm/#info-string) after /// the opening fence, if any. pub info: String, /// The literal contents of the code block. As the contents are not interpreted as Markdown at /// all, they are contained within this structure, rather than inserted into a child inline of /// any kind. pub literal: String, } /// The metadata of a heading. 
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] pub struct NodeHeading { /// The level of the header; from 1 to 6 for ATX headings, 1 or 2 for setext headings. pub level: u8, /// Whether the heading is setext (if not, ATX). pub setext: bool, } /// The metadata of an included HTML block. #[derive(Debug, Default, Clone, PartialEq, Eq)] pub struct NodeHtmlBlock { /// The HTML block's type pub block_type: u8, /// The literal contents of the HTML block. Per NodeCodeBlock, the content is included here /// rather than in any inline. pub literal: String, } /// The metadata of a footnote definition. #[derive(Debug, Default, Clone, PartialEq, Eq)] pub struct NodeFootnoteDefinition { /// The name of the footnote. pub name: String, /// Total number of references to this footnote pub total_references: u32, } /// The metadata of a footnote reference. #[derive(Debug, Default, Clone, PartialEq, Eq)] pub struct NodeFootnoteReference { /// The name of the footnote. pub name: String, /// The index of reference to the same footnote pub ref_num: u32, /// The index of the footnote in the document. pub ix: u32, } impl NodeValue { /// Indicates whether this node is a block node or inline node. pub fn block(&self) -> bool { matches!( *self, NodeValue::Document | NodeValue::BlockQuote | NodeValue::FootnoteDefinition(_) | NodeValue::List(..) | NodeValue::DescriptionList | NodeValue::DescriptionItem(_) | NodeValue::DescriptionTerm | NodeValue::DescriptionDetails | NodeValue::Item(..) | NodeValue::CodeBlock(..) | NodeValue::HtmlBlock(..) | NodeValue::Paragraph | NodeValue::Heading(..) | NodeValue::ThematicBreak | NodeValue::Table(..) | NodeValue::TableRow(..) | NodeValue::TableCell | NodeValue::TaskItem(..) | NodeValue::MultilineBlockQuote(_) ) } /// Whether the type the node is of can contain inline nodes. pub fn contains_inlines(&self) -> bool { matches!( *self, NodeValue::Paragraph | NodeValue::Heading(..) 
| NodeValue::TableCell ) } /// Return a reference to the text of a `Text` inline, if this node is one. /// /// Convenience method. pub fn text(&self) -> Option<&String> { match *self { NodeValue::Text(ref t) => Some(t), _ => None, } } /// Return a mutable reference to the text of a `Text` inline, if this node is one. /// /// Convenience method. pub fn text_mut(&mut self) -> Option<&mut String> { match *self { NodeValue::Text(ref mut t) => Some(t), _ => None, } } pub(crate) fn accepts_lines(&self) -> bool { matches!( *self, NodeValue::Paragraph | NodeValue::Heading(..) | NodeValue::CodeBlock(..) ) } pub(crate) fn xml_node_name(&self) -> &'static str { match *self { NodeValue::Document => "document", NodeValue::BlockQuote => "block_quote", NodeValue::FootnoteDefinition(_) => "footnote_definition", NodeValue::List(..) => "list", NodeValue::DescriptionList => "description_list", NodeValue::DescriptionItem(_) => "description_item", NodeValue::DescriptionTerm => "description_term", NodeValue::DescriptionDetails => "description_details", NodeValue::Item(..) => "item", NodeValue::CodeBlock(..) => "code_block", NodeValue::HtmlBlock(..) => "html_block", NodeValue::Paragraph => "paragraph", NodeValue::Heading(..) => "heading", NodeValue::ThematicBreak => "thematic_break", NodeValue::Table(..) => "table", NodeValue::TableRow(..) => "table_row", NodeValue::TableCell => "table_cell", NodeValue::Text(..) => "text", NodeValue::SoftBreak => "softbreak", NodeValue::LineBreak => "linebreak", NodeValue::Image(..) => "image", NodeValue::Link(..) => "link", NodeValue::Emph => "emph", NodeValue::Strong => "strong", NodeValue::Code(..) => "code", NodeValue::HtmlInline(..) => "html_inline", NodeValue::Strikethrough => "strikethrough", NodeValue::FrontMatter(_) => "frontmatter", NodeValue::TaskItem { .. } => "taskitem", NodeValue::Superscript => "superscript", NodeValue::FootnoteReference(..) 
=> "footnote_reference",
            #[cfg(feature = "shortcodes")]
            NodeValue::ShortCode(_) => "shortcode",
            NodeValue::MultilineBlockQuote(_) => "multiline_block_quote",
            NodeValue::Escaped => "escaped",
            NodeValue::Math(..) => "math",
            NodeValue::WikiLink(..) => "wikilink",
            NodeValue::Underline => "underline",
            NodeValue::SpoileredText => "spoiler",
            NodeValue::EscapedTag(_) => "escaped_tag",
        }
    }
}

/// A single node in the CommonMark AST.
///
/// The struct contains metadata about the node's position in the original document, and the core
/// enum, `NodeValue`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Ast {
    /// The node value itself.
    pub value: NodeValue,

    /// The positions in the source document this node comes from.
    pub sourcepos: Sourcepos,
    pub(crate) internal_offset: usize,

    pub(crate) content: String,
    // Set to `true` by `Ast::new`.
    pub(crate) open: bool,
    pub(crate) last_line_blank: bool,
    pub(crate) table_visited: bool,
    // One `usize` offset per line of the node; starts out empty in `Ast::new`.
    pub(crate) line_offsets: Vec<usize>,
}

/// Represents the position in the source Markdown this node was rendered from.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct Sourcepos {
    /// The line and column of the first character of this node.
    pub start: LineColumn,
    /// The line and column of the last character of this node.
    pub end: LineColumn,
}

impl std::fmt::Display for Sourcepos {
    /// Formats the position as `start_line:start_column-end_line:end_column`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "{}:{}-{}:{}",
            self.start.line, self.start.column, self.end.line, self.end.column,
        )
    }
}

impl From<(usize, usize, usize, usize)> for Sourcepos {
    /// Builds a `Sourcepos` from `(start_line, start_column, end_line, end_column)`.
    fn from(sp: (usize, usize, usize, usize)) -> Sourcepos {
        Sourcepos {
            start: LineColumn {
                line: sp.0,
                column: sp.1,
            },
            end: LineColumn {
                line: sp.2,
                column: sp.3,
            },
        }
    }
}

/// Represents the 1-based line and column positions of a given character.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct LineColumn {
    /// The 1-based line number of the character.
    pub line: usize,
    /// The 1-based column number of the character.
    pub column: usize,
}

impl From<(usize, usize)> for LineColumn {
    /// Builds a `LineColumn` from `(line, column)`.
    fn from(lc: (usize, usize)) -> LineColumn {
        LineColumn {
            line: lc.0,
            column: lc.1,
        }
    }
}

impl LineColumn {
    /// Return a new LineColumn based on this one, with the column adjusted by offset.
    ///
    /// The line is copied unchanged. Panics if `column + offset` is negative, because
    /// the conversion back to `usize` is unwrapped.
    pub fn column_add(&self, offset: isize) -> LineColumn {
        LineColumn {
            line: self.line,
            column: usize::try_from((self.column as isize) + offset).unwrap(),
        }
    }
}

impl Ast {
    /// Create a new AST node with the given value.
    ///
    /// The node starts at `start`; its end position is initialised to column 0 of the
    /// same line. The node is created open, with empty content and no line offsets.
    pub fn new(value: NodeValue, start: LineColumn) -> Self {
        Ast {
            value,
            content: String::new(),
            sourcepos: (start.line, start.column, start.line, 0).into(),
            internal_offset: 0,
            open: true,
            last_line_blank: false,
            table_visited: false,
            line_offsets: Vec::with_capacity(0),
        }
    }
}

/// The type of a node within the document.
///
/// It is bound by the lifetime `'a`, which corresponds to the `Arena` nodes are
/// allocated in. Child `Ast`s are wrapped in `RefCell` for interior mutability.
///
/// You can construct a new `AstNode` from a `NodeValue` using the `From` trait:
///
/// ```no_run
/// # use comrak::nodes::{AstNode, NodeValue};
/// let root = AstNode::from(NodeValue::Document);
/// ```
///
/// Note that no sourcepos information is given to the created node. If you wish
/// to assign sourcepos information, use the `From` trait to create an `AstNode`
/// from an `Ast`:
///
/// ```no_run
/// # use comrak::nodes::{Ast, AstNode, NodeValue};
/// let root = AstNode::from(Ast::new(
///     NodeValue::Paragraph,
///     (4, 1).into(), // start_line, start_col
/// ));
/// ```
///
/// Adjust the `end` position manually.
///
/// For practical use, you'll probably need it allocated in an `Arena`, in which
/// case you can use `.into()` to simplify creation:
///
/// ```no_run
/// # use comrak::{nodes::{AstNode, NodeValue}, Arena};
/// # let arena = Arena::<AstNode>::new();
/// let node_in_arena = arena.alloc(NodeValue::Document.into());
/// ```
pub type AstNode<'a> = Node<'a, RefCell<Ast>>;

impl<'a> From<NodeValue> for AstNode<'a> {
    /// Create a new AST node with the given value. The sourcepos is set to (0,0)-(0,0).
    fn from(value: NodeValue) -> Self {
        Node::new(RefCell::new(Ast::new(value, LineColumn::default())))
    }
}

impl<'a> From<Ast> for AstNode<'a> {
    /// Create a new AST node with the given Ast.
    fn from(ast: Ast) -> Self {
        Node::new(RefCell::new(ast))
    }
}

/// Validation errors produced by [Node::validate].
#[derive(Debug, Clone)]
pub enum ValidationError<'a> {
    /// The type of a child node is not allowed in the parent node. This can happen when an inline
    /// node is found in a block container, a block is found in an inline node, etc.
    InvalidChildType {
        /// The parent node.
        parent: &'a AstNode<'a>,
        /// The child node.
        child: &'a AstNode<'a>,
    },
}

impl<'a> Node<'a, RefCell<Ast>> {
    /// The comrak representation of a markdown node in Rust isn't strict enough to rule out
    /// invalid trees according to the CommonMark specification. One simple example is that block
    /// containers, such as lists, should only contain blocks, but it's possible to put naked
    /// inline text in a list item. Such invalid trees can lead comrak to generate incorrect output
    /// if rendered.
    ///
    /// This method performs additional structural checks to ensure that a markdown AST is valid
    /// according to the CommonMark specification.
    ///
    /// Note that those invalid trees can only be generated programmatically. Parsing markdown with
    /// comrak, on the other hand, should always produce a valid tree.
pub fn validate(&'a self) -> Result<(), ValidationError<'a>> { let mut stack = vec![self]; while let Some(node) = stack.pop() { // Check that this node type is valid wrt to the type of its parent. if let Some(parent) = node.parent() { if !can_contain_type(parent, &node.data.borrow().value) { return Err(ValidationError::InvalidChildType { parent, child: node, }); } } stack.extend(node.children()); } Ok(()) } } pub(crate) fn last_child_is_open<'a>(node: &'a AstNode<'a>) -> bool { node.last_child().map_or(false, |n| n.data.borrow().open) } /// Returns true if the given node can contain a node with the given value. pub fn can_contain_type<'a>(node: &'a AstNode<'a>, child: &NodeValue) -> bool { match *child { NodeValue::Document => { return false; } NodeValue::FrontMatter(_) => { return matches!(node.data.borrow().value, NodeValue::Document); } _ => {} } match node.data.borrow().value { NodeValue::Document | NodeValue::BlockQuote | NodeValue::FootnoteDefinition(_) | NodeValue::DescriptionTerm | NodeValue::DescriptionDetails | NodeValue::Item(..) | NodeValue::TaskItem(..) => { child.block() && !matches!(*child, NodeValue::Item(..) | NodeValue::TaskItem(..)) } NodeValue::List(..) => matches!(*child, NodeValue::Item(..) | NodeValue::TaskItem(..)), NodeValue::DescriptionList => matches!(*child, NodeValue::DescriptionItem(_)), NodeValue::DescriptionItem(_) => matches!( *child, NodeValue::DescriptionTerm | NodeValue::DescriptionDetails ), #[cfg(feature = "shortcodes")] NodeValue::ShortCode(..) => !child.block(), NodeValue::Paragraph | NodeValue::Heading(..) | NodeValue::Emph | NodeValue::Strong | NodeValue::Link(..) | NodeValue::Image(..) | NodeValue::WikiLink(..) | NodeValue::Strikethrough | NodeValue::Superscript | NodeValue::SpoileredText | NodeValue::Underline // XXX: this is quite a hack: the EscapedTag _contains_ whatever was // possibly going to fall into the spoiler. This should be fixed in // inlines. | NodeValue::EscapedTag(_) => !child.block(), NodeValue::Table(..) 
=> matches!(*child, NodeValue::TableRow(..)), NodeValue::TableRow(..) => matches!(*child, NodeValue::TableCell), #[cfg(not(feature = "shortcodes"))] NodeValue::TableCell => matches!( *child, NodeValue::Text(..) | NodeValue::Code(..) | NodeValue::Emph | NodeValue::Strong | NodeValue::Link(..) | NodeValue::Image(..) | NodeValue::Strikethrough | NodeValue::HtmlInline(..) | NodeValue::Math(..) | NodeValue::WikiLink(..) | NodeValue::FootnoteReference(..) | NodeValue::Superscript | NodeValue::SpoileredText | NodeValue::Underline ), #[cfg(feature = "shortcodes")] NodeValue::TableCell => matches!( *child, NodeValue::Text(..) | NodeValue::Code(..) | NodeValue::Emph | NodeValue::Strong | NodeValue::Link(..) | NodeValue::Image(..) | NodeValue::Strikethrough | NodeValue::HtmlInline(..) | NodeValue::Math(..) | NodeValue::WikiLink(..) | NodeValue::FootnoteReference(..) | NodeValue::Superscript | NodeValue::SpoileredText | NodeValue::Underline | NodeValue::ShortCode(..) ), NodeValue::MultilineBlockQuote(_) => { child.block() && !matches!(*child, NodeValue::Item(..) | NodeValue::TaskItem(..)) } _ => false, } } pub(crate) fn ends_with_blank_line<'a>(node: &'a AstNode<'a>) -> bool { let mut it = Some(node); while let Some(cur) = it { if cur.data.borrow().last_line_blank { return true; } match cur.data.borrow().value { NodeValue::List(..) | NodeValue::Item(..) | NodeValue::TaskItem(..) 
=> { it = cur.last_child() } _ => it = None, }; } false } pub(crate) fn containing_block<'a>(node: &'a AstNode<'a>) -> Option<&'a AstNode<'a>> { let mut ch = Some(node); while let Some(n) = ch { if n.data.borrow().value.block() { return Some(n); } ch = n.parent(); } None } comrak-0.29.0/src/parser/autolink.rs000064400000000000000000000264601046102023000154570ustar 00000000000000use crate::ctype::{isalnum, isalpha, isspace}; use crate::nodes::{AstNode, NodeLink, NodeValue}; use crate::parser::inlines::make_inline; use once_cell::sync::Lazy; use std::str; use typed_arena::Arena; use unicode_categories::UnicodeCategories; // TODO: this can probably be cleaned up a lot. It used to handle all three of // {url,www,email}_match, but now just the last of those. pub(crate) fn process_autolinks<'a>( arena: &'a Arena>, node: &'a AstNode<'a>, contents_str: &mut String, relaxed_autolinks: bool, ) { let contents = contents_str.as_bytes(); let len = contents.len(); let mut i = 0; while i < len { let mut post_org = None; let mut bracket_opening = 0; // cmark-gfm ignores links inside brackets, such as `[[http://example.com]` while i < len { if !relaxed_autolinks { match contents[i] { b'[' => { bracket_opening += 1; } b']' => { bracket_opening -= 1; } _ => (), } if bracket_opening > 0 { i += 1; continue; } } if contents[i] == b'@' { post_org = email_match(arena, contents, i, relaxed_autolinks); if post_org.is_some() { break; } } i += 1; } if let Some((post, reverse, skip)) = post_org { i -= reverse; node.insert_after(post); if i + skip < len { let remain = str::from_utf8(&contents[i + skip..]).unwrap(); assert!(!remain.is_empty()); post.insert_after(make_inline( arena, NodeValue::Text(remain.to_string()), (0, 1, 0, 1).into(), )); } contents_str.truncate(i); return; } } } pub fn www_match<'a>( arena: &'a Arena>, contents: &[u8], i: usize, relaxed_autolinks: bool, ) -> Option<(&'a AstNode<'a>, usize, usize)> { static WWW_DELIMS: Lazy<[bool; 256]> = Lazy::new(|| { let mut sc = [false; 
256]; for c in &[b'*', b'_', b'~', b'(', b'['] { sc[*c as usize] = true; } sc }); if i > 0 && !isspace(contents[i - 1]) && !WWW_DELIMS[contents[i - 1] as usize] { return None; } if !contents[i..].starts_with(b"www.") { return None; } let mut link_end = match check_domain(&contents[i..], false) { None => return None, Some(link_end) => link_end, }; while i + link_end < contents.len() && !isspace(contents[i + link_end]) { // basic test to detect whether we're in a normal markdown link - not exhaustive if relaxed_autolinks && contents[i + link_end - 1] == b']' && contents[i + link_end] == b'(' { return None; } link_end += 1; } link_end = autolink_delim(&contents[i..], link_end, relaxed_autolinks); let mut url = "http://".to_string(); url.push_str(str::from_utf8(&contents[i..link_end + i]).unwrap()); let inl = make_inline( arena, NodeValue::Link(NodeLink { url, title: String::new(), }), (0, 1, 0, 1).into(), ); inl.append(make_inline( arena, NodeValue::Text( str::from_utf8(&contents[i..link_end + i]) .unwrap() .to_string(), ), (0, 1, 0, 1).into(), )); Some((inl, 0, link_end)) } fn check_domain(data: &[u8], allow_short: bool) -> Option { let mut np = 0; let mut uscore1 = 0; let mut uscore2 = 0; for (i, c) in unsafe { str::from_utf8_unchecked(data) }.char_indices() { if c == '\\' && i < data.len() - 1 { // Ignore escaped characters per https://github.com/github/cmark-gfm/pull/292. // Not sure I love this, but it tracks upstream .. } else if c == '_' { uscore2 += 1; } else if c == '.' 
{ uscore1 = uscore2; uscore2 = 0; np += 1; } else if !is_valid_hostchar(c) && c != '-' { if uscore1 == 0 && uscore2 == 0 && (allow_short || np > 0) { return Some(i); } return None; } } if (uscore1 > 0 || uscore2 > 0) && np <= 10 { None } else if allow_short || np > 0 { Some(data.len()) } else { None } } fn is_valid_hostchar(ch: char) -> bool { !(ch.is_whitespace() || ch.is_punctuation() || ch.is_symbol()) } fn autolink_delim(data: &[u8], mut link_end: usize, relaxed_autolinks: bool) -> usize { static LINK_END_ASSORTMENT: Lazy<[bool; 256]> = Lazy::new(|| { let mut sc = [false; 256]; for c in &[b'?', b'!', b'.', b',', b':', b'*', b'_', b'~', b'\'', b'"'] { sc[*c as usize] = true; } sc }); for (i, &b) in data.iter().enumerate().take(link_end) { if b == b'<' { link_end = i; break; } } while link_end > 0 { let cclose = data[link_end - 1]; // Allow any number of matching parentheses (as recognised in copen/cclose) // at the end of the URL. If there is a greater number of closing // parentheses than opening ones, we remove one character from the end of // the link. 
let mut copen = if cclose == b')' { Some(b'(') } else { None }; if relaxed_autolinks && copen.is_none() { // allow balancing of `[]` and `{}` just like `()` copen = if cclose == b']' { Some(b'[') } else if cclose == b'}' { Some(b'{') } else { None }; } if LINK_END_ASSORTMENT[cclose as usize] { link_end -= 1; } else if cclose == b';' { let mut new_end = link_end - 2; while new_end > 0 && isalpha(data[new_end]) { new_end -= 1; } if new_end < link_end - 2 && data[new_end] == b'&' { link_end = new_end; } else { link_end -= 1; } } else if let Some(copen) = copen { let mut opening = 0; let mut closing = 0; for &b in data.iter().take(link_end) { if b == copen { opening += 1; } else if b == cclose { closing += 1; } } if closing <= opening { break; } link_end -= 1; } else { break; } } link_end } pub fn url_match<'a>( arena: &'a Arena>, contents: &[u8], i: usize, relaxed_autolinks: bool, ) -> Option<(&'a AstNode<'a>, usize, usize)> { const SCHEMES: [&[u8]; 3] = [b"http", b"https", b"ftp"]; let size = contents.len(); if size - i < 4 || contents[i + 1] != b'/' || contents[i + 2] != b'/' { return None; } let mut rewind = 0; while rewind < i && isalpha(contents[i - rewind - 1]) { rewind += 1; } if !relaxed_autolinks { let cond = |s: &&[u8]| size - i + rewind >= s.len() && &&contents[i - rewind..i] == s; if !SCHEMES.iter().any(cond) { return None; } } let mut link_end = match check_domain(&contents[i + 3..], true) { None => return None, Some(link_end) => link_end, }; while link_end < size - i && !isspace(contents[i + link_end]) { // basic test to detect whether we're in a normal markdown link - not exhaustive if relaxed_autolinks && link_end > 0 && contents[i + link_end - 1] == b']' && contents[i + link_end] == b'(' { return None; } link_end += 1; } link_end = autolink_delim(&contents[i..], link_end, relaxed_autolinks); let url = str::from_utf8(&contents[i - rewind..i + link_end]) .unwrap() .to_string(); let inl = make_inline( arena, NodeValue::Link(NodeLink { url: url.clone(), 
title: String::new(), }), (0, 1, 0, 1).into(), ); inl.append(make_inline( arena, NodeValue::Text(url), (0, 1, 0, 1).into(), )); Some((inl, rewind, rewind + link_end)) } fn email_match<'a>( arena: &'a Arena>, contents: &[u8], i: usize, relaxed_autolinks: bool, ) -> Option<(&'a AstNode<'a>, usize, usize)> { static EMAIL_OK_SET: Lazy<[bool; 256]> = Lazy::new(|| { let mut sc = [false; 256]; for c in &[b'.', b'+', b'-', b'_'] { sc[*c as usize] = true; } sc }); let size = contents.len(); let mut auto_mailto = true; let mut is_xmpp = false; let mut rewind = 0; while rewind < i { let c = contents[i - rewind - 1]; if isalnum(c) || EMAIL_OK_SET[c as usize] { rewind += 1; continue; } if c == b':' { if validate_protocol("mailto", contents, i - rewind - 1) { auto_mailto = false; rewind += 1; continue; } if validate_protocol("xmpp", contents, i - rewind - 1) { is_xmpp = true; auto_mailto = false; rewind += 1; continue; } } break; } if rewind == 0 { return None; } let mut link_end = 1; let mut np = 0; while link_end < size - i { let c = contents[i + link_end]; if isalnum(c) { // empty } else if c == b'@' { return None; } else if c == b'.' 
&& link_end < size - i - 1 && isalnum(contents[i + link_end + 1]) { np += 1; } else if c == b'/' && is_xmpp { // xmpp allows a `/` in the url } else if c != b'-' && c != b'_' { break; } link_end += 1; } if link_end < 2 || np == 0 || (!isalpha(contents[i + link_end - 1]) && contents[i + link_end - 1] != b'.') { return None; } link_end = autolink_delim(&contents[i..], link_end, relaxed_autolinks); if link_end == 0 { return None; } let mut url = if auto_mailto { "mailto:".to_string() } else { "".to_string() }; let text = str::from_utf8(&contents[i - rewind..link_end + i]).unwrap(); url.push_str(text); let inl = make_inline( arena, NodeValue::Link(NodeLink { url, title: String::new(), }), (0, 1, 0, 1).into(), ); inl.append(make_inline( arena, NodeValue::Text(text.to_string()), (0, 1, 0, 1).into(), )); Some((inl, rewind, rewind + link_end)) } fn validate_protocol(protocol: &str, contents: &[u8], cursor: usize) -> bool { let size = contents.len(); let mut rewind = 0; while rewind < cursor && isalpha(contents[cursor - rewind - 1]) { rewind += 1; } size - cursor + rewind >= protocol.len() && &contents[cursor - rewind..cursor] == protocol.as_bytes() } comrak-0.29.0/src/parser/inlines.rs000064400000000000000000002142021046102023000152630ustar 00000000000000use crate::arena_tree::Node; use crate::ctype::{isdigit, ispunct, isspace}; use crate::entity; use crate::nodes::{ Ast, AstNode, NodeCode, NodeFootnoteReference, NodeLink, NodeMath, NodeValue, NodeWikiLink, Sourcepos, }; use crate::parser::autolink; #[cfg(feature = "shortcodes")] use crate::parser::shortcodes::NodeShortCode; use crate::parser::{ unwrap_into_2, unwrap_into_copy, AutolinkType, BrokenLinkReference, Options, ResolvedReference, }; use crate::scanners; use crate::strings::{self, is_blank, Case}; use std::cell::{Cell, RefCell}; use std::collections::HashMap; use std::convert::TryFrom; use std::ptr; use std::str; use typed_arena::Arena; use unicode_categories::UnicodeCategories; const MAXBACKTICKS: usize = 80; 
const MAX_LINK_LABEL_LENGTH: usize = 1000; const MAX_MATH_DOLLARS: usize = 2; pub struct Subject<'a: 'd, 'r, 'o, 'c, 'd, 'i> { pub arena: &'a Arena>, options: &'o Options<'c>, pub input: &'i [u8], line: usize, pub pos: usize, column_offset: isize, line_offset: usize, flags: Flags, pub refmap: &'r mut RefMap, delimiter_arena: &'d Arena>, last_delimiter: Option<&'d Delimiter<'a, 'd>>, brackets: Vec>, within_brackets: bool, pub backticks: [usize; MAXBACKTICKS + 1], pub scanned_for_backticks: bool, no_link_openers: bool, special_chars: [bool; 256], skip_chars: [bool; 256], smart_chars: [bool; 256], } #[derive(Default)] struct Flags { skip_html_cdata: bool, skip_html_declaration: bool, skip_html_pi: bool, skip_html_comment: bool, } pub struct RefMap { pub map: HashMap, pub(crate) max_ref_size: usize, ref_size: usize, } impl RefMap { pub fn new() -> Self { Self { map: HashMap::new(), max_ref_size: usize::MAX, ref_size: 0, } } fn lookup(&mut self, lab: &str) -> Option { match self.map.get(lab) { Some(entry) => { let size = entry.url.len() + entry.title.len(); if size > self.max_ref_size - self.ref_size { None } else { self.ref_size += size; Some(entry.clone()) } } None => None, } } } pub struct Delimiter<'a: 'd, 'd> { inl: &'a AstNode<'a>, position: usize, length: usize, delim_char: u8, can_open: bool, can_close: bool, prev: Cell>>, next: Cell>>, } struct Bracket<'a> { inl_text: &'a AstNode<'a>, position: usize, image: bool, bracket_after: bool, } #[derive(Clone, Copy)] struct WikilinkComponents<'i> { url: &'i [u8], link_label: Option<(&'i [u8], usize, usize)>, } impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> { pub fn new( arena: &'a Arena>, options: &'o Options<'c>, input: &'i [u8], line: usize, refmap: &'r mut RefMap, delimiter_arena: &'d Arena>, ) -> Self { let mut s = Subject { arena, options, input, line, pos: 0, column_offset: 0, line_offset: 0, flags: Flags::default(), refmap, delimiter_arena, last_delimiter: None, brackets: vec![], within_brackets: 
false, backticks: [0; MAXBACKTICKS + 1], scanned_for_backticks: false, no_link_openers: true, special_chars: [false; 256], skip_chars: [false; 256], smart_chars: [false; 256], }; for &c in &[ b'\n', b'\r', b'_', b'*', b'"', b'`', b'\\', b'&', b'<', b'[', b']', b'!', b'$', ] { s.special_chars[c as usize] = true; } if options.extension.autolink { s.special_chars[b':' as usize] = true; s.special_chars[b'w' as usize] = true; } if options.extension.strikethrough { s.special_chars[b'~' as usize] = true; s.skip_chars[b'~' as usize] = true; } if options.extension.superscript { s.special_chars[b'^' as usize] = true; } #[cfg(feature = "shortcodes")] if options.extension.shortcodes { s.special_chars[b':' as usize] = true; } if options.extension.underline { s.special_chars[b'_' as usize] = true; } if options.extension.spoiler { s.special_chars[b'|' as usize] = true; } for &c in &[b'"', b'\'', b'.', b'-'] { s.smart_chars[c as usize] = true; } s } pub fn pop_bracket(&mut self) -> bool { self.brackets.pop().is_some() } pub fn parse_inline(&mut self, node: &'a AstNode<'a>) -> bool { let c = match self.peek_char() { None => return false, Some(ch) => *ch as char, }; let node_ast = node.data.borrow(); let adjusted_line = self.line - node_ast.sourcepos.start.line; self.line_offset = node_ast.line_offsets[adjusted_line]; let new_inl: Option<&'a AstNode<'a>> = match c { '\0' => return false, '\r' | '\n' => Some(self.handle_newline()), '`' => Some(self.handle_backticks()), '\\' => Some(self.handle_backslash()), '&' => Some(self.handle_entity()), '<' => Some(self.handle_pointy_brace()), ':' => { let mut res = None; if self.options.extension.autolink { res = self.handle_autolink_colon(node); } #[cfg(feature = "shortcodes")] if res.is_none() && self.options.extension.shortcodes { res = self.handle_shortcodes_colon(); } if res.is_none() { self.pos += 1; res = Some(self.make_inline( NodeValue::Text(":".to_string()), self.pos - 1, self.pos - 1, )); } res } 'w' if 
self.options.extension.autolink => match self.handle_autolink_w(node) { Some(inl) => Some(inl), None => { self.pos += 1; Some(self.make_inline( NodeValue::Text("w".to_string()), self.pos - 1, self.pos - 1, )) } }, '*' | '_' | '\'' | '"' => Some(self.handle_delim(c as u8)), '-' => Some(self.handle_hyphen()), '.' => Some(self.handle_period()), '[' => { self.pos += 1; let mut wikilink_inl = None; if (self.options.extension.wikilinks_title_after_pipe || self.options.extension.wikilinks_title_before_pipe) && !self.within_brackets && self.peek_char() == Some(&(b'[')) { wikilink_inl = self.handle_wikilink(); } if wikilink_inl.is_none() { let inl = self.make_inline( NodeValue::Text("[".to_string()), self.pos - 1, self.pos - 1, ); self.push_bracket(false, inl); self.within_brackets = true; Some(inl) } else { wikilink_inl } } ']' => { self.within_brackets = false; self.handle_close_bracket() } '!' => { self.pos += 1; if self.peek_char() == Some(&(b'[')) && self.peek_char_n(1) != Some(&(b'^')) { self.pos += 1; let inl = self.make_inline( NodeValue::Text("![".to_string()), self.pos - 2, self.pos - 1, ); self.push_bracket(true, inl); self.within_brackets = true; Some(inl) } else { Some(self.make_inline( NodeValue::Text("!".to_string()), self.pos - 1, self.pos - 1, )) } } '~' if self.options.extension.strikethrough => Some(self.handle_delim(b'~')), '^' if self.options.extension.superscript && !self.within_brackets => { Some(self.handle_delim(b'^')) } '$' => Some(self.handle_dollars()), '|' if self.options.extension.spoiler => Some(self.handle_delim(b'|')), _ => { let endpos = self.find_special_char(); let mut contents = self.input[self.pos..endpos].to_vec(); let startpos = self.pos; self.pos = endpos; if self .peek_char() .map_or(false, |&c| strings::is_line_end_char(c)) { strings::rtrim(&mut contents); } // if we've just produced a LineBreak, then we should consume any leading // space on this line if node.last_child().map_or(false, |n| { matches!(n.data.borrow().value, 
NodeValue::LineBreak) }) { strings::ltrim(&mut contents); } Some(self.make_inline( NodeValue::Text(String::from_utf8(contents).unwrap()), startpos, endpos - 1, )) } }; if let Some(inl) = new_inl { node.append(inl); } true }

    // Compares two optional delimiter references by identity: true when both
    // are `None`, or both are `Some` and point at the same `Delimiter`.
    fn del_ref_eq(lhs: Option<&'d Delimiter<'a, 'd>>, rhs: Option<&'d Delimiter<'a, 'd>>) -> bool {
        match (lhs, rhs) {
            (None, None) => true,
            (Some(l), Some(r)) => ptr::eq(l, r),
            _ => false,
        }
    }

    // After parsing a block (and sometimes during), this function traverses the
    // stack of `Delimiters`, tokens ("*", "_", etc.) that may delimit regions
    // of text for special rendering: emphasis, strong, superscript,
    // spoilertext; looking for pairs of opening and closing delimiters,
    // with the goal of placing the intervening nodes into new emphasis,
    // etc AST nodes.
    //
    // The term stack here is a bit of a misnomer, as the `Delimiters` actually
    // form a doubly-linked list. Items are pushed onto the stack during parsing,
    // but during post-processing are removed from arbitrary locations.
    //
    // The `Delimiter` contains references to AST `Text` nodes, which are also
    // linked into the AST as siblings in the order they are parsed. This
    // function doesn't know a-priori which ones are markdown syntax and which
    // are just text: candidate delimiters that match have their nodes removed
    // from the AST, as they are markdown, and their intervening siblings
    // lowered into a new AST parent node via the `insert_emph` function;
    // candidate delimiters that don't match are left in the tree.
    //
    // The basic algorithm is to start at the bottom of the stack, walk upwards
    // looking for closing delimiters, and from each closing delimiter walk back
    // down the stack looking for its matching opening delimiter. This traversal
    // favors the smallest matching leftmost pairs, e.g.
    //
    //   _a *b c_ d* e_
    //    ~~~~~~
    //
    // (The emphasis region is wavy-underlined)
    //
    // All of the `_` and `*` tokens are scanned as candidates, but only the
    // region "a *b c" is lowered into an `Emph` node; the other candidate
    // delimiters are all actually text.
    //
    // And in
    //
    //   _a _b c_
    //       ~~~
    //
    // "b c" is the emphasis region, not "a _b c".
    //
    // Note that Delimiters are matched by comparing their `delim_char`, which
    // is simply a value used to compare opening and closing delimiters - the
    // actual text value of the scanned token can theoretically be different.
    //
    // There's some additional trickiness in the logic because "_", "__", and
    // "___" (and etc. etc.) all share the same delim_char, but represent
    // different emphasis. Note also that "_"- and "*"-delimited regions have
    // complex rules for which can be opening and/or closing delimiters,
    // determined in `scan_delims`.
    pub fn process_emphasis(&mut self, stack_bottom: usize) {
        // This array is an important optimization that prevents searching down
        // the stack for openers we've previously searched for and know don't
        // exist, preventing exponential blowup on pathological cases.
        // One slot per delimiter class; `*` uses six slots (see `ix` below).
        let mut openers_bottom: [usize; 12] = [stack_bottom; 12];

        // Walk from the top of the stack downwards while the delimiter's
        // position is still at or above `stack_bottom`; `closer` ends up as
        // the lowest such delimiter (or `None` if there is none). When every
        // delimiter lies at or above `stack_bottom`, that is the very bottom
        // of the stack.
        let mut candidate = self.last_delimiter;
        let mut closer: Option<&Delimiter> = None;
        while candidate.map_or(false, |c| c.position >= stack_bottom) {
            closer = candidate;
            candidate = candidate.unwrap().prev.get();
        }

        while let Some(c) = closer {
            if c.can_close {
                // Each time through the outer `closer` loop we reset the opener
                // to the element below the closer, and search down the stack
                // for a matching opener.

                let mut opener = c.prev.get();
                let mut opener_found = false;
                let mut mod_three_rule_invoked = false;

                // Index into `openers_bottom`. For `*` the slot additionally
                // depends on whether the closer can also open and on its
                // length modulo 3, giving indices 6..=11.
                let ix = match c.delim_char {
                    b'|' => 0,
                    b'~' => 1,
                    b'^' => 2,
                    b'"' => 3,
                    b'\'' => 4,
                    b'_' => 5,
                    b'*' => 6 + (if c.can_open { 3 } else { 0 }) + (c.length % 3),
                    _ => unreachable!(),
                };

                // Here's where we find the opener by searching down the stack,
                // looking for matching delims with the `can_open` flag.
                // On the first time through the outer `closer` loop this inner
                // search cannot succeed: the delimiter below the first closer
                // (if any) has a position below `stack_bottom`, which is the
                // initial value of every `openers_bottom` slot.
                //
                // This search short-circuits for openers we've previously
                // failed to find, avoiding repeatedly rescanning the bottom of
                // the stack, using the openers_bottom array.
                while opener.map_or(false, |o| o.position >= openers_bottom[ix]) {
                    let o = opener.unwrap();
                    if o.can_open && o.delim_char == c.delim_char {
                        // This is a bit convoluted; see points 9 and 10 here:
                        // http://spec.commonmark.org/0.28/#can-open-emphasis.
                        // This is to aid processing of runs like this:
                        // “***hello*there**” or “***hello**there*”. In this
                        // case, the middle delimiter can both open and close
                        // emphasis; when trying to find an opening delimiter
                        // that matches the last ** or *, we need to skip it,
                        // and this algorithm ensures we do. (The sum of the
                        // lengths are a multiple of 3.)
                        let odd_match = (c.can_open || o.can_close)
                            && ((o.length + c.length) % 3 == 0)
                            && !(o.length % 3 == 0 && c.length % 3 == 0);
                        if !odd_match {
                            opener_found = true;
                            break;
                        } else {
                            mod_three_rule_invoked = true;
                        }
                    }
                    opener = o.prev.get();
                }

                let old_c = c;

                // There's a case here for every possible delimiter. If we found
                // a matching opening delimiter for our closing delimiter, they
                // both get passed.
                if c.delim_char == b'*'
                    || c.delim_char == b'_'
                    || (self.options.extension.strikethrough && c.delim_char == b'~')
                    || (self.options.extension.superscript && c.delim_char == b'^')
                    || (self.options.extension.spoiler && c.delim_char == b'|')
                {
                    if opener_found {
                        // Finally, here's the happy case where the delimiters
                        // match and they are inserted. We get a new closer
                        // delimiter and go around the loop again.
                        //
                        // Note that for "***" and "___" delimiters of length
                        // greater than 2, insert_emph will create a `Strong`
                        // node (i.e. "**"), then _truncate_ the delimiters in
                        // place, turning them into e.g. "*" delimiters, and
                        // hand us back the same mutated closer to be matched
                        // again.
                        //
                        // In general though the closer will be the next
                        // delimiter up the stack.
                        closer = self.insert_emph(opener.unwrap(), c);
                    } else {
                        // When no matching opener is found we move the closer
                        // up the stack, do some bookkeeping with `old_c`
                        // (below), try again.
                        closer = c.next.get();
                    }
                } else if c.delim_char == b'\'' || c.delim_char == b'"' {
                    // Smart punctuation: a closing quote always becomes the
                    // right-hand curly quote.
                    *c.inl.data.borrow_mut().value.text_mut().unwrap() =
                        if c.delim_char == b'\'' { "’" } else { "”" }.to_string();
                    closer = c.next.get();

                    if opener_found {
                        // A matched opener becomes the left-hand curly quote,
                        // and both delimiters are finished with.
                        *opener
                            .unwrap()
                            .inl
                            .data
                            .borrow_mut()
                            .value
                            .text_mut()
                            .unwrap() = if old_c.delim_char == b'\'' {
                            "‘"
                        } else {
                            "“"
                        }
                        .to_string();
                        self.remove_delimiter(opener.unwrap());
                        self.remove_delimiter(old_c);
                    }
                }

                // If the search for an opener was unsuccessful, then record
                // the position the search started at in the `openers_bottom`
                // so that the `opener` search can avoid looking for this
                // same opener at the bottom of the stack later.
                if !opener_found {
                    if !mod_three_rule_invoked {
                        openers_bottom[ix] = old_c.position;
                    }

                    // Now that we've failed the `opener` search starting from
                    // `old_c`, future opener searches will be searching it
                    // for openers - if `old_c` can't be used as an opener
                    // then we know it's just text - remove it from the
                    // delimiter stack, leaving it in the AST as text
                    if !old_c.can_open {
                        self.remove_delimiter(old_c);
                    }
                }
            } else {
                // Closer is !can_close. Move up the stack
                closer = c.next.get();
            }
        }

        // At this point the entire delimiter stack from `stack_bottom` up has
        // been scanned for matches, everything left is just text. Pop it all
        // off.
        self.remove_delimiters(stack_bottom);
    }

    // Unlinks `delimiter` from the doubly-linked delimiter list, updating
    // `last_delimiter` when it was the top of the stack. The delimiter's own
    // `prev`/`next` cells are left untouched.
    fn remove_delimiter(&mut self, delimiter: &'d Delimiter<'a, 'd>) {
        if delimiter.next.get().is_none() {
            // No successor: this must be the current top of the stack.
            assert!(ptr::eq(delimiter, self.last_delimiter.unwrap()));
            self.last_delimiter = delimiter.prev.get();
        } else {
            delimiter.next.get().unwrap().prev.set(delimiter.prev.get());
        }
        if delimiter.prev.get().is_some() {
            delimiter.prev.get().unwrap().next.set(delimiter.next.get());
        }
    }

    // Pops delimiters off the top of the stack for as long as the top one has
    // a position at or above `stack_bottom`.
    fn remove_delimiters(&mut self, stack_bottom: usize) {
        while self
            .last_delimiter
            .map_or(false, |d| d.position >= stack_bottom)
        {
            self.remove_delimiter(self.last_delimiter.unwrap());
        }
    }

    // True once `pos` has reached (or passed) the end of the input.
    #[inline]
    pub fn eof(&self) -> bool {
        self.pos >= self.input.len()
    }

    // Returns the byte at the current position, or `None` at end of input.
    #[inline]
    pub fn peek_char(&self) -> Option<&u8> {
        self.peek_char_n(0)
    }

    // Returns the byte `n` positions ahead of `pos`, or `None` when that is
    // past the end of the input. Asserts that the byte is not NUL.
    #[inline]
    fn peek_char_n(&self, n: usize) -> Option<&u8> {
        if self.pos + n >= self.input.len() {
            None
        } else {
            let c = &self.input[self.pos + n];
            assert!(*c > 0);
            Some(c)
        }
    }

    // Returns the index of the next byte at or after `pos` that is flagged in
    // `special_chars` (or in `smart_chars` when smart punctuation is enabled),
    // or the input length if there is none. A `^` is not treated as special
    // while `within_brackets` is set.
    pub fn find_special_char(&self) -> usize {
        for n in self.pos..self.input.len() {
            if self.special_chars[self.input[n] as usize] {
                if self.input[n] == b'^' && self.within_brackets {
                    // NO OP
                } else {
                    return n;
                }
            }
            if self.options.parse.smart && self.smart_chars[self.input[n] as usize] {
                return n;
            }
        }

        self.input.len()
    }

    // Accounts for newlines inside a just-parsed multi-line inline node: the
    // `matchlen` bytes that end `extra` bytes before `pos` are scanned for
    // newlines. Does nothing unless `options.render.sourcepos` is set.
    fn adjust_node_newlines(&mut self, node: &'a AstNode<'a>, matchlen: usize, extra: usize) {
        if
!self.options.render.sourcepos {
            return;
        }

        let (newlines, since_newline) =
            count_newlines(&self.input[self.pos - matchlen - extra..self.pos - extra]);
        if newlines > 0 {
            self.line += newlines;
            let node_ast = &mut node.data.borrow_mut();
            node_ast.sourcepos.end.line += newlines;
            node_ast.sourcepos.end.column = since_newline;
            // Re-base column computation on the last newline seen.
            self.column_offset = -(self.pos as isize) + since_newline as isize + extra as isize;
        }
    }

    // Consumes a line ending at `pos` (an optional `\r` followed by an
    // optional `\n`) and returns a `LineBreak` node when the two bytes before
    // it are both spaces, otherwise a `SoftBreak`. Also advances `line`,
    // resets `column_offset` and skips leading spaces/tabs of the next line.
    pub fn handle_newline(&mut self) -> &'a AstNode<'a> {
        let nlpos = self.pos;
        if self.input[self.pos] == b'\r' {
            self.pos += 1;
        }
        if self.input[self.pos] == b'\n' {
            self.pos += 1;
        }
        let inl = if nlpos > 1 && self.input[nlpos - 1] == b' ' && self.input[nlpos - 2] == b' ' {
            self.make_inline(NodeValue::LineBreak, nlpos, self.pos - 1)
        } else {
            self.make_inline(NodeValue::SoftBreak, nlpos, self.pos - 1)
        };
        self.line += 1;
        self.column_offset = -(self.pos as isize);
        self.skip_spaces();
        inl
    }

    // Advances `pos` over a run of the byte `c` and returns the run length.
    pub fn take_while(&mut self, c: u8) -> usize {
        let start_pos = self.pos;
        while self.peek_char() == Some(&c) {
            self.pos += 1;
        }
        self.pos - start_pos
    }

    // Scans forward for a backtick run of exactly `openticklength` ticks.
    // Returns the position just past that run, or `None` if no such run
    // exists. Positions of runs seen along the way are cached in
    // `self.backticks` so later failed searches can bail out early.
    pub fn scan_to_closing_backtick(&mut self, openticklength: usize) -> Option<usize> {
        if openticklength > MAXBACKTICKS {
            return None;
        }

        // A previous scan reached end of input; if the last recorded run of
        // this length is not ahead of us, there is nothing to find.
        if self.scanned_for_backticks && self.backticks[openticklength] <= self.pos {
            return None;
        }

        loop {
            while self.peek_char().map_or(false, |&c| c != b'`') {
                self.pos += 1;
            }
            if self.pos >= self.input.len() {
                self.scanned_for_backticks = true;
                return None;
            }
            let numticks = self.take_while(b'`');
            if numticks <= MAXBACKTICKS {
                self.backticks[numticks] = self.pos - numticks;
            }
            if numticks == openticklength {
                return Some(self.pos);
            }
        }
    }

    // Parses a code span starting at a run of backticks. Produces a `Code`
    // node when a matching closing run is found; otherwise rewinds to just
    // after the opening run and returns the backticks as literal text.
    pub fn handle_backticks(&mut self) -> &'a AstNode<'a> {
        let openticks = self.take_while(b'`');
        let startpos = self.pos;
        let endpos = self.scan_to_closing_backtick(openticks);

        match endpos {
            None => {
                self.pos = startpos;
                self.make_inline(NodeValue::Text("`".repeat(openticks)), self.pos, self.pos)
            }
            Some(endpos) => {
                let buf = &self.input[startpos..endpos - openticks];
                let buf = strings::normalize_code(buf);
                let code = NodeCode {
                    num_backticks: openticks,
                    literal: String::from_utf8(buf).unwrap(),
                };
                let node =
                    self.make_inline(NodeValue::Code(code), startpos, endpos - openticks - 1);
                self.adjust_node_newlines(node, endpos - startpos, openticks);
                node
            }
        }
    }

    // Scans forward for a closing run of exactly `opendollarlength` dollar
    // signs. Returns the position just past it, or `None` when the
    // `math_dollars` extension is off, the opener is too long, or one of the
    // single-dollar heuristics below rejects the candidate.
    pub fn scan_to_closing_dollar(&mut self, opendollarlength: usize) -> Option<usize> {
        if !(self.options.extension.math_dollars) || opendollarlength > MAX_MATH_DOLLARS {
            return None;
        }

        // space not allowed after initial $
        if opendollarlength == 1 && self.peek_char().map_or(false, |&c| isspace(c)) {
            return None;
        }

        loop {
            while self.peek_char().map_or(false, |&c| c != b'$') {
                self.pos += 1;
            }

            if self.pos >= self.input.len() {
                return None;
            }

            // space not allowed before ending $
            if opendollarlength == 1 {
                let c = self.input[self.pos - 1];
                if isspace(c) {
                    return None;
                }
            }

            // dollar signs must also be backslash-escaped if they occur within math
            let c = self.input[self.pos - 1];
            if opendollarlength == 1 && c == (b'\\') {
                self.pos += 1;
                continue;
            }

            let numdollars = self.take_while(b'$');

            // ending $ can't be followed by a digit
            if opendollarlength == 1 && self.peek_char().map_or(false, |&c| isdigit(c)) {
                return None;
            }

            if numdollars == opendollarlength {
                return Some(self.pos);
            }
        }
    }

    // Scans forward for the closing "`$" of code math. Returns the position
    // just past the `$`, or `None` when the `math_code` extension is off or
    // no `$` preceded by a backtick is found.
    pub fn scan_to_closing_code_dollar(&mut self) -> Option<usize> {
        if !self.options.extension.math_code {
            return None;
        }

        loop {
            while self.peek_char().map_or(false, |&c| c != b'$') {
                self.pos += 1;
            }

            if self.pos >= self.input.len() {
                return None;
            }

            let c = self.input[self.pos - 1];

            if c == b'`' {
                self.pos += 1;
                return Some(self.pos);
            } else {
                self.pos += 1;
            }
        }
    }

    // Heuristics used from https://pandoc.org/MANUAL.html#extension-tex_math_dollars
    //
    // Parses `$…$`, `$$…$$` and "$`…`$" math. Returns a `Math` node on
    // success; otherwise rewinds and returns the dollar sign(s) as text.
    pub fn handle_dollars(&mut self) -> &'a AstNode<'a> {
        if self.options.extension.math_dollars || self.options.extension.math_code {
            let opendollars = self.take_while(b'$');
            let mut code_math = false;

            // check for code math
            if opendollars == 1
                && self.options.extension.math_code
                && self.peek_char().map_or(false, |&c| c == b'`')
            {
                code_math = true;
                self.pos += 1;
            }

            let startpos = self.pos;
            let endpos: Option<usize> = if code_math {
                self.scan_to_closing_code_dollar()
            } else {
                self.scan_to_closing_dollar(opendollars)
            };

            // Code math has a two-byte fence on each side: "$`" and "`$".
            let fence_length = if code_math { 2 } else { opendollars };
            // Reject matches with nothing between the fences.
            let endpos: Option<usize> = match endpos {
                Some(epos) => {
                    if epos - startpos + fence_length < fence_length * 2 + 1 {
                        None
                    } else {
                        endpos
                    }
                }
                None => endpos,
            };

            match endpos {
                None => {
                    if code_math {
                        // Rewind to the backtick so it is parsed on its own.
                        self.pos = startpos - 1;
                        self.make_inline(
                            NodeValue::Text("$".to_string()),
                            self.pos - 1,
                            self.pos - 1,
                        )
                    } else {
                        self.pos = startpos;
                        self.make_inline(
                            NodeValue::Text("$".repeat(opendollars)),
                            self.pos,
                            self.pos,
                        )
                    }
                }
                Some(endpos) => {
                    let buf = &self.input[startpos..endpos - fence_length];
                    let buf: Vec<u8> = if code_math || opendollars == 1 {
                        strings::normalize_code(buf)
                    } else {
                        buf.to_vec()
                    };
                    let math = NodeMath {
                        dollar_math: !code_math,
                        display_math: opendollars == 2,
                        literal: String::from_utf8(buf).unwrap(),
                    };
                    let node = self.make_inline(
                        NodeValue::Math(math),
                        startpos,
                        endpos - fence_length - 1,
                    );
                    self.adjust_node_newlines(node, endpos - startpos, fence_length);
                    node
                }
            }
        } else {
            self.pos += 1;
            self.make_inline(NodeValue::Text("$".to_string()), self.pos - 1, self.pos - 1)
        }
    }

    // Advances over spaces and tabs; returns whether anything was skipped.
    pub fn skip_spaces(&mut self) -> bool {
        let mut skipped = false;
        while self.peek_char().map_or(false, |&c| c == b' ' || c == b'\t') {
            self.pos += 1;
            skipped = true;
        }
        skipped
    }

    // Consumes a delimiter run of `c`, emits it as a `Text` node, and pushes
    // a delimiter-stack entry when the run can open or close. With smart
    // punctuation enabled, quotes are emitted as curly quotes up front.
    pub fn handle_delim(&mut self, c: u8) -> &'a AstNode<'a> {
        let (numdelims, can_open, can_close) = self.scan_delims(c);

        let contents = if c == b'\'' && self.options.parse.smart {
            "’".to_string()
        } else if c == b'"' && self.options.parse.smart {
            if can_close {
                "”".to_string()
            } else {
                "“".to_string()
            }
        } else {
            str::from_utf8(&self.input[self.pos - numdelims..self.pos])
                .unwrap()
                .to_string()
        };
        let inl = self.make_inline(
            NodeValue::Text(contents),
            self.pos - numdelims,
            self.pos - 1,
        );

        // Quote characters only take part in delimiter matching when smart
        // punctuation is enabled.
        if (can_open || can_close) && (!(c ==
b'\'' || c == b'"') || self.options.parse.smart) {
            self.push_delimiter(c, can_open, can_close, inl);
        }

        inl
    }

    // Handles a run of hyphens. Without smart punctuation (or for a single
    // hyphen) a literal "-" is returned. Otherwise the whole run is consumed
    // and rendered as a mix of em dashes followed by en dashes.
    pub fn handle_hyphen(&mut self) -> &'a AstNode<'a> {
        let start = self.pos;
        self.pos += 1;

        if !self.options.parse.smart || self.peek_char().map_or(true, |&c| c != b'-') {
            return self.make_inline(NodeValue::Text("-".to_string()), self.pos - 1, self.pos - 1);
        }

        while self.options.parse.smart && self.peek_char().map_or(false, |&c| c == b'-') {
            self.pos += 1;
        }

        let numhyphens = (self.pos - start) as i32;

        // Split the run: all em dashes when divisible by 3, all en dashes
        // when divisible by 2, otherwise em dashes plus one or two en dashes.
        let (ens, ems) = if numhyphens % 3 == 0 {
            (0, numhyphens / 3)
        } else if numhyphens % 2 == 0 {
            (numhyphens / 2, 0)
        } else if numhyphens % 3 == 2 {
            (1, (numhyphens - 2) / 3)
        } else {
            (2, (numhyphens - 4) / 3)
        };

        let ens = if ens > 0 { ens as usize } else { 0 };
        let ems = if ems > 0 { ems as usize } else { 0 };

        // Both dash characters are three bytes long in UTF-8.
        let mut buf = String::with_capacity(3 * (ems + ens));
        buf.push_str(&"—".repeat(ems));
        buf.push_str(&"–".repeat(ens));
        self.make_inline(NodeValue::Text(buf), start, self.pos - 1)
    }

    // Handles a period. With smart punctuation, three consecutive periods
    // become an ellipsis character; two are returned as ".."; otherwise a
    // single "." is returned.
    pub fn handle_period(&mut self) -> &'a AstNode<'a> {
        self.pos += 1;
        if self.options.parse.smart && self.peek_char().map_or(false, |&c| c == b'.') {
            self.pos += 1;
            if self.peek_char().map_or(false, |&c| c == b'.') {
                self.pos += 1;
                self.make_inline(NodeValue::Text("…".to_string()), self.pos - 3, self.pos - 1)
            } else {
                self.make_inline(
                    NodeValue::Text("..".to_string()),
                    self.pos - 2,
                    self.pos - 1,
                )
            }
        } else {
            self.make_inline(NodeValue::Text(".".to_string()), self.pos - 1, self.pos - 1)
        }
    }

    // Consumes a delimiter run of byte `c` starting at `pos` and classifies
    // it. Returns `(run length, can_open, can_close)`. Quote characters are
    // always consumed one at a time.
    pub fn scan_delims(&mut self, c: u8) -> (usize, bool, bool) {
        // Character before the run; start of input counts as a newline.
        let before_char = if self.pos == 0 {
            '\n'
        } else {
            let mut before_char_pos = self.pos - 1;
            // Step back over UTF-8 continuation bytes (0b10xxxxxx) and over
            // bytes flagged in `skip_chars`.
            while before_char_pos > 0
                && (self.input[before_char_pos] >> 6 == 2
                    || self.skip_chars[self.input[before_char_pos] as usize])
            {
                before_char_pos -= 1;
            }
            // NOTE(review): relies on `before_char_pos` landing on a char
            // boundary of valid UTF-8 input — confirm against callers.
            match unsafe { str::from_utf8_unchecked(&self.input[before_char_pos..self.pos]) }
                .chars()
                .next()
            {
                Some(x) => {
                    if (x as usize) < 256 && self.skip_chars[x as usize] {
                        '\n'
                    } else {
                        x
                    }
                }
                None => '\n',
            }
        };

        let mut numdelims = 0;
        if c == b'\'' || c == b'"' {
            numdelims += 1;
            self.pos += 1;
        } else {
            while self.peek_char() == Some(&c) {
                numdelims += 1;
                self.pos += 1;
            }
        }

        // Character after the run; end of input counts as a newline.
        let after_char = if self.eof() {
            '\n'
        } else {
            let mut after_char_pos = self.pos;
            while after_char_pos < self.input.len() - 1
                && self.skip_chars[self.input[after_char_pos] as usize]
            {
                after_char_pos += 1;
            }
            match unsafe { str::from_utf8_unchecked(&self.input[after_char_pos..]) }
                .chars()
                .next()
            {
                Some(x) => {
                    if (x as usize) < 256 && self.skip_chars[x as usize] {
                        '\n'
                    } else {
                        x
                    }
                }
                None => '\n',
            }
        };

        let left_flanking = numdelims > 0
            && !after_char.is_whitespace()
            && !((after_char.is_punctuation() || after_char.is_symbol())
                && !before_char.is_whitespace()
                && !(before_char.is_punctuation() || before_char.is_symbol()));
        let right_flanking = numdelims > 0
            && !before_char.is_whitespace()
            && !((before_char.is_punctuation() || before_char.is_symbol())
                && !after_char.is_whitespace()
                && !(after_char.is_punctuation() || after_char.is_symbol()));

        if c == b'_' {
            // Underscores get the stricter intraword rules.
            (
                numdelims,
                left_flanking
                    && (!right_flanking
                        || before_char.is_punctuation()
                        || before_char.is_symbol()),
                right_flanking
                    && (!left_flanking || after_char.is_punctuation() || after_char.is_symbol()),
            )
        } else if c == b'\'' || c == b'"' {
            (
                numdelims,
                left_flanking
                    && (!right_flanking || before_char == '(' || before_char == '[')
                    && before_char != ']'
                    && before_char != ')',
                right_flanking,
            )
        } else {
            (numdelims, left_flanking, right_flanking)
        }
    }

    // Allocates a new `Delimiter` for the text node `inl` and links it in as
    // the new top of the delimiter stack. Its `length` is the byte length of
    // the node's text and its `position` is the current `pos`.
    pub fn push_delimiter(&mut self, c: u8, can_open: bool, can_close: bool, inl: &'a AstNode<'a>) {
        let d = self.delimiter_arena.alloc(Delimiter {
            prev: Cell::new(self.last_delimiter),
            next: Cell::new(None),
            inl,
            position: self.pos,
            length: inl.data.borrow().value.text().unwrap().len(),
            delim_char: c,
            can_open,
            can_close,
        });
        if d.prev.get().is_some() {
            d.prev.get().unwrap().next.set(Some(d));
        }
        self.last_delimiter = Some(d);
    }

    // Create a new emphasis node, move all the nodes between `opener`
    // and
`closer` into it, and insert it into the AST. // // As a side-effect, handle long "***" and "___" nodes by truncating them in // place to be re-matched by `process_emphasis`. pub fn insert_emph( &mut self, opener: &'d Delimiter<'a, 'd>, closer: &'d Delimiter<'a, 'd>, ) -> Option<&'d Delimiter<'a, 'd>> { let opener_char = opener.inl.data.borrow().value.text().unwrap().as_bytes()[0]; let mut opener_num_chars = opener.inl.data.borrow().value.text().unwrap().len(); let mut closer_num_chars = closer.inl.data.borrow().value.text().unwrap().len(); let use_delims = if closer_num_chars >= 2 && opener_num_chars >= 2 { 2 } else { 1 }; opener_num_chars -= use_delims; closer_num_chars -= use_delims; if self.options.extension.strikethrough && opener_char == b'~' && (opener_num_chars != closer_num_chars || opener_num_chars > 0) { return None; } opener .inl .data .borrow_mut() .value .text_mut() .unwrap() .truncate(opener_num_chars); closer .inl .data .borrow_mut() .value .text_mut() .unwrap() .truncate(closer_num_chars); // Remove all the candidate delimiters from between the opener and the // closer. None of them are matched pairs. They've been scanned already. let mut delim = closer.prev.get(); while delim.is_some() && !Self::del_ref_eq(delim, Some(opener)) { self.remove_delimiter(delim.unwrap()); delim = delim.unwrap().prev.get(); } let emph = self.make_inline( if self.options.extension.strikethrough && opener_char == b'~' { NodeValue::Strikethrough } else if self.options.extension.superscript && opener_char == b'^' { NodeValue::Superscript } else if self.options.extension.spoiler && opener_char == b'|' { if use_delims == 2 { NodeValue::SpoileredText } else { NodeValue::EscapedTag("|".to_owned()) } } else if self.options.extension.underline && opener_char == b'_' && use_delims == 2 { NodeValue::Underline } else if use_delims == 1 { NodeValue::Emph } else { NodeValue::Strong }, // These are overriden immediately below. 
self.pos, self.pos, ); { // if we have `___` or `***` then we need to adjust the sourcepos colums by 1 let triple_adjustment = if opener_num_chars > 0 && use_delims == 2 { 1 } else { 0 }; emph.data.borrow_mut().sourcepos = ( opener.inl.data.borrow().sourcepos.start.line, opener.inl.data.borrow().sourcepos.start.column + triple_adjustment, closer.inl.data.borrow().sourcepos.end.line, closer.inl.data.borrow().sourcepos.end.column - triple_adjustment, ) .into(); } // Drop all the interior AST nodes into the emphasis node // and then insert the emphasis node let mut tmp = opener.inl.next_sibling().unwrap(); while !tmp.same_node(closer.inl) { let next = tmp.next_sibling(); emph.append(tmp); if let Some(n) = next { tmp = n; } else { break; } } opener.inl.insert_after(emph); // Drop the delimiters and return the next closer to process if opener_num_chars == 0 { opener.inl.detach(); self.remove_delimiter(opener); } if closer_num_chars == 0 { closer.inl.detach(); self.remove_delimiter(closer); closer.next.get() } else { Some(closer) } } pub fn handle_backslash(&mut self) -> &'a AstNode<'a> { let startpos = self.pos; self.pos += 1; if self.peek_char().map_or(false, |&c| ispunct(c)) { let inl; self.pos += 1; let inline_text = self.make_inline( NodeValue::Text(String::from_utf8(vec![self.input[self.pos - 1]]).unwrap()), self.pos - 2, self.pos - 1, ); if self.options.render.escaped_char_spans { inl = self.make_inline(NodeValue::Escaped, self.pos - 2, self.pos - 1); inl.append(inline_text); inl } else { inline_text } } else if !self.eof() && self.skip_line_end() { self.make_inline(NodeValue::LineBreak, startpos, self.pos - 1) } else { self.make_inline( NodeValue::Text("\\".to_string()), self.pos - 1, self.pos - 1, ) } } pub fn skip_line_end(&mut self) -> bool { let old_pos = self.pos; if self.peek_char() == Some(&(b'\r')) { self.pos += 1; } if self.peek_char() == Some(&(b'\n')) { self.pos += 1; } self.pos > old_pos || self.eof() } pub fn handle_entity(&mut self) -> &'a 
AstNode<'a> { self.pos += 1; match entity::unescape(&self.input[self.pos..]) { None => self.make_inline(NodeValue::Text("&".to_string()), self.pos - 1, self.pos - 1), Some((entity, len)) => { self.pos += len; self.make_inline( NodeValue::Text(String::from_utf8(entity).unwrap()), self.pos - 1 - len, self.pos - 1, ) } } } #[cfg(feature = "shortcodes")] pub fn handle_shortcodes_colon(&mut self) -> Option<&'a AstNode<'a>> { let matchlen = scanners::shortcode(&self.input[self.pos + 1..])?; let shortcode = unsafe { str::from_utf8_unchecked(&self.input[self.pos + 1..self.pos + 1 + matchlen - 1]) }; let nsc = NodeShortCode::resolve(shortcode)?; self.pos += 1 + matchlen; Some(self.make_inline( NodeValue::ShortCode(nsc), self.pos - 1 - matchlen, self.pos - 1, )) } pub fn handle_autolink_with( &mut self, node: &'a AstNode<'a>, f: F, ) -> Option<&'a AstNode<'a>> where F: Fn( &'a Arena>, &[u8], usize, bool, ) -> Option<(&'a AstNode<'a>, usize, usize)>, { if !self.options.parse.relaxed_autolinks && self.within_brackets { return None; } let (post, mut reverse, skip) = f( self.arena, self.input, self.pos, self.options.parse.relaxed_autolinks, )?; self.pos += skip - reverse; // We need to "rewind" by `reverse` chars, which should be in one or // more Text nodes beforehand. Typically the chars will *all* be in a // single Text node, containing whatever text came before the ":" that // triggered this method, eg. "See our website at http" ("://blah.com"). // // relaxed_autolinks allows some slightly pathological cases. First, // "://…" is a possible parse, meaning `reverse == 0`. There may also be // a scheme including the letter "w", which will split Text inlines due // to them being their own trigger (for handle_autolink_w), meaning // "wa://…" will need to traverse two Texts to complete the rewind. 
while reverse > 0 {
            match node.last_child().unwrap().data.borrow_mut().value {
                NodeValue::Text(ref mut prev) => {
                    if reverse < prev.len() {
                        // The remainder fits inside this Text node: trim it.
                        prev.truncate(prev.len() - reverse);
                        reverse = 0;
                    } else {
                        // The whole Text node belongs to the link: drop it.
                        reverse -= prev.len();
                        node.last_child().unwrap().detach();
                    }
                }
                _ => panic!("expected text node before autolink colon"),
            }
        }

        Some(post)
    }

    // Attempts a URL autolink at the current position via `autolink::url_match`.
    pub fn handle_autolink_colon(&mut self, node: &'a AstNode<'a>) -> Option<&'a AstNode<'a>> {
        self.handle_autolink_with(node, autolink::url_match)
    }

    // Attempts a "www" autolink at the current position via `autolink::www_match`.
    pub fn handle_autolink_w(&mut self, node: &'a AstNode<'a>) -> Option<&'a AstNode<'a>> {
        self.handle_autolink_with(node, autolink::www_match)
    }

    // Handles `<`: tries, in order, a URI autolink, an email autolink, and
    // the inline-HTML forms (comment, CDATA, declaration, processing
    // instruction, tag). Falls back to a literal "<" text node.
    pub fn handle_pointy_brace(&mut self) -> &'a AstNode<'a> {
        self.pos += 1;

        if let Some(matchlen) = scanners::autolink_uri(&self.input[self.pos..]) {
            self.pos += matchlen;
            let inl = self.make_autolink(
                &self.input[self.pos - matchlen..self.pos - 1],
                AutolinkType::Uri,
                self.pos - 1 - matchlen,
                self.pos - 1,
            );
            return inl;
        }

        if let Some(matchlen) = scanners::autolink_email(&self.input[self.pos..]) {
            self.pos += matchlen;
            let inl = self.make_autolink(
                &self.input[self.pos - matchlen..self.pos - 1],
                AutolinkType::Email,
                self.pos - 1 - matchlen,
                self.pos - 1,
            );
            return inl;
        }

        // Most comments below are verbatim from cmark upstream.
        let mut matchlen: Option<usize> = None;

        if self.pos + 2 <= self.input.len() {
            let c = self.input[self.pos];
            if c == b'!' && !self.flags.skip_html_comment {
                let c = self.input[self.pos + 1];
                if c == b'-' && self.peek_char_n(2) == Some(&b'-') {
                    if self.peek_char_n(3) == Some(&b'>') {
                        // "<!-->"
                        matchlen = Some(4);
                    } else if self.peek_char_n(3) == Some(&b'-')
                        && self.peek_char_n(4) == Some(&b'>')
                    {
                        // "<!--->"
                        matchlen = Some(5);
                    } else if let Some(m) = scanners::html_comment(&self.input[self.pos + 1..]) {
                        matchlen = Some(m + 1);
                    } else {
                        // No comment terminator found; don't rescan later.
                        self.flags.skip_html_comment = true;
                    }
                } else if c == b'[' {
                    if !self.flags.skip_html_cdata && self.pos + 3 <= self.input.len() {
                        if let Some(m) = scanners::html_cdata(&self.input[self.pos + 2..]) {
                            // The regex doesn't require the final "]]>". But if we're not at
                            // the end of input, it must come after the match. Otherwise,
                            // disable subsequent scans to avoid quadratic behavior.

                            // Adding 5 to matchlen for prefix "![", suffix "]]>"
                            if self.pos + m + 5 > self.input.len() {
                                self.flags.skip_html_cdata = true;
                            } else {
                                matchlen = Some(m + 5);
                            }
                        }
                    }
                } else if !self.flags.skip_html_declaration {
                    if let Some(m) = scanners::html_declaration(&self.input[self.pos + 1..]) {
                        // Adding 2 to matchlen for prefix "!", suffix ">"
                        if self.pos + m + 2 > self.input.len() {
                            self.flags.skip_html_declaration = true;
                        } else {
                            matchlen = Some(m + 2);
                        }
                    }
                }
            } else if c == b'?' {
                if !self.flags.skip_html_pi {
                    // Note that we allow an empty match.
                    let m = scanners::html_processing_instruction(&self.input[self.pos + 1..])
                        .unwrap_or(0);
                    // Adding 3 to matchlen for prefix "?", suffix "?>"
                    if self.pos + m + 3 > self.input.len() {
                        self.flags.skip_html_pi = true;
                    } else {
                        matchlen = Some(m + 3);
                    }
                }
            } else {
                matchlen = scanners::html_tag(&self.input[self.pos..]);
            }
        }

        if let Some(matchlen) = matchlen {
            // Include the opening "<" that was already consumed.
            let contents = &self.input[self.pos - 1..self.pos + matchlen];
            self.pos += matchlen;
            let inl = self.make_inline(
                NodeValue::HtmlInline(str::from_utf8(contents).unwrap().to_string()),
                self.pos - matchlen - 1,
                self.pos - 1,
            );
            self.adjust_node_newlines(inl, matchlen, 1);
            return inl;
        }

        self.make_inline(NodeValue::Text("<".to_string()), self.pos - 1, self.pos - 1)
    }

    // Records an opening bracket (`[` or `![`) on the bracket stack, marking
    // the previous top entry as having a bracket after it. A non-image
    // bracket re-enables link openers.
    pub fn push_bracket(&mut self, image: bool, inl_text: &'a AstNode<'a>) {
        let len = self.brackets.len();
        if len > 0 {
            self.brackets[len - 1].bracket_after = true;
        }
        self.brackets.push(Bracket {
            inl_text,
            position: self.pos,
            image,
            bracket_after: false,
        });
        if !image {
            self.no_link_openers = false;
        }
    }

    // Handles `]`. Returns `Some(text node)` holding a literal "]" when no
    // link, image or footnote reference could be formed; returns `None` when
    // one was formed and already inserted into the tree.
    pub fn handle_close_bracket(&mut self) -> Option<&'a AstNode<'a>> {
        self.pos += 1;
        let initial_pos = self.pos;

        let brackets_len = self.brackets.len();
        if brackets_len == 0 {
            return Some(self.make_inline(
                NodeValue::Text("]".to_string()),
                self.pos - 1,
                self.pos - 1,
            ));
        }

        let is_image = self.brackets[brackets_len - 1].image;

        if !is_image && self.no_link_openers {
            self.brackets.pop();
            return Some(self.make_inline(
                NodeValue::Text("]".to_string()),
                self.pos - 1,
                self.pos - 1,
            ));
        }

        // Ensure there was text if this was a link and not an image link
        if self.options.render.ignore_empty_links && !is_image {
            let mut non_blank_found = false;
            let mut tmpch = self.brackets[brackets_len - 1].inl_text.next_sibling();
            while let Some(tmp) = tmpch {
                match tmp.data.borrow().value {
                    NodeValue::Text(ref s) if is_blank(s.as_bytes()) => (),
                    _ => {
                        non_blank_found = true;
                        break;
                    }
                }

                tmpch = tmp.next_sibling();
            }

            if !non_blank_found {
                self.brackets.pop();
                return Some(self.make_inline(
NodeValue::Text("]".to_string()),
                    self.pos - 1,
                    self.pos - 1,
                ));
            }
        }

        // Remember where the link text ended so a failed inline-destination
        // parse can rewind to it.
        let after_link_text_pos = self.pos;

        // Try to find a link destination within parentheses

        let mut sps = 0;
        let mut url: &[u8] = &[];
        let mut n: usize = 0;
        // The block expression in the condition fills `sps`, `url` and `n`
        // as a side effect when it succeeds.
        if self.peek_char() == Some(&(b'(')) && {
            sps = scanners::spacechars(&self.input[self.pos + 1..]).unwrap_or(0);
            let offset = self.pos + 1 + sps;
            offset < self.input.len()
                && unwrap_into_2(
                    manual_scan_link_url(&self.input[offset..]),
                    &mut url,
                    &mut n,
                )
        } {
            let starturl = self.pos + 1 + sps;
            let endurl = starturl + n;
            let starttitle = endurl + scanners::spacechars(&self.input[endurl..]).unwrap_or(0);
            // A title is only looked for when whitespace separates it from
            // the URL.
            let endtitle = if starttitle == endurl {
                starttitle
            } else {
                starttitle + scanners::link_title(&self.input[starttitle..]).unwrap_or(0)
            };
            let endall = endtitle + scanners::spacechars(&self.input[endtitle..]).unwrap_or(0);

            if endall < self.input.len() && self.input[endall] == b')' {
                self.pos = endall + 1;
                let url = strings::clean_url(url);
                let title = strings::clean_title(&self.input[starttitle..endtitle]);
                self.close_bracket_match(
                    is_image,
                    String::from_utf8(url).unwrap(),
                    String::from_utf8(title).unwrap(),
                );
                return None;
            } else {
                self.pos = after_link_text_pos;
            }
        }

        // Try to see if this is a reference link

        let (mut lab, mut found_label) = match self.link_label() {
            Some(lab) => (lab.to_string(), true),
            None => ("".to_string(), false),
        };

        if !found_label {
            self.pos = initial_pos;
        }

        // With no (or an empty) explicit label, fall back to the bracketed
        // text itself as the label, unless another bracket was opened after
        // this one.
        if (!found_label || lab.is_empty()) && !self.brackets[brackets_len - 1].bracket_after {
            lab = str::from_utf8(
                &self.input[self.brackets[brackets_len - 1].position..initial_pos - 1],
            )
            .unwrap()
            .to_string();
            found_label = true;
        }

        // Need to normalize both to lookup in refmap and to call callback
        let unfolded_lab = lab.to_owned();
        let lab = strings::normalize_label(&lab, Case::Fold);
        let mut reff = if found_label {
            self.refmap.lookup(&lab)
        } else {
            None
        };

        // Attempt to use the provided broken link callback if a reference cannot be resolved
        if reff.is_none() {
            if let Some(callback) = &self.options.parse.broken_link_callback {
                reff = callback.lock().unwrap()(BrokenLinkReference {
                    normalized: &lab,
                    original: &unfolded_lab,
                });
            }
        }

        if let Some(reff) = reff {
            self.close_bracket_match(is_image, reff.url.clone(), reff.title);
            return None;
        }

        let bracket_inl_text = self.brackets[brackets_len - 1].inl_text;

        // Footnote reference: the node right after the `[` is text starting
        // with `^`.
        if self.options.extension.footnotes
            && match bracket_inl_text.next_sibling() {
                Some(n) => {
                    if n.data.borrow().value.text().is_some() {
                        n.data
                            .borrow()
                            .value
                            .text()
                            .unwrap()
                            .as_bytes()
                            .starts_with(&[b'^'])
                    } else {
                        false
                    }
                }
                _ => false,
            }
        {
            let mut text = String::new();
            let mut sibling_iterator = bracket_inl_text.following_siblings();

            self.pos = initial_pos;

            // Skip the initial node, which holds the `[`
            sibling_iterator.next().unwrap();

            // The footnote name could have been parsed into multiple text/htmlinline nodes.
            // For example `[^_foo]` gives `^`, `_`, and `foo`. So pull them together.
            // Since we're handling the closing bracket, the only siblings at this point are
            // related to the footnote name.
            for sibling in sibling_iterator {
                match sibling.data.borrow().value {
                    NodeValue::Text(ref literal) | NodeValue::HtmlInline(ref literal) => {
                        text.push_str(literal);
                    }
                    _ => {}
                };
            }

            // More than just the leading `^`, i.e. a non-empty name.
            if text.len() > 1 {
                let inl = self.make_inline(
                    NodeValue::FootnoteReference(NodeFootnoteReference {
                        name: text[1..].to_string(),
                        ref_num: 0,
                        ix: 0,
                    }),
                    // Overridden immediately below.
                    self.pos,
                    self.pos,
                );
                inl.data.borrow_mut().sourcepos.start.column =
                    bracket_inl_text.data.borrow().sourcepos.start.column;
                inl.data.borrow_mut().sourcepos.end.column = usize::try_from(
                    self.pos as isize + self.column_offset + self.line_offset as isize,
                )
                .unwrap();
                bracket_inl_text.insert_before(inl);

                // detach all the nodes, including bracket_inl_text
                sibling_iterator = bracket_inl_text.following_siblings();
                for sibling in sibling_iterator {
                    match sibling.data.borrow().value {
                        NodeValue::Text(_) | NodeValue::HtmlInline(_) => {
                            sibling.detach();
                        }
                        _ => {}
                    };
                }

                // We don't need to process emphasis for footnote names, so cleanup
                // any outstanding delimiters
                self.remove_delimiters(self.brackets[brackets_len - 1].position);

                self.brackets.pop();
                return None;
            }
        }

        // Nothing matched: the bracket is plain text.
        self.brackets.pop();
        self.pos = initial_pos;
        Some(self.make_inline(NodeValue::Text("]".to_string()), self.pos - 1, self.pos - 1))
    }

    // Builds the `Link`/`Image` node for the bracket on top of the bracket
    // stack: inserts it before the opening-bracket text node, moves every
    // following sibling into it, detaches the opening bracket, processes
    // emphasis for the delimiters pushed since the bracket, and pops the
    // bracket. After a (non-image) link, `no_link_openers` is set.
    pub fn close_bracket_match(&mut self, is_image: bool, url: String, title: String) {
        let brackets_len = self.brackets.len();

        let nl = NodeLink { url, title };
        let inl = self.make_inline(
            if is_image {
                NodeValue::Image(nl)
            } else {
                NodeValue::Link(nl)
            },
            // Manually set below.
            self.pos,
            self.pos,
        );
        inl.data.borrow_mut().sourcepos.start = self.brackets[brackets_len - 1]
            .inl_text
            .data
            .borrow()
            .sourcepos
            .start;
        inl.data.borrow_mut().sourcepos.end.column =
            usize::try_from(self.pos as isize + self.column_offset + self.line_offset as isize)
                .unwrap();

        self.brackets[brackets_len - 1].inl_text.insert_before(inl);
        let mut tmpch = self.brackets[brackets_len - 1].inl_text.next_sibling();
        while let Some(tmp) = tmpch {
            // Fetch the next sibling before `append` re-parents `tmp`.
            tmpch = tmp.next_sibling();
            inl.append(tmp);
        }
        self.brackets[brackets_len - 1].inl_text.detach();
        self.process_emphasis(self.brackets[brackets_len - 1].position);
        self.brackets.pop();

        if !is_image {
            self.no_link_openers = true;
        }
    }

    // Parses a `[label]` at the current position. On success `pos` is left
    // just past the closing `]` and the trimmed label text is returned. On
    // failure (no `[`, a nested `[`, end of input, or a label longer than
    // MAX_LINK_LABEL_LENGTH) `pos` is restored and `None` is returned.
    pub fn link_label(&mut self) -> Option<&str> {
        let startpos = self.pos;

        if self.peek_char() != Some(&(b'[')) {
            return None;
        }

        self.pos += 1;

        let mut length = 0;
        let mut c = 0;
        while unwrap_into_copy(self.peek_char(), &mut c) && c != b'[' && c != b']' {
            if c == b'\\' {
                // A backslash also consumes a following punctuation character.
                self.pos += 1;
                length += 1;
                if self.peek_char().map_or(false, |&c| ispunct(c)) {
                    self.pos += 1;
                    length += 1;
                }
            } else {
                self.pos += 1;
                length += 1;
            }
            if length > MAX_LINK_LABEL_LENGTH {
                self.pos = startpos;
                return None;
            }
        }

        if c == b']' {
            let raw_label = strings::trim_slice(&self.input[startpos + 1..self.pos]);
            self.pos += 1;
            Some(str::from_utf8(raw_label).unwrap())
        } else {
            self.pos = startpos;
            None
        }
    }

    // Handles wikilink syntax
    //   [[link text|url]]
    //   [[url|link text]]
    //
    // Returns `None` when `wikilink_url_link_label` finds no wikilink.
    pub fn handle_wikilink(&mut self) -> Option<&'a AstNode<'a>> {
        let startpos = self.pos;
        let component = self.wikilink_url_link_label()?;
        let url_clean = strings::clean_url(component.url);
        // Without an explicit label, the URL text doubles as the label.
        let (link_label, link_label_start_column, _link_label_end_column) =
            match component.link_label {
                Some((label, sc, ec)) => (entity::unescape_html(label), sc, ec),
                None => (
                    entity::unescape_html(component.url),
                    startpos + 1,
                    self.pos - 3,
                ),
            };

        let nl = NodeWikiLink {
            url: String::from_utf8(url_clean).unwrap(),
        };
        let inl = self.make_inline(NodeValue::WikiLink(nl), startpos - 1, self.pos
- 1);
        self.label_backslash_escapes(inl, link_label, link_label_start_column);
        Some(inl)
    }

    // Parses the inside of a wikilink, starting at the `[` found at `self.pos`.
    //
    // * `[url]]`        -> url only, no label.
    // * `[left|right]]` -> url and label; `wikilinks_title_after_pipe` selects
    //                      `left` as the url, otherwise `right` is the url.
    //
    // A label is returned together with its start and end offsets in the input.
    // On any failure `self.pos` is restored and None is returned.
    //
    // NOTE(review): the type argument of the return type was missing in this
    // copy of the file (`Option>`); `WikilinkComponents<'_>` is reconstructed
    // from the borrowed slices stored in it -- confirm against the declaration.
    fn wikilink_url_link_label(&mut self) -> Option<WikilinkComponents<'_>> {
        let left_startpos = self.pos;

        if self.peek_char() != Some(&(b'[')) {
            return None;
        }

        let found_left = self.wikilink_component();

        if !found_left {
            self.pos = left_startpos;
            return None;
        }

        let left = strings::trim_slice(&self.input[left_startpos + 1..self.pos]);

        if self.peek_char() == Some(&(b']')) && self.peek_char_n(1) == Some(&(b']')) {
            self.pos += 2;
            return Some(WikilinkComponents {
                url: left,
                link_label: None,
            });
        } else if self.peek_char() != Some(&(b'|')) {
            self.pos = left_startpos;
            return None;
        }

        let right_startpos = self.pos;
        let found_right = self.wikilink_component();

        if !found_right {
            self.pos = left_startpos;
            return None;
        }

        let right = strings::trim_slice(&self.input[right_startpos + 1..self.pos]);

        if self.peek_char() == Some(&(b']')) && self.peek_char_n(1) == Some(&(b']')) {
            self.pos += 2;

            if self.options.extension.wikilinks_title_after_pipe {
                Some(WikilinkComponents {
                    url: left,
                    link_label: Some((right, right_startpos + 1, self.pos - 3)),
                })
            } else {
                Some(WikilinkComponents {
                    url: right,
                    link_label: Some((left, left_startpos + 1, right_startpos - 1)),
                })
            }
        } else {
            self.pos = left_startpos;
            None
        }
    }

    // Locates the edge of a wikilink component (link label or url), and sets
    // self.pos to its end if it's found.
    //
    // Returns false without moving self.pos when the current byte is neither
    // `[` nor `|`, or when the component is longer than MAX_LINK_LABEL_LENGTH.
    // Scanning stops at the first `[`, `]` or `|`; the caller checks which
    // terminator was hit.
    fn wikilink_component(&mut self) -> bool {
        let startpos = self.pos;

        if self.peek_char() != Some(&(b'[')) && self.peek_char() != Some(&(b'|')) {
            return false;
        }

        self.pos += 1;

        let mut length = 0;
        let mut c = 0;
        while unwrap_into_copy(self.peek_char(), &mut c) && c != b'[' && c != b']' && c != b'|' {
            if c == b'\\' {
                // Consume the backslash, plus the next byte when it is an
                // escapable punctuation character.
                self.pos += 1;
                length += 1;
                if self.peek_char().map_or(false, |&c| ispunct(c)) {
                    self.pos += 1;
                    length += 1;
                }
            } else {
                self.pos += 1;
                length += 1;
            }
            if length > MAX_LINK_LABEL_LENGTH {
                self.pos = startpos;
                return false;
            }
        }

        true
    }

    // Given a label, handles backslash escaped characters.
Appends the resulting // nodes to the container fn label_backslash_escapes( &mut self, container: &'a AstNode<'a>, label: Vec, start_column: usize, ) { let mut startpos = 0; let mut offset = 0; let len = label.len(); while offset < len { let c = label[offset]; if c == b'\\' && (offset + 1) < len && ispunct(label[offset + 1]) { let preceding_text = self.make_inline( NodeValue::Text(String::from_utf8(label[startpos..offset].to_owned()).unwrap()), start_column + startpos, start_column + offset - 1, ); container.append(preceding_text); let inline_text = self.make_inline( NodeValue::Text(String::from_utf8(vec![label[offset + 1]]).unwrap()), start_column + offset, start_column + offset + 1, ); if self.options.render.escaped_char_spans { let span = self.make_inline( NodeValue::Escaped, start_column + offset, start_column + offset + 1, ); span.append(inline_text); container.append(span); } else { container.append(inline_text); } offset += 2; startpos = offset; } else { offset += 1; } } if startpos != offset { container.append(self.make_inline( NodeValue::Text(String::from_utf8(label[startpos..offset].to_owned()).unwrap()), start_column + startpos, start_column + offset - 1, )); } } pub fn spnl(&mut self) { self.skip_spaces(); if self.skip_line_end() { self.skip_spaces(); } } fn make_inline( &self, value: NodeValue, start_column: usize, end_column: usize, ) -> &'a AstNode<'a> { let start_column = start_column as isize + 1 + self.column_offset + self.line_offset as isize; let end_column = end_column as isize + 1 + self.column_offset + self.line_offset as isize; let ast = Ast { value, content: String::new(), sourcepos: ( self.line, usize::try_from(start_column).unwrap(), self.line, usize::try_from(end_column).unwrap(), ) .into(), internal_offset: 0, open: false, last_line_blank: false, table_visited: false, line_offsets: Vec::with_capacity(0), }; self.arena.alloc(Node::new(RefCell::new(ast))) } fn make_autolink( &self, url: &[u8], kind: AutolinkType, start_column: usize, 
end_column: usize,
    ) -> &'a AstNode<'a> {
        let inl = self.make_inline(
            NodeValue::Link(NodeLink {
                url: String::from_utf8(strings::clean_autolink(url, kind)).unwrap(),
                title: String::new(),
            }),
            start_column + 1,
            end_column + 1,
        );
        inl.append(self.make_inline(
            NodeValue::Text(String::from_utf8(entity::unescape_html(url)).unwrap()),
            start_column + 1,
            end_column - 1,
        ));
        inl
    }
}

/// Scans a link destination at the start of `input`.
///
/// When `input` starts with `<`, the destination runs up to the matching `>`:
/// a backslash skips the byte after it, and a newline or a second `<` aborts
/// the scan. The returned slice excludes the angle brackets and the returned
/// length includes them. `None` is also returned when no byte follows the
/// closing `>` (or when `>` is never found).
///
/// Any other input is handed to [`manual_scan_link_url_2`].
pub fn manual_scan_link_url(input: &[u8]) -> Option<(&[u8], usize)> {
    let len = input.len();
    let mut i = 0;

    if i < len && input[i] == b'<' {
        i += 1;
        while i < len {
            let b = input[i];
            if b == b'>' {
                i += 1;
                break;
            } else if b == b'\\' {
                i += 2;
            } else if b == b'\n' || b == b'<' {
                return None;
            } else {
                i += 1;
            }
        }
    } else {
        return manual_scan_link_url_2(input);
    }

    if i >= len {
        None
    } else {
        Some((&input[1..i - 1], i))
    }
}

/// Scans a link destination that is not wrapped in `<...>`.
///
/// The scan stops at the first whitespace or ASCII control byte, or at a `)`
/// that has no matching `(`. Backslash-escaped punctuation is skipped as a
/// pair. Returns the scanned prefix and its length, or `None` when:
///
/// * the first byte is whitespace or a control byte,
/// * parentheses nest more than 32 deep,
/// * parentheses are still open where the scan stops, or
/// * the scan reaches the end of `input` without hitting a terminator.
pub fn manual_scan_link_url_2(input: &[u8]) -> Option<(&[u8], usize)> {
    let len = input.len();
    let mut i = 0;
    // Current nesting depth of unescaped parentheses.
    let mut nb_p = 0;

    while i < len {
        if input[i] == b'\\' && i + 1 < len && ispunct(input[i + 1]) {
            i += 2;
        } else if input[i] == b'(' {
            nb_p += 1;
            i += 1;
            if nb_p > 32 {
                return None;
            }
        } else if input[i] == b')' {
            if nb_p == 0 {
                break;
            }
            nb_p -= 1;
            i += 1;
        } else if isspace(input[i]) || input[i].is_ascii_control() {
            if i == 0 {
                return None;
            }
            break;
        } else {
            i += 1;
        }
    }

    if i >= len || nb_p != 0 {
        None
    } else {
        Some((&input[..i], i))
    }
}

/// Allocates a closed inline node in `arena` with the given `value` and
/// `sourcepos`; it starts with empty content and no line offsets.
pub fn make_inline<'a>(
    arena: &'a Arena<AstNode<'a>>,
    value: NodeValue,
    sourcepos: Sourcepos,
) -> &'a AstNode<'a> {
    let ast = Ast {
        value,
        content: String::new(),
        sourcepos,
        internal_offset: 0,
        open: false,
        last_line_blank: false,
        table_visited: false,
        line_offsets: Vec::with_capacity(0),
    };
    arena.alloc(Node::new(RefCell::new(ast)))
}

/// Returns `(newlines, trailing)`: the number of `\n` bytes in `input`, and
/// the number of bytes after the last `\n` (all of `input` if it has none).
pub fn count_newlines(input: &[u8]) -> (usize, usize) {
    let mut nls = 0;
    let mut since_nl = 0;

    for &c in input {
        if c == b'\n' {
            nls += 1;
            since_nl = 0;
        } else {
            since_nl += 1;
        }
    }

    (nls, since_nl)
}
comrak-0.29.0/src/parser/math.rs000064400000000000000000000010331046102023000145470ustar 00000000000000
/// An inline math
span #[derive(Debug, Clone, PartialEq, Eq)] pub struct NodeMath { /// Whether this is dollar math (`$` or `$$`). /// `false` indicates it is code math pub dollar_math: bool, /// Whether this is display math (using `$$`) pub display_math: bool, /// The literal contents of the math span. /// As the contents are not interpreted as Markdown at all, /// they are contained within this structure, /// rather than inserted into a child inline of any kind. pub literal: String, } comrak-0.29.0/src/parser/mod.rs000064400000000000000000002717301046102023000144120ustar 00000000000000mod autolink; mod inlines; #[cfg(feature = "shortcodes")] pub mod shortcodes; mod table; pub mod math; pub mod multiline_block_quote; use crate::adapters::SyntaxHighlighterAdapter; use crate::arena_tree::Node; use crate::ctype::{isdigit, isspace}; use crate::entity; use crate::nodes::{self, NodeFootnoteDefinition, Sourcepos}; use crate::nodes::{ Ast, AstNode, ListDelimType, ListType, NodeCodeBlock, NodeDescriptionItem, NodeHeading, NodeHtmlBlock, NodeList, NodeValue, }; use crate::scanners::{self, SetextChar}; use crate::strings::{self, split_off_front_matter, Case}; use derive_builder::Builder; use std::cell::RefCell; use std::cmp::min; use std::collections::HashMap; use std::fmt::{self, Debug, Formatter}; use std::mem; use std::str; use std::sync::{Arc, Mutex}; use typed_arena::Arena; use crate::adapters::HeadingAdapter; use crate::parser::multiline_block_quote::NodeMultilineBlockQuote; use self::inlines::RefMap; const TAB_STOP: usize = 4; const CODE_INDENT: usize = 4; // Very deeply nested lists can cause quadratic performance issues. // This constant is used in open_new_blocks() to limit the nesting // depth. It is unlikely that a non-contrived markdown document will // be nested this deeply. const MAX_LIST_DEPTH: usize = 100; macro_rules! node_matches { ($node:expr, $( $pat:pat )|+) => {{ matches!( $node.data.borrow().value, $( $pat )|+ ) }}; } /// Parse a Markdown document to an AST. 
/// /// See the documentation of the crate root for an example. pub fn parse_document<'a>( arena: &'a Arena>, buffer: &str, options: &Options, ) -> &'a AstNode<'a> { let root: &'a AstNode<'a> = arena.alloc(Node::new(RefCell::new(Ast { value: NodeValue::Document, content: String::new(), sourcepos: (1, 1, 1, 1).into(), internal_offset: 0, open: true, last_line_blank: false, table_visited: false, line_offsets: Vec::with_capacity(0), }))); let mut parser = Parser::new(arena, root, options); let mut linebuf = Vec::with_capacity(buffer.len()); parser.feed(&mut linebuf, buffer, true); parser.finish(linebuf) } /// Parse a Markdown document to an AST, specifying /// [`ParseOptions::broken_link_callback`]. #[deprecated( since = "0.25.0", note = "The broken link callback has been moved into ParseOptions<'c>." )] pub fn parse_document_with_broken_link_callback<'a, 'c>( arena: &'a Arena>, buffer: &str, options: &Options<'c>, callback: Option>, ) -> &'a AstNode<'a> { let mut options_with_callback = options.clone(); options_with_callback.parse.broken_link_callback = callback.map(|cb| Arc::new(Mutex::new(cb))); parse_document(arena, buffer, &options_with_callback) } /// The type of the callback used when a reference link is encountered with no /// matching reference. /// /// The details of the broken reference are passed in the /// [`BrokenLinkReference`] argument. If a [`ResolvedReference`] is returned, it /// is used as the link; otherwise, no link is made and the reference text is /// preserved in its entirety. pub type BrokenLinkCallback<'c> = &'c mut dyn FnMut(BrokenLinkReference) -> Option; /// Struct to the broken link callback, containing details on the link reference /// which failed to find a match. #[derive(Debug)] pub struct BrokenLinkReference<'l> { /// The normalized reference link label. Unicode case folding is applied; /// see for a /// discussion on the details of what this exactly means. pub normalized: &'l str, /// The original text in the link label. 
pub original: &'l str,
}

/// State of the block-level parser that `parse_document` creates with
/// `Parser::new(arena, root, options)` and drives through `feed` and `finish`.
pub struct Parser<'a, 'o, 'c> {
    arena: &'a Arena<AstNode<'a>>,
    refmap: RefMap,
    root: &'a AstNode<'a>,
    current: &'a AstNode<'a>,
    line_number: usize,
    offset: usize,
    column: usize,
    thematic_break_kill_pos: usize,
    first_nonspace: usize,
    first_nonspace_column: usize,
    indent: usize,
    blank: bool,
    partially_consumed_tab: bool,
    curline_len: usize,
    curline_end_col: usize,
    last_line_length: usize,
    last_buffer_ended_with_cr: bool,
    total_size: usize,
    options: &'o Options<'c>,
}

#[derive(Default, Debug, Clone)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
/// Umbrella options struct. `'c` represents the lifetime of any callback
/// closure options may take.
pub struct Options<'c> {
    /// Enable CommonMark extensions.
    pub extension: ExtensionOptions,

    /// Configure parse-time options.
    pub parse: ParseOptions<'c>,

    /// Configure render-time options.
    pub render: RenderOptions,
}

#[non_exhaustive]
#[derive(Default, Debug, Clone, Builder)]
#[builder(default)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
/// Options to select extensions.
pub struct ExtensionOptions {
    /// Enables the
    /// [strikethrough extension](https://github.github.com/gfm/#strikethrough-extension-)
    /// from the GFM spec.
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// options.extension.strikethrough = true;
    /// assert_eq!(markdown_to_html("Hello ~world~ there.\n", &options),
    ///            "<p>Hello <del>world</del> there.</p>\n");
    /// ```
    pub strikethrough: bool,

    /// Enables the
    /// [tagfilter extension](https://github.github.com/gfm/#disallowed-raw-html-extension-)
    /// from the GFM spec.
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// options.extension.tagfilter = true;
    /// options.render.unsafe_ = true;
    /// assert_eq!(markdown_to_html("Hello <xmp>.\n\n<xmp>", &options),
    ///            "<p>Hello &lt;xmp>.</p>\n&lt;xmp>\n");
    /// ```
    pub tagfilter: bool,

    /// Enables the [table extension](https://github.github.com/gfm/#tables-extension-)
    /// from the GFM spec.
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// options.extension.table = true;
    /// assert_eq!(markdown_to_html("| a | b |\n|---|---|\n| c | d |\n", &options),
    ///            "<table>\n<thead>\n<tr>\n<th>a</th>\n<th>b</th>\n</tr>\n</thead>\n\
    ///             <tbody>\n<tr>\n<td>c</td>\n<td>d</td>\n</tr>\n</tbody>\n</table>\n");
    /// ```
    pub table: bool,

    /// Enables the [autolink extension](https://github.github.com/gfm/#autolinks-extension-)
    /// from the GFM spec.
    ///
    /// ```
    /// # use comrak::{markdown_to_html, Options};
    /// let mut options = Options::default();
    /// options.extension.autolink = true;
    /// assert_eq!(markdown_to_html("Hello www.github.com.\n", &options),
    ///            "<p>Hello <a href=\"http://www.github.com\">www.github.com</a>.</p>\n");
    /// ```
    pub autolink: bool,

    /// Enables the
    /// [task list items extension](https://github.github.com/gfm/#task-list-items-extension-)
    /// from the GFM spec.
    ///
    /// Note that the spec does not define the precise output, so only the bare essentials are
    /// rendered.
/// /// ``` /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// options.extension.tasklist = true; /// options.render.unsafe_ = true; /// assert_eq!(markdown_to_html("* [x] Done\n* [ ] Not done\n", &options), /// "<ul>\n<li><input type=\"checkbox\" checked=\"\" disabled=\"\" /> Done</li>\n\ /// <li><input type=\"checkbox\" disabled=\"\" /> Not done</li>\n</ul>\n"); /// ``` pub tasklist: bool, /// Enables the superscript Comrak extension. /// /// ``` /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// options.extension.superscript = true; /// assert_eq!(markdown_to_html("e = mc^2^.\n", &options), /// "<p>e = mc<sup>2</sup>.</p>\n"); /// ``` pub superscript: bool, /// Enables the header IDs Comrak extension. /// /// ``` /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// options.extension.header_ids = Some("user-content-".to_string()); /// assert_eq!(markdown_to_html("# README\n", &options), /// "<h1><a href=\"#readme\" aria-hidden=\"true\" class=\"anchor\" id=\"user-content-readme\"></a>README</h1>\n"); /// ``` pub header_ids: Option<String>, /// Enables the footnotes extension per `cmark-gfm`. /// /// For usage, see `src/tests.rs`. The extension is modelled after /// [Kramdown](https://kramdown.gettalong.org/syntax.html#footnotes). /// /// ``` /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// options.extension.footnotes = true; /// assert_eq!(markdown_to_html("Hi[^x].\n\n[^x]: A greeting.\n", &options), /// "<p>Hi<sup class=\"footnote-ref\"><a href=\"#fn-x\" id=\"fnref-x\" data-footnote-ref>1</a></sup>.</p>\n<section class=\"footnotes\" data-footnotes>\n<ol>\n<li id=\"fn-x\">\n<p>A greeting. 
<a href=\"#fnref-x\" class=\"footnote-backref\" data-footnote-backref data-footnote-backref-idx=\"1\" aria-label=\"Back to reference 1\">↩</a></p>\n</li>\n</ol>\n</section>\n"); /// ``` pub footnotes: bool, /// Enables the description lists extension. /// /// Each term must be defined in one paragraph, followed by a blank line, /// and then by the details. Details begins with a colon. /// /// Not (yet) compatible with render.sourcepos. /// /// ``` md /// First term /// /// : Details for the **first term** /// /// Second term /// /// : Details for the **second term** /// /// More details in second paragraph. /// ``` /// /// ``` /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// options.extension.description_lists = true; /// assert_eq!(markdown_to_html("Term\n\n: Definition", &options), /// "<dl><dt>Term</dt>\n<dd>\n<p>Definition</p>\n</dd>\n</dl>\n"); /// ``` pub description_lists: bool, /// Enables the front matter extension. /// /// Front matter, which begins with the delimiter string at the beginning of the file and ends /// at the end of the next line that contains only the delimiter, is passed through unchanged /// in markdown output and omitted from HTML output. /// /// ``` md /// --- /// layout: post /// title: Formatting Markdown with Comrak /// --- /// /// # Shorter Title /// /// etc. 
/// ``` /// /// ``` /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// options.extension.front_matter_delimiter = Some("---".to_owned()); /// assert_eq!( /// markdown_to_html("---\nlayout: post\n---\nText\n", &options), /// markdown_to_html("Text\n", &Options::default())); /// ``` /// /// ``` /// # use comrak::{format_commonmark, Arena, Options}; /// use comrak::parse_document; /// let mut options = Options::default(); /// options.extension.front_matter_delimiter = Some("---".to_owned()); /// let arena = Arena::new(); /// let input ="---\nlayout: post\n---\nText\n"; /// let root = parse_document(&arena, input, &options); /// let mut buf = Vec::new(); /// format_commonmark(&root, &options, &mut buf); /// assert_eq!(&String::from_utf8(buf).unwrap(), input); /// ``` pub front_matter_delimiter: Option<String>, /// Enables the multiline block quote extension. /// /// Place `>>>` before and after text to make it into /// a block quote. /// /// ``` md /// Paragraph one /// /// >>> /// Paragraph two /// /// - one /// - two /// >>> /// ``` /// /// ``` /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// options.extension.multiline_block_quotes = true; /// assert_eq!(markdown_to_html(">>>\nparagraph\n>>>", &options), /// "<blockquote>\n<p>paragraph</p>\n</blockquote>\n"); /// ``` pub multiline_block_quotes: bool, /// Enables math using dollar syntax. 
/// /// ``` md /// Inline math $1 + 2$ and display math $$x + y$$ /// /// $$ /// x^2 /// $$ /// ``` /// /// ``` /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// options.extension.math_dollars = true; /// assert_eq!(markdown_to_html("$1 + 2$ and $$x = y$$", &options), /// "<p><span data-math-style=\"inline\">1 + 2</span> and <span data-math-style=\"display\">x = y</span></p>\n"); /// assert_eq!(markdown_to_html("$$\nx^2\n$$\n", &options), /// "<p><span data-math-style=\"display\">\nx^2\n</span></p>\n"); /// ``` pub math_dollars: bool, /// Enables math using code syntax. /// /// ```` md /// Inline math $`1 + 2`$ /// /// ```math /// x^2 /// ``` /// ```` /// /// ``` /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// options.extension.math_code = true; /// assert_eq!(markdown_to_html("$`1 + 2`$", &options), /// "<p><code data-math-style=\"inline\">1 + 2</code></p>\n"); /// assert_eq!(markdown_to_html("```math\nx^2\n```\n", &options), /// "<pre><code class=\"language-math\" data-math-style=\"display\">x^2\n</code></pre>\n"); /// ``` pub math_code: bool, #[cfg(feature = "shortcodes")] #[cfg_attr(docsrs, doc(cfg(feature = "shortcodes")))] /// Phrases wrapped inside of ':' blocks will be replaced with emojis. /// /// ``` /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// assert_eq!(markdown_to_html("Happy Friday! :smile:", &options), /// "<p>Happy Friday! :smile:</p>\n"); /// /// options.extension.shortcodes = true; /// assert_eq!(markdown_to_html("Happy Friday! :smile:", &options), /// "<p>Happy Friday! 
😄</p>\n"); /// ``` pub shortcodes: bool, /// Enables wikilinks using title after pipe syntax /// /// ```` md /// [[url|link label]] /// ```` /// /// ``` /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// options.extension.wikilinks_title_after_pipe = true; /// assert_eq!(markdown_to_html("[[url|link label]]", &options), /// "<p><a href=\"url\" data-wikilink=\"true\">link label</a></p>\n"); /// ``` pub wikilinks_title_after_pipe: bool, /// Enables wikilinks using title before pipe syntax /// /// ```` md /// [[link label|url]] /// ```` /// /// ``` /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// options.extension.wikilinks_title_before_pipe = true; /// assert_eq!(markdown_to_html("[[link label|url]]", &options), /// "<p><a href=\"url\" data-wikilink=\"true\">link label</a></p>\n"); /// ``` pub wikilinks_title_before_pipe: bool, /// Enables underlines using double underscores /// /// ```md /// __underlined text__ /// ``` /// /// ``` /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// options.extension.underline = true; /// /// assert_eq!(markdown_to_html("__underlined text__", &options), /// "<p><u>underlined text</u></p>\n"); /// ``` pub underline: bool, /// Enables spoilers using double vertical bars /// /// ```md /// Darth Vader is ||Luke's father|| /// ``` /// /// ``` /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// options.extension.spoiler = true; /// /// assert_eq!(markdown_to_html("Darth Vader is ||Luke's father||", &options), /// "<p>Darth Vader is <span class=\"spoiler\">Luke's father</span></p>\n"); /// ``` pub spoiler: bool, /// Requires at least one space after a `>` character to generate a blockquote, /// and restarts blockquote nesting across unique lines of input /// /// ```md /// >implying implications /// /// > one /// > > two /// > three /// ``` /// /// ``` /// # use
comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// options.extension.greentext = true; /// /// assert_eq!(markdown_to_html(">implying implications", &options), /// "<p>&gt;implying implications</p>\n"); /// /// assert_eq!(markdown_to_html("> one\n> > two\n> three", &options), /// concat!( /// "<blockquote>\n", /// "<p>one</p>\n", /// "<blockquote>\n<p>two</p>\n</blockquote>\n", /// "<p>three</p>\n", /// "</blockquote>\n")); /// ``` pub greentext: bool, } #[non_exhaustive] #[derive(Default, Clone, Builder)] #[builder(default)] #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] /// Options for parser functions. pub struct ParseOptions<'c> { /// Punctuation (quotes, full-stops and hyphens) are converted into 'smart' punctuation. /// /// ``` /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// assert_eq!(markdown_to_html("'Hello,' \"world\" ...", &options), /// "<p>'Hello,' &quot;world&quot; ...</p>\n"); /// /// options.parse.smart = true; /// assert_eq!(markdown_to_html("'Hello,' \"world\" ...", &options), /// "<p>‘Hello,’ “world” …</p>\n"); /// ``` pub smart: bool, /// The default info string for fenced code blocks. /// /// ``` /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// assert_eq!(markdown_to_html("```\nfn hello();\n```\n", &options), /// "<pre><code>fn hello();\n</code></pre>\n"); /// /// options.parse.default_info_string = Some("rust".into()); /// assert_eq!(markdown_to_html("```\nfn hello();\n```\n", &options), /// "<pre><code class=\"language-rust\">fn hello();\n</code></pre>\n"); /// ``` pub default_info_string: Option<String>, /// Whether or not a simple `x` or `X` is used for tasklist or any other symbol is allowed. pub relaxed_tasklist_matching: bool, /// Relax parsing of autolinks, allow links to be detected inside brackets /// and allow all url schemes.
It is intended to allow a very specific type of autolink /// detection, such as `[this http://and.com that]` or `{http://foo.com}`, on a best can basis. /// /// ``` /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// options.extension.autolink = true; /// assert_eq!(markdown_to_html("[https://foo.com]", &options), /// "<p>[https://foo.com]</p>\n"); /// /// options.parse.relaxed_autolinks = true; /// assert_eq!(markdown_to_html("[https://foo.com]", &options), /// "<p>[<a href=\"https://foo.com\">https://foo.com</a>]</p>\n"); /// ``` pub relaxed_autolinks: bool, /// In case the parser encounters any potential links that have a broken /// reference (e.g `[foo]` when there is no `[foo]: url` entry at the /// bottom) the provided callback will be called with the reference name, /// both in normalized form and unmodified, and the returned pair will be /// used as the link destination and title if not [`None`]. /// /// ``` /// # use std::{str, sync::{Arc, Mutex}}; /// # use comrak::{Arena, ResolvedReference, parse_document, format_html, Options, BrokenLinkReference, ParseOptionsBuilder}; /// # use comrak::nodes::{AstNode, NodeValue}; /// # /// # fn main() -> std::io::Result<()> { /// let arena = Arena::new(); /// let mut cb = |link_ref: BrokenLinkReference| match link_ref.normalized { /// "foo" => Some(ResolvedReference { /// url: "https://www.rust-lang.org/".to_string(), /// title: "The Rust Language".to_string(), /// }), /// _ => None, /// }; /// let options = Options { /// parse: ParseOptionsBuilder::default() /// .broken_link_callback(Some(Arc::new(Mutex::new(&mut cb)))) /// .build() /// .unwrap(), /// ..Default::default() /// }; /// /// let root = parse_document( /// &arena, /// "# Cool input!\nWow look at this cool [link][foo]. 
A [broken link] renders as text.", /// &options, /// ); /// /// let mut output = Vec::new(); /// format_html(root, &Options::default(), &mut output)?; /// assert_eq!(str::from_utf8(&output).unwrap(), /// "<h1>Cool input!</h1>\n<p>Wow look at this cool \ /// <a href=\"https://www.rust-lang.org/\" title=\"The Rust Language\">link</a>. \ /// A [broken link] renders as text.</p>\n"); /// # Ok(()) /// # } #[cfg_attr(feature = "arbitrary", arbitrary(default))] pub broken_link_callback: Option<Arc<Mutex<BrokenLinkCallback<'c>>>>, } impl<'c> fmt::Debug for ParseOptions<'c> { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { let mut struct_fmt = f.debug_struct("ParseOptions"); struct_fmt.field("smart", &self.smart); struct_fmt.field("default_info_string", &self.default_info_string); struct_fmt.field("relaxed_tasklist_matching", &self.relaxed_tasklist_matching); struct_fmt.field("relaxed_autolinks", &self.relaxed_autolinks); struct_fmt.field( "broken_link_callback.is_some()", &self.broken_link_callback.is_some(), ); struct_fmt.finish() } } #[non_exhaustive] #[derive(Default, Debug, Clone, Copy, Builder)] #[builder(default)] #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] /// Options for formatter functions. pub struct RenderOptions { /// [Soft line breaks](http://spec.commonmark.org/0.27/#soft-line-breaks) in the input /// translate into hard line breaks in the output. /// /// ``` /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// assert_eq!(markdown_to_html("Hello.\nWorld.\n", &options), /// "<p>Hello.\nWorld.</p>\n"); /// /// options.render.hardbreaks = true; /// assert_eq!(markdown_to_html("Hello.\nWorld.\n", &options), /// "<p>Hello.<br />\nWorld.</p>\n"); /// ``` pub hardbreaks: bool, /// GitHub-style `<pre lang="xyz">` is used for fenced code blocks with info tags. 
/// /// ``` /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// assert_eq!(markdown_to_html("``` rust\nfn hello();\n```\n", &options), /// "<pre><code class=\"language-rust\">fn hello();\n</code></pre>\n"); /// /// options.render.github_pre_lang = true; /// assert_eq!(markdown_to_html("``` rust\nfn hello();\n```\n", &options), /// "<pre lang=\"rust\"><code>fn hello();\n</code></pre>\n"); /// ``` pub github_pre_lang: bool, /// Enable full info strings for code blocks /// /// ``` /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// assert_eq!(markdown_to_html("``` rust extra info\nfn hello();\n```\n", &options), /// "<pre><code class=\"language-rust\">fn hello();\n</code></pre>\n"); /// /// options.render.full_info_string = true; /// let html = markdown_to_html("``` rust extra info\nfn hello();\n```\n", &options); /// let re = regex::Regex::new(r#"data-meta="extra info""#).unwrap(); /// assert!(re.is_match(&html)); /// ``` pub full_info_string: bool, /// The wrap column when outputting CommonMark. /// /// ``` /// # use comrak::{parse_document, Options, format_commonmark}; /// # fn main() { /// # let arena = typed_arena::Arena::new(); /// let mut options = Options::default(); /// let node = parse_document(&arena, "hello hello hello hello hello hello", &options); /// let mut output = vec![]; /// format_commonmark(node, &options, &mut output).unwrap(); /// assert_eq!(String::from_utf8(output).unwrap(), /// "hello hello hello hello hello hello\n"); /// /// options.render.width = 20; /// let mut output = vec![]; /// format_commonmark(node, &options, &mut output).unwrap(); /// assert_eq!(String::from_utf8(output).unwrap(), /// "hello hello hello\nhello hello hello\n"); /// # } /// ``` pub width: usize, /// Allow rendering of raw HTML and potentially dangerous links. 
/// /// ``` /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// let input = "<script>\nalert('xyz');\n</script>\n\n\ /// Possibly <marquee>annoying</marquee>.\n\n\ /// [Dangerous](javascript:alert(document.cookie)).\n\n\ /// [Safe](http://commonmark.org).\n"; /// /// assert_eq!(markdown_to_html(input, &options), /// "<!-- raw HTML omitted -->\n\ /// <p>Possibly <!-- raw HTML omitted -->annoying<!-- raw HTML omitted -->.</p>\n\ /// <p><a href=\"\">Dangerous</a>.</p>\n\ /// <p><a href=\"http://commonmark.org\">Safe</a>.</p>\n"); /// /// options.render.unsafe_ = true; /// assert_eq!(markdown_to_html(input, &options), /// "<script>\nalert(\'xyz\');\n</script>\n\ /// <p>Possibly <marquee>annoying</marquee>.</p>\n\ /// <p><a href=\"javascript:alert(document.cookie)\">Dangerous</a>.</p>\n\ /// <p><a href=\"http://commonmark.org\">Safe</a>.</p>\n"); /// ``` pub unsafe_: bool, /// Escape raw HTML instead of clobbering it. /// ``` /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// let input = "<i>italic text</i>"; /// /// assert_eq!(markdown_to_html(input, &options), /// "<p><!-- raw HTML omitted -->italic text<!-- raw HTML omitted --></p>\n"); /// /// options.render.escape = true; /// assert_eq!(markdown_to_html(input, &options), /// "<p>&lt;i&gt;italic text&lt;/i&gt;</p>\n"); /// ``` pub escape: bool, /// Set the type of [bullet list marker](https://spec.commonmark.org/0.30/#bullet-list-marker) to use. 
Options are: /// /// * [`ListStyleType::Dash`] to use `-` (default) /// * [`ListStyleType::Plus`] to use `+` /// * [`ListStyleType::Star`] to use `*` /// /// ```rust /// # use comrak::{markdown_to_commonmark, Options, ListStyleType}; /// let mut options = Options::default(); /// let input = "- one\n- two\n- three"; /// assert_eq!(markdown_to_commonmark(input, &options), /// "- one\n- two\n- three\n"); // default is Dash /// /// options.render.list_style = ListStyleType::Plus; /// assert_eq!(markdown_to_commonmark(input, &options), /// "+ one\n+ two\n+ three\n"); /// /// options.render.list_style = ListStyleType::Star; /// assert_eq!(markdown_to_commonmark(input, &options), /// "* one\n* two\n* three\n"); /// ``` pub list_style: ListStyleType, /// Include source position attributes in HTML and XML output. /// /// Sourcepos information is reliable for all core block items, and most /// extensions. The description lists extension still has issues; see /// <https://github.com/kivikakk/comrak/blob/3bb6d4ce/src/tests/description_lists.rs#L60-L125>. /// /// Sourcepos information is **not** reliable for inlines, and is not /// included in HTML without also setting [`experimental_inline_sourcepos`]. /// See <https://github.com/kivikakk/comrak/pull/439> for a discussion. /// /// ```rust /// # use comrak::{markdown_to_commonmark_xml, Options}; /// let mut options = Options::default(); /// options.render.sourcepos = true; /// let input = "## Hello world!"; /// let xml = markdown_to_commonmark_xml(input, &options); /// assert!(xml.contains("<text sourcepos=\"1:4-1:15\" xml:space=\"preserve\">")); /// ``` pub sourcepos: bool, /// Include inline sourcepos in HTML output, which is known to have issues. /// See <https://github.com/kivikakk/comrak/pull/439> for a discussion. 
/// ```rust /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// options.render.sourcepos = true; /// let input = "Hello *world*!"; /// assert_eq!(markdown_to_html(input, &options), /// "<p data-sourcepos=\"1:1-1:14\">Hello <em>world</em>!</p>\n"); /// options.render.experimental_inline_sourcepos = true; /// assert_eq!(markdown_to_html(input, &options), /// "<p data-sourcepos=\"1:1-1:14\">Hello <em data-sourcepos=\"1:7-1:13\">world</em>!</p>\n"); /// ``` pub experimental_inline_sourcepos: bool, /// Wrap escaped characters in a `<span>` to allow any /// post-processing to recognize them. /// /// ```rust /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// let input = "Notify user \\@example"; /// /// assert_eq!(markdown_to_html(input, &options), /// "<p>Notify user @example</p>\n"); /// /// options.render.escaped_char_spans = true; /// assert_eq!(markdown_to_html(input, &options), /// "<p>Notify user <span data-escaped-char>@</span>example</p>\n"); /// ``` pub escaped_char_spans: bool, /// Ignore setext headings in input. /// /// ```rust /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// let input = "setext heading\n---"; /// /// assert_eq!(markdown_to_html(input, &options), /// "<h2>setext heading</h2>\n"); /// /// options.render.ignore_setext = true; /// assert_eq!(markdown_to_html(input, &options), /// "<p>setext heading</p>\n<hr />\n"); /// ``` pub ignore_setext: bool, /// Ignore empty links in input. /// /// ```rust /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// let input = "[]()"; /// /// assert_eq!(markdown_to_html(input, &options), /// "<p><a href=\"\"></a></p>\n"); /// /// options.render.ignore_empty_links = true; /// assert_eq!(markdown_to_html(input, &options), "<p>[]()</p>\n"); /// ``` pub ignore_empty_links: bool, /// Enables GFM quirks in HTML output which break CommonMark compatibility. 
/// /// ```rust /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// let input = "****abcd**** *_foo_*"; /// /// assert_eq!(markdown_to_html(input, &options), /// "<p><strong><strong>abcd</strong></strong> <em><em>foo</em></em></p>\n"); /// /// options.render.gfm_quirks = true; /// assert_eq!(markdown_to_html(input, &options), /// "<p><strong>abcd</strong> <em><em>foo</em></em></p>\n"); /// ``` pub gfm_quirks: bool, /// Prefer fenced code blocks when outputting CommonMark. /// /// ```rust /// # use std::str; /// # use comrak::{Arena, Options, format_commonmark, parse_document}; /// let arena = Arena::new(); /// let mut options = Options::default(); /// let input = "```\nhello\n```\n"; /// let root = parse_document(&arena, input, &options); /// /// let mut buf = Vec::new(); /// format_commonmark(&root, &options, &mut buf); /// assert_eq!(str::from_utf8(&buf).unwrap(), " hello\n"); /// /// buf.clear(); /// options.render.prefer_fenced = true; /// format_commonmark(&root, &options, &mut buf); /// assert_eq!(str::from_utf8(&buf).unwrap(), "```\nhello\n```\n"); /// ``` pub prefer_fenced: bool, /// Render the image as a figure element with the title as its caption. /// /// ```rust /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// let input = "![image](https://example.com/image.png \"this is an image\")"; /// /// assert_eq!(markdown_to_html(input, &options), /// "<p><img src=\"https://example.com/image.png\" alt=\"image\" title=\"this is an image\" /></p>\n"); /// /// options.render.figure_with_caption = true; /// assert_eq!(markdown_to_html(input, &options), /// "<p><figure><img src=\"https://example.com/image.png\" alt=\"image\" title=\"this is an image\" /><figcaption>this is an image</figcaption></figure></p>\n"); /// ``` pub figure_with_caption: bool, } #[non_exhaustive] #[derive(Default, Debug, Clone, Builder)] #[builder(default)] /// Umbrella plugins struct. 
pub struct Plugins<'p> { /// Configure render-time plugins. pub render: RenderPlugins<'p>, } #[non_exhaustive] #[derive(Default, Clone, Builder)] #[builder(default)] /// Plugins for alternative rendering. pub struct RenderPlugins<'p> { /// Provide a syntax highlighter adapter implementation for syntax /// highlighting of codefence blocks. /// ``` /// # use comrak::{markdown_to_html, Options, Plugins, markdown_to_html_with_plugins}; /// # use comrak::adapters::SyntaxHighlighterAdapter; /// use std::collections::HashMap; /// use std::io::{self, Write}; /// let options = Options::default(); /// let mut plugins = Plugins::default(); /// let input = "```rust\nfn main<'a>();\n```"; /// /// assert_eq!(markdown_to_html_with_plugins(input, &options, &plugins), /// "<pre><code class=\"language-rust\">fn main&lt;'a&gt;();\n</code></pre>\n"); /// /// pub struct MockAdapter {} /// impl SyntaxHighlighterAdapter for MockAdapter { /// fn write_highlighted(&self, output: &mut dyn Write, lang: Option<&str>, code: &str) -> io::Result<()> { /// write!(output, "<span class=\"lang-{}\">{}</span>", lang.unwrap(), code) /// } /// /// fn write_pre_tag(&self, output: &mut dyn Write, _attributes: HashMap<String, String>) -> io::Result<()> { /// output.write_all(b"<pre lang=\"rust\">") /// } /// /// fn write_code_tag(&self, output: &mut dyn Write, _attributes: HashMap<String, String>) -> io::Result<()> { /// output.write_all(b"<code class=\"language-rust\">") /// } /// } /// /// let adapter = MockAdapter {}; /// plugins.render.codefence_syntax_highlighter = Some(&adapter); /// /// assert_eq!(markdown_to_html_with_plugins(input, &options, &plugins), /// "<pre lang=\"rust\"><code class=\"language-rust\"><span class=\"lang-rust\">fn main<'a>();\n</span></code></pre>\n"); /// ``` pub codefence_syntax_highlighter: Option<&'p dyn SyntaxHighlighterAdapter>, /// Optional heading adapter pub heading_adapter: Option<&'p dyn HeadingAdapter>, } impl Debug for RenderPlugins<'_> { fn fmt(&self, f: &mut 
Formatter<'_>) -> std::fmt::Result { f.debug_struct("RenderPlugins") .field( "codefence_syntax_highlighter", &"impl SyntaxHighlighterAdapter", ) .finish() } } /// A reference link's resolved details. #[derive(Clone, Debug)] pub struct ResolvedReference { /// The destination URL of the reference link. pub url: String, /// The text of the link. pub title: String, } struct FootnoteDefinition<'a> { ix: Option<u32>, node: &'a AstNode<'a>, name: String, total_references: u32, } impl<'a, 'o, 'c: 'o> Parser<'a, 'o, 'c> { fn new(arena: &'a Arena<AstNode<'a>>, root: &'a AstNode<'a>, options: &'o Options<'c>) -> Self { Parser { arena, refmap: RefMap::new(), root, current: root, line_number: 0, offset: 0, column: 0, thematic_break_kill_pos: 0, first_nonspace: 0, first_nonspace_column: 0, indent: 0, blank: false, partially_consumed_tab: false, curline_len: 0, curline_end_col: 0, last_line_length: 0, last_buffer_ended_with_cr: false, total_size: 0, options, } } fn feed(&mut self, linebuf: &mut Vec<u8>, mut s: &str, eof: bool) { if let (0, Some(delimiter)) = ( self.total_size, &self.options.extension.front_matter_delimiter, ) { if let Some((front_matter, rest)) = split_off_front_matter(s, delimiter) { let node = self.add_child( self.root, NodeValue::FrontMatter(front_matter.to_string()), 1, ); s = rest; self.finalize(node).unwrap(); } } let s = s.as_bytes(); if s.len() > usize::MAX - self.total_size { self.total_size = usize::MAX; } else { self.total_size += s.len(); } let mut buffer = 0; if self.last_buffer_ended_with_cr && !s.is_empty() && s[0] == b'\n' { buffer += 1; } self.last_buffer_ended_with_cr = false; let end = s.len(); while buffer < end { let mut process = false; let mut eol = buffer; while eol < end { if strings::is_line_end_char(s[eol]) { process = true; break; } if s[eol] == 0 { break; } eol += 1; } if eol >= end && eof { process = true; } if process { if !linebuf.is_empty() { linebuf.extend_from_slice(&s[buffer..eol]); self.process_line(linebuf); 
linebuf.truncate(0);
                } else {
                    self.process_line(&s[buffer..eol]);
                }
            } else if eol < end && s[eol] == b'\0' {
                // A NUL byte split the line: keep what precedes it and put
                // U+FFFD in its place. The literal's bytes are borrowed
                // directly; no temporary `String` is needed.
                linebuf.extend_from_slice(&s[buffer..eol]);
                linebuf.extend_from_slice("\u{fffd}".as_bytes());
            } else {
                linebuf.extend_from_slice(&s[buffer..eol]);
            }

            buffer = eol;
            if buffer < end {
                if s[buffer] == b'\0' {
                    buffer += 1;
                } else {
                    if s[buffer] == b'\r' {
                        buffer += 1;
                        // Remember a chunk-final '\r' so that a '\n' opening
                        // the next chunk is skipped.
                        if buffer == end {
                            self.last_buffer_ended_with_cr = true;
                        }
                    }
                    if buffer < end && s[buffer] == b'\n' {
                        buffer += 1;
                    }
                }
            }
        }
    }

    /// Scans for a thematic break starting at `self.first_nonspace`.
    ///
    /// Returns `(n, true)` when the line holds at least three identical
    /// `*`, `_` or `-` bytes, optionally mixed with spaces/tabs, followed by
    /// a line end; `n` is the distance from `first_nonspace` through that
    /// line-end byte. Otherwise returns `(i, false)`, where `i` is the index
    /// at which scanning stopped.
    fn scan_thematic_break_inner(&mut self, line: &[u8]) -> (usize, bool) {
        let mut i = self.first_nonspace;

        if i >= line.len() {
            return (i, false);
        }

        let c = line[i];
        if c != b'*' && c != b'_' && c != b'-' {
            return (i, false);
        }

        let mut count = 1;
        let mut nextc;
        loop {
            i += 1;
            if i >= line.len() {
                return (i, false);
            }
            nextc = line[i];

            if nextc == c {
                count += 1;
            } else if nextc != b' ' && nextc != b'\t' {
                break;
            }
        }

        if count >= 3 && (nextc == b'\r' || nextc == b'\n') {
            ((i - self.first_nonspace) + 1, true)
        } else {
            (i, false)
        }
    }

    /// Wrapper around `scan_thematic_break_inner`.
    ///
    /// On success returns the matched length. On failure stores the index
    /// where the scan stopped in `thematic_break_kill_pos` and returns `None`.
    fn scan_thematic_break(&mut self, line: &[u8]) -> Option<usize> {
        let (offset, found) = self.scan_thematic_break_inner(line);
        if !found {
            self.thematic_break_kill_pos = offset;
            None
        } else {
            Some(offset)
        }
    }

    /// Locates the first byte at or after `self.offset` that is neither a
    /// space nor a tab, and refreshes `first_nonspace`,
    /// `first_nonspace_column`, `indent` and `blank`.
    ///
    /// Tabs advance the column to the next multiple of `TAB_STOP`. The scan
    /// is skipped when `first_nonspace` is already past `offset`; `indent`
    /// and `blank` are recomputed either way.
    fn find_first_nonspace(&mut self, line: &[u8]) {
        let mut chars_to_tab = TAB_STOP - (self.column % TAB_STOP);

        if self.first_nonspace <= self.offset {
            self.first_nonspace = self.offset;
            self.first_nonspace_column = self.column;

            loop {
                if self.first_nonspace >= line.len() {
                    break;
                }
                match line[self.first_nonspace] {
                    b' ' => {
                        self.first_nonspace += 1;
                        self.first_nonspace_column += 1;
                        chars_to_tab -= 1;
                        if chars_to_tab == 0 {
                            chars_to_tab = TAB_STOP;
                        }
                    }
                    b'\t' => {
                        self.first_nonspace += 1;
                        self.first_nonspace_column += chars_to_tab;
                        chars_to_tab = TAB_STOP;
                    }
                    _ => break,
                }
            }
        }

        self.indent = self.first_nonspace_column - self.column;
        self.blank = self.first_nonspace < line.len()
            && strings::is_line_end_char(line[self.first_nonspace]);
    }

    /// Runs one input line through the block parser.
    ///
    /// The line is given a trailing `\n` if it has no line ending, per-line
    /// cursor state is reset, and a UTF-8 byte order mark on the first line
    /// is skipped. Open blocks are then matched against the line, new blocks
    /// are opened, and the remaining text is added to the resulting container
    /// unless opening blocks changed `self.current`.
    fn process_line(&mut self, line: &[u8]) {
        let mut new_line: Vec<u8>;
        let line = if line.is_empty() || !strings::is_line_end_char(*line.last().unwrap()) {
            new_line = line.into();
            new_line.push(b'\n');
            &new_line
        } else {
            line
        };

        // `curline_end_col` is the line length without its trailing "\n" or
        // "\r\n".
        self.curline_len = line.len();
        self.curline_end_col = line.len();
        if self.curline_end_col > 0 && line[self.curline_end_col - 1] == b'\n' {
            self.curline_end_col -= 1;
        }
        if self.curline_end_col > 0 && line[self.curline_end_col - 1] == b'\r' {
            self.curline_end_col -= 1;
        }

        self.offset = 0;
        self.column = 0;
        self.first_nonspace = 0;
        self.first_nonspace_column = 0;
        self.indent = 0;
        self.thematic_break_kill_pos = 0;
        self.blank = false;
        self.partially_consumed_tab = false;

        // Skip a leading byte order mark (U+FEFF, three bytes in UTF-8) on
        // the very first line. Comparing bytes directly needs no `unsafe`
        // reinterpretation of the slice as `str`.
        if self.line_number == 0 && line.starts_with("\u{feff}".as_bytes()) {
            self.offset += 3;
        }

        self.line_number += 1;

        let mut all_matched = true;
        if let Some(last_matched_container) = self.check_open_blocks(line, &mut all_matched) {
            let mut container = last_matched_container;
            let current = self.current;
            self.open_new_blocks(&mut container, line, all_matched);

            if current.same_node(self.current) {
                self.add_text_to_container(container, last_matched_container, line);
            }
        }

        self.last_line_length = self.curline_end_col;

        self.curline_len = 0;
        self.curline_end_col = 0;
    }

    /// Matches the chain of open blocks against `line`.
    ///
    /// Sets `*all_matched` to whether every open block accepted the line.
    /// Returns the deepest container that matched (the parent of the first
    /// block that did not), or `None` when the inner check reported that
    /// processing of this line should stop.
    fn check_open_blocks(
        &mut self,
        line: &[u8],
        all_matched: &mut bool,
    ) -> Option<&'a AstNode<'a>> {
        let (new_all_matched, mut container, should_continue) =
            self.check_open_blocks_inner(self.root, line);

        *all_matched = new_all_matched;
        if !*all_matched {
            container = container.parent().unwrap();
        }

        if !should_continue {
            None
        } else {
            Some(container)
        }
    }

    /// Descends through the open last children starting at `container`,
    /// checking whether `line` continues each one.
    ///
    /// Returns `(all_matched, container, should_continue)`. When a block does
    /// not match, `container` is that unmatched block.
    fn check_open_blocks_inner(
        &mut self,
        mut container: &'a AstNode<'a>,
        line: &[u8],
    ) -> (bool, &'a AstNode<'a>, bool) {
        let mut should_continue = true;

        while nodes::last_child_is_open(container) {
            container = container.last_child().unwrap();
            let ast = &mut *container.data.borrow_mut();

            self.find_first_nonspace(line);

            match ast.value {
NodeValue::BlockQuote => {
                    // Every arm below asks whether `line` continues the open
                    // block of that kind; a failed check returns immediately
                    // with the unmatched container.
                    if !self.parse_block_quote_prefix(line) {
                        return (false, container, should_continue);
                    }
                }
                NodeValue::Item(ref nl) => {
                    if !self.parse_node_item_prefix(line, container, nl) {
                        return (false, container, should_continue);
                    }
                }
                NodeValue::DescriptionItem(ref di) => {
                    if !self.parse_description_item_prefix(line, container, di) {
                        return (false, container, should_continue);
                    }
                }
                NodeValue::CodeBlock(..) => {
                    // May clear `should_continue` (a closing fence was found).
                    if !self.parse_code_block_prefix(line, container, ast, &mut should_continue) {
                        return (false, container, should_continue);
                    }
                }
                NodeValue::HtmlBlock(ref nhb) => {
                    if !self.parse_html_block_prefix(nhb.block_type) {
                        return (false, container, should_continue);
                    }
                }
                NodeValue::Paragraph => {
                    // A blank line never continues a paragraph.
                    if self.blank {
                        return (false, container, should_continue);
                    }
                }
                NodeValue::Table(..) => {
                    if !table::matches(&line[self.first_nonspace..], self.options.extension.spoiler) {
                        return (false, container, should_continue);
                    }
                    continue;
                }
                NodeValue::Heading(..) | NodeValue::TableRow(..) | NodeValue::TableCell => {
                    // These blocks are never continued by a following line.
                    return (false, container, should_continue);
                }
                NodeValue::FootnoteDefinition(..) => {
                    if !self.parse_footnote_definition_block_prefix(line) {
                        return (false, container, should_continue);
                    }
                }
                NodeValue::MultilineBlockQuote(..) => {
                    // May clear `should_continue` (a closing fence was found).
                    if !self.parse_multiline_block_quote_prefix(
                        line,
                        container,
                        ast,
                        &mut should_continue,
                    ) {
                        return (false, container, should_continue);
                    }
                }
                _ => {}
            }
        }

        (true, container, should_continue)
    }

    /// Returns `true` unless the greentext extension is enabled and the byte
    /// right after `first_nonspace` is something other than a space or tab.
    ///
    /// Indexes `line[self.first_nonspace + 1]` without a bounds check; the
    /// callers visible here first verify `line[self.first_nonspace] == b'>'`.
    fn is_not_greentext(&mut self, line: &[u8]) -> bool {
        !self.options.extension.greentext || strings::is_space_or_tab(line[self.first_nonspace + 1])
    }

    /// Scans `s` for a setext heading underline, unless
    /// `options.render.ignore_setext` is set, in which case the result is
    /// always `None`.
    fn setext_heading_line(&mut self, s: &[u8]) -> Option<SetextChar> {
        match self.options.render.ignore_setext {
            false => scanners::setext_heading_line(s),
            true => None,
        }
    }

    /// Opens whatever new blocks `line` starts at the current offset,
    /// updating `*container` to the innermost block opened.
    ///
    /// Each loop iteration tries the block starts in a fixed order (the
    /// `else if` chain). The loop ends when the container is a code or HTML
    /// block, when nothing matched, or when the new container accepts lines
    /// directly. `all_matched` is the flag produced by `check_open_blocks`.
    fn open_new_blocks(&mut self, container: &mut &'a AstNode<'a>, line: &[u8], all_matched: bool) {
        // Out-parameters filled in by `unwrap_into` / `unwrap_into_2` below.
        let mut matched: usize = 0;
        let mut nl: NodeList = NodeList::default();
        let mut sc: scanners::SetextChar = scanners::SetextChar::Equals;
        // While set, an indented line is not turned into an indented code
        // block; cleared after the first iteration.
        let mut maybe_lazy = node_matches!(self.current, NodeValue::Paragraph);
        let mut depth = 0;

        while !node_matches!(
            container,
            NodeValue::CodeBlock(..) | NodeValue::HtmlBlock(..)
        ) {
            depth += 1;
            self.find_first_nonspace(line);
            let indented = self.indent >= CODE_INDENT;

            if !indented
                && self.options.extension.multiline_block_quotes
                && unwrap_into(
                    scanners::open_multiline_block_quote_fence(&line[self.first_nonspace..]),
                    &mut matched,
                )
            {
                // Multiline block quote fence: record fence length and
                // indentation, then consume the fence.
                let first_nonspace = self.first_nonspace;
                let offset = self.offset;
                let nmbc = NodeMultilineBlockQuote {
                    fence_length: matched,
                    fence_offset: first_nonspace - offset,
                };
                *container = self.add_child(
                    container,
                    NodeValue::MultilineBlockQuote(nmbc),
                    self.first_nonspace + 1,
                );
                self.advance_offset(line, first_nonspace + matched - offset, false);
            } else if !indented && line[self.first_nonspace] == b'>' && self.is_not_greentext(line)
            {
                // Block quote: consume the '>' and one optional following
                // space or tab.
                let blockquote_startpos = self.first_nonspace;

                let offset = self.first_nonspace + 1 - self.offset;
                self.advance_offset(line, offset, false);
                if strings::is_space_or_tab(line[self.offset]) {
                    self.advance_offset(line, 1, true);
                }
                *container =
                    self.add_child(container, NodeValue::BlockQuote, blockquote_startpos + 1);
            } else if !indented
                && unwrap_into(
scanners::atx_heading_start(&line[self.first_nonspace..]),
                    &mut matched,
                )
            {
                // ATX heading: consume the opening sequence, then count the
                // run of '#' bytes to obtain the level.
                let heading_startpos = self.first_nonspace;
                let offset = self.offset;
                self.advance_offset(line, heading_startpos + matched - offset, false);
                *container = self.add_child(
                    container,
                    NodeValue::Heading(NodeHeading::default()),
                    heading_startpos + 1,
                );

                let mut hashpos = line[self.first_nonspace..]
                    .iter()
                    .position(|&c| c == b'#')
                    .unwrap()
                    + self.first_nonspace;
                let mut level = 0;
                while line[hashpos] == b'#' {
                    level += 1;
                    hashpos += 1;
                }

                // Replace the placeholder heading with the real level.
                let container_ast = &mut container.data.borrow_mut();
                container_ast.value = NodeValue::Heading(NodeHeading {
                    level,
                    setext: false,
                });
                container_ast.internal_offset = matched;
            } else if !indented
                && unwrap_into(
                    scanners::open_code_fence(&line[self.first_nonspace..]),
                    &mut matched,
                )
            {
                // Fenced code block: remember the fence byte, its length and
                // its indentation.
                let first_nonspace = self.first_nonspace;
                let offset = self.offset;
                let ncb = NodeCodeBlock {
                    fenced: true,
                    fence_char: line[first_nonspace],
                    fence_length: matched,
                    fence_offset: first_nonspace - offset,
                    info: String::with_capacity(10),
                    literal: String::new(),
                };
                *container = self.add_child(
                    container,
                    NodeValue::CodeBlock(ncb),
                    self.first_nonspace + 1,
                );
                self.advance_offset(line, first_nonspace + matched - offset, false);
            } else if !indented
                && (unwrap_into(
                    scanners::html_block_start(&line[self.first_nonspace..]),
                    &mut matched,
                ) || (!node_matches!(container, NodeValue::Paragraph)
                    && unwrap_into(
                        scanners::html_block_start_7(&line[self.first_nonspace..]),
                        &mut matched,
                    )))
            {
                // HTML block. The `html_block_start_7` scanner is only tried
                // when the container is not a paragraph. `matched` is stored
                // as the block type.
                let nhb = NodeHtmlBlock {
                    block_type: matched as u8,
                    literal: String::new(),
                };

                *container = self.add_child(
                    container,
                    NodeValue::HtmlBlock(nhb),
                    self.first_nonspace + 1,
                );
            } else if !indented
                && node_matches!(container, NodeValue::Paragraph)
                && unwrap_into(
                    self.setext_heading_line(&line[self.first_nonspace..]),
                    &mut sc,
                )
            {
                // Setext underline below a paragraph: resolve reference link
                // definitions in the paragraph content first, and convert the
                // paragraph to a heading only if that call returns `true`.
                let has_content = {
                    let mut ast = container.data.borrow_mut();
                    self.resolve_reference_link_definitions(&mut ast.content)
                };
                if has_content {
                    container.data.borrow_mut().value = NodeValue::Heading(NodeHeading {
                        level: match sc {
                            scanners::SetextChar::Equals => 1,
                            scanners::SetextChar::Hyphen => 2,
                        },
                        setext: true,
                    });
                    // Consume the line up to its final byte.
                    let adv = line.len() - 1 - self.offset;
                    self.advance_offset(line, adv, false);
                }
            } else if !indented
                && !matches!(
                    (&container.data.borrow().value, all_matched),
                    (&NodeValue::Paragraph, false)
                )
                && self.thematic_break_kill_pos <= self.first_nonspace
                && unwrap_into(self.scan_thematic_break(line), &mut matched)
            {
                // Thematic break. Not attempted for a paragraph container
                // when `all_matched` is false, nor when an earlier failed
                // scan on this line already reached past `first_nonspace`.
                *container =
                    self.add_child(container, NodeValue::ThematicBreak, self.first_nonspace + 1);
                let adv = line.len() - 1 - self.offset;
                self.advance_offset(line, adv, false);
            } else if !indented
                && self.options.extension.footnotes
                && depth < MAX_LIST_DEPTH
                && unwrap_into(
                    scanners::footnote_definition(&line[self.first_nonspace..]),
                    &mut matched,
                )
            {
                // Footnote definition: the name is the text between the two
                // leading bytes and the first ']'.
                let mut c = &line[self.first_nonspace + 2..self.first_nonspace + matched];
                c = c.split(|&e| e == b']').next().unwrap();
                let offset = self.first_nonspace + matched - self.offset;
                self.advance_offset(line, offset, false);
                *container = self.add_child(
                    container,
                    NodeValue::FootnoteDefinition(NodeFootnoteDefinition {
                        name: str::from_utf8(c).unwrap().to_string(),
                        total_references: 0,
                    }),
                    self.first_nonspace + 1,
                );
                container.data.borrow_mut().internal_offset = matched;
            } else if !indented
                && self.options.extension.description_lists
                && line[self.first_nonspace] == b':'
                && self.parse_desc_list_details(container)
            {
                // Description details: consume the ':' and one optional
                // following space or tab. `parse_desc_list_details` has
                // already updated `*container`.
                let offset = self.first_nonspace + 1 - self.offset;
                self.advance_offset(line, offset, false);
                if strings::is_space_or_tab(line[self.offset]) {
                    self.advance_offset(line, 1, true);
                }
            } else if (!indented || node_matches!(container, NodeValue::List(..)))
                && self.indent < 4
                && depth < MAX_LIST_DEPTH
                && unwrap_into_2(
                    parse_list_marker(
                        line,
                        self.first_nonspace,
                        node_matches!(container, NodeValue::Paragraph),
                    ),
                    &mut matched,
                    &mut nl,
                )
            {
                // List item: consume the marker, then measure the whitespace
                // that follows it to work out the item's padding.
                let offset = self.first_nonspace + matched - self.offset;
                self.advance_offset(line, offset, false);
                let (save_partially_consumed_tab, save_offset, save_column) =
                    (self.partially_consumed_tab, self.offset, self.column);

                while self.column - save_column <= 5 && strings::is_space_or_tab(line[self.offset])
                {
                    self.advance_offset(line, 1, true);
                }

                let i = self.column - save_column;
                if !(1..5).contains(&i) || strings::is_line_end_char(line[self.offset]) {
                    // No whitespace, five or more columns of it, or nothing
                    // after it: padding is marker width plus one, and the
                    // cursor is rewound to just after the marker (plus one
                    // column if any whitespace was present).
                    nl.padding = matched + 1;
                    self.offset = save_offset;
                    self.column = save_column;
                    self.partially_consumed_tab = save_partially_consumed_tab;
                    if i > 0 {
                        self.advance_offset(line, 1, true);
                    }
                } else {
                    nl.padding = matched + i;
                }

                nl.marker_offset = self.indent;

                // Start a new list unless the container is already a list
                // that matches this marker.
                if match container.data.borrow().value {
                    NodeValue::List(ref mnl) => !lists_match(&nl, mnl),
                    _ => true,
                } {
                    *container =
                        self.add_child(container, NodeValue::List(nl), self.first_nonspace + 1);
                }

                *container =
                    self.add_child(container, NodeValue::Item(nl), self.first_nonspace + 1);
            } else if indented && !maybe_lazy && !self.blank {
                // Indented code block.
                self.advance_offset(line, CODE_INDENT, true);
                let ncb = NodeCodeBlock {
                    fenced: false,
                    fence_char: 0,
                    fence_length: 0,
                    fence_offset: 0,
                    info: String::new(),
                    literal: String::new(),
                };
                *container =
                    self.add_child(container, NodeValue::CodeBlock(ncb), self.offset + 1);
            } else {
                // Last resort: the table extension, when enabled. Anything
                // else ends the loop.
                let new_container = if !indented && self.options.extension.table {
                    table::try_opening_block(self, container, line)
                } else {
                    None
                };

                match new_container {
                    Some((new_container, replace, mark_visited)) => {
                        if replace {
                            // The new node takes the old container's place in
                            // the tree.
                            container.insert_after(new_container);
                            container.detach();
                            *container = new_container;
                        } else {
                            *container = new_container;
                        }
                        if mark_visited {
                            container.data.borrow_mut().table_visited = true;
                        }
                    }
                    _ => break,
                }
            }

            // A block that accepts lines cannot contain further blocks.
            if container.data.borrow().value.accepts_lines() {
                break;
            }

            maybe_lazy = false;
        }
    }

    /// Advances the cursor over `line`.
    ///
    /// With `columns == false`, `count` is a number of bytes; with
    /// `columns == true` it is a number of columns, so a tab may be consumed
    /// only in part (tracked by `partially_consumed_tab`).
    fn advance_offset(&mut self, line: &[u8], mut count: usize, columns: bool) {
        while count > 0 {
            match line[self.offset] {
                9 => {
                    // Tab: distance to the next tab stop.
                    let chars_to_tab = TAB_STOP - (self.column % TAB_STOP);
                    if columns {
                        self.partially_consumed_tab = chars_to_tab > count;
                        let chars_to_advance = min(count, chars_to_tab);
self.column += chars_to_advance;
                        // The byte offset only moves past the tab once all of
                        // its columns have been used.
                        self.offset += if self.partially_consumed_tab { 0 } else { 1 };
                        count -= chars_to_advance;
                    } else {
                        self.partially_consumed_tab = false;
                        self.column += chars_to_tab;
                        self.offset += 1;
                        count -= 1;
                    }
                }
                _ => {
                    self.partially_consumed_tab = false;
                    self.offset += 1;
                    self.column += 1;
                    count -= 1;
                }
            }
        }
    }

    /// Checks whether `line` continues an open block quote.
    ///
    /// Matches when the line is indented at most three columns and its first
    /// non-space byte is `>` (subject to the greentext check). On a match the
    /// indentation, the `>` and one optional following space or tab are
    /// consumed.
    fn parse_block_quote_prefix(&mut self, line: &[u8]) -> bool {
        let indent = self.indent;
        if indent <= 3 && line[self.first_nonspace] == b'>' && self.is_not_greentext(line) {
            self.advance_offset(line, indent + 1, true);

            if strings::is_space_or_tab(line[self.offset]) {
                self.advance_offset(line, 1, true);
            }

            return true;
        }

        false
    }

    /// Checks whether `line` continues an open footnote definition.
    ///
    /// Matches when the line is indented at least four columns (those four
    /// are consumed), or when the line is exactly `"\n"` or `"\r\n"`.
    fn parse_footnote_definition_block_prefix(&mut self, line: &[u8]) -> bool {
        if self.indent >= 4 {
            self.advance_offset(line, 4, true);
            true
        } else {
            line == b"\n" || line == b"\r\n"
        }
    }

    /// Continuation rule shared by list items and description items.
    ///
    /// The line continues the item when it is indented at least
    /// `required_indent` columns (that many columns are consumed), or when it
    /// is blank and the item already has a child (the leading whitespace is
    /// consumed).
    fn parse_item_continuation_prefix(
        &mut self,
        line: &[u8],
        container: &'a AstNode<'a>,
        required_indent: usize,
    ) -> bool {
        if self.indent >= required_indent {
            self.advance_offset(line, required_indent, true);
            true
        } else if self.blank && container.first_child().is_some() {
            let offset = self.first_nonspace - self.offset;
            self.advance_offset(line, offset, false);
            true
        } else {
            false
        }
    }

    /// Checks whether `line` continues the open list item `container`, whose
    /// required indentation is `nl.marker_offset + nl.padding`.
    fn parse_node_item_prefix(
        &mut self,
        line: &[u8],
        container: &'a AstNode<'a>,
        nl: &NodeList,
    ) -> bool {
        self.parse_item_continuation_prefix(line, container, nl.marker_offset + nl.padding)
    }

    /// Checks whether `line` continues the open description item
    /// `container`, whose required indentation is
    /// `di.marker_offset + di.padding`.
    fn parse_description_item_prefix(
        &mut self,
        line: &[u8],
        container: &'a AstNode<'a>,
        di: &NodeDescriptionItem,
    ) -> bool {
        self.parse_item_continuation_prefix(line, container, di.marker_offset + di.padding)
    }

    /// Checks whether `line` continues the open code block `container`.
    ///
    /// * Indented code blocks continue on lines indented by `CODE_INDENT` or
    ///   more, and on blank lines.
    /// * Fenced code blocks continue until a closing fence at least as long
    ///   as the opening one is found. In that case the block is finalized,
    ///   `*should_continue` is cleared and `false` is returned. Otherwise up
    ///   to `fence_offset` leading spaces/tabs are consumed.
    ///
    /// `ast` must hold a `NodeValue::CodeBlock`; anything else hits
    /// `unreachable!()`.
    fn parse_code_block_prefix(
        &mut self,
        line: &[u8],
        container: &'a AstNode<'a>,
        ast: &mut Ast,
        should_continue: &mut bool,
    ) -> bool {
        let (fenced, fence_char, fence_length, fence_offset) = match ast.value {
            NodeValue::CodeBlock(ref ncb) => (
                ncb.fenced,
                ncb.fence_char,
                ncb.fence_length,
                ncb.fence_offset,
            ),
            _ => unreachable!(),
        };

        if !fenced {
            if self.indent >= CODE_INDENT {
                self.advance_offset(line, CODE_INDENT, true);
                return true;
            } else if self.blank {
                let offset = self.first_nonspace - self.offset;
                self.advance_offset(line, offset, false);
                return true;
            }
            return false;
        }

        // Length of a closing fence on this line, or 0 if there is none.
        let matched = if self.indent <= 3 && line[self.first_nonspace] == fence_char {
            scanners::close_code_fence(&line[self.first_nonspace..]).unwrap_or(0)
        } else {
            0
        };

        if matched >= fence_length {
            *should_continue = false;
            self.advance_offset(line, matched, false);
            self.current = self.finalize_borrowed(container, ast).unwrap();
            return false;
        }

        // Strip up to `fence_offset` columns of leading whitespace.
        let mut i = fence_offset;
        while i > 0 && strings::is_space_or_tab(line[self.offset]) {
            self.advance_offset(line, 1, true);
            i -= 1;
        }
        true
    }

    /// Checks whether the current line continues an open HTML block of type
    /// `t`: types 1 through 5 always continue, types 6 and 7 continue unless
    /// the line is blank. Any other value hits `unreachable!()`.
    fn parse_html_block_prefix(&mut self, t: u8) -> bool {
        match t {
            1..=5 => true,
            6 | 7 => !self.blank,
            _ => unreachable!(),
        }
    }

    /// Handles a description-details marker.
    ///
    /// If the last child of `*container` is a paragraph, that paragraph
    /// becomes the term of a new description item, `*container` is set to the
    /// new details node and `true` is returned. Otherwise nothing changes and
    /// `false` is returned.
    fn parse_desc_list_details(&mut self, container: &mut &'a AstNode<'a>) -> bool {
        let last_child = match container.last_child() {
            Some(lc) => lc,
            None => return false,
        };

        if node_matches!(last_child, NodeValue::Paragraph) {
            // We have found the details after the paragraph for the term.
            //
            // This paragraph is moved as a child of a new DescriptionTerm node.
            //
            // If the node before the paragraph is a description list, the item
            // is added to it. If not, create a new list.

            last_child.detach();
            let last_child_sourcepos = last_child.data.borrow().sourcepos;

            // TODO: description list sourcepos has issues.
            //
            // DescriptionItem:
            //   For all but the last, the end line/col is wrong.
            //   Where it should be l:c, it gives (l+1):0.
            //
            // DescriptionTerm:
            //   All are incorrect; they all give the start line/col of
            //   the DescriptionDetails, and the end line/col is completely off.
            //
            // DescriptionDetails:
            //   Same as the DescriptionItem.  All but last, the end line/col
            //   is (l+1):0.
            //
            // See crate::tests::description_lists::sourcepos.
let list = match container.last_child() { Some(lc) if node_matches!(lc, NodeValue::DescriptionList) => { reopen_ast_nodes(lc); lc } _ => { let list = self.add_child( container, NodeValue::DescriptionList, self.first_nonspace + 1, ); list.data.borrow_mut().sourcepos.start = last_child_sourcepos.start; list } }; let metadata = NodeDescriptionItem { marker_offset: self.indent, padding: 2, }; let item = self.add_child( list, NodeValue::DescriptionItem(metadata), self.first_nonspace + 1, ); item.data.borrow_mut().sourcepos.start = last_child_sourcepos.start; let term = self.add_child(item, NodeValue::DescriptionTerm, self.first_nonspace + 1); let details = self.add_child(item, NodeValue::DescriptionDetails, self.first_nonspace + 1); term.append(last_child); *container = details; true } else { false } } fn parse_multiline_block_quote_prefix( &mut self, line: &[u8], container: &'a AstNode<'a>, ast: &mut Ast, should_continue: &mut bool, ) -> bool { let (fence_length, fence_offset) = match ast.value { NodeValue::MultilineBlockQuote(ref node_value) => { (node_value.fence_length, node_value.fence_offset) } _ => unreachable!(), }; let matched = if self.indent <= 3 && line[self.first_nonspace] == b'>' { scanners::close_multiline_block_quote_fence(&line[self.first_nonspace..]).unwrap_or(0) } else { 0 }; if matched >= fence_length { *should_continue = false; self.advance_offset(line, matched, false); // The last child, like an indented codeblock, could be left open. // Make sure it's finalized. 
if nodes::last_child_is_open(container) { let child = container.last_child().unwrap(); let child_ast = &mut *child.data.borrow_mut(); self.finalize_borrowed(child, child_ast).unwrap(); } self.current = self.finalize_borrowed(container, ast).unwrap(); return false; } let mut i = fence_offset; while i > 0 && strings::is_space_or_tab(line[self.offset]) { self.advance_offset(line, 1, true); i -= 1; } true } fn add_child( &mut self, mut parent: &'a AstNode<'a>, value: NodeValue, start_column: usize, ) -> &'a AstNode<'a> { while !nodes::can_contain_type(parent, &value) { parent = self.finalize(parent).unwrap(); } assert!(start_column > 0); let child = Ast::new(value, (self.line_number, start_column).into()); let node = self.arena.alloc(Node::new(RefCell::new(child))); parent.append(node); node } fn add_text_to_container( &mut self, mut container: &'a AstNode<'a>, last_matched_container: &'a AstNode<'a>, line: &[u8], ) { self.find_first_nonspace(line); if self.blank { if let Some(last_child) = container.last_child() { last_child.data.borrow_mut().last_line_blank = true; } } container.data.borrow_mut().last_line_blank = self.blank && match container.data.borrow().value { NodeValue::BlockQuote | NodeValue::Heading(..) | NodeValue::ThematicBreak => false, NodeValue::CodeBlock(ref ncb) => !ncb.fenced, NodeValue::Item(..) => { container.first_child().is_some() || container.data.borrow().sourcepos.start.line != self.line_number } NodeValue::MultilineBlockQuote(..) 
=> false, _ => true, }; let mut tmp = container; while let Some(parent) = tmp.parent() { parent.data.borrow_mut().last_line_blank = false; tmp = parent; } if !self.current.same_node(last_matched_container) && container.same_node(last_matched_container) && !self.blank && (!self.options.extension.greentext || !matches!( container.data.borrow().value, NodeValue::BlockQuote | NodeValue::Document )) && node_matches!(self.current, NodeValue::Paragraph) { self.add_line(self.current, line); } else { while !self.current.same_node(last_matched_container) { self.current = self.finalize(self.current).unwrap(); } let add_text_result = match container.data.borrow().value { NodeValue::CodeBlock(..) => AddTextResult::LiteralText, NodeValue::HtmlBlock(ref nhb) => AddTextResult::HtmlBlock(nhb.block_type), _ => AddTextResult::Otherwise, }; match add_text_result { AddTextResult::LiteralText => { self.add_line(container, line); } AddTextResult::HtmlBlock(block_type) => { self.add_line(container, line); let matches_end_condition = match block_type { 1 => scanners::html_block_end_1(&line[self.first_nonspace..]), 2 => scanners::html_block_end_2(&line[self.first_nonspace..]), 3 => scanners::html_block_end_3(&line[self.first_nonspace..]), 4 => scanners::html_block_end_4(&line[self.first_nonspace..]), 5 => scanners::html_block_end_5(&line[self.first_nonspace..]), _ => false, }; if matches_end_condition { container = self.finalize(container).unwrap(); } } _ => { if self.blank { // do nothing } else if container.data.borrow().value.accepts_lines() { let mut line: Vec<u8> = line.into(); if let NodeValue::Heading(ref nh) = container.data.borrow().value { if !nh.setext { strings::chop_trailing_hashtags(&mut line); } }; let count = self.first_nonspace - self.offset; // In a rare case the above `chop` operation can leave // the line shorter than the recorded `first_nonspace` // This happens with ATX headers containing no header // text, multiple spaces and trailing hashes, e.g // // ### ### // // 
In this case `first_nonspace` indexes into the second // set of hashes, while `chop_trailing_hashtags` truncates // `line` to just `###` (the first three hashes). // In this case there's no text to add, and no further // processing to be done. let have_line_text = self.first_nonspace <= line.len(); if have_line_text { self.advance_offset(&line, count, false); self.add_line(container, &line); } } else { container = self.add_child( container, NodeValue::Paragraph, self.first_nonspace + 1, ); let count = self.first_nonspace - self.offset; self.advance_offset(line, count, false); self.add_line(container, line); } } } self.current = container; } } fn add_line(&mut self, node: &'a AstNode<'a>, line: &[u8]) { let mut ast = node.data.borrow_mut(); assert!(ast.open); if self.partially_consumed_tab { self.offset += 1; let chars_to_tab = TAB_STOP - (self.column % TAB_STOP); for _ in 0..chars_to_tab { ast.content.push(' '); } } if self.offset < line.len() { // since whitespace is stripped off the beginning of lines, we need to keep // track of how much was stripped off. This allows us to properly calculate // inline sourcepos during inline processing. 
ast.line_offsets.push(self.offset); ast.content .push_str(str::from_utf8(&line[self.offset..]).unwrap()); } } fn finish(&mut self, remaining: Vec<u8>) -> &'a AstNode<'a> { if !remaining.is_empty() { self.process_line(&remaining); } self.finalize_document(); self.postprocess_text_nodes(self.root); self.root } fn finalize_document(&mut self) { while !self.current.same_node(self.root) { self.current = self.finalize(self.current).unwrap(); } self.finalize(self.root); self.refmap.max_ref_size = if self.total_size > 100000 { self.total_size } else { 100000 }; self.process_inlines(); if self.options.extension.footnotes { self.process_footnotes(); } } fn finalize(&mut self, node: &'a AstNode<'a>) -> Option<&'a AstNode<'a>> { self.finalize_borrowed(node, &mut node.data.borrow_mut()) } fn resolve_reference_link_definitions(&mut self, content: &mut String) -> bool { let mut seeked = 0; { let mut pos = 0; let mut seek: &[u8] = content.as_bytes(); while !seek.is_empty() && seek[0] == b'[' && unwrap_into(self.parse_reference_inline(seek), &mut pos) { seek = &seek[pos..]; seeked += pos; } } if seeked != 0 { *content = content[seeked..].to_string(); } !strings::is_blank(content.as_bytes()) } fn finalize_borrowed( &mut self, node: &'a AstNode<'a>, ast: &mut Ast, ) -> Option<&'a AstNode<'a>> { assert!(ast.open); ast.open = false; let content = &mut ast.content; let parent = node.parent(); if self.curline_len == 0 { ast.sourcepos.end = (self.line_number, self.last_line_length).into(); } else if match ast.value { NodeValue::Document => true, NodeValue::CodeBlock(ref ncb) => ncb.fenced, NodeValue::MultilineBlockQuote(..) 
=> true, _ => false, } { ast.sourcepos.end = (self.line_number, self.curline_end_col).into(); } else { ast.sourcepos.end = (self.line_number - 1, self.last_line_length).into(); } match ast.value { NodeValue::Paragraph => { let has_content = self.resolve_reference_link_definitions(content); if !has_content { node.detach(); } } NodeValue::CodeBlock(ref mut ncb) => { if !ncb.fenced { strings::remove_trailing_blank_lines(content); content.push('\n'); } else { let mut pos = 0; while pos < content.len() { if strings::is_line_end_char(content.as_bytes()[pos]) { break; } pos += 1; } assert!(pos < content.len()); let mut tmp = entity::unescape_html(&content.as_bytes()[..pos]); strings::trim(&mut tmp); strings::unescape(&mut tmp); if tmp.is_empty() { ncb.info = self .options .parse .default_info_string .as_ref() .map_or(String::new(), |s| s.clone()); } else { ncb.info = String::from_utf8(tmp).unwrap(); } if content.as_bytes()[pos] == b'\r' { pos += 1; } if content.as_bytes()[pos] == b'\n' { pos += 1; } content.drain(..pos); } mem::swap(&mut ncb.literal, content); } NodeValue::HtmlBlock(ref mut nhb) => { mem::swap(&mut nhb.literal, content); } NodeValue::List(ref mut nl) => { nl.tight = true; let mut ch = node.first_child(); while let Some(item) = ch { if item.data.borrow().last_line_blank && item.next_sibling().is_some() { nl.tight = false; break; } let mut subch = item.first_child(); while let Some(subitem) = subch { if (item.next_sibling().is_some() || subitem.next_sibling().is_some()) && nodes::ends_with_blank_line(subitem) { nl.tight = false; break; } subch = subitem.next_sibling(); } if !nl.tight { break; } ch = item.next_sibling(); } } _ => (), } parent } fn process_inlines(&mut self) { self.process_inlines_node(self.root); } fn process_inlines_node(&mut self, node: &'a AstNode<'a>) { for node in node.descendants() { if node.data.borrow().value.contains_inlines() { self.parse_inlines(node); } } } fn parse_inlines(&mut self, node: &'a AstNode<'a>) { let delimiter_arena 
= Arena::new(); let node_data = node.data.borrow(); let content = strings::rtrim_slice(node_data.content.as_bytes()); let mut subj = inlines::Subject::new( self.arena, self.options, content, node_data.sourcepos.start.line, &mut self.refmap, &delimiter_arena, ); while subj.parse_inline(node) {} subj.process_emphasis(0); while subj.pop_bracket() {} } fn process_footnotes(&mut self) { let mut map = HashMap::new(); Self::find_footnote_definitions(self.root, &mut map); let mut ix = 0; Self::find_footnote_references(self.root, &mut map, &mut ix); if !map.is_empty() { // In order for references to be found inside footnote definitions, // such as `[^1]: another reference[^2]`, // the node needed to remain in the AST. Now we can remove them. Self::cleanup_footnote_definitions(self.root); } if ix > 0 { let mut v = map.into_values().collect::<Vec<_>>(); v.sort_unstable_by(|a, b| a.ix.cmp(&b.ix)); for f in v { if f.ix.is_some() { match f.node.data.borrow_mut().value { NodeValue::FootnoteDefinition(ref mut nfd) => { nfd.name = f.name.to_string(); nfd.total_references = f.total_references; } _ => unreachable!(), } self.root.append(f.node); } } } } fn find_footnote_definitions( node: &'a AstNode<'a>, map: &mut HashMap<String, FootnoteDefinition<'a>>, ) { match node.data.borrow().value { NodeValue::FootnoteDefinition(ref nfd) => { map.insert( strings::normalize_label(&nfd.name, Case::Fold), FootnoteDefinition { ix: None, node, name: strings::normalize_label(&nfd.name, Case::Preserve), total_references: 0, }, ); } _ => { for n in node.children() { Self::find_footnote_definitions(n, map); } } } } fn find_footnote_references( node: &'a AstNode<'a>, map: &mut HashMap<String, FootnoteDefinition>, ixp: &mut u32, ) { let mut ast = node.data.borrow_mut(); let mut replace = None; match ast.value { NodeValue::FootnoteReference(ref mut nfr) => { let normalized = strings::normalize_label(&nfr.name, Case::Fold); if let Some(ref mut footnote) = map.get_mut(&normalized) { let ix = match 
footnote.ix { Some(ix) => ix, None => { *ixp += 1; footnote.ix = Some(*ixp); *ixp } }; footnote.total_references += 1; nfr.ref_num = footnote.total_references; nfr.ix = ix; nfr.name = strings::normalize_label(&footnote.name, Case::Preserve); } else { replace = Some(nfr.name.clone()); } } _ => { for n in node.children() { Self::find_footnote_references(n, map, ixp); } } } if let Some(mut label) = replace { label.insert_str(0, "[^"); label.push(']'); ast.value = NodeValue::Text(label); } } fn cleanup_footnote_definitions(node: &'a AstNode<'a>) { match node.data.borrow().value { NodeValue::FootnoteDefinition(_) => { node.detach(); } _ => { for n in node.children() { Self::cleanup_footnote_definitions(n); } } } } fn postprocess_text_nodes(&mut self, node: &'a AstNode<'a>) { let mut stack = vec![node]; let mut children = vec![]; while let Some(node) = stack.pop() { let mut nch = node.first_child(); while let Some(n) = nch { let mut this_bracket = false; let n_ast = &mut n.data.borrow_mut(); let mut sourcepos = n_ast.sourcepos; loop { match n_ast.value { // Join adjacent text nodes together NodeValue::Text(ref mut root) => { let ns = match n.next_sibling() { Some(ns) => ns, _ => { // Post-process once we are finished joining text nodes self.postprocess_text_node(n, root, &mut sourcepos); break; } }; match ns.data.borrow().value { NodeValue::Text(ref adj) => { root.push_str(adj); sourcepos.end.column = ns.data.borrow().sourcepos.end.column; ns.detach(); } _ => { // Post-process once we are finished joining text nodes self.postprocess_text_node(n, root, &mut sourcepos); break; } } } NodeValue::Link(..) | NodeValue::Image(..) | NodeValue::WikiLink(..) 
=> { this_bracket = true; break; } _ => break, } } n_ast.sourcepos = sourcepos; if !this_bracket { children.push(n); } nch = n.next_sibling(); } // Push children onto work stack in reverse order so they are // traversed in order stack.extend(children.drain(..).rev()); } } fn postprocess_text_node( &mut self, node: &'a AstNode<'a>, text: &mut String, sourcepos: &mut Sourcepos, ) { if self.options.extension.tasklist { self.process_tasklist(node, text, sourcepos); } if self.options.extension.autolink { autolink::process_autolinks( self.arena, node, text, self.options.parse.relaxed_autolinks, ); } } fn process_tasklist( &mut self, node: &'a AstNode<'a>, text: &mut String, sourcepos: &mut Sourcepos, ) { let (end, symbol) = match scanners::tasklist(text.as_bytes()) { Some(p) => p, None => return, }; let symbol = symbol as char; if !self.options.parse.relaxed_tasklist_matching && !matches!(symbol, ' ' | 'x' | 'X') { return; } let parent = node.parent().unwrap(); if node.previous_sibling().is_some() || parent.previous_sibling().is_some() { return; } if !node_matches!(parent, NodeValue::Paragraph) { return; } if !node_matches!(parent.parent().unwrap(), NodeValue::Item(..)) { return; } text.drain(..end); // These are sound only because the exact text that we've matched and // the count thereof (i.e. "end") will precisely map to characters in // the source document. sourcepos.start.column += end; parent.data.borrow_mut().sourcepos.start.column += end; parent.parent().unwrap().data.borrow_mut().value = NodeValue::TaskItem(if symbol == ' ' { None } else { Some(symbol) }); } fn parse_reference_inline(&mut self, content: &[u8]) -> Option<usize> { // In this case reference inlines rarely have delimiters // so we often just need the minimal case let delimiter_arena = Arena::with_capacity(0); let mut subj = inlines::Subject::new( self.arena, self.options, content, 0, // XXX -1 in upstream; never used? 
&mut self.refmap, &delimiter_arena, ); let mut lab: String = match subj.link_label() { Some(lab) if !lab.is_empty() => lab.to_string(), _ => return None, }; if subj.peek_char() != Some(&(b':')) { return None; } subj.pos += 1; subj.spnl(); let (url, matchlen) = match inlines::manual_scan_link_url(&subj.input[subj.pos..]) { Some((url, matchlen)) => (url, matchlen), None => return None, }; subj.pos += matchlen; let beforetitle = subj.pos; subj.spnl(); let title_search = if subj.pos == beforetitle { None } else { scanners::link_title(&subj.input[subj.pos..]) }; let title = match title_search { Some(matchlen) => { let t = &subj.input[subj.pos..subj.pos + matchlen]; subj.pos += matchlen; t.to_vec() } _ => { subj.pos = beforetitle; vec![] } }; subj.skip_spaces(); if !subj.skip_line_end() { if !title.is_empty() { subj.pos = beforetitle; subj.skip_spaces(); if !subj.skip_line_end() { return None; } } else { return None; } } lab = strings::normalize_label(&lab, Case::Fold); if !lab.is_empty() { subj.refmap.map.entry(lab).or_insert(ResolvedReference { url: String::from_utf8(strings::clean_url(url)).unwrap(), title: String::from_utf8(strings::clean_title(&title)).unwrap(), }); } Some(subj.pos) } } enum AddTextResult { LiteralText, HtmlBlock(u8), Otherwise, } fn parse_list_marker( line: &[u8], mut pos: usize, interrupts_paragraph: bool, ) -> Option<(usize, NodeList)> { let mut c = line[pos]; let startpos = pos; if c == b'*' || c == b'-' || c == b'+' { pos += 1; if !isspace(line[pos]) { return None; } if interrupts_paragraph { let mut i = pos; while strings::is_space_or_tab(line[i]) { i += 1; } if line[i] == b'\n' { return None; } } return Some(( pos - startpos, NodeList { list_type: ListType::Bullet, marker_offset: 0, padding: 0, start: 1, delimiter: ListDelimType::Period, bullet_char: c, tight: false, }, )); } else if isdigit(c) { let mut start: usize = 0; let mut digits = 0; loop { start = (10 * start) + (line[pos] - b'0') as usize; pos += 1; digits += 1; if !(digits < 9 && 
isdigit(line[pos])) { break; } } if interrupts_paragraph && start != 1 { return None; } c = line[pos]; if c != b'.' && c != b')' { return None; } pos += 1; if !isspace(line[pos]) { return None; } if interrupts_paragraph { let mut i = pos; while strings::is_space_or_tab(line[i]) { i += 1; } if strings::is_line_end_char(line[i]) { return None; } } return Some(( pos - startpos, NodeList { list_type: ListType::Ordered, marker_offset: 0, padding: 0, start, delimiter: if c == b'.' { ListDelimType::Period } else { ListDelimType::Paren }, bullet_char: 0, tight: false, }, )); } None } pub fn unwrap_into<T>(t: Option<T>, out: &mut T) -> bool { match t { Some(v) => { *out = v; true } _ => false, } } pub fn unwrap_into_copy<T: Copy>(t: Option<&T>, out: &mut T) -> bool { match t { Some(v) => { *out = *v; true } _ => false, } } fn unwrap_into_2<T, U>(tu: Option<(T, U)>, out_t: &mut T, out_u: &mut U) -> bool { match tu { Some((t, u)) => { *out_t = t; *out_u = u; true } _ => false, } } fn lists_match(list_data: &NodeList, item_data: &NodeList) -> bool { list_data.list_type == item_data.list_type && list_data.delimiter == item_data.delimiter && list_data.bullet_char == item_data.bullet_char } fn reopen_ast_nodes<'a>(mut ast: &'a AstNode<'a>) { loop { ast.data.borrow_mut().open = true; ast = match ast.parent() { Some(p) => p, None => return, } } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum AutolinkType { Uri, Email, } #[derive(Debug, Clone, Copy, Default)] #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] /// Options for bulleted list redering in markdown. See `link_style` in [`RenderOptions`] for more details. 
/// The metadata of a multiline blockquote.
///
/// Both values are recorded for the opening fence and consulted by the
/// block parser on every continuation line of the quote.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct NodeMultilineBlockQuote {
    /// The length of the opening fence. A closing fence must match at least
    /// this many bytes for the quote to be closed.
    pub fence_length: usize,
    /// The indentation level of the fence marker. Continuation lines have
    /// at most this many leading spaces/tabs stripped.
    pub fence_offset: usize,
}
pub emoji: String, } impl NodeShortCode { /// Checks whether the input is a valid short code. pub fn resolve(code: &str) -> Option<Self> { let emoji = emojis::get_by_shortcode(code)?; Some(NodeShortCode { code: code.to_string(), emoji: emoji.to_string(), }) } } ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/parser/table.rs�������������������������������������������������������������������0000644�0000000�0000000�00000026746�10461020230�0014727�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use crate::arena_tree::Node; use crate::nodes; use crate::nodes::{Ast, AstNode, NodeTable, NodeValue, TableAlignment}; use crate::parser::Parser; use crate::scanners; use crate::strings::trim; use std::cell::RefCell; use std::cmp::min; use super::inlines::count_newlines; // Limit to prevent a malicious input from causing a denial of service. const MAX_AUTOCOMPLETED_CELLS: usize = 500_000; pub fn try_opening_block<'a>( parser: &mut Parser<'a, '_, '_>, container: &'a AstNode<'a>, line: &[u8], ) -> Option<(&'a AstNode<'a>, bool, bool)> { let aligns = match container.data.borrow().value { NodeValue::Paragraph => None, NodeValue::Table(NodeTable { ref alignments, .. 
}) => Some(alignments.clone()), _ => return None, }; match aligns { None => try_opening_header(parser, container, line), Some(ref aligns) => try_opening_row(parser, container, aligns, line), } } fn try_opening_header<'a>( parser: &mut Parser<'a, '_, '_>, container: &'a AstNode<'a>, line: &[u8], ) -> Option<(&'a AstNode<'a>, bool, bool)> { if container.data.borrow().table_visited { return Some((container, false, false)); } if scanners::table_start(&line[parser.first_nonspace..]).is_none() { return Some((container, false, false)); } let spoiler = parser.options.extension.spoiler; let delimiter_row = match row(&line[parser.first_nonspace..], spoiler) { Some(delimiter_row) => delimiter_row, None => return Some((container, false, true)), }; let header_row = match row(container.data.borrow().content.as_bytes(), spoiler) { Some(header_row) => header_row, None => return Some((container, false, true)), }; if header_row.cells.len() != delimiter_row.cells.len() { return Some((container, false, true)); } if header_row.paragraph_offset > 0 { try_inserting_table_header_paragraph(parser, container, header_row.paragraph_offset); } let mut alignments = vec![]; for cell in delimiter_row.cells { let cell_content = cell.content.as_bytes(); let left = !cell_content.is_empty() && cell_content[0] == b':'; let right = !cell_content.is_empty() && cell_content[cell_content.len() - 1] == b':'; alignments.push(if left && right { TableAlignment::Center } else if left { TableAlignment::Left } else if right { TableAlignment::Right } else { TableAlignment::None }); } let start = container.data.borrow().sourcepos.start; let child = Ast::new( NodeValue::Table(NodeTable { alignments, num_columns: header_row.cells.len(), num_rows: 0, num_nonempty_cells: 0, }), start, ); let table = parser.arena.alloc(Node::new(RefCell::new(child))); container.append(table); let header = parser.add_child(table, NodeValue::TableRow(true), start.column); { let header_ast = &mut header.data.borrow_mut(); 
header_ast.sourcepos.start.line = start.line; header_ast.sourcepos.end = start.column_add( (container.data.borrow().content.as_bytes().len() - 2 - header_row.paragraph_offset) as isize, ); } let mut i = 0; while i < header_row.cells.len() { let cell = &header_row.cells[i]; let ast_cell = parser.add_child( header, NodeValue::TableCell, start.column + cell.start_offset - header_row.paragraph_offset, ); let ast = &mut ast_cell.data.borrow_mut(); ast.sourcepos.start.line = start.line; ast.sourcepos.end = start.column_add((cell.end_offset - header_row.paragraph_offset) as isize); ast.internal_offset = cell.internal_offset; ast.content.clone_from(&cell.content); ast.line_offsets.push( start.column + cell.start_offset - 1 + cell.internal_offset - header_row.paragraph_offset, ); i += 1; } incr_table_row_count(container, i); let offset = line.len() - 1 - parser.offset; parser.advance_offset(line, offset, false); Some((table, true, false)) } fn try_opening_row<'a>( parser: &mut Parser<'a, '_, '_>, container: &'a AstNode<'a>, alignments: &[TableAlignment], line: &[u8], ) -> Option<(&'a AstNode<'a>, bool, bool)> { if parser.blank { return None; } if get_num_autocompleted_cells(container) > MAX_AUTOCOMPLETED_CELLS { return None; } let sourcepos = container.data.borrow().sourcepos; let spoiler = parser.options.extension.spoiler; let this_row = match row(&line[parser.first_nonspace..], spoiler) { Some(this_row) => this_row, None => return None, }; let new_row = parser.add_child( container, NodeValue::TableRow(false), sourcepos.start.column, ); { new_row.data.borrow_mut().sourcepos.end.column = sourcepos.end.column; } let mut i = 0; let mut last_column = sourcepos.start.column; while i < min(alignments.len(), this_row.cells.len()) { let cell = &this_row.cells[i]; let cell_node = parser.add_child( new_row, NodeValue::TableCell, sourcepos.start.column + cell.start_offset, ); let cell_ast = &mut cell_node.data.borrow_mut(); cell_ast.internal_offset = cell.internal_offset; 
/// One cell of a table row as produced by `row`.
///
/// All offsets are byte indices into the string that was handed to `row`.
struct Cell {
    /// Where the cell begins: `row` walks backwards from the position at
    /// which the cell scanner matched until it reaches the byte after the
    /// previous `|` (or the row's `paragraph_offset`).
    start_offset: usize,
    /// Index of the last byte matched by the cell scanner.
    end_offset: usize,
    /// How many bytes `start_offset` was moved backwards, i.e. the distance
    /// from `start_offset` to where the cell scanner started matching.
    internal_offset: usize,
    /// The cell text with `\|` unescaped to `|` and surrounding whitespace
    /// trimmed.
    content: String,
}
/// Returns a copy of `string` in which every backslash that directly
/// precedes a `|` has been dropped. All other bytes, including backslashes
/// not followed by a pipe, are kept as they are.
fn unescape_pipes(string: &[u8]) -> Vec<u8> {
    let mut out = Vec::with_capacity(string.len());
    let mut bytes = string.iter().copied().peekable();

    while let Some(byte) = bytes.next() {
        let escapes_pipe = byte == b'\\' && bytes.peek() == Some(&b'|');
        if !escapes_pipe {
            out.push(byte);
        }
    }

    out
}
(If one of the rows is too short, then the trailing cells // are autocompleted. Autocompleted cells are not counted in n_nonempty_cells.) // The purpose of this is to prevent a malicious input from generating a very // large number of autocompleted cells, which could cause a denial of service // vulnerability. fn incr_table_row_count<'a>(container: &'a AstNode<'a>, i: usize) -> bool { return match container.data.borrow_mut().value { NodeValue::Table(ref mut node_table) => { node_table.num_rows += 1; node_table.num_nonempty_cells += i; true } _ => false, }; } // Calculate the number of autocompleted cells. fn get_num_autocompleted_cells<'a>(container: &'a AstNode<'a>) -> usize { return match container.data.borrow().value { NodeValue::Table(ref node_table) => { let num_cells = node_table.num_columns * node_table.num_rows; if num_cells < node_table.num_nonempty_cells { 0 } else { (node_table.num_columns * node_table.num_rows) - node_table.num_nonempty_cells } } _ => 0, }; } pub fn matches(line: &[u8], spoiler: bool) -> bool { row(line, spoiler).is_some() } ��������������������������comrak-0.29.0/src/plugins/mod.rs��������������������������������������������������������������������0000644�0000000�0000000�00000000277�10461020230�0014573�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������//! Plugins for enhancing the default implementation of comrak can be defined in this module. 
#[cfg(feature = "syntect")] #[cfg_attr(docsrs, doc(cfg(feature = "syntect")))] pub mod syntect; ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/plugins/syntect.rs����������������������������������������������������������������0000644�0000000�0000000�00000016661�10461020230�0015511�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������//! Adapter for the Syntect syntax highlighter plugin. use crate::adapters::SyntaxHighlighterAdapter; use crate::html; use std::collections::{hash_map, HashMap}; use std::io::{self, Write}; use syntect::easy::HighlightLines; use syntect::highlighting::{Color, ThemeSet}; use syntect::html::{ append_highlighted_html_for_styled_line, ClassStyle, ClassedHTMLGenerator, IncludeBackground, }; use syntect::parsing::{SyntaxReference, SyntaxSet}; use syntect::util::LinesWithEndings; use syntect::Error; #[derive(Debug)] /// Syntect syntax highlighter plugin. pub struct SyntectAdapter { theme: Option<String>, syntax_set: SyntaxSet, theme_set: ThemeSet, } impl SyntectAdapter { /// Construct a new `SyntectAdapter` object and set the syntax highlighting theme. /// If None is specified, apply CSS classes instead. 
pub fn new(theme: Option<&str>) -> Self {
        SyntectAdapter {
            theme: theme.map(String::from),
            syntax_set: SyntaxSet::load_defaults_newlines(),
            theme_set: ThemeSet::load_defaults(),
        }
    }

    /// Highlights `code` using `syntax` and returns the resulting HTML, without
    /// any surrounding `<pre>` element.
    ///
    /// When a theme is configured each line is rendered with inline styles;
    /// otherwise the output is marked up with CSS classes.
    ///
    /// # Panics
    ///
    /// The configured theme name is used to index `theme_set.themes` directly,
    /// so a name that is not present in the theme set panics.
    fn highlight_html(&self, code: &str, syntax: &SyntaxReference) -> Result<String, Error> {
        match &self.theme {
            Some(theme) => {
                // syntect::html::highlighted_html_for_string, without the opening/closing <pre>.
                let theme = &self.theme_set.themes[theme];
                let mut highlighter = HighlightLines::new(syntax, theme);

                let bg = theme.settings.background.unwrap_or(Color::WHITE);

                let mut output = String::new();
                for line in LinesWithEndings::from(code) {
                    let regions = highlighter.highlight_line(line, &self.syntax_set)?;
                    append_highlighted_html_for_styled_line(
                        &regions[..],
                        IncludeBackground::IfDifferent(bg),
                        &mut output,
                    )?;
                }
                Ok(output)
            }
            None => {
                // fall back to HTML classes.
                let mut html_generator = ClassedHTMLGenerator::new_with_class_style(
                    syntax,
                    &self.syntax_set,
                    ClassStyle::Spaced,
                );
                for line in LinesWithEndings::from(code) {
                    html_generator.parse_html_for_line_which_includes_newline(line)?;
                }
                Ok(html_generator.finalize())
            }
        }
    }
}

impl SyntaxHighlighterAdapter for SyntectAdapter {
    /// Writes `code` to `output` as highlighted HTML.
    ///
    /// The syntax is looked up by `lang` (an absent or empty `lang` is treated
    /// as `"Plain Text"`), then by the first line of `code`, and finally falls
    /// back to the plain-text syntax. If highlighting itself fails, the code is
    /// written HTML-escaped and unhighlighted.
    fn write_highlighted(
        &self,
        output: &mut dyn Write,
        lang: Option<&str>,
        code: &str,
    ) -> io::Result<()> {
        let fallback_syntax = "Plain Text";

        let lang: &str = match lang {
            Some(l) if !l.is_empty() => l,
            _ => fallback_syntax,
        };

        let syntax = self
            .syntax_set
            .find_syntax_by_token(lang)
            .unwrap_or_else(|| {
                self.syntax_set
                    .find_syntax_by_first_line(code)
                    .unwrap_or_else(|| self.syntax_set.find_syntax_plain_text())
            });

        match self.highlight_html(code, syntax) {
            Ok(highlighted_code) => output.write_all(highlighted_code.as_bytes()),
            // The fallback is written into an HTML document, so the raw source
            // must be escaped; emitting it verbatim would let `<`, `&` or `"`
            // in the code be interpreted as markup.
            Err(_) => html::escape(output, code.as_bytes()),
        }
    }

    /// Writes the opening `<pre>` tag.
    ///
    /// With a theme configured, the theme's background colour (white when the
    /// theme defines none) is added as a `style` attribute alongside the
    /// supplied `attributes`. Without a theme, only
    /// `class="syntax-highlighting"` is written.
    fn write_pre_tag(
        &self,
        output: &mut dyn Write,
        attributes: HashMap<String, String>,
    ) -> io::Result<()> {
        match &self.theme {
            Some(theme) => {
                let theme = &self.theme_set.themes[theme];
                let colour = theme.settings.background.unwrap_or(Color::WHITE);

                let style = format!(
                    "background-color:#{:02x}{:02x}{:02x};",
                    colour.r, colour.g, colour.b
                );

                let mut pre_attributes = SyntectPreAttributes::new(attributes, &style);
                html::write_opening_tag(output, "pre", pre_attributes.iter_mut())
            }
            None => {
                // NOTE(review): the caller-supplied `attributes` are shadowed and
                // therefore not written in this branch — confirm this is intended.
                let mut attributes: HashMap<&str, &str> = HashMap::new();
                attributes.insert("class", "syntax-highlighting");
                html::write_opening_tag(output, "pre", attributes)
            }
        }
    }

    /// Writes the opening `<code>` tag with the supplied `attributes` unchanged.
    fn write_code_tag(
        &self,
        output: &mut dyn Write,
        attributes: HashMap<String, String>,
    ) -> io::Result<()> {
        html::write_opening_tag(output, "code", attributes)
    }
}

/// The attributes for a themed `<pre>` tag: the caller's attributes plus the
/// inline style derived from the syntect theme.
struct SyntectPreAttributes {
    syntect_style: String,
    attributes: HashMap<String, String>,
}

impl SyntectPreAttributes {
    /// Bundles the caller's `attributes` with a copy of `syntect_style`.
    fn new(attributes: HashMap<String, String>, syntect_style: &str) -> Self {
        Self {
            syntect_style: syntect_style.into(),
            attributes,
        }
    }

    /// Returns an iterator over the attributes with the syntect style merged
    /// in. Iterating mutates a stored `style` value in place.
    fn iter_mut(&mut self) -> SyntectPreAttributesIter {
        SyntectPreAttributesIter {
            iter_mut: self.attributes.iter_mut(),
            syntect_style: &self.syntect_style,
            style_written: false,
        }
    }
}

/// Iterator that yields each stored attribute and guarantees the syntect style
/// is emitted exactly once.
struct SyntectPreAttributesIter<'a> {
    iter_mut: hash_map::IterMut<'a, String, String>,
    syntect_style: &'a str,
    style_written: bool,
}

impl<'a> Iterator for SyntectPreAttributesIter<'a> {
    type Item = (&'a str, &'a str);

    fn next(&mut self) -> Option<Self::Item> {
        match self.iter_mut.next() {
            // An existing `style` attribute gets the syntect style prepended.
            Some((k, v)) if k == "style" && !self.style_written => {
                self.style_written = true;
                v.insert_str(0, self.syntect_style);
                Some((k, v))
            }
            Some((k, v)) => Some((k, v)),
            // No `style` attribute was seen: emit the syntect style on its own
            // once the stored attributes are exhausted.
            None if !self.style_written => {
                self.style_written = true;
                Some(("style", self.syntect_style))
            }
            None => None,
        }
    }
}

#[derive(Debug)]
/// A builder for [`SyntectAdapter`].
///
/// Allows customization of `Theme`, [`ThemeSet`], and [`SyntaxSet`].
pub struct SyntectAdapterBuilder { theme: Option<String>, syntax_set: Option<SyntaxSet>, theme_set: Option<ThemeSet>, } impl Default for SyntectAdapterBuilder { fn default() -> Self { SyntectAdapterBuilder { theme: Some("InspiredGitHub".into()), syntax_set: None, theme_set: None, } } } impl SyntectAdapterBuilder { /// Create a new empty [`SyntectAdapterBuilder`]. pub fn new() -> Self { Default::default() } /// Set the theme. pub fn theme(mut self, s: &str) -> Self { self.theme.replace(s.into()); self } /// Uses CSS classes instead of a Syntect theme. pub fn css(mut self) -> Self { self.theme = None; self } /// Set the syntax set. pub fn syntax_set(mut self, s: SyntaxSet) -> Self { self.syntax_set.replace(s); self } /// Set the theme set. pub fn theme_set(mut self, s: ThemeSet) -> Self { self.theme_set.replace(s); self } /// Builds the [`SyntectAdapter`]. Default values: /// - `theme`: `InspiredGitHub` /// - `syntax_set`: [`SyntaxSet::load_defaults_newlines()`] /// - `theme_set`: [`ThemeSet::load_defaults()`] pub fn build(self) -> SyntectAdapter { SyntectAdapter { theme: self.theme, syntax_set: self .syntax_set .unwrap_or_else(SyntaxSet::load_defaults_newlines), theme_set: self.theme_set.unwrap_or_else(ThemeSet::load_defaults), } } } �������������������������������������������������������������������������������comrak-0.29.0/src/scanners.re�����������������������������������������������������������������������0000644�0000000�0000000�00000025141�10461020230�0014126�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������/*!re2c re2c:case-insensitive = 1; re2c:encoding:utf8 = 1; re2c:encoding-policy = substitute; re2c:define:YYCTYPE = u8; re2c:define:YYPEEK = "if cursor < 
len { *s.get_unchecked(cursor) } else { 0 }";
    re2c:define:YYSKIP = "cursor += 1;";
    re2c:define:YYBACKUP = "marker = cursor;";
    re2c:define:YYRESTORE = "cursor = marker;";
    re2c:define:YYBACKUPCTX = "ctxmarker = cursor;";
    re2c:define:YYRESTORECTX = "cursor = ctxmarker;";
    re2c:yyfill:enable = 0;
    re2c:indent:string = ' ';
    re2c:indent:top = 1;

    wordchar = [^\x00-\x20];
    spacechar = [ \t\v\f\r\n];
    reg_char = [^\\()\x00-\x20];
    escaped_char = [\\][!"#$%&'()*+,./:;<=>?@[\\\]^_`{|}~-];
    tagname = [A-Za-z][A-Za-z0-9-]*;
    blocktagname = 'address'|'article'|'aside'|'base'|'basefont'|'blockquote'|'body'|'caption'|'center'|'col'|'colgroup'|'dd'|'details'|'dialog'|'dir'|'div'|'dl'|'dt'|'fieldset'|'figcaption'|'figure'|'footer'|'form'|'frame'|'frameset'|'h1'|'h2'|'h3'|'h4'|'h5'|'h6'|'head'|'header'|'hr'|'html'|'iframe'|'legend'|'li'|'link'|'main'|'menu'|'menuitem'|'nav'|'noframes'|'ol'|'optgroup'|'option'|'p'|'param'|'search'|'section'|'title'|'summary'|'table'|'tbody'|'td'|'tfoot'|'th'|'thead'|'title'|'tr'|'track'|'ul';
    attributename = [a-zA-Z_:][a-zA-Z0-9:._-]*;
    unquotedvalue = [^ \t\r\n\v\f"'=<>`\x00]+;
    singlequotedvalue = ['][^'\x00]*['];
    doublequotedvalue = ["][^"\x00]*["];
    attributevalue = unquotedvalue | singlequotedvalue | doublequotedvalue;
    attributevaluespec = spacechar* [=] spacechar* attributevalue;
    attribute = spacechar+ attributename attributevaluespec?;
    opentag = tagname attribute* spacechar* [/]? [>];
    closetag = [/] tagname spacechar* [>];
    htmlcomment = "--" ([^\x00-]+ | "-" [^\x00-] | "--" [^\x00>])* "-->";
    processinginstruction = ([^?>\x00]+ | [?][^>\x00] | [>])+;
    declaration = [A-Z]+ spacechar+ [^>\x00]*;
    cdata = "CDATA[" ([^\]\x00]+ | "]" [^\]\x00] | "]]" [^>\x00])*;
    htmltag = opentag | closetag;
    in_parens_nosp = [(] (reg_char|escaped_char|[\\])* [)];
    in_double_quotes = ["] (escaped_char|[^"\x00])* ["];
    in_single_quotes = ['] (escaped_char|[^'\x00])* ['];
    in_parens = [(] (escaped_char|[^)\x00])* [)];
    scheme = [A-Za-z][A-Za-z0-9.+-]{1,31};
*/

// NOTE(review): `blocktagname` above lists 'title' twice (after 'section' and
// after 'thead'); the second alternative looks redundant — confirm.

// Every scanner below matches from the start of `s`. Reads at or past the end
// of the slice yield a 0 byte (see YYPEEK above).

// Matches an ATX heading opener: one to six `#` followed by spaces/tabs or a
// single CR/LF. Returns the number of bytes matched.
pub fn atx_heading_start(s: &[u8]) -> Option<usize> {
    let mut cursor = 0;
    let mut marker = 0;
    let len = s.len();
/*!re2c
    [#]{1,6} ([ \t]+|[\r\n]) { return Some(cursor); }
    * { return None; }
*/
}

// Returns true when a closing `script`, `pre`, `textarea` or `style` tag
// appears before any newline or NUL byte.
pub fn html_block_end_1(s: &[u8]) -> bool {
    let mut cursor = 0;
    let mut marker = 0;
    let len = s.len();
/*!re2c
    [^\n\x00]* [<] [/] ('script'|'pre'|'textarea'|'style') [>] { return true; }
    * { return false; }
*/
}

// Returns true when `-->` appears before any newline or NUL byte.
pub fn html_block_end_2(s: &[u8]) -> bool {
    let mut cursor = 0;
    let mut marker = 0;
    let len = s.len();
/*!re2c
    [^\n\x00]* '-->' { return true; }
    * { return false; }
*/
}

// Returns true when `?>` appears before any newline or NUL byte.
pub fn html_block_end_3(s: &[u8]) -> bool {
    let mut cursor = 0;
    let mut marker = 0;
    let len = s.len();
/*!re2c
    [^\n\x00]* '?>' { return true; }
    * { return false; }
*/
}

// Returns true when `>` appears before any newline or NUL byte.
pub fn html_block_end_4(s: &[u8]) -> bool {
    let mut cursor = 0;
    let mut marker = 0;
    let len = s.len();
/*!re2c
    [^\n\x00]* '>' { return true; }
    * { return false; }
*/
}

// Returns true when `]]>` appears before any newline or NUL byte.
pub fn html_block_end_5(s: &[u8]) -> bool {
    let mut cursor = 0;
    let mut marker = 0;
    let len = s.len();
/*!re2c
    [^\n\x00]* ']]>' { return true; }
    * { return false; }
*/
}

// Matches an opening code fence: three or more backticks whose remaining line
// contains no backtick, or three or more tildes. The rest of the line is
// trailing context, so the returned length covers the fence characters only.
pub fn open_code_fence(s: &[u8]) -> Option<usize> {
    let mut cursor = 0;
    let mut marker = 0;
    let mut ctxmarker = 0;
    let len = s.len();
/*!re2c
    [`]{3,} / [^`\r\n\x00]*[\r\n] { return Some(cursor); }
    [~]{3,} / [^\r\n\x00]*[\r\n] { return Some(cursor); }
    * { return None; }
*/
}

// Matches a closing code fence: three or more backticks or tildes followed
// only by spaces/tabs up to the line ending. The trailing whitespace is
// context, so the returned length covers the fence characters only.
pub fn close_code_fence(s: &[u8]) -> Option<usize> {
    let mut cursor = 0;
    let mut marker = 0;
    let mut ctxmarker = 0;
    let len = s.len();
/*!re2c
    [`]{3,} / [ \t]*[\r\n] { return Some(cursor); }
    [~]{3,} / [ \t]*[\r\n] { return Some(cursor); }
    * { return None; }
*/
}

// Classifies the start of an HTML block. Returns a number from 1 to 6
// identifying which of the rules below matched, or `None` if none did.
pub fn html_block_start(s: &[u8]) -> Option<usize> {
    let mut cursor = 0;
    let mut marker = 0;
    let len = s.len();
/*!re2c
    [<] ('script'|'pre'|'textarea'|'style') (spacechar | [>]) { return Some(1); }
    '<!--' { return Some(2); }
    '<?' { return Some(3); }
    '<!' [A-Z] { return Some(4); }
    '<![CDATA[' { return Some(5); }
    [<] [/]? blocktagname (spacechar | [/]? [>]) { return Some(6); }
    * { return None; }
*/
}

// Returns `Some(7)` when `s` is a complete open or close tag followed only by
// whitespace up to a line ending.
pub fn html_block_start_7(s: &[u8]) -> Option<usize> {
    let mut cursor = 0;
    let mut marker = 0;
    let len = s.len();
/*!re2c
    [<] (opentag | closetag) [\t\n\f ]* [\r\n] { return Some(7); }
    * { return None; }
*/
}

// The character a setext heading underline is made of.
pub enum SetextChar {
    Equals,
    Hyphen,
}

// Matches a setext heading underline: a run of `=` or of `-`, optional
// spaces/tabs, then a line ending. Returns which character was used.
pub fn setext_heading_line(s: &[u8]) -> Option<SetextChar> {
    let mut cursor = 0;
    let mut marker = 0;
    let len = s.len();
/*!re2c
    [=]+ [ \t]* [\r\n] { return Some(SetextChar::Equals); }
    [-]+ [ \t]* [\r\n] { return Some(SetextChar::Hyphen); }
    * { return None; }
*/
}

// Matches a footnote definition label `[^name]:` plus any following
// spaces/tabs. Returns the number of bytes matched.
pub fn footnote_definition(s: &[u8]) -> Option<usize> {
    let mut cursor = 0;
    let mut marker = 0;
    let len = s.len();
/*!re2c
    '[^' ([^\] \r\n\x00\t]+) ']:' [ \t]* { return Some(cursor); }
    * { return None; }
*/
}

// Matches a URI scheme followed by `:`. Returns the number of bytes matched,
// including the colon.
pub fn scheme(s: &[u8]) -> Option<usize> {
    let mut cursor = 0;
    let mut marker = 0;
    let len = s.len();
/*!re2c
    scheme [:] { return Some(cursor); }
    * { return None; }
*/
}

// Matches the inside of a URI autolink up to and including the closing `>`.
// Returns the number of bytes matched.
pub fn autolink_uri(s: &[u8]) -> Option<usize> {
    let mut cursor = 0;
    let mut marker = 0;
    let len = s.len();
/*!re2c
    scheme [:][^\x00-\x20<>]*[>] { return Some(cursor); }
    * { return None; }
*/
}

// Matches the inside of an e-mail autolink up to and including the closing
// `>`. Returns the number of bytes matched.
pub fn autolink_email(s: &[u8]) -> Option<usize> {
    let mut cursor = 0;
    let mut marker = 0;
    let len = s.len();
/*!re2c
    [a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+
        [@]
        [a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
        ([.][a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*
        [>] { return Some(cursor); }
    * { return None; }
*/
}

// Matches the `htmltag` definition (an open or close tag without its leading
// `<`). Returns the number of bytes matched.
pub fn html_tag(s: &[u8]) -> Option<usize> {
    let mut cursor = 0;
    let mut marker = 0;
    let len = s.len();
/*!re2c
    htmltag { return Some(cursor); }
    * { return None; }
*/
}

// Matches the `htmlcomment` definition, from `--` through the closing `-->`.
// Returns the number of bytes matched.
pub fn html_comment(s: &[u8]) -> Option<usize> {
    let mut cursor = 0;
    let mut marker = 0;
    let len = s.len();
/*!re2c
    htmlcomment { return Some(cursor); }
    * { return None; }
*/
}

// Matches the `processinginstruction` definition. Returns the number of bytes
// matched.
pub fn html_processing_instruction(s: &[u8]) -> Option<usize> {
    let mut cursor = 0;
    let mut marker = 0;
    let len = s.len();
/*!re2c
    processinginstruction { return Some(cursor); }
    * { return None; }
*/
}

// Matches the `declaration` definition; the closing `>` is not part of the
// pattern. Returns the number of bytes matched.
pub fn html_declaration(s: &[u8]) -> Option<usize> {
    let mut cursor = 0;
    let mut marker = 0;
    let len = s.len();
/*!re2c
    declaration { return Some(cursor); }
    * { return None; }
*/
}

// Matches the `cdata` definition, starting at `CDATA[`; the closing `]]>` is
// not part of the pattern. Returns the number of bytes matched.
pub fn html_cdata(s: &[u8]) -> Option<usize> {
    let mut cursor = 0;
    let mut marker = 0;
    let len = s.len();
/*!re2c
    cdata { return Some(cursor); }
    * { return None; }
*/
}

// Matches a non-empty run of whitespace (space, tab, VT, FF, CR, LF). Returns
// the length of the run.
pub fn spacechars(s: &[u8]) -> Option<usize> {
    let mut cursor = 0;
    let len = s.len();
/*!re2c
    [ \t\v\f\r\n]+ { return Some(cursor); }
    * { return None; }
*/
}

// Matches a link title delimited by double quotes, single quotes or
// parentheses. Returns the number of bytes matched, delimiters included.
pub fn link_title(s: &[u8]) -> Option<usize> {
    let mut cursor = 0;
    let mut marker = 0;
    let len = s.len();
/*!re2c
    ["] (escaped_char|[^"\x00])* ["] { return Some(cursor); }
    ['] (escaped_char|[^'\x00])* ['] { return Some(cursor); }
    [(] (escaped_char|[^()\x00])* [)] { return Some(cursor); }
    * { return None; }
*/
}

// Returns `Some(len)` when `s` starts with `javascript:`, `vbscript:`, `file:`
// or `data:`, where `len` is the length of that prefix. `data:image/` URLs for
// png, gif, jpeg and webp have their own rule and return `None`.
pub fn dangerous_url(s: &[u8]) -> Option<usize> {
    let mut cursor = 0;
    let mut marker = 0;
    let len = s.len();
/*!re2c
    'data:image/' ('png'|'gif'|'jpeg'|'webp') { return None; }
    'javascript:' | 'vbscript:' | 'file:' | 'data:' { return Some(cursor); }
    * { return None; }
*/
}

// Shared definitions for the table scanners.
//
// NOTE(review): `table_spoiler` is written as two character classes `['|']`,
// each of which contains both `'` and `|`. As written it therefore matches any
// two-byte combination of those characters (e.g. `'|`), not only `||` —
// confirm whether `[|][|]` was intended. Any change here only takes effect
// once the generated scanners.rs is regenerated.
/*!re2c

    table_spoiler = ['|']['|'];
    table_spacechar = [ \t\v\f];
    table_newline = [\r]?[\n];

    table_delimiter = (table_spacechar*[:]?[-]+[:]?table_spacechar*);
    table_cell = (escaped_char|[^\x00|\r\n])+;
    table_cell_spoiler = (escaped_char|table_spoiler|[^\x00|\r\n])+;

*/

// Matches a table delimiter row: delimiter cells separated by `|`, with
// optional leading and trailing `|`, ending in a newline. Returns the number
// of bytes matched.
pub fn table_start(s: &[u8]) -> Option<usize> {
    let mut cursor = 0;
    let mut marker = 0;
    let len = s.len();
/*!re2c
    [|]? table_delimiter ([|] table_delimiter)* [|]? table_spacechar* table_newline {
        return Some(cursor);
    }
    * { return None; }
*/
}

// Matches the contents of one non-empty table cell and returns its length.
// With `spoiler` set, the `table_cell_spoiler` pattern is used, which also
// accepts `table_spoiler` sequences inside the cell.
pub fn table_cell(s: &[u8], spoiler: bool) -> Option<usize> {
    let mut cursor = 0;
    let mut marker = 0;
    let len = s.len();

    // In fact, `table_cell` matches non-empty table cells only. The empty
    // string is also a valid table cell, but is handled by the default rule.
    // This approach prevents re2c's match-empty-string warning.
    if spoiler {
/*!re2c
    table_cell_spoiler { return Some(cursor); }
    * { return None; }
*/
    } else {
/*!re2c
    table_cell { return Some(cursor); }
    * { return None; }
*/
    }
}

// Matches a cell separator: `|` followed by optional table whitespace.
// Returns the number of bytes matched.
pub fn table_cell_end(s: &[u8]) -> Option<usize> {
    let mut cursor = 0;
    let len = s.len();
/*!re2c
    [|] table_spacechar* { return Some(cursor); }
    * { return None; }
*/
}

// Matches the end of a table row: optional table whitespace followed by a
// newline. Returns the number of bytes matched.
pub fn table_row_end(s: &[u8]) -> Option<usize> {
    let mut cursor = 0;
    let mut marker = 0;
    let len = s.len();
/*!re2c
    table_spacechar* table_newline { return Some(cursor); }
    * { return None; }
*/
}

// Matches a shortcode name followed by its closing `:`. Returns the number of
// bytes matched, including the colon.
#[cfg(feature = "shortcodes")]
pub fn shortcode(s: &[u8]) -> Option<usize> {
    let mut cursor = 0;
    let mut marker = 0;
    let len = s.len();
/*!re2c
    [A-Za-z0-9+_-]+ [:] { return Some(cursor); }
    * { return None; }
*/
}

// Matches an opening multiline block quote fence: three or more `>` followed
// only by spaces/tabs up to the line ending. The trailing whitespace is
// context, so the returned length covers the `>` characters only.
pub fn open_multiline_block_quote_fence(s: &[u8]) -> Option<usize> {
    let mut cursor = 0;
    let mut marker = 0;
    let mut ctxmarker = 0;
    let len = s.len();
/*!re2c
    [>]{3,} / [ \t]*[\r\n] { return Some(cursor); }
    * { return None; }
*/
}

// Matches a closing multiline block quote fence. The rule is identical to the
// one used by `open_multiline_block_quote_fence`.
pub fn close_multiline_block_quote_fence(s: &[u8]) -> Option<usize> {
    let mut cursor = 0;
    let mut marker = 0;
    let mut ctxmarker = 0;
    let len = s.len();
/*!re2c
    [>]{3,} / [ \t]*[\r\n] { return Some(cursor); }
    * { return None; }
*/
}

// Returns both the length of the match, and the tasklist character.
pub fn tasklist(s: &[u8]) -> Option<(usize, u8)> { let mut cursor = 0; let mut marker = 0; let len = s.len(); let t1; /*!stags:re2c format = 'let mut @@{tag} = 0;'; */ /*!local:re2c re2c:define:YYSTAGP = "@@{tag} = cursor;"; re2c:tags = 1; spacechar* [[] @t1 [^\x00\r\n] [\]] (spacechar | [\x00]) { if cursor == len + 1 { cursor -= 1; } return Some((cursor, s[t1])); } * { return None; } */ } // vim: set ft=rust: �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/scanners.rs�����������������������������������������������������������������������0000644�0000000�0000000�00003124230�10461020230�0014146�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* Generated by re2c 3.1 */ pub fn atx_heading_start(s: &[u8]) -> Option<usize> { let mut cursor = 0; let mut marker = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate { 0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x23 => { yystate = 3; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } } 1 => { yystate = 2; continue 'yyl; } 2 => { return None; } 3 => { marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09 | 0x20 => { cursor += 1; yystate = 4; 
continue 'yyl; } 0x0A | 0x0D => { cursor += 1; yystate = 6; continue 'yyl; } 0x23 => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 4 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09 | 0x20 => { cursor += 1; yystate = 4; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 5 => { return Some(cursor); } 6 => { yystate = 5; continue 'yyl; } 7 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09 | 0x20 => { cursor += 1; yystate = 4; continue 'yyl; } 0x0A | 0x0D => { cursor += 1; yystate = 6; continue 'yyl; } 0x23 => { cursor += 1; yystate = 9; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 8 => { cursor = marker; yystate = 2; continue 'yyl; } 9 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09 | 0x20 => { cursor += 1; yystate = 4; continue 'yyl; } 0x0A | 0x0D => { cursor += 1; yystate = 6; continue 'yyl; } 0x23 => { cursor += 1; yystate = 10; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 10 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09 | 0x20 => { cursor += 1; yystate = 4; continue 'yyl; } 0x0A | 0x0D => { cursor += 1; yystate = 6; continue 'yyl; } 0x23 => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 11 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09 | 0x20 => { cursor += 1; yystate = 4; continue 'yyl; } 0x0A | 0x0D => { cursor += 1; yystate = 6; continue 'yyl; } 0x23 => { cursor += 1; yystate = 12; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 12 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09 | 0x20 => { cursor += 1; yystate = 4; continue 'yyl; } 0x0A | 0x0D => { cursor += 1; yystate = 6; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } _ => { 
panic!("internal lexer error") } } } } } pub fn html_block_end_1(s: &[u8]) -> bool { let mut cursor = 0; let mut marker = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yyaccept: usize = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate { 0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x01..=0x09 | 0x0B..=0x3B | 0x3D..=0x7F => { yystate = 3; continue 'yyl; } 0x3C => { yystate = 4; continue 'yyl; } 0xC2..=0xDF => { yystate = 5; continue 'yyl; } 0xE0 => { yystate = 6; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { yystate = 7; continue 'yyl; } 0xED => { yystate = 8; continue 'yyl; } 0xF0 => { yystate = 9; continue 'yyl; } 0xF1..=0xF3 => { yystate = 10; continue 'yyl; } 0xF4 => { yystate = 11; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } } 1 => { yystate = 2; continue 'yyl; } 2 => { return false; } 3 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x7F | 0xC2..=0xF4 => { yystate = 13; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 4 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x2E | 0x30..=0x7F | 0xC2..=0xF4 => { yystate = 13; continue 'yyl; } 0x2F => { cursor += 1; yystate = 23; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 5 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 12; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 6 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 7 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < 
len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 8 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 9 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 10 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 11 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 12 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; yystate = 13; continue 'yyl; } 13 => match yych { 0x01..=0x09 | 0x0B..=0x3B | 0x3D..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x3C => { cursor += 1; yystate = 15; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } }, 14 => { cursor = marker; if yyaccept == 0 { yystate = 2; continue 'yyl; } else { yystate = 36; continue 'yyl; } } 15 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } 
}; match yych { 0x01..=0x09 | 0x0B..=0x2E | 0x30..=0x3B | 0x3D..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x2F => { cursor += 1; yystate = 23; continue 'yyl; } 0x3C => { cursor += 1; yystate = 15; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 16 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 12; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 17 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 18 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 19 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 20 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 21 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 22 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 
1; yystate = 18; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 23 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x3B | 0x3D..=0x4F | 0x51..=0x52 | 0x55..=0x6F | 0x71..=0x72 | 0x75..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x3C => { cursor += 1; yystate = 15; continue 'yyl; } 0x50 | 0x70 => { cursor += 1; yystate = 24; continue 'yyl; } 0x53 | 0x73 => { cursor += 1; yystate = 25; continue 'yyl; } 0x54 | 0x74 => { cursor += 1; yystate = 26; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 24 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x3B | 0x3D..=0x51 | 0x53..=0x71 | 0x73..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x3C => { cursor += 1; yystate = 15; continue 'yyl; } 0x52 | 0x72 => { cursor += 1; yystate = 27; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 25 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x3B | 0x3D..=0x42 | 0x44..=0x53 | 0x55..=0x62 | 0x64..=0x73 | 0x75..=0x7F => { cursor += 
1; yystate = 12; continue 'yyl; } 0x3C => { cursor += 1; yystate = 15; continue 'yyl; } 0x43 | 0x63 => { cursor += 1; yystate = 28; continue 'yyl; } 0x54 | 0x74 => { cursor += 1; yystate = 29; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 26 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x3B | 0x3D..=0x44 | 0x46..=0x64 | 0x66..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x3C => { cursor += 1; yystate = 15; continue 'yyl; } 0x45 | 0x65 => { cursor += 1; yystate = 30; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 27 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x3B | 0x3D..=0x44 | 0x46..=0x64 | 0x66..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x3C => { cursor += 1; yystate = 15; continue 'yyl; } 0x45 | 0x65 => { cursor += 1; yystate = 31; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 
19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 28 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x3B | 0x3D..=0x51 | 0x53..=0x71 | 0x73..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x3C => { cursor += 1; yystate = 15; continue 'yyl; } 0x52 | 0x72 => { cursor += 1; yystate = 32; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 29 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x3B | 0x3D..=0x58 | 0x5A..=0x78 | 0x7A..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x3C => { cursor += 1; yystate = 15; continue 'yyl; } 0x59 | 0x79 => { cursor += 1; yystate = 33; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 30 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x3B | 0x3D..=0x57 | 0x59..=0x77 | 0x79..=0x7F => { cursor += 1; yystate = 12; 
continue 'yyl; } 0x3C => { cursor += 1; yystate = 15; continue 'yyl; } 0x58 | 0x78 => { cursor += 1; yystate = 34; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 31 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x3B | 0x3D | 0x3F..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x3C => { cursor += 1; yystate = 15; continue 'yyl; } 0x3E => { cursor += 1; yystate = 35; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 32 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x3B | 0x3D..=0x48 | 0x4A..=0x68 | 0x6A..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x3C => { cursor += 1; yystate = 15; continue 'yyl; } 0x49 | 0x69 => { cursor += 1; yystate = 37; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; 
yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 33 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x3B | 0x3D..=0x4B | 0x4D..=0x6B | 0x6D..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x3C => { cursor += 1; yystate = 15; continue 'yyl; } 0x4C | 0x6C => { cursor += 1; yystate = 27; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 34 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x3B | 0x3D..=0x53 | 0x55..=0x73 | 0x75..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x3C => { cursor += 1; yystate = 15; continue 'yyl; } 0x54 | 0x74 => { cursor += 1; yystate = 38; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 35 => { yyaccept = 1; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x3B | 0x3D..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x3C => { cursor += 1; yystate = 15; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; 
yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 36; continue 'yyl; } } } 36 => { return true; } 37 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x3B | 0x3D..=0x4F | 0x51..=0x6F | 0x71..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x3C => { cursor += 1; yystate = 15; continue 'yyl; } 0x50 | 0x70 => { cursor += 1; yystate = 39; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 38 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x3B | 0x3D..=0x40 | 0x42..=0x60 | 0x62..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x3C => { cursor += 1; yystate = 15; continue 'yyl; } 0x41 | 0x61 => { cursor += 1; yystate = 40; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 14; 
continue 'yyl; } } } 39 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x3B | 0x3D..=0x53 | 0x55..=0x73 | 0x75..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x3C => { cursor += 1; yystate = 15; continue 'yyl; } 0x54 | 0x74 => { cursor += 1; yystate = 31; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 40 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x3B | 0x3D..=0x51 | 0x53..=0x71 | 0x73..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x3C => { cursor += 1; yystate = 15; continue 'yyl; } 0x52 | 0x72 => { cursor += 1; yystate = 41; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 41 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x3B | 0x3D..=0x44 | 0x46..=0x64 | 0x66..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x3C => { cursor += 1; yystate = 15; continue 'yyl; } 0x45 | 0x65 => { cursor += 1; yystate = 42; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; 
yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 42 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x3B | 0x3D..=0x40 | 0x42..=0x60 | 0x62..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x3C => { cursor += 1; yystate = 15; continue 'yyl; } 0x41 | 0x61 => { cursor += 1; yystate = 31; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } _ => { panic!("internal lexer error") } } } } }
/// Returns `true` when the byte sequence `-->` occurs in the leading,
/// scannable part of `s`.
///
/// The scan starts at `s[0]` and advances over ASCII bytes other than NUL
/// (`0x00`) and line feed (`0x0A`), and over multi-byte sequences whose lead
/// and continuation bytes fall in the ranges matched by the arms below (these
/// are the well-formed UTF-8 byte ranges). It stops at the first byte that
/// fits none of those arms; a read at or past `s.len()` yields `0`, so the end
/// of the slice stops it as well.
///
/// Returns `false` if the scan stops before `-->` has been completed, which
/// includes an empty `s`.
///
/// NOTE(review): the `yych`/`yystate`/`yyaccept` shape looks like generated
/// scanner output; presumably the source of truth is a scanner definition
/// file and this function should not be edited by hand — confirm.
/// NOTE(review): the name suggests a CommonMark HTML-block end condition;
/// verify against the caller.
pub fn html_block_end_2(s: &[u8]) -> bool { let mut cursor = 0; let mut marker = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yyaccept: usize = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate {
// State 0: read the first byte (0 when `s` is empty), step past it, and
// classify it. Anything that cannot start the scan falls through to
// state 1 -> state 2, which returns `false`.
0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x01..=0x09 | 0x0B..=0x2C | 0x2E..=0x7F => { yystate = 3; continue 'yyl; } 0x2D => { yystate = 4; continue 'yyl; } 0xC2..=0xDF => { yystate = 5; continue 'yyl; } 0xE0 => { yystate = 6; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { yystate = 7; continue 'yyl; } 0xED => { yystate = 8; continue 'yyl; } 0xF0 => { yystate = 9; continue 'yyl; } 0xF1..=0xF3 => { yystate = 10; continue 'yyl; } 0xF4 => { yystate = 11; continue 'yyl; } _ => { yystate
= 1; continue 'yyl; } } } 1 => { yystate = 2; continue 'yyl; } 2 => { return false; }
// States 3-11: handle what follows the first byte (state 4: first byte was
// `-`; states 5-11: first byte was a multi-byte lead). Each records
// `marker = cursor` with `yyaccept = 0`, so a later dead end (state 14)
// resolves to `false`.
3 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x7F | 0xC2..=0xF4 => { yystate = 13; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 4 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x2C | 0x2E..=0x7F | 0xC2..=0xF4 => { yystate = 13; continue 'yyl; } 0x2D => { cursor += 1; yystate = 23; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 5 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 12; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 6 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 7 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 8 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 9 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 10 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 2; continue
'yyl; } } } 11 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } }
// States 12/13: main scanning loop. State 12 peeks at the next byte; state 13
// consumes it and dispatches on its class.
12 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; yystate = 13; continue 'yyl; } 13 => match yych { 0x01..=0x09 | 0x0B..=0x2C | 0x2E..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x2D => { cursor += 1; yystate = 15; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } },
// State 14: dead end. Rewind to `marker`; the outcome is `true` (state 25)
// only if state 24 set `yyaccept = 1` earlier, otherwise `false` (state 2).
14 => { cursor = marker; if yyaccept == 0 { yystate = 2; continue 'yyl; } else { yystate = 25; continue 'yyl; } }
// State 15: a single `-` has just been consumed.
15 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x2C | 0x2E..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x2D => { cursor += 1; yystate = 23; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } }
// States 16-22: expect the remaining continuation byte(s) of a multi-byte
// sequence; a byte outside the expected range is a dead end (state 14).
16 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 12; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 17 => { yych = unsafe
{ if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 18 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 19 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 20 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 21 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 22 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } }
// State 23: at least two consecutive `-` have been consumed; a following `>`
// completes `-->` (state 24), another `-` stays here.
23 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x2C | 0x2E..=0x3D | 0x3F..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x2D => { cursor += 1; yystate = 23; continue 'yyl; } 0x3E => { cursor += 1; yystate = 24; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } }
// State 24: `-->` has been consumed. `yyaccept = 1` makes every later exit
// path end in state 25 (`return true`), even though scanning continues.
24 => { yyaccept = 1; marker = cursor; yych = unsafe { if cursor
< len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x2C | 0x2E..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x2D => { cursor += 1; yystate = 15; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 25; continue 'yyl; } } } 25 => { return true; } _ => { panic!("internal lexer error") } } } } }
/// Returns `true` when the byte sequence `?>` occurs in the leading,
/// scannable part of `s`.
///
/// The scan starts at `s[0]` and advances over ASCII bytes other than NUL
/// (`0x00`) and line feed (`0x0A`), and over multi-byte sequences whose lead
/// and continuation bytes fall in the ranges matched by the arms below (these
/// are the well-formed UTF-8 byte ranges). It stops at the first byte that
/// fits none of those arms; a read at or past `s.len()` yields `0`, so the end
/// of the slice stops it as well.
///
/// State 23 is entered once `?>` has been consumed and sets `yyaccept = 1`;
/// from then on every exit path reaches state 24 (`return true`). If the scan
/// stops before that, the result is `false` (this includes an empty `s`).
///
/// NOTE(review): the `yych`/`yystate`/`yyaccept` shape looks like generated
/// scanner output; presumably the source of truth is a scanner definition
/// file and this function should not be edited by hand — confirm.
/// NOTE(review): the name suggests a CommonMark HTML-block end condition;
/// verify against the caller.
pub fn html_block_end_3(s: &[u8]) -> bool { let mut cursor = 0; let mut marker = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yyaccept: usize = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate {
// State 0: read the first byte (0 when `s` is empty), step past it, and
// classify it. Anything that cannot start the scan falls through to
// state 1 -> state 2, which returns `false`.
0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x01..=0x09 | 0x0B..=0x3E | 0x40..=0x7F => { yystate = 3; continue 'yyl; } 0x3F => { yystate = 4; continue 'yyl; } 0xC2..=0xDF => { yystate = 5; continue 'yyl; } 0xE0 => { yystate = 6; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { yystate = 7; continue 'yyl; } 0xED => { yystate = 8; continue 'yyl; } 0xF0 => { yystate = 9; continue 'yyl; } 0xF1..=0xF3 => { yystate = 10; continue 'yyl; } 0xF4 => { yystate = 11; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } } 1 => { yystate = 2; continue 'yyl; } 2 => { return false; }
// States 3-11: handle what follows the first byte (state 4: first byte was
// `?`; states 5-11: first byte was a multi-byte lead). Each records
// `marker = cursor` with `yyaccept = 0`, so a later dead end (state 14)
// resolves to `false`.
3 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x7F | 0xC2..=0xF4 => { yystate = 13; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 4 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 |
0x0B..=0x3D | 0x3F..=0x7F | 0xC2..=0xF4 => { yystate = 13; continue 'yyl; } 0x3E => { cursor += 1; yystate = 23; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 5 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 12; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 6 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 7 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 8 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 9 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 10 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 11 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } }
// States 12/13: main scanning loop. State 12 peeks at the next byte; state 13
// consumes it and dispatches on its class.
12 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; yystate = 13; continue 'yyl; } 13 => match yych { 0x01..=0x09 | 0x0B..=0x3E | 0x40..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x3F =>
{ cursor += 1; yystate = 15; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } },
// State 14: dead end. Rewind to `marker`; the outcome is `true` (state 24)
// only if `yyaccept` was set to 1 earlier, otherwise `false` (state 2).
14 => { cursor = marker; if yyaccept == 0 { yystate = 2; continue 'yyl; } else { yystate = 24; continue 'yyl; } }
// State 15: a `?` has just been consumed; `>` completes the match (state 23),
// another `?` stays here.
15 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x3D | 0x40..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x3E => { cursor += 1; yystate = 23; continue 'yyl; } 0x3F => { cursor += 1; yystate = 15; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } }
// States 16-22: expect the remaining continuation byte(s) of a multi-byte
// sequence; a byte outside the expected range is a dead end (state 14).
16 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 12; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 17 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 18 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 19 => { yych = unsafe { if cursor < len
{ *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 20 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 21 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 22 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 23 => { yyaccept = 1; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x3E | 0x40..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x3F => { cursor += 1; yystate = 15; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 24; continue 'yyl; } } } 24 => { return true; } _ => { panic!("internal lexer error") } } } } }
/// Returns `true` when the byte `>` (`0x3E`) occurs in the leading, scannable
/// part of `s`.
///
/// The scan starts at `s[0]` and advances over ASCII bytes other than NUL
/// (`0x00`) and line feed (`0x0A`), and over multi-byte sequences whose lead
/// and continuation bytes fall in the ranges matched by the arms below (these
/// are the well-formed UTF-8 byte ranges). It stops at the first byte that
/// fits none of those arms; a read at or past `s.len()` yields `0`, so the end
/// of the slice stops it as well.
///
/// Returns `false` if the scan stops before any `>` has been consumed, which
/// includes an empty `s`.
///
/// NOTE(review): the `yych`/`yystate`/`yyaccept` shape looks like generated
/// scanner output; presumably the source of truth is a scanner definition
/// file and this function should not be edited by hand — confirm.
/// NOTE(review): the name suggests a CommonMark HTML-block end condition;
/// verify against the caller.
pub fn html_block_end_4(s: &[u8]) -> bool { let mut cursor = 0; let mut marker = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yyaccept: usize = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate {
// State 0: read the first byte (0 when `s` is empty), step past it, and
// classify it. Anything that cannot start the scan falls through to
// state 1 -> state 2, which returns `false`.
0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x01..=0x09 | 0x0B..=0x3D | 0x3F..=0x7F => { yystate = 3; continue 'yyl; } 0x3E => { yystate = 4;
continue 'yyl; } 0xC2..=0xDF => { yystate = 6; continue 'yyl; } 0xE0 => { yystate = 7; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { yystate = 8; continue 'yyl; } 0xED => { yystate = 9; continue 'yyl; } 0xF0 => { yystate = 10; continue 'yyl; } 0xF1..=0xF3 => { yystate = 11; continue 'yyl; } 0xF4 => { yystate = 12; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } } 1 => { yystate = 2; continue 'yyl; } 2 => { return false; }
// State 3: the first byte was an ASCII byte other than `>`. Record
// `marker = cursor` with `yyaccept = 0`, so a later dead end (state 15)
// resolves to `false`.
3 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x7F | 0xC2..=0xF4 => { yystate = 14; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } }
// State 4: a `>` has just been consumed. `yyaccept = 1` makes every later exit
// path end in state 5 (`return true`), even though scanning continues.
4 => { yyaccept = 1; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x3D | 0x3F..=0x7F => { cursor += 1; yystate = 13; continue 'yyl; } 0x3E => { cursor += 1; yystate = 4; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 5 => { return true; }
// States 6-12: the first byte was a multi-byte lead. Each records
// `marker = cursor` with `yyaccept = 0` and checks the next continuation byte;
// a mismatch returns `false` via state 2.
6 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 13; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 7 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 8 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match
yych { 0x80..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 9 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 10 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 11 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 12 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } }
// States 13/14: main scanning loop. State 13 peeks at the next byte; state 14
// consumes it and dispatches on its class.
13 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; yystate = 14; continue 'yyl; } 14 => match yych { 0x01..=0x09 | 0x0B..=0x3D | 0x3F..=0x7F => { cursor += 1; yystate = 13; continue 'yyl; } 0x3E => { cursor += 1; yystate = 4; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 15; continue 'yyl; } },
// State 15: dead end. Rewind to `marker`; the outcome is `true` (state 5)
// only if state 4 set `yyaccept = 1` earlier, otherwise `false` (state 2).
15 => { cursor = marker; if yyaccept == 0 { yystate = 2; continue 'yyl; } else { yystate = 5; continue 'yyl; } }
// States 16-22: expect the remaining continuation byte(s) of a multi-byte
// sequence; a byte outside the expected range is a dead end (state 15).
16 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate
= 13; continue 'yyl; } _ => { yystate = 15; continue 'yyl; } } } 17 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 15; continue 'yyl; } } } 18 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 15; continue 'yyl; } } } 19 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 15; continue 'yyl; } } } 20 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 15; continue 'yyl; } } } 21 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 15; continue 'yyl; } } } 22 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 15; continue 'yyl; } } } _ => { panic!("internal lexer error") } } } } } pub fn html_block_end_5(s: &[u8]) -> bool { let mut cursor = 0; let mut marker = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yyaccept: usize = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate { 0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x01..=0x09 | 0x0B..=0x5C | 0x5E..=0x7F => { yystate = 3; continue 'yyl; } 0x5D => { yystate = 4; continue 'yyl; } 0xC2..=0xDF => { yystate = 5; continue 'yyl; } 0xE0 => { yystate = 6; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { yystate = 7; continue 'yyl; } 0xED => { yystate = 8; continue 'yyl; } 0xF0 => { yystate = 9; continue 'yyl; } 
0xF1..=0xF3 => { yystate = 10; continue 'yyl; } 0xF4 => { yystate = 11; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } } 1 => { yystate = 2; continue 'yyl; } 2 => { return false; } 3 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x7F | 0xC2..=0xF4 => { yystate = 13; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 4 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x5C | 0x5E..=0x7F | 0xC2..=0xF4 => { yystate = 13; continue 'yyl; } 0x5D => { cursor += 1; yystate = 23; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 5 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 12; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 6 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 7 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 8 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 9 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 10 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; 
match yych { 0x80..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 11 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 12 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; yystate = 13; continue 'yyl; } 13 => match yych { 0x01..=0x09 | 0x0B..=0x5C | 0x5E..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x5D => { cursor += 1; yystate = 15; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } }, 14 => { cursor = marker; if yyaccept == 0 { yystate = 2; continue 'yyl; } else { yystate = 25; continue 'yyl; } } 15 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x5C | 0x5E..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x5D => { cursor += 1; yystate = 23; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 16 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { 
cursor += 1; yystate = 12; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 17 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 18 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 19 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 20 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 21 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 22 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 23 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x3D | 0x3F..=0x5C | 0x5E..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x3E => { cursor += 1; yystate = 24; continue 'yyl; } 0x5D => { cursor += 1; yystate = 23; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } 
_ => { yystate = 14; continue 'yyl; } } } 24 => { yyaccept = 1; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x5C | 0x5E..=0x7F => { cursor += 1; yystate = 12; continue 'yyl; } 0x5D => { cursor += 1; yystate = 15; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 25; continue 'yyl; } } } 25 => { return true; } _ => { panic!("internal lexer error") } } } } }
/// Scans `s`, starting at byte 0, for an opening code fence.
///
/// A match is a run of three or more backtick bytes (0x60) or three or more
/// tilde bytes (0x7E), followed by the rest of the line up to and including
/// the first `\n` (0x0A) or `\r` (0x0D). The rest of the line is only
/// inspected: on success the function returns `Some(n)`, where `n` is the
/// offset of the first byte after the fence run, i.e. the number of fence
/// characters.
///
/// Returns `None` when:
/// * `s` does not begin with at least three identical fence bytes;
/// * the rest of the line contains a NUL byte or a byte sequence outside the
///   accepted one- to four-byte ranges (the ranges checked in the
///   continuation states match well-formed UTF-8);
/// * a backtick fence is followed by another backtick later on the same line
///   (after a tilde fence both backticks and tildes are accepted there);
/// * the input ends before a `\n` or `\r` is seen. Reads at or past `len`
///   yield 0, and no state accepts 0.
///
/// NOTE(review): the `yych`/`yystate` naming suggests this is generated
/// scanner output; presumably changes belong in the generator input rather
/// than here. Confirm before hand-editing.
pub fn open_code_fence(s: &[u8]) -> Option<usize> { let mut cursor = 0; let mut marker = 0; let mut ctxmarker = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate {
// State 0: read the first byte. Every `get_unchecked` in this function is
// guarded by `cursor < len`; out-of-range reads are replaced by 0.
0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x60 => { yystate = 3; continue 'yyl; } 0x7E => { yystate = 4; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } }
// States 1 and 2: no match.
1 => { yystate = 2; continue 'yyl; }
2 => { return None; }
// States 3 and 4: one fence byte seen; remember the position in `marker`
// and require a second identical byte.
3 => { marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x60 => { cursor += 1; yystate = 5; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } }
4 => { marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x7E => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } }
// State 5: two backticks seen; a third is required.
5 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x60 => { cursor += 1; yystate = 8; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } }
// State 6: shared failure path; rewinds `cursor` to `marker`, then fails via state 2.
6 => { cursor = marker; yystate = 2; continue 'yyl; }
// State 7: two tildes seen; a third is required.
7 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x7E => { cursor += 1; yystate = 9; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } }
// State 8: three or more backticks seen. Further backticks extend the run;
// any other accepted byte records `ctxmarker` (the end of the fence run)
// before the rest of the line is examined.
8 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x0C | 0x0E..=0x5F | 0x61..=0x7F => { ctxmarker = cursor; cursor += 1; yystate = 10; continue 'yyl; } 0x0A | 0x0D => { ctxmarker = cursor; cursor += 1; yystate = 11; continue 'yyl; } 0x60 => { cursor += 1; yystate = 8; continue 'yyl; } 0xC2..=0xDF => { ctxmarker = cursor; cursor += 1; yystate = 12; continue 'yyl; } 0xE0 => { ctxmarker = cursor; cursor += 1; yystate = 13; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { ctxmarker = cursor; cursor += 1; yystate = 14; continue 'yyl; } 0xED => { ctxmarker = cursor; cursor += 1; yystate = 15; continue 'yyl; } 0xF0 => { ctxmarker = cursor; cursor += 1; yystate = 16; continue 'yyl; } 0xF1..=0xF3 => { ctxmarker = cursor; cursor += 1; yystate = 17; continue 'yyl; } 0xF4 => { ctxmarker = cursor; cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } }
// State 9: three or more tildes seen; the tilde counterpart of state 8.
9 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x0C | 0x0E..=0x7D | 0x7F => { ctxmarker = cursor; cursor += 1; yystate = 19; continue 'yyl; } 0x0A | 0x0D => { ctxmarker = cursor; cursor += 1; yystate = 20; continue 'yyl; } 0x7E => { cursor += 1; yystate = 9; continue 'yyl; } 0xC2..=0xDF => { ctxmarker = cursor; cursor += 1; yystate = 21; continue 'yyl; } 0xE0 => { ctxmarker = cursor; cursor += 1; yystate = 22; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { ctxmarker = cursor; cursor += 1; yystate = 23; continue 'yyl; } 0xED => { ctxmarker = cursor; cursor += 1; yystate = 24; continue 'yyl; } 0xF0 => { ctxmarker = cursor; cursor += 1; yystate = 25; continue 'yyl; } 0xF1..=0xF3 => { ctxmarker = cursor; cursor += 1; yystate = 26; continue 'yyl; } 0xF4 => { ctxmarker = cursor; cursor += 1; yystate = 27; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } }
// State 10: rest of the line after a backtick fence. 0x60 is not in the
// accepted set, so a later backtick sends the scan to the failure state.
10 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x0C | 0x0E..=0x5F | 0x61..=0x7F => { cursor += 1; yystate = 10; continue 'yyl; } 0x0A | 0x0D => { cursor += 1; yystate = 11; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 12; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 13; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 14; continue 'yyl; } 0xED => { cursor += 1; yystate = 15; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 16; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 17; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } }
// State 11: line ending reached after a backtick fence; report the end of
// the fence run, not the end of the line.
11 => { cursor = ctxmarker; { return Some(cursor); } }
// States 12-18: continuation bytes of a multi-byte sequence on the
// backtick path; the sequence ends by returning to state 10.
12 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 10; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } }
13 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 12; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } }
14 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 12; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } }
15 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 12; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } }
16 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 14; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } }
17 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 14; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } }
18 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 14; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } }
// State 19: rest of the line after a tilde fence. Unlike state 10, the
// accepted ASCII range here includes both 0x60 and 0x7E.
19 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x0C | 0x0E..=0x7F => { cursor += 1; yystate = 19; continue 'yyl; } 0x0A | 0x0D => { cursor += 1; yystate = 20; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 21; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 22; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 23; continue 'yyl; } 0xED => { cursor += 1; yystate = 24; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 25; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 26; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 27; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } }
// State 20: line ending reached after a tilde fence; report the end of the fence run.
20 => { cursor = ctxmarker; { return Some(cursor); } }
// States 21 and up: continuation bytes of a multi-byte sequence on the
// tilde path; the sequence ends by returning to state 19.
21 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 19; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } }
22 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 21; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } }
23 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 21; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } }
24 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 21; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } }
25 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 23; continue 'yyl; } _ => { yystate = 6; continue 'yyl;
} } } 26 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 23; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 27 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 23; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } _ => { panic!("internal lexer error") } } } } }
/// Scans `s`, starting at byte 0, for a closing code fence.
///
/// A match is a run of three or more backtick bytes (0x60) or three or more
/// tilde bytes (0x7E), followed by zero or more tab (0x09) or space (0x20)
/// bytes and then a `\n` (0x0A) or `\r` (0x0D). The trailing whitespace and
/// line ending are only inspected: on success the function returns
/// `Some(n)`, where `n` is the offset of the first byte after the fence run,
/// i.e. the number of fence characters.
///
/// Returns `None` when `s` does not begin with at least three identical
/// fence bytes, when anything other than tabs or spaces sits between the
/// fence run and the line ending, or when the input ends before a `\n` or
/// `\r` is seen. Reads at or past `len` yield 0, and no state accepts 0.
///
/// NOTE(review): the `yych`/`yystate` naming suggests this is generated
/// scanner output; presumably changes belong in the generator input rather
/// than here. Confirm before hand-editing.
pub fn close_code_fence(s: &[u8]) -> Option<usize> { let mut cursor = 0; let mut marker = 0; let mut ctxmarker = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate {
// State 0: read the first byte. Every `get_unchecked` in this function is
// guarded by `cursor < len`; out-of-range reads are replaced by 0.
0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x60 => { yystate = 3; continue 'yyl; } 0x7E => { yystate = 4; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } }
// States 1 and 2: no match.
1 => { yystate = 2; continue 'yyl; }
2 => { return None; }
// States 3 and 4: one fence byte seen; remember the position in `marker`
// and require a second identical byte.
3 => { marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x60 => { cursor += 1; yystate = 5; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } }
4 => { marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x7E => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } }
// State 5: two backticks seen; a third is required.
5 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x60 => { cursor += 1; yystate = 8; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } }
// State 6: shared failure path; rewinds `cursor` to `marker`, then fails via state 2.
6 => { cursor = marker; yystate = 2; continue 'yyl; }
// State 7: two tildes seen; a third is required.
7 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x7E => { cursor += 1; yystate = 9; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } }
// State 8: three or more backticks seen. Further backticks extend the run;
// a tab, space or line ending records `ctxmarker` (the end of the fence run).
8 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09 | 0x20 => { ctxmarker = cursor; cursor += 1; yystate = 10; continue 'yyl; } 0x0A | 0x0D => { ctxmarker = cursor; cursor += 1; yystate = 11; continue 'yyl; } 0x60 => { cursor += 1; yystate = 8; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } }
// State 9: three or more tildes seen; the tilde counterpart of state 8.
9 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09 | 0x20 => { ctxmarker = cursor; cursor += 1; yystate = 12; continue 'yyl; } 0x0A | 0x0D => { ctxmarker = cursor; cursor += 1; yystate = 13; continue 'yyl; } 0x7E => { cursor += 1; yystate = 9; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } }
// State 10: tabs/spaces after a backtick fence; only a line ending may follow.
10 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09 | 0x20 => { cursor += 1; yystate = 10; continue 'yyl; } 0x0A | 0x0D => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } }
// State 11: line ending reached after a backtick fence; report the end of the fence run.
11 => { cursor = ctxmarker; { return Some(cursor); } }
// State 12: tabs/spaces after a tilde fence; only a line ending may follow.
12 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09 | 0x20 => { cursor += 1; yystate = 12; continue 'yyl; } 0x0A | 0x0D => { cursor += 1; yystate = 13; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } }
// State 13: line ending reached after a tilde fence; report the end of the fence run.
13 => { cursor = ctxmarker; { return Some(cursor); } }
_ => { panic!("internal lexer error") } } } } }
pub fn html_block_start(s: &[u8]) -> Option<usize> { let mut cursor = 0; let mut marker = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate { 0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x3C => { yystate = 3; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } } 1 => { yystate = 2; continue 'yyl; } 2 => { return None; } 3 => { marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x21 => { cursor += 1; yystate = 4; continue 'yyl; } 0x2F => { cursor += 1; yystate = 6; continue 'yyl; } 0x3F => { cursor += 1; yystate = 7; continue 'yyl; } 0x41 | 0x61 => { cursor += 1; yystate = 8;
continue 'yyl; } 0x42 | 0x62 => { cursor += 1; yystate = 9; continue 'yyl; } 0x43 | 0x63 => { cursor += 1; yystate = 10; continue 'yyl; } 0x44 | 0x64 => { cursor += 1; yystate = 11; continue 'yyl; } 0x46 | 0x66 => { cursor += 1; yystate = 12; continue 'yyl; } 0x48 | 0x68 => { cursor += 1; yystate = 13; continue 'yyl; } 0x49 | 0x69 => { cursor += 1; yystate = 14; continue 'yyl; } 0x4C | 0x6C => { cursor += 1; yystate = 15; continue 'yyl; } 0x4D | 0x6D => { cursor += 1; yystate = 16; continue 'yyl; } 0x4E | 0x6E => { cursor += 1; yystate = 17; continue 'yyl; } 0x4F | 0x6F => { cursor += 1; yystate = 18; continue 'yyl; } 0x50 | 0x70 => { cursor += 1; yystate = 19; continue 'yyl; } 0x53 | 0x73 => { cursor += 1; yystate = 20; continue 'yyl; } 0x54 | 0x74 => { cursor += 1; yystate = 21; continue 'yyl; } 0x55 | 0x75 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 4 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 23; continue 'yyl; } 0x41..=0x5A => { cursor += 1; yystate = 24; continue 'yyl; } 0x5B => { cursor += 1; yystate = 25; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 5 => { cursor = marker; yystate = 2; continue 'yyl; } 6 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 8; continue 'yyl; } 0x42 | 0x62 => { cursor += 1; yystate = 9; continue 'yyl; } 0x43 | 0x63 => { cursor += 1; yystate = 10; continue 'yyl; } 0x44 | 0x64 => { cursor += 1; yystate = 11; continue 'yyl; } 0x46 | 0x66 => { cursor += 1; yystate = 12; continue 'yyl; } 0x48 | 0x68 => { cursor += 1; yystate = 13; continue 'yyl; } 0x49 | 0x69 => { cursor += 1; yystate = 14; continue 'yyl; } 0x4C | 0x6C => { cursor += 1; yystate = 15; continue 'yyl; } 0x4D | 0x6D => { cursor += 1; yystate = 16; continue 'yyl; } 0x4E | 0x6E => { cursor += 1; yystate = 17; continue 'yyl; } 0x4F | 0x6F => { cursor += 
1; yystate = 18; continue 'yyl; } 0x50 | 0x70 => { cursor += 1; yystate = 26; continue 'yyl; } 0x53 | 0x73 => { cursor += 1; yystate = 27; continue 'yyl; } 0x54 | 0x74 => { cursor += 1; yystate = 28; continue 'yyl; } 0x55 | 0x75 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 7 => { return Some(3); } 8 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x44 | 0x64 => { cursor += 1; yystate = 29; continue 'yyl; } 0x52 | 0x72 => { cursor += 1; yystate = 30; continue 'yyl; } 0x53 | 0x73 => { cursor += 1; yystate = 31; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 9 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 32; continue 'yyl; } 0x4C | 0x6C => { cursor += 1; yystate = 33; continue 'yyl; } 0x4F | 0x6F => { cursor += 1; yystate = 34; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 10 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 35; continue 'yyl; } 0x45 | 0x65 => { cursor += 1; yystate = 36; continue 'yyl; } 0x4F | 0x6F => { cursor += 1; yystate = 37; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 11 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x44 | 0x4C | 0x54 | 0x64 | 0x6C | 0x74 => { cursor += 1; yystate = 38; continue 'yyl; } 0x45 | 0x65 => { cursor += 1; yystate = 39; continue 'yyl; } 0x49 | 0x69 => { cursor += 1; yystate = 40; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 12 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x49 | 0x69 => { cursor += 1; yystate = 41; continue 'yyl; } 0x4F | 0x6F => { cursor += 1; yystate = 42; continue 'yyl; } 0x52 | 0x72 => { cursor += 1; yystate = 43; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 13 => { yych = unsafe { if 
cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x31..=0x36 | 0x52 | 0x72 => { cursor += 1; yystate = 38; continue 'yyl; } 0x45 | 0x65 => { cursor += 1; yystate = 44; continue 'yyl; } 0x54 | 0x74 => { cursor += 1; yystate = 45; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 14 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x46 | 0x66 => { cursor += 1; yystate = 46; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 15 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x45 | 0x65 => { cursor += 1; yystate = 47; continue 'yyl; } 0x49 | 0x69 => { cursor += 1; yystate = 48; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 16 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 49; continue 'yyl; } 0x45 | 0x65 => { cursor += 1; yystate = 50; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 17 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 51; continue 'yyl; } 0x4F | 0x6F => { cursor += 1; yystate = 52; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 18 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4C | 0x6C => { cursor += 1; yystate = 38; continue 'yyl; } 0x50 | 0x70 => { cursor += 1; yystate = 53; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 19 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 | 0x3E => { cursor += 1; yystate = 54; continue 'yyl; } 0x2F => { cursor += 1; yystate = 55; continue 'yyl; } 0x41 | 0x61 => { cursor += 1; yystate = 56; continue 'yyl; } 0x52 | 0x72 => { cursor += 1; yystate = 57; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 20 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } 
else { 0 } }; match yych { 0x43 | 0x63 => { cursor += 1; yystate = 58; continue 'yyl; } 0x45 | 0x65 => { cursor += 1; yystate = 59; continue 'yyl; } 0x54 | 0x74 => { cursor += 1; yystate = 60; continue 'yyl; } 0x55 | 0x75 => { cursor += 1; yystate = 61; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 21 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 62; continue 'yyl; } 0x42 | 0x62 => { cursor += 1; yystate = 63; continue 'yyl; } 0x44 | 0x64 => { cursor += 1; yystate = 38; continue 'yyl; } 0x45 | 0x65 => { cursor += 1; yystate = 64; continue 'yyl; } 0x46 | 0x66 => { cursor += 1; yystate = 65; continue 'yyl; } 0x48 | 0x68 => { cursor += 1; yystate = 66; continue 'yyl; } 0x49 | 0x69 => { cursor += 1; yystate = 67; continue 'yyl; } 0x52 | 0x72 => { cursor += 1; yystate = 68; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 22 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4C | 0x6C => { cursor += 1; yystate = 38; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 23 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 69; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 24 => { return Some(4); } 25 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x43 | 0x63 => { cursor += 1; yystate = 70; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 26 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 | 0x3E => { cursor += 1; yystate = 54; continue 'yyl; } 0x2F => { cursor += 1; yystate = 55; continue 'yyl; } 0x41 | 0x61 => { cursor += 1; yystate = 56; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 27 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x45 | 0x65 => { cursor += 
1; yystate = 59; continue 'yyl; } 0x55 | 0x75 => { cursor += 1; yystate = 61; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 28 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 62; continue 'yyl; } 0x42 | 0x62 => { cursor += 1; yystate = 63; continue 'yyl; } 0x44 | 0x64 => { cursor += 1; yystate = 38; continue 'yyl; } 0x46 | 0x66 => { cursor += 1; yystate = 65; continue 'yyl; } 0x48 | 0x68 => { cursor += 1; yystate = 66; continue 'yyl; } 0x49 | 0x69 => { cursor += 1; yystate = 67; continue 'yyl; } 0x52 | 0x72 => { cursor += 1; yystate = 68; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 29 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x44 | 0x64 => { cursor += 1; yystate = 71; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 30 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x54 | 0x74 => { cursor += 1; yystate = 72; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 31 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x49 | 0x69 => { cursor += 1; yystate = 73; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 32 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x53 | 0x73 => { cursor += 1; yystate = 74; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 33 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4F | 0x6F => { cursor += 1; yystate = 75; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 34 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x44 | 0x64 => { cursor += 1; yystate = 76; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 35 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x50 | 0x70 => 
{ cursor += 1; yystate = 77; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 36 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4E | 0x6E => { cursor += 1; yystate = 78; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 37 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4C | 0x6C => { cursor += 1; yystate = 79; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 38 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 | 0x3E => { cursor += 1; yystate = 54; continue 'yyl; } 0x2F => { cursor += 1; yystate = 55; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 39 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x54 | 0x74 => { cursor += 1; yystate = 80; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 40 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 81; continue 'yyl; } 0x52 | 0x56 | 0x72 | 0x76 => { cursor += 1; yystate = 38; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 41 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x45 | 0x65 => { cursor += 1; yystate = 82; continue 'yyl; } 0x47 | 0x67 => { cursor += 1; yystate = 83; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 42 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4F | 0x6F => { cursor += 1; yystate = 78; continue 'yyl; } 0x52 | 0x72 => { cursor += 1; yystate = 84; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 43 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 85; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 44 => { yych = unsafe { if cursor < len { 
*s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 86; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 45 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4D | 0x6D => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 46 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x52 | 0x72 => { cursor += 1; yystate = 87; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 47 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x47 | 0x67 => { cursor += 1; yystate = 88; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 48 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 | 0x3E => { cursor += 1; yystate = 54; continue 'yyl; } 0x2F => { cursor += 1; yystate = 55; continue 'yyl; } 0x4E | 0x6E => { cursor += 1; yystate = 89; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 49 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x49 | 0x69 => { cursor += 1; yystate = 90; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 50 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4E | 0x6E => { cursor += 1; yystate = 91; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 51 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x56 | 0x76 => { cursor += 1; yystate = 38; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 52 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x46 | 0x66 => { cursor += 1; yystate = 92; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 53 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x54 | 0x74 => { cursor += 1; 
yystate = 93; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 54 => { return Some(6); } 55 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x3E => { cursor += 1; yystate = 54; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 56 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x52 | 0x72 => { cursor += 1; yystate = 94; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 57 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x45 | 0x65 => { cursor += 1; yystate = 95; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 58 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x52 | 0x72 => { cursor += 1; yystate = 96; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 59 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 97; continue 'yyl; } 0x43 | 0x63 => { cursor += 1; yystate = 77; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 60 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x59 | 0x79 => { cursor += 1; yystate = 98; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 61 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4D | 0x6D => { cursor += 1; yystate = 99; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 62 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x42 | 0x62 => { cursor += 1; yystate = 100; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 63 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4F | 0x6F => { cursor += 1; yystate = 34; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 64 => { yych = unsafe { if cursor < len { 
*s.get_unchecked(cursor) } else { 0 } }; match yych { 0x58 | 0x78 => { cursor += 1; yystate = 101; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 65 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4F | 0x6F => { cursor += 1; yystate = 102; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 66 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 | 0x3E => { cursor += 1; yystate = 54; continue 'yyl; } 0x2F => { cursor += 1; yystate = 55; continue 'yyl; } 0x45 | 0x65 => { cursor += 1; yystate = 103; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 67 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x54 | 0x74 => { cursor += 1; yystate = 100; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 68 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 | 0x3E => { cursor += 1; yystate = 54; continue 'yyl; } 0x2F => { cursor += 1; yystate = 55; continue 'yyl; } 0x41 | 0x61 => { cursor += 1; yystate = 104; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 69 => { return Some(2); } 70 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x44 | 0x64 => { cursor += 1; yystate = 105; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 71 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x52 | 0x72 => { cursor += 1; yystate = 106; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 72 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x49 | 0x69 => { cursor += 1; yystate = 107; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 73 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x44 | 0x64 => { cursor += 1; yystate = 108; continue 'yyl; } _ => { 
yystate = 5; continue 'yyl; } } } 74 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x45 | 0x65 => { cursor += 1; yystate = 109; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 75 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x43 | 0x63 => { cursor += 1; yystate = 110; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 76 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x59 | 0x79 => { cursor += 1; yystate = 38; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 77 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x54 | 0x74 => { cursor += 1; yystate = 111; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 78 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x54 | 0x74 => { cursor += 1; yystate = 112; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 79 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 | 0x3E => { cursor += 1; yystate = 54; continue 'yyl; } 0x2F => { cursor += 1; yystate = 55; continue 'yyl; } 0x47 | 0x67 => { cursor += 1; yystate = 113; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 80 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 114; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 81 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4C | 0x6C => { cursor += 1; yystate = 115; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 82 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4C | 0x6C => { cursor += 1; yystate = 116; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 83 => { yych = unsafe { if cursor < len { 
*s.get_unchecked(cursor) } else { 0 } }; match yych { 0x43 | 0x63 => { cursor += 1; yystate = 117; continue 'yyl; } 0x55 | 0x75 => { cursor += 1; yystate = 118; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 84 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4D | 0x6D => { cursor += 1; yystate = 38; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 85 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4D | 0x6D => { cursor += 1; yystate = 119; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 86 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x44 | 0x64 => { cursor += 1; yystate = 120; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 87 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 121; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 88 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x45 | 0x65 => { cursor += 1; yystate = 122; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 89 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4B | 0x6B => { cursor += 1; yystate = 38; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 90 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4E | 0x6E => { cursor += 1; yystate = 38; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 91 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x55 | 0x75 => { cursor += 1; yystate = 123; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 92 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x52 | 0x72 => { cursor += 1; yystate = 124; continue 'yyl; } _ => { yystate = 5; continue 
'yyl; } } } 93 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x47 | 0x67 => { cursor += 1; yystate = 113; continue 'yyl; } 0x49 | 0x69 => { cursor += 1; yystate = 125; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 94 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 84; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 95 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 | 0x3E => { cursor += 1; yystate = 126; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 96 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x49 | 0x69 => { cursor += 1; yystate = 127; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 97 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x52 | 0x72 => { cursor += 1; yystate = 128; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 98 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4C | 0x6C => { cursor += 1; yystate = 57; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 99 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4D | 0x6D => { cursor += 1; yystate = 129; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 100 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4C | 0x6C => { cursor += 1; yystate = 108; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 101 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x54 | 0x74 => { cursor += 1; yystate = 130; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 102 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4F | 0x6F => { cursor 
+= 1; yystate = 131; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 103 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 132; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 104 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x43 | 0x63 => { cursor += 1; yystate = 89; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 105 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 133; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 106 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x45 | 0x65 => { cursor += 1; yystate = 134; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 107 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x43 | 0x63 => { cursor += 1; yystate = 100; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 108 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x45 | 0x65 => { cursor += 1; yystate = 38; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 109 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 | 0x3E => { cursor += 1; yystate = 54; continue 'yyl; } 0x2F => { cursor += 1; yystate = 55; continue 'yyl; } 0x46 | 0x66 => { cursor += 1; yystate = 135; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 110 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4B | 0x6B => { cursor += 1; yystate = 136; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 111 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x49 | 0x69 => { cursor += 1; yystate = 125; continue 'yyl; } _ => { yystate = 5; continue 'yyl; 
} } } 112 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x45 | 0x65 => { cursor += 1; yystate = 137; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 113 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x52 | 0x72 => { cursor += 1; yystate = 138; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 114 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x49 | 0x69 => { cursor += 1; yystate = 139; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 115 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4F | 0x6F => { cursor += 1; yystate = 140; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 116 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x44 | 0x64 => { cursor += 1; yystate = 141; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 117 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 35; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 118 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x52 | 0x72 => { cursor += 1; yystate = 108; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 119 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x45 | 0x65 => { cursor += 1; yystate = 142; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 120 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 | 0x3E => { cursor += 1; yystate = 54; continue 'yyl; } 0x2F => { cursor += 1; yystate = 55; continue 'yyl; } 0x45 | 0x65 => { cursor += 1; yystate = 137; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 121 => { yych = unsafe { if cursor < len { 
*s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4D | 0x6D => { cursor += 1; yystate = 108; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 122 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4E | 0x6E => { cursor += 1; yystate = 132; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 123 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 | 0x3E => { cursor += 1; yystate = 54; continue 'yyl; } 0x2F => { cursor += 1; yystate = 55; continue 'yyl; } 0x49 | 0x69 => { cursor += 1; yystate = 143; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 124 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 144; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 125 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4F | 0x6F => { cursor += 1; yystate = 90; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 126 => { return Some(1); } 127 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x50 | 0x70 => { cursor += 1; yystate = 145; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 128 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x43 | 0x63 => { cursor += 1; yystate = 146; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 129 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 147; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 130 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 148; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 131 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; 
match yych { 0x54 | 0x74 => { cursor += 1; yystate = 38; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 132 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x44 | 0x64 => { cursor += 1; yystate = 38; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 133 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x54 | 0x74 => { cursor += 1; yystate = 149; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 134 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x53 | 0x73 => { cursor += 1; yystate = 150; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 135 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4F | 0x6F => { cursor += 1; yystate = 151; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 136 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x51 | 0x71 => { cursor += 1; yystate = 152; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 137 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x52 | 0x72 => { cursor += 1; yystate = 38; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 138 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4F | 0x6F => { cursor += 1; yystate = 153; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 139 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4C | 0x6C => { cursor += 1; yystate = 150; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 140 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x47 | 0x67 => { cursor += 1; yystate = 38; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 141 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; 
match yych { 0x53 | 0x73 => { cursor += 1; yystate = 154; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 142 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 | 0x3E => { cursor += 1; yystate = 54; continue 'yyl; } 0x2F => { cursor += 1; yystate = 55; continue 'yyl; } 0x53 | 0x73 => { cursor += 1; yystate = 154; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 143 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x54 | 0x74 => { cursor += 1; yystate = 155; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 144 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4D | 0x6D => { cursor += 1; yystate = 156; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 145 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x54 | 0x74 => { cursor += 1; yystate = 95; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 146 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x48 | 0x68 => { cursor += 1; yystate = 38; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 147 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x52 | 0x72 => { cursor += 1; yystate = 76; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 148 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x52 | 0x72 => { cursor += 1; yystate = 157; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 149 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 158; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 150 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x53 | 0x73 => { cursor += 1; yystate = 38; continue 'yyl; } 
_ => { yystate = 5; continue 'yyl; } } } 151 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4E | 0x6E => { cursor += 1; yystate = 131; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 152 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x55 | 0x75 => { cursor += 1; yystate = 159; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 153 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x55 | 0x75 => { cursor += 1; yystate = 160; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 154 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x45 | 0x65 => { cursor += 1; yystate = 131; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 155 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x45 | 0x65 => { cursor += 1; yystate = 84; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 156 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x45 | 0x65 => { cursor += 1; yystate = 150; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 157 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x45 | 0x65 => { cursor += 1; yystate = 161; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 158 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x5B => { cursor += 1; yystate = 162; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 159 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4F | 0x6F => { cursor += 1; yystate = 163; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 160 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x50 | 0x70 => { cursor += 1; yystate = 38; continue 'yyl; } _ => { 
yystate = 5; continue 'yyl; } } } 161 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 95; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 162 => { return Some(5); } 163 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x54 | 0x74 => { cursor += 1; yystate = 108; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } _ => { panic!("internal lexer error") } } } } } pub fn html_block_start_7(s: &[u8]) -> Option<usize> { let mut cursor = 0; let mut marker = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yyaccept: usize = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate { 0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x3C => { yystate = 3; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } } 1 => { yystate = 2; continue 'yyl; } 2 => { return None; } 3 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2F => { cursor += 1; yystate = 4; continue 'yyl; } 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 6; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 4 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 5 => { cursor = marker; if yyaccept == 0 { yystate = 2; continue 'yyl; } else { yystate = 14; continue 'yyl; } } 6 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 => { cursor += 1; yystate = 8; continue 'yyl; } 0x2D | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 6; continue 'yyl; } 0x2F => { cursor += 1; yystate = 9; continue 'yyl; } 0x3E => { cursor += 1; yystate = 10; continue 'yyl; } _ => { 
yystate = 5; continue 'yyl; } } } 7 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 => { cursor += 1; yystate = 11; continue 'yyl; } 0x2D | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 7; continue 'yyl; } 0x3E => { cursor += 1; yystate = 10; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 8 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 => { cursor += 1; yystate = 8; continue 'yyl; } 0x2F => { cursor += 1; yystate = 9; continue 'yyl; } 0x3A | 0x41..=0x5A | 0x5F | 0x61..=0x7A => { cursor += 1; yystate = 12; continue 'yyl; } 0x3E => { cursor += 1; yystate = 10; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 9 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x3E => { cursor += 1; yystate = 10; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 10 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09 | 0x0C | 0x20 => { cursor += 1; yystate = 10; continue 'yyl; } 0x0A => { cursor += 1; yystate = 13; continue 'yyl; } 0x0D => { cursor += 1; yystate = 15; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 11 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 => { cursor += 1; yystate = 11; continue 'yyl; } 0x3E => { cursor += 1; yystate = 10; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 12 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 => { cursor += 1; yystate = 16; continue 'yyl; } 0x2D..=0x2E | 0x30..=0x3A | 0x41..=0x5A | 0x5F | 0x61..=0x7A => { cursor += 1; yystate = 12; continue 'yyl; } 0x2F => { cursor += 1; yystate = 9; continue 'yyl; } 0x3D => { cursor += 1; yystate = 17; continue 'yyl; } 0x3E => { cursor += 1; yystate = 10; continue 'yyl; } _ => { yystate = 
5; continue 'yyl; } } } 13 => { yyaccept = 1; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09 | 0x0C | 0x20 => { cursor += 1; yystate = 10; continue 'yyl; } 0x0A => { cursor += 1; yystate = 13; continue 'yyl; } 0x0D => { cursor += 1; yystate = 15; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 14 => { return Some(7); } 15 => { yystate = 14; continue 'yyl; } 16 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 => { cursor += 1; yystate = 16; continue 'yyl; } 0x2F => { cursor += 1; yystate = 9; continue 'yyl; } 0x3A | 0x41..=0x5A | 0x5F | 0x61..=0x7A => { cursor += 1; yystate = 12; continue 'yyl; } 0x3D => { cursor += 1; yystate = 17; continue 'yyl; } 0x3E => { cursor += 1; yystate = 10; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 17 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x08 | 0x0E..=0x1F | 0x21 | 0x23..=0x26 | 0x28..=0x3B | 0x3F..=0x5F | 0x61..=0x7F => { cursor += 1; yystate = 18; continue 'yyl; } 0x09..=0x0D | 0x20 => { cursor += 1; yystate = 17; continue 'yyl; } 0x22 => { cursor += 1; yystate = 19; continue 'yyl; } 0x27 => { cursor += 1; yystate = 20; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 21; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 22; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 23; continue 'yyl; } 0xED => { cursor += 1; yystate = 24; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 25; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 26; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 27; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 18 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x08 | 0x0E..=0x1F | 0x21 | 0x23..=0x26 | 0x28..=0x3B | 0x3F..=0x5F | 0x61..=0x7F => { cursor += 1; yystate = 18; continue 'yyl; } 0x09..=0x0D 
| 0x20 => { cursor += 1; yystate = 8; continue 'yyl; } 0x3E => { cursor += 1; yystate = 10; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 21; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 22; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 23; continue 'yyl; } 0xED => { cursor += 1; yystate = 24; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 25; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 26; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 27; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 19 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x21 | 0x23..=0x7F => { cursor += 1; yystate = 19; continue 'yyl; } 0x22 => { cursor += 1; yystate = 28; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 29; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 30; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 31; continue 'yyl; } 0xED => { cursor += 1; yystate = 32; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 33; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 34; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 35; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 20 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x26 | 0x28..=0x7F => { cursor += 1; yystate = 20; continue 'yyl; } 0x27 => { cursor += 1; yystate = 28; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 36; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 37; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 38; continue 'yyl; } 0xED => { cursor += 1; yystate = 39; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 40; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 41; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 42; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 21 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 
} }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 22 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 21; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 23 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 21; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 24 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 21; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 25 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 23; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 26 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 23; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 27 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 23; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 28 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 => { cursor += 1; yystate = 8; continue 'yyl; } 0x2F => { cursor += 1; yystate = 9; continue 'yyl; } 0x3E => { cursor += 1; yystate = 10; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 29 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 19; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 30 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 29; continue 'yyl; } _ => { yystate = 5; 
continue 'yyl; } } } 31 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 29; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 32 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 29; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 33 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 31; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 34 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 31; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 35 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 31; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 36 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 20; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 37 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 36; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 38 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 36; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 39 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 36; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 40 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 38; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } 
} 41 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 38; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 42 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 38; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } _ => { panic!("internal lexer error") } } } } } pub enum SetextChar { Equals, Hyphen, } pub fn setext_heading_line(s: &[u8]) -> Option<SetextChar> { let mut cursor = 0; let mut marker = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate { 0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x2D => { yystate = 3; continue 'yyl; } 0x3D => { yystate = 4; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } } 1 => { yystate = 2; continue 'yyl; } 2 => { return None; } 3 => { marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0A | 0x0D | 0x20 => { yystate = 6; continue 'yyl; } 0x2D => { cursor += 1; yystate = 9; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 4 => { marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0A | 0x0D | 0x20 => { yystate = 11; continue 'yyl; } 0x3D => { cursor += 1; yystate = 13; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 5 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; yystate = 6; continue 'yyl; } 6 => match yych { 0x09 | 0x20 => { cursor += 1; yystate = 5; continue 'yyl; } 0x0A | 0x0D => { cursor += 1; yystate = 8; continue 'yyl; } _ => { yystate = 7; continue 'yyl; } }, 7 => { cursor = marker; yystate = 2; continue 'yyl; } 8 => { return Some(SetextChar::Hyphen); } 9 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else 
{ 0 } }; match yych { 0x09 | 0x20 => { cursor += 1; yystate = 5; continue 'yyl; } 0x0A | 0x0D => { cursor += 1; yystate = 8; continue 'yyl; } 0x2D => { cursor += 1; yystate = 9; continue 'yyl; } _ => { yystate = 7; continue 'yyl; } } } 10 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; yystate = 11; continue 'yyl; } 11 => match yych { 0x09 | 0x20 => { cursor += 1; yystate = 10; continue 'yyl; } 0x0A | 0x0D => { cursor += 1; yystate = 12; continue 'yyl; } _ => { yystate = 7; continue 'yyl; } }, 12 => { return Some(SetextChar::Equals); } 13 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09 | 0x20 => { cursor += 1; yystate = 10; continue 'yyl; } 0x0A | 0x0D => { cursor += 1; yystate = 12; continue 'yyl; } 0x3D => { cursor += 1; yystate = 13; continue 'yyl; } _ => { yystate = 7; continue 'yyl; } } } _ => { panic!("internal lexer error") } } } } } pub fn footnote_definition(s: &[u8]) -> Option<usize> { let mut cursor = 0; let mut marker = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate { 0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x5B => { yystate = 3; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } } 1 => { yystate = 2; continue 'yyl; } 2 => { return None; } 3 => { marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x5E => { cursor += 1; yystate = 4; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 4 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x5D => { yystate = 5; continue 'yyl; } _ => { yystate = 7; continue 'yyl; } } } 5 => { cursor = marker; yystate = 2; continue 'yyl; } 6 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; yystate = 7; continue 'yyl; } 7 => match yych { 0x01..=0x08 | 0x0B..=0x0C 
| 0x0E..=0x1F | 0x21..=0x5C | 0x5E..=0x7F => { cursor += 1; yystate = 6; continue 'yyl; } 0x5D => { cursor += 1; yystate = 15; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 8; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 9; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 10; continue 'yyl; } 0xED => { cursor += 1; yystate = 11; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 12; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 13; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 14; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } }, 8 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 6; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 9 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 8; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 10 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 8; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 11 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 8; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 12 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 10; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 13 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 10; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 14 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 10; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 15 => { yych = unsafe { 
if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x3A => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 16 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09 | 0x20 => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 17; continue 'yyl; } } } 17 => { return Some(cursor); } _ => { panic!("internal lexer error") } } } } }

/// Matches a scheme followed by a colon at the very start of `s`.
///
/// Accepted input, byte by byte:
/// * one ASCII letter (`A-Z` / `a-z`),
/// * then 1 to 31 bytes drawn from ASCII letters, digits, `+`, `-` and `.`,
/// * then `:` (0x3A).
///
/// Returns `Some(n)` where `n` is the number of bytes consumed, including the
/// trailing `:`; returns `None` when `s` does not start with such a prefix.
///
/// Implementation notes:
/// * This is a table-style state machine driven by `yystate`
///   (NOTE(review): looks machine-generated, presumably from a `.re` scanner
///   spec -- if so, change the spec and regenerate instead of editing by hand).
/// * Every `get_unchecked(cursor)` is guarded by `cursor < len`; a read at or
///   past the end yields the byte 0, which no accepting transition matches, so
///   truncated input ends in `None`.
pub fn scheme(s: &[u8]) -> Option<usize> { let mut cursor = 0; let mut marker = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate {
    // State 0: the first byte must be an ASCII letter. `cursor` is advanced
    // unconditionally here, even when the read fell back to the 0 sentinel.
    0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x41..=0x5A | 0x61..=0x7A => { yystate = 3; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } }
    // States 1 and 2: failure path; state 2 is the single `None` exit.
    1 => { yystate = 2; continue 'yyl; }
    2 => { return None; }
    // State 3: remember the position after the first byte in `marker`, then
    // require a second scheme byte (a lone letter followed by ':' is rejected).
    3 => { marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 4; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } }
    // State 4 and the states after it: either consume one more scheme byte
    // (moving to the next counting state) or consume ':' and accept in state 7.
    4 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 6; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } }
    // State 5: mismatch after the marker was set; rewind `cursor` to `marker`
    // and leave through the `None` exit.
    5 => { cursor = marker; yystate = 2; continue 'yyl; }
    6 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 8; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } }
    // State 7: accept; `cursor` is the index just past the ':'.
    7 => { return Some(cursor); }
    8 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 |
0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 9; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 9 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 10; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 10 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 11; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 11 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 12; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 12 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 13; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 13 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 14; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 14 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 15; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 15 => { yych = unsafe { if 
cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 16; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 16 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 17; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 17 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 18; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 18 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 19; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 19 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 20; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 20 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 21; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 21 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 22; continue 'yyl; } 0x3A => { cursor += 
1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 22 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 23; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 23 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 24; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 24 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 25; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 25 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 26; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 26 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 27; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 27 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 28; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 28 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 
0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 29; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 29 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 30; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 30 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 31; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 31 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 32; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 32 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 33; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 33 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 34; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 34 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 35; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 35 => { yych = 
unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 36; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } }
    // scheme(), state 36: reached only after 32 scheme bytes have been
    // consumed, so nothing but ':' is accepted here.
    36 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } _ => { panic!("internal lexer error") } } } } }

/// Matches `scheme ':' body '>'` at the very start of `s`.
///
/// Accepted input, byte by byte:
/// * a scheme: one ASCII letter followed by 1 to 31 bytes drawn from ASCII
///   letters, digits, `+`, `-` and `.`,
/// * `:` (0x3A),
/// * a possibly empty body in which each element is either a single byte in
///   `0x21..=0x7F` other than `<` (0x3C) and `>` (0x3E), or a multi-byte
///   sequence whose lead byte is in `0xC2..=0xF4` and whose continuation bytes
///   pass the per-lead-byte range checks in states 10 to 16,
/// * a closing `>`.
///
/// Returns `Some(n)` where `n` is the number of bytes consumed, including the
/// closing `>`; returns `None` otherwise (for example on a byte in
/// `0x00..=0x20`, on `<`, on a malformed multi-byte sequence, or when `s` ends
/// before `>` is seen).
///
/// NOTE(review): presumably used for the inside of a `<...>` autolink with the
/// opening `<` already consumed by the caller -- confirm at the call sites.
///
/// Implementation notes:
/// * This is a table-style state machine driven by `yystate`
///   (NOTE(review): looks machine-generated, presumably from a `.re` scanner
///   spec -- if so, change the spec and regenerate instead of editing by hand).
/// * Every `get_unchecked(cursor)` is guarded by `cursor < len`; a read at or
///   past the end yields the byte 0, which no accepting transition matches.
pub fn autolink_uri(s: &[u8]) -> Option<usize> { let mut cursor = 0; let mut marker = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate {
    // State 0: the first byte must be an ASCII letter. `cursor` is advanced
    // unconditionally here, even when the read fell back to the 0 sentinel.
    0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x41..=0x5A | 0x61..=0x7A => { yystate = 3; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } }
    // States 1 and 2: failure path; state 2 is the single `None` exit.
    1 => { yystate = 2; continue 'yyl; }
    2 => { return None; }
    // State 3: remember the position after the first byte in `marker`, then
    // require a second scheme byte.
    3 => { marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 4; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } }
    // Scheme-counting states: consume one more scheme byte, or consume ':' and
    // switch to the body loop in state 7.
    4 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 6; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } }
    // State 5: mismatch after the marker was set; rewind `cursor` to `marker`
    // and leave through the `None` exit.
    5 => { cursor = marker; yystate = 2; continue 'yyl; }
    6 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 8; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } }
    // State 7: body loop after the ':'; it re-enters itself for each permitted
    // byte and moves to the accepting state 9 on '>'.
    7 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0
} }; match yych { 0x21..=0x3B | 0x3D | 0x3F..=0x7F => { cursor += 1; yystate = 7; continue 'yyl; } 0x3E => { cursor += 1; yystate = 9; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 10; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 11; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 12; continue 'yyl; } 0xED => { cursor += 1; yystate = 13; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 14; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 15; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 8 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 17; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 9 => { return Some(cursor); } 10 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 11 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 10; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 12 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 10; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 13 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 10; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 14 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 12; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 15 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 
0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 12; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 16 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 12; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 17 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 18; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 18 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 19; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 19 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 20; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 20 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 21; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 21 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 22; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 22 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; 
yystate = 23; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 23 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 24; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 24 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 25; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 25 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 26; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 26 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 27; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 27 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 28; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 28 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 29; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 29 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else 
{ 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 30; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 30 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 31; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 31 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 32; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 32 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 33; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 33 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 34; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 34 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 35; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 35 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 36; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate 
= 5; continue 'yyl; } } } 36 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 37; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 37 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 38; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 38 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 39; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 39 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 40; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 40 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 41; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 41 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 42; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 42 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 
1; yystate = 43; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 43 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D..=0x2E | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 44; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 44 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } _ => { panic!("internal lexer error") } } } } } pub fn autolink_email(s: &[u8]) -> Option<usize> { let mut cursor = 0; let mut marker = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate { 0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x21 | 0x23..=0x27 | 0x2A..=0x2B | 0x2D..=0x39 | 0x3D | 0x3F | 0x41..=0x5A | 0x5E..=0x7E => { yystate = 3; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } } 1 => { yystate = 2; continue 'yyl; } 2 => { return None; } 3 => { marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x21 | 0x23..=0x27 | 0x2A..=0x2B | 0x2D..=0x39 | 0x3D | 0x3F..=0x5A | 0x5E..=0x7E => { yystate = 5; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 4 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; yystate = 5; continue 'yyl; } 5 => match yych { 0x21 | 0x23..=0x27 | 0x2A..=0x2B | 0x2D..=0x39 | 0x3D | 0x3F | 0x41..=0x5A | 0x5E..=0x7E => { cursor += 1; yystate = 4; continue 'yyl; } 0x40 => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } }, 6 => { cursor = marker; yystate = 2; continue 'yyl; } 7 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 
0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 8; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 8 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 9; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 10; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 9 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 12; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 13; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 10 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 12; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 13; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 11 => { return Some(cursor); } 12 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 14; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 15; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 13 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 14; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 15; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 14 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; 
yystate = 16; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 17; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 15 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 16; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 17; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 16 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 18; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 19; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 17 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 18; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 19; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 18 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 20; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 21; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 19 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 20; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 21; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 20 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 
1; yystate = 22; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 23; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 21 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 22; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 23; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 22 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 24; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 25; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 23 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 24; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 25; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 24 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 26; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 27; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 25 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 26; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 27; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 26 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor 
+= 1; yystate = 28; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 29; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 27 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 28; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 29; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 28 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 30; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 31; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 29 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 30; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 31; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 30 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 32; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 33; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 31 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 32; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 33; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 32 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { 
cursor += 1; yystate = 34; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 35; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 33 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 34; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 35; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 34 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 36; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 37; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 35 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 36; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 37; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 36 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 38; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 39; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 37 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 38; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 39; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 38 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => 
{ cursor += 1; yystate = 40; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 41; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 39 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 40; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 41; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 40 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 42; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 43; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 41 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 42; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 43; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 42 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 44; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 45; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 43 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 44; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 45; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 44 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D 
=> { cursor += 1; yystate = 46; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 47; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 45 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 46; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 47; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 46 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 48; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 49; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 47 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 48; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 49; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 48 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 50; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 51; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 49 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 50; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 51; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 50 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 
0x2D => { cursor += 1; yystate = 52; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 53; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 51 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 52; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 53; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 52 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 54; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 55; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 53 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 54; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 55; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 54 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 56; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 57; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 55 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 56; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 57; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 56 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych 
{ 0x2D => { cursor += 1; yystate = 58; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 59; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 57 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 58; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 59; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 58 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 60; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 61; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 59 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 60; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 61; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 60 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 62; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 63; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 61 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 62; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 63; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 62 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match 
yych { 0x2D => { cursor += 1; yystate = 64; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 65; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 63 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 64; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 65; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 64 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 66; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 67; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 65 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 66; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 67; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 66 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 68; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 69; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 67 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 68; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 69; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 68 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; 
match yych { 0x2D => { cursor += 1; yystate = 70; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 71; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 69 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 70; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 71; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 70 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 72; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 73; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 71 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 72; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 73; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 72 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 74; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 75; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 73 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 74; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 75; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 74 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } 
}; match yych { 0x2D => { cursor += 1; yystate = 76; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 77; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 75 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 76; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 77; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 76 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 78; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 79; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 77 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 78; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 79; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 78 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 80; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 81; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 79 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 80; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 81; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 80 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 
} }; match yych { 0x2D => { cursor += 1; yystate = 82; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 83; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 81 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 82; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 83; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 82 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 84; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 85; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 83 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 84; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 85; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 84 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 86; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 87; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 85 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 86; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 87; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 86 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 
0 } }; match yych { 0x2D => { cursor += 1; yystate = 88; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 89; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 87 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 88; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 89; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 88 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 90; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 91; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 89 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 90; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 91; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 90 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 92; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 93; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 91 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 92; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 93; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 92 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else 
{ 0 } }; match yych { 0x2D => { cursor += 1; yystate = 94; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 95; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 93 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 94; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 95; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 94 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 96; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 97; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 95 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 96; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 97; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 96 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 98; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 99; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 97 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 98; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 99; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 98 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } 
else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 100; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 101; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 99 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 100; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 101; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 100 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 102; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 103; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 101 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 102; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 103; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 102 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 104; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 105; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 103 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 104; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 105; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 104 => { yych = unsafe { if cursor < len { 
*s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 106; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 107; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 105 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 106; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 107; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 106 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 108; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 109; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 107 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 108; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 109; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 108 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 110; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 111; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 109 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 110; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 111; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 110 => { yych = unsafe 
{ if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 112; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 113; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 111 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 112; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 113; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 112 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 114; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 115; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 113 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 114; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 115; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 114 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 116; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 117; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 115 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 116; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 117; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 116 
=> { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 118; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 119; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 117 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 118; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 119; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 118 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 120; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 121; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 119 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 120; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 121; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 120 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 122; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 123; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 121 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 122; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 123; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; 
continue 'yyl; } } } 122 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 124; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 125; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 123 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 124; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 125; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 124 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 126; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 127; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 125 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 126; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 127; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 126 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 128; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 129; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 127 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 128; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 129; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } 
_ => { yystate = 6; continue 'yyl; } } } 128 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 130; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 131; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 129 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 130; continue 'yyl; } 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 131; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 130 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 132; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 131 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 132; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 132 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2E => { cursor += 1; yystate = 7; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } _ => { panic!("internal lexer error") } } } } } pub fn html_tag(s: &[u8]) -> Option<usize> { let mut cursor = 0; let mut marker = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate { 0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x2F => { yystate = 3; continue 'yyl; } 0x41..=0x5A | 0x61..=0x7A => { yystate = 4; continue 'yyl; } _ => { 
yystate = 1; continue 'yyl; } } } 1 => { yystate = 2; continue 'yyl; } 2 => { return None; } 3 => { marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 5; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 4 => { marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 | 0x2D | 0x2F..=0x39 | 0x3E | 0x41..=0x5A | 0x61..=0x7A => { yystate = 9; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 5 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 => { cursor += 1; yystate = 12; continue 'yyl; } 0x2D | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 5; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 6 => { cursor = marker; yystate = 2; continue 'yyl; } 7 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 => { cursor += 1; yystate = 7; continue 'yyl; } 0x2F => { cursor += 1; yystate = 10; continue 'yyl; } 0x3A | 0x41..=0x5A | 0x5F | 0x61..=0x7A => { cursor += 1; yystate = 13; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 8 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; yystate = 9; continue 'yyl; } 9 => match yych { 0x09..=0x0D | 0x20 => { cursor += 1; yystate = 7; continue 'yyl; } 0x2D | 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A => { cursor += 1; yystate = 8; continue 'yyl; } 0x2F => { cursor += 1; yystate = 10; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } }, 10 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; 
continue 'yyl; } } } 11 => { return Some(cursor); } 12 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 => { cursor += 1; yystate = 12; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 13 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 => { cursor += 1; yystate = 14; continue 'yyl; } 0x2D..=0x2E | 0x30..=0x3A | 0x41..=0x5A | 0x5F | 0x61..=0x7A => { cursor += 1; yystate = 13; continue 'yyl; } 0x2F => { cursor += 1; yystate = 10; continue 'yyl; } 0x3D => { cursor += 1; yystate = 15; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 14 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 => { cursor += 1; yystate = 14; continue 'yyl; } 0x2F => { cursor += 1; yystate = 10; continue 'yyl; } 0x3A | 0x41..=0x5A | 0x5F | 0x61..=0x7A => { cursor += 1; yystate = 13; continue 'yyl; } 0x3D => { cursor += 1; yystate = 15; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 15 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x08 | 0x0E..=0x1F | 0x21 | 0x23..=0x26 | 0x28..=0x3B | 0x3F..=0x5F | 0x61..=0x7F => { cursor += 1; yystate = 16; continue 'yyl; } 0x09..=0x0D | 0x20 => { cursor += 1; yystate = 15; continue 'yyl; } 0x22 => { cursor += 1; yystate = 17; continue 'yyl; } 0x27 => { cursor += 1; yystate = 18; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 19; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 21; continue 'yyl; } 0xED => { cursor += 1; yystate = 22; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 23; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 24; 
continue 'yyl; } 0xF4 => { cursor += 1; yystate = 25; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 16 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x08 | 0x0E..=0x1F | 0x21 | 0x23..=0x26 | 0x28..=0x3B | 0x3F..=0x5F | 0x61..=0x7F => { cursor += 1; yystate = 16; continue 'yyl; } 0x09..=0x0D | 0x20 => { cursor += 1; yystate = 7; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 19; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 21; continue 'yyl; } 0xED => { cursor += 1; yystate = 22; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 23; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 24; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 25; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 17 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x21 | 0x23..=0x7F => { cursor += 1; yystate = 17; continue 'yyl; } 0x22 => { cursor += 1; yystate = 26; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 27; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 28; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 29; continue 'yyl; } 0xED => { cursor += 1; yystate = 30; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 31; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 32; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 33; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 18 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x26 | 0x28..=0x7F => { cursor += 1; yystate = 18; continue 'yyl; } 0x27 => { cursor += 1; yystate = 26; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 34; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 35; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 36; 
continue 'yyl; } 0xED => { cursor += 1; yystate = 37; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 38; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 39; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 40; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 19 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 20 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 19; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 21 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 19; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 22 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 19; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 23 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 21; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 24 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 21; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 25 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 21; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 26 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 => { cursor += 1; yystate = 7; continue 'yyl; } 0x2F => { cursor += 1; yystate = 10; continue 'yyl; } 0x3E => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 27 => { yych = 
unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 17; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 28 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 27; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 29 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 27; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 30 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 27; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 31 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 29; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 32 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 29; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 33 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 29; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 34 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 35 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 34; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 36 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 34; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 37 => { yych = unsafe { if cursor 
< len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 34; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 38 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 36; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 39 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 36; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 40 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 36; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } _ => { panic!("internal lexer error") } } } } }
/// Scans an HTML comment body starting at byte 0 of `s`.
///
/// The input must begin with two `-` bytes (0x2D). The state machine then
/// consumes bytes until it has seen `-`, `-`, `>` in a row (states 6, 14, 15).
///
/// Returns `Some(n)`, where `n` is the index just past that closing `>`.
/// Returns `None` when `s` does not start with `--`, or when a NUL byte, the
/// end of input (read as 0) or a byte outside the accepted UTF-8 ranges is
/// reached before the terminator.
///
/// After two dashes, any byte other than `>` (a third `-` included) returns to
/// the plain-content state 4, so three dashes followed by `>` do not close the
/// comment.
///
/// Every `get_unchecked` read is guarded by `cursor < len`; 0 is substituted
/// once the cursor reaches the end of the slice.
///
/// NOTE(review): presumably the caller has already consumed the leading `<!`
/// -- confirm at the call site. The `yych`/`yystate` layout looks like re2c
/// output; if so, change the generator input rather than this code.
pub fn html_comment(s: &[u8]) -> Option<usize> { let mut cursor = 0; let mut marker = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate { 0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x2D => { yystate = 3; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } } 1 => { yystate = 2; continue 'yyl; } 2 => { return None; } 3 => { marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 4; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 4 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x2C | 0x2E..=0x7F => { cursor += 1; yystate = 4; continue 'yyl; } 0x2D => { cursor += 1; yystate = 6; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 7; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 8; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 9; continue 'yyl; } 0xED => { cursor += 1; yystate = 10; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 11; continue 'yyl; }
0xF1..=0xF3 => { cursor += 1; yystate = 12; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 13; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } }
// State 5: failure path -- restore the saved marker, then return None via state 2.
5 => { cursor = marker; yystate = 2; continue 'yyl; } 6 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x2C | 0x2E..=0x7F => { cursor += 1; yystate = 4; continue 'yyl; } 0x2D => { cursor += 1; yystate = 14; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 7; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 8; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 9; continue 'yyl; } 0xED => { cursor += 1; yystate = 10; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 11; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 12; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 13; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } }
// States 7-13: trailing bytes of a multi-byte sequence; any byte outside the expected range fails via state 5.
7 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 4; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 8 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 9 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 10 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 11 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 9; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 12 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate
= 9; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 13 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 9; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 14 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x3D | 0x3F..=0x7F => { cursor += 1; yystate = 4; continue 'yyl; } 0x3E => { cursor += 1; yystate = 15; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 7; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 8; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 9; continue 'yyl; } 0xED => { cursor += 1; yystate = 10; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 11; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 12; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 13; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } }
// State 15: the only accepting state, entered from state 14 on `>` (0x3E).
15 => { return Some(cursor); } _ => { panic!("internal lexer error") } } } } }
/// Scans the body of an HTML processing instruction from byte 0 of `s`.
///
/// Consumes non-NUL bytes (multi-byte sequences must fall in the accepted
/// UTF-8 ranges) and returns `Some(n)`, where `n` is the number of bytes
/// accepted.
///
/// Whenever a `?` (0x3F) is consumed, the byte after it is examined in state 6
/// or 14. Any acceptable byte other than `>` (another `?` included) is
/// consumed together with it. If a mid-input `?` is instead followed by `>`,
/// NUL or the end of input, state 15 rewinds the cursor to the marker saved
/// before that `?` and the match ends there, so neither the `?` nor the `>` is
/// counted in `n`.
///
/// Returns `None` when nothing can be accepted: empty input, a leading NUL or
/// malformed sequence, or a leading `?` that is not followed by an acceptable
/// byte (for example input starting with `?>`).
///
/// NOTE(review): presumably the caller has already consumed the opening `<`
/// and handles the closing `?>` itself -- confirm at the call site.
pub fn html_processing_instruction(s: &[u8]) -> Option<usize> { let mut cursor = 0; let mut marker = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yyaccept: usize = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate { 0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x01..=0x3E | 0x40..=0x7F => { yystate = 3; continue 'yyl; } 0x3F => { yystate = 6; continue 'yyl; } 0xC2..=0xDF => { yystate = 7; continue 'yyl; } 0xE0 => { yystate = 8; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { yystate = 9; continue 'yyl; } 0xED => { yystate = 10; continue 'yyl; } 0xF0 => { yystate = 11; continue 'yyl; } 0xF1..=0xF3 => { yystate = 12; continue 'yyl; } 0xF4 => { yystate = 13; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } } 1 => { yystate = 2; continue 'yyl; } 2 => { return None; } 3 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len {
*s.get_unchecked(cursor) } else { 0 } }; yystate = 4; continue 'yyl; } 4 => match yych { 0x01..=0x3E | 0x40..=0x7F => { cursor += 1; yystate = 3; continue 'yyl; } 0x3F => { cursor += 1; yystate = 14; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } },
// State 5: accept; `cursor` is the number of bytes matched.
5 => { return Some(cursor); } 6 => { yyaccept = 1; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x3D | 0x40..=0x7F | 0xC2..=0xF4 => { yystate = 4; continue 'yyl; } 0x3F => { cursor += 1; yystate = 3; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 7 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 3; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 8 => { yyaccept = 1; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 9 => { yyaccept = 1; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 10 => { yyaccept = 1; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 11 => { yyaccept = 1; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match
yych { 0x90..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 12 => { yyaccept = 1; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 13 => { yyaccept = 1; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 14 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x3D | 0x3F..=0x7F => { cursor += 1; yystate = 3; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 15; continue 'yyl; } } }
// State 15: backtrack to the marker; accept (state 5) only if state 3 last set yyaccept to 0, otherwise fail.
15 => { cursor = marker; if yyaccept == 0 { yystate = 5; continue 'yyl; } else { yystate = 2; continue 'yyl; } } 16 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 3; continue 'yyl; } _ => { yystate = 15; continue 'yyl; } } } 17 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 15; continue 'yyl; } } } 18 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 15; continue 'yyl; } } } 19 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych {
0x80..=0x9F => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 15; continue 'yyl; } } } 20 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 15; continue 'yyl; } } } 21 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 15; continue 'yyl; } } } 22 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 15; continue 'yyl; } } } _ => { panic!("internal lexer error") } } } } }
/// Scans an HTML declaration from byte 0 of `s`.
///
/// Accepted shape, in order:
/// 1. one or more bytes in `A`..=`Z` (0x41..=0x5A);
/// 2. one whitespace byte (0x09..=0x0D or 0x20);
/// 3. any run of non-NUL bytes other than `>` (0x3E), where multi-byte
///    sequences must fall in the accepted UTF-8 ranges.
///
/// Returns `Some(n)`, where `n` is the offset at which step 3 stopped: the
/// first `>`, NUL, end of input, or the start of a malformed multi-byte
/// sequence (state 7 rewinds to the marker taken before it). The stopping
/// byte itself is not consumed.
///
/// Returns `None` when the uppercase name or the whitespace byte after it is
/// missing.
///
/// NOTE(review): presumably this is called on the text following `<!`, e.g.
/// `DOCTYPE html` -- confirm at the call site.
pub fn html_declaration(s: &[u8]) -> Option<usize> { let mut cursor = 0; let mut marker = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yyaccept: usize = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate { 0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x41..=0x5A => { yystate = 3; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } } 1 => { yystate = 2; continue 'yyl; } 2 => { return None; } 3 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 => { cursor += 1; yystate = 4; continue 'yyl; } 0x41..=0x5A => { cursor += 1; yystate = 6; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 4 => { yyaccept = 1; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x3D | 0x3F..=0x7F => { cursor += 1; yystate = 4; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 8; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 9; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 10; continue 'yyl; } 0xED => { cursor += 1; yystate = 11; continue 'yyl; } 0xF0 => {
cursor += 1; yystate = 12; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 13; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 14; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } }
// State 5: accept; `cursor` is the offset where the content run stopped.
5 => { return Some(cursor); } 6 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 => { cursor += 1; yystate = 4; continue 'yyl; } 0x41..=0x5A => { cursor += 1; yystate = 6; continue 'yyl; } _ => { yystate = 7; continue 'yyl; } } }
// State 7: backtrack to the marker; yyaccept is 1 only once state 4 has been entered, in which case the match ends at the marker.
7 => { cursor = marker; if yyaccept == 0 { yystate = 2; continue 'yyl; } else { yystate = 5; continue 'yyl; } } 8 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 4; continue 'yyl; } _ => { yystate = 7; continue 'yyl; } } } 9 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 8; continue 'yyl; } _ => { yystate = 7; continue 'yyl; } } } 10 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 8; continue 'yyl; } _ => { yystate = 7; continue 'yyl; } } } 11 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 8; continue 'yyl; } _ => { yystate = 7; continue 'yyl; } } } 12 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 10; continue 'yyl; } _ => { yystate = 7; continue 'yyl; } } } 13 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 10; continue 'yyl; } _ => { yystate = 7; continue 'yyl; } } } 14 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 10; continue 'yyl; } _ => { yystate = 7; continue 'yyl; } } } _ => {
panic!("internal lexer error") } } } } }
/// Scans a CDATA section body from byte 0 of `s`.
///
/// The input must start with `CDATA[`; each letter is matched in either case
/// (e.g. 0x43 or 0x63 for the first byte) and the bracket must be `[` (0x5B).
/// After the prefix, state 9 consumes non-NUL bytes, where multi-byte
/// sequences must fall in the accepted UTF-8 ranges.
///
/// Returns `Some(n)`, where `n` is the offset at which the content ends:
/// - the position of the first `]` of a `]`, `]`, `>` run recognised via
///   states 11 and 19 (the terminator is not consumed);
/// - or the position of a NUL, the end of input, or the start of a malformed
///   sequence or dangling `]`.
///
/// A third `]` directly after two (state 19) is treated as ordinary content
/// and returns to state 9.
///
/// Returns `None` when the `CDATA[` prefix is missing or incomplete.
///
/// NOTE(review): presumably the caller has already consumed `<![` and checks
/// for the closing `]]>` itself -- confirm at the call site.
pub fn html_cdata(s: &[u8]) -> Option<usize> { let mut cursor = 0; let mut marker = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yyaccept: usize = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate { 0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x43 | 0x63 => { yystate = 3; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } } 1 => { yystate = 2; continue 'yyl; } 2 => { return None; } 3 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x44 | 0x64 => { cursor += 1; yystate = 4; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 4 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 6; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } }
// State 5: backtrack to the marker; fail while still in the prefix (yyaccept 0), otherwise accept at the marker via state 10.
5 => { cursor = marker; if yyaccept == 0 { yystate = 2; continue 'yyl; } else { yystate = 10; continue 'yyl; } } 6 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x54 | 0x74 => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 7 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 8; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 8 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x5B => { cursor += 1; yystate = 9; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 9 => { yyaccept = 1; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x5C | 0x5E..=0x7F => { cursor += 1; yystate = 9; continue 'yyl; } 0x5D => { cursor += 1; yystate = 11; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 12; continue 'yyl; } 0xE0 => { cursor += 1;
yystate = 13; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 14; continue 'yyl; } 0xED => { cursor += 1; yystate = 15; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 16; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 17; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 10; continue 'yyl; } } }
// State 10: accept; `cursor` is the offset where the content ends.
10 => { return Some(cursor); } 11 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x5C | 0x5E..=0x7F => { cursor += 1; yystate = 9; continue 'yyl; } 0x5D => { cursor += 1; yystate = 19; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 12; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 13; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 14; continue 'yyl; } 0xED => { cursor += 1; yystate = 15; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 16; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 17; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 12 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 9; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 13 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 12; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 14 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 12; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 15 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 12; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 16 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1;
yystate = 14; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 17 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 14; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 18 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 14; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 19 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x3D | 0x3F..=0x7F => { cursor += 1; yystate = 9; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 12; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 13; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 14; continue 'yyl; } 0xED => { cursor += 1; yystate = 15; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 16; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 17; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } _ => { panic!("internal lexer error") } } } } }
/// Counts the run of whitespace bytes at the start of `s`.
///
/// A whitespace byte is one in 0x09..=0x0D (tab, LF, VT, FF, CR) or 0x20
/// (space).
///
/// Returns `Some(n)`, where `n >= 1` is the length of the leading run, or
/// `None` when `s` is empty or its first byte is not whitespace. Past the end
/// of the slice the scanner reads 0, which is not a whitespace byte, so the
/// run always ends at `s.len()` at the latest.
pub fn spacechars(s: &[u8]) -> Option<usize> { let mut cursor = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate { 0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x09..=0x0D | 0x20 => { yystate = 2; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } } 1 => { return None; } 2 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 => { cursor += 1; yystate = 2; continue 'yyl; } _ => { yystate = 3; continue 'yyl; } } } 3 => { return Some(cursor); } _ => { panic!("internal lexer error") } } } } } pub fn link_title(s: &[u8]) -> Option<usize> { let mut cursor = 0; let mut marker = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 =
0; let mut yyaccept: usize = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate { 0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x22 => { yystate = 3; continue 'yyl; } 0x27 => { yystate = 4; continue 'yyl; } 0x28 => { yystate = 5; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } } 1 => { yystate = 2; continue 'yyl; } 2 => { return None; } 3 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x7F | 0xC2..=0xF4 => { yystate = 7; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 4 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x7F | 0xC2..=0xF4 => { yystate = 20; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 5 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x27 | 0x29..=0x7F | 0xC2..=0xF4 => { yystate = 32; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 6 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; yystate = 7; continue 'yyl; } 7 => match yych { 0x01..=0x21 | 0x23..=0x5B | 0x5D..=0x7F => { cursor += 1; yystate = 6; continue 'yyl; } 0x22 => { cursor += 1; yystate = 9; continue 'yyl; } 0x5C => { cursor += 1; yystate = 11; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 12; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 13; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 14; continue 'yyl; } 0xED => { cursor += 1; yystate = 15; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 16; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 17; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } }, 8 => { cursor = marker; match yyaccept { 0 => { yystate = 2; continue 'yyl; } 1 => { yystate = 10; 
continue 'yyl; } 2 => { yystate = 22; continue 'yyl; } _ => { yystate = 34; continue 'yyl; } } } 9 => { yystate = 10; continue 'yyl; } 10 => { return Some(cursor); } 11 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x21 | 0x23..=0x5B | 0x5D..=0x7F => { cursor += 1; yystate = 6; continue 'yyl; } 0x22 => { cursor += 1; yystate = 43; continue 'yyl; } 0x5C => { cursor += 1; yystate = 11; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 12; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 13; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 14; continue 'yyl; } 0xED => { cursor += 1; yystate = 15; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 16; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 17; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 12 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 6; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 13 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 12; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 14 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 12; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 15 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 12; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 16 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 14; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 17 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 
0x80..=0xBF => { cursor += 1; yystate = 14; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 18 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 14; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 19 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; yystate = 20; continue 'yyl; } 20 => match yych { 0x01..=0x26 | 0x28..=0x5B | 0x5D..=0x7F => { cursor += 1; yystate = 19; continue 'yyl; } 0x27 => { cursor += 1; yystate = 21; continue 'yyl; } 0x5C => { cursor += 1; yystate = 23; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 24; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 25; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 26; continue 'yyl; } 0xED => { cursor += 1; yystate = 27; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 28; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 29; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 30; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } }, 21 => { yystate = 22; continue 'yyl; } 22 => { return Some(cursor); } 23 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x26 | 0x28..=0x5B | 0x5D..=0x7F => { cursor += 1; yystate = 19; continue 'yyl; } 0x27 => { cursor += 1; yystate = 44; continue 'yyl; } 0x5C => { cursor += 1; yystate = 23; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 24; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 25; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 26; continue 'yyl; } 0xED => { cursor += 1; yystate = 27; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 28; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 29; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 30; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 24 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 
0x80..=0xBF => { cursor += 1; yystate = 19; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 25 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 24; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 26 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 24; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 27 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 24; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 28 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 26; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 29 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 26; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 30 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 26; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 31 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; yystate = 32; continue 'yyl; } 32 => match yych { 0x01..=0x27 | 0x2A..=0x5B | 0x5D..=0x7F => { cursor += 1; yystate = 31; continue 'yyl; } 0x29 => { cursor += 1; yystate = 33; continue 'yyl; } 0x5C => { cursor += 1; yystate = 35; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 36; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 37; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 38; continue 'yyl; } 0xED => { cursor += 1; yystate = 39; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 40; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 41; continue 'yyl; } 0xF4 => { cursor += 
1; yystate = 42; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } }, 33 => { yystate = 34; continue 'yyl; } 34 => { return Some(cursor); } 35 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x28 | 0x2A..=0x5B | 0x5D..=0x7F => { cursor += 1; yystate = 31; continue 'yyl; } 0x29 => { cursor += 1; yystate = 45; continue 'yyl; } 0x5C => { cursor += 1; yystate = 35; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 36; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 37; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 38; continue 'yyl; } 0xED => { cursor += 1; yystate = 39; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 40; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 41; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 42; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 36 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 31; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 37 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 36; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 38 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 36; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 39 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 36; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 40 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 38; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 41 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { 
cursor += 1; yystate = 38; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 42 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 38; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 43 => { yyaccept = 1; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x21 | 0x23..=0x5B | 0x5D..=0x7F => { cursor += 1; yystate = 6; continue 'yyl; } 0x22 => { cursor += 1; yystate = 9; continue 'yyl; } 0x5C => { cursor += 1; yystate = 11; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 12; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 13; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 14; continue 'yyl; } 0xED => { cursor += 1; yystate = 15; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 16; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 17; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 10; continue 'yyl; } } } 44 => { yyaccept = 2; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x26 | 0x28..=0x5B | 0x5D..=0x7F => { cursor += 1; yystate = 19; continue 'yyl; } 0x27 => { cursor += 1; yystate = 21; continue 'yyl; } 0x5C => { cursor += 1; yystate = 23; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 24; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 25; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 26; continue 'yyl; } 0xED => { cursor += 1; yystate = 27; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 28; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 29; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 30; continue 'yyl; } _ => { yystate = 22; continue 'yyl; } } } 45 => { yyaccept = 3; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x27 | 0x2A..=0x5B | 
0x5D..=0x7F => { cursor += 1; yystate = 31; continue 'yyl; } 0x29 => { cursor += 1; yystate = 33; continue 'yyl; } 0x5C => { cursor += 1; yystate = 35; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 36; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 37; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 38; continue 'yyl; } 0xED => { cursor += 1; yystate = 39; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 40; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 41; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 42; continue 'yyl; } _ => { yystate = 34; continue 'yyl; } } } _ => { panic!("internal lexer error") } } } } } pub fn dangerous_url(s: &[u8]) -> Option<usize> { let mut cursor = 0; let mut marker = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yyaccept: usize = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate { 0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x44 | 0x64 => { yystate = 3; continue 'yyl; } 0x46 | 0x66 => { yystate = 4; continue 'yyl; } 0x4A | 0x6A => { yystate = 5; continue 'yyl; } 0x56 | 0x76 => { yystate = 6; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } } 1 => { yystate = 2; continue 'yyl; } 2 => { return None; } 3 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 4 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x49 | 0x69 => { cursor += 1; yystate = 9; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 5 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 10; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 6 
=> { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x42 | 0x62 => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 7 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x54 | 0x74 => { cursor += 1; yystate = 12; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 8 => { cursor = marker; if yyaccept == 0 { yystate = 2; continue 'yyl; } else { yystate = 20; continue 'yyl; } } 9 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4C | 0x6C => { cursor += 1; yystate = 13; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 10 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x56 | 0x76 => { cursor += 1; yystate = 14; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 11 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x53 | 0x73 => { cursor += 1; yystate = 15; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 12 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 13 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x45 | 0x65 => { cursor += 1; yystate = 17; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 14 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 15 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x43 | 0x63 => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 16 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 
} }; match yych { 0x3A => { cursor += 1; yystate = 19; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 17 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x3A => { cursor += 1; yystate = 21; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 18 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x52 | 0x72 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 19 => { yyaccept = 1; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x49 | 0x69 => { cursor += 1; yystate = 23; continue 'yyl; } _ => { yystate = 20; continue 'yyl; } } } 20 => { return Some(cursor); } 21 => { yystate = 20; continue 'yyl; } 22 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x49 | 0x69 => { cursor += 1; yystate = 24; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 23 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4D | 0x6D => { cursor += 1; yystate = 25; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 24 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x50 | 0x70 => { cursor += 1; yystate = 26; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 25 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x41 | 0x61 => { cursor += 1; yystate = 27; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 26 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x54 | 0x74 => { cursor += 1; yystate = 17; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 27 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x47 | 0x67 => { cursor += 1; yystate = 28; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 28 => { 
yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x45 | 0x65 => { cursor += 1; yystate = 29; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 29 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2F => { cursor += 1; yystate = 30; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 30 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x47 | 0x67 => { cursor += 1; yystate = 31; continue 'yyl; } 0x4A | 0x6A => { cursor += 1; yystate = 32; continue 'yyl; } 0x50 | 0x70 => { cursor += 1; yystate = 33; continue 'yyl; } 0x57 | 0x77 => { cursor += 1; yystate = 34; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 31 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x49 | 0x69 => { cursor += 1; yystate = 35; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 32 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x50 | 0x70 => { cursor += 1; yystate = 36; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 33 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x4E | 0x6E => { cursor += 1; yystate = 37; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 34 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x45 | 0x65 => { cursor += 1; yystate = 38; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 35 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x46 | 0x66 => { cursor += 1; yystate = 39; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 36 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x45 | 0x65 => { cursor += 1; yystate = 37; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 37 => { yych = unsafe { if cursor < len { 
*s.get_unchecked(cursor) } else { 0 } }; match yych { 0x47 | 0x67 => { cursor += 1; yystate = 39; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 38 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x42 | 0x62 => { cursor += 1; yystate = 40; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } 39 => { return None; } 40 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x50 | 0x70 => { cursor += 1; yystate = 39; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } _ => { panic!("internal lexer error") } } } } } pub fn table_start(s: &[u8]) -> Option<usize> { let mut cursor = 0; let mut marker = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate { 0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x09 | 0x0B..=0x0C | 0x20 | 0x7C => { yystate = 3; continue 'yyl; } 0x2D => { yystate = 4; continue 'yyl; } 0x3A => { yystate = 5; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } } 1 => { yystate = 2; continue 'yyl; } 2 => { return None; } 3 => { marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09 | 0x0B..=0x0C | 0x20 => { cursor += 1; yystate = 6; continue 'yyl; } 0x2D => { cursor += 1; yystate = 8; continue 'yyl; } 0x3A => { cursor += 1; yystate = 9; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 4 => { marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 | 0x7C => { yystate = 11; continue 'yyl; } 0x2D => { cursor += 1; yystate = 8; continue 'yyl; } 0x3A => { cursor += 1; yystate = 10; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 5 => { marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 8; 
continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 6 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09 | 0x0B..=0x0C | 0x20 => { cursor += 1; yystate = 6; continue 'yyl; } 0x2D => { cursor += 1; yystate = 8; continue 'yyl; } 0x3A => { cursor += 1; yystate = 9; continue 'yyl; } _ => { yystate = 7; continue 'yyl; } } } 7 => { cursor = marker; yystate = 2; continue 'yyl; } 8 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09 | 0x0B..=0x0C | 0x20 | 0x3A => { cursor += 1; yystate = 10; continue 'yyl; } 0x0A => { cursor += 1; yystate = 12; continue 'yyl; } 0x0D => { cursor += 1; yystate = 13; continue 'yyl; } 0x2D => { cursor += 1; yystate = 8; continue 'yyl; } 0x7C => { cursor += 1; yystate = 14; continue 'yyl; } _ => { yystate = 7; continue 'yyl; } } } 9 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2D => { cursor += 1; yystate = 8; continue 'yyl; } _ => { yystate = 7; continue 'yyl; } } } 10 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; yystate = 11; continue 'yyl; } 11 => match yych { 0x09 | 0x0B..=0x0C | 0x20 => { cursor += 1; yystate = 10; continue 'yyl; } 0x0A => { cursor += 1; yystate = 12; continue 'yyl; } 0x0D => { cursor += 1; yystate = 13; continue 'yyl; } 0x7C => { cursor += 1; yystate = 14; continue 'yyl; } _ => { yystate = 7; continue 'yyl; } }, 12 => { return Some(cursor); } 13 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x0A => { cursor += 1; yystate = 12; continue 'yyl; } _ => { yystate = 7; continue 'yyl; } } } 14 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09 | 0x0B..=0x0C | 0x20 => { cursor += 1; yystate = 14; continue 'yyl; } 0x0A => { cursor += 1; yystate = 12; continue 'yyl; } 0x0D => { cursor += 1; yystate = 13; continue 'yyl; } 0x2D => { cursor += 1; yystate = 8; 
continue 'yyl; } 0x3A => { cursor += 1; yystate = 9; continue 'yyl; } _ => { yystate = 7; continue 'yyl; } } } _ => { panic!("internal lexer error") } } } } } pub fn table_cell(s: &[u8], spoiler: bool) -> Option<usize> { let mut cursor = 0; let mut marker = 0; let len = s.len(); // In fact, `table_cell` matches non-empty table cells only. The empty // string is also a valid table cell, but is handled by the default rule. // This approach prevents re2c's match-empty-string warning. if spoiler { { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yyaccept: usize = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate { 0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x01..=0x09 | 0x0B..=0x0C | 0x0E..=0x26 | 0x28..=0x5B | 0x5D..=0x7B | 0x7D..=0x7F => { yystate = 3; continue 'yyl; } 0x27 | 0x5C => { yystate = 5; continue 'yyl; } 0x7C => { yystate = 6; continue 'yyl; } 0xC2..=0xDF => { yystate = 7; continue 'yyl; } 0xE0 => { yystate = 8; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { yystate = 9; continue 'yyl; } 0xED => { yystate = 10; continue 'yyl; } 0xF0 => { yystate = 11; continue 'yyl; } 0xF1..=0xF3 => { yystate = 12; continue 'yyl; } 0xF4 => { yystate = 13; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } } 1 => { yystate = 2; continue 'yyl; } 2 => { return None; } 3 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x0C | 0x0E..=0x26 | 0x28..=0x5B | 0x5D..=0x7B | 0x7D..=0x7F => { cursor += 1; yystate = 3; continue 'yyl; } 0x27 | 0x5C => { cursor += 1; yystate = 5; continue 'yyl; } 0x7C => { cursor += 1; yystate = 14; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor 
+= 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 4; continue 'yyl; } } } 4 => { return Some(cursor); } 5 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x0C | 0x0E..=0x26 | 0x28..=0x5B | 0x5D..=0x7B | 0x7D..=0x7F => { cursor += 1; yystate = 3; continue 'yyl; } 0x27 | 0x5C | 0x7C => { cursor += 1; yystate = 5; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 16; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 17; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 18; continue 'yyl; } 0xED => { cursor += 1; yystate = 19; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 20; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 21; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 22; continue 'yyl; } _ => { yystate = 4; continue 'yyl; } } } 6 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x27 | 0x7C => { cursor += 1; yystate = 3; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 7 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 3; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 8 => { yyaccept = 1; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 9 => { yyaccept = 1; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 10 => { yyaccept = 1; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor 
+= 1; yystate = 16; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 11 => { yyaccept = 1; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 12 => { yyaccept = 1; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 13 => { yyaccept = 1; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 14 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x27 | 0x7C => { cursor += 1; yystate = 3; continue 'yyl; } _ => { yystate = 15; continue 'yyl; } } } 15 => { cursor = marker; if yyaccept == 0 { yystate = 4; continue 'yyl; } else { yystate = 2; continue 'yyl; } } 16 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 3; continue 'yyl; } _ => { yystate = 15; continue 'yyl; } } } 17 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 15; continue 'yyl; } } } 18 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 15; continue 'yyl; } } } 19 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 15; continue 'yyl; } } } 20 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; 
yystate = 18; continue 'yyl; } _ => { yystate = 15; continue 'yyl; } } } 21 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 15; continue 'yyl; } } } 22 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 18; continue 'yyl; } _ => { yystate = 15; continue 'yyl; } } } _ => { panic!("internal lexer error") } } } } } else { { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yyaccept: usize = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate { 0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x01..=0x09 | 0x0B..=0x0C | 0x0E..=0x5B | 0x5D..=0x7B | 0x7D..=0x7F => { yystate = 3; continue 'yyl; } 0x5C => { yystate = 5; continue 'yyl; } 0xC2..=0xDF => { yystate = 6; continue 'yyl; } 0xE0 => { yystate = 7; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { yystate = 8; continue 'yyl; } 0xED => { yystate = 9; continue 'yyl; } 0xF0 => { yystate = 10; continue 'yyl; } 0xF1..=0xF3 => { yystate = 11; continue 'yyl; } 0xF4 => { yystate = 12; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } } 1 => { yystate = 2; continue 'yyl; } 2 => { return None; } 3 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x0C | 0x0E..=0x5B | 0x5D..=0x7B | 0x7D..=0x7F => { cursor += 1; yystate = 3; continue 'yyl; } 0x5C => { cursor += 1; yystate = 5; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 13; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 15; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 16; continue 'yyl; } 0xED => { cursor += 1; yystate = 17; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 18; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 19; continue 'yyl; } 0xF4 => { 
cursor += 1; yystate = 20; continue 'yyl; } _ => { yystate = 4; continue 'yyl; } } } 4 => { return Some(cursor); } 5 => { yyaccept = 0; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x0C | 0x0E..=0x5B | 0x5D..=0x7F => { cursor += 1; yystate = 3; continue 'yyl; } 0x5C => { cursor += 1; yystate = 5; continue 'yyl; } 0xC2..=0xDF => { cursor += 1; yystate = 13; continue 'yyl; } 0xE0 => { cursor += 1; yystate = 15; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { cursor += 1; yystate = 16; continue 'yyl; } 0xED => { cursor += 1; yystate = 17; continue 'yyl; } 0xF0 => { cursor += 1; yystate = 18; continue 'yyl; } 0xF1..=0xF3 => { cursor += 1; yystate = 19; continue 'yyl; } 0xF4 => { cursor += 1; yystate = 20; continue 'yyl; } _ => { yystate = 4; continue 'yyl; } } } 6 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 3; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 7 => { yyaccept = 1; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 13; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 8 => { yyaccept = 1; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 13; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 9 => { yyaccept = 1; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 13; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 10 => { yyaccept = 1; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 11 => { yyaccept = 1; marker = cursor; 
yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 12 => { yyaccept = 1; marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 13 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 3; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 14 => { cursor = marker; if yyaccept == 0 { yystate = 4; continue 'yyl; } else { yystate = 2; continue 'yyl; } } 15 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 13; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 16 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 13; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 17 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 13; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 18 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 19 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } 20 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 14; continue 'yyl; } } } _ => { panic!("internal lexer error") } } } } } } pub fn table_cell_end(s: 
&[u8]) -> Option<usize> { let mut cursor = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate { 0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x7C => { yystate = 2; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } } 1 => { return None; } 2 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09 | 0x0B..=0x0C | 0x20 => { cursor += 1; yystate = 2; continue 'yyl; } _ => { yystate = 3; continue 'yyl; } } } 3 => { return Some(cursor); } _ => { panic!("internal lexer error") } } } } } pub fn table_row_end(s: &[u8]) -> Option<usize> { let mut cursor = 0; let mut marker = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate { 0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x09 | 0x0B..=0x0C | 0x20 => { yystate = 3; continue 'yyl; } 0x0A => { yystate = 4; continue 'yyl; } 0x0D => { yystate = 5; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } } 1 => { yystate = 2; continue 'yyl; } 2 => { return None; } 3 => { marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 => { yystate = 7; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 4 => { return Some(cursor); } 5 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x0A => { cursor += 1; yystate = 4; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 6 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; yystate = 7; continue 'yyl; } 7 => match yych { 0x09 | 0x0B..=0x0C | 0x20 => { cursor += 1; yystate = 6; continue 'yyl; } 0x0A => { cursor += 1; yystate = 4; continue 'yyl; } 0x0D => { cursor += 1; yystate = 9; continue 'yyl; } _ => { yystate = 8; 
continue 'yyl; } }, 8 => { cursor = marker; yystate = 2; continue 'yyl; } 9 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x0A => { cursor += 1; yystate = 4; continue 'yyl; } _ => { yystate = 8; continue 'yyl; } } } _ => { panic!("internal lexer error") } } } } }
// NOTE(review): the functions below look like generated lexer output (numbered
// states, `yych`/`yystate`); presumably changes belong in the generator input
// rather than here — confirm before hand-editing.
/// Matches a shortcode body at the start of `s`: one or more bytes from
/// `[A-Za-z0-9_+-]` immediately followed by `:`.
///
/// Returns `Some(n)` where `n` is the number of bytes consumed, including the
/// closing `:`, or `None` when the input does not start with such a sequence.
/// Reads at or past `s.len()` yield the byte 0, which matches none of the
/// accepted classes, so the unchecked reads never index out of bounds.
#[cfg(feature = "shortcodes")] pub fn shortcode(s: &[u8]) -> Option<usize> { let mut cursor = 0; let mut marker = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate { 0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x2B | 0x2D | 0x30..=0x39 | 0x41..=0x5A | 0x5F | 0x61..=0x7A => { yystate = 3; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } } 1 => { yystate = 2; continue 'yyl; } 2 => { return None; } 3 => { marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x2B | 0x2D | 0x30..=0x3A | 0x41..=0x5A | 0x5F | 0x61..=0x7A => { yystate = 5; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 4 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; yystate = 5; continue 'yyl; } 5 => match yych { 0x2B | 0x2D | 0x30..=0x39 | 0x41..=0x5A | 0x5F | 0x61..=0x7A => { cursor += 1; yystate = 4; continue 'yyl; } 0x3A => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } }, 6 => { cursor = marker; yystate = 2; continue 'yyl; } 7 => { return Some(cursor); } _ => { panic!("internal lexer error") } } } } }
/// Matches an opening multiline block quote fence at the start of `s`: three
/// or more `>` bytes, which must be followed by any number of spaces/tabs and
/// then a `\n` or `\r`.
///
/// Returns `Some(n)` where `n` is the count of `>` bytes only: the cursor is
/// reset to `ctxmarker` (the position just after the last `>`) before
/// returning, so the trailing blanks and line ending are checked but not
/// consumed. Returns `None` otherwise, including when the input ends before a
/// line ending is seen (out-of-range reads yield the byte 0).
pub fn open_multiline_block_quote_fence(s: &[u8]) -> Option<usize> { let mut cursor = 0; let mut marker = 0; let mut ctxmarker = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate { 0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x3E => { yystate = 3; continue 'yyl; } _ => { yystate = 
1; continue 'yyl; } } } 1 => { yystate = 2; continue 'yyl; } 2 => { return None; } 3 => { marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x3E => { cursor += 1; yystate = 4; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 4 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x3E => { cursor += 1; yystate = 6; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 5 => { cursor = marker; yystate = 2; continue 'yyl; } 6 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09 | 0x20 => { ctxmarker = cursor; cursor += 1; yystate = 7; continue 'yyl; } 0x0A | 0x0D => { ctxmarker = cursor; cursor += 1; yystate = 8; continue 'yyl; } 0x3E => { cursor += 1; yystate = 6; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 7 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09 | 0x20 => { cursor += 1; yystate = 7; continue 'yyl; } 0x0A | 0x0D => { cursor += 1; yystate = 8; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 8 => { cursor = ctxmarker; { return Some(cursor); } } _ => { panic!("internal lexer error") } } } } }
/// Matches a closing multiline block quote fence at the start of `s`.
///
/// The state machine is the same as the one in
/// `open_multiline_block_quote_fence`: three or more `>` bytes followed by
/// optional spaces/tabs and then a `\n` or `\r`. Returns `Some(n)` with `n`
/// equal to the number of `>` bytes, or `None` when there is no match.
pub fn close_multiline_block_quote_fence(s: &[u8]) -> Option<usize> { let mut cursor = 0; let mut marker = 0; let mut ctxmarker = 0; let len = s.len(); { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate { 0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x3E => { yystate = 3; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } } 1 => { yystate = 2; continue 'yyl; } 2 => { return None; } 3 => { marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x3E => { cursor += 1; yystate = 4; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 4 => { yych = unsafe { if cursor < len { 
*s.get_unchecked(cursor) } else { 0 } }; match yych { 0x3E => { cursor += 1; yystate = 6; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 5 => { cursor = marker; yystate = 2; continue 'yyl; } 6 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09 | 0x20 => { ctxmarker = cursor; cursor += 1; yystate = 7; continue 'yyl; } 0x0A | 0x0D => { ctxmarker = cursor; cursor += 1; yystate = 8; continue 'yyl; } 0x3E => { cursor += 1; yystate = 6; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 7 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09 | 0x20 => { cursor += 1; yystate = 7; continue 'yyl; } 0x0A | 0x0D => { cursor += 1; yystate = 8; continue 'yyl; } _ => { yystate = 5; continue 'yyl; } } } 8 => { cursor = ctxmarker; { return Some(cursor); } } _ => { panic!("internal lexer error") } } } } }
/// `tasklist` matches a task list marker at the start of `s`: an optional run
/// of bytes in `0x09..=0x0D` or `0x20`, then `[`, then exactly one UTF-8
/// encoded character other than NUL, `\n` or `\r`, then `]`, then one
/// terminating byte that is `0x00`, in `0x09..=0x0D`, or `0x20`.
///
/// Reads at or past `s.len()` yield `0x00`, so a marker that ends exactly at
/// the end of the input also matches; in that case the cursor has stepped one
/// past `len` and is moved back by one before returning. On success the
/// result is `(cursor, s[t1])`, where `t1` is the index saved when the
/// bracketed character started, i.e. only the first byte of that character
/// is returned.
///
/// NOTE(review): in this copy the signature shares a physical line with the
/// `//` comment that precedes it — confirm the line breaks in the real file.
// Returns both the length of the match, and the tasklist character. pub fn tasklist(s: &[u8]) -> Option<(usize, u8)> { let mut cursor = 0; let mut marker = 0; let len = s.len(); let t1; let mut yyt1 = 0; { #[allow(unused_assignments)] let mut yych: u8 = 0; let mut yystate: usize = 0; 'yyl: loop { match yystate { 0 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; cursor += 1; match yych { 0x09..=0x0D | 0x20 => { yystate = 3; continue 'yyl; } 0x5B => { yystate = 4; continue 'yyl; } _ => { yystate = 1; continue 'yyl; } } } 1 => { yystate = 2; continue 'yyl; } 2 => { return None; } 3 => { marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 => { cursor += 1; yystate = 5; continue 'yyl; } 0x5B => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 4 => { marker = cursor; yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x0C | 0x0E..=0x7F => { yyt1 = cursor; cursor += 1; yystate = 8; 
continue 'yyl; } 0xC2..=0xDF => { yyt1 = cursor; cursor += 1; yystate = 9; continue 'yyl; } 0xE0 => { yyt1 = cursor; cursor += 1; yystate = 10; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { yyt1 = cursor; cursor += 1; yystate = 11; continue 'yyl; } 0xED => { yyt1 = cursor; cursor += 1; yystate = 12; continue 'yyl; } 0xF0 => { yyt1 = cursor; cursor += 1; yystate = 13; continue 'yyl; } 0xF1..=0xF3 => { yyt1 = cursor; cursor += 1; yystate = 14; continue 'yyl; } 0xF4 => { yyt1 = cursor; cursor += 1; yystate = 15; continue 'yyl; } _ => { yystate = 2; continue 'yyl; } } } 5 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x09..=0x0D | 0x20 => { cursor += 1; yystate = 5; continue 'yyl; } 0x5B => { cursor += 1; yystate = 7; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 6 => { cursor = marker; yystate = 2; continue 'yyl; } 7 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x01..=0x09 | 0x0B..=0x0C | 0x0E..=0x7F => { yyt1 = cursor; cursor += 1; yystate = 8; continue 'yyl; } 0xC2..=0xDF => { yyt1 = cursor; cursor += 1; yystate = 9; continue 'yyl; } 0xE0 => { yyt1 = cursor; cursor += 1; yystate = 10; continue 'yyl; } 0xE1..=0xEC | 0xEE..=0xEF => { yyt1 = cursor; cursor += 1; yystate = 11; continue 'yyl; } 0xED => { yyt1 = cursor; cursor += 1; yystate = 12; continue 'yyl; } 0xF0 => { yyt1 = cursor; cursor += 1; yystate = 13; continue 'yyl; } 0xF1..=0xF3 => { yyt1 = cursor; cursor += 1; yystate = 14; continue 'yyl; } 0xF4 => { yyt1 = cursor; cursor += 1; yystate = 15; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 8 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x5D => { cursor += 1; yystate = 16; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 9 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 8; continue 'yyl; } _ => { 
yystate = 6; continue 'yyl; } } } 10 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0xA0..=0xBF => { cursor += 1; yystate = 9; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 11 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 9; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 12 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x9F => { cursor += 1; yystate = 9; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 13 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x90..=0xBF => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 14 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0xBF => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 15 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x80..=0x8F => { cursor += 1; yystate = 11; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 16 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) } else { 0 } }; match yych { 0x00 | 0x09..=0x0D | 0x20 => { cursor += 1; yystate = 17; continue 'yyl; } _ => { yystate = 6; continue 'yyl; } } } 17 => { t1 = yyt1; { if cursor == len + 1 { cursor -= 1; } return Some((cursor, s[t1])); } } _ => { panic!("internal lexer error") } } } } } // vim: set ft=rust: 
������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/strings.rs������������������������������������������������������������������������0000644�0000000�0000000�00000021173�10461020230�0014022�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use crate::ctype::{ispunct, isspace}; use crate::entity; use crate::parser::AutolinkType; use std::ptr; use std::str; #[derive(PartialEq, Eq)] pub enum Case { Preserve, Fold, } pub fn unescape(v: &mut Vec<u8>) { let mut r = 0; let mut prev = None; let mut found = 0; while r < v.len() { if v[r] == b'\\' && r + 1 < v.len() && ispunct(v[r + 1]) { if v[r + 1] == b'\\' { r += 1; } if let Some(prev) = prev { let window = &mut v[(prev + 1 - found)..r]; shift_buf_left(window, found); } prev = Some(r); found += 1; } r += 1; } if let Some(prev) = prev { let window = &mut v[(prev + 1 - found)..r]; shift_buf_left(window, found); } let new_size = v.len() - found; v.truncate(new_size); } pub fn clean_autolink(url: &[u8], kind: AutolinkType) -> Vec<u8> { let mut url_vec = url.to_vec(); trim(&mut url_vec); if url_vec.is_empty() { return url_vec; } let mut buf = Vec::with_capacity(url_vec.len()); if kind == AutolinkType::Email { buf.extend_from_slice(b"mailto:"); } buf.extend_from_slice(&entity::unescape_html(&url_vec)); buf } pub fn normalize_code(v: &[u8]) -> Vec<u8> { let mut r = Vec::with_capacity(v.len()); let mut i = 0; 
let mut contains_nonspace = false;

    while i < v.len() {
        match v[i] {
            b'\r' => {
                // A CR that starts a CRLF pair is dropped here; the LF arm
                // below emits the single replacement space for the pair.
                if i + 1 == v.len() || v[i + 1] != b'\n' {
                    r.push(b' ');
                }
            }
            b'\n' => {
                r.push(b' ');
            }
            c => r.push(c),
        }
        if v[i] != b' ' && v[i] != b'\r' && v[i] != b'\n' {
            contains_nonspace = true;
        }

        i += 1
    }

    // Strip exactly one space from each end, but only when both ends carry a
    // space and the content is not made up solely of spaces / line endings.
    if contains_nonspace && !r.is_empty() && r[0] == b' ' && r[r.len() - 1] == b' ' {
        r.remove(0);
        r.pop();
    }

    r
}

/// Truncates `line` at the first line ending that follows its last byte of
/// real content (a byte that is not a space, tab, `\n` or `\r`).
///
/// If `line` has no such content at all — including when it is empty — it is
/// cleared. If nothing but spaces/tabs follows the last content byte, `line`
/// is left as it is.
pub fn remove_trailing_blank_lines(line: &mut String) {
    let line_bytes = line.as_bytes();

    // Searching from the back copes with an empty string; the previous
    // `line.len() - 1` start index underflowed on empty input.
    let last_content = line_bytes
        .iter()
        .rposition(|&c| c != b' ' && c != b'\t' && !is_line_end_char(c));

    let last_content = match last_content {
        Some(idx) => idx,
        None => {
            line.clear();
            return;
        }
    };

    // The cut position is always an ASCII `\n`/`\r`, hence a char boundary.
    let cut = line_bytes[last_content..]
        .iter()
        .position(|&c| is_line_end_char(c))
        .map(|offset| last_content + offset);

    if let Some(cut) = cut {
        line.truncate(cut);
    }
}

/// Returns `true` for the line ending bytes `\n` (10) and `\r` (13).
pub fn is_line_end_char(ch: u8) -> bool {
    matches!(ch, b'\n' | b'\r')
}

/// Returns `true` for a tab (9) or a space (32).
pub fn is_space_or_tab(ch: u8) -> bool {
    matches!(ch, b'\t' | b' ')
}

/// Right-trims `line`, then removes a trailing run of `#` bytes when that run
/// is directly preceded by a space or tab; the space/tab and any whitespace
/// before it are removed as well.
///
/// Lines that are empty after trimming, that consist only of `#`, or whose
/// trailing `#` run is glued to other content are left untouched (apart from
/// the initial right-trim).
pub fn chop_trailing_hashtags(line: &mut Vec<u8>) {
    rtrim(line);

    let hashes = line.iter().rev().take_while(|&&b| b == b'#').count();

    // `hashes == 0` also covers the empty line, which previously underflowed
    // on `line.len() - 1`; `hashes == line.len()` is the all-`#` line.
    if hashes == 0 || hashes == line.len() {
        return;
    }

    // Index of the byte just before the trailing run of `#`.
    let n = line.len() - 1 - hashes;
    if is_space_or_tab(line[n]) {
        line.truncate(n);
        rtrim(line);
    }
}

/// Removes trailing bytes for which `isspace` returns `true`, in place.
pub fn rtrim(line: &mut Vec<u8>) {
    let spaces = line.iter().rev().take_while(|&&b| isspace(b)).count();
    let new_len = line.len() - spaces;
    line.truncate(new_len);
}

/// Removes leading bytes for which `isspace` returns `true`, in place.
pub fn ltrim(line: &mut Vec<u8>) {
    let spaces = line.iter().take_while(|&&b| isspace(b)).count();
    // Slide the kept bytes to the front, then drop the now-stale tail.
    shift_buf_left(line, spaces);
    let new_len = line.len() - spaces;
    line.truncate(new_len);
}

/// Removes leading and trailing `isspace` bytes, in place.
pub fn trim(line: &mut Vec<u8>) {
    ltrim(line);
    rtrim(line);
}

/// Returns the sub-slice of `i` that starts at its first non-`isspace` byte.
pub fn ltrim_slice(mut i: &[u8]) -> &[u8] {
    while let [first, rest @ ..] = i {
        if isspace(*first) {
            i = rest;
        } else {
            break;
        }
    }
    i
}

/// Returns the sub-slice of `i` that ends at its last non-`isspace` byte.
pub fn rtrim_slice(mut i: &[u8]) -> &[u8] {
    while let [rest @ .., last] = i {
        if isspace(*last) {
            i = rest;
        } else {
            break;
        }
    }
    i
}

/// Returns `i` without leading and trailing `isspace` bytes.
pub fn trim_slice(mut i: &[u8]) -> &[u8] {
    i = ltrim_slice(i);
    i = rtrim_slice(i);
    i
}

/// Moves `buf[n..]` to the start of `buf`. The length of `buf` is unchanged;
/// the final `n` bytes keep whatever they held before and are expected to be
/// truncated away by the caller.
///
/// Panics if `n > buf.len()`.
fn shift_buf_left(buf: &mut [u8], n: usize) {
    assert!(n <= buf.len());
    let keep = buf.len() - n;
    // SAFETY: `n <= buf.len()` was asserted above, so `dst + n` stays within
    // (or one past the end of) `buf`, and `keep` bytes are readable from
    // there and writable at `dst`. `ptr::copy` permits overlapping ranges.
    unsafe {
        let dst = buf.as_mut_ptr();
        let src = dst.add(n);
        ptr::copy(src, dst, keep);
    }
}

/// Trims `url`, decodes HTML entities via `entity::unescape_html`, and then
/// removes backslash escapes via `unescape`. An empty (or all-whitespace)
/// input yields an empty vector.
pub fn clean_url(url: &[u8]) -> Vec<u8> {
    let url = trim_slice(url);

    if url.is_empty() {
        return vec![];
    }

    let mut b = entity::unescape_html(url);
    unescape(&mut b);
    b
}

/// Strips one pair of surrounding delimiters (`'…'`, `(…)` or `"…"`) from
/// `title` if present, decodes HTML entities, and removes backslash escapes.
///
/// A title must be at least two bytes long for its delimiters to be stripped;
/// a lone `'` or `"` is treated as content. (Previously a one-byte title made
/// of such a character panicked on the `1..0` slice.)
pub fn clean_title(title: &[u8]) -> Vec<u8> {
    let title_len = title.len();
    if title_len == 0 {
        return vec![];
    }

    let first = title[0];
    let last = title[title_len - 1];

    let delimited = title_len >= 2
        && matches!(
            (first, last),
            (b'\'', b'\'') | (b'(', b')') | (b'"', b'"')
        );

    let mut b = if delimited {
        entity::unescape_html(&title[1..title_len - 1])
    } else {
        entity::unescape_html(title)
    };

    unescape(&mut b);
    b
}

/// Returns `true` when only spaces and tabs occur before the first line
/// ending in `s`, or before the end of `s` if it has no line ending.
pub fn is_blank(s: &[u8]) -> bool {
    for &c in s {
        if is_line_end_char(c) {
            return true;
        }
        if !is_space_or_tab(c) {
            return false;
        }
    }
    true
}

/// Normalizes a reference label: trims it, collapses every run of Unicode
/// whitespace into a single ASCII space, and, for `Case::Fold`, applies
/// `caseless::default_case_fold_str` to the result.
pub fn normalize_label(i: &str, casing: Case) -> String {
    // trim_slice only removes bytes from start and end that match isspace();
    // result is UTF-8.
    let i = unsafe { str::from_utf8_unchecked(trim_slice(i.as_bytes())) };

    let mut v = String::with_capacity(i.len());
    let mut last_was_whitespace = false;
    for c in i.chars() {
        if c.is_whitespace() {
            if !last_was_whitespace {
                last_was_whitespace = true;
                v.push(' ');
            }
        } else {
            last_was_whitespace = false;
            v.push(c);
        }
    }

    if casing == Case::Fold {
        caseless::default_case_fold_str(&v)
    } else {
        v
    }
}

#[test]
fn normalize_label_fold_test() {
    assert_eq!(normalize_label("Abc \t\ndef", Case::Preserve), "Abc def");
    assert_eq!(normalize_label("Abc \t\ndef", Case::Fold), "abc def");
    assert_eq!(normalize_label("Straẞe", Case::Preserve), "Straẞe");
    assert_eq!(normalize_label("Straẞe", Case::Fold), "strasse");
}

/// Length in bytes of the line ending (`\n` or `\r\n`) that `s` starts with,
/// or 0 if it starts with neither.
fn leading_newline_len(s: &str) -> usize {
    if s.starts_with('\n') {
        1
    } else if s.starts_with("\r\n") {
        2
    } else {
        0
    }
}

/// Splits a leading front matter block off `s`.
///
/// After an optional U+FEFF byte order mark is skipped, `s` must start with
/// `delimiter` on a line of its own, and a later line consisting of exactly
/// `delimiter` must close the block. On success returns
/// `(front_matter, rest)`: `front_matter` runs from the opening delimiter
/// through the closing delimiter's line ending plus at most one further line
/// ending, and `rest` is everything after it. Neither part contains the BOM.
/// Returns `None` if no such block is found.
///
/// Note: a closing delimiter terminated by `\r\n` is searched for first, so
/// it is chosen even if one terminated by `\n` occurs earlier in the input.
pub fn split_off_front_matter<'s>(mut s: &'s str, delimiter: &str) -> Option<(&'s str, &'s str)> {
    s = trim_start_match(s, "\u{feff}");

    if !s.starts_with(delimiter) {
        return None;
    }
    let mut start = delimiter.len();

    // The opening delimiter must be directly followed by a line ending.
    match leading_newline_len(&s[start..]) {
        0 => return None,
        n => start += n,
    }

    // Locate the closing delimiter line; the match offset points at the `\n`
    // that precedes it.
    let body = &s[start..];
    let close_at = body
        .find(format!("\n{}\r\n", delimiter).as_str())
        .or_else(|| body.find(format!("\n{}\n", delimiter).as_str()))?;
    start += close_at + 1 + delimiter.len();

    // Line ending of the closing delimiter itself.
    match leading_newline_len(&s[start..]) {
        0 => return None,
        n => start += n,
    }

    // One optional separating line ending belongs to the front matter too.
    start += leading_newline_len(&s[start..]);

    Some((&s[..start], &s[start..]))
}

/// Returns `s` without the prefix `pat` if it starts with it, else `s` as is.
pub fn trim_start_match<'s>(s: &'s str, pat: &str) -> &'s str {
    s.strip_prefix(pat).unwrap_or(s)
}

#[cfg(test)]
pub mod tests {
    use super::{
        chop_trailing_hashtags, clean_title, normalize_code, normalize_label,
        remove_trailing_blank_lines, split_off_front_matter,
    };
    use crate::strings::Case;

    #[test]
    fn normalize_code_handles_lone_newline() {
        assert_eq!(normalize_code(&[b'\n']), vec![b' ']);
    }

    #[test]
    fn normalize_code_handles_lone_space() {
        assert_eq!(normalize_code(&[b' ']), vec![b' ']);
    }

    #[test]
    fn remove_trailing_blank_lines_handles_empty_and_blank() {
        let mut empty = String::new();
        remove_trailing_blank_lines(&mut empty);
        assert_eq!(empty, "");

        let mut blank = String::from(" \t\n\n");
        remove_trailing_blank_lines(&mut blank);
        assert_eq!(blank, "");

        let mut text = String::from("foo\n\n  \n");
        remove_trailing_blank_lines(&mut text);
        assert_eq!(text, "foo");
    }

    #[test]
    fn chop_trailing_hashtags_handles_empty_and_blank() {
        let mut empty: Vec<u8> = vec![];
        chop_trailing_hashtags(&mut empty);
        assert!(empty.is_empty());

        let mut blank = b"   ".to_vec();
        chop_trailing_hashtags(&mut blank);
        assert!(blank.is_empty());

        let mut closed = b"foo ##  ".to_vec();
        chop_trailing_hashtags(&mut closed);
        assert_eq!(closed, b"foo".to_vec());

        let mut glued = b"foo##".to_vec();
        chop_trailing_hashtags(&mut glued);
        assert_eq!(glued, b"foo##".to_vec());
    }

    #[test]
    fn clean_title_handles_single_delimiter_byte() {
        assert_eq!(clean_title(b"\""), b"\"".to_vec());
        assert_eq!(clean_title(b"'"), b"'".to_vec());
        assert_eq!(clean_title(b"\"hi\""), b"hi".to_vec());
    }

    #[test]
    fn front_matter() {
        assert_eq!(
            split_off_front_matter("---\nfoo: bar\n---\nHiiii", "---"),
            Some(("---\nfoo: bar\n---\n", "Hiiii"))
        );
        assert_eq!(
            split_off_front_matter(
                "\u{feff}!@#\r\n\r\nfoo: !@# \r\nquux\n!@#\r\n\n\nYes!\n",
                "!@#"
            ),
            Some(("!@#\r\n\r\nfoo: !@# \r\nquux\n!@#\r\n\n", "\nYes!\n"))
        );
        assert_eq!(
            split_off_front_matter(
                "\u{feff}!@#\r\n\r\nfoo: \n!@# \r\nquux\n!@#\r\n\n\nYes!\n",
                "!@#"
            ),
            Some(("!@#\r\n\r\nfoo: \n!@# \r\nquux\n!@#\r\n\n", "\nYes!\n"))
        );
    }

    // U+0130 (LATIN CAPITAL LETTER I WITH DOT ABOVE) is written as an escape
    // so the tests do not depend on how this file's bytes are decoded.
    #[test]
    fn normalize_label_lowercase() {
        assert_eq!(normalize_label(" Foo\u{A0}BAR ", Case::Fold), "foo bar");
        assert_eq!(
            normalize_label(" Foo\u{130}BAR ", Case::Fold),
            "fooi\u{307}bar"
        );
    }

    #[test]
    fn normalize_label_preserve() {
        assert_eq!(
            normalize_label(" Foo\u{A0}BAR ", Case::Preserve),
            "Foo BAR"
        );
        assert_eq!(
            normalize_label(" Foo\u{130}BAR ", Case::Preserve),
            "Foo\u{130}BAR"
        );
    }
}
�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/tests/api.rs����������������������������������������������������������������������0000644�0000000�0000000�00000022047�10461020230�0014245�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use std::sync::{Arc, Mutex}; use parser::BrokenLinkReference; use crate::{ adapters::{HeadingAdapter, HeadingMeta, SyntaxHighlighterAdapter}, nodes::Sourcepos, }; use super::*; #[test] fn exercise_full_api() { let arena = Arena::new(); let default_options = Options::default(); let default_plugins = Plugins::default(); let node = parse_document(&arena, "# My document\n", &default_options); let mut buffer = vec![]; // Use every member of the exposed API without any defaults. // Not looking for specific outputs, just want to know if the API changes shape. let _: std::io::Result<()> = format_commonmark(node, &default_options, &mut buffer); let _: std::io::Result<()> = format_html(node, &default_options, &mut buffer); let _: std::io::Result<()> = format_html_with_plugins(node, &default_options, &mut buffer, &default_plugins); let _: String = Anchorizer::new().anchorize("header".to_string()); let _: &AstNode = parse_document(&arena, "document", &default_options); // Ensure the closure can modify its context. 
let mut blr_ctx_0 = 0; #[allow(deprecated)] let _: &AstNode = parse_document_with_broken_link_callback( &arena, "document", &Options::default(), Some(&mut |blr: BrokenLinkReference| { blr_ctx_0 += 1; let _: &str = blr.normalized; let _: &str = blr.original; Some(ResolvedReference { url: String::new(), title: String::new(), }) }), ); let mut extension = ExtensionOptionsBuilder::default(); extension.strikethrough(false); extension.tagfilter(false); extension.table(false); extension.autolink(false); extension.tasklist(false); extension.superscript(false); extension.header_ids(Some("abc".to_string())); extension.footnotes(false); extension.description_lists(false); extension.math_dollars(false); extension.math_code(false); extension.front_matter_delimiter(None); extension.multiline_block_quotes(false); extension.math_dollars(false); extension.math_code(false); #[cfg(feature = "shortcodes")] extension.shortcodes(true); extension.wikilinks_title_after_pipe(true); extension.wikilinks_title_before_pipe(true); extension.underline(true); extension.spoiler(true); extension.greentext(true); let mut parse = ParseOptionsBuilder::default(); parse.smart(false); parse.default_info_string(Some("abc".to_string())); parse.relaxed_tasklist_matching(false); parse.relaxed_autolinks(false); let mut blr_ctx_1 = 0; parse.broken_link_callback(Some(Arc::new(Mutex::new( &mut |blr: BrokenLinkReference| { blr_ctx_1 += 1; let _: &str = blr.normalized; let _: &str = blr.original; Some(ResolvedReference { url: String::new(), title: String::new(), }) }, )))); let mut render = RenderOptionsBuilder::default(); render.hardbreaks(false); render.github_pre_lang(false); render.full_info_string(false); render.width(123456); render.unsafe_(false); render.escape(false); render.list_style(ListStyleType::Dash); render.sourcepos(false); render.experimental_inline_sourcepos(false); render.escaped_char_spans(false); render.ignore_setext(true); render.ignore_empty_links(true); render.gfm_quirks(true); 
render.prefer_fenced(true); render.figure_with_caption(true); pub struct MockAdapter {} impl SyntaxHighlighterAdapter for MockAdapter { fn write_highlighted( &self, _output: &mut dyn Write, _lang: Option<&str>, _code: &str, ) -> io::Result<()> { unreachable!() } fn write_pre_tag( &self, _output: &mut dyn Write, _attributes: HashMap<String, String>, ) -> io::Result<()> { unreachable!() } fn write_code_tag( &self, _output: &mut dyn Write, _attributes: HashMap<String, String>, ) -> io::Result<()> { unreachable!() } } impl HeadingAdapter for MockAdapter { fn enter( &self, _output: &mut dyn Write, _heading: &HeadingMeta, _sourcepos: Option<Sourcepos>, ) -> io::Result<()> { unreachable!() } fn exit(&self, _output: &mut dyn Write, _heading: &HeadingMeta) -> io::Result<()> { unreachable!() } } let mock_adapter = MockAdapter {}; let mut render_plugins = RenderPluginsBuilder::default(); render_plugins.codefence_syntax_highlighter(Some(&mock_adapter)); render_plugins.heading_adapter(Some(&mock_adapter)); let mut plugins = PluginsBuilder::default(); plugins.render(render_plugins.build().unwrap()); let _: String = markdown_to_html("# Yes", &default_options); // let ast = node.data.borrow(); let _: usize = ast.sourcepos.start.line; let _: usize = ast.sourcepos.start.column; let _: usize = ast.sourcepos.end.line; let _: usize = ast.sourcepos.end.column; match &ast.value { nodes::NodeValue::Document => {} nodes::NodeValue::FrontMatter(_) => {} nodes::NodeValue::BlockQuote => {} nodes::NodeValue::List(nl) | nodes::NodeValue::Item(nl) => { match nl.list_type { nodes::ListType::Bullet => {} nodes::ListType::Ordered => {} } let _: usize = nl.start; match nl.delimiter { nodes::ListDelimType::Period => {} nodes::ListDelimType::Paren => {} } let _: u8 = nl.bullet_char; let _: bool = nl.tight; } nodes::NodeValue::DescriptionList => {} nodes::NodeValue::DescriptionItem(_ndi) => {} nodes::NodeValue::DescriptionTerm => {} nodes::NodeValue::DescriptionDetails => {} 
nodes::NodeValue::CodeBlock(ncb) => { let _: bool = ncb.fenced; let _: u8 = ncb.fence_char; let _: usize = ncb.fence_length; let _: String = ncb.info; let _: String = ncb.literal; } nodes::NodeValue::HtmlBlock(nhb) => { let _: String = nhb.literal; } nodes::NodeValue::Paragraph => {} nodes::NodeValue::Heading(nh) => { let _: u8 = nh.level; let _: bool = nh.setext; } nodes::NodeValue::ThematicBreak => {} nodes::NodeValue::FootnoteDefinition(nfd) => { let _: &String = &nfd.name; let _: u32 = nfd.total_references; } nodes::NodeValue::Table(nt) => { let _: &Vec<nodes::TableAlignment> = &nt.alignments; let _: usize = nt.num_nonempty_cells; let _: usize = nt.num_rows; match nt.alignments[0] { nodes::TableAlignment::None => {} nodes::TableAlignment::Left => {} nodes::TableAlignment::Center => {} nodes::TableAlignment::Right => {} } } nodes::NodeValue::TableRow(header) => { let _: &bool = header; } nodes::NodeValue::TableCell => {} nodes::NodeValue::Text(text) => { let _: &String = text; } nodes::NodeValue::TaskItem(symbol) => { let _: &Option<char> = symbol; } nodes::NodeValue::SoftBreak => {} nodes::NodeValue::LineBreak => {} nodes::NodeValue::Code(code) => { let _: usize = code.num_backticks; let _: String = code.literal; } nodes::NodeValue::HtmlInline(html) => { let _: &String = html; } nodes::NodeValue::Emph => {} nodes::NodeValue::Strong => {} nodes::NodeValue::Strikethrough => {} nodes::NodeValue::Superscript => {} nodes::NodeValue::Link(nl) | nodes::NodeValue::Image(nl) => { let _: String = nl.url; let _: String = nl.title; } #[cfg(feature = "shortcodes")] nodes::NodeValue::ShortCode(nsc) => { let _: String = nsc.code; let _: String = nsc.emoji; } nodes::NodeValue::FootnoteReference(nfr) => { let _: String = nfr.name; let _: u32 = nfr.ix; } nodes::NodeValue::MultilineBlockQuote(mbc) => { let _: usize = mbc.fence_length; let _: usize = mbc.fence_offset; } nodes::NodeValue::Escaped => {} nodes::NodeValue::Math(math) => { let _: bool = math.display_math; let _: bool = 
math.dollar_math; let _: String = math.literal; } nodes::NodeValue::WikiLink(nl) => { let _: String = nl.url; } nodes::NodeValue::Underline => {} nodes::NodeValue::SpoileredText => {} nodes::NodeValue::EscapedTag(data) => { let _: &String = data; } } } �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/tests/autolink.rs�����������������������������������������������������������������0000644�0000000�0000000�00000030351�10461020230�0015317�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use super::*; #[test] fn autolink_www() { html_opts!( [extension.autolink], concat!("www.autolink.com\n"), concat!("<p><a href=\"http://www.autolink.com\">www.autolink.com</a></p>\n"), ); } #[test] fn autolink_email() { html_opts!( [extension.autolink], concat!("john@smith.com\n"), concat!("<p><a href=\"mailto:john@smith.com\">john@smith.com</a></p>\n"), ); } #[test] fn autolink_scheme() { html_opts!( [extension.autolink], concat!("https://google.com/search\n", "rdar://localhost.com/blah"), concat!( "<p><a href=\"https://google.com/search\">https://google.com/search</a>\n", "rdar://localhost.com/blah</p>\n" ), ); } #[test] fn autolink_scheme_multiline() { html_opts!( [extension.autolink], 
concat!("https://google.com/search\nhttps://www.google.com/maps"), concat!( "<p><a href=\"https://google.com/search\">https://google.\ com/search</a>\n<a href=\"https://www.google.com/maps\">\ https://www.google.com/maps</a></p>\n" ), ); } #[test] fn autolink_no_link_bad() { html_opts!( [extension.autolink], concat!("@a.b.c@. x\n", "\n", "n@. x\n"), concat!("<p>@a.b.c@. x</p>\n", "<p>n@. x</p>\n"), ); } #[test] fn autolink_parentheses_balanced() { let examples = [ [ "http://www.pokemon.com/Pikachu_(Electric)", "<p><a href=\"http://www.pokemon.com/Pikachu_(Electric)\">http://www.pokemon.com/Pikachu_(Electric)</a></p>\n", ], [ "http://www.pokemon.com/Pikachu_((Electric)", "<p><a href=\"http://www.pokemon.com/Pikachu_((Electric)\">http://www.pokemon.com/Pikachu_((Electric)</a></p>\n", ], [ "http://www.pokemon.com/Pikachu_(Electric))", "<p><a href=\"http://www.pokemon.com/Pikachu_(Electric)\">http://www.pokemon.com/Pikachu_(Electric)</a>)</p>\n", ], [ "http://www.pokemon.com/Pikachu_((Electric))", "<p><a href=\"http://www.pokemon.com/Pikachu_((Electric))\">http://www.pokemon.com/Pikachu_((Electric))</a></p>\n", ], ]; for example in examples { html_opts!([extension.autolink], example[0], example[1]); } for example in examples { html_opts!( [extension.autolink, parse.relaxed_autolinks], example[0], example[1] ); } } #[test] fn autolink_brackets_unbalanced() { html_opts!( [extension.autolink], concat!("http://example.com/[abc]]...\n"), concat!( "<p><a href=\"http://example.com/%5Babc%5D%5D\">http://example.com/[abc]]</a>...</p>\n" ), ); } #[test] fn autolink_ignore_links_in_brackets() { let examples = [ ["[https://foo.com]", "<p>[https://foo.com]</p>\n"], ["[[https://foo.com]]", "<p>[[https://foo.com]]</p>\n"], [ "[[Foo|https://foo.com]]", "<p>[[Foo|https://foo.com]]</p>\n", ], [ "[<https://foo.com>]", "<p>[<a href=\"https://foo.com\">https://foo.com</a>]</p>\n", ], ]; for example in examples { html_opts!([extension.autolink], example[0], example[1], no_roundtrip); } } 
#[test] fn autolink_relaxed_links_in_brackets() { let examples = [ [ "[https://foo.com]", "<p>[<a href=\"https://foo.com\">https://foo.com</a>]</p>\n", ], [ "[[https://foo.com]]", "<p>[[<a href=\"https://foo.com\">https://foo.com</a>]]</p>\n", ], [ "[[Foo|https://foo.com]]", "<p>[[Foo|<a href=\"https://foo.com\">https://foo.com</a>]]</p>\n", ], [ "[<https://foo.com>]", "<p>[<a href=\"https://foo.com\">https://foo.com</a>]</p>\n", ], [ "[http://foo.com/](url)", "<p><a href=\"url\">http://foo.com/</a></p>\n", ], ["[http://foo.com/](url", "<p>[http://foo.com/](url</p>\n"], [ "[www.foo.com/](url)", "<p><a href=\"url\">www.foo.com/</a></p>\n", ], [ "{https://foo.com}", "<p>{<a href=\"https://foo.com\">https://foo.com</a>}</p>\n", ], [ "[this http://and.com that](url)", "<p><a href=\"url\">this http://and.com that</a></p>\n", ], [ "[this <http://and.com> that](url)", "<p><a href=\"url\">this http://and.com that</a></p>\n", ], [ "{this http://and.com that}(url)", "<p>{this <a href=\"http://and.com\">http://and.com</a> that}(url)</p>\n", ], [ "[http://foo.com](url)\n[http://bar.com]\n\n[http://bar.com]: http://bar.com/extra", "<p><a href=\"url\">http://foo.com</a>\n<a href=\"http://bar.com/extra\">http://bar.com</a></p>\n", ], ]; for example in examples { html_opts!( [extension.autolink, parse.relaxed_autolinks], example[0], example[1] ); } } #[test] fn autolink_relaxed_links_brackets_balanced() { html_opts!( [extension.autolink, parse.relaxed_autolinks], concat!("http://example.com/[abc]]...\n"), concat!( "<p><a href=\"http://example.com/%5Babc%5D\">http://example.com/[abc]</a>]...</p>\n" ), ); } #[test] fn autolink_relaxed_links_curly_braces_balanced() { html_opts!( [extension.autolink, parse.relaxed_autolinks], concat!("http://example.com/{abc}}...\n"), concat!( "<p><a href=\"http://example.com/%7Babc%7D\">http://example.com/{abc}</a>}...</p>\n" ), ); } #[test] fn autolink_relaxed_links_curly_parentheses_balanced() { html_opts!( [extension.autolink, 
parse.relaxed_autolinks], concat!("http://example.com/(abc))...\n"), concat!("<p><a href=\"http://example.com/(abc)\">http://example.com/(abc)</a>)...</p>\n"), ); } #[test] fn autolink_relaxed_links_schemes() { let examples = [ [ "https://foo.com", "<p><a href=\"https://foo.com\">https://foo.com</a></p>\n", ], [ "smb:///Volumes/shared/foo.pdf", "<p><a href=\"smb:///Volumes/shared/foo.pdf\">smb:///Volumes/shared/foo.pdf</a></p>\n", ], [ "irc://irc.freenode.net/git", "<p><a href=\"irc://irc.freenode.net/git\">irc://irc.freenode.net/git</a></p>\n", ], [ "rdar://localhost.com/blah", "<p><a href=\"rdar://localhost.com/blah\">rdar://localhost.com/blah</a></p>\n", ], ]; for example in examples { html_opts!( [extension.autolink, parse.relaxed_autolinks], example[0], example[1] ); } } #[test] fn sourcepos_correctly_restores_context() { // There's unsoundness in trying to maintain and adjust sourcepos // when doing autolinks in the light of: // // a) Some source elements introducing a different number of characters // to the content text than they take in source, i.e. smart // punctuation. // // b) Text node consolidation happening before autolinking. // // (b) is obviously non-optional, but it means we end up with Text // nodes with different byte counts than their sourcepos span lengths. // // One possible solution would be to actually accumulate multiple // sourcepos spans per Text node, each also tracking the number of // bytes of content text it's responsible for. This would work well // enough as long as we never had to adjust a sourcepos into a spot // within a sourcepos span that had a target text width where it // wasn't equal. That probably wouldn't happen, though -- i.e. we're // never autolinking into the middle of a rendered smart punctuation. // // For now the desired sourcepos is documented in comment. What we // have currently (after backing out the adjustments, having hit the // above case) matches cmark-gfm. 
assert_ast_match!( [], "ab _cde_ f@g.ee h*ijklm* n", (document (1:1-1:26) [ (paragraph (1:1-1:26) [ (text (1:1-1:3) "ab ") (emph (1:4-1:8) [ (text (1:5-1:7) "cde") ]) (text (1:9-1:17) " f@g.ee h") (emph (1:18-1:24) [ (text (1:19-1:23) "ijklm") ]) (text (1:25-1:26) " n") ]) ]) ); assert_ast_match!( [extension.autolink], "ab _cde_ f@g.ee h*ijklm* n", (document (1:1-1:26) [ (paragraph (1:1-1:26) [ (text (1:1-1:3) "ab ") (emph (1:4-1:8) [ (text (1:5-1:7) "cde") ]) (text (1:9-1:17) " ") // (text (1:9-1:9) " ") (link (XXX) [ // (link (1:10-1:15) [ (text (XXX) "f@g.ee") // (text (1:10-1:15) "f@g.ee") ]) (text (XXX) " h") // (text (1:16-1:17) " h") (emph (1:18-1:24) [ (text (1:19-1:23) "ijklm") ]) (text (1:25-1:26) " n") ]) ]) ); } #[test] fn autolink_cmark_edge_382() { html_opts!( [extension.autolink], "See &lt;&lt;&lt;http://example.com/&gt;&gt;&gt;", "<p>See &lt;&lt;&lt;<a href=\"http://example.com/\">http://example.com/</a>&gt;&gt;&gt;</p>\n", ); } #[test] fn autolink_cmark_edge_388() { html_opts!( [extension.autolink], "http://example.com/src/_mocks_/vscode.js", "<p><a href=\"http://example.com/src/_mocks_/vscode.js\">http://example.com/src/_mocks_/vscode.js</a></p>\n", ); } #[test] fn autolink_cmark_edge_423() { html_opts!( [extension.autolink, extension.strikethrough], concat!( "Here's an autolink: ", "https://www.unicode.org/review/pri453/feedback.html#:~:text=Fri%20Jun%2024%2009:56:01%20CDT%202022", " and another one ", "https://www.unicode.org/review/pri453/feedback.html#:~:text=Fri%20Jun%2024%2009:56:01%20CDT%202022", ".", ), concat!( "<p>Here's an autolink: ", r#"<a href="https://www.unicode.org/review/pri453/feedback.html#:~:text=Fri%20Jun%2024%2009:56:01%20CDT%202022">"#, "https://www.unicode.org/review/pri453/feedback.html#:~:text=Fri%20Jun%2024%2009:56:01%20CDT%202022", "</a> and another one ", r#"<a href="https://www.unicode.org/review/pri453/feedback.html#:~:text=Fri%20Jun%2024%2009:56:01%20CDT%202022">"#, 
"https://www.unicode.org/review/pri453/feedback.html#:~:text=Fri%20Jun%2024%2009:56:01%20CDT%202022", "</a>.</p>\n", ), ); } #[test] fn autolink_cmark_edge_58() { html_opts!( [extension.autolink, extension.superscript], "https://www.wolframalpha.com/input/?i=x^2+(y-(x^2)^(1/3))^2=1", concat!( "<p>", r#"<a href="https://www.wolframalpha.com/input/?i=x%5E2+(y-(x%5E2)%5E(1/3))%5E2=1">"#, "https://www.wolframalpha.com/input/?i=x^2+(y-(x^2)^(1/3))^2=1", "</a></p>\n", ), ); } #[test] fn autolink_failing_spec_image() { html_opts!( [extension.autolink], "![http://inline.com/image](http://inline.com/image)", "<p><img src=\"http://inline.com/image\" alt=\"http://inline.com/image\" /></p>\n", ); } #[test] fn autolink_failing_spec_underscores() { html_opts!( [extension.autolink], "Underscores not allowed in host name www.xxx.yyy._zzz", "<p>Underscores not allowed in host name www.xxx.yyy._zzz</p>\n", ); } #[test] fn autolink_fuzz_leading_colon() { html_opts!( [extension.autolink, parse.relaxed_autolinks], "://-", "<p><a href=\"://-\">://-</a></p>\n", no_roundtrip, ); } #[test] fn autolink_fuzz_we() { html_opts!( [extension.autolink, parse.relaxed_autolinks], "we://w", "<p><a href=\"we://w\">we://w</a></p>\n", no_roundtrip, ); } ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/tests/commonmark.rs���������������������������������������������������������������0000644�0000000�0000000�00000004570�10461020230�0015640�0����������������������������������������������������������������������������������������������������ustar 
�����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use std::cell::RefCell; use self::nodes::{Ast, LineColumn}; use super::*; use ntest::test_case; #[test] fn commonmark_removes_redundant_strong() { let input = "This is **something **even** better**"; let output = "This is **something even better**\n"; commonmark(input, output, None); } #[test] fn commonmark_avoids_spurious_backslash() { let arena = Arena::new(); let options = Options::default(); let empty = LineColumn { line: 0, column: 0 }; let ast = |val: NodeValue| arena.alloc(AstNode::new(RefCell::new(Ast::new(val, empty)))); let root = ast(NodeValue::Document); let p1 = ast(NodeValue::Paragraph); p1.append(ast(NodeValue::Text("Line 1".to_owned()))); p1.append(ast(NodeValue::LineBreak)); root.append(p1); let p2 = ast(NodeValue::Paragraph); p2.append(ast(NodeValue::Text("Line 2".to_owned()))); root.append(p2); let mut output = vec![]; cm::format_document(root, &options, &mut output).unwrap(); compare_strs( &String::from_utf8(output).unwrap(), "Line 1\n\nLine 2\n", "rendered", "<synthetic>", ); } #[test_case("$$x^2$$ and $1 + 2$ and $`y^2`$", "$$x^2$$ and $1 + 2$ and $`y^2`$\n")] #[test_case("$$\nx^2\n$$", "$$\nx^2\n$$\n")] #[test_case("```math\nx^2\n```", "``` math\nx^2\n```\n")] fn math(markdown: &str, cm: &str) { let mut options = Options::default(); options.extension.math_dollars = true; options.extension.math_code = true; commonmark(markdown, cm, None); } #[test_case("This [[url]] that", "This [[url|url]] that\n")] #[test_case("This [[url|link label]] that", "This [[url|link%20label]] that\n")] fn wikilinks(markdown: &str, cm: &str) { let mut options = Options::default(); options.extension.wikilinks_title_before_pipe = true; commonmark(markdown, cm, Some(&options)); } #[test] fn commonmark_relist() { commonmark( 
concat!("3. one\n", "5. two\n",), // Note that right now we always include enough room for up to two // digits. TODO: Ideally we determine the maximum digit length before // getting this far. concat!("3. one\n", "4. two\n",), None, ); let mut options = Options::default(); options.extension.tasklist = true; commonmark( concat!("3. [ ] one\n", "5. [ ] two\n",), concat!("3. [ ] one\n", "4. [ ] two\n",), Some(&options), ); } ����������������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/tests/core.rs���������������������������������������������������������������������0000644�0000000�0000000�00000042577�10461020230�0014436�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use crate::nodes::{NodeCode, NodeValue}; use super::*; #[test] fn basic() { html( concat!( "My **document**.\n", "\n", "It's mine.\n", "\n", "> Yes.\n", "\n", "## Hi!\n", "\n", "Okay.\n" ), concat!( "<p>My <strong>document</strong>.</p>\n", "<p>It's mine.</p>\n", "<blockquote>\n", "<p>Yes.</p>\n", "</blockquote>\n", "<h2>Hi!</h2>\n", "<p>Okay.</p>\n" ), ); } #[test] fn codefence() { html( concat!("``` rust yum\n", "fn main<'a>();\n", "```\n"), concat!( "<pre><code class=\"language-rust\">fn main&lt;'a&gt;();\n", "</code></pre>\n" ), ); } #[test] fn lists() { html( concat!("2. Hello.\n", "3. 
Hi.\n"), concat!( "<ol start=\"2\">\n", "<li>Hello.</li>\n", "<li>Hi.</li>\n", "</ol>\n" ), ); html( concat!("- Hello.\n", "- Hi.\n"), concat!("<ul>\n", "<li>Hello.</li>\n", "<li>Hi.</li>\n", "</ul>\n"), ); } #[test] fn thematic_breaks() { html( concat!("---\n", "\n", "- - -\n", "\n", "\n", "_ _ _\n"), concat!("<hr />\n", "<hr />\n", "<hr />\n"), ); } #[test] fn atx_heading() { html( concat!("# h1\n", "foo\n", "## h2\n"), concat!("<h1>h1</h1>\n", "<p>foo</p>\n", "<h2>h2</h2>\n"), ); } #[test] fn atx_heading_sourcepos() { assert_ast_match!( [], "# h1\n" "foo\n" "## h2\n", (document (1:1-3:5) [ (heading (1:1-1:4) [ (text (1:3-1:4) "h1") ]) (paragraph (2:1-2:3) [ (text (2:1-2:3) "foo") ]) (heading (3:1-3:5) [ (text (3:4-3:5) "h2") ]) ]) ); } #[test] fn setext_heading() { html( concat!("Hi\n", "==\n", "\n", "Ok\n", "-----\n"), concat!("<h1>Hi</h1>\n", "<h2>Ok</h2>\n"), ); } #[test] fn setext_heading_sourcepos() { assert_ast_match!( [], "Header\n" "---\n" "this", (document (1:1-3:4) [ (heading (1:1-2:3) [ (text (1:1-1:6) "Header") ]) (paragraph (3:1-3:4) [ (text (3:1-3:4) "this") ]) ]) ); } #[test] fn ignore_setext_heading() { html_opts!( [render.ignore_setext], concat!("text text\n---"), concat!("<p>text text</p>\n<hr />\n"), ); } #[test] fn figure_with_caption_with_title() { html_opts!( [render.figure_with_caption], concat!("![image](https://example.com/image.png \"this is an image\")\n"), concat!("<p><figure><img src=\"https://example.com/image.png\" alt=\"image\" title=\"this is an image\" /><figcaption>this is an image</figcaption></figure></p>\n"), ); } #[test] fn figure_with_caption_without_title() { html_opts!( [render.figure_with_caption], concat!("![image](https://example.com/image.png)\n"), concat!( "<p><figure><img src=\"https://example.com/image.png\" alt=\"image\" /></figure></p>\n" ), ); } #[test] fn html_block_1() { html_opts!( [render.unsafe_], concat!( "<script>\n", "*ok* </script> *ok*\n", "\n", "*ok*\n", "\n", "*ok*\n", "\n", "<pre x>\n", "*ok*\n", 
"</style>\n", "*ok*\n", "<style>\n", "*ok*\n", "</style>\n", "\n", "*ok*\n" ), concat!( "<script>\n", "*ok* </script> *ok*\n", "<p><em>ok</em></p>\n", "<p><em>ok</em></p>\n", "<pre x>\n", "*ok*\n", "</style>\n", "<p><em>ok</em></p>\n", "<style>\n", "*ok*\n", "</style>\n", "<p><em>ok</em></p>\n" ), ); } #[test] fn html_block_2() { html_opts!( [render.unsafe_], concat!(" <!-- abc\n", "\n", "ok --> *hi*\n", "*hi*\n"), concat!( " <!-- abc\n", "\n", "ok --> *hi*\n", "<p><em>hi</em></p>\n" ), ); } #[test] fn html_block_3() { html_opts!( [render.unsafe_], concat!(" <? o\n", "k ?> *a*\n", "*a*\n"), concat!(" <? o\n", "k ?> *a*\n", "<p><em>a</em></p>\n"), ); } #[test] fn html_block_4() { html_opts!( [render.unsafe_], concat!("<!X >\n", "ok\n", "<!X\n", "um > h\n", "ok\n"), concat!("<!X >\n", "<p>ok</p>\n", "<!X\n", "um > h\n", "<p>ok</p>\n"), ); } #[test] fn html_block_5() { html_opts!( [render.unsafe_], concat!( "<![CDATA[\n", "\n", "hm >\n", "*ok*\n", "]]> *ok*\n", "*ok*\n" ), concat!( "<![CDATA[\n", "\n", "hm >\n", "*ok*\n", "]]> *ok*\n", "<p><em>ok</em></p>\n" ), ); } #[test] fn html_block_6() { html_opts!( [render.unsafe_], concat!(" </table>\n", "*x*\n", "\n", "ok\n", "\n", "<li\n", "*x*\n"), concat!(" </table>\n", "*x*\n", "<p>ok</p>\n", "<li\n", "*x*\n"), ); } #[test] fn html_block_7() { html_opts!( [render.unsafe_], concat!( "<a b >\n", "ok\n", "\n", "<a b=>\n", "ok\n", "\n", "<a b \n", "<a b> c\n", "ok\n" ), concat!( "<a b >\n", "ok\n", "<p>&lt;a b=&gt;\n", "ok</p>\n", "<p>&lt;a b\n", "<a b> c\n", "ok</p>\n" ), ); html_opts!( [render.unsafe_], concat!("<a b c=x d='y' z=\"f\" >\n", "ok\n", "\n", "ok\n"), concat!("<a b c=x d='y' z=\"f\" >\n", "ok\n", "<p>ok</p>\n"), ); } #[test] fn backticks() { html( "Some `code\\` yep.\n", "<p>Some <code>code\\</code> yep.</p>\n", ); } #[test] fn backticks_empty_with_newline_should_be_space() { html("`\n`", "<p><code> </code></p>\n"); } #[test] fn blockquote_hard_linebreak_space() { html(">\\\n A", "<blockquote>\n<p><br 
/>\nA</p>\n</blockquote>\n"); } #[test] fn blockquote_hard_linebreak_nonlazy_space() { html( "> A\\\n> B", "<blockquote>\n<p>A<br />\nB</p>\n</blockquote>\n", ); } #[test] fn backticks_num() { let input = "Some `code1`. More ``` code2 ```.\n"; let arena = Arena::new(); let options = Options::default(); let root = parse_document(&arena, input, &options); let code1 = NodeValue::Code(NodeCode { num_backticks: 1, literal: "code1".to_string(), }); asssert_node_eq(root, &[0, 1], &code1); let code2 = NodeValue::Code(NodeCode { num_backticks: 3, literal: "code2".to_string(), }); asssert_node_eq(root, &[0, 3], &code2); } #[test] fn backslashes() { html( concat!( "Some \\`fake code\\`.\n", "\n", "Some fake linebreaks:\\\n", "Yes.\\\n", "See?\n", "\n", "Ga\\rbage.\n" ), concat!( "<p>Some `fake code`.</p>\n", "<p>Some fake linebreaks:<br />\n", "Yes.<br />\n", "See?</p>\n", "<p>Ga\\rbage.</p>\n" ), ); } #[test] fn entities() { html( concat!( "This is &amp;, &copy;, &trade;, \\&trade;, &xyz;, &NotEqualTilde;.\n", "\n", "&#8734; &#x221e;\n" ), concat!( "<p>This is &amp;, Β©, β„’, &amp;trade;, &amp;xyz;, \u{2242}\u{338}.</p>\n", "<p>∞ ∞</p>\n" ), ); } #[test] fn links() { html( concat!( "Where are you [going](https://microsoft.com (today))?\n", "\n", "[Where am I?](/here)\n" ), concat!( "<p>Where are you <a href=\"https://microsoft.com\" \ title=\"today\">going</a>?</p>\n", "<p><a href=\"/here\">Where am I?</a></p>\n" ), ); html( concat!( r"Where are you [going](#1\.-link (today))?", "\n", "\n", "[Where am I?](/here)\n" ), concat!( "<p>Where are you <a href=\"#1.-link\" \ title=\"today\">going</a>?</p>\n", "<p><a href=\"/here\">Where am I?</a></p>\n" ), ); html( r"[Link Text](\\\\)", concat!(r##"<p><a href="%5C%5C">Link Text</a></p>"##, '\n'), ); html( r"[Link Text](\\\\\\\\\\)", concat!(r##"<p><a href="%5C%5C%5C%5C%5C">Link Text</a></p>"##, '\n'), ); html( r"[Link Text](\\\\ (title))", concat!( r##"<p><a href="%5C%5C" title="title">Link Text</a></p>"##, '\n' ), ); html( r"[Link 
Text](\#)", concat!(r##"<p><a href="#">Link Text</a></p>"##, '\n'), ); } #[test] fn images() { html( concat!("I am ![eating [things](/url)](http://i.imgur.com/QqK1vq7.png).\n"), concat!( "<p>I am <img src=\"http://i.imgur.com/QqK1vq7.png\" alt=\"eating things\" \ />.</p>\n" ), ); } #[test] fn reference_links() { html( concat!( "This [is] [legit], [very][honestly] legit.\n", "\n", "[legit]: ok\n", "[honestly]: sure \"hm\"\n" ), concat!( "<p>This [is] <a href=\"ok\">legit</a>, <a href=\"sure\" title=\"hm\">very</a> \ legit.</p>\n" ), ); } #[test] fn reference_links_casefold() { html( concat!("[ẞ]\n", "\n", "[SS]: /url \n",), "<p><a href=\"/url\">ẞ</a></p>\n", ); } #[test] fn safety() { html( concat!( "[data:image/png](data:image/png/x)\n\n", "[data:image/gif](data:image/gif/x)\n\n", "[data:image/jpeg](data:image/jpeg/x)\n\n", "[data:image/webp](data:image/webp/x)\n\n", "[data:malicious](data:malicious/x)\n\n", "[javascript:malicious](javascript:malicious)\n\n", "[vbscript:malicious](vbscript:malicious)\n\n", "[file:malicious](file:malicious)\n\n", ), concat!( "<p><a href=\"data:image/png/x\">data:image/png</a></p>\n", "<p><a href=\"data:image/gif/x\">data:image/gif</a></p>\n", "<p><a href=\"data:image/jpeg/x\">data:image/jpeg</a></p>\n", "<p><a href=\"data:image/webp/x\">data:image/webp</a></p>\n", "<p><a href=\"\">data:malicious</a></p>\n", "<p><a href=\"\">javascript:malicious</a></p>\n", "<p><a href=\"\">vbscript:malicious</a></p>\n", "<p><a href=\"\">file:malicious</a></p>\n", ), ) } #[test] fn link_backslash_requires_punct() { // Test should probably be in the spec. 
html("[a](\\ b)", "<p>[a](\\ b)</p>\n"); } #[test] fn nul_replacement_1() { html("a\0b", "<p>a\u{fffd}b</p>\n"); } #[test] fn nul_replacement_2() { html("a\0b\0c", "<p>a\u{fffd}b\u{fffd}c</p>\n"); } #[test] fn nul_replacement_3() { html("a\0\nb", "<p>a\u{fffd}\nb</p>\n"); } #[test] fn nul_replacement_4() { html("a\0\r\nb", "<p>a\u{fffd}\nb</p>\n"); } #[test] fn nul_replacement_5() { html("a\r\n\0b", "<p>a\n\u{fffd}b</p>\n"); } #[test] fn case_insensitive_safety() { html( "[a](javascript:a) [b](Javascript:b) [c](jaVascript:c) [d](data:xyz) [e](Data:xyz) [f](vbscripT:f) [g](FILE:g)\n", "<p><a href=\"\">a</a> <a href=\"\">b</a> <a href=\"\">c</a> <a href=\"\">d</a> <a href=\"\">e</a> <a href=\"\">f</a> <a href=\"\">g</a></p>\n", ); } #[test] fn link_sourcepos_baseline() { assert_ast_match!( [], "[ABCD](/)\n", (document (1:1-1:9) [ (paragraph (1:1-1:9) [ (link (1:1-1:9) [ (text (1:2-1:5) "ABCD") ]) ]) ]) ); } // https://github.com/kivikakk/comrak/issues/301 #[test] fn link_sourcepos_newline() { assert_ast_match!( [], "[AB\nCD](/)\n", (document (1:1-2:6) [ (paragraph (1:1-2:6) [ (link (1:1-2:6) [ (text (1:2-1:3) "AB") (softbreak (1:4-1:4)) (text (2:1-2:2) "CD") ]) ]) ]) ); } #[test] fn link_sourcepos_truffle() { assert_ast_match!( [], "- A\n[![B](/B.png)](/B)\n", (document (1:1-2:18) [ (list (1:1-2:18) [ (item (1:1-2:18) [ (paragraph (1:3-2:18) [ (text (1:3-1:3) "A") (softbreak (1:4-1:4)) (link (2:1-2:18) [ (image (2:2-2:13) [ (text (2:4-2:4) "B") ]) ]) ]) ]) ]) ]) ); } #[test] fn link_sourcepos_truffle_twist() { assert_ast_match!( [], "- A\n [![B](/B.png)](/B)\n", (document (1:1-2:20) [ (list (1:1-2:20) [ (item (1:1-2:20) [ (paragraph (1:3-2:20) [ (text (1:3-1:3) "A") (softbreak (1:4-1:4)) (link (2:3-2:20) [ (image (2:4-2:15) [ (text (2:6-2:6) "B") ]) ]) ]) ]) ]) ]) ); } #[test] fn link_sourcepos_truffle_bergamot() { assert_ast_match!( [], "- A\n [![B](/B.png)](/B)\n", (document (1:1-2:21) [ (list (1:1-2:21) [ (item (1:1-2:21) [ (paragraph (1:3-2:21) [ (text (1:3-1:3) 
"A") (softbreak (1:4-1:4)) (link (2:4-2:21) [ (image (2:5-2:16) [ (text (2:7-2:7) "B") ]) ]) ]) ]) ]) ]) ); } #[test] fn paragraph_sourcepos_multiline() { assert_ast_match!( [], " A\n" " B\n", (document (1:1-2:4) [ (paragraph (1:3-2:4) [ (text (1:3-1:3) "A") (softbreak (1:4-1:4)) (text (2:4-2:4) "B") ]) ]) ); } #[test] fn listitem_sourcepos_multiline() { assert_ast_match!( [], "- A\n" "B\n", (document (1:1-2:1) [ (list (1:1-2:1) [ (item (1:1-2:1) [ (paragraph (1:3-2:1) [ (text (1:3-1:3) "A") (softbreak (1:4-1:4)) (text (2:1-2:1) "B") ]) ]) ]) ]) ); } #[test] fn listitem_sourcepos_multiline_2() { assert_ast_match!( [], "- A\n" " B\n" "- C\n" " D", (document (1:1-4:2) [ (list (1:1-4:2) [ (item (1:1-2:4) [ (paragraph (1:3-2:4) [ (text (1:3-1:3) "A") (softbreak (1:4-1:4)) (text (2:4-2:4) "B") ]) ]) (item (3:1-4:2) [ (paragraph (3:4-4:2) [ (text (3:4-3:4) "C") (softbreak (3:5-3:5)) (text (4:2-4:2) "D") ]) ]) ]) ]) ); } #[test] fn emphasis_sourcepos_double_1() { assert_ast_match!( [], "_**this**_\n", (document (1:1-1:10) [ (paragraph (1:1-1:10) [ (emph (1:1-1:10) [ (strong (1:2-1:9) [ (text (1:4-1:7) "this") ]) ]) ]) ]) ); } #[test] fn emphasis_sourcepos_double_2() { assert_ast_match!( [], "**_this_**\n", (document (1:1-1:10) [ (paragraph (1:1-1:10) [ (strong (1:1-1:10) [ (emph (1:3-1:8) [ (text (1:4-1:7) "this") ]) ]) ]) ]) ); } #[test] fn emphasis_sourcepos_double_3() { assert_ast_match!( [], "___this___\n", (document (1:1-1:10) [ (paragraph (1:1-1:10) [ (emph (1:1-1:10) [ (strong (1:2-1:9) [ (text (1:4-1:7) "this") ]) ]) ]) ]) ); } #[test] fn emphasis_sourcepos_double_4() { assert_ast_match!( [], "***this***\n", (document (1:1-1:10) [ (paragraph (1:1-1:10) [ (emph (1:1-1:10) [ (strong (1:2-1:9) [ (text (1:4-1:7) "this") ]) ]) ]) ]) ); } 
���������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/tests/description_lists.rs��������������������������������������������������������0000644�0000000�0000000�00000007511�10461020230�0017234�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use super::*; #[test] fn description_lists() { html_opts!( [extension.description_lists], concat!( "Term 1\n", "\n", ": Definition 1\n", "\n", "Term 2 with *inline markup*\n", "\n", ": Definition 2\n" ), concat!( "<dl>", "<dt>Term 1</dt>\n", "<dd>\n", "<p>Definition 1</p>\n", "</dd>\n", "<dt>Term 2 with <em>inline markup</em></dt>\n", "<dd>\n", "<p>Definition 2</p>\n", "</dd>\n", "</dl>\n", ), ); html_opts!( [extension.description_lists], concat!( "* Nested\n", "\n", " Term 1\n\n", " : Definition 1\n\n", " Term 2 with *inline markup*\n\n", " : Definition 2\n\n" ), concat!( "<ul>\n", "<li>\n", "<p>Nested</p>\n", "<dl>", "<dt>Term 1</dt>\n", "<dd>\n", "<p>Definition 1</p>\n", "</dd>\n", "<dt>Term 2 with <em>inline markup</em></dt>\n", "<dd>\n", "<p>Definition 2</p>\n", "</dd>\n", "</dl>\n", "</li>\n", "</ul>\n", ), ); } #[test] fn sourcepos() { // TODO There's plenty of work to do here still. The test currently represents // how things *are* -- see comments for what should be different. // See partner comment in crate::parser::Parser::parse_desc_list_details. 
assert_ast_match!( [extension.description_lists], "ta\n" "\n" ": da\n" "\n" "t*b*\n" "\n" ": d*b*\n" "\n" "tc\n" "\n" ": dc\n", (document (1:1-11:4) [ (description_list (1:1-11:4) [ (description_item (1:1-4:0) [ // (description_item (1:1-3:4) [ (description_term (3:1-3:0) [ // (description_term (1:1-1:2) [ (paragraph (1:1-1:2) [ (text (1:1-1:2) "ta") ]) ]) (description_details (3:1-4:0) [ // (description_details (3:1-3:4) [ (paragraph (3:3-3:4) [ (text (3:3-3:4) "da") ]) ]) ]) (description_item (5:1-8:0) [ // (description_item (5:1-7:6) [ (description_term (7:1-7:0) [ // (description_term (5:1-5:4) [ (paragraph (5:1-5:4) [ (text (5:1-5:1) "t") (emph (5:2-5:4) [ (text (5:3-5:3) "b") ]) ]) ]) (description_details (7:1-8:0) [ // (description_details (7:1-7:6) [ (paragraph (7:3-7:6) [ (text (7:3-7:3) "d") (emph (7:4-7:6) [ (text (7:5-7:5) "b") ]) ]) ]) ]) (description_item (9:1-11:4) [ (description_term (11:1-11:0) [ // (description_term (9:1-11:4) [ (paragraph (9:1-9:2) [ (text (9:1-9:2) "tc") ]) ]) (description_details (11:1-11:4) [ (paragraph (11:3-11:4) [ (text (11:3-11:4) "dc") ]) ]) ]) ]) ]) ); } ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/tests/empty.rs��������������������������������������������������������������������0000644�0000000�0000000�00000001702�10461020230�0014625�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use super::*; #[test] fn no_empty_link() { html_opts!( [render.ignore_empty_links], "[](https://example.com/evil-link-for-seo-spam)", 
"<p>[](https://example.com/evil-link-for-seo-spam)</p>\n", ); html_opts!( [render.ignore_empty_links], "[ ](https://example.com/evil-link-for-seo-spam)", "<p>[ ](https://example.com/evil-link-for-seo-spam)</p>\n", ); } #[test] fn empty_image_allowed() { html_opts!( [render.ignore_empty_links], "![ ](https://example.com/evil-link-for-seo-spam)", "<p><img src=\"https://example.com/evil-link-for-seo-spam\" alt=\" \" /></p>\n", ); } #[test] fn image_inside_link_allowed() { html_opts!( [render.ignore_empty_links], "[![](https://example.com/image.png)](https://example.com/)", "<p><a href=\"https://example.com/\"><img src=\"https://example.com/image.png\" alt=\"\" /></a></p>\n", ); } ��������������������������������������������������������������comrak-0.29.0/src/tests/escaped_char_spans.rs�������������������������������������������������������0000644�0000000�0000000�00000000776�10461020230�0017306�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use super::*; use ntest::test_case; #[test_case("\\@user", "<p><span data-escaped-char>@</span>user</p>\n")] #[test_case("This\\@that", "<p>This<span data-escaped-char>@</span>that</p>\n")] fn escaped_char_spans(markdown: &str, html: &str) { html_opts!([render.escaped_char_spans], markdown, html, no_roundtrip); } #[test_case("\\@user", "<p>@user</p>\n")] #[test_case("This\\@that", "<p>This@that</p>\n")] fn disabled_escaped_char_spans(markdown: &str, expected: &str) { html(markdown, expected); } 
��comrak-0.29.0/src/tests/fixtures/math_code.md�������������������������������������������������������0000644�0000000�0000000�00000004040�10461020230�0017235�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������--- title: Code Math version: 0.1 date: '2024-03-22' license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)' based_on: https://docs.gitlab.com/ee/user/markdown.html#math --- # TeX Math Inline math goes between `` $` `` characters, and display math use the code block ``` ```math ````: ```````````````````````````````` example Let $`x`$ and $`y`$ be integers such that ```math x=y + 2 ``` . <p>Let <code data-math-style="inline">x</code> and <code data-math-style="inline">y</code> be integers such that</p> <pre><code class="language-math" data-math-style="display">x=y + 2 </code></pre> ```````````````````````````````` In inline math, it behaves just like inline code. ```````````````````````````````` example This is math:$`2000`$. . <p>This is math:<code data-math-style="inline">2000</code>.</p> ```````````````````````````````` Note that math can contain embedded math. In scanning for a closing delimiter, we do not need to skip material in balanced curly braces: ```````````````````````````````` example This is display math: ```math \text{Hello $x^2$} ``` And this is inline math: $`\text{Hello $x$ there!}`$ . <p>This is display math:</p> <pre><code class="language-math" data-math-style="display">\text{Hello $x^2$} </code></pre> <p>And this is inline math: <code data-math-style="inline">\text{Hello $x$ there!}</code></p> ```````````````````````````````` Dollar signs not required to be backslashed. 
It may be required to render the math properly, but it's not required for parsing: ```````````````````````````````` example $`\text{$}`$ . <p><code data-math-style="inline">\text{$}</code></p> ```````````````````````````````` Everything inside the math construction is treated as math, and not given its normal commonmark meaning. ```````````````````````````````` example $`b<a>c`$ . <p><code data-math-style="inline">b&lt;a&gt;c</code></p> ```````````````````````````````` Double dollar signs are not supported ```````````````````````````````` example $$`1 + 2`$$ . <p>$$<code>1 + 2</code>$$</p> ```````````````````````````````` ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/tests/fixtures/math_dollars.md����������������������������������������������������0000644�0000000�0000000�00000005110�10461020230�0017762�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������--- title: Dollar Math based_on: https://github.com/jgm/commonmark-hs/blob/master/commonmark-extensions/test/math.md --- # TeX Math Inline math goes between `$` characters, and display math goes between `$$`: ```````````````````````````````` example Let $x$ and $y$ be integers such that $$x=y + 2$$ . 
<p>Let <span data-math-style="inline">x</span> and <span data-math-style="inline">y</span> be integers such that <span data-math-style="display">x=y + 2</span></p> ```````````````````````````````` In inline math, the opening `$` must not be followed by a whitespace, and the closing `$` must not be preceded by whitespace. ```````````````````````````````` example This is not math: 2000$. And neither is this $ 4 $. Or this $4 $. . <p>This is not math: 2000$. And neither is this $ 4 $. Or this $4 $.</p> ```````````````````````````````` Display math delimiters can be surrounded by whitespace: ```````````````````````````````` example This is display math: $$ e=mc^2 $$ . <p>This is display math: <span data-math-style="display"> e=mc^2 </span></p> ```````````````````````````````` Note that math can contain embedded math. In scanning for a closing delimiter, we skip material in balanced curly braces: ```````````````````````````````` example disabled This is display math: $$ \text{Hello $x^2$} $$ And this is inline math: $\text{Hello $x$ there!}$ . <p>This is display math: <span class="math display">\[ \text{Hello $x^2$} \]</span> And this is inline math: <span class="math inline">\(\text{Hello $x$ there!}\)</span></p> ```````````````````````````````` To avoid treating currency signs as math delimiters, one may occasionally have to backslash-escape them: ```````````````````````````````` example The cost is between \$10 and 30$. . <p>The cost is between $10 and 30$.</p> ```````````````````````````````` Dollar signs must also be backslash-escaped if they occur within math: ```````````````````````````````` example $\text{\$}$ . <p><span data-math-style="inline">\text{\$}</span></p> ```````````````````````````````` Everything inside the math construction is treated as math, and not given its normal commonmark meaning. ```````````````````````````````` example $b<a>c$ . 
<p><span data-math-style="inline">b&lt;a&gt;c</span></p> ```````````````````````````````` Block math can directly follow a paragraph. ```````````````````````````````` example This is inline display math $$1+2$$ This is block math $$ 1+2 $$ . <p>This is inline display math <span data-math-style="display">1+2</span></p> <p>This is block math <span data-math-style="display"> 1+2 </span></p> ```````````````````````````````` ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/tests/fixtures/multiline_blockquote.md��������������������������������������������0000644�0000000�0000000�00000010224�10461020230�0021545�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������--- title: GitLab Flavored Markdown Spec version: 0.1 date: '2023-12-18' license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)' --- ## Multi-line Blockquotes Simple container ```````````````````````````````` example >>> *content* >>> . <blockquote> <p><em>content</em></p> </blockquote> ```````````````````````````````` Can contain block elements ```````````````````````````````` example >>> ### heading ----------- >>> . 
<blockquote> <h3>heading</h3> <hr /> </blockquote> ```````````````````````````````` Ending marker can be longer ```````````````````````````````` example >>>>>> hello world >>>>>>>>>>> normal . <blockquote> <p>hello world</p> </blockquote> <p>normal</p> ```````````````````````````````` Nested blockquotes ```````````````````````````````` example >>>>> >>>> foo >>>> >>>>> . <blockquote> <blockquote> <p>foo</p> </blockquote> </blockquote> ```````````````````````````````` Incorrectly nested blockquotes ```````````````````````````````` example >>>> this block is closed with 5 markers below >>>>> auto-closed blocks >>>>> >>>> . <blockquote> <p>this block is closed with 5 markers below</p> </blockquote> <p>auto-closed blocks</p> <blockquote> <blockquote> </blockquote> </blockquote> ```````````````````````````````` Marker can be indented up to 3 spaces ```````````````````````````````` example >>>> first-level blockquote >>> second-level blockquote >>> >>>> regular paragraph . <blockquote> <p>first-level blockquote</p> <blockquote> <p>second-level blockquote</p> </blockquote> </blockquote> <p>regular paragraph</p> ```````````````````````````````` Fours spaces makes it a code block ```````````````````````````````` example >>> content >>> . <pre><code>&gt;&gt;&gt; content &gt;&gt;&gt; </code></pre> ```````````````````````````````` Detection of embedded 4 spaces code block starts in the column the blockquote starts, not from the beginning of the line. ```````````````````````````````` example >>> code block >>> . <blockquote> <pre><code>code block </code></pre> </blockquote> ```````````````````````````````` ```````````````````````````````` example >>>> content >>> code block >>> >>>> . <blockquote> <p>content</p> <blockquote> <pre><code>code block </code></pre> </blockquote> </blockquote> ```````````````````````````````` Closing marker can't have text on the same line ```````````````````````````````` example >>> foo >>> arg=123 . 
<blockquote> <p>foo</p> <blockquote> <blockquote> <blockquote> <p>arg=123</p> </blockquote> </blockquote> </blockquote> </blockquote> ```````````````````````````````` Blockquotes self-close at the end of the document ```````````````````````````````` example >>> foo . <blockquote> <p>foo</p> </blockquote> ```````````````````````````````` They should terminate paragraphs ```````````````````````````````` example blah blah >>> content >>> . <p>blah blah</p> <blockquote> <p>content</p> </blockquote> ```````````````````````````````` They can be nested in lists ```````````````````````````````` example - >>> - foo >>> . <ul> <li> <blockquote> <ul> <li>foo</li> </ul> </blockquote> </li> </ul> ```````````````````````````````` Or in blockquotes ```````````````````````````````` example > >>> > foo >> bar > baz > >>> . <blockquote> <blockquote> <p>foo</p> <blockquote> <p>bar baz</p> </blockquote> </blockquote> </blockquote> ```````````````````````````````` List indentation ```````````````````````````````` example - >>> foo bar >>> - >>> foo bar >>> . <ul> <li> <blockquote> <p>foo bar</p> </blockquote> </li> <li> <blockquote> <p>foo bar</p> </blockquote> </li> </ul> ```````````````````````````````` Ignored inside code blocks: ```````````````````````````````` example ```txt # Code >>> # Code >>> # Code ``` . <pre><code class="language-txt"># Code &gt;&gt;&gt; # Code &gt;&gt;&gt; # Code </code></pre> ```````````````````````````````` Does not require a leading or trailing blank line ```````````````````````````````` example Some text >>> A quote >>> Some other text . 
<p>Some text</p> <blockquote> <p>A quote</p> </blockquote> <p>Some other text</p> ```````````````````````````````` ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/tests/fixtures/wikilinks_title_after_pipe.md��������������������������������������0000644�0000000�0000000�00000003723�10461020230�0022724�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������--- title: Wikilinks based_on: https://github.com/jgm/commonmark-hs/blob/master/commonmark-extensions/test/wikilinks_title_after_pipe.md --- # Wikilinks, title after pipe Wikilinks can have one of the following forms: [[https://example.org]] [[https://example.org|title]] [[name of page]] [[name of page|title]] With this version of wikilinks, the title comes after the pipe. ```````````````````````````````` example [[https://example.org]] . <p><a href="https://example.org" data-wikilink="true">https://example.org</a></p> ```````````````````````````````` ```````````````````````````````` example [[https://example.org|title]] . <p><a href="https://example.org" data-wikilink="true">title</a></p> ```````````````````````````````` ```````````````````````````````` example [[Name of page]] . <p><a href="Name%20of%20page" data-wikilink="true">Name of page</a></p> ```````````````````````````````` ```````````````````````````````` example [[Name of page|Title]] . 
<p><a href="Name%20of%20page" data-wikilink="true">Title</a></p> ```````````````````````````````` HTML entities are recognized both in the name of page and in the link title. ```````````````````````````````` example [[Gesch&uuml;tztes Leerzeichen|&#xDC;ber &amp;nbsp;]] . <p><a href="Gesch%C3%BCtztes%20Leerzeichen" data-wikilink="true">Über &amp;nbsp;</a></p> ```````````````````````````````` Escaping characters is supported ```````````````````````````````` example [[https://example.org|foo\[\]bar]] . <p><a href="https://example.org" data-wikilink="true">foo[]bar</a></p> ```````````````````````````````` ```````````````````````````````` example [[Name \[of\] page]] . <p><a href="Name%20%5Bof%5D%20page" data-wikilink="true">Name [of] page</a></p> ```````````````````````````````` Emphasis or other inline markdown is not supported ```````````````````````````````` example [[Name _of_ page]] . <p><a href="Name%20_of_%20page" data-wikilink="true">Name _of_ page</a></p> ```````````````````````````````` ���������������������������������������������comrak-0.29.0/src/tests/fixtures/wikilinks_title_before_pipe.md�������������������������������������0000644�0000000�0000000�00000004213�10461020230�0023060�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������--- title: Wikilinks based_on: https://github.com/jgm/commonmark-hs/blob/master/commonmark-extensions/test/wikilinks_title_before_pipe.md --- # Wikilinks, title before pipe Wikilinks can have one of the following forms: [[https://example.org]] [[title|https://example.org]] [[name of page]] [[title|name of page]] With this version of wikilinks, the title comes before the pipe. 
```````````````````````````````` example [[https://example.org]] . <p><a href="https://example.org" data-wikilink="true">https://example.org</a></p> ```````````````````````````````` ```````````````````````````````` example [[title|https://example.org]] . <p><a href="https://example.org" data-wikilink="true">title</a></p> ```````````````````````````````` ```````````````````````````````` example [[Name of page]] . <p><a href="Name%20of%20page" data-wikilink="true">Name of page</a></p> ```````````````````````````````` ```````````````````````````````` example [[Title|Name of page]] . <p><a href="Name%20of%20page" data-wikilink="true">Title</a></p> ```````````````````````````````` Regular links should still work! ```````````````````````````````` example [Title](Name%20of%20page) . <p><a href="Name%20of%20page">Title</a></p> ```````````````````````````````` HTML entities are recognized both in the name of page and in the link title. ```````````````````````````````` example [[&#xDC;ber &amp;nbsp;|Gesch&uuml;tztes Leerzeichen]] . <p><a href="Gesch%C3%BCtztes%20Leerzeichen" data-wikilink="true">Über &amp;nbsp;</a></p> ```````````````````````````````` Escaping characters is supported ```````````````````````````````` example [[foo\[\]bar|https://example.org]] . <p><a href="https://example.org" data-wikilink="true">foo[]bar</a></p> ```````````````````````````````` ```````````````````````````````` example [[Name \[of\] page]] . <p><a href="Name%20%5Bof%5D%20page" data-wikilink="true">Name [of] page</a></p> ```````````````````````````````` Emphasis or other inline markdown is not supported ```````````````````````````````` example [[Name _of_ page]] . 
<p><a href="Name%20_of_%20page" data-wikilink="true">Name _of_ page</a></p> ```````````````````````````````` �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/tests/footnotes.rs����������������������������������������������������������������0000644�0000000�0000000�00000026256�10461020230�0015522�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use super::*; #[test] fn footnotes() { html_opts!( [extension.footnotes], concat!( "Here is a[^nowhere] footnote reference,[^1] and another.[^longnote]\n", "\n", "This is another note.[^note] And footnote[^longnote] is referenced again.\n", "\n", "[^note]: Hi.\n", "\n", "[^1]: Here is the footnote.\n", "\n", "[^longnote]: Here's one with multiple blocks.\n", "\n", " Subsequent paragraphs are indented.\n", "\n", " code\n", "\n", "This is regular content.\n", "\n", "[^unused]: This is not used.\n" ), concat!( "<p>Here is a[^nowhere] footnote reference,<sup class=\"footnote-ref\"><a href=\"#fn-1\" \ id=\"fnref-1\" data-footnote-ref>1</a></sup> and another.<sup class=\"footnote-ref\"><a \ href=\"#fn-longnote\" id=\"fnref-longnote\" data-footnote-ref>2</a></sup></p>\n", "<p>This is another note.<sup class=\"footnote-ref\"><a \ href=\"#fn-note\" id=\"fnref-note\" data-footnote-ref>3</a></sup> And footnote<sup class=\"footnote-ref\"><a \ href=\"#fn-longnote\" id=\"fnref-longnote-2\" 
data-footnote-ref>2</a></sup> is referenced again.</p>\n", "<p>This is regular content.</p>\n", "<section class=\"footnotes\" data-footnotes>\n", "<ol>\n", "<li id=\"fn-1\">\n", "<p>Here is the footnote. <a href=\"#fnref-1\" \ class=\"footnote-backref\" data-footnote-backref data-footnote-backref-idx=\"1\" aria-label=\"Back to reference 1\">↩</a></p>\n", "</li>\n", "<li id=\"fn-longnote\">\n", "<p>Here's one with multiple blocks.</p>\n", "<p>Subsequent paragraphs are indented.</p>\n", "<pre><code>code\n", "</code></pre>\n", "<a href=\"#fnref-longnote\" class=\"footnote-backref\" data-footnote-backref data-footnote-backref-idx=\"2\" aria-label=\"Back to reference 2\">↩</a> \ <a href=\"#fnref-longnote-2\" class=\"footnote-backref\" data-footnote-backref data-footnote-backref-idx=\"2-2\" aria-label=\"Back to reference 2-2\">↩<sup class=\"footnote-ref\">2</sup></a>\n", "</li>\n", "<li id=\"fn-note\">\n", "<p>Hi. <a href=\"#fnref-note\" \ class=\"footnote-backref\" data-footnote-backref data-footnote-backref-idx=\"3\" aria-label=\"Back to reference 3\">↩</a></p>\n", "</li>\n", "</ol>\n", "</section>\n" ), ); } #[test] fn footnote_does_not_eat_exclamation() { html_opts!( [extension.footnotes], concat!("Here's my footnote![^a]\n", "\n", "[^a]: Yep.\n"), concat!( "<p>Here's my footnote!<sup class=\"footnote-ref\"><a href=\"#fn-a\" id=\"fnref-a\" data-footnote-ref>1</a></sup></p>\n", "<section class=\"footnotes\" data-footnotes>\n", "<ol>\n", "<li id=\"fn-a\">\n", "<p>Yep. 
<a href=\"#fnref-a\" class=\"footnote-backref\" data-footnote-backref data-footnote-backref-idx=\"1\" aria-label=\"Back to reference 1\">↩</a></p>\n", "</li>\n", "</ol>\n", "</section>\n" ), ); } #[test] fn footnote_in_table() { html_opts!( [extension.table, extension.footnotes], concat!( "A footnote in a paragraph[^1]\n", "\n", "| Column1 | Column2 |\n", "| --------- | ------- |\n", "| foot [^1] | note |\n", "\n", "[^1]: a footnote\n", ), concat!( "<p>A footnote in a paragraph<sup class=\"footnote-ref\"><a href=\"#fn-1\" id=\"fnref-1\" data-footnote-ref>1</a></sup></p>\n", "<table>\n", "<thead>\n", "<tr>\n", "<th>Column1</th>\n", "<th>Column2</th>\n", "</tr>\n", "</thead>\n", "<tbody>\n", "<tr>\n", "<td>foot <sup class=\"footnote-ref\"><a href=\"#fn-1\" id=\"fnref-1-2\" data-footnote-ref>1</a></sup></td>\n", "<td>note</td>\n", "</tr>\n", "</tbody>\n", "</table>\n", "<section class=\"footnotes\" data-footnotes>\n", "<ol>\n", "<li id=\"fn-1\">\n", "<p>a footnote <a href=\"#fnref-1\" class=\"footnote-backref\" data-footnote-backref data-footnote-backref-idx=\"1\" aria-label=\"Back to reference 1\">↩</a> <a href=\"#fnref-1-2\" class=\"footnote-backref\" data-footnote-backref data-footnote-backref-idx=\"1-2\" aria-label=\"Back to reference 1-2\">↩<sup class=\"footnote-ref\">2</sup></a></p>\n", "</li>\n", "</ol>\n", "</section>\n", )); } #[test] fn footnote_with_superscript() { html_opts!( [extension.superscript, extension.footnotes], concat!( "Here is a footnote reference.[^1]\n", "\n", "Here is a longer footnote reference.[^ref]\n", "\n", "e = mc^2^.\n", "\n", "[^1]: Here is the footnote.\n", "[^ref]: Here is another footnote.\n", ), concat!( "<p>Here is a footnote reference.<sup class=\"footnote-ref\"><a href=\"#fn-1\" \ id=\"fnref-1\" data-footnote-ref>1</a></sup></p>\n", "<p>Here is a longer footnote reference.<sup class=\"footnote-ref\"><a href=\"#fn-ref\" \ id=\"fnref-ref\" data-footnote-ref>2</a></sup></p>\n", "<p>e = mc<sup>2</sup>.</p>\n", "<section 
class=\"footnotes\" data-footnotes>\n", "<ol>\n", "<li id=\"fn-1\">\n", "<p>Here is the footnote. <a href=\"#fnref-1\" \ class=\"footnote-backref\" data-footnote-backref data-footnote-backref-idx=\"1\" aria-label=\"Back to reference 1\">↩</a></p>\n", "</li>\n", "<li id=\"fn-ref\">\n", "<p>Here is another footnote. <a href=\"#fnref-ref\" \ class=\"footnote-backref\" data-footnote-backref data-footnote-backref-idx=\"2\" aria-label=\"Back to reference 2\">↩</a></p>\n", "</li>\n", "</ol>\n", "</section>\n" ), ); } #[test] fn footnote_escapes_name() { html_opts!( [extension.footnotes], concat!( "Here is a footnote reference.[^πŸ˜„ref]\n", "\n", "[^πŸ˜„ref]: Here is the footnote.\n", ), concat!( "<p>Here is a footnote reference.<sup class=\"footnote-ref\"><a href=\"#fn-%F0%9F%98%84ref\" id=\"fnref-%F0%9F%98%84ref\" data-footnote-ref>1</a></sup></p>\n", "<section class=\"footnotes\" data-footnotes>\n", "<ol>\n", "<li id=\"fn-%F0%9F%98%84ref\">\n", "<p>Here is the footnote. <a href=\"#fnref-%F0%9F%98%84ref\" class=\"footnote-backref\" data-footnote-backref data-footnote-backref-idx=\"1\" aria-label=\"Back to reference 1\">↩</a></p>\n", "</li>\n", "</ol>\n", "</section>\n" ), ); } #[test] fn footnote_case_insensitive_and_case_preserving() { html_opts!( [extension.footnotes], concat!( "Here is a footnote reference.[^AB] and [^ab]\n", "\n", "[^aB]: Here is the footnote.\n", ), concat!( "<p>Here is a footnote reference.<sup class=\"footnote-ref\"><a href=\"#fn-aB\" id=\"fnref-aB\" data-footnote-ref>1</a></sup> and <sup class=\"footnote-ref\"><a href=\"#fn-aB\" id=\"fnref-aB-2\" data-footnote-ref>1</a></sup></p>\n", "<section class=\"footnotes\" data-footnotes>\n", "<ol>\n", "<li id=\"fn-aB\">\n", "<p>Here is the footnote. 
<a href=\"#fnref-aB\" class=\"footnote-backref\" data-footnote-backref data-footnote-backref-idx=\"1\" aria-label=\"Back to reference 1\">↩</a> <a href=\"#fnref-aB-2\" class=\"footnote-backref\" data-footnote-backref data-footnote-backref-idx=\"1-2\" aria-label=\"Back to reference 1-2\">↩<sup class=\"footnote-ref\">2</sup></a></p>\n", "</li>\n", "</ol>\n", "</section>\n" ), ); } #[test] fn footnote_name_parsed_into_multiple_nodes() { html_opts!( [extension.footnotes], concat!( "Foo.[^_ab]\n", "\n", "[^_ab]: Here is the footnote.\n", ), concat!( "<p>Foo.<sup class=\"footnote-ref\"><a href=\"#fn-_ab\" id=\"fnref-_ab\" data-footnote-ref>1</a></sup></p>\n", "<section class=\"footnotes\" data-footnotes>\n", "<ol>\n", "<li id=\"fn-_ab\">\n", "<p>Here is the footnote. <a href=\"#fnref-_ab\" class=\"footnote-backref\" data-footnote-backref data-footnote-backref-idx=\"1\" aria-label=\"Back to reference 1\">↩</a></p>\n", "</li>\n", "</ol>\n", "</section>\n" ), ); } #[test] fn footnote_invalid_with_missing_name() { html_opts!( [extension.footnotes], "Foo.[^]\n\n[^]: Here is the footnote.\n", "<p>Foo.[^]</p>\n<p>[^]: Here is the footnote.</p>\n" ); } #[test] fn footnote_does_not_allow_spaces_in_name() { html_opts!( [extension.footnotes], "Foo.[^one two]\n\n[^one two]: Here is the footnote.\n", "<p>Foo.[^one two]</p>\n<p>[^one two]: Here is the footnote.</p>\n" ); } #[test] fn footnote_does_not_expand_emphasis_in_name() { html_opts!( [extension.footnotes], "Foo[^**one**]\n[^**one**]: bar\n", concat!( "<p>Foo<sup class=\"footnote-ref\"><a href=\"#fn-**one**\" id=\"fnref-**one**\" data-footnote-ref>1</a></sup></p>\n", "<section class=\"footnotes\" data-footnotes>\n", "<ol>\n", "<li id=\"fn-**one**\">\n", "<p>bar <a href=\"#fnref-**one**\" class=\"footnote-backref\" data-footnote-backref data-footnote-backref-idx=\"1\" aria-label=\"Back to reference 1\">↩</a></p>\n", "</li>\n", "</ol>\n", "</section>\n" ), ); } #[test] fn sourcepos() { assert_ast_match!( [extension.footnotes], 
"Here is a footnote reference.[^1]\n" "\n" "Here is a longer footnote reference.[^ref]\n" "\n" "[^1]: Here is the footnote.\n" "[^ref]: Here is another footnote.\n", (document (1:1-6:33) [ (paragraph (1:1-1:33) [ (text (1:1-1:29) "Here is a footnote reference.") (footnote_reference (1:30-1:33)) ]) (paragraph (3:1-3:42) [ (text (3:1-3:36) "Here is a longer footnote reference.") (footnote_reference (3:37-3:42)) ]) (footnote_definition (5:1-5:27) [ (paragraph (5:7-5:27) [ (text (5:7-5:27) "Here is the footnote.") ]) ]) (footnote_definition (6:1-6:33) [ (paragraph (6:9-6:33) [ (text (6:9-6:33) "Here is another footnote.") ]) ]) ]) ); } ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/tests/fuzz.rs���������������������������������������������������������������������0000644�0000000�0000000�00000002671�10461020230�0014473�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use super::{html, html_opts}; #[test] fn pointy_brace_open() { html("<!-", "<p>&lt;!-</p>\n"); } #[test] fn tasklist() { html_opts!( [extension.tasklist, parse.relaxed_tasklist_matching], "* [*]", "<ul>\n<li><input type=\"checkbox\" checked=\"\" disabled=\"\" /> </li>\n</ul>\n", ); } #[test] fn table_nul() { html_opts!( [extension.table], "\0|.\n-|-\nZ", r##"<table> <thead> <tr> <th>οΏ½</th> <th>.</th> </tr> </thead> <tbody> <tr> <td>Z</td> <td></td> </tr> </tbody> </table> "##, ); } #[test] fn 
footnote_def() { html_opts!( [ extension.autolink, extension.footnotes, render.sourcepos, render.hardbreaks ], "\u{15}\u{b}\r[^ ]:", "<p data-sourcepos=\"1:1-2:5\">\u{15}\u{b}<br />\n[^ ]:</p>\n", ); } #[test] fn line_end() { html("\u{2}\n\\\n\t-", "<p>\u{2}\n<br />\n-</p>\n"); } #[test] fn bracket_match() { html("[;\0V\n]::g\n[;\0V\n]", "<p><a href=\":g\">;οΏ½V\n</a></p>\n"); } #[test] fn trailing_hyphen() { html_opts!( [extension.autolink, parse.smart, render.sourcepos], "3@.l-", "<p data-sourcepos=\"1:1-1:5\">3@.l-</p>\n" ); } #[test] fn trailing_hyphen_matches() { html_opts!( [extension.autolink, parse.smart, render.sourcepos], "3@.l--", "<p data-sourcepos=\"1:1-1:6\"><a href=\"mailto:3@.l\">3@.l</a>–</p>\n" ); } �����������������������������������������������������������������������comrak-0.29.0/src/tests/greentext.rs����������������������������������������������������������������0000644�0000000�0000000�00000001606�10461020230�0015477�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use super::*; #[test] fn greentext_preserved() { html_opts!( [extension.greentext, render.hardbreaks], ">implying\n>>implying", "<p>&gt;implying<br />\n&gt;&gt;implying</p>\n" ); } #[test] fn separate_quotes_on_line_end() { html_opts!( [extension.greentext], "> 1\n>\n> 2", "<blockquote>\n<p>1</p>\n</blockquote>\n<p>&gt;</p>\n<blockquote>\n<p>2</p>\n</blockquote>\n" ); } #[test] fn unnest_quotes_on_line_end() { html_opts!( [extension.greentext], "> 1\n> > 2\n> 1", "<blockquote>\n<p>1</p>\n<blockquote>\n<p>2</p>\n</blockquote>\n<p>1</p>\n</blockquote>\n" ); } #[test] fn unnest_quotes_on_line_end_commonmark() { html_opts!( [extension.greentext], "> 1\n> > 2\n> \n> 1", 
"<blockquote>\n<p>1</p>\n<blockquote>\n<p>2</p>\n</blockquote>\n<p>1</p>\n</blockquote>\n" ); } ��������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/tests/header_ids.rs���������������������������������������������������������������0000644�0000000�0000000�00000002345�10461020230�0015562�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use super::*; #[test] fn header_ids() { html_opts_i( concat!( "# Hi.\n", "## Hi 1.\n", "### Hi.\n", "#### Hello.\n", "##### Hi.\n", "###### Hello.\n", "# Isn't it grand?" ), concat!( "<h1><a href=\"#hi\" aria-hidden=\"true\" class=\"anchor\" id=\"user-content-hi\"></a>Hi.</h1>\n", "<h2><a href=\"#hi-1\" aria-hidden=\"true\" class=\"anchor\" id=\"user-content-hi-1\"></a>Hi 1.</h2>\n", "<h3><a href=\"#hi-2\" aria-hidden=\"true\" class=\"anchor\" id=\"user-content-hi-2\"></a>Hi.</h3>\n", "<h4><a href=\"#hello\" aria-hidden=\"true\" class=\"anchor\" id=\"user-content-hello\"></a>Hello.</h4>\n", "<h5><a href=\"#hi-3\" aria-hidden=\"true\" class=\"anchor\" id=\"user-content-hi-3\"></a>Hi.</h5>\n", "<h6><a href=\"#hello-1\" aria-hidden=\"true\" class=\"anchor\" id=\"user-content-hello-1\"></a>Hello.</h6>\n", "<h1><a href=\"#isnt-it-grand\" aria-hidden=\"true\" class=\"anchor\" id=\"user-content-isnt-it-grand\"></a>Isn't it grand?</h1>\n" ), true, |opts| opts.extension.header_ids = Some("user-content-".to_owned()), ); } 
�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/tests/math.rs���������������������������������������������������������������������0000644�0000000�0000000�00000012340�10461020230�0014420�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use super::*; use ntest::test_case; #[test_case("$2+2$", "<p><math>2+2</math></p>\n")] #[test_case("$22 and $2+2$", "<p>$22 and <math>2+2</math></p>\n")] #[test_case("$a!$", "<p><math>a!</math></p>\n")] #[test_case("$x$", "<p><math>x</math></p>\n")] #[test_case("$1+2\\$$", "<p><math>1+2\\$</math></p>\n")] #[test_case("$1+\\$2$", "<p><math>1+\\$2</math></p>\n")] #[test_case("$1+\\%2$", "<p><math>1+\\%2</math></p>\n")] #[test_case( "$22+1$ and $22 + a^2$", "<p><math>22+1</math> and <math>22 + a^2</math></p>\n" )] #[test_case( "$2+2$ $22 and dollars$22 $2+2$", "<p><math>2+2</math> $22 and dollars$22 <math>2+2</math></p>\n" )] #[test_case( "$1/2$ &lt;b&gt;test&lt;/b&gt;", "<p><math>1/2</math> &lt;b&gt;test&lt;/b&gt;</p>\n" )] fn math_dollars_inline(markdown: &str, html: &str) { let result = html .replace("<math>", "<span data-math-style=\"inline\">") .replace("</math>", "</span>"); html_opts!([extension.math_dollars], markdown, &result); } #[test_case("$$2+2$$", "<p><math>2+2</math></p>\n")] #[test_case("$$ 2+2 $$", "<p><math> 2+2 </math></p>\n")] #[test_case("$22 and $$2+2$$", "<p>$22 and <math>2+2</math></p>\n")] #[test_case("$$a!$$", "<p><math>a!</math></p>\n")] #[test_case("$$x$$", 
"<p><math>x</math></p>\n")] #[test_case("$$20,000 and $$30,000", "<p><math>20,000 and </math>30,000</p>\n")] #[test_case("test $$\n2+2\n$$", "<p>test <math>\n2+2\n</math></p>\n")] #[test_case( "$$22+1$$ and $$22 + a^2$$", "<p><math>22+1</math> and <math>22 + a^2</math></p>\n" )] #[test_case( "$$2+2$$ $22 and dollars$22 $$2+2$$", "<p><math>2+2</math> $22 and dollars$22 <math>2+2</math></p>\n" )] #[test_case( "dollars$22 and $$a^2 + b^2 = c^2$$", "<p>dollars$22 and <math>a^2 + b^2 = c^2</math></p>\n" )] fn math_dollars_inline_display(markdown: &str, html: &str) { let result = html .replace("<math>", "<span data-math-style=\"display\">") .replace("</math>", "</span>"); html_opts!([extension.math_dollars], markdown, &result); } #[test_case("$$\n2+2\n4+4\n$$", "<p><math>\n2+2\n4+4\n</math></p>\n")] fn math_dollars_block(markdown: &str, html: &str) { let result = html .replace("<math>", "<span data-math-style=\"display\">") .replace("</math>", "</span>"); html_opts!([extension.math_dollars], markdown, &result); } #[test_case("$`2+2`$", "<p><math>2+2</math></p>\n")] #[test_case("$22 and $`2+2`$", "<p>$22 and <math>2+2</math></p>\n")] #[test_case("$`1+\\$2`$", "<p><math>1+\\$2</math></p>\n")] #[test_case( "$`22+1`$ and $`22 + a^2`$", "<p><math>22+1</math> and <math>22 + a^2</math></p>\n" )] #[test_case( "$`2+2`$ $22 and dollars$22 $`2+2`$", "<p><math>2+2</math> $22 and dollars$22 <math>2+2</math></p>\n" )] fn math_code_inline(markdown: &str, html: &str) { let result = html .replace("<math>", "<code data-math-style=\"inline\">") .replace("</math>", "</code>"); html_opts!([extension.math_code], markdown, &result); } #[test_case("```math\n2+2\n4+4\n```", "<pre><math>2+2\n4+4\n</math></pre>\n")] fn math_code_block(markdown: &str, html: &str) { let result = html .replace( "<math>", "<code class=\"language-math\" data-math-style=\"display\">", ) .replace("</math>", "</code>"); html_opts!([extension.math_dollars], markdown, &result); } #[test_case("`2+2`", 
"<p><code>2+2</code></p>\n")] #[test_case("test `2+2`$ test", "<p>test <code>2+2</code>$ test</p>\n")] #[test_case("$20,000 and $30,000", "<p>$20,000 and $30,000</p>\n")] #[test_case("$20,000 in $USD", "<p>$20,000 in $USD</p>\n")] #[test_case("$ a^2 $", "<p>$ a^2 $</p>\n")] #[test_case("$\n$", "<p>$\n$</p>\n")] #[test_case("$$$", "<p>$$$</p>\n")] #[test_case("`$1+2$`", "<p><code>$1+2$</code></p>\n")] #[test_case("`$$1+2$$`", "<p><code>$$1+2$$</code></p>\n")] #[test_case("`$\\$1+2$$`", "<p><code>$\\$1+2$$</code></p>\n")] #[test_case("test $`2+2` test", "<p>test $<code>2+2</code> test</p>\n")] fn math_unrecognized_syntax(markdown: &str, html: &str) { html_opts!( [extension.math_dollars, extension.math_code], markdown, html ); } // html_opts! does a roundtrip check unless sourcepos is set. // These cases don't work roundtrip, because converting to commonmark // automatically escapes certain characters. #[test_case("$`$", "<p data-sourcepos=\"1:1-1:3\">$`$</p>\n")] fn math_unrecognized_syntax_non_roundtrip(markdown: &str, html: &str) { html_opts!( [ extension.math_dollars, extension.math_code, render.sourcepos ], markdown, html ); } #[test] fn sourcepos() { assert_ast_match!( [extension.math_dollars, extension.math_code], "$x^2$ and $$y^2$$ and $`z^2`$\n" "\n" "$$\n" "a^2\n" "$$\n" "\n" "```math\n" "b^2\n" "```\n", (document (1:1-9:3) [ (paragraph (1:1-1:29) [ (math (1:2-1:4)) (text (1:6-1:10) " and ") (math (1:13-1:15)) (text (1:18-1:22) " and ") (math (1:25-1:27)) ]) (paragraph (3:1-5:2) [ (math (3:3-5:0)) ]) (code_block (7:1-9:3)) ]) ); } 
������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/tests/multiline_block_quotes.rs���������������������������������������������������0000644�0000000�0000000�00000003540�10461020230�0020245�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use super::*; #[test] fn multiline_block_quotes() { html_opts!( [extension.multiline_block_quotes], concat!(">>>\n", "Paragraph 1\n", "\n", "Paragraph 2\n", ">>>\n",), concat!( "<blockquote>\n", "<p>Paragraph 1</p>\n", "<p>Paragraph 2</p>\n", "</blockquote>\n", ), ); html_opts!( [extension.multiline_block_quotes], concat!( "- item one\n", "\n", " >>>\n", " Paragraph 1\n", "\n", " Paragraph 2\n", " >>>\n", "- item two\n" ), concat!( "<ul>\n", "<li>\n", "<p>item one</p>\n", "<blockquote>\n", "<p>Paragraph 1</p>\n", "<p>Paragraph 2</p>\n", "</blockquote>\n", "</li>\n", "<li>\n", "<p>item two</p>\n", "</li>\n", "</ul>\n", ), ); } #[test] fn sourcepos() { assert_ast_match!( [extension.multiline_block_quotes], "- item one\n" "\n" " >>>\n" " Paragraph 1\n" " >>>\n" "- item two\n", (document (1:1-6:10) [ (list (1:1-6:10) [ (item (1:1-5:5) [ (paragraph (1:3-1:10) [ (text (1:3-1:10) "item one") ]) (multiline_block_quote (3:3-5:5) [ (paragraph (4:3-4:13) [ (text (4:3-4:13) "Paragraph 1") ]) ]) ]) (item (6:1-6:10) [ (paragraph (6:3-6:10) [ (text (6:3-6:10) "item two") ]) ]) ]) ]) ); } 
����������������������������������������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/tests/options.rs������������������������������������������������������������������0000644�0000000�0000000�00000005762�10461020230�0015174�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use std::sync::{Arc, Mutex}; use super::*; #[test] fn markdown_list_bullets() { let dash = concat!("- a\n"); let plus = concat!("+ a\n"); let star = concat!("* a\n"); let mut dash_opts = Options::default(); dash_opts.render.list_style = ListStyleType::Dash; let mut plus_opts = Options::default(); plus_opts.render.list_style = ListStyleType::Plus; let mut star_opts = Options::default(); star_opts.render.list_style = ListStyleType::Star; commonmark(dash, dash, Some(&dash_opts)); commonmark(plus, dash, Some(&dash_opts)); commonmark(star, dash, Some(&dash_opts)); commonmark(dash, plus, Some(&plus_opts)); commonmark(plus, plus, Some(&plus_opts)); commonmark(star, plus, Some(&plus_opts)); commonmark(dash, star, Some(&star_opts)); commonmark(plus, star, Some(&star_opts)); commonmark(star, star, Some(&star_opts)); } #[test] fn width_breaks() { let mut options = Options::default(); options.render.width = 72; let input = concat!( "this should break because it has breakable characters. break right here newline\n", "\n", "don't break\n", "\n", "a-long-line-that-won't-break-because-there-is-no-character-it-can-break-on\n" ); let output = concat!( "this should break because it has breakable characters. 
break right here\n", "newline\n", "\n", "don't break\n", "\n", "a-long-line-that-won't-break-because-there-is-no-character-it-can-break-on\n" ); commonmark(input, output, Some(&options)); } #[test] fn smart_chars() { html_opts!( [parse.smart], "Why 'hello' \"there\". It's good.", "<p>Why β€˜hello’ β€œthere”. It’s good.</p>\n", ); html_opts!( [parse.smart], "Hm. Hm.. hm... yes- indeed-- quite---!", "<p>Hm. Hm.. hm… yes- indeed– quiteβ€”!</p>\n", ); } #[test] fn broken_link_callback() { let arena = Arena::new(); let mut cb = |link_ref: BrokenLinkReference| match link_ref.normalized { "foo" => Some(ResolvedReference { url: "https://www.rust-lang.org/".to_string(), title: "The Rust Language".to_string(), }), _ => None, }; let options = Options { parse: ParseOptionsBuilder::default() .broken_link_callback(Some(Arc::new(Mutex::new(&mut cb)))) .build() .unwrap(), ..Default::default() }; let root = parse_document( &arena, "# Cool input!\nWow look at this cool [link][foo]. A [broken link] renders as text.", &options, ); let mut output = Vec::new(); format_html(root, &Options::default(), &mut output).unwrap(); let output_str = std::str::from_utf8(&output).unwrap(); assert_eq!( output_str, "<h1>Cool input!</h1>\n<p>Wow look at this cool \ <a href=\"https://www.rust-lang.org/\" title=\"The Rust Language\">link</a>. 
\ A [broken link] renders as text.</p>\n" ); } ��������������comrak-0.29.0/src/tests/pathological.rs�������������������������������������������������������������0000644�0000000�0000000�00000003224�10461020230�0016136�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use super::*; use ntest::timeout; // input: python3 -c 'n = 50000; print("*a_ " * n)' #[test] #[timeout(4000)] fn pathological_emphases() { let n = 50_000; let input = "*a_ ".repeat(n).to_string(); let mut exp = format!("<p>{}", input); // Right-most space is trimmed in output. exp.pop(); exp += "</p>\n"; html(&input, &exp); } // input: python3 -c 'n = 10000; print("|" + "x|" * n + "\n|" + "-|" * n)' #[test] #[timeout(4000)] fn pathological_table_columns_1() { let n = 100_000; let input = format!("{}{}{}{}", "|", "x|".repeat(n), "\n|", "-|".repeat(n)); let exp = format!("<p>{}</p>\n", input); html_opts!([extension.table], &input, &exp); } // input: python3 -c 'n = 70000; print("|" + "x|" * n + "\n|" + "-|" * n + "\n" + "a\n" * n)' #[test] #[timeout(4000)] fn pathological_table_columns_2() { let n = 100_000; let input = format!( "{}{}{}{}{}{}", "|", "x|".repeat(n), "\n|", "-|".repeat(n), "\n", "a\n".repeat(n) ); let extension = parser::ExtensionOptions { table: true, ..Default::default() }; // Not interested in the actual html, just that we don't timeout markdown_to_html( &input, &Options { extension, parse: Default::default(), render: RenderOptions::default(), }, ); } // input: python3 -c 'n = 10000; print("[^1]:" * n + "\n" * n)' #[test] #[timeout(4000)] fn pathological_footnotes() { let n = 10_000; let input = format!("{}{}", "[^1]:".repeat(n), "\n".repeat(n)); let exp = ""; 
html_opts!([extension.footnotes], &input, &exp); } ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/tests/plugins.rs������������������������������������������������������������������0000644�0000000�0000000�00000011302�10461020230�0015145�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use crate::{ adapters::{HeadingAdapter, HeadingMeta, SyntaxHighlighterAdapter}, nodes::Sourcepos, }; use super::*; #[test] fn syntax_highlighter_plugin() { pub struct MockAdapter {} impl SyntaxHighlighterAdapter for MockAdapter { fn write_highlighted( &self, output: &mut dyn Write, lang: Option<&str>, code: &str, ) -> io::Result<()> { write!(output, "<!--{}--><span>{}</span>", lang.unwrap(), code) } fn write_pre_tag( &self, output: &mut dyn Write, attributes: HashMap<String, String>, ) -> io::Result<()> { html::write_opening_tag(output, "pre", attributes) } fn write_code_tag( &self, output: &mut dyn Write, attributes: HashMap<String, String>, ) -> io::Result<()> { html::write_opening_tag(output, "code", attributes) } } let input = concat!("``` rust yum\n", "fn main<'a>();\n", "```\n"); let expected = concat!( "<pre><code class=\"language-rust\"><!--rust--><span>fn main<'a>();\n</span>", "</code></pre>\n" ); let mut plugins = Plugins::default(); let adapter = MockAdapter {}; plugins.render.codefence_syntax_highlighter = Some(&adapter); html_plugins(input, 
expected, &plugins); } #[test] fn heading_adapter_plugin() { struct MockAdapter; impl HeadingAdapter for MockAdapter { fn enter( &self, output: &mut dyn Write, heading: &HeadingMeta, _sourcepos: Option<Sourcepos>, ) -> io::Result<()> { write!(output, "<h{} data-heading=\"true\">", heading.level + 1) } fn exit(&self, output: &mut dyn Write, heading: &HeadingMeta) -> io::Result<()> { write!(output, "</h{}>", heading.level + 1) } } let mut plugins = Plugins::default(); let adapter = MockAdapter {}; plugins.render.heading_adapter = Some(&adapter); let cases: Vec<(&str, &str)> = vec![ ("# Simple heading", "<h2 data-heading=\"true\">Simple heading</h2>"), ( "## Heading with **bold text** and `code`", "<h3 data-heading=\"true\">Heading with <strong>bold text</strong> and <code>code</code></h3>", ), ("###### Whoa, an h7!", "<h7 data-heading=\"true\">Whoa, an h7!</h7>"), ("####### This is not a heading", "<p>####### This is not a heading</p>\n") ]; for (input, expected) in cases { html_plugins(input, expected, &plugins); } } #[test] #[cfg(feature = "syntect")] fn syntect_plugin_with_base16_ocean_dark_theme() { let adapter = crate::plugins::syntect::SyntectAdapter::new(Some("base16-ocean.dark")); let input = concat!("```rust\n", "fn main<'a>();\n", "```\n"); let expected = concat!( "<pre style=\"background-color:#2b303b;\"><code class=\"language-rust\">", "<span style=\"color:#b48ead;\">fn </span><span style=\"color:#8fa1b3;\">main</span><span style=\"color:#c0c5ce;\">", "&lt;</span><span style=\"color:#b48ead;\">&#39;a</span><span style=\"color:#c0c5ce;\">&gt;();\n</span>", "</code></pre>\n" ); let mut plugins = Plugins::default(); plugins.render.codefence_syntax_highlighter = Some(&adapter); html_plugins(input, expected, &plugins); } #[test] #[cfg(feature = "syntect")] fn syntect_plugin_with_css_classes() { let adapter = crate::plugins::syntect::SyntectAdapter::new(None); let input = concat!("```rust\n", "fn main<'a>();\n", "```\n"); let expected = concat!( "<pre 
class=\"syntax-highlighting\"><code class=\"language-rust\">", "<span class=\"source rust\"><span class=\"meta function rust\"><span class=\"meta function rust\"><span class=\"storage type function rust\">fn</span> </span><span class=\"entity name function rust\">main</span></span><span class=\"meta generic rust\"><span class=\"punctuation definition generic begin rust\">&lt;</span>", "<span class=\"storage modifier lifetime rust\">&#39;a</span><span class=\"punctuation definition generic end rust\">&gt;</span></span><span class=\"meta function rust\"><span class=\"meta function parameters rust\"><span class=\"punctuation section parameters begin rust\">(</span></span><span class=\"meta function rust\">", "<span class=\"meta function parameters rust\"><span class=\"punctuation section parameters end rust\">)</span></span></span></span><span class=\"punctuation terminator rust\">;</span>\n</span>", "</code></pre>\n", ); let mut plugins = Plugins::default(); plugins.render.codefence_syntax_highlighter = Some(&adapter); html_plugins(input, expected, &plugins); } ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/tests/regressions.rs��������������������������������������������������������������0000644�0000000�0000000�00000007136�10461020230�0016041�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use super::*; #[test] fn pointy_brace() { html_opts!( [render.unsafe_], concat!( "URI 
autolink: <https://www.pixiv.net>\n", "\n", "Email autolink: <bill@microsoft.com>\n", "\n", "* Inline <em>tag</em> **ha**.\n", "* Inline <!-- comment --> **ha**.\n", "* Inline <? processing instruction ?> **ha**.\n", "* Inline <!DECLARATION OKAY> **ha**.\n", "* Inline <![CDATA[ok]ha **ha** ]]> **ha**.\n" ), concat!( "<p>URI autolink: <a \ href=\"https://www.pixiv.net\">https://www.pixiv.net</a></p>\n", "<p>Email autolink: <a \ href=\"mailto:bill@microsoft.com\">bill@microsoft.com</a></p>\n", "<ul>\n", "<li>Inline <em>tag</em> <strong>ha</strong>.</li>\n", "<li>Inline <!-- comment --> <strong>ha</strong>.</li>\n", "<li>Inline <? processing instruction ?> <strong>ha</strong>.</li>\n", "<li>Inline <!DECLARATION OKAY> <strong>ha</strong>.</li>\n", "<li>Inline <![CDATA[ok]ha **ha** ]]> <strong>ha</strong>.</li>\n", "</ul>\n" ), ); } #[test] fn no_control_characters_in_reference_links() { html( "[A]:\u{1b}\n\nX [A] Y\n", "<p>[A]:\u{1b}</p>\n<p>X [A] Y</p>\n", ) } #[test] fn link_entity_regression() { html( "[link](&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29)", "<p><a href=\"&amp;#x6A&amp;#x61&amp;#x76&amp;#x61&amp;#x73&amp;#x63&amp;#x72&amp;#x69&amp;#x70&amp;#x74&amp;#x3A&amp;#x61&amp;#x6C&amp;#x65&amp;#x72&amp;#x74&amp;#x28&amp;#x27&amp;#x58&amp;#x53&amp;#x53&amp;#x27&amp;#x29\">link</a></p>\n", ); } #[test] fn regression_back_to_back_ranges() { html( "**bold*****bold+italic***", "<p><strong>bold</strong><em><strong>bold+italic</strong></em></p>\n", ); } #[test] fn no_panic_on_empty_bookended_atx_headers() { html("# #", "<h1></h1>\n"); } #[test] fn no_stack_smash_html() { let s: String = ">".repeat(150_000); let arena = Arena::new(); let root = parse_document(&arena, &s, &Options::default()); let mut output = vec![]; html::format_document(root, &Options::default(), &mut output).unwrap() } #[test] fn no_stack_smash_cm() { let s: String = ">".repeat(150_000); let arena = Arena::new(); let root = 
parse_document(&arena, &s, &Options::default()); let mut output = vec![]; cm::format_document(root, &Options::default(), &mut output).unwrap() } #[test] fn cm_autolink_regression() { // Testing that the cm renderer handles this case without crashing html("<a+c:dd>", "<p><a href=\"a+c:dd\">a+c:dd</a></p>\n"); } #[test] fn regression_424() { html( "*text* [link](#section)", "<p><em>text</em> <a href=\"#section\">link</a></p>\n", ); } #[test] fn example_61() { html( r##" `Foo ---- ` <a title="a lot --- of dashes"/> "##, r##"<h2>`Foo</h2> <p>`</p> <h2>&lt;a title=&quot;a lot</h2> <p>of dashes&quot;/&gt;</p> "##, ); } #[test] fn nul_at_eof() { html("foo\0", "<p>foo\u{fffd}</p>\n"); html("foo\0ba", "<p>foo\u{fffd}ba</p>\n"); html("foo\0ba\0", "<p>foo\u{fffd}ba\u{fffd}</p>\n"); } #[test] fn sourcepos_para() { html_opts!( [render.sourcepos], "abc\ndef\n\nghi\n", "<p data-sourcepos=\"1:1-2:3\">abc\ndef</p>\n<p data-sourcepos=\"4:1-4:3\">ghi</p>\n", ); } #[test] #[cfg(feature = "shortcodes")] fn gemoji() { html_opts!([extension.shortcodes], ":x:", "<p>❌</p>\n"); } ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/tests/shortcodes.rs���������������������������������������������������������������0000644�0000000�0000000�00000002255�10461020230�0015650�0����������������������������������������������������������������������������������������������������ustar 
�����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������#![cfg(feature = "shortcodes")] use super::*; #[test] fn emojis() { // Test match html_opts!( [extension.shortcodes], concat!("Hello, happy days! :smile:\n"), concat!("<p>Hello, happy days! πŸ˜„</p>\n"), ); // Test match html_opts!( [extension.shortcodes], concat!(":smile::smile::smile::smile:\n"), concat!("<p>πŸ˜„πŸ˜„πŸ˜„πŸ˜„</p>\n"), ); // Test match html_opts!( [extension.shortcodes], concat!(":smile:::smile:::smile:::smile:\n"), concat!("<p>πŸ˜„:πŸ˜„:πŸ˜„:πŸ˜„</p>\n"), ); // Test no match html_opts!( [extension.shortcodes], concat!("Hello, happy days! :diego:\n"), concat!("<p>Hello, happy days! :diego:</p>\n"), ); } #[test] fn emojis_specials() { // Take a quick trip to https://raw.githubusercontent.com/github/gemoji/master/db/emoji.json // with `jq -r .[].aliases[] | sort | grep -E '[^a-z_-]'` to see what else there is to see. 
html_opts!( [extension.shortcodes], ":+1: :-1: :clock12::1234: :1st_place_medal: :e-mail: :non-potable_water:", "<p>πŸ‘ πŸ‘Ž πŸ•›πŸ”’ πŸ₯‡ πŸ“§ 🚱</p>\n", ); } ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/tests/spoiler.rs������������������������������������������������������������������0000644�0000000�0000000�00000003464�10461020230�0015153�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use super::*; #[test] fn spoiler() { html_opts!( [extension.spoiler], concat!("The ||dog dies at the end of Marley and Me||.\n"), concat!( "<p>The <span class=\"spoiler\">dog dies at the end of Marley and Me</span>.</p>\n" ), ); } #[test] fn spoiler_in_table() { html_opts!( [extension.table, extension.spoiler], concat!("Text | Result\n--- | ---\n`||some clever text||` | ||some clever text||\n"), concat!( "<table>\n", "<thead>\n", "<tr>\n", "<th>Text</th>\n", "<th>Result</th>\n", "</tr>\n", "</thead>\n", "<tbody>\n", "<tr>\n", "<td><code>||some clever text||</code></td>\n", "<td><span class=\"spoiler\">some clever text</span></td>\n", "</tr>\n", "</tbody>\n", "</table>\n" ), ); } #[test] fn spoiler_regressions() { html_opts!( [extension.spoiler], concat!("|should not be spoiler|\n||should be spoiler||\n|||should be spoiler surrounded by pipes|||"), concat!( "<p>|should not be spoiler|\n", "<span class=\"spoiler\">should be spoiler</span>\n", "|<span class=\"spoiler\">should 
be spoiler surrounded by pipes</span>|</p>\n" ), ); } #[test] fn mismatched_spoilers() { html_opts!( [extension.spoiler], concat!("|||this is a spoiler with pipe in front||\n||this is not a spoiler|\n||this is a spoiler with pipe after|||"), concat!( "<p>|<span class=\"spoiler\">this is a spoiler with pipe in front</span>\n", "||this is not a spoiler|\n", "<span class=\"spoiler\">this is a spoiler with pipe after</span>|</p>\n" ), ); } ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/tests/strikethrough.rs������������������������������������������������������������0000644�0000000�0000000�00000000560�10461020230�0016372�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use super::*; #[test] fn strikethrough() { html_opts!( [extension.strikethrough], concat!( "This is ~strikethrough~.\n", "\n", "As is ~~this, okay~~?\n" ), concat!( "<p>This is <del>strikethrough</del>.</p>\n", "<p>As is <del>this, okay</del>?</p>\n" ), ); } ������������������������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/tests/superscript.rs��������������������������������������������������������������0000644�0000000�0000000�00000000266�10461020230�0016056�0����������������������������������������������������������������������������������������������������ustar 
�����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use super::*; #[test] fn superscript() { html_opts!( [extension.superscript], concat!("e = mc^2^.\n"), concat!("<p>e = mc<sup>2</sup>.</p>\n"), ); } ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/tests/table.rs��������������������������������������������������������������������0000644�0000000�0000000�00000012264�10461020230�0014563�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use super::*; #[test] fn table() { html_opts!( [extension.table], concat!("| a | b |\n", "|---|:-:|\n", "| c | d |\n"), concat!( "<table>\n", "<thead>\n", "<tr>\n", "<th>a</th>\n", "<th align=\"center\">b</th>\n", "</tr>\n", "</thead>\n", "<tbody>\n", "<tr>\n", "<td>c</td>\n", "<td align=\"center\">d</td>\n", "</tr>\n", "</tbody>\n", "</table>\n" ), ); } #[test] fn table_regression() { html_opts!( [extension.table], concat!("123\n", "456\n", "| a | b |\n", "| ---| --- |\n", "d | e\n"), concat!( "<p>123\n", "456</p>\n", "<table>\n", "<thead>\n", "<tr>\n", "<th>a</th>\n", "<th>b</th>\n", "</tr>\n", "</thead>\n", "<tbody>\n", "<tr>\n", "<td>d</td>\n", "<td>e</td>\n", "</tr>\n", "</tbody>\n", "</table>\n" ), ); } #[test] fn table_misparse_1() { 
html_opts!([extension.table], "a\n-b", "<p>a\n-b</p>\n"); } #[test] fn table_misparse_2() { html_opts!([extension.table], "a\n-b\n-c", "<p>a\n-b\n-c</p>\n"); } #[test] fn nested_tables_1() { html_opts!( [extension.table], concat!("- p\n", "\n", " |a|b|\n", " |-|-|\n", " |c|d|\n",), concat!( "<ul>\n", "<li>\n", "<p>p</p>\n", "<table>\n", "<thead>\n", "<tr>\n", "<th>a</th>\n", "<th>b</th>\n", "</tr>\n", "</thead>\n", "<tbody>\n", "<tr>\n", "<td>c</td>\n", "<td>d</td>\n", "</tr>\n", "</tbody>\n", "</table>\n", "</li>\n", "</ul>\n", ), ); } #[test] fn nested_tables_2() { html_opts!( [extension.table], concat!("- |a|b|\n", " |-|-|\n", " |c|d|\n",), concat!( "<ul>\n", "<li>\n", "<table>\n", "<thead>\n", "<tr>\n", "<th>a</th>\n", "<th>b</th>\n", "</tr>\n", "</thead>\n", "<tbody>\n", "<tr>\n", "<td>c</td>\n", "<td>d</td>\n", "</tr>\n", "</tbody>\n", "</table>\n", "</li>\n", "</ul>\n", ), ); } #[test] fn nested_tables_3() { html_opts!( [extension.table], concat!("> |a|b|\n", "> |-|-|\n", "> |c|d|\n",), concat!( "<blockquote>\n", "<table>\n", "<thead>\n", "<tr>\n", "<th>a</th>\n", "<th>b</th>\n", "</tr>\n", "</thead>\n", "<tbody>\n", "<tr>\n", "<td>c</td>\n", "<td>d</td>\n", "</tr>\n", "</tbody>\n", "</table>\n", "</blockquote>\n", ), ); } #[test] fn sourcepos_with_preceding_para() { assert_ast_match!( [extension.table], "123\n" "456\n" "| a | b |\n" "| - | - |\n" "| c | d |\n" , (document (1:1-5:9) [ (paragraph (1:1-2:3) [ (text (1:1-1:3) "123") (softbreak (1:4-1:4)) (text (2:1-2:3) "456") ]) (table (3:1-5:9) [ (table_row (3:1-3:9) [ (table_cell (3:2-3:4) [ (text (3:3-3:3) "a") ]) (table_cell (3:6-3:8) [ (text (3:7-3:7) "b") ]) ]) (table_row (5:1-5:9) [ (table_cell (5:2-5:4) [ (text (5:3-5:3) "c") ]) (table_cell (5:6-5:8) [ (text (5:7-5:7) "d") ]) ]) ]) ]) ); } #[test] fn sourcepos_with_preceding_para_offset() { assert_ast_match!( [extension.table], " 123\n" " 456\n" " | a | b |\n" " | - | - |\n" " | c | d |\n" , (document (1:1-5:10) [ (paragraph (1:2-2:5) [ (text (1:2-1:4) 
"123") (softbreak (1:5-1:5)) (text (2:3-2:5) "456") ]) (table (3:2-5:10) [ (table_row (3:2-3:10) [ (table_cell (3:3-3:5) [ (text (3:4-3:4) "a") ]) (table_cell (3:7-3:9) [ (text (3:8-3:8) "b") ]) ]) (table_row (5:2-5:10) [ (table_cell (5:3-5:5) [ (text (5:4-5:4) "c") ]) (table_cell (5:7-5:9) [ (text (5:8-5:8) "d") ]) ]) ]) ]) ); } ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/tests/tagfilter.rs����������������������������������������������������������������0000644�0000000�0000000�00000000335�10461020230�0015451�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use super::*; #[test] fn tagfilter() { html_opts!( [render.unsafe_, extension.tagfilter], concat!("hi <xmp> ok\n", "\n", "<xmp>\n"), concat!("<p>hi &lt;xmp> ok</p>\n", "&lt;xmp>\n"), ); } ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/tests/tasklist.rs�����������������������������������������������������������������0000644�0000000�0000000�00000010740�10461020230�0015327�0����������������������������������������������������������������������������������������������������ustar 
�����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use super::*; #[test] fn tasklist() { html_opts!( [ render.unsafe_, extension.tasklist, parse.relaxed_tasklist_matching ], concat!( "* [ ] Red\n", "* [x] Green\n", "* [ ] Blue\n", "* [!] Papayawhip\n", "<!-- end list -->\n", "1. [ ] Bird\n", "2. [ ] McHale\n", "3. [x] Parish\n", "<!-- end list -->\n", "* [ ] Red\n", " * [x] Green\n", " * [ ] Blue\n" ), concat!( "<ul>\n", "<li><input type=\"checkbox\" disabled=\"\" /> Red</li>\n", "<li><input type=\"checkbox\" checked=\"\" disabled=\"\" /> Green</li>\n", "<li><input type=\"checkbox\" disabled=\"\" /> Blue</li>\n", "<li><input type=\"checkbox\" checked=\"\" disabled=\"\" /> Papayawhip</li>\n", "</ul>\n", "<!-- end list -->\n", "<ol>\n", "<li><input type=\"checkbox\" disabled=\"\" /> Bird</li>\n", "<li><input type=\"checkbox\" disabled=\"\" /> McHale</li>\n", "<li><input type=\"checkbox\" checked=\"\" disabled=\"\" /> Parish</li>\n", "</ol>\n", "<!-- end list -->\n", "<ul>\n", "<li><input type=\"checkbox\" disabled=\"\" /> Red\n", "<ul>\n", "<li><input type=\"checkbox\" checked=\"\" disabled=\"\" /> Green\n", "<ul>\n", "<li><input type=\"checkbox\" disabled=\"\" /> Blue</li>\n", "</ul>\n", "</li>\n", "</ul>\n", "</li>\n", "</ul>\n" ), ); } #[test] fn tasklist_relaxed_regression() { html_opts!( [extension.tasklist, parse.relaxed_tasklist_matching], "* [!] Red\n", concat!( "<ul>\n", "<li><input type=\"checkbox\" checked=\"\" disabled=\"\" /> Red</li>\n", "</ul>\n" ), ); html_opts!( [extension.tasklist], "* [!] Red\n", concat!("<ul>\n", "<li>[!] Red</li>\n", "</ul>\n"), ); html_opts!( [extension.tasklist, parse.relaxed_tasklist_matching], "* [!] 
Red\n", concat!( "<ul>\n", "<li><input type=\"checkbox\" checked=\"\" disabled=\"\" /> Red</li>\n", "</ul>\n" ), ); } #[test] fn tasklist_32() { html_opts!( [render.unsafe_, extension.tasklist], concat!( "- [ ] List item 1\n", "- [ ] This list item is **bold**\n", "- [x] There is some `code` here\n" ), concat!( "<ul>\n", "<li><input type=\"checkbox\" disabled=\"\" /> List item 1</li>\n", "<li><input type=\"checkbox\" disabled=\"\" /> This list item is <strong>bold</strong></li>\n", "<li><input type=\"checkbox\" checked=\"\" disabled=\"\" /> There is some <code>code</code> here</li>\n", "</ul>\n" ), ); } #[test] fn sourcepos() { assert_ast_match!( [], "h\n" "- [ ] xy\n" " - [x] zw\n", (document (1:1-3:10) [ (paragraph (1:1-1:1) [ (text (1:1-1:1) "h") ]) (list (2:1-3:10) [ (item (2:1-3:10) [ (paragraph (2:3-2:8) [ (text (2:3-2:8) "[ ] xy") ]) (list (3:3-3:10) [ (item (3:3-3:10) [ (paragraph (3:5-3:10) [ (text (3:5-3:10) "[x] zw") ]) ]) ]) ]) ]) ]) ); assert_ast_match!( [extension.tasklist], "h\n" "- [ ] xy\n" " - [x] zw\n", (document (1:1-3:10) [ (paragraph (1:1-1:1) [ (text (1:1-1:1) "h") ]) (list (2:1-3:10) [ (taskitem (2:1-3:10) [ (paragraph (2:7-2:8) [ (text (2:7-2:8) "xy") ]) (list (3:3-3:10) [ (taskitem (3:3-3:10) [ (paragraph (3:9-3:10) [ (text (3:9-3:10) "zw") ]) ]) ]) ]) ]) ]) ); } ��������������������������������comrak-0.29.0/src/tests/underline.rs����������������������������������������������������������������0000644�0000000�0000000�00000001600�10461020230�0015451�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use super::*; #[test] fn underline() { html_opts!( [extension.underline], concat!("__underlined text__\n"), concat!("<p><u>underlined 
text</u></p>\n"), ); } #[test] fn underline_sourcepos() { assert_ast_match!( [extension.underline], "__this__\n", (document (1:1-1:8) [ (paragraph (1:1-1:8) [ (underline (1:1-1:8) [ (text (1:3-1:6) "this") ]) ]) ]) ); } #[test] fn underline_sourcepos_emphasis() { assert_ast_match!( [extension.underline], "___this___\n", (document (1:1-1:10) [ (paragraph (1:1-1:10) [ (emph (1:1-1:10) [ (underline (1:2-1:9) [ (text (1:4-1:7) "this") ]) ]) ]) ]) ); } ��������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/tests/wikilinks.rs����������������������������������������������������������������0000644�0000000�0000000�00000020317�10461020230�0015476�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use super::*; #[test] fn wikilinks_does_not_unescape_html_entities_in_link_label() { html_opts!( [extension.wikilinks_title_after_pipe], concat!("This is [[&lt;script&gt;alert(0)&lt;/script&gt;|a &lt;link]]",), concat!("<p>This is <a href=\"%3Cscript%3Ealert(0)%3C/script%3E\" data-wikilink=\"true\">a &lt;link</a></p>\n"), no_roundtrip, ); html_opts!( [extension.wikilinks_title_before_pipe], concat!("This is [[a &lt;link|&lt;script&gt;alert(0)&lt;/script&gt;]]",), concat!("<p>This is <a href=\"%3Cscript%3Ealert(0)%3C/script%3E\" data-wikilink=\"true\">a &lt;link</a></p>\n"), no_roundtrip, ); } #[test] fn wikilinks_sanitizes_the_href_attribute_case_1() { html_opts!( [extension.wikilinks_title_after_pipe], concat!("[[http:\'\"injected=attribute&gt;&lt;img/src=\"0\"onerror=\"alert(0)\"&gt;https://example.com|a]]",), concat!("<p><a 
href=\"http:&#x27;%22injected=attribute%3E%3Cimg/src=%220%22onerror=%22alert(0)%22%3Ehttps://example.com\" data-wikilink=\"true\">a</a></p>\n"), ); html_opts!( [extension.wikilinks_title_before_pipe], concat!("[[a|http:\'\"injected=attribute&gt;&lt;img/src=\"0\"onerror=\"alert(0)\"&gt;https://example.com]]",), concat!("<p><a href=\"http:&#x27;%22injected=attribute%3E%3Cimg/src=%220%22onerror=%22alert(0)%22%3Ehttps://example.com\" data-wikilink=\"true\">a</a></p>\n"), ); } #[test] fn wikilinks_sanitizes_the_href_attribute_case_2() { html_opts!( [extension.wikilinks_title_after_pipe], concat!("<i>[[\'\"&gt;&lt;svg&gt;&lt;i/class=gl-show-field-errors&gt;&lt;input/title=\"&lt;script&gt;alert(0)&lt;/script&gt;\"/&gt;&lt;/svg&gt;https://example.com|a]]",), concat!("<p><!-- raw HTML omitted --><a href=\"&#x27;%22%3E%3Csvg%3E%3Ci/class=gl-show-field-errors%3E%3Cinput/title=%22%3Cscript%3Ealert(0)%3C/script%3E%22/%3E%3C/svg%3Ehttps://example.com\" data-wikilink=\"true\">a</a></p>\n"), ); html_opts!( [extension.wikilinks_title_before_pipe], concat!("<i>[[a|\'\"&gt;&lt;svg&gt;&lt;i/class=gl-show-field-errors&gt;&lt;input/title=\"&lt;script&gt;alert(0)&lt;/script&gt;\"/&gt;&lt;/svg&gt;https://example.com]]",), concat!("<p><!-- raw HTML omitted --><a href=\"&#x27;%22%3E%3Csvg%3E%3Ci/class=gl-show-field-errors%3E%3Cinput/title=%22%3Cscript%3Ealert(0)%3C/script%3E%22/%3E%3C/svg%3Ehttps://example.com\" data-wikilink=\"true\">a</a></p>\n"), ); } #[test] fn wikilinks_title_escape_chars() { html_opts!( [extension.wikilinks_title_before_pipe, render.escaped_char_spans], concat!("[[Name \\[of\\] page|http://example.com]]",), concat!("<p><a href=\"http://example.com\" data-wikilink=\"true\">Name <span data-escaped-char>[</span>of<span data-escaped-char>]</span> page</a></p>\n"), no_roundtrip, ); } #[test] fn wikilinks_supercedes_relaxed_autolinks() { html_opts!( [ extension.wikilinks_title_after_pipe, parse.relaxed_autolinks ], concat!("[[http://example.com]]",), concat!( "<p><a 
href=\"http://example.com\" data-wikilink=\"true\">http://example.com</a></p>\n" ), ); html_opts!( [ extension.wikilinks_title_before_pipe, parse.relaxed_autolinks ], concat!("[[http://example.com]]",), concat!( "<p><a href=\"http://example.com\" data-wikilink=\"true\">http://example.com</a></p>\n" ), ); } #[test] fn wikilinks_only_url_in_tables() { html_opts!( [extension.wikilinks_title_after_pipe, extension.table], concat!("| header |\n", "| ------- |\n", "| [[url]] |\n",), concat!( "<table>\n", "<thead>\n", "<tr>\n", "<th>header</th>\n", "</tr>\n", "</thead>\n", "<tbody>\n", "<tr>\n", "<td><a href=\"url\" data-wikilink=\"true\">url</a></td>\n", "</tr>\n", "</tbody>\n", "</table>\n", ), ); html_opts!( [extension.wikilinks_title_before_pipe, extension.table], concat!("| header |\n", "| ------- |\n", "| [[url]] |\n",), concat!( "<table>\n", "<thead>\n", "<tr>\n", "<th>header</th>\n", "</tr>\n", "</thead>\n", "<tbody>\n", "<tr>\n", "<td><a href=\"url\" data-wikilink=\"true\">url</a></td>\n", "</tr>\n", "</tbody>\n", "</table>\n", ), ); } #[test] fn wikilinks_full_in_tables_not_supported() { html_opts!( [extension.wikilinks_title_after_pipe, extension.table], concat!("| header |\n", "| ------- |\n", "| [[url|link label]] |\n",), concat!( "<table>\n", "<thead>\n", "<tr>\n", "<th>header</th>\n", "</tr>\n", "</thead>\n", "<tbody>\n", "<tr>\n", "<td>[[url</td>\n", "</tr>\n", "</tbody>\n", "</table>\n", ), ); html_opts!( [extension.wikilinks_title_before_pipe, extension.table], concat!("| header |\n", "| ------- |\n", "| [[link label|url]] |\n",), concat!( "<table>\n", "<thead>\n", "<tr>\n", "<th>header</th>\n", "</tr>\n", "</thead>\n", "<tbody>\n", "<tr>\n", "<td>[[link label</td>\n", "</tr>\n", "</tbody>\n", "</table>\n", ), ); } #[test] fn wikilinks_exceeds_label_limit() { let long_label = format!("[[{:b<1100}]]", "a"); let expected = format!("<p>{}</p>\n", long_label); html_opts!( [extension.wikilinks_title_after_pipe], &long_label, &expected, ); } #[test] fn 
wikilinks_autolinker_ignored() { html_opts!( [extension.wikilinks_title_after_pipe, extension.autolink], concat!("[[http://example.com]]",), concat!( "<p><a href=\"http://example.com\" data-wikilink=\"true\">http://example.com</a></p>\n" ), ); html_opts!( [extension.wikilinks_title_before_pipe, extension.autolink], concat!("[[http://example.com]]",), concat!( "<p><a href=\"http://example.com\" data-wikilink=\"true\">http://example.com</a></p>\n" ), ); } #[test] fn sourcepos() { assert_ast_match!( [extension.wikilinks_title_after_pipe], "This [[http://example.com|link label]] that\n", (document (1:1-1:43) [ (paragraph (1:1-1:43) [ (text (1:1-1:5) "This ") (wikilink (1:6-1:38) [ (text (1:27-1:36) "link label") ]) (text (1:39-1:43) " that") ]) ]) ); assert_ast_match!( [extension.wikilinks_title_before_pipe], "This [[link label|http://example.com]] that\n", (document (1:1-1:43) [ (paragraph (1:1-1:43) [ (text (1:1-1:5) "This ") (wikilink (1:6-1:38) [ (text (1:8-1:17) "link label") ]) (text (1:39-1:43) " that") ]) ]) ); assert_ast_match!( [extension.wikilinks_title_before_pipe], "This [[http://example.com]] that\n", (document (1:1-1:32) [ (paragraph (1:1-1:32) [ (text (1:1-1:5) "This ") (wikilink (1:6-1:27) [ (text (1:8-1:25) "http://example.com") ]) (text (1:28-1:32) " that") ]) ]) ); assert_ast_match!( [extension.wikilinks_title_before_pipe], "This [[link\\[label|http://example.com]] that\n", (document (1:1-1:44) [ (paragraph (1:1-1:44) [ (text (1:1-1:5) "This ") (wikilink (1:6-1:39) [ (text (1:8-1:11) "link") (text (1:12-1:13) "[") (text (1:14-1:18) "label") ]) (text (1:40-1:44) " that") ]) ]) ); } 
�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/tests/xml.rs����������������������������������������������������������������������0000644�0000000�0000000�00000004073�10461020230�0014273�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use super::*; #[test] fn base() { let input = concat!( "foo *bar*\n", "\n", "paragraph 2\n", "\n", "```\n", "code\n", "```\n", ); xml( input, concat!( "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n", "<document xmlns=\"http://commonmark.org/xml/1.0\">\n", " <paragraph>\n", " <text xml:space=\"preserve\">foo </text>\n", " <emph>\n", " <text xml:space=\"preserve\">bar</text>\n", " </emph>\n", " </paragraph>\n", " <paragraph>\n", " <text xml:space=\"preserve\">paragraph 2</text>\n", " </paragraph>\n", " <code_block xml:space=\"preserve\">code\n", "</code_block>\n", "</document>\n", ), ); } #[test] fn sourcepos() { let input = concat!( "foo *bar*\n", "\n", "paragraph 2\n", "\n", "```\n", "code\n", "```\n", ); xml_opts( input, concat!( "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n", "<document sourcepos=\"1:1-7:3\" xmlns=\"http://commonmark.org/xml/1.0\">\n", " <paragraph sourcepos=\"1:1-1:9\">\n", " <text sourcepos=\"1:1-1:4\" xml:space=\"preserve\">foo </text>\n", " <emph sourcepos=\"1:5-1:9\">\n", " <text sourcepos=\"1:6-1:8\" xml:space=\"preserve\">bar</text>\n", " 
</emph>\n", " </paragraph>\n", " <paragraph sourcepos=\"3:1-3:11\">\n", " <text sourcepos=\"3:1-3:11\" xml:space=\"preserve\">paragraph 2</text>\n", " </paragraph>\n", " <code_block sourcepos=\"5:1-7:3\" xml:space=\"preserve\">code\n", "</code_block>\n", "</document>\n", ), |opts| opts.render.sourcepos = true, ); } ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������comrak-0.29.0/src/tests.rs��������������������������������������������������������������������������0000644�0000000�0000000�00000022752�10461020230�0013477�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use crate::nodes::{AstNode, NodeValue, Sourcepos}; use crate::*; use std::collections::HashMap; use std::io::{self, Write}; use std::panic; mod api; mod autolink; mod commonmark; mod core; mod description_lists; mod empty; mod escaped_char_spans; mod footnotes; mod fuzz; mod greentext; mod header_ids; mod math; mod multiline_block_quotes; mod options; mod pathological; mod plugins; mod regressions; mod shortcodes; mod spoiler; mod strikethrough; mod superscript; mod table; mod tagfilter; mod tasklist; mod underline; mod wikilinks; mod xml; #[track_caller] fn compare_strs(output: &str, expected: &str, kind: &str, original_input: &str) { if output != expected { println!("Running {} test", kind); 
println!("Original Input:"); println!("=============================="); println!("{}", original_input); println!("=============================="); println!("Got:"); println!("=============================="); println!("{}", output); println!("=============================="); println!(); println!("Expected:"); println!("=============================="); println!("{}", expected); println!("=============================="); println!(); } assert_eq!(output, expected); } #[track_caller] fn commonmark(input: &str, expected: &str, opts: Option<&Options>) { let arena = Arena::new(); let defaults = Options::default(); let options = opts.unwrap_or(&defaults); let root = parse_document(&arena, input, options); let mut output = vec![]; cm::format_document(root, options, &mut output).unwrap(); compare_strs( &String::from_utf8(output).unwrap(), expected, "regular", input, ); } #[track_caller] pub fn html(input: &str, expected: &str) { html_opts_i(input, expected, true, |_| ()); } #[track_caller] fn html_opts_i<F>(input: &str, expected: &str, roundtrip: bool, opts: F) where F: Fn(&mut Options), { let mut options = Options::default(); opts(&mut options); html_opts_w(input, expected, roundtrip, &options); } #[track_caller] fn html_opts_w(input: &str, expected: &str, roundtrip: bool, options: &Options) { let arena = Arena::new(); let root = parse_document(&arena, input, options); let mut output = vec![]; html::format_document(root, options, &mut output).unwrap(); compare_strs( &String::from_utf8(output).unwrap(), expected, "regular", input, ); if options.render.sourcepos || !roundtrip { return; } let mut md = vec![]; cm::format_document(root, options, &mut md).unwrap(); let md_string = &String::from_utf8(md).unwrap(); let root = parse_document(&arena, md_string, options); let mut output_from_rt = vec![]; html::format_document(root, options, &mut output_from_rt).unwrap(); compare_strs( &String::from_utf8(output_from_rt).unwrap(), expected, "roundtrip", md_string, ); } macro_rules! 
html_opts { ([$($optclass:ident.$optname:ident),*], $lhs:expr, $rhs:expr) => { html_opts!([$($optclass.$optname),*], $lhs, $rhs,) }; ([$($optclass:ident.$optname:ident),*], $lhs:expr, $rhs:expr,) => { html_opts!([$($optclass.$optname),*], $lhs, $rhs, roundtrip) }; ([$($optclass:ident.$optname:ident),*], $lhs:expr, $rhs:expr, $rt:ident) => { html_opts!([$($optclass.$optname),*], $lhs, $rhs, $rt,) }; ([$($optclass:ident.$optname:ident),*], $lhs:expr, $rhs:expr, roundtrip,) => { $crate::tests::html_opts_i($lhs, $rhs, true, |opts| { $(opts.$optclass.$optname = true;)* }); }; ([$($optclass:ident.$optname:ident),*], $lhs:expr, $rhs:expr, no_roundtrip,) => { $crate::tests::html_opts_i($lhs, $rhs, false, |opts| { $(opts.$optclass.$optname = true;)* }); }; } pub(crate) use html_opts; #[track_caller] fn html_plugins(input: &str, expected: &str, plugins: &Plugins) { let arena = Arena::new(); let options = Options::default(); let root = parse_document(&arena, input, &options); let mut output = vec![]; html::format_document_with_plugins(root, &options, &mut output, plugins).unwrap(); compare_strs( &String::from_utf8(output).unwrap(), expected, "regular", input, ); if options.render.sourcepos { return; } let mut md = vec![]; cm::format_document(root, &options, &mut md).unwrap(); let md_string = &String::from_utf8(md).unwrap(); let root = parse_document(&arena, md_string, &options); let mut output_from_rt = vec![]; html::format_document_with_plugins(root, &options, &mut output_from_rt, plugins).unwrap(); compare_strs( &String::from_utf8(output_from_rt).unwrap(), expected, "roundtrip", md_string, ); } #[track_caller] fn xml(input: &str, expected: &str) { xml_opts(input, expected, |_| ()); } #[track_caller] fn xml_opts<F>(input: &str, expected: &str, opts: F) where F: Fn(&mut Options), { let arena = Arena::new(); let mut options = Options::default(); opts(&mut options); let root = parse_document(&arena, input, &options); let mut output = vec![]; crate::xml::format_document(root, 
&options, &mut output).unwrap(); compare_strs( &String::from_utf8(output).unwrap(), expected, "regular", input, ); if options.render.sourcepos { return; } let mut md = vec![]; cm::format_document(root, &options, &mut md).unwrap(); let md_string = &String::from_utf8(md).unwrap(); let root = parse_document(&arena, md_string, &options); let mut output_from_rt = vec![]; crate::xml::format_document(root, &options, &mut output_from_rt).unwrap(); compare_strs( &String::from_utf8(output_from_rt).unwrap(), expected, "roundtrip", md_string, ); } fn asssert_node_eq<'a>(node: &'a AstNode<'a>, location: &[usize], expected: &NodeValue) { let node = location .iter() .fold(node, |node, &n| node.children().nth(n).unwrap()); let data = node.data.borrow(); let actual = format!("{:?}", data.value); let expected = format!("{:?}", expected); compare_strs(&actual, &expected, "ast comparison", "ast node"); } macro_rules! sourcepos { (($spsl:literal:$spsc:literal-$spel:literal:$spec:literal)) => { ($spsl, $spsc, $spel, $spec).into() }; ((XXX)) => { (0, 1, 0, 1).into() }; } pub(crate) use sourcepos; macro_rules! 
ast { (($name:tt $sp:tt)) => { ast!(($name $sp [])) }; (($name:tt $sp:tt $content:tt)) => { AstMatchTree { name: stringify!($name).to_string(), sourcepos: sourcepos!($sp), content: ast!($content), } }; ($text:literal) => {AstMatchContent::Text($text.to_string())}; ([ $( $children:tt )* ]) => { AstMatchContent::Children(vec![ $( ast!($children), )* ]) }; } pub(crate) use ast; #[track_caller] fn assert_ast_match_i<F>(md: &str, amt: AstMatchTree, opts: F) where F: Fn(&mut Options), { let mut options = Options::default(); options.render.sourcepos = true; opts(&mut options); let result = panic::catch_unwind(|| { let arena = Arena::new(); let root = parse_document(&arena, md, &options); amt.assert_match(root); }); if let Err(err) = result { let arena = Arena::new(); let root = parse_document(&arena, md, &options); let mut output = vec![]; format_xml(root, &options, &mut output).unwrap(); eprintln!("{}", std::str::from_utf8(&output).unwrap()); panic::resume_unwind(err) } } macro_rules! assert_ast_match { ([ $( $optclass:ident.$optname:ident ),* ], $( $md:literal )+, $amt:tt,) => { assert_ast_match!( [ $( $optclass.$optname ),* ], $( $md )+, $amt ) }; ([ $( $optclass:ident.$optname:ident ),* ], $( $md:literal )+, $amt:tt) => { crate::tests::assert_ast_match_i( concat!( $( $md ),+ ), ast!($amt), |#[allow(unused_variables)] opts| {$(opts.$optclass.$optname = true;)*}, ); }; } pub(crate) use assert_ast_match; struct AstMatchTree { name: String, sourcepos: Sourcepos, content: AstMatchContent, } enum AstMatchContent { Text(String), Children(Vec<AstMatchTree>), } impl AstMatchTree { #[track_caller] fn assert_match<'a>(&self, node: &'a AstNode<'a>) { let ast = node.data.borrow(); assert_eq!(self.name, ast.value.xml_node_name(), "node type matches"); assert_eq!(self.sourcepos, ast.sourcepos, "sourcepos are equal"); match &self.content { AstMatchContent::Text(text) => { assert_eq!( 0, node.children().count(), "text node should have no children" ); assert_eq!( text, 
ast.value.text().unwrap(), "text node content should match" ); } AstMatchContent::Children(children) => { assert_eq!( children.len(), node.children().count(), "children count should match" ); for (e, a) in children.iter().zip(node.children()) { e.assert_match(a); } } } } } ����������������������comrak-0.29.0/src/xml.rs����������������������������������������������������������������������������0000644�0000000�0000000�00000027116�10461020230�0013134�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use crate::nodes::{AstNode, ListType, NodeCode, NodeMath, NodeTable, NodeValue}; use crate::parser::{Options, Plugins}; use once_cell::sync::Lazy; use std::cmp; use std::io::{self, Write}; use crate::nodes::NodeHtmlBlock; const MAX_INDENT: u32 = 40; /// Formats an AST as HTML, modified by the given options. pub fn format_document<'a>( root: &'a AstNode<'a>, options: &Options, output: &mut dyn Write, ) -> io::Result<()> { format_document_with_plugins(root, options, output, &Plugins::default()) } /// Formats an AST as HTML, modified by the given options. Accepts custom plugins. 
pub fn format_document_with_plugins<'a>( root: &'a AstNode<'a>, options: &Options, output: &mut dyn Write, plugins: &Plugins, ) -> io::Result<()> { output.write_all(b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")?; output.write_all(b"<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n")?; XmlFormatter::new(options, output, plugins).format(root, false) } struct XmlFormatter<'o, 'c> { output: &'o mut dyn Write, options: &'o Options<'c>, _plugins: &'o Plugins<'o>, indent: u32, } impl<'o, 'c> XmlFormatter<'o, 'c> { fn new(options: &'o Options<'c>, output: &'o mut dyn Write, plugins: &'o Plugins) -> Self { XmlFormatter { options, output, _plugins: plugins, indent: 0, } } fn escape(&mut self, buffer: &[u8]) -> io::Result<()> { static XML_SAFE: Lazy<[bool; 256]> = Lazy::new(|| { let mut a = [true; 256]; for &c in b"&<>\"".iter() { a[c as usize] = false; } a }); let mut offset = 0; for (i, &byte) in buffer.iter().enumerate() { if !XML_SAFE[byte as usize] { let esc: &[u8] = match byte { b'"' => b"&quot;", b'&' => b"&amp;", b'<' => b"&lt;", b'>' => b"&gt;", _ => unreachable!(), }; self.output.write_all(&buffer[offset..i])?; self.output.write_all(esc)?; offset = i + 1; } } self.output.write_all(&buffer[offset..])?; Ok(()) } fn format<'a>(&mut self, node: &'a AstNode<'a>, plain: bool) -> io::Result<()> { // Traverse the AST iteratively using a work stack, with pre- and // post-child-traversal phases. During pre-order traversal render the // opening tags, then push the node back onto the stack for the // post-order traversal phase, then push the children in reverse order // onto the stack and begin rendering first child. enum Phase { Pre, Post, } let mut stack = vec![(node, plain, Phase::Pre)]; while let Some((node, plain, phase)) = stack.pop() { match phase { Phase::Pre => { let new_plain = if plain { match node.data.borrow().value { NodeValue::Text(ref literal) | NodeValue::Code(NodeCode { ref literal, .. 
}) | NodeValue::HtmlInline(ref literal) => { self.escape(literal.as_bytes())?; } NodeValue::LineBreak | NodeValue::SoftBreak => { self.output.write_all(b" ")?; } NodeValue::Math(NodeMath { ref literal, .. }) => { self.escape(literal.as_bytes())?; } _ => (), } plain } else { stack.push((node, false, Phase::Post)); self.format_node(node, true)? }; for ch in node.reverse_children() { stack.push((ch, new_plain, Phase::Pre)); } } Phase::Post => { debug_assert!(!plain); self.format_node(node, false)?; } } } Ok(()) }

    /// Writes the current indentation to the output: one space per indent
    /// unit, capped at `MAX_INDENT` units.
    fn indent(&mut self) -> io::Result<()> {
        for _ in 0..(cmp::min(self.indent, MAX_INDENT)) {
            self.output.write_all(b" ")?;
        }
        Ok(())
    }

    /// Emits the XML for one side of `node`.
    ///
    /// When `entering` is true this writes the indentation, the opening
    /// `<name`, an optional `sourcepos` attribute, and any node-specific
    /// attributes. Nodes that carry literal content (text, code, HTML, code
    /// blocks, math) get their escaped content and closing tag written inline.
    /// A node with children bumps the indentation by two; a childless,
    /// non-literal node is written as a self-closing element.
    ///
    /// When `entering` is false, only nodes that have children produce output:
    /// the indentation is reduced by two and the `</name>` line is written.
    ///
    /// Always returns `Ok(false)` on success; I/O errors from the underlying
    /// writer are propagated.
    fn format_node<'a>(&mut self, node: &'a AstNode<'a>, entering: bool) -> io::Result<bool> {
        if entering {
            self.indent()?;

            let ast = node.data.borrow();

            write!(self.output, "<{}", ast.value.xml_node_name())?;

            // Source positions are only emitted when enabled and when the
            // node's start line is non-zero.
            if self.options.render.sourcepos && ast.sourcepos.start.line != 0 {
                write!(self.output, " sourcepos=\"{}\"", ast.sourcepos)?;
            }

            // Set by the arms that already wrote `>content</name`, so that the
            // self-closing ` /` is not appended for them below.
            let mut was_literal = false;

            match ast.value {
                NodeValue::Document => self
                    .output
                    .write_all(b" xmlns=\"http://commonmark.org/xml/1.0\"")?,
                NodeValue::Text(ref literal)
                | NodeValue::Code(NodeCode { ref literal, .. })
                | NodeValue::HtmlBlock(NodeHtmlBlock { ref literal, .. })
                | NodeValue::HtmlInline(ref literal) => {
                    self.output.write_all(b" xml:space=\"preserve\">")?;
                    self.escape(literal.as_bytes())?;
                    write!(self.output, "</{}", ast.value.xml_node_name())?;
                    was_literal = true;
                }
                NodeValue::List(ref nl) => {
                    if nl.list_type == ListType::Bullet {
                        self.output.write_all(b" type=\"bullet\"")?;
                    } else {
                        write!(
                            self.output,
                            " type=\"ordered\" start=\"{}\" delim=\"{}\"",
                            nl.start,
                            nl.delimiter.xml_name()
                        )?;
                    }
                    write!(self.output, " tight=\"{}\"", nl.tight)?;
                }
                NodeValue::FrontMatter(_) => (),
                NodeValue::BlockQuote => {}
                NodeValue::MultilineBlockQuote(..) => {}
                NodeValue::Item(..) => {}
                NodeValue::DescriptionList => {}
                NodeValue::DescriptionItem(..) => (),
                NodeValue::DescriptionTerm => {}
                NodeValue::DescriptionDetails => {}
                NodeValue::Heading(ref nch) => {
                    write!(self.output, " level=\"{}\"", nch.level)?;
                }
                NodeValue::CodeBlock(ref ncb) => {
                    if !ncb.info.is_empty() {
                        self.output.write_all(b" info=\"")?;
                        // The info string comes from the document and lands
                        // inside an attribute value, so it must be escaped
                        // like every other attribute written by this function
                        // (destination, title, label, id). Writing it raw
                        // would let `"`, `<` or `&` break the XML.
                        self.escape(ncb.info.as_bytes())?;
                        self.output.write_all(b"\"")?;

                        if ncb.info.eq("math") {
                            self.output.write_all(b" math_style=\"display\"")?;
                        }
                    }
                    self.output.write_all(b" xml:space=\"preserve\">")?;
                    self.escape(ncb.literal.as_bytes())?;
                    write!(self.output, "</{}", ast.value.xml_node_name())?;
                    was_literal = true;
                }
                NodeValue::ThematicBreak => {}
                NodeValue::Paragraph => {}
                NodeValue::LineBreak => {}
                NodeValue::SoftBreak => {}
                NodeValue::Strong => {}
                NodeValue::Emph => {}
                NodeValue::Strikethrough => {}
                NodeValue::Superscript => {}
                NodeValue::Link(ref nl) | NodeValue::Image(ref nl) => {
                    self.output.write_all(b" destination=\"")?;
                    self.escape(nl.url.as_bytes())?;
                    self.output.write_all(b"\" title=\"")?;
                    self.escape(nl.title.as_bytes())?;
                    self.output.write_all(b"\"")?;
                }
                NodeValue::Table(..) => {
                    // noop
                }
                NodeValue::TableRow(..) => {
                    // noop
                }
                NodeValue::TableCell => {
                    // Skip the first entry yielded by `ancestors()`, then take
                    // the next two as the row and the table respectively.
                    let mut ancestors = node.ancestors().skip(1);

                    let header_row = &ancestors.next().unwrap().data.borrow().value;
                    let table = &ancestors.next().unwrap().data.borrow().value;

                    // Only cells of a `TableRow(true)` row get an `align`
                    // attribute.
                    if let (
                        NodeValue::TableRow(true),
                        NodeValue::Table(NodeTable { alignments, .. }),
                    ) = (header_row, table)
                    {
                        // Index into the table's alignments for this cell.
                        let ix = node.preceding_siblings().count() - 1;
                        if let Some(xml_align) = alignments[ix].xml_name() {
                            write!(self.output, " align=\"{}\"", xml_align)?;
                        }
                    }
                }
                NodeValue::FootnoteDefinition(ref fd) => {
                    self.output.write_all(b" label=\"")?;
                    self.escape(fd.name.as_bytes())?;
                    self.output.write_all(b"\"")?;
                }
                NodeValue::FootnoteReference(ref nfr) => {
                    self.output.write_all(b" label=\"")?;
                    self.escape(nfr.name.as_bytes())?;
                    self.output.write_all(b"\"")?;
                }
                NodeValue::TaskItem(Some(_)) => {
                    self.output.write_all(b" completed=\"true\"")?;
                }
                NodeValue::TaskItem(None) => {
                    self.output.write_all(b" completed=\"false\"")?;
                }
                #[cfg(feature = "shortcodes")]
                NodeValue::ShortCode(ref nsc) => {
                    self.output.write_all(b" id=\"")?;
                    self.escape(nsc.code.as_bytes())?;
                    self.output.write_all(b"\"")?;
                }
                NodeValue::Escaped => {
                    // noop
                }
                NodeValue::Math(ref math, ..) => {
                    if math.display_math {
                        self.output.write_all(b" math_style=\"display\"")?;
                    } else {
                        self.output.write_all(b" math_style=\"inline\"")?;
                    }
                    self.output.write_all(b" xml:space=\"preserve\">")?;
                    self.escape(math.literal.as_bytes())?;
                    write!(self.output, "</{}", ast.value.xml_node_name())?;
                    was_literal = true;
                }
                NodeValue::WikiLink(ref nl) => {
                    self.output.write_all(b" destination=\"")?;
                    self.escape(nl.url.as_bytes())?;
                    self.output.write_all(b"\"")?;
                }
                NodeValue::Underline => {}
                NodeValue::SpoileredText => {}
                NodeValue::EscapedTag(ref data) => {
                    // NOTE(review): the tag data is written verbatim (not via
                    // `escape`) directly after the element name, with no
                    // attribute wrapper. Behavior kept as-is; confirm that
                    // this is the intended output.
                    self.output.write_all(data.as_bytes())?;
                }
            }

            if node.first_child().is_some() {
                // Children follow on their own lines, indented further.
                self.indent += 2;
            } else if !was_literal {
                // Childless and no inline content: self-closing element.
                self.output.write_all(b" /")?;
            }
            self.output.write_all(b">\n")?;
        } else if node.first_child().is_some() {
            // Leaving a node that had children: undo the indent and close it.
            self.indent -= 2;
            self.indent()?;
            writeln!(
                self.output,
                "</{}>",
                node.data.borrow().value.xml_node_name()
            )?;
        }
        Ok(false)
    }
}
������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������