libxml-0.3.5/.cargo_vcs_info.json0000644000000001360000000000100123300ustar { "git": { "sha1": "2fa5b5a153de0bf2df9b5d63eba6073f88b0df5e" }, "path_in_vcs": "" }libxml-0.3.5/.github/dependabot.yml000064400000000000000000000002211046102023000153030ustar 00000000000000version: 2 updates: - package-ecosystem: cargo directory: "/" schedule: interval: daily time: "04:00" open-pull-requests-limit: 10 libxml-0.3.5/.github/workflows/CI.yml000064400000000000000000000035051046102023000155360ustar 00000000000000on: [push, pull_request] name: CI Linux jobs: test: name: rust-libxml CI runs-on: ubuntu-latest strategy: matrix: with_default_bindings: [false, true] steps: - name: install dependencies uses: ryankurte/action-apt@v0.2.0 with: packages: "libxml2-dev" - name: Set up LIBXML2 env var if compiling with the default bindings run: echo "LIBXML2=$(pkg-config libxml-2.0 --variable=libdir)/libxml2.so" >> "$GITHUB_ENV" if: ${{ matrix.with_default_bindings }} - uses: actions/checkout@v2 - uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: stable override: true - name: run tests uses: actions-rs/cargo@v1 with: command: test test-newer-libxml2: strategy: matrix: libxml_version: ["2.12.9", "2.13.8","2.14.1"] name: With libxml ${{ matrix.libxml_version }} runs-on: ubuntu-latest steps: - name: install dependencies uses: ryankurte/action-apt@v0.2.0 with: packages: "libpython3-dev" - uses: actions/checkout@v2 - name: Install libxml ${{ matrix.libxml_version }} by hand run: | wget https://download.gnome.org/sources/libxml2/$(echo ${{ matrix.libxml_version }} | sed -e 's/\.[0-9]*$//')/libxml2-${{ matrix.libxml_version }}.tar.xz tar xf libxml2-${{ matrix.libxml_version }}.tar.xz cd libxml2-${{ matrix.libxml_version }} ./configure make sudo make install - uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: stable override: true - name: run tests uses: actions-rs/cargo@v1 with: command: test env: LD_LIBRARY_PATH: 
/usr/local/liblibxml-0.3.5/.github/workflows/windows.yml000064400000000000000000000030171046102023000167330ustar 00000000000000on: [push, pull_request] name: CI Windows jobs: test-default-windows: name: Windows vcpkg (default) runs-on: windows-latest env: VCPKGRS_DYNAMIC: 1 VCPKG_DEFAULT_TRIPLET: x64-windows VCPKG_ROOT: C:\vcpkg steps: - uses: actions/checkout@v4 - name: Setup vcpkg libxml2 Cache uses: actions/cache@v4 id: vcpkg-cache with: path: C:\vcpkg key: vcpkg-libxml2 - name: Install libxml2 with vcpkg run: | vcpkg install libxml2:x64-windows vcpkg integrate install - name: run tests uses: actions-rs/cargo@v1 with: command: test test-mingw64-windows: name: Windows (mingw64) runs-on: windows-latest defaults: run: shell: msys2 {0} steps: - uses: actions/checkout@v4 - uses: msys2/setup-msys2@v2 with: path-type: minimal release: false update: false msystem: MINGW64 install: >- mingw64/mingw-w64-x86_64-pkg-config mingw64/mingw-w64-x86_64-libxml2 - name: Install stable windows-gnu Rust toolchain uses: actions-rs/toolchain@v1 with: toolchain: stable-x86_64-pc-windows-gnu target: x86_64-pc-windows-gnu override: true - name: Ensure mingw64 pkg-config is in path run: echo "C:\msys64\mingw64\bin" >> "$GITHUB_PATH" - name: run tests uses: actions-rs/cargo@v1 with: command: testlibxml-0.3.5/.gitignore000064400000000000000000000002661046102023000131140ustar 00000000000000# Compiled files *.o *.so *.rlib *.dll Cargo.lock # Executables *.exe # Vim swap files *.swp # VSCode project folder .vscode/ # Generated by Cargo /target/ # Test results /tags libxml-0.3.5/.rustfmt.toml000064400000000000000000000001651046102023000136010ustar 00000000000000# Detailed instructions at: https://github.com/rust-lang-nursery/rustfmt/blob/master/Configurations.md tab_spaces = 2libxml-0.3.5/.travis.yml000064400000000000000000000016651046102023000132410ustar 00000000000000sudo: false dist: trusty language: rust rust: - stable - beta - nightly addons: apt: packages: libxml2-dev after_success: 
./scripts/doc-upload.sh env: global: - RUST_TEST_THREADS=1 - SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt - secure: "kuvtFj8UpLj0NQhk3a9PDLRhXq4cDhd9UNT9Sn0S2TBFF23AsOX2ffXN+5ey/2mfYEk18d5MpUM15Ha/7PbxTebkqZxhFeZimrqAHTC6605AUICmuJv06tmLYetoqgNvyU0Tgt4MCYblja8oJCs/TkEFtbVxZX0uZ2lgsyLG8PQyROEyiaPQyeld7rd4k6s+13w4EOFO0kGh992BmOAUiICqsDqKYddaI7KL49b4AwkGrfaXAf/mtlJT7E4NlloI/5AmCYlQdYwQui3SJojvzd9lDBF7syuPesgPz3S6dlzr80uKVuI0rVx6K6Xo+vzZLWP4HZExIjF12G8DuBKAWLmoN/QfR+ipkXGrTau78+8Jp0qCQsy4ti4rY8PvhwipkdGS+pUV8a06UwTZARLnknnhfqKFoNvIUjLwdu1HVwftXtIgdFtU7RZMJfUxdq8/dNdXNerAm4c3kdwcKl4nP6Fnus4OWkeekuuCCObcBI1qlTFQT2pp5ae+3oe3kq1Srq8/HrLGlQkcWlbYsB9/mKuSxqRQxTbGli3eWBALecpeCQPdrxygFarX6q18HTwoqprvbcCp9BM4soADv5gVUeiOuYmvn2DbREMMZJKd6cpcXpXem4epK+MxpXh7jhQ60xw9GuIIeZxcgyKf+pP3lRoII3nPZKoecUoDcJct1l4=" libxml-0.3.5/CHANGELOG.md000064400000000000000000000346441046102023000127440ustar 00000000000000# Change Log ## [0.3.6] (in development) ## [0.3.5] (2025-28-04) This release stabilizes the new "bindgen during build" approach of v0.3.4 to more platforms. Support for newer libxml2 versions has been improved. - CI support for v2.12.9, v2.13.8, v2.14.1 Thanks go to @wetneb and @charmitro for contributing. ### Added * cargo build: expose libxml2 version to main build script; * creating a new `Parser` now initializes via `bindings::xmlInitParser`, called at most once. ### Changes * cargo build: mark `max_align_t` as opaque to fix i386 build failure * cfg: adapt mutability of error pointer depending on libxml version * change the return type of `xmlGetNodeType` from `u32` to the more portable `bindings::xmlElementType` * change the argument type of `NodeType::from_int` from `u32` to the more portable `bindings::xmlElementType` * protect `Schema` initialization to be closer to thread-safe (note that this wrapper is NOT thread-safe in general) ### Removed * The use of `SchemaParserContext` and `SchemaValidationContext` is currently NOT thread safe. 
Hence, the `schema_test` has been weakened to run in a single thread (future improvements welcome). ## [0.3.4] (2025-16-04) Thanks go to @wetneb, @anwaralameddin, @rudolphfroger, @jcamiel, @imcsk8 for contributions to this release. ### Added * Node methods: `get_property_no_ns` (alias: `get_attribute_no_ns`), `get_properties_ns` (alias: `get_attributes_ns`), `has_property_no_ns` (alias: `has_attribute_no_ns`), `remove_property_no_ns` (alias: `remove_attribute_no_ns`), `get_property_node_ns` (alias: `get_attribute_node_ns`), `get_property_node_no_ns` (alias: `get_attribute_node_no_ns`) * Added implementations of `Hash`, `PartialEq` and `Eq` traits for `Namespace` ### Changed * Call bindgen at build time on Unix platforms (thanks @wetneb) ## [0.3.3] 2023-17-07 ### Changed * Update the implementation of `StructuredError` so that all validation errors are returned from the validation methods present on `SchemaValidationContext`. Previously, all returned validation errors were identical due to libxml reusing a global memory address for all reported errors. Thanks @JDSeiler ! * The `message` method of `StructuredError` has been deprecated. ## [0.3.2] 2023-07-05 ### Added * XPath: `Context::findvalues`, with optional node-bound evaluation, obtaining `String` values. * `Node::findvalues` method for direct XPath search obtaining `String` values, without first explicitly instantiating a `Context`. Reusing a `Context` remains more efficient. ## [0.3.1] 2022-26-03 * Added: Thanks to @lepapareil, @hurlenko and @ballsteve for contributing installation docs for Windows and MacOS. * Added: `Node` and `RoNode` now have `has_property` (alias `has_attribute`) and `has_property_ns` (alias `has_attribute_ns`) to check attribute presence without allocating the value. * Added: `xpath::is_well_formed_xpath`, thanks @bcpeinhardt ! ## [0.3.0] 2021-27-05 * Change `Parser::parse_file/string_with_encoding` to `Parser::parse_file/string_with_options`. 
* Introduce `ParserOptions` which encapsulates the forced encoding setting together with libxml2's HTML and XML parser options. * For systems without a pkg-config capability, we now use the `LIBXML2` environment variable to detect an installed libxml2 toolchain. (thanks @przygienda !) ## [0.2.16] 2021-31-01 ### Added * More element-oriented methods: `get_next_element_sibling`, `get_prev_element_sibling`, `get_last_element_child`, added to both `Node` and `RoNode`. * `Document::ronode_to_string` for printing read-only nodes * `RoNode::node_ptr` for getting the internal libxml2 raw pointer of a read-only node ## [0.2.15] 2020-28-09 Thanks to @JoshuaNitschke for contributing OS-based package detection for the native libxml2! Also thanks to @coding-yogi, @ignatenkobrain and @simoin for investigating platform-specific issues with this crate, some of which expect resolution in upcoming versions. ### Added * Support for x64-Windows use via the vcpkg package manager (with new CI monitoring via appveyor). ### Changed * Added back an `Error` trait implementation for `XmlParseError` ### Removed * Dropped a large number of memory layout tests that were auto-generated by bindgen, until we have a more sophisticated test-generation setup that can enforce multiple architectures. Ideally this has no day-to-day impact and just makes portability easier in the short-term. ## [0.2.14] 2020-27-03 ### Changed More consistently use `c_char` to successfully compile on ARM targets ## [0.2.13] 2020-16-01 Thanks to @jangernert for the upgrades to `Document` serialization. Thanks to @lweberk for contributing the `Schema` featureset and to @cbarber for refining the FFI interop. 
### Added * `Document::to_string_with_options` allowing customization of document serialization * `Document::SaveOptions` containing the currently supported serialization options, as provided internally by libxml * `Schema` holding and managing `xmlSchemaPtr` as created while parsing by `SchemaParserContext` * `SchemaParserContext` holding source of XSD and parsing into a `Schema` while gathering and –in case returning– errors that arise from the XSD parser across the FFI to libxml * `SchemaValidationContext` holding the `Schema` from resulting `SchemaParserContext` parse and offering validation methods for `Document`, `Node` or file path to XML, while gathering and –in case returning– validation errors from the XML validator across the FFI to libxml ### Changed * the `Document::to_string()` serialization method is now implemented through `fmt::Display` and no longer takes an optional boolean flag. The default behavior is now unformatted serialization - previously `to_string(false)`, while `to_string(true)` can be realized via ``` .to_string_with_options(SaveOptions { format: true, ..SaveOptions::default()}) ``` ## [0.2.12] 2019-16-06 Thanks to @Alexhuszagh for contributing all enhancements for the `0.2.12` release! ### Added * BOM-aware Unicode support * New `Parser` methods that allow specifying an explicit encoding: `parse_file_with_encoding`, `parse_string_with_encoding`, `is_well_formed_html_with_encoding` ### Changed * Default encodings in `Parser` are now left for libxml to guess internally, rather than defaulted to `utf-8`. 
## [0.2.11] 2019-15-04 ### Added * `RoNode::to_hashable` and `RoNode::null` for parity with existing `Node`-leveraging applications ## [0.2.10] 2019-14-04 ### Added * `RoNode` primitive for simple and efficient **read-only** parallel processing * Benchmarking a 120 MB XML document shows a twenty five fold speedup, when comparing `Node` to parallel rayon processing over `RoNode` with a 32 logical core desktop * While `RoNode` is added as an experiment for high performance read-only scans, any mutability requires using `Node` and incurring a bookkeeping cost of safety at runtime. * Introduced benchmarking via `criterion`, only installed during development. * `benches/parsing_benchmarks` contains examples of parallel scanning via `rayon` iterators. * added `Document::get_root_readonly` method for obtaining a `RoNode` root. * added `Context::node_evaluate_readonly` method for searching over a `RoNode` * added `Context::get_readonly_nodes_as_vec` method for collecting xpath results as `RoNode` ## [0.2.9] 2019-28-03 * Squash memory leak in creating new `Node`s from the Rust API * Safely unlink `Node`s obtained via XPath searches ## [0.2.8] 2019-25-03 ### Changed Minor internal changes to make the crate compile more reliably under MacOS, and other platforms which enable the `LIBXML_THREAD_ENABLED` compile-time flag. Thank you @caldwell ! ## [0.2.7] 2019-09-03 ### Added * implement and test `replace_child_node` for element nodes ## [0.2.6] 2018-07-12 * Internal update to Rust 2018 Edition * fix deallocation bugs with `.import_node()` and `.get_namespaces()` ## [0.2.5] 2018-26-09 ### Added * `Node::null` placeholder that avoids the tricky memory management of `Node::mock` that can lead to memory leaks. Really a poor substitute for the better `Option` type with a `None` value, which is **recommended** instead. ## [0.2.4] 2018-24-09 ### Added * `Context::from_node` method for convenient XPath context initialization via a Node object. 
Possible as nodes keep a reference to their owner `Document` object. ### Changed * Ensured memory safety of cloning xpath `Context` objects * Switched to using `Weak` references to the owner document, in `Node`, `Context` and `Object`, to prevent memory leaks in multi-document pipelines. * Speedup to XPath node retrieval ## [0.2.3] 2018-19-09 ### Added * `Node::findnodes` method for direct XPath search, without first explicitly instantiating a `Context`. Reusing a `Context` remains more efficient. ## [0.2.2] 2018-23-07 * Expose the underlying `libxml2` data structures in the public crate interface, to enable a first [libxslt](https://crates.io/crates/libxslt) crate proof of concept. ## [0.2.1] 2018-23-07 ### Added * `Node::set_node_rc_guard` which allows customizing the reference-count mutability threshold for Nodes. * serialization tests for `Document` * (crate internal) full set of libxml2 bindings as produced via `bindgen` (see #39) * (crate internal) using libxml2's type language in the wrapper Rust modules * (crate internal) setup bindings for reuse in higher-level crates, such as libxslt ### Changed * `NodeType::from_c_int` renamed to `NodeType::from_int`, now accepting a `u32` argument ### Removed * Removed dependence on custom C code; also removed gcc from build dependencies ## [0.2.0] 2018-19-07 This release adds fundamental breaking changes to the API. The API continues to be considered unstable until the `1.0.0` release. ### Added * `dup` and `dup_from` methods for deeply duplicating a libxml2 document * `is_unlinked` for quick check if a `Node` has been unlinked from a parent ### Changed * safe API for `Node`s and `Document`s, with automatic pointer bookkeeping and memory deallocation, by @triptec * `Node`s are now bookkept by their owning document * libxml2 low-level memory deallocation is postponed until the `Document` is dropped, with the exception of unlinked nodes, which are deallocated on drop. 
* `Document::get_root_element` now has an option type, and returns `None` for an empty Document * `Node::mock` now takes owner `Document` as argument * proofed tests with `valgrind` and removed all obvious memory leaks * All node operations that modify a `Node` now both require a `&mut Node` argument and return a `Result` type. * Full list of changed signatures in Node: `remove_attribute`, `remove_property`, `set_name`, `set_content`, `set_property`, `set_property_ns`, `set_attribute`, `set_attribute_ns`, `remove_attribute`, `set_namespace`, `recursively_remove_namespaces`, `append_text` * Tree transforming operations now operate on `&mut self`, and no longer return a Node if the return value is identical to the argument. * Changed signatures: `add_child`, `add_prev_sibling`, `add_next_sibling` * `Result` types should always be checked for errors, as mutability conflicts are reported during runtime. ### Removed * `global` module, which attempted to manage global libxml state for threaded workflows. May be re-added after the API stabilizes ## [0.1.2] 2018-12-01 * We welcome Andreas (@triptec) to the core developer team! ### Added * Workaround `.free` method for freeing nodes, until the `Rc<RefCell<_Node>>` free-on-drop solution by Andreas is introduced in 0.2 ## [0.1.1] 2017-18-12 ### Added * `get_first_element_child` - similar to `get_first_child` but only returns XML Elements * `is_element_node` - check if a given `Node` is an XML Element ### Changed * Requiring owned `Node` function arguments only when consumed - `add_*` methods largely take `&Node` now. ## [0.1.0] 2017-09-11 Pushing up release to a 0.1, as contributor interest is starting to pick up, and the 0.0.x versions were getting a bit silly/wrong. ### Added * Node methods: `unbind_node`, `recursively_remove_namespaces`, `set_name`, * Document methods: `import_node` ### Changed * Updated gcc build to newer incantation, upped dependency version. 
## [0.0.75] 2017-04-06 ### Added * Node methods: `get_namespace_declarations`, `get_property_ns` (alias: `get_attribute_ns`), `remove_property` (alias: `remove_attribute`), `get_attribute_node`, `get_namespace`, `lookup_namespace_prefix`, `lookup_namespace_uri` * XPath methods: `findvalue` and `findnodes`, with optional node-bound evaluation. ### Changed * The Node setter for a namespaced attribute is now `set_property_ns` (alias: `set_attribute_ns`) * Node set_* methods are now consistently defined on `&mut self` * Refactored wrongly used `url` to `href` for namespace-related Node ops. * Fixed bug with Node's `get_content` method always returning empty * More stable `append_text` for node, added tests ## [0.0.74] 2016-25-12 ### Changed * Namespace::new only requires a borrowed &Node now * Fixed bug with wrongly discarded namespace prefixes on Namespace::new ### Added * Namespace methods: `get_prefix`, `get_url` ## [0.0.73] 2016-25-12 ### Added * Document method: `as_node` ## [0.0.72] 2016-25-12 ### Added * Node methods: `get_last_child`, `get_child_nodes`, `get_child_elements`, `get_properties`, `get_attributes` ## [0.0.71] 2016-29-11 ### Changed * Namespace::new takes Node argument last ### Added * Node namespace accessors - `set_namespace`, `get_namespaces`, `set_ns_attribute`, `set_ns_property` * Namespace registration for XPath ## [0.0.7] 2016-27-11 ### Changed * stricter dependency spec in Cargo.toml * cargo clippy compliant * Document's `get_root_element` returns the document pointer as a Node for empty documents, type change from `Option<Node>` to simple `Node` ### Added * Node accessors: `set_attribute`, `get_attribute`, `set_property` (the `attribute` callers are simple aliases for `property`) * Node `to_hashable` for simple hashing of nodes * Node `mock` for simple mock nodes in testing ## [0.0.5] 2016-07-01 Thanks to @grray for most of these improvements! 
### Changed * Switched to using the more permissive MIT license, consistent with libxml2 licensing * Fixed segfault issues with xpath contexts ### Added * Can now evaluate ```string(/foo//@bar)``` type XPath expressions, and use their result via ```.to_string()``` ## [0.0.4] 2016-04-25 ### Changed * The ```Node.add_child``` method now adds a Node, while the old behavior of creating a new node with a given namespace and name is now ```Node.new_child``` ### Added * Can add following siblings via ```Node.add_next_sibling``` * Can now add text nodes via ```Node.new_text``` libxml-0.3.5/Cargo.lock0000644000000447650000000000100103230ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 4 [[package]] name = "aho-corasick" version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", ] [[package]] name = "anes" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstyle" version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" [[package]] name = "autocfg" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "bindgen" version = "0.71.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" dependencies = [ "bitflags", "cexpr", "clang-sys", "itertools 0.13.0", "proc-macro2", "quote", "regex", "rustc-hash", "shlex", "syn", ] [[package]] name = "bitflags" version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" [[package]] name = "bumpalo" version = "3.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" [[package]] name = "cast" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cexpr" version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" dependencies = [ "nom", ] [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "ciborium" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" dependencies = [ "ciborium-io", "ciborium-ll", "serde", ] [[package]] name = "ciborium-io" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" [[package]] name = "ciborium-ll" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" dependencies = [ "ciborium-io", "half", ] [[package]] name = "clang-sys" version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" dependencies = [ "glob", "libc", "libloading", ] [[package]] name = "clap" version = "4.5.37" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eccb054f56cbd38340b380d4a8e69ef1f02f1af43db2f0cc817a4774d80ae071" dependencies = [ "clap_builder", ] 
[[package]] name = "clap_builder" version = "4.5.37" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "efd9466fac8543255d3b1fcad4762c5e116ffe808c8a3043d4263cd4fd4862a2" dependencies = [ "anstyle", "clap_lex", ] [[package]] name = "clap_lex" version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" [[package]] name = "criterion" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" dependencies = [ "anes", "cast", "ciborium", "clap", "criterion-plot", "is-terminal", "itertools 0.10.5", "num-traits", "once_cell", "oorandom", "plotters", "rayon", "regex", "serde", "serde_derive", "serde_json", "tinytemplate", "walkdir", ] [[package]] name = "criterion-plot" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" dependencies = [ "cast", "itertools 0.10.5", ] [[package]] name = "crossbeam-deque" version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" dependencies = [ "crossbeam-epoch", "crossbeam-utils", ] [[package]] name = "crossbeam-epoch" version = "0.9.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" dependencies = [ "crossbeam-utils", ] [[package]] name = "crossbeam-utils" version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "crunchy" version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" 
[[package]] name = "either" version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "glob" version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" [[package]] name = "half" version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" dependencies = [ "cfg-if", "crunchy", ] [[package]] name = "hermit-abi" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbd780fe5cc30f81464441920d82ac8740e2e46b29a6fad543ddd075229ce37e" [[package]] name = "is-terminal" version = "0.4.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" dependencies = [ "hermit-abi", "libc", "windows-sys", ] [[package]] name = "itertools" version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" dependencies = [ "either", ] [[package]] name = "itertools" version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" dependencies = [ "either", ] [[package]] name = "itoa" version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "js-sys" version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" dependencies = [ "once_cell", "wasm-bindgen", ] [[package]] name = "libc" version = "0.2.172" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" [[package]] name = "libloading" version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" dependencies = [ "cfg-if", "windows-targets", ] [[package]] name = "libxml" version = "0.3.5" dependencies = [ "bindgen", "criterion", "libc", "pkg-config", "rayon", "vcpkg", ] [[package]] name = "log" version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" [[package]] name = "memchr" version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "minimal-lexical" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "nom" version = "7.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" dependencies = [ "memchr", "minimal-lexical", ] [[package]] name = "num-traits" version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", ] [[package]] name = "once_cell" version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "oorandom" version = "11.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" [[package]] name = "pkg-config" version = "0.3.32" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "plotters" version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" dependencies = [ "num-traits", "plotters-backend", "plotters-svg", "wasm-bindgen", "web-sys", ] [[package]] name = "plotters-backend" version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" [[package]] name = "plotters-svg" version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" dependencies = [ "plotters-backend", ] [[package]] name = "proc-macro2" version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] [[package]] name = "quote" version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] [[package]] name = "rayon" version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" dependencies = [ "either", "rayon-core", ] [[package]] name = "rayon-core" version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" dependencies = [ "crossbeam-deque", "crossbeam-utils", ] [[package]] name = "regex" version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ 
"aho-corasick", "memchr", "regex-automata", "regex-syntax", ] [[package]] name = "regex-automata" version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", "regex-syntax", ] [[package]] name = "regex-syntax" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "rustc-hash" version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" [[package]] name = "rustversion" version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" [[package]] name = "ryu" version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" [[package]] name = "same-file" version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" dependencies = [ "winapi-util", ] [[package]] name = "serde" version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "serde_json" version = "1.0.140" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" dependencies = [ 
"itoa", "memchr", "ryu", "serde", ] [[package]] name = "shlex" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "syn" version = "2.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "tinytemplate" version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" dependencies = [ "serde", "serde_json", ] [[package]] name = "unicode-ident" version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" [[package]] name = "vcpkg" version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" [[package]] name = "walkdir" version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" dependencies = [ "same-file", "winapi-util", ] [[package]] name = "wasm-bindgen" version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" dependencies = [ "cfg-if", "once_cell", "rustversion", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" dependencies = [ "bumpalo", "log", "proc-macro2", "quote", "syn", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" version = "0.2.100" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" dependencies = [ "quote", "wasm-bindgen-macro-support", ] [[package]] name = "wasm-bindgen-macro-support" version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", "syn", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" dependencies = [ "unicode-ident", ] [[package]] name = "web-sys" version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" dependencies = [ "js-sys", "wasm-bindgen", ] [[package]] name = "winapi-util" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ "windows-sys", ] [[package]] name = "windows-sys" version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ "windows-targets", ] [[package]] name = "windows-targets" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", "windows_i686_gnu", "windows_i686_gnullvm", "windows_i686_msvc", "windows_x86_64_gnu", "windows_x86_64_gnullvm", "windows_x86_64_msvc", ] [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" libxml-0.3.5/Cargo.toml0000644000000050300000000000100103240ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). 
# See Cargo.toml.orig for the original contents. [package] edition = "2021" name = "libxml" version = "0.3.5" authors = [ "Andreas Franzén ", "Deyan Ginev ", "Jan Frederik Schaefer ", ] build = "build.rs" exclude = ["scripts/*"] autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "A Rust wrapper for libxml2 - the XML C parser and toolkit developed for the Gnome project" documentation = "https://kwarc.github.io/rust-libxml/libxml/index.html" readme = "README.md" keywords = [ "xml", "libxml", "xpath", "parser", "html", ] license = "MIT" repository = "https://github.com/KWARC/rust-libxml" [lib] name = "libxml" path = "src/lib.rs" [[example]] name = "schema_example" path = "examples/schema_example.rs" [[example]] name = "tree_example" path = "examples/tree_example.rs" [[example]] name = "xpath_example" path = "examples/xpath_example.rs" [[test]] name = "base_tests" path = "tests/base_tests.rs" [[test]] name = "codec_tests" path = "tests/codec_tests.rs" [[test]] name = "mutability_guards" path = "tests/mutability_guards.rs" [[test]] name = "readonly_tests" path = "tests/readonly_tests.rs" [[test]] name = "schema_tests" path = "tests/schema_tests.rs" [[test]] name = "tree_tests" path = "tests/tree_tests.rs" [[test]] name = "xpath_tests" path = "tests/xpath_tests.rs" [[bench]] name = "parsing_benchmarks" path = "benches/parsing_benchmarks.rs" harness = false [dependencies.libc] version = "0.2" [dev-dependencies.criterion] version = "0.5.1" [dev-dependencies.rayon] version = "1.0.0" [build-dependencies.bindgen] version = "0.71" features = [ "runtime", "which-rustfmt", ] default-features = false [target.'cfg(all(target_family = "windows", target_env = "gnu"))'.build-dependencies.pkg-config] version = "0.3.2" [target.'cfg(all(target_family = "windows", target_env = "msvc"))'.build-dependencies.vcpkg] version = "0.2" [target."cfg(macos)".build-dependencies.pkg-config] version = "0.3.2" 
[target."cfg(unix)".build-dependencies.pkg-config] version = "0.3.2" libxml-0.3.5/Cargo.toml.orig0000644000000022560000000000100112720ustar [package] name = "libxml" version = "0.3.5" edition = "2021" authors = ["Andreas Franzén ", "Deyan Ginev ","Jan Frederik Schaefer "] description = "A Rust wrapper for libxml2 - the XML C parser and toolkit developed for the Gnome project" repository = "https://github.com/KWARC/rust-libxml" documentation = "https://kwarc.github.io/rust-libxml/libxml/index.html" readme = "README.md" license = "MIT" keywords = ["xml", "libxml","xpath", "parser", "html"] build = "build.rs" exclude = [ "scripts/*" ] [lib] name = "libxml" [dependencies] libc = "0.2" [target.'cfg(all(target_family = "windows", target_env = "msvc"))'.build-dependencies] vcpkg = "0.2" [target.'cfg(all(target_family = "windows", target_env = "gnu"))'.build-dependencies] pkg-config = "0.3.2" [target.'cfg(macos)'.build-dependencies] pkg-config = "0.3.2" [target.'cfg(unix)'.build-dependencies] pkg-config = "0.3.2" [build-dependencies.bindgen] version = "0.71" features = [ "runtime", "which-rustfmt", ] default-features = false [dev-dependencies] rayon = "1.0.0" criterion = "0.5.1" [[bench]] name = "parsing_benchmarks" harness = false libxml-0.3.5/Cargo.toml.orig000064400000000000000000000022561046102023000140140ustar 00000000000000[package] name = "libxml" version = "0.3.5" edition = "2021" authors = ["Andreas Franzén ", "Deyan Ginev ","Jan Frederik Schaefer "] description = "A Rust wrapper for libxml2 - the XML C parser and toolkit developed for the Gnome project" repository = "https://github.com/KWARC/rust-libxml" documentation = "https://kwarc.github.io/rust-libxml/libxml/index.html" readme = "README.md" license = "MIT" keywords = ["xml", "libxml","xpath", "parser", "html"] build = "build.rs" exclude = [ "scripts/*" ] [lib] name = "libxml" [dependencies] libc = "0.2" [target.'cfg(all(target_family = "windows", target_env = "msvc"))'.build-dependencies] vcpkg = "0.2" 
[target.'cfg(all(target_family = "windows", target_env = "gnu"))'.build-dependencies] pkg-config = "0.3.2" [target.'cfg(macos)'.build-dependencies] pkg-config = "0.3.2" [target.'cfg(unix)'.build-dependencies] pkg-config = "0.3.2" [build-dependencies.bindgen] version = "0.71" features = [ "runtime", "which-rustfmt", ] default-features = false [dev-dependencies] rayon = "1.0.0" criterion = "0.5.1" [[bench]] name = "parsing_benchmarks" harness = false libxml-0.3.5/LICENSE000064400000000000000000000021441046102023000121260ustar 00000000000000The MIT License (MIT) Copyright (c) 2015-2021 Andreas Franzén, Deyan Ginev, Jan Frederik Schaefer Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
libxml-0.3.5/README.md000064400000000000000000000066121046102023000124040ustar 00000000000000[![CI Linux](https://github.com/KWARC/rust-libxml/actions/workflows/CI.yml/badge.svg?branch=master)](https://github.com/KWARC/rust-libxml/actions/workflows/CI.yml) [![CI Windows](https://github.com/KWARC/rust-libxml/actions/workflows/windows.yml/badge.svg?branch=master)](https://github.com/KWARC/rust-libxml/actions/workflows/windows.yml) [![API Documentation](https://img.shields.io/badge/docs-API-blue.svg)](http://KWARC.github.io/rust-libxml/libxml/index.html) [![License](http://img.shields.io/badge/license-MIT-blue.svg)](https://raw.githubusercontent.com/KWARC/rust-libxml/master/LICENSE) [![crates.io](https://img.shields.io/crates/v/libxml.svg)](https://crates.io/crates/libxml) Rust wrapper for [libxml2](http://xmlsoft.org/). The main goal of this project is to benefit from libxml2's maturity and stability while the native Rust XML crates mature to be near-drop-in replacements. As of the `0.2.0` release of the crate, there are some modest safety guarantees: * Mutability, as well as ownership - we use `Rc>` wrappers to ensure runtime safety of libxml2 operations already in the Rust layer. * Memory safety guarantees - in particular `Node` and `Document` objects have automatic bookkeeping and deallocation on drop, for leak-free wrapper use. * No thread safety - libxml2's global memory management is a challenge to adapt in a thread-safe way with minimal intervention **Coverage**: Only covers a subset of libxml2 at the moment, contributions are welcome. We try to increase support with each release. **Welcome!** With these caveats, the contributors to the project are migrating production work towards Rust and find a continuing reliance on libxml2 a helpful relief for initial ports. As such, contributions to this crate are welcome, if your workflow is not yet fully supported. 
## Installation prerequisites Before performing the usual cargo build/install steps, you need to have the relevant components for using the original libxml2 code. These may become gradually outdated with time - please do let us know by opening a new issue/PR whenever that's the case. ### Linux/Debian On linux systems you'd need the development headers of libxml2 (e.g. `libxml2-dev` in Debian), as well as `pkg-config`. ### MacOS [Community contributed](https://github.com/KWARC/rust-libxml/issues/88#issuecomment-890876895): ``` $ brew install libxml2 # e.g. version 2.9.12 $ ln -s /usr/local/Cellar/libxml2/2.9.12/lib/libxml2.2.dylib /usr/local/lib/libxml-2.0.dylib $ export LIBXML2=/usr/local/Cellar/libxml2/2.9.12/lib/pkgconfig/libxml-2.0.pc ``` ### FreeBSD [Community contributed](https://github.com/KWARC/rust-libxml/issues/130#issuecomment-1976348349) ``` $ pkg install libxml2 pkgconf ``` ### Windows #### msvc [Community contributed](https://github.com/KWARC/rust-libxml/issues/81#issuecomment-760364976): * manually install builds tools c++ and english language by visiting [BuildTools](https://visualstudio.microsoft.com/fr/thank-you-downloading-visual-studio/?sku=BuildTools&rel=16) * launch cmd prompt with admin privileges and execute these commands sequentially: ``` C:\> git clone https://github.com/microsoft/vcpkg C:\> .\vcpkg\bootstrap-vcpkg.bat C:\> setx /M PATH "%PATH%;c:\vcpkg" && setx VCPKGRS_DYNAMIC "1" /M C:\> refreshenv C:\> vcpkg install libxml2:x64-windows C:\> vcpkg integrate install ``` #### gnu On mingw64 environment you could install libxml2 with `pacman -S mingw64/mingw-w64-x86_64-libxml2`.libxml-0.3.5/appveyor.yml000064400000000000000000000017671046102023000135230ustar 00000000000000# Based on the "trust" template v0.1.2 # https://github.com/japaric/trust/tree/v0.1.2 environment: global: RUST_VERSION: stable CRATE_NAME: rust-libxml matrix: - CHANNEL: stable ARCH: x86_64 TOOLCHAIN: msvc FEATURES: vcpkg VCPKG_DEFAULT_TRIPLET: x64-windows 
VCPKGRS_DYNAMIC: 1 TARGET: x86_64-pc-windows-msvc install: - vcpkg install libxml2 - curl -sSf -o rustup-init.exe https://win.rustup.rs/ - rustup-init.exe -y --default-host %TARGET% --default-toolchain %RUST_VERSION% - set PATH=%PATH%;C:\Users\appveyor\.cargo\bin - rustc -Vv - cargo -V test_script: - if [%APPVEYOR_REPO_TAG%]==[false] ( cargo build --target %TARGET% && cargo test --target %TARGET% ) cache: - '%USERPROFILE%\.cargo\registry' - C:\tools\vcpkg\installed - target branches: only: - master notifications: - provider: Email on_build_success: false # Building is done in the test phase, so we disable Appveyor's build phase. build: falselibxml-0.3.5/build.rs000064400000000000000000000121001046102023000125570ustar 00000000000000use std::{env, fs, path::{Path, PathBuf}}; struct ProbedLib { version: String, include_paths: Vec, } /// Finds libxml2 and optionally return a list of header /// files from which the bindings can be generated. fn find_libxml2() -> Option { #![allow(unreachable_code)] // for platform-dependent dead code if let Ok(ref s) = std::env::var("LIBXML2") { // println!("{:?}", std::env::vars()); // panic!("set libxml2."); let p = std::path::Path::new(s); let fname = std::path::Path::new( p.file_name() .unwrap_or_else(|| panic!("no file name in LIBXML2 env ({s})")), ); assert!( p.is_file(), "{}", &format!("not a file in LIBXML2 env ({s})") ); println!( "cargo:rustc-link-lib={}", fname .file_stem() .unwrap() .to_string_lossy() .strip_prefix("lib") .unwrap() ); println!( "cargo:rustc-link-search={}", p.parent() .expect("no library path in LIBXML2 env") .to_string_lossy() ); None } else { #[cfg(any(target_family = "unix", target_os = "macos", all(target_family="windows", target_env="gnu")))] { let lib = pkg_config::Config::new() .probe("libxml-2.0") .expect("Couldn't find libxml2 via pkg-config"); return Some(ProbedLib { include_paths: lib.include_paths, version: lib.version, }) } #[cfg(all(target_family = "windows", target_env = "msvc"))] { if let 
Some(meta) = vcpkg_dep::vcpkg_find_libxml2() { return Some(meta); } else { eprintln!("vcpkg did not succeed in finding libxml2."); } } panic!("Could not find libxml2.") } } fn generate_bindings(header_dirs: Vec, output_path: &Path) { let bindings = bindgen::Builder::default() .header("src/wrapper.h") .opaque_type("max_align_t") // invalidate build as soon as the wrapper changes .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) .layout_tests(true) .clang_args(&["-DPKG-CONFIG"]) .clang_args( header_dirs.iter() .map(|dir| format!("-I{}", dir.display())) ); bindings .generate() .expect("failed to generate bindings with bindgen") .write_to_file(output_path) .expect("Failed to write bindings.rs"); } fn main() { let bindings_path = PathBuf::from(env::var_os("OUT_DIR").unwrap()).join("bindings.rs"); // declare availability of config variable (without setting it) println!("cargo::rustc-check-cfg=cfg(libxml_older_than_2_12)"); if let Some(probed_lib) = find_libxml2() { // if we could find header files, generate fresh bindings from them generate_bindings(probed_lib.include_paths, &bindings_path); // and expose the libxml2 version to the code let version_parts: Vec = probed_lib.version.split('.') .map(|part| part.parse::().unwrap_or(-1)).collect(); let older_than_2_12 = version_parts.len() > 1 && (version_parts[0] < 2 || version_parts[0] == 2 && version_parts[1] < 12); println!("cargo::rustc-check-cfg=cfg(libxml_older_than_2_12)"); if older_than_2_12 { println!("cargo::rustc-cfg=libxml_older_than_2_12"); } } else { // otherwise, use the default bindings on platforms where pkg-config isn't available fs::copy(PathBuf::from("src/default_bindings.rs"), bindings_path) .expect("Failed to copy the default bindings to the build directory"); // for now, assume that the library is older than 2.12, because that's what those bindings are computed with println!("cargo::rustc-cfg=libxml_older_than_2_12"); } } #[cfg(all(target_family = "windows", target_env = "msvc"))] mod vcpkg_dep 
{ use crate::ProbedLib; pub fn vcpkg_find_libxml2() -> Option { if let Ok(metadata) = vcpkg::Config::new() .find_package("libxml2") { Some(ProbedLib { version: vcpkg_version(), include_paths: metadata.include_paths }) } else { None } } fn vcpkg_version() -> String { // What is the best way to obtain the version on Windows *before* bindgen runs? // here we attempt asking the shell for "vcpkg list libxml2" let mut vcpkg_exe = vcpkg::find_vcpkg_root(&vcpkg::Config::new()).unwrap(); vcpkg_exe.push("vcpkg.exe"); let vcpkg_list_libxml2 = std::process::Command::new(vcpkg_exe) .args(["list","libxml2"]) .output() .expect("vcpkg.exe failed to execute in vcpkg_dep build step"); if vcpkg_list_libxml2.status.success() { let libxml2_list_str = String::from_utf8_lossy(&vcpkg_list_libxml2.stdout); for line in libxml2_list_str.lines() { if line.starts_with("libxml2:") { let mut version_piece = line.split("2."); version_piece.next(); if let Some(version_tail) = version_piece.next() { if let Some(version) = version_tail.split(' ').next() .unwrap().split('#').next() { return format!("2.{version}"); } } } } } // default to a recent libxml2 from Windows 10 // (or should this panic?) String::from("2.13.5") } }libxml-0.3.5/examples/schema_example.rs000064400000000000000000000015331046102023000162610ustar 00000000000000//! //! Example Usage of XSD Schema Validation //! 
use libxml::schemas::SchemaParserContext; use libxml::schemas::SchemaValidationContext; use libxml::parser::Parser; fn main() { let xml = Parser::default() .parse_file("tests/resources/schema.xml") .expect("Expected to be able to parse XML Document from file"); let mut xsdparser = SchemaParserContext::from_file("tests/resources/schema.xsd"); let xsd = SchemaValidationContext::from_parser(&mut xsdparser); if let Err(errors) = xsd { for err in &errors { println!("{}", err.message.as_ref().unwrap()); } panic!("Failed to parse schema"); } let mut xsd = xsd.unwrap(); if let Err(errors) = xsd.validate_document(&xml) { for err in &errors { println!("{}", err.message.as_ref().unwrap()); } panic!("Invalid XML accoding to XSD schema"); } } libxml-0.3.5/examples/tree_example.rs000064400000000000000000000013411046102023000157550ustar 00000000000000use libxml::parser::Parser; use libxml::tree::*; fn my_recurse(node: &Node) { match node.get_type().unwrap() { NodeType::ElementNode => { println!("Entering {}", node.get_name()); } NodeType::TextNode => { println!("Text: {}", node.get_content()); } _ => {} } let mut c: Option = node.get_first_child(); while let Some(child) = c { my_recurse(&child); c = child.get_next_sibling(); } if node.get_type().unwrap() == NodeType::ElementNode { println!("Leaving {}", node.get_name()); } } fn main() { let parser = Parser::default(); let doc = parser.parse_file("tests/resources/file01.xml").unwrap(); let root = doc.get_root_element().unwrap(); my_recurse(&root); } libxml-0.3.5/examples/xpath_example.rs000064400000000000000000000005671046102023000161530ustar 00000000000000use libxml::parser::Parser; use libxml::xpath::Context; fn main() { let parser = Parser::default(); let doc = parser.parse_file("tests/resources/file01.xml").unwrap(); let context = Context::new(&doc).unwrap(); let result = context.evaluate("//child/text()").unwrap(); for node in &result.get_nodes_as_vec() { println!("Found: {}", node.get_content()); } } 
libxml-0.3.5/src/bindings.rs000064400000000000000000000005511046102023000140530ustar 00000000000000// Issues coming from bindgen #![allow(non_upper_case_globals)] #![allow(non_camel_case_types)] #![allow(non_snake_case)] #![allow(dead_code)] #![allow(improper_ctypes)] #![allow(missing_docs)] /* * helper var until we figure out well-formedness checks */ pub static mut HACKY_WELL_FORMED: bool = false; include!(concat!(env!("OUT_DIR"), "/bindings.rs")); libxml-0.3.5/src/c_helpers.rs000064400000000000000000000113471046102023000142270ustar 00000000000000#![allow(non_camel_case_types)] #![allow(non_snake_case)] use crate::bindings::*; use libc::{c_char, c_int, size_t}; use std::os::raw::c_void; use std::ptr; use std::slice; // error handling functions // pub fn xmlSetGenericErrorFunc(ctx: *mut c_void, handler: *mut c_void); // pub fn xmlThrDefSetGenericErrorFunc(ctx: *mut c_void, handler: *mut c_void); // Taken from Nokogiri (https://github.com/sparklemotion/nokogiri/blob/24bb843327306d2d71e4b2dc337c1e327cbf4516/ext/nokogiri/xml_document.c#L64) pub fn xmlNodeRecursivelyRemoveNs(node: xmlNodePtr) { unsafe { let mut property: xmlAttrPtr; xmlSetNs(node, ptr::null_mut()); let mut child: xmlNodePtr = (*node).children; while !child.is_null() { xmlNodeRecursivelyRemoveNs(child); child = (*child).next; } if (((*node).type_ == xmlElementType_XML_ELEMENT_NODE) || ((*node).type_ == xmlElementType_XML_XINCLUDE_START) || ((*node).type_ == xmlElementType_XML_XINCLUDE_END)) && !(*node).nsDef.is_null() { xmlFreeNsList((*node).nsDef); (*node).nsDef = ptr::null_mut(); } if (*node).type_ == xmlElementType_XML_ELEMENT_NODE && !(*node).properties.is_null() { property = (*node).properties; while !property.is_null() { if !(*property).ns.is_null() { (*property).ns = ptr::null_mut(); } property = (*property).next; } } } } pub fn xmlGetDoc(cur: xmlNodePtr) -> xmlDocPtr { unsafe { (*cur).doc } } pub fn xmlNextNsSibling(ns: xmlNsPtr) -> xmlNsPtr { unsafe { (*ns).next } } pub fn xmlNsPrefix(ns: 
xmlNsPtr) -> *const c_char { unsafe { (*ns).prefix as *const c_char } } pub fn xmlNsHref(ns: xmlNsPtr) -> *const c_char { unsafe { (*ns).href as *const c_char } } pub fn xmlNodeNsDeclarations(cur: xmlNodePtr) -> xmlNsPtr { unsafe { (*cur).nsDef } } pub fn xmlNodeNs(cur: xmlNodePtr) -> xmlNsPtr { unsafe { (*cur).ns } } pub fn xmlNextPropertySibling(attr: xmlAttrPtr) -> xmlAttrPtr { unsafe { (*attr).next } } pub fn xmlAttrName(attr: xmlAttrPtr) -> *const c_char { unsafe { (*attr).name as *const c_char } } pub fn xmlAttrNs(attr: xmlAttrPtr) -> xmlNsPtr { unsafe { (*attr).ns } } pub fn xmlGetFirstProperty(node: xmlNodePtr) -> xmlAttrPtr { unsafe { (*node).properties } } pub fn xmlGetNodeType(cur: xmlNodePtr) -> xmlElementType { unsafe { (*cur).type_ } } pub fn xmlGetParent(cur: xmlNodePtr) -> xmlNodePtr { unsafe { (*cur).parent } } pub fn xmlGetFirstChild(cur: xmlNodePtr) -> xmlNodePtr { unsafe { (*cur).children } } pub fn xmlPrevSibling(cur: xmlNodePtr) -> xmlNodePtr { unsafe { (*cur).prev } } // helper for tree pub fn xmlNextSibling(cur: xmlNodePtr) -> xmlNodePtr { unsafe { (*cur).next } } pub fn xmlNodeGetName(cur: xmlNodePtr) -> *const c_char { unsafe { (*cur).name as *const c_char } } // dummy function: no debug output at all #[cfg(libxml_older_than_2_12)] unsafe extern "C" fn _ignoreInvalidTagsErrorFunc(_user_data: *mut c_void, error: xmlErrorPtr) { unsafe { if !error.is_null() && (*error).code as xmlParserErrors == xmlParserErrors_XML_HTML_UNKNOWN_TAG { // do not record invalid, in fact (out of despair) claim we ARE well-formed, when a tag is invalid. HACKY_WELL_FORMED = true; } } } #[cfg(not(libxml_older_than_2_12))] unsafe extern "C" fn _ignoreInvalidTagsErrorFunc(_user_data: *mut c_void, error: *const xmlError) { unsafe { if !error.is_null() && (*error).code as xmlParserErrors == xmlParserErrors_XML_HTML_UNKNOWN_TAG { // do not record invalid, in fact (out of despair) claim we ARE well-formed, when a tag is invalid. 
HACKY_WELL_FORMED = true; } } } pub fn setWellFormednessHandler(ctxt: *mut xmlParserCtxt) { unsafe { HACKY_WELL_FORMED = false; xmlSetStructuredErrorFunc(ctxt as *mut c_void, Some(_ignoreInvalidTagsErrorFunc)); } } // helper for parser pub fn htmlWellFormed(ctxt: *mut xmlParserCtxt) -> bool { unsafe { (!ctxt.is_null() && (*ctxt).wellFormed > 0) || HACKY_WELL_FORMED } } // helper for xpath pub fn xmlXPathObjectNumberOfNodes(val: xmlXPathObjectPtr) -> c_int { unsafe { if val.is_null() { -1 } else if (*val).nodesetval.is_null() { -2 } else { (*(*val).nodesetval).nodeNr } } } pub fn xmlXPathObjectGetNodes(val: xmlXPathObjectPtr, size: size_t) -> Vec { unsafe { slice::from_raw_parts((*(*val).nodesetval).nodeTab, size).to_vec() } } #[cfg(any(target_family = "unix", target_os = "macos", all(target_family="windows", target_env="gnu")))] pub fn bindgenFree(val: *mut c_void) { unsafe { if let Some(xml_free_fn) = xmlFree { xml_free_fn(val); } else { libc::free(val); } } } #[cfg(all(target_family="windows", target_env="msvc"))] pub fn bindgenFree(val: *mut c_void) { unsafe { libc::free(val as *mut c_void); } }libxml-0.3.5/src/error.rs000064400000000000000000000074131046102023000134130ustar 00000000000000//! //! Wrapper for xmlError //! 
use super::bindings; use std::ffi::{c_char, c_int, CStr}; /// Rust enum variant of libxml2's xmlErrorLevel #[derive(Debug)] pub enum XmlErrorLevel { /// No error None, /// A simple warning Warning, /// A recoverable error Error, /// A fatal error Fatal, } impl XmlErrorLevel { /// Convert an xmlErrorLevel provided by libxml2 (as an integer) into a Rust enum pub fn from_raw(error_level: bindings::xmlErrorLevel) -> XmlErrorLevel { match error_level { bindings::xmlErrorLevel_XML_ERR_NONE => XmlErrorLevel::None, bindings::xmlErrorLevel_XML_ERR_WARNING => XmlErrorLevel::Warning, bindings::xmlErrorLevel_XML_ERR_ERROR => XmlErrorLevel::Error, bindings::xmlErrorLevel_XML_ERR_FATAL => XmlErrorLevel::Fatal, _ => unreachable!("Should never receive an error level not in the range 0..=3"), } } } /// Wrapper around xmlErrorPtr. /// Some fields have been omitted for simplicity/safety #[derive(Debug)] pub struct StructuredError { /// Human-friendly error message, lossily converted into UTF-8 from the underlying /// C string. May be `None` if an error message is not provided by libxml2. pub message: Option, /// The error's level pub level: XmlErrorLevel, /// The filename, lossily converted into UTF-8 from the underlying C string. /// May be `None` if a filename is not provided by libxml2, such as when validating /// an XML document stored entirely in memory. pub filename: Option, /// The linenumber, or None if not applicable. pub line: Option, /// The column where the error is present, or None if not applicable. pub col: Option, /// The module that the error came from. See libxml's xmlErrorDomain enum. pub domain: c_int, /// The variety of error. See libxml's xmlParserErrors enum. pub code: c_int, } impl StructuredError { /// Copies the error information stored at `error_ptr` into a new `StructuredError` /// /// # Safety /// This function must be given a pointer to a valid `xmlError` struct. 
Typically, you /// will acquire such a pointer by implementing one of a number of callbacks /// defined in libXml which are provided an `xmlError` as an argument. /// /// This function copies data from the memory `error_ptr` but does not deallocate /// the error. Depending on the context in which this function is used, you may /// need to take additional steps to avoid a memory leak. pub unsafe fn from_raw(error_ptr: *const bindings::xmlError) -> Self { let error = *error_ptr; let message = StructuredError::ptr_to_string(error.message); let level = XmlErrorLevel::from_raw(error.level); let filename = StructuredError::ptr_to_string(error.file); let line = if error.line == 0 { None } else { Some(error.line) }; let col = if error.int2 == 0 { None } else { Some(error.int2) }; StructuredError { message, level, filename, line, col, domain: error.domain, code: error.code, } } /// Human-readable informative error message. /// /// This function is a hold-over from the original bindings to libxml's error /// reporting mechanism. Instead of calling this method, you can access the /// StructuredError `message` field directly. #[deprecated(since="0.3.3", note="Please use the `message` field directly instead.")] pub fn message(&self) -> &str { self.message.as_deref().unwrap_or("") } /// Returns the provided c_str as Some(String), or None if the provided pointer is null. fn ptr_to_string(c_str: *mut c_char) -> Option { if c_str.is_null() { return None; } let raw_str = unsafe { CStr::from_ptr(c_str) }; Some(String::from_utf8_lossy(raw_str.to_bytes()).to_string()) } } libxml-0.3.5/src/lib.rs000064400000000000000000000013571046102023000130310ustar 00000000000000//! # A wrapper for libxml2 //! This library provides an interface to a subset of the libxml API. //! The idea is to extend it whenever more functionality is needed. //! Providing a more or less complete wrapper would be too much work. 
#![deny(missing_docs)] // Our new methods return Result types #![allow(clippy::new_ret_no_self, clippy::result_unit_err)] /// Bindings to the C interface pub mod bindings; mod c_helpers; /// XML and HTML parsing pub mod parser; /// Manipulations on the DOM representation pub mod tree; /// XML Global Error Structures and Handling pub mod error; /// `XPath` module for global lookup in the DOM pub mod xpath; /// Schema Validation pub mod schemas; /// Read-only parallel primitives pub mod readonly; libxml-0.3.5/src/parser.rs000064400000000000000000000301151046102023000135510ustar 00000000000000//! The parser functionality use crate::bindings::*; use crate::c_helpers::*; use crate::tree::*; use std::convert::AsRef; use std::error::Error; use std::ffi::c_void; use std::ffi::{CStr, CString}; use std::fmt; use std::fs; use std::io; use std::os::raw::{c_char, c_int}; use std::ptr; use std::slice; use std::str; use std::sync::Once; static INIT_LIBXML_PARSER: Once = Once::new(); enum XmlParserOption { Recover = 1, Nodefdtd = 4, Noerror = 32, Nowarning = 64, Pedantic = 128, Noblanks = 256, Nonet = 2048, Noimplied = 8192, Compact = 65_536, Huge = 524_288, Ignoreenc = 2_097_152, } enum HtmlParserOption { Recover = 1, Nodefdtd = 4, Noerror = 32, Nowarning = 64, Pedantic = 128, Noblanks = 256, Nonet = 2048, Noimplied = 8192, Huge = 524_288, Compact = 65_536, Ignoreenc = 2_097_152, } /// Parser Options pub struct ParserOptions<'a> { /// Relaxed parsing pub recover: bool, /// do not default a doctype if not found pub no_def_dtd: bool, /// do not default a doctype if not found pub no_error: bool, /// suppress warning reports pub no_warning: bool, /// pedantic error reporting pub pedantic: bool, /// remove blank nodes pub no_blanks: bool, /// Forbid network access pub no_net: bool, /// Do not add implied html/body... 
elements pub no_implied: bool, /// relax any hardcoded limit from the parser pub huge: bool, /// compact small text nodes pub compact: bool, /// ignore internal document encoding hint pub ignore_enc: bool, /// manually-specified encoding pub encoding: Option<&'a str>, } impl ParserOptions<'_> { pub(crate) fn to_flags(&self, format: &ParseFormat) -> i32 { macro_rules! to_option_flag { ( $condition:expr => $variant:ident ) => { if $condition { match format { ParseFormat::HTML => HtmlParserOption::$variant as i32, ParseFormat::XML => XmlParserOption::$variant as i32, } } else { 0 } }; } // return the combined flags to_option_flag!(self.recover => Recover) + to_option_flag!(self.no_def_dtd => Nodefdtd) + to_option_flag!(self.no_error => Noerror) + to_option_flag!(self.no_warning => Nowarning) + to_option_flag!(self.pedantic => Pedantic) + to_option_flag!(self.no_blanks => Noblanks) + to_option_flag!(self.no_net => Nonet) + to_option_flag!(self.no_implied => Noimplied) + to_option_flag!(self.huge => Huge) + to_option_flag!(self.compact => Compact) + to_option_flag!(self.ignore_enc => Ignoreenc) } } impl Default for ParserOptions<'_> { fn default() -> Self { ParserOptions { recover: true, no_def_dtd: false, no_error: true, no_warning: true, pedantic: false, no_blanks: false, no_net: false, no_implied: false, huge: false, compact: false, ignore_enc: false, encoding: None, } } } ///Parser Errors pub enum XmlParseError { ///Parsing returned a null pointer as document pointer GotNullPointer, ///Could not open file error. FileOpenError, ///Document too large for libxml2. 
DocumentTooLarge, } impl Error for XmlParseError {} impl fmt::Debug for XmlParseError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{self}") } } impl fmt::Display for XmlParseError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!( f, "{}", match self { XmlParseError::GotNullPointer => "Got a Null pointer", XmlParseError::FileOpenError => "Unable to open path to file.", XmlParseError::DocumentTooLarge => "Document too large for i32.", } ) } } /// Default encoding when not provided. const DEFAULT_ENCODING: *const c_char = ptr::null(); /// Default URL when not provided. const DEFAULT_URL: *const c_char = ptr::null(); /// Open file function. fn xml_open(filename: &str) -> io::Result<*mut c_void> { let ptr = Box::into_raw(Box::new(fs::File::open(filename)?)); Ok(ptr as *mut c_void) } /// Read callback for an FS file. unsafe extern "C" fn xml_read(context: *mut c_void, buffer: *mut c_char, len: c_int) -> c_int { // Len is always positive, typically 40-4000 bytes. let file = context as *mut fs::File; let buf = slice::from_raw_parts_mut(buffer as *mut u8, len as usize); match io::Read::read(&mut *file, buf) { Ok(v) => v as c_int, Err(_) => -1, } } type XmlReadCallback = unsafe extern "C" fn(*mut c_void, *mut c_char, c_int) -> c_int; /// Close callback for an FS file. unsafe extern "C" fn xml_close(context: *mut c_void) -> c_int { // Take rust ownership of the context and then drop it. let file = context as *mut fs::File; let _ = Box::from_raw(file); 0 } type XmlCloseCallback = unsafe extern "C" fn(*mut c_void) -> c_int; ///Convert usize to i32 safely. fn try_usize_to_i32(value: usize) -> Result { if cfg!(target_pointer_width = "16") || (value < i32::MAX as usize) { // Cannot safely use our value comparison, but the conversion if always safe. // Or, if the value can be safely represented as a 32-bit signed integer. Ok(value as i32) } else { // Document too large, cannot parse using libxml2. 
Err(XmlParseError::DocumentTooLarge) } } #[derive(Debug, PartialEq, Eq)] /// Enum for the parse formats supported by libxml2 pub enum ParseFormat { /// Strict parsing for XML XML, /// Relaxed parsing for HTML HTML, } /// Parsing API wrapper for libxml2 pub struct Parser { /// The `ParseFormat` for this parser pub format: ParseFormat, } impl Default for Parser { /// Create a parser for XML documents fn default() -> Self { // avoid deadlocks from using multiple parsers INIT_LIBXML_PARSER.call_once(|| unsafe { crate::bindings::xmlInitParser(); }); Parser { format: ParseFormat::XML, } } } impl Parser { /// Create a parser for HTML documents pub fn default_html() -> Self { // avoid deadlocks from using multiple parsers INIT_LIBXML_PARSER.call_once(|| unsafe { crate::bindings::xmlInitParser(); }); Parser { format: ParseFormat::HTML, } } /// Parses the XML/HTML file `filename` to generate a new `Document` pub fn parse_file(&self, filename: &str) -> Result { self.parse_file_with_options(filename, ParserOptions::default()) } /// Parses the XML/HTML file `filename` with a manually-specified parser-options /// to generate a new `Document` pub fn parse_file_with_options( &self, filename: &str, parser_options: ParserOptions, ) -> Result { // Create extern C callbacks for to read and close a Rust file through // a void pointer. let ioread: Option = Some(xml_read); let ioclose: Option = Some(xml_close); let ioctx = match xml_open(filename) { Ok(v) => v, Err(_) => return Err(XmlParseError::FileOpenError), }; // Process encoding. let encoding_cstring: Option = parser_options.encoding.map(|v| CString::new(v).unwrap()); let encoding_ptr = match encoding_cstring { Some(v) => v.as_ptr(), None => DEFAULT_ENCODING, }; // Process url. 
let url_ptr = DEFAULT_URL; unsafe { xmlKeepBlanksDefault(1); } let options = parser_options.to_flags(&self.format); match self.format { ParseFormat::XML => unsafe { let doc_ptr = xmlReadIO(ioread, ioclose, ioctx, url_ptr, encoding_ptr, options); if doc_ptr.is_null() { Err(XmlParseError::GotNullPointer) } else { Ok(Document::new_ptr(doc_ptr)) } }, ParseFormat::HTML => unsafe { let doc_ptr = htmlReadIO(ioread, ioclose, ioctx, url_ptr, encoding_ptr, options); if doc_ptr.is_null() { Err(XmlParseError::GotNullPointer) } else { Ok(Document::new_ptr(doc_ptr)) } }, } } ///Parses the XML/HTML bytes `input` to generate a new `Document` pub fn parse_string>(&self, input: Bytes) -> Result { self.parse_string_with_options(input, ParserOptions::default()) } ///Parses the XML/HTML bytes `input` with a manually-specified ///parser-options to generate a new `Document` pub fn parse_string_with_options>( &self, input: Bytes, parser_options: ParserOptions, ) -> Result { // Process input bytes. let input_bytes = input.as_ref(); let input_ptr = input_bytes.as_ptr() as *const c_char; let input_len = try_usize_to_i32(input_bytes.len())?; // Process encoding. let encoding_cstring: Option = parser_options.encoding.map(|v| CString::new(v).unwrap()); let encoding_ptr = match encoding_cstring { Some(v) => v.as_ptr(), None => DEFAULT_ENCODING, }; // Process url. let url_ptr = DEFAULT_URL; let options = parser_options.to_flags(&self.format); match self.format { ParseFormat::XML => unsafe { let docptr = xmlReadMemory(input_ptr, input_len, url_ptr, encoding_ptr, options); if docptr.is_null() { Err(XmlParseError::GotNullPointer) } else { Ok(Document::new_ptr(docptr)) } }, ParseFormat::HTML => unsafe { let docptr = htmlReadMemory(input_ptr, input_len, url_ptr, encoding_ptr, options); if docptr.is_null() { Err(XmlParseError::GotNullPointer) } else { Ok(Document::new_ptr(docptr)) } }, } } /// Checks a string for well-formedness. 
pub fn is_well_formed_html>(&self, input: Bytes) -> bool { self.is_well_formed_html_with_encoding(input, None) } /// Checks a string for well-formedness with manually-specified encoding. /// IMPORTANT: This function is currently implemented in a HACKY way, to ignore invalid errors for HTML5 elements (such as ) /// this means you should NEVER USE IT WHILE THREADING, it is CERTAIN TO BREAK /// /// Help is welcome in implementing it correctly. pub fn is_well_formed_html_with_encoding>( &self, input: Bytes, encoding: Option<&str>, ) -> bool { // Process input string. let input_bytes = input.as_ref(); if input_bytes.is_empty() { return false; } let input_ptr = input_bytes.as_ptr() as *const c_char; let input_len = match try_usize_to_i32(input_bytes.len()) { Ok(v) => v, Err(_) => return false, }; // Process encoding. let encoding_cstring: Option = encoding.map(|v| CString::new(v).unwrap()); let encoding_ptr = match encoding_cstring { Some(v) => v.as_ptr(), None => DEFAULT_ENCODING, }; // Process url. 
let url_ptr = DEFAULT_URL; // disable generic error lines from libxml2 match self.format { ParseFormat::XML => false, // TODO: Add support for XML at some point ParseFormat::HTML => unsafe { let ctxt = htmlNewParserCtxt(); setWellFormednessHandler(ctxt); let docptr = htmlCtxtReadMemory(ctxt, input_ptr, input_len, url_ptr, encoding_ptr, 10_596); // htmlParserOption = 4+32+64+256+2048+8192 let well_formed_final = if htmlWellFormed(ctxt) { // Basic well-formedness passes, let's check if we have an element as root too if !docptr.is_null() { let node_ptr = xmlDocGetRootElement(docptr); if node_ptr.is_null() { return false } let name_ptr = xmlNodeGetName(node_ptr); if name_ptr.is_null() { false } //empty string else { let c_root_name = CStr::from_ptr(name_ptr); let root_name = str::from_utf8(c_root_name.to_bytes()).unwrap().to_owned(); root_name == "html" } } else { false } } else { false }; if !ctxt.is_null() { htmlFreeParserCtxt(ctxt); } if !docptr.is_null() { xmlFreeDoc(docptr); } well_formed_final }, } } } ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������libxml-0.3.5/src/readonly/tree.rs�������������������������������������������������������������������0000644�0000000�0000000�00000041511�10461020230�0015033�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������use libc::{c_char, c_void}; use 
std::collections::{HashMap, HashSet}; use std::ffi::{CStr, CString}; use std::ptr; use std::str; use crate::bindings::*; use crate::c_helpers::*; use crate::tree::namespace::Namespace; use crate::tree::nodetype::NodeType; use crate::tree::Document; use crate::xpath::Context; /// Lightweight struct for read-only parallel processing #[derive(Debug, Copy, Clone)] pub struct RoNode(pub(crate) xmlNodePtr); // we claim Sync and Send, as we are in read-only mode over the owning document unsafe impl Sync for RoNode {} unsafe impl Send for RoNode {} impl PartialEq for RoNode { /// Two nodes are considered equal, if they point to the same xmlNode. fn eq(&self, other: &RoNode) -> bool { std::ptr::eq(self.0, other.0) } } impl Eq for RoNode {} impl RoNode { /// Immutably borrows the underlying libxml2 `xmlNodePtr` pointer pub fn node_ptr(&self) -> xmlNodePtr { self.0 } /// Returns the next sibling if it exists pub fn get_next_sibling(self) -> Option { let ptr = xmlNextSibling(self.0); self.ptr_as_option(ptr) } /// Returns the previous sibling if it exists pub fn get_prev_sibling(self) -> Option { let ptr = xmlPrevSibling(self.0); self.ptr_as_option(ptr) } /// Returns the first child if it exists pub fn get_first_child(self) -> Option { let ptr = xmlGetFirstChild(self.0); self.ptr_as_option(ptr) } /// Returns the last child if it exists pub fn get_last_child(self) -> Option { let ptr = unsafe { xmlGetLastChild(self.0) }; self.ptr_as_option(ptr) } /// Returns the next element sibling if it exists pub fn get_next_element_sibling(&self) -> Option { match self.get_next_sibling() { None => None, Some(child) => { let mut current_node = child; while !current_node.is_element_node() { if let Some(sibling) = current_node.get_next_sibling() { current_node = sibling; } else { break; } } if current_node.is_element_node() { Some(current_node) } else { None } } } } /// Returns the previous element sibling if it exists pub fn get_prev_element_sibling(&self) -> Option { match 
self.get_prev_sibling() { None => None, Some(child) => { let mut current_node = child; while !current_node.is_element_node() { if let Some(sibling) = current_node.get_prev_sibling() { current_node = sibling; } else { break; } } if current_node.is_element_node() { Some(current_node) } else { None } } } } /// Returns the first element child if it exists pub fn get_first_element_child(self) -> Option { match self.get_first_child() { None => None, Some(child) => { let mut current_node = child; while !current_node.is_element_node() { if let Some(sibling) = current_node.get_next_sibling() { current_node = sibling; } else { break; } } if current_node.is_element_node() { Some(current_node) } else { None } } } } /// Returns the last element child if it exists pub fn get_last_element_child(&self) -> Option { match self.get_last_child() { None => None, Some(child) => { let mut current_node = child; while !current_node.is_element_node() { if let Some(sibling) = current_node.get_prev_sibling() { current_node = sibling; } else { break; } } if current_node.is_element_node() { Some(current_node) } else { None } } } } /// Returns all child nodes of the given node as a vector pub fn get_child_nodes(self) -> Vec { let mut children = Vec::new(); if let Some(first_child) = self.get_first_child() { children.push(first_child); while let Some(sibling) = children.last().unwrap().get_next_sibling() { children.push(sibling) } } children } /// Returns all child elements of the given node as a vector pub fn get_child_elements(self) -> Vec { self .get_child_nodes() .into_iter() .filter(|n| n.get_type() == Some(NodeType::ElementNode)) .collect::>() } /// Returns the parent if it exists pub fn get_parent(self) -> Option { let ptr = xmlGetParent(self.0); self.ptr_as_option(ptr) } /// Get the node type pub fn get_type(self) -> Option { NodeType::from_int(xmlGetNodeType(self.0)) } /// Returns true if it is a text node pub fn is_text_node(self) -> bool { self.get_type() == Some(NodeType::TextNode) } 
/// Checks if the given node is an Element
pub fn is_element_node(self) -> bool {
  matches!(self.get_type(), Some(NodeType::ElementNode))
}

/// Checks if the underlying libxml2 pointer is `NULL`
pub fn is_null(self) -> bool {
  self.0.is_null()
}

/// Returns the name of the node (empty string if name pointer is `NULL`)
pub fn get_name(self) -> String {
  let name_ptr = xmlNodeGetName(self.0);
  if name_ptr.is_null() {
    String::new()
  } else {
    unsafe { CStr::from_ptr(name_ptr) }
      .to_string_lossy()
      .into_owned()
  }
}

/// Returns the content of the node
/// (assumes UTF-8 XML document)
pub fn get_content(self) -> String {
  let content_ptr = unsafe { xmlNodeGetContent(self.0) };
  if content_ptr.is_null() {
    //empty string when none
    return String::new();
  }
  let content = unsafe { CStr::from_ptr(content_ptr as *const c_char) }
    .to_string_lossy()
    .into_owned();
  // The text was copied into `content` above; release the libxml2 buffer.
  bindgenFree(content_ptr as *mut c_void);
  content
}

/// Returns the value of property `name`
pub fn get_property(self, name: &str) -> Option<String> {
  let c_name = CString::new(name).unwrap();
  let value_ptr = unsafe { xmlGetProp(self.0, c_name.as_bytes().as_ptr()) };
  if value_ptr.is_null() {
    None
  } else {
    let value = unsafe { CStr::from_ptr(value_ptr as *const c_char) }
      .to_string_lossy()
      .into_owned();
    // The value was copied above; release the libxml2 buffer.
    bindgenFree(value_ptr as *mut c_void);
    Some(value)
  }
}

/// Returns the value of property `name` in namespace `ns`
pub fn get_property_ns(self, name: &str, ns: &str) -> Option<String> {
  let c_name = CString::new(name).unwrap();
  let c_ns = CString::new(ns).unwrap();
  let value_ptr =
    unsafe { xmlGetNsProp(self.0, c_name.as_bytes().as_ptr(), c_ns.as_bytes().as_ptr()) };
  if value_ptr.is_null() {
    None
  } else {
    let value = unsafe { CStr::from_ptr(value_ptr as *const c_char) }
      .to_string_lossy()
      .into_owned();
    // The value was copied above; release the libxml2 buffer.
    bindgenFree(value_ptr as *mut c_void);
    Some(value)
  }
}

/// Returns the value of property `name` with no namespace
pub fn get_property_no_ns(self, name: &str) -> Option { let c_name = CString::new(name).unwrap(); let value_ptr = unsafe { xmlGetNoNsProp(self.0, c_name.as_bytes().as_ptr()) }; if value_ptr.is_null() { return None; } let c_value_string = unsafe { CStr::from_ptr(value_ptr as *const c_char) }; let prop_str = c_value_string.to_string_lossy().into_owned(); bindgenFree(value_ptr as *mut c_void); Some(prop_str) } /// Return an attribute as a `Node` struct of type AttributeNode pub fn get_property_node(self, name: &str) -> Option { let c_name = CString::new(name).unwrap(); unsafe { let attr_node = xmlHasProp(self.0, c_name.as_bytes().as_ptr()); self.ptr_as_option(attr_node as xmlNodePtr) } } /// Return an attribute in a namespace `ns` as a `Node` of type AttributeNode pub fn get_property_node_ns(self, name: &str, ns: &str) -> Option { let c_name = CString::new(name).unwrap(); let c_ns = CString::new(ns).unwrap(); let attr_node = unsafe { xmlHasNsProp(self.0, c_name.as_bytes().as_ptr(), c_ns.as_bytes().as_ptr()) }; self.ptr_as_option(attr_node as xmlNodePtr) } /// Return an attribute with no namespace as a `Node` of type AttributeNode pub fn get_property_node_no_ns(self, name: &str) -> Option { let c_name = CString::new(name).unwrap(); let attr_node = unsafe { xmlHasNsProp(self.0, c_name.as_bytes().as_ptr(), ptr::null()) }; self.ptr_as_option(attr_node as xmlNodePtr) } /// Alias for get_property pub fn get_attribute(self, name: &str) -> Option { self.get_property(name) } /// Alias for get_property_ns pub fn get_attribute_ns(self, name: &str, ns: &str) -> Option { self.get_property_ns(name, ns) } /// Alias for get_property_no_ns pub fn get_attribute_no_ns(self, name: &str) -> Option { self.get_property_no_ns(name) } /// Alias for get_property_node pub fn get_attribute_node(self, name: &str) -> Option { self.get_property_node(name) } /// Alias for get_property_node_ns pub fn get_attribute_node_ns(self, name: &str, ns: &str) -> Option { self.get_property_node_ns(name, ns) } 
/// Alias for get_property_node_no_ns pub fn get_attribute_node_no_ns(self, name: &str) -> Option { self.get_property_node_no_ns(name) } /// Get a copy of the attributes of this node pub fn get_properties(self) -> HashMap { let mut attributes = HashMap::new(); let mut current_prop = xmlGetFirstProperty(self.0); while !current_prop.is_null() { let name_ptr = xmlAttrName(current_prop); let c_name_string = unsafe { CStr::from_ptr(name_ptr) }; let name = c_name_string.to_string_lossy().into_owned(); let value = self.get_property(&name).unwrap_or_default(); attributes.insert(name, value); current_prop = xmlNextPropertySibling(current_prop); } attributes } /// Get a copy of this node's attributes and their namespaces pub fn get_properties_ns(self) -> HashMap<(String, Option), String> { let mut attributes = HashMap::new(); let mut current_prop = xmlGetFirstProperty(self.0); while !current_prop.is_null() { let name_ptr = xmlAttrName(current_prop); let c_name_string = unsafe { CStr::from_ptr(name_ptr) }; let name = c_name_string.to_string_lossy().into_owned(); let ns_ptr = xmlAttrNs(current_prop); if ns_ptr.is_null() { let value = self.get_property_no_ns(&name).unwrap_or_default(); attributes.insert((name, None), value); } else { let ns = Namespace { ns_ptr }; let value = self .get_property_ns(&name, &ns.get_href()) .unwrap_or_default(); attributes.insert((name, Some(ns)), value); } current_prop = xmlNextPropertySibling(current_prop); } attributes } /// Alias for `get_properties` pub fn get_attributes(self) -> HashMap { self.get_properties() } /// Alias for `get_properties_ns` pub fn get_attributes_ns(self) -> HashMap<(String, Option), String> { self.get_properties_ns() } /// Check if a property has been defined, without allocating its value pub fn has_property(self, name: &str) -> bool { let c_name = CString::new(name).unwrap(); let value_ptr = unsafe { xmlHasProp(self.0, c_name.as_bytes().as_ptr()) }; !value_ptr.is_null() } /// Check if property `name` in namespace `ns` 
exists pub fn has_property_ns(self, name: &str, ns: &str) -> bool { let c_name = CString::new(name).unwrap(); let c_ns = CString::new(ns).unwrap(); let value_ptr = unsafe { xmlHasNsProp(self.0, c_name.as_bytes().as_ptr(), c_ns.as_bytes().as_ptr()) }; !value_ptr.is_null() } /// Check if property `name` with no namespace exists pub fn has_property_no_ns(self, name: &str) -> bool { let c_name = CString::new(name).unwrap(); let value_ptr = unsafe { xmlHasNsProp(self.0, c_name.as_bytes().as_ptr(), ptr::null()) }; !value_ptr.is_null() } /// Alias for has_property pub fn has_attribute(self, name: &str) -> bool { self.has_property(name) } /// Alias for has_property_ns pub fn has_attribute_ns(self, name: &str, ns: &str) -> bool { self.has_property_ns(name, ns) } /// Alias for has_property_no_ns pub fn has_attribute_no_ns(self, name: &str) -> bool { self.has_property_no_ns(name) } /// Gets the active namespace associated of this node pub fn get_namespace(self) -> Option { let ns_ptr = xmlNodeNs(self.0); if ns_ptr.is_null() { None } else { Some(Namespace { ns_ptr }) } } /// Gets a list of namespaces associated with this node pub fn get_namespaces(self, doc: &Document) -> Vec { let list_ptr_raw = unsafe { xmlGetNsList(doc.doc_ptr(), self.0) }; if list_ptr_raw.is_null() { Vec::new() } else { let mut namespaces = Vec::new(); let mut ptr_iter = list_ptr_raw as *mut xmlNsPtr; unsafe { while !ptr_iter.is_null() && !(*ptr_iter).is_null() { namespaces.push(Namespace { ns_ptr: *ptr_iter }); ptr_iter = ptr_iter.add(1); } /* TODO: valgrind suggests this technique isn't sufficiently fluent: ==114895== Conditional jump or move depends on uninitialised value(s) ==114895== at 0x4E9962F: xmlFreeNs (in /usr/lib/x86_64-linux-gnu/libxml2.so.2.9.4) ==114895== by 0x195CE8: libxml::tree::Node::get_namespaces (tree.rs:723) ==114895== by 0x12E7B6: base_tests::can_work_with_namespaces (base_tests.rs:537) DG: I could not improve on this state without creating memory leaks after ~1 hour, so I am 
marking it as future work. */ /* TODO: How do we properly deallocate here? The approach bellow reliably segfaults tree_tests on 1 thread */ // println!("\n-- xmlfreens on : {:?}", list_ptr_raw); // xmlFreeNs(list_ptr_raw as xmlNsPtr); } namespaces } } /// Get a list of namespaces declared with this node pub fn get_namespace_declarations(self) -> Vec { if self.get_type() != Some(NodeType::ElementNode) { // only element nodes can have declarations return Vec::new(); } let mut namespaces = Vec::new(); let mut ns_ptr = xmlNodeNsDeclarations(self.0); while !ns_ptr.is_null() { if !xmlNsPrefix(ns_ptr).is_null() || !xmlNsHref(ns_ptr).is_null() { namespaces.push(Namespace { ns_ptr }); } ns_ptr = xmlNextNsSibling(ns_ptr); } namespaces } /// Looks up the prefix of a namespace from its URI, basedo around a given `Node` pub fn lookup_namespace_prefix(self, href: &str) -> Option { if href.is_empty() { return None; } let c_href = CString::new(href).unwrap(); unsafe { let ptr_mut = self.0; let ns_ptr = xmlSearchNsByHref(xmlGetDoc(ptr_mut), ptr_mut, c_href.as_bytes().as_ptr()); if !ns_ptr.is_null() { let ns = Namespace { ns_ptr }; let ns_prefix = ns.get_prefix(); Some(ns_prefix) } else { None } } } /// Looks up the uri of a namespace from its prefix, basedo around a given `Node` pub fn lookup_namespace_uri(self, prefix: &str) -> Option { if prefix.is_empty() { return None; } let c_prefix = CString::new(prefix).unwrap(); unsafe { let ns_ptr = xmlSearchNs(xmlGetDoc(self.0), self.0, c_prefix.as_bytes().as_ptr()); if !ns_ptr.is_null() { let ns = Namespace { ns_ptr }; let ns_prefix = ns.get_href(); if !ns_prefix.is_empty() { Some(ns_prefix) } else { None } } else { None } } } /// Get a set of class names from this node's attributes pub fn get_class_names(self) -> HashSet { let mut set = HashSet::new(); if let Some(value) = self.get_property("class") { for n in value.split(' ') { set.insert(n.to_owned()); } } set } /// find read-only nodes via xpath, at the specified node and a given 
document pub fn findnodes(self, xpath: &str, owner: &Document) -> Result, ()> { let context = Context::new(owner)?; let evaluated = context.node_evaluate_readonly(xpath, self)?; Ok(evaluated.get_readonly_nodes_as_vec()) } /// Read-only nodes are always linked pub fn is_unlinked(self) -> bool { false } /// Read-only nodes only need a null check fn ptr_as_option(self, node_ptr: xmlNodePtr) -> Option { if node_ptr.is_null() { None } else { Some(RoNode(node_ptr)) } } /// `libc::c_void` isn't hashable and cannot be made hashable pub fn to_hashable(self) -> usize { self.0 as usize } /// Create a mock node, used for a placeholder argument pub fn null() -> Self { RoNode(ptr::null_mut()) } } ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������libxml-0.3.5/src/readonly.rs������������������������������������������������������������������������0000644�0000000�0000000�00000000047�10461020230�0014073�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������mod tree; pub use self::tree::RoNode; 
�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������libxml-0.3.5/src/schemas/common.rs������������������������������������������������������������������0000644�0000000�0000000�00000001532�10461020230�0015171�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������//! //! Common Utilities //! use crate::bindings; use crate::error::StructuredError; use std::ffi::c_void; /// Provides a callback to the C side of things to accumulate xmlErrors to be /// handled back on the Rust side. 
#[cfg(libxml_older_than_2_12)] pub unsafe extern "C" fn structured_error_handler(ctx: *mut c_void, error: bindings::xmlErrorPtr) { assert!(!ctx.is_null()); let errlog = unsafe { &mut *{ ctx as *mut Vec } }; let error = unsafe { StructuredError::from_raw(error) }; errlog.push(error); } #[cfg(not(libxml_older_than_2_12))] pub unsafe extern "C" fn structured_error_handler(ctx: *mut c_void, error: *const bindings::xmlError) { assert!(!ctx.is_null()); let errlog = unsafe { &mut *{ ctx as *mut Vec } }; let error = unsafe { StructuredError::from_raw(error) }; errlog.push(error); }����������������������������������������������������������������������������������������������������������������������������������������������������������������������libxml-0.3.5/src/schemas/mod.rs���������������������������������������������������������������������0000644�0000000�0000000�00000000771�10461020230�0014464�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������//! //! Schema Validation Support (XSD) //! //! This module exposes wraps xmlschemas in libxml2. See original documentation or //! look at the example at examples/schema_example.rs for usage. //! //! WARNING: This module has not been tested in a multithreaded or multiprocessing //! environment. //! 
mod common; mod parser; mod schema; mod validation; use schema::Schema; // internally handled by SchemaValidationContext pub use parser::SchemaParserContext; pub use validation::SchemaValidationContext; �������libxml-0.3.5/src/schemas/parser.rs������������������������������������������������������������������0000644�0000000�0000000�00000005622�10461020230�0015201�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������//! //! Wrapping of the Parser Context (xmlSchemaParserCtxt) //! use super::common; use crate::bindings; use crate::error::StructuredError; use crate::tree::document::Document; use std::ffi::CString; use std::os::raw::c_char; /// Wrapper on xmlSchemaParserCtxt pub struct SchemaParserContext { inner: *mut bindings::_xmlSchemaParserCtxt, errlog: *mut Vec, } impl SchemaParserContext { /// Create a schema parsing context from a Document object pub fn from_document(doc: &Document) -> Self { let parser = unsafe { bindings::xmlSchemaNewDocParserCtxt(doc.doc_ptr()) }; if parser.is_null() { panic!("Failed to create schema parser context from XmlDocument"); // TODO error handling } Self::from_raw(parser) } /// Create a schema parsing context from a buffer in memory pub fn from_buffer>(buff: Bytes) -> Self { let buff_bytes = buff.as_ref(); let buff_ptr = buff_bytes.as_ptr() as *const c_char; let buff_len = buff_bytes.len() as i32; let parser = unsafe { bindings::xmlSchemaNewMemParserCtxt(buff_ptr, buff_len) }; if parser.is_null() { panic!("Failed to create schema parser context from buffer"); // TODO error handling } Self::from_raw(parser) } /// Create a schema parsing context from an URL pub fn from_file(path: &str) -> Self { let path = CString::new(path).unwrap(); // 
TODO error handling for \0 containing strings let path_ptr = path.as_bytes_with_nul().as_ptr() as *const c_char; let parser = unsafe { bindings::xmlSchemaNewParserCtxt(path_ptr) }; if parser.is_null() { panic!("Failed to create schema parser context from path"); // TODO error handling } Self::from_raw(parser) } /// Drains error log from errors that might have accumulated while parsing schema pub fn drain_errors(&mut self) -> Vec { assert!(!self.errlog.is_null()); let errors = unsafe { &mut *self.errlog }; std::mem::take(errors) } /// Return a raw pointer to the underlying xmlSchemaParserCtxt structure pub fn as_ptr(&self) -> *mut bindings::_xmlSchemaParserCtxt { self.inner } } /// Private Interface impl SchemaParserContext { fn from_raw(parser: *mut bindings::_xmlSchemaParserCtxt) -> Self { let errors: Box> = Box::default(); unsafe { let reference: *mut Vec = std::mem::transmute(errors); bindings::xmlSchemaSetParserStructuredErrors( parser, Some(common::structured_error_handler), reference as *mut _, ); Self { inner: parser, errlog: reference, } } } } impl Drop for SchemaParserContext { fn drop(&mut self) { unsafe { bindings::xmlSchemaFreeParserCtxt(self.inner); if !self.errlog.is_null() { let errors: Box> = std::mem::transmute(self.errlog); drop(errors) } } } } ��������������������������������������������������������������������������������������������������������������libxml-0.3.5/src/schemas/schema.rs������������������������������������������������������������������0000644�0000000�0000000�00000002566�10461020230�0015151�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������//! //! Wrapping of the Schema (xmlSchema) //! 
use std::sync::OnceLock; use super::SchemaParserContext; use crate::bindings; use crate::error::StructuredError; static SCHEMA_TYPES_LOCK: OnceLock = OnceLock::new(); /// Wrapper on xmlSchema pub struct Schema(*mut bindings::_xmlSchema); impl Schema { /// Create schema by having a SchemaParserContext do the actual parsing of the schema it was provided pub fn from_parser(parser: &mut SchemaParserContext) -> Result> { // `xmlSchemaParse` calls `xmlSchemaInitTypes`. // `xmlSchemaInitTypes` is a lazy function which is only intended to be // called once for optimization purposes - but libxml2 doesn't do this // in a thread-safe manner. We wrap the call in a OnceLock so that it // only ever needs to be invoked once - and will do it in a thread-safe // way. let _ = SCHEMA_TYPES_LOCK.get_or_init(|| { unsafe { bindings::xmlSchemaInitTypes() }; true }); let raw = unsafe { bindings::xmlSchemaParse(parser.as_ptr()) }; if raw.is_null() { Err(parser.drain_errors()) } else { Ok(Self(raw)) } } /// Return a raw pointer to the underlying xmlSchema structure pub fn as_ptr(&self) -> *mut bindings::_xmlSchema { self.0 } } impl Drop for Schema { fn drop(&mut self) { unsafe { bindings::xmlSchemaFree(self.0) } } } ������������������������������������������������������������������������������������������������������������������������������������������libxml-0.3.5/src/schemas/validation.rs��������������������������������������������������������������0000644�0000000�0000000�00000007450�10461020230�0016040�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������//! //! Wrapping of the Validation Context (xmlSchemaValidCtxt) //! 
use super::common; use super::Schema; use super::SchemaParserContext; use crate::bindings; use crate::tree::document::Document; use crate::tree::node::Node; use crate::error::StructuredError; use std::ffi::CString; use std::os::raw::c_char; /// Wrapper on xmlSchemaValidCtxt pub struct SchemaValidationContext { ctxt: *mut bindings::_xmlSchemaValidCtxt, errlog: *mut Vec, _schema: Schema, } impl SchemaValidationContext { /// Create a schema validation context from a parser object pub fn from_parser(parser: &mut SchemaParserContext) -> Result> { let schema = Schema::from_parser(parser); match schema { Ok(s) => { let ctx = unsafe { bindings::xmlSchemaNewValidCtxt(s.as_ptr()) }; if ctx.is_null() { panic!("Failed to create validation context from XML schema") // TODO error handling } Ok(Self::from_raw(ctx, s)) } Err(e) => Err(e), } } /// Validates a given Document, that is to be tested to comply with the loaded XSD schema definition pub fn validate_document(&mut self, doc: &Document) -> Result<(), Vec> { let rc = unsafe { bindings::xmlSchemaValidateDoc(self.ctxt, doc.doc_ptr()) }; match rc { -1 => panic!("Failed to validate document due to internal error"), // TODO error handling 0 => Ok(()), _ => Err(self.drain_errors()), } } /// Validates a given file from path for its compliance with the loaded XSD schema definition pub fn validate_file(&mut self, path: &str) -> Result<(), Vec> { let path = CString::new(path).unwrap(); // TODO error handling for \0 containing strings let path_ptr = path.as_bytes_with_nul().as_ptr() as *const c_char; let rc = unsafe { bindings::xmlSchemaValidateFile(self.ctxt, path_ptr, 0) }; match rc { -1 => panic!("Failed to validate file due to internal error"), // TODO error handling 0 => Ok(()), _ => Err(self.drain_errors()), } } /// Validates a branch or leaf of a document given as a Node against the loaded XSD schema definition pub fn validate_node(&mut self, node: &Node) -> Result<(), Vec> { let rc = unsafe { 
bindings::xmlSchemaValidateOneElement(self.ctxt, node.node_ptr()) }; match rc { -1 => panic!("Failed to validate element due to internal error"), // TODO error handling 0 => Ok(()), _ => Err(self.drain_errors()), } } /// Drains error log from errors that might have accumulated while validating something pub fn drain_errors(&mut self) -> Vec { assert!(!self.errlog.is_null()); let errors = unsafe { &mut *self.errlog }; std::mem::take(errors) } /// Return a raw pointer to the underlying xmlSchemaValidCtxt structure pub fn as_ptr(&self) -> *mut bindings::_xmlSchemaValidCtxt { self.ctxt } } /// Private Interface impl SchemaValidationContext { fn from_raw(ctx: *mut bindings::_xmlSchemaValidCtxt, schema: Schema) -> Self { let errors: Box> = Box::default(); unsafe { let reference: *mut Vec = std::mem::transmute(errors); bindings::xmlSchemaSetValidStructuredErrors( ctx, Some(common::structured_error_handler), reference as *mut _, // Box::into_raw(Box::new(Rc::downgrade(&errors))) as *mut _, ); Self { ctxt: ctx, errlog: reference, _schema: schema, } } } } impl Drop for SchemaValidationContext { fn drop(&mut self) { unsafe { bindings::xmlSchemaFreeValidCtxt(self.ctxt); if !self.errlog.is_null() { let errors: Box> = std::mem::transmute(self.errlog); drop(errors) } } } } ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������libxml-0.3.5/src/tree/document.rs�������������������������������������������������������������������0000644�0000000�0000000�00000022624�10461020230�0015040�0����������������������������������������������������������������������������������������������������ustar 
�����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������//! Document feature set //! use libc::{c_char, c_int}; use std::cell::RefCell; use std::collections::HashMap; use std::ffi::{CStr, CString}; use std::fmt; use std::ptr; use std::rc::{Rc, Weak}; use std::str; use crate::bindings::*; use crate::readonly::RoNode; use crate::tree::node::Node; pub(crate) type DocumentRef = Rc>; pub(crate) type DocumentWeak = Weak>; #[derive(Debug, Copy, Clone, Default)] /// Save Options for Document pub struct SaveOptions { /// format save output pub format: bool, /// drop the xml declaration pub no_declaration: bool, /// no empty tags pub no_empty_tags: bool, /// disable XHTML1 specific rules pub no_xhtml: bool, /// force XHTML1 specific rules pub xhtml: bool, /// force XML serialization on HTML doc pub as_xml: bool, /// force HTML serialization on XML doc pub as_html: bool, /// format with non-significant whitespace pub non_significant_whitespace: bool, } #[derive(Debug)] pub(crate) struct _Document { /// pointer to a libxml document pub(crate) doc_ptr: xmlDocPtr, /// hashed pointer-to-Node bookkeeping table nodes: HashMap, } impl _Document { /// Internal bookkeeping function, so far only used by `Node::wrap` pub(crate) fn insert_node(&mut self, node_ptr: xmlNodePtr, node: Node) { self.nodes.insert(node_ptr, node); } /// Internal bookkeeping function, so far only used by `Node::wrap` pub(crate) fn get_node(&self, node_ptr: xmlNodePtr) -> Option<&Node> { self.nodes.get(&node_ptr) } /// Internal bookkeeping function pub(crate) fn forget_node(&mut self, node_ptr: xmlNodePtr) { self.nodes.remove(&node_ptr); } } /// A libxml2 Document #[derive(Clone)] pub struct Document(pub(crate) DocumentRef); impl Drop for _Document { ///Free document when it goes out of scope fn drop(&mut self) { unsafe { if 
!self.doc_ptr.is_null() { xmlFreeDoc(self.doc_ptr); } } } } impl fmt::Display for Document { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.to_string_with_options(SaveOptions::default())) } } impl Document { /// Creates a new empty libxml2 document pub fn new() -> Result { unsafe { let c_version = CString::new("1.0").unwrap(); let c_version_bytes = c_version.as_bytes(); let doc_ptr = xmlNewDoc(c_version_bytes.as_ptr()); if doc_ptr.is_null() { Err(()) } else { let doc = _Document { doc_ptr, nodes: HashMap::new(), }; Ok(Document(Rc::new(RefCell::new(doc)))) } } } /// Obtain the underlying libxml2 `xmlDocPtr` for this Document pub fn doc_ptr(&self) -> xmlDocPtr { self.0.borrow().doc_ptr } /// Creates a new `Document` from an existing libxml2 pointer pub fn new_ptr(doc_ptr: xmlDocPtr) -> Self { let doc = _Document { doc_ptr, nodes: HashMap::new(), }; Document(Rc::new(RefCell::new(doc))) } pub(crate) fn null_ref() -> DocumentRef { Rc::new(RefCell::new(_Document { doc_ptr: ptr::null_mut(), nodes: HashMap::new(), })) } /// Write document to `filename` pub fn save_file(&self, filename: &str) -> Result { let c_filename = CString::new(filename).unwrap(); unsafe { let retval = xmlSaveFile(c_filename.as_ptr(), self.doc_ptr()); if retval < 0 { return Err(()); } Ok(retval) } } pub(crate) fn register_node(&self, node_ptr: xmlNodePtr) -> Node { Node::wrap(node_ptr, &self.0) } /// Get the root element of the document pub fn get_root_element(&self) -> Option { unsafe { let node_ptr = xmlDocGetRootElement(self.doc_ptr()); if node_ptr.is_null() { None } else { Some(self.register_node(node_ptr)) } } } /// Get the root element of the document (read-only) pub fn get_root_readonly(&self) -> Option { unsafe { let node_ptr = xmlDocGetRootElement(self.doc_ptr()); if node_ptr.is_null() { None } else { Some(RoNode(node_ptr)) } } } /// Sets the root element of the document pub fn set_root_element(&mut self, root: &Node) { unsafe { 
xmlDocSetRootElement(self.doc_ptr(), root.node_ptr()); } root.set_linked(); } fn ptr_as_result(&mut self, node_ptr: xmlNodePtr) -> Result { if node_ptr.is_null() { Err(()) } else { let node = self.register_node(node_ptr); Ok(node) } } /// Import a `Node` from another `Document` pub fn import_node(&mut self, node: &mut Node) -> Result { if !node.is_unlinked() { return Err(()); } // Also remove this node from the prior document hash node .get_docref() .upgrade() .unwrap() .borrow_mut() .forget_node(node.node_ptr()); let node_ptr = unsafe { xmlDocCopyNode(node.node_ptr(), self.doc_ptr(), 1) }; node.set_linked(); self.ptr_as_result(node_ptr) } /// Serializes the `Document` with options pub fn to_string_with_options(&self, options: SaveOptions) -> String { unsafe { // allocate a buffer to dump into let buf = xmlBufferCreate(); let c_utf8 = CString::new("UTF-8").unwrap(); let mut xml_options = 0; if options.format { xml_options += xmlSaveOption_XML_SAVE_FORMAT; } if options.no_declaration { xml_options += xmlSaveOption_XML_SAVE_NO_DECL; } if options.no_empty_tags { xml_options += xmlSaveOption_XML_SAVE_NO_EMPTY; } if options.no_xhtml { xml_options += xmlSaveOption_XML_SAVE_NO_XHTML; } if options.xhtml { xml_options += xmlSaveOption_XML_SAVE_XHTML; } if options.as_xml { xml_options += xmlSaveOption_XML_SAVE_AS_XML; } if options.as_html { xml_options += xmlSaveOption_XML_SAVE_AS_HTML; } if options.non_significant_whitespace { xml_options += xmlSaveOption_XML_SAVE_WSNONSIG; } let save_ctx = xmlSaveToBuffer(buf, c_utf8.as_ptr(), xml_options as i32); let _size = xmlSaveDoc(save_ctx, self.doc_ptr()); let _size = xmlSaveClose(save_ctx); let result = xmlBufferContent(buf); let c_string = CStr::from_ptr(result as *const c_char); let node_string = c_string.to_string_lossy().into_owned(); xmlBufferFree(buf); node_string } } /// Serializes a `Node` owned by this `Document` pub fn node_to_string(&self, node: &Node) -> String { unsafe { // allocate a buffer to dump into let buf = 
xmlBufferCreate(); // dump the node xmlNodeDump( buf, self.doc_ptr(), node.node_ptr(), 1, // level of indentation 0, /* disable formatting */ ); let result = xmlBufferContent(buf); let c_string = CStr::from_ptr(result as *const c_char); let node_string = c_string.to_string_lossy().into_owned(); xmlBufferFree(buf); node_string } } /// Serializes a `RoNode` owned by this `Document` pub fn ronode_to_string(&self, node: &RoNode) -> String { unsafe { // allocate a buffer to dump into let buf = xmlBufferCreate(); // dump the node xmlNodeDump( buf, self.doc_ptr(), node.node_ptr(), 1, // level of indentation 0, /* disable formatting */ ); let result = xmlBufferContent(buf); let c_string = CStr::from_ptr(result as *const c_char); let node_string = c_string.to_string_lossy().into_owned(); xmlBufferFree(buf); node_string } } /// Creates a node for an XML processing instruction pub fn create_processing_instruction(&mut self, name: &str, content: &str) -> Result { unsafe { let c_name = CString::new(name).unwrap(); let c_name_bytes = c_name.as_bytes(); let c_content = CString::new(content).unwrap(); let c_content_bytes = c_content.as_bytes(); let node_ptr: xmlNodePtr = xmlNewDocPI( self.doc_ptr(), c_name_bytes.as_ptr(), c_content_bytes.as_ptr(), ); if node_ptr.is_null() { Err(()) } else { Ok(self.register_node(node_ptr)) } } } /// Cast the document as a libxml Node pub fn as_node(&self) -> Node { // Note: this method is important to keep, as it enables certain low-level libxml2 idioms // In particular, method dispatch based on NodeType is only possible when the document can be cast as a Node // // Memory management is not an issue, as a document node can not be unbound/removed, and does not require // any additional deallocation than the Drop of a Document object. 
self.register_node(self.doc_ptr() as xmlNodePtr) } /// Duplicates the libxml2 Document into a new instance pub fn dup(&self) -> Result { let doc_ptr = unsafe { xmlCopyDoc(self.doc_ptr(), 1) }; if doc_ptr.is_null() { Err(()) } else { let doc = _Document { doc_ptr, nodes: HashMap::new(), }; Ok(Document(Rc::new(RefCell::new(doc)))) } } /// Duplicates a source libxml2 Document into the empty Document self pub fn dup_from(&mut self, source: &Self) -> Result<(), ()> { if !self.doc_ptr().is_null() { return Err(()); } let doc_ptr = unsafe { xmlCopyDoc(source.doc_ptr(), 1) }; if doc_ptr.is_null() { return Err(()); } self.0.borrow_mut().doc_ptr = doc_ptr; Ok(()) } } ������������������������������������������������������������������������������������������������������������libxml-0.3.5/src/tree/mod.rs������������������������������������������������������������������������0000644�0000000�0000000�00000000557�10461020230�0014002�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������//! The tree functionality //! 
pub mod document; pub mod namespace; pub mod node; pub mod nodetype; pub use self::document::{Document, SaveOptions}; pub(crate) use self::document::{DocumentRef, DocumentWeak}; pub use self::namespace::Namespace; pub use self::node::set_node_rc_guard; pub use self::node::{Node, NODE_RC_MAX_GUARD}; pub use self::nodetype::NodeType; �������������������������������������������������������������������������������������������������������������������������������������������������libxml-0.3.5/src/tree/namespace.rs������������������������������������������������������������������0000644�0000000�0000000�00000004473�10461020230�0015160�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������//! Namespace feature set //! 
use std::error::Error; use std::ffi::{CStr, CString}; use std::hash::{Hash, Hasher}; use std::ptr; use std::str; use crate::bindings::*; use crate::c_helpers::*; use crate::tree::node::Node; ///An xml namespace #[derive(Clone)] pub struct Namespace { ///libxml's xmlNsPtr pub(crate) ns_ptr: xmlNsPtr, } impl PartialEq for Namespace { fn eq(&self, other: &Self) -> bool { self.get_prefix() == other.get_prefix() && self.get_href() == other.get_href() } } impl Eq for Namespace {} impl Hash for Namespace { fn hash(&self, state: &mut H) { self.get_prefix().hash(state); self.get_href().hash(state); } } impl Namespace { /// Creates a new namespace pub fn new( prefix: &str, href: &str, node: &mut Node, ) -> Result> { let c_href = CString::new(href).unwrap(); let c_prefix = CString::new(prefix).unwrap(); let c_prefix_ptr = if prefix.is_empty() { ptr::null() } else { c_prefix.as_ptr() }; unsafe { let ns = xmlNewNs( node.node_ptr_mut()?, c_href.as_bytes().as_ptr(), c_prefix_ptr as *const u8, ); if ns.is_null() { Err(From::from("xmlNewNs returned NULL")) } else { Ok(Namespace { ns_ptr: ns }) } } } /// Immutably borrows the underlying libxml2 `xmlNsPtr` pointer pub fn ns_ptr(&self) -> xmlNsPtr { self.ns_ptr } /// Mutably borrows the underlying libxml2 `xmlNsPtr` pointer pub fn ns_ptr_mut(&mut self) -> xmlNsPtr { self.ns_ptr } /// The namespace prefix pub fn get_prefix(&self) -> String { unsafe { let prefix_ptr = xmlNsPrefix(self.ns_ptr()); if prefix_ptr.is_null() { String::new() } else { let c_prefix = CStr::from_ptr(prefix_ptr); c_prefix.to_string_lossy().into_owned() } } } /// The namespace href pub fn get_href(&self) -> String { unsafe { let href_ptr = xmlNsHref(self.ns_ptr()); if href_ptr.is_null() { String::new() } else { let c_href = CStr::from_ptr(href_ptr); c_href.to_string_lossy().into_owned() } } } /// Explicit free method, until (if?) 
we implement automatic+safe free-on-drop pub fn free(&mut self) { unsafe { xmlFreeNs(self.ns_ptr()) } } } �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������libxml-0.3.5/src/tree/node.rs�����������������������������������������������������������������������0000644�0000000�0000000�00000101135�10461020230�0014142�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������//! Node, and related, feature set //! use libc::{c_char, c_void}; use std::cell::RefCell; use std::collections::{HashMap, HashSet}; use std::error::Error; use std::ffi::{CStr, CString}; use std::hash::{Hash, Hasher}; use std::ptr; use std::rc::Rc; use std::str; use crate::bindings::*; use crate::c_helpers::*; use crate::tree::namespace::Namespace; use crate::tree::nodetype::NodeType; use crate::tree::{Document, DocumentRef, DocumentWeak}; use crate::xpath::Context; /// Guard treshold for enforcing runtime mutability checks for Nodes pub static mut NODE_RC_MAX_GUARD: usize = 2; /// Set the guard value for the max Rc "strong count" allowed for mutable use of a Node /// Default is 2 pub fn set_node_rc_guard(value: usize) { unsafe { NODE_RC_MAX_GUARD = value; } } type NodeRef = Rc>; #[derive(Debug)] struct _Node { /// libxml's xmlNodePtr node_ptr: xmlNodePtr, /// Reference to parent `Document` document: DocumentWeak, /// Bookkeep removal from a parent unlinked: bool, } /// An xml node #[derive(Clone, Debug)] pub struct Node(NodeRef); impl Hash for Node { /// Generates a hash value from the `node_ptr` value. 
fn hash(&self, state: &mut H) { self.node_ptr().hash(state); } } impl PartialEq for Node { /// Two nodes are considered equal, if they point to the same xmlNode. fn eq(&self, other: &Node) -> bool { std::ptr::eq(self.node_ptr(), other.node_ptr()) } } impl Eq for Node {} impl Drop for _Node { /// Free node if it isn't bound in some document /// Warning: xmlFreeNode is RECURSIVE into the node's children, so this may lead to segfaults if used carelessly fn drop(&mut self) { if self.unlinked { let node_ptr = self.node_ptr; if !node_ptr.is_null() { unsafe { xmlFreeNode(node_ptr); } } } } } impl Node { /// Create a new node, bound to a given document. pub fn new(name: &str, ns: Option, doc: &Document) -> Result { // We will only allow to work with document-bound nodes for now, to avoid the problems of memory management. let c_name = CString::new(name).unwrap(); let ns_ptr = match ns { None => ptr::null_mut(), Some(ns) => ns.ns_ptr(), }; unsafe { let node = xmlNewDocRawNode( doc.doc_ptr(), ns_ptr, c_name.as_bytes().as_ptr(), ptr::null(), ); if node.is_null() { Err(()) } else { Ok(Node::wrap_new(node, &doc.0)) } } } /// Immutably borrows the underlying libxml2 `xmlNodePtr` pointer pub fn node_ptr(&self) -> xmlNodePtr { self.0.borrow().node_ptr } /// Mutably borrows the underlying libxml2 `xmlNodePtr` pointer /// Also protects against mutability conflicts at runtime. pub fn node_ptr_mut(&mut self) -> Result { let weak_count = Rc::weak_count(&self.0); let strong_count = Rc::strong_count(&self.0); // The basic idea would be to use `Rc::get_mut` to guard against multiple borrows. // However, our approach to bookkeeping nodes implies there is *always* a second Rc reference // in the document.nodes Hash. 
So rather than use `get_mut` directly, the // correct check would be to have a weak count of 0 and a strong count <=2 (one for self, one for .nodes) let guard_ok = unsafe { weak_count == 0 && strong_count <= NODE_RC_MAX_GUARD }; if guard_ok { Ok(self.0.borrow_mut().node_ptr) } else { Err(format!( "Can not mutably reference a shared Node {:?}! Rc: weak count: {:?}; strong count: {:?}", self.get_name(), weak_count, strong_count, )) } } /// Wrap a libxml node ptr with a Node fn _wrap(node_ptr: xmlNodePtr, unlinked: bool, document: &DocumentRef) -> Node { // If already seen, return saved Node if let Some(node) = document.borrow().get_node(node_ptr) { return node.clone(); } // If newly encountered pointer, wrap let node = _Node { node_ptr, document: Rc::downgrade(document), unlinked, }; let wrapped_node = Node(Rc::new(RefCell::new(node))); document .borrow_mut() .insert_node(node_ptr, wrapped_node.clone()); wrapped_node } /// Wrap a node already linked to a `document` tree pub(crate) fn wrap(node_ptr: xmlNodePtr, document: &DocumentRef) -> Node { Node::_wrap(node_ptr, false, document) } /// Wrap, a node owned by, but not yet linked to, a `document` pub(crate) fn wrap_new(node_ptr: xmlNodePtr, document: &DocumentRef) -> Node { Node::_wrap(node_ptr, true, document) } /// Create a new text node, bound to a given document pub fn new_text(content: &str, doc: &Document) -> Result { // We will only allow to work with document-bound nodes for now, to avoid the problems of memory management. 
let c_content = CString::new(content).unwrap(); unsafe { let node = xmlNewDocText(doc.doc_ptr(), c_content.as_bytes().as_ptr()); if node.is_null() { Err(()) } else { Ok(Node::wrap_new(node, &doc.0)) } } } /// Create a mock node, used for a placeholder argument pub fn mock(doc: &Document) -> Self { Node::new("mock", None, doc).unwrap() } /// Create a mock node, used for a placeholder argument pub fn null() -> Self { Node(Rc::new(RefCell::new(_Node { node_ptr: ptr::null_mut(), document: Rc::downgrade(&Document::null_ref()), unlinked: true, }))) } /// `libc::c_void` isn't hashable and cannot be made hashable pub fn to_hashable(&self) -> usize { self.node_ptr() as usize } pub(crate) fn get_docref(&self) -> DocumentWeak { self.0.borrow().document.clone() } /// Returns the next sibling if it exists pub fn get_next_sibling(&self) -> Option { let ptr = xmlNextSibling(self.node_ptr()); self.ptr_as_option(ptr) } /// Returns the previous sibling if it exists pub fn get_prev_sibling(&self) -> Option { let ptr = xmlPrevSibling(self.node_ptr()); self.ptr_as_option(ptr) } /// Returns the first child if it exists pub fn get_first_child(&self) -> Option { let ptr = xmlGetFirstChild(self.node_ptr()); self.ptr_as_option(ptr) } /// Returns the last child if it exists pub fn get_last_child(&self) -> Option { let ptr = unsafe { xmlGetLastChild(self.node_ptr()) }; self.ptr_as_option(ptr) } /// Returns the next element sibling if it exists pub fn get_next_element_sibling(&self) -> Option { match self.get_next_sibling() { None => None, Some(child) => { let mut current_node = child; while !current_node.is_element_node() { if let Some(sibling) = current_node.get_next_sibling() { current_node = sibling; } else { break; } } if current_node.is_element_node() { Some(current_node) } else { None } } } } /// Returns the previous element sibling if it exists pub fn get_prev_element_sibling(&self) -> Option { match self.get_prev_sibling() { None => None, Some(child) => { let mut current_node = child; 
while !current_node.is_element_node() { if let Some(sibling) = current_node.get_prev_sibling() { current_node = sibling; } else { break; } } if current_node.is_element_node() { Some(current_node) } else { None } } } } /// Returns the first element child if it exists pub fn get_first_element_child(&self) -> Option { match self.get_first_child() { None => None, Some(child) => { let mut current_node = child; while !current_node.is_element_node() { if let Some(sibling) = current_node.get_next_sibling() { current_node = sibling; } else { break; } } if current_node.is_element_node() { Some(current_node) } else { None } } } } /// Returns the last element child if it exists pub fn get_last_element_child(&self) -> Option { match self.get_last_child() { None => None, Some(child) => { let mut current_node = child; while !current_node.is_element_node() { if let Some(sibling) = current_node.get_prev_sibling() { current_node = sibling; } else { break; } } if current_node.is_element_node() { Some(current_node) } else { None } } } } /// Returns all child nodes of the given node as a vector pub fn get_child_nodes(&self) -> Vec { let mut children = Vec::new(); if let Some(first_child) = self.get_first_child() { children.push(first_child); while let Some(sibling) = children.last().unwrap().get_next_sibling() { children.push(sibling) } } children } /// Returns all child elements of the given node as a vector pub fn get_child_elements(&self) -> Vec { self .get_child_nodes() .into_iter() .filter(|n| n.get_type() == Some(NodeType::ElementNode)) .collect::>() } /// Returns the parent if it exists pub fn get_parent(&self) -> Option { let ptr = xmlGetParent(self.node_ptr()); self.ptr_as_option(ptr) } /// Get the node type pub fn get_type(&self) -> Option { NodeType::from_int(xmlGetNodeType(self.node_ptr())) } /// Add a previous sibling pub fn add_prev_sibling( &mut self, new_sibling: &mut Node, ) -> Result<(), Box> { new_sibling.set_linked(); unsafe { if 
xmlAddPrevSibling(self.node_ptr_mut()?, new_sibling.node_ptr_mut()?).is_null() { Err(From::from("add_prev_sibling returned NULL")) } else { Ok(()) } } } /// Add a next sibling pub fn add_next_sibling( &mut self, new_sibling: &mut Node, ) -> Result<(), Box> { new_sibling.set_linked(); unsafe { if xmlAddNextSibling(self.node_ptr_mut()?, new_sibling.node_ptr_mut()?).is_null() { Err(From::from("add_next_sibling returned NULL")) } else { Ok(()) } } } /// Returns true if it is a text node pub fn is_text_node(&self) -> bool { self.get_type() == Some(NodeType::TextNode) } /// Checks if the given node is an Element pub fn is_element_node(&self) -> bool { self.get_type() == Some(NodeType::ElementNode) } /// Checks if the underlying libxml2 pointer is `NULL` pub fn is_null(&self) -> bool { self.node_ptr().is_null() } /// Returns the name of the node (empty string if name pointer is `NULL`) pub fn get_name(&self) -> String { let name_ptr = xmlNodeGetName(self.node_ptr()); if name_ptr.is_null() { return String::new(); } //empty string let c_string = unsafe { CStr::from_ptr(name_ptr) }; c_string.to_string_lossy().into_owned() } /// Sets the name of this `Node` pub fn set_name(&mut self, name: &str) -> Result<(), Box> { let c_name = CString::new(name).unwrap(); unsafe { xmlNodeSetName(self.node_ptr_mut()?, c_name.as_bytes().as_ptr()) } Ok(()) } /// Returns the content of the node /// (assumes UTF-8 XML document) pub fn get_content(&self) -> String { let content_ptr = unsafe { xmlNodeGetContent(self.node_ptr()) }; if content_ptr.is_null() { //empty string when none return String::new(); } let c_string = unsafe { CStr::from_ptr(content_ptr as *const c_char) }; let rust_utf8 = c_string.to_string_lossy().into_owned(); bindgenFree(content_ptr as *mut c_void); rust_utf8 } /// Sets the text content of this `Node` pub fn set_content(&mut self, content: &str) -> Result<(), Box> { let c_content = CString::new(content).unwrap(); unsafe { xmlNodeSetContent(self.node_ptr_mut()?, 
c_content.as_bytes().as_ptr()); } Ok(()) } /// Returns the value of property `name` pub fn get_property(&self, name: &str) -> Option { let c_name = CString::new(name).unwrap(); let value_ptr = unsafe { xmlGetProp(self.node_ptr(), c_name.as_bytes().as_ptr()) }; if value_ptr.is_null() { return None; } let c_value_string = unsafe { CStr::from_ptr(value_ptr as *const c_char) }; let prop_str = c_value_string.to_string_lossy().into_owned(); bindgenFree(value_ptr as *mut c_void); Some(prop_str) } /// Returns the value of property `name` in namespace `ns` pub fn get_property_ns(&self, name: &str, ns: &str) -> Option { let c_name = CString::new(name).unwrap(); let c_ns = CString::new(ns).unwrap(); let value_ptr = unsafe { xmlGetNsProp( self.node_ptr(), c_name.as_bytes().as_ptr(), c_ns.as_bytes().as_ptr(), ) }; if value_ptr.is_null() { return None; } let c_value_string = unsafe { CStr::from_ptr(value_ptr as *const c_char) }; let prop_str = c_value_string.to_string_lossy().into_owned(); bindgenFree(value_ptr as *mut c_void); Some(prop_str) } /// Returns the value of property `name` with no namespace pub fn get_property_no_ns(&self, name: &str) -> Option { let c_name = CString::new(name).unwrap(); let value_ptr = unsafe { xmlGetNoNsProp(self.node_ptr(), c_name.as_bytes().as_ptr()) }; if value_ptr.is_null() { return None; } let c_value_string = unsafe { CStr::from_ptr(value_ptr as *const c_char) }; let prop_str = c_value_string.to_string_lossy().into_owned(); bindgenFree(value_ptr as *mut c_void); Some(prop_str) } /// Return an attribute as a `Node` struct of type AttributeNode pub fn get_property_node(&self, name: &str) -> Option { let c_name = CString::new(name).unwrap(); unsafe { let attr_node = xmlHasProp(self.node_ptr(), c_name.as_bytes().as_ptr()); self.ptr_as_option(attr_node as xmlNodePtr) } } /// Return an attribute in a namespace `ns` as a `Node` of type AttributeNode pub fn get_property_node_ns(&self, name: &str, ns: &str) -> Option { let c_name = 
CString::new(name).unwrap(); let c_ns = CString::new(ns).unwrap(); let attr_node = unsafe { xmlHasNsProp( self.node_ptr(), c_name.as_bytes().as_ptr(), c_ns.as_bytes().as_ptr(), ) }; self.ptr_as_option(attr_node as xmlNodePtr) } /// Return an attribute with no namespace as a `Node` of type AttributeNode pub fn get_property_node_no_ns(&self, name: &str) -> Option { let c_name = CString::new(name).unwrap(); let attr_node = unsafe { xmlHasNsProp(self.node_ptr(), c_name.as_bytes().as_ptr(), ptr::null()) }; self.ptr_as_option(attr_node as xmlNodePtr) } /// Check if a property has been defined, without allocating its value pub fn has_property(&self, name: &str) -> bool { let c_name = CString::new(name).unwrap(); let value_ptr = unsafe { xmlHasProp(self.node_ptr(), c_name.as_bytes().as_ptr()) }; !value_ptr.is_null() } /// Check if property `name` in namespace `ns` exists pub fn has_property_ns(&self, name: &str, ns: &str) -> bool { let c_name = CString::new(name).unwrap(); let c_ns = CString::new(ns).unwrap(); let value_ptr = unsafe { xmlHasNsProp( self.node_ptr(), c_name.as_bytes().as_ptr(), c_ns.as_bytes().as_ptr(), ) }; !value_ptr.is_null() } /// Check if property `name` with no namespace exists pub fn has_property_no_ns(&self, name: &str) -> bool { let c_name = CString::new(name).unwrap(); let value_ptr = unsafe { xmlHasNsProp(self.node_ptr(), c_name.as_bytes().as_ptr(), ptr::null()) }; !value_ptr.is_null() } /// Alias for has_property pub fn has_attribute(&self, name: &str) -> bool { self.has_property(name) } /// Alias for has_property_ns pub fn has_attribute_ns(&self, name: &str, ns: &str) -> bool { self.has_property_ns(name, ns) } /// Alias for has_property_no_ns pub fn has_attribute_no_ns(&self, name: &str) -> bool { self.has_property_no_ns(name) } /// Sets the value of property `name` to `value` pub fn set_property( &mut self, name: &str, value: &str, ) -> Result<(), Box> { let c_name = CString::new(name).unwrap(); let c_value = CString::new(value).unwrap(); 
unsafe { xmlSetProp( self.node_ptr_mut()?, c_name.as_bytes().as_ptr(), c_value.as_bytes().as_ptr(), ) }; Ok(()) } /// Sets a namespaced attribute pub fn set_property_ns( &mut self, name: &str, value: &str, ns: &Namespace, ) -> Result<(), Box> { let c_name = CString::new(name).unwrap(); let c_value = CString::new(value).unwrap(); unsafe { xmlSetNsProp( self.node_ptr_mut()?, ns.ns_ptr(), c_name.as_bytes().as_ptr(), c_value.as_bytes().as_ptr(), ) }; Ok(()) } /// Removes the property of given `name` pub fn remove_property(&mut self, name: &str) -> Result<(), Box> { let c_name = CString::new(name).unwrap(); unsafe { let attr_node = xmlHasProp(self.node_ptr_mut()?, c_name.as_bytes().as_ptr()); if !attr_node.is_null() { let remove_prop_status = xmlRemoveProp(attr_node); if remove_prop_status == 0 { Ok(()) } else { // Propagate libxml2 failure to remove Err(From::from(format!( "libxml2 failed to remove property with status: {remove_prop_status:?}"))) } } else { // silently no-op if asked to remove a property which is not present Ok(()) } } } /// Removes the property of given `name` and namespace (`ns`) pub fn remove_property_ns( &mut self, name: &str, ns: &str, ) -> Result<(), Box> { let c_name = CString::new(name).unwrap(); let c_ns = CString::new(ns).unwrap(); unsafe { let attr_node = xmlHasNsProp( self.node_ptr_mut()?, c_name.as_bytes().as_ptr(), c_ns.as_bytes().as_ptr(), ); if !attr_node.is_null() { let remove_prop_status = xmlRemoveProp(attr_node); if remove_prop_status == 0 { Ok(()) } else { // Propagate libxml2 failure to remove Err(From::from(format!( "libxml2 failed to remove property with status: {remove_prop_status:?}"))) } } else { // silently no-op if asked to remove a property which is not present Ok(()) } } } /// Removes the property of given `name` with no namespace pub fn remove_property_no_ns(&mut self, name: &str) -> Result<(), Box> { let c_name = CString::new(name).unwrap(); let attr_node = unsafe { xmlHasNsProp( self.node_ptr_mut()?, 
c_name.as_bytes().as_ptr(), ptr::null(), ) }; if !attr_node.is_null() { let remove_prop_status = unsafe { xmlRemoveProp(attr_node) }; if remove_prop_status == 0 { Ok(()) } else { // Propagate libxml2 failure to remove Err(From::from(format!( "libxml2 failed to remove property with status: {remove_prop_status:?}"))) } } else { // silently no-op if asked to remove a property which is not present Ok(()) } } /// Alias for get_property pub fn get_attribute(&self, name: &str) -> Option { self.get_property(name) } /// Alias for get_property_ns pub fn get_attribute_ns(&self, name: &str, ns: &str) -> Option { self.get_property_ns(name, ns) } /// Alias for get_property_no_ns pub fn get_attribute_no_ns(&self, name: &str) -> Option { self.get_property_no_ns(name) } /// Alias for get_property_node pub fn get_attribute_node(&self, name: &str) -> Option { self.get_property_node(name) } /// Alias for get_property_node_ns pub fn get_attribute_node_ns(&self, name: &str, ns: &str) -> Option { self.get_property_node_ns(name, ns) } /// Alias for get_property_node_no_ns pub fn get_attribute_node_no_ns(&self, name: &str) -> Option { self.get_property_node_no_ns(name) } /// Alias for set_property pub fn set_attribute( &mut self, name: &str, value: &str, ) -> Result<(), Box> { self.set_property(name, value) } /// Alias for set_property_ns pub fn set_attribute_ns( &mut self, name: &str, value: &str, ns: &Namespace, ) -> Result<(), Box> { self.set_property_ns(name, value, ns) } /// Alias for remove_property pub fn remove_attribute(&mut self, name: &str) -> Result<(), Box> { self.remove_property(name) } /// Alias for remove_property_ns pub fn remove_attribute_ns( &mut self, name: &str, ns: &str, ) -> Result<(), Box> { self.remove_property_ns(name, ns) } /// Alias for remove_property_no_ns pub fn remove_attribute_no_ns(&mut self, name: &str) -> Result<(), Box> { self.remove_property_no_ns(name) } /// Get a copy of the attributes of this node pub fn get_properties(&self) -> HashMap { let mut 
attributes = HashMap::new(); let mut current_prop = xmlGetFirstProperty(self.node_ptr()); while !current_prop.is_null() { let name_ptr = xmlAttrName(current_prop); let c_name_string = unsafe { CStr::from_ptr(name_ptr) }; let name = c_name_string.to_string_lossy().into_owned(); let value = self.get_property(&name).unwrap_or_default(); attributes.insert(name, value); current_prop = xmlNextPropertySibling(current_prop); } attributes } /// Get a copy of this node's attributes and their namespaces pub fn get_properties_ns(&self) -> HashMap<(String, Option), String> { let mut attributes = HashMap::new(); let mut current_prop = xmlGetFirstProperty(self.node_ptr()); while !current_prop.is_null() { let name_ptr = xmlAttrName(current_prop); let c_name_string = unsafe { CStr::from_ptr(name_ptr) }; let name = c_name_string.to_string_lossy().into_owned(); let ns_ptr = xmlAttrNs(current_prop); if ns_ptr.is_null() { let value = self.get_property_no_ns(&name).unwrap_or_default(); attributes.insert((name, None), value); } else { let ns = Namespace { ns_ptr }; let value = self .get_property_ns(&name, &ns.get_href()) .unwrap_or_default(); attributes.insert((name, Some(ns)), value); } current_prop = xmlNextPropertySibling(current_prop); } attributes } /// Alias for `get_properties` pub fn get_attributes(&self) -> HashMap { self.get_properties() } /// Alias for `get_properties_ns` pub fn get_attributes_ns(&self) -> HashMap<(String, Option), String> { self.get_properties_ns() } /// Gets the active namespace associated of this node pub fn get_namespace(&self) -> Option { let ns_ptr = xmlNodeNs(self.node_ptr()); if ns_ptr.is_null() { None } else { Some(Namespace { ns_ptr }) } } /// Gets a list of namespaces associated with this node pub fn get_namespaces(&self, doc: &Document) -> Vec { let list_ptr_raw = unsafe { xmlGetNsList(doc.doc_ptr(), self.node_ptr()) }; if list_ptr_raw.is_null() { Vec::new() } else { let mut namespaces = Vec::new(); let mut ptr_iter = list_ptr_raw as *mut xmlNsPtr; 
unsafe { while !ptr_iter.is_null() && !(*ptr_iter).is_null() { namespaces.push(Namespace { ns_ptr: *ptr_iter }); ptr_iter = ptr_iter.add(1); } /* TODO: valgrind suggests this technique isn't sufficiently fluent: ==114895== Conditional jump or move depends on uninitialised value(s) ==114895== at 0x4E9962F: xmlFreeNs (in /usr/lib/x86_64-linux-gnu/libxml2.so.2.9.4) ==114895== by 0x195CE8: libxml::tree::Node::get_namespaces (tree.rs:723) ==114895== by 0x12E7B6: base_tests::can_work_with_namespaces (base_tests.rs:537) DG: I could not improve on this state without creating memory leaks after ~1 hour, so I am marking it as future work. */ /* TODO: How do we properly deallocate here? The approach bellow reliably segfaults tree_tests on 1 thread */ // println!("\n-- xmlfreens on : {:?}", list_ptr_raw); // xmlFreeNs(list_ptr_raw as xmlNsPtr); } namespaces } } /// Get a list of namespaces declared with this node pub fn get_namespace_declarations(&self) -> Vec { if self.get_type() != Some(NodeType::ElementNode) { // only element nodes can have declarations return Vec::new(); } let mut namespaces = Vec::new(); let mut ns_ptr = xmlNodeNsDeclarations(self.node_ptr()); while !ns_ptr.is_null() { if !xmlNsPrefix(ns_ptr).is_null() || !xmlNsHref(ns_ptr).is_null() { namespaces.push(Namespace { ns_ptr }); } ns_ptr = xmlNextNsSibling(ns_ptr); } namespaces } /// Sets a `Namespace` for the node pub fn set_namespace( &mut self, namespace: &Namespace, ) -> Result<(), Box> { unsafe { xmlSetNs(self.node_ptr_mut()?, namespace.ns_ptr()); } Ok(()) } /// Looks up the prefix of a namespace from its URI, basedo around a given `Node` pub fn lookup_namespace_prefix(&self, href: &str) -> Option { if href.is_empty() { return None; } let c_href = CString::new(href).unwrap(); unsafe { let ptr_mut = self.node_ptr(); let ns_ptr = xmlSearchNsByHref(xmlGetDoc(ptr_mut), ptr_mut, c_href.as_bytes().as_ptr()); if !ns_ptr.is_null() { let ns = Namespace { ns_ptr }; let ns_prefix = ns.get_prefix(); Some(ns_prefix) } 
else { None } } } /// Looks up the uri of a namespace from its prefix, basedo around a given `Node` pub fn lookup_namespace_uri(&self, prefix: &str) -> Option { if prefix.is_empty() { return None; } let c_prefix = CString::new(prefix).unwrap(); unsafe { let ns_ptr = xmlSearchNs( xmlGetDoc(self.node_ptr()), self.node_ptr(), c_prefix.as_bytes().as_ptr(), ); if !ns_ptr.is_null() { let ns = Namespace { ns_ptr }; let ns_prefix = ns.get_href(); if !ns_prefix.is_empty() { Some(ns_prefix) } else { None } } else { None } } } // TODO: Clear a future Document namespaces vec /// Removes the namespaces of this `Node` and it's children! pub fn recursively_remove_namespaces(&mut self) -> Result<(), Box> { xmlNodeRecursivelyRemoveNs(self.node_ptr_mut()?); Ok(()) } /// Get a set of class names from this node's attributes pub fn get_class_names(&self) -> HashSet { let mut set = HashSet::new(); if let Some(value) = self.get_property("class") { for n in value.split(' ') { set.insert(n.to_owned()); } } set } /// Creates a new `Node` as child to the self `Node` pub fn add_child(&mut self, child: &mut Node) -> Result<(), String> { child.set_linked(); unsafe { let new_child_ptr = xmlAddChild(self.node_ptr_mut()?, child.node_ptr_mut()?); if new_child_ptr.is_null() { Err("add_child encountered NULL pointer".to_string()) } else { Ok(()) } } } /// Creates a new `Node` as child to the self `Node` pub fn new_child( &mut self, ns: Option, name: &str, ) -> Result> { let c_name = CString::new(name).unwrap(); let ns_ptr = match ns { None => ptr::null_mut(), Some(mut ns) => ns.ns_ptr_mut(), }; unsafe { let new_ptr = xmlNewChild( self.node_ptr_mut()?, ns_ptr, c_name.as_bytes().as_ptr(), ptr::null(), ); Ok(Node::wrap(new_ptr, &self.get_docref().upgrade().unwrap())) } } /// Adds a new text child, to this `Node` pub fn add_text_child( &mut self, ns: Option, name: &str, content: &str, ) -> Result> { let c_name = CString::new(name).unwrap(); let c_content = CString::new(content).unwrap(); let ns_ptr = 
match ns { None => ptr::null_mut(), Some(mut ns) => ns.ns_ptr_mut(), }; unsafe { let new_ptr = xmlNewTextChild( self.node_ptr_mut()?, ns_ptr, c_name.as_bytes().as_ptr(), c_content.as_bytes().as_ptr(), ); Ok(Node::wrap(new_ptr, &self.get_docref().upgrade().unwrap())) } } /// Append text to this `Node` pub fn append_text(&mut self, content: &str) -> Result<(), Box> { let c_len = content.len() as i32; if c_len > 0 { let c_content = CString::new(content).unwrap(); unsafe { xmlNodeAddContentLen(self.node_ptr_mut()?, c_content.as_bytes().as_ptr(), c_len); } } Ok(()) } /// Unbinds the Node from its siblings and Parent, but not from the Document it belongs to. /// If the node is not inserted into the DOM afterwards, it will be lost after the program terminates. /// From a low level view, the unbound node is stripped /// from the context it is and inserted into a (hidden) document-fragment. pub fn unlink_node(&mut self) { let node_type = self.get_type(); if node_type != Some(NodeType::DocumentNode) && node_type != Some(NodeType::DocumentFragNode) && !self.is_unlinked() { // only unlink nodes that are currently marked as linked self.set_unlinked(); unsafe { xmlUnlinkNode(self.node_ptr()); } } } /// Alias for `unlink_node` pub fn unlink(&mut self) { self.unlink_node() } /// Alias for `unlink_node` pub fn unbind_node(&mut self) { self.unlink_node() } /// Alias for `unlink_node` pub fn unbind(&mut self) { self.unlink_node() } /// Checks if node is marked as unlinked pub fn is_unlinked(&self) -> bool { self.0.borrow().unlinked } fn ptr_as_option(&self, node_ptr: xmlNodePtr) -> Option { if node_ptr.is_null() { None } else { let doc_ref = self.get_docref().upgrade().unwrap(); let new_node = Node::wrap(node_ptr, &doc_ref); Some(new_node) } } /// internal helper to ensure the node is marked as linked/imported/adopted in the main document tree pub(crate) fn set_linked(&self) { self.0.borrow_mut().unlinked = false; } /// internal helper to ensure the node is marked as unlinked/removed 
from the main document tree pub(crate) fn set_unlinked(&self) { self.0.borrow_mut().unlinked = true; self .get_docref() .upgrade() .unwrap() .borrow_mut() .forget_node(self.node_ptr()); } /// find nodes via xpath, at a specified node or the document root pub fn findnodes(&self, xpath: &str) -> Result, ()> { let mut context = Context::from_node(self)?; context.findnodes(xpath, Some(self)) } /// find String values via xpath, at a specified node or the document root pub fn findvalues(&self, xpath: &str) -> Result, ()> { let mut context = Context::from_node(self)?; context.findvalues(xpath, Some(self)) } /// replace a `self`'s `old` child node with a `new` node in the same position /// borrowed from Perl's XML::LibXML pub fn replace_child_node( &mut self, mut new: Node, mut old: Node, ) -> Result> { // if newNode == oldNode or self == newNode then do nothing, just return nNode. if new == old || self == &new { // nothing to do here, already in place Ok(old) } else if self.get_type() == Some(NodeType::ElementNode) { if let Some(old_parent) = old.get_parent() { if &old_parent == self { // unlink new to be available for insertion new.unlink(); // mid-child case old.add_next_sibling(&mut new)?; old.unlink(); Ok(old) } else { Err(From::from(format!( "Old node was not a child of {:?} parent. Registered parent is {:?} instead.", self.get_name(), old_parent.get_name() ))) } } else { Err(From::from(format!( "Old node was not a child of {:?} parent. 
No registered parent exists.", self.get_name() ))) } } else { Err(From::from( "Can only call replace_child_node an a NodeType::Element type parent.", )) } } } �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������libxml-0.3.5/src/tree/nodetype.rs�������������������������������������������������������������������0000644�0000000�0000000�00000003075�10461020230�0015050�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������//! Types of libxml2 Nodes //! 
use crate::bindings::xmlElementType; /// Types of xml nodes #[derive(Debug, PartialEq, Eq)] #[allow(missing_docs)] pub enum NodeType { ElementNode, AttributeNode, TextNode, CDataSectionNode, EntityRefNode, EntityNode, PiNode, CommentNode, DocumentNode, DocumentTypeNode, DocumentFragNode, NotationNode, HtmlDocumentNode, DTDNode, ElementDecl, AttributeDecl, EntityDecl, NamespaceDecl, XIncludeStart, XIncludeEnd, DOCBDocumentNode, } impl NodeType { /// converts an integer from libxml's `enum NodeType` /// to an instance of our `NodeType` pub fn from_int(i: xmlElementType) -> Option { match i { 1 => Some(NodeType::ElementNode), 2 => Some(NodeType::AttributeNode), 3 => Some(NodeType::TextNode), 4 => Some(NodeType::CDataSectionNode), 5 => Some(NodeType::EntityRefNode), 6 => Some(NodeType::EntityNode), 7 => Some(NodeType::PiNode), 8 => Some(NodeType::CommentNode), 9 => Some(NodeType::DocumentNode), 10 => Some(NodeType::DocumentTypeNode), 11 => Some(NodeType::DocumentFragNode), 12 => Some(NodeType::NotationNode), 13 => Some(NodeType::HtmlDocumentNode), 14 => Some(NodeType::DTDNode), 15 => Some(NodeType::ElementDecl), 16 => Some(NodeType::AttributeDecl), 17 => Some(NodeType::EntityDecl), 18 => Some(NodeType::NamespaceDecl), 19 => Some(NodeType::XIncludeStart), 20 => Some(NodeType::XIncludeEnd), 21 => Some(NodeType::DOCBDocumentNode), _ => None, } } } 
�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������libxml-0.3.5/src/wrapper.h��������������������������������������������������������������������������0000644�0000000�0000000�00000000325�10461020230�0013540�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������#include #include #include #include #include #include #include �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������libxml-0.3.5/src/xpath.rs���������������������������������������������������������������������������0000644�0000000�0000000�00000021112�10461020230�0013376�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������//! 
The `XPath` functionality use crate::bindings::*; use crate::c_helpers::*; use crate::readonly::RoNode; use crate::tree::{Document, DocumentRef, DocumentWeak, Node}; use libc::{c_char, c_void, size_t}; use std::cell::RefCell; use std::ffi::{CStr, CString}; use std::fmt; use std::rc::Rc; use std::str; ///Thinly wrapped libxml2 xpath context pub(crate) type ContextRef = Rc>; #[derive(Debug)] pub(crate) struct _Context(pub(crate) xmlXPathContextPtr); impl Drop for _Context { ///free xpath context when it goes out of scope fn drop(&mut self) { unsafe { xmlXPathFreeContext(self.0); } } } /// An XPath context #[derive(Clone)] pub struct Context { /// Safe reference to the libxml2 context pointer pub(crate) context_ptr: ContextRef, ///Document contains pointer, needed for ContextPtr, so we need to borrow Document to prevent it's freeing pub(crate) document: DocumentWeak, } ///Essentially, the result of the evaluation of some xpath expression #[derive(Debug)] pub struct Object { ///libxml's `ObjectPtr` pub ptr: xmlXPathObjectPtr, document: DocumentWeak, } impl Context { ///create the xpath context for a document pub fn new(doc: &Document) -> Result { let ctxtptr = unsafe { xmlXPathNewContext(doc.doc_ptr()) }; if ctxtptr.is_null() { Err(()) } else { Ok(Context { context_ptr: Rc::new(RefCell::new(_Context(ctxtptr))), document: Rc::downgrade(&doc.0), }) } } pub(crate) fn new_ptr(docref: &DocumentRef) -> Result { let ctxtptr = unsafe { xmlXPathNewContext(docref.borrow().doc_ptr) }; if ctxtptr.is_null() { Err(()) } else { Ok(Context { context_ptr: Rc::new(RefCell::new(_Context(ctxtptr))), document: Rc::downgrade(docref), }) } } /// Returns the raw libxml2 context pointer behind the struct pub fn as_ptr(&self) -> xmlXPathContextPtr { self.context_ptr.borrow().0 } /// Instantiate a new Context for the Document of a given Node. 
/// Note: the Context is root-level for that document, use `.set_context_node` to limit scope to this node pub fn from_node(node: &Node) -> Result { let docref = node.get_docref().upgrade().unwrap(); Context::new_ptr(&docref) } /// Register a namespace prefix-href pair on the xpath context pub fn register_namespace(&self, prefix: &str, href: &str) -> Result<(), ()> { let c_prefix = CString::new(prefix).unwrap(); let c_href = CString::new(href).unwrap(); unsafe { let result = xmlXPathRegisterNs( self.as_ptr(), c_prefix.as_bytes().as_ptr(), c_href.as_bytes().as_ptr(), ); if result != 0 { Err(()) } else { Ok(()) } } } ///evaluate an xpath pub fn evaluate(&self, xpath: &str) -> Result { let c_xpath = CString::new(xpath).unwrap(); let ptr = unsafe { xmlXPathEvalExpression(c_xpath.as_bytes().as_ptr(), self.as_ptr()) }; if ptr.is_null() { Err(()) } else { Ok(Object { ptr, document: self.document.clone(), }) } } ///evaluate an xpath on a context Node pub fn node_evaluate(&self, xpath: &str, node: &Node) -> Result { let c_xpath = CString::new(xpath).unwrap(); let ptr = unsafe { xmlXPathNodeEval(node.node_ptr(), c_xpath.as_bytes().as_ptr(), self.as_ptr()) }; if ptr.is_null() { Err(()) } else { Ok(Object { ptr, document: self.document.clone(), }) } } ///evaluate an xpath on a context RoNode pub fn node_evaluate_readonly(&self, xpath: &str, node: RoNode) -> Result { let c_xpath = CString::new(xpath).unwrap(); let ptr = unsafe { xmlXPathNodeEval(node.0, c_xpath.as_bytes().as_ptr(), self.as_ptr()) }; if ptr.is_null() { Err(()) } else { Ok(Object { ptr, document: self.document.clone(), }) } } /// localize xpath context to a specific Node pub fn set_context_node(&mut self, node: &Node) -> Result<(), ()> { unsafe { let result = xmlXPathSetContextNode(node.node_ptr(), self.as_ptr()); if result != 0 { return Err(()); } } Ok(()) } /// find nodes via xpath, at a specified node or the document root pub fn findnodes(&mut self, xpath: &str, node_opt: Option<&Node>) -> Result, ()> { let 
evaluated = if let Some(node) = node_opt { self.node_evaluate(xpath, node)? } else { self.evaluate(xpath)? }; Ok(evaluated.get_nodes_as_vec()) } /// find literal values via xpath, at a specified node or the document root pub fn findvalues(&mut self, xpath: &str, node_opt: Option<&Node>) -> Result, ()> { let evaluated = if let Some(node) = node_opt { self.node_evaluate(xpath, node)? } else { self.evaluate(xpath)? }; Ok(evaluated.get_nodes_as_str()) } /// find a literal value via xpath, at a specified node or the document root pub fn findvalue(&mut self, xpath: &str, node_opt: Option<&Node>) -> Result { let evaluated = if let Some(node) = node_opt { self.node_evaluate(xpath, node)? } else { self.evaluate(xpath)? }; Ok(evaluated.to_string()) } } impl Drop for Object { /// free the memory allocated fn drop(&mut self) { unsafe { xmlXPathFreeObject(self.ptr); } } } impl Object { ///get the number of nodes in the result set pub fn get_number_of_nodes(&self) -> usize { let v = xmlXPathObjectNumberOfNodes(self.ptr); if v == -1 { panic!("rust-libxml: xpath: Passed in null pointer!"); } if v == -2 { // No nodes found! 
return 0; } if v < -2 { panic!("rust-libxml: xpath: expected non-negative number of result nodes"); } v as usize } /// returns the result set as a vector of `Node` objects pub fn get_nodes_as_vec(&self) -> Vec { let n = self.get_number_of_nodes(); let mut vec: Vec = Vec::with_capacity(n); let slice = if n > 0 { xmlXPathObjectGetNodes(self.ptr, n as size_t) } else { Vec::new() }; for ptr in slice { if ptr.is_null() { panic!("rust-libxml: xpath: found null pointer result set"); } let node = Node::wrap(ptr, &self.document.upgrade().unwrap()); vec.push(node); } vec } /// returns the result set as a vector of `RoNode` objects pub fn get_readonly_nodes_as_vec(&self) -> Vec { let n = self.get_number_of_nodes(); let mut vec: Vec = Vec::with_capacity(n); let slice = if n > 0 { xmlXPathObjectGetNodes(self.ptr, n as size_t) } else { Vec::new() }; for ptr in slice { if ptr.is_null() { panic!("rust-libxml: xpath: found null pointer result set"); } vec.push(RoNode(ptr)); } vec } /// returns the result set as a vector of Strings pub fn get_nodes_as_str(&self) -> Vec { let n = self.get_number_of_nodes(); let mut vec: Vec = Vec::with_capacity(n); let slice = if n > 0 { xmlXPathObjectGetNodes(self.ptr, n as size_t) } else { Vec::new() }; for ptr in slice { if ptr.is_null() { panic!("rust-libxml: xpath: found null pointer result set"); } let value_ptr = unsafe { xmlXPathCastNodeToString(ptr) }; let c_value_string = unsafe { CStr::from_ptr(value_ptr as *const c_char) }; let ready_str = c_value_string.to_string_lossy().into_owned(); bindgenFree(value_ptr as *mut c_void); vec.push(ready_str); } vec } } impl fmt::Display for Object { /// use if the XPath used was meant to return a string, such as string(//foo/@attr) fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { unsafe { let receiver = xmlXPathCastToString(self.ptr); let c_string = CStr::from_ptr(receiver as *const c_char); let rust_string = str::from_utf8(c_string.to_bytes()).unwrap().to_owned(); bindgenFree(receiver as *mut 
c_void); write!(f, "{rust_string}") } } } /// Calls the binding to http://xmlsoft.org/html/libxml-xpath.html#xmlXPathCompile and return true if /// a non-null pointer is returned. The idea is to use this to validate an xpath independent of context. /// Tests describing what this validates in tests/xpath_tests.rs pub fn is_well_formed_xpath(xpath: &str) -> bool { let c_xpath = CString::new(xpath).unwrap(); let xml_xpath_comp_expr_ptr = unsafe { xmlXPathCompile(c_xpath.as_bytes().as_ptr()) }; if xml_xpath_comp_expr_ptr.is_null() { false } else { bindgenFree(xml_xpath_comp_expr_ptr as *mut c_void); true } } ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������libxml-0.3.5/tests/VALGRIND.md����������������������������������������������������������������������0000644�0000000�0000000�00000002527�10461020230�0014020�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������It is often good practice, especially when venturing on large API refactors, to double-check for any newly created memory leaks. Some leaks can only be spotted in external projects that show advance use cases of `rust-libxml`, for example allocating a `Node` in a default trait of a struct with a `Node` field. 
For now the only safe approach to that pattern is using the `Node::null()` placeholder, but the Rust idiomatic approach is to instead refactor to an `Option` field. Some, more direct, leak scenarios can already be spotted from the libxml test suite, and one can use valgrind to obtain a report via a call of the form: ``` valgrind --leak-check=full target/debug/base_tests-3d29e5da1f969267 ``` Additionally, as Rust nightlies keep evolving, a specific allocation system may be necessary to properly run valgrind. At the time of writing, `rust-libxml` tests need no such changes, but some external projects do. For convenience, here is a known working preamble, which can be added to the preambles of executable files, including example and test files. ```rust #![feature(alloc_system, allocator_api)] extern crate alloc_system; use alloc_system::System; #[global_allocator] static A: System = System; ``` For more discussion motivating this explanation, see the respective [GitHub pull request](https://github.com/KWARC/rust-libxml/pull/43).�������������������������������������������������������������������������������������������������������������������������������������������������������������������������libxml-0.3.5/tests/base_tests.rs��������������������������������������������������������������������0000644�0000000�0000000�00000020434�10461020230�0014767�0����������������������������������������������������������������������������������������������������ustar �����������������������������������������������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������//! Base API tests, to be split into distinct sub-suites later on //! 
use std::env; use std::fs::File; use std::io::Read; use libxml::parser::{Parser, ParserOptions}; use libxml::tree::{Document, Node, SaveOptions}; #[test] /// Build a hello world XML doc fn hello_builder() { let doc_result = Document::new(); assert!(doc_result.is_ok()); let mut doc = doc_result.unwrap(); // This tests for functionality (return self if there is no root element) that is removed. let doc_node = doc.get_root_element(); assert_eq!(doc_node, None, "empty document has no root element"); let hello_element_result = Node::new("hello", None, &doc); assert!(hello_element_result.is_ok()); let mut hello_element = hello_element_result.unwrap(); assert!(hello_element.set_content("world!").is_ok()); doc.set_root_element(&hello_element); assert!(hello_element.set_content("world!").is_ok()); let added = hello_element.new_child(None, "child"); assert!(added.is_ok()); let mut new_child = added.unwrap(); assert!(new_child.set_content("set content").is_ok()); assert_eq!(new_child.get_content(), "set content"); assert_eq!(hello_element.get_content(), "world!set content"); let node_string = doc.node_to_string(&hello_element); assert!(node_string.len() > 1); assert!(hello_element.set_name("world").is_ok()); assert_eq!(hello_element.get_name(), "world"); let doc_string = doc.to_string(); assert!(doc_string.len() > 1); let output_path = env::temp_dir().join("rust_libxml_tests_helloworld.xml"); assert!(doc.save_file(&output_path.display().to_string()).is_ok()); } #[test] fn create_pi() { let doc_result = Document::new(); assert!(doc_result.is_ok()); let mut doc = doc_result.unwrap(); // Add a PI let node_ok: Result = doc.create_processing_instruction("piname", "picontent"); assert!(node_ok.is_ok()); assert_eq!(node_ok.unwrap().get_content(), "picontent"); let doc_string = doc.to_string(); assert!(doc_string.len() > 1); } #[test] /// Duplicate an xml file fn duplicate_file() { let parser = Parser::default(); { let doc_result = parser.parse_file("tests/resources/file01.xml"); 
assert!(doc_result.is_ok()); let doc = doc_result.unwrap(); let output_path = env::temp_dir().join("rust_libxml_tests_copy.xml"); doc.save_file(&output_path.display().to_string()).unwrap(); } } #[test] // Can parse an xml string in memory fn can_parse_xml_string() { let mut file = File::open("tests/resources/file01.xml").unwrap(); let mut xml_string = String::new(); file.read_to_string(&mut xml_string).unwrap(); let parser = Parser::default(); let doc = parser.parse_string(&xml_string).unwrap(); assert_eq!(doc.get_root_element().unwrap().get_name(), "root"); } #[test] /// Can load an HTML file fn can_load_html_file() { let parser = Parser::default_html(); { let doc_result = parser.parse_file("tests/resources/example.html"); assert!(doc_result.is_ok()); let doc = doc_result.unwrap(); let root = doc.get_root_element().unwrap(); assert_eq!(root.get_name(), "html"); } } fn create_test_document(file: Option<&str>) -> Document { let parser = Parser::default(); let doc_result = parser.parse_file(file.unwrap_or("tests/resources/file01.xml")); assert!(doc_result.is_ok()); doc_result.unwrap() } #[test] fn document_can_import_node() { let doc1 = create_test_document(None); let mut doc2 = create_test_document(None); assert_eq!( doc2.get_root_element().unwrap().get_child_elements().len(), 2 ); let mut elements = doc1.get_root_element().unwrap().get_child_elements(); let mut node = elements.pop().unwrap(); node.unlink(); let mut imported = doc2.import_node(&mut node).unwrap(); assert!(doc2 .get_root_element() .unwrap() .add_child(&mut imported) .is_ok()); assert_eq!( doc2.get_root_element().unwrap().get_child_elements().len(), 3 ); } #[test] fn document_formatted_serialization() { let doc = create_test_document(Some("tests/resources/unformatted.xml")); let doc_str = doc.to_string(); // don't insist too hard on the length, cross-platform differences may have a minor influence assert!(doc_str.len() > 370); let doc_str_formatted = doc.to_string_with_options(SaveOptions { format: 
true, ..SaveOptions::default() }); assert!(doc_str_formatted.len() > 460); // basic assertion - a formatted document is longer than an unformatted one assert!(doc_str_formatted.len() > doc_str.len()); } #[test] /// Test well-formedness of a Rust string /// IMPORTANT: Currenlty NOT THREAD-SAFE, use in single-threaded apps only! fn well_formed_html() { let parser = Parser::default_html(); let trivial_well_formed = parser.is_well_formed_html("\n"); assert!(trivial_well_formed); let trivial_ill_formed = parser.is_well_formed_html("garbage"); assert!(!trivial_ill_formed); let should_ill_formed = parser.is_well_formed_html("> "); assert!(!should_ill_formed); let should_well_formed = parser.is_well_formed_html("\nTest\n

Tiny

2"); assert!(should_well_formed); } #[test] /// Parse & serialize HTML fragment fn html_fragment() { let fragment = r#"
Compression results on incompressible data.

Compression results on incompressible data.

"#; let parser = Parser::default_html(); let document = parser .parse_string_with_options( fragment, ParserOptions { no_def_dtd: true, no_implied: true, ..Default::default() }, ) .unwrap(); let mut serialized_fragment = document.to_string_with_options(SaveOptions { no_empty_tags: true, as_html: true, ..Default::default() }); let _added_newline = serialized_fragment.pop(); // remove added '\n' assert_eq!(fragment, serialized_fragment); } fn serialization_roundtrip(file_name: &str) { let file_result = std::fs::read_to_string(file_name); assert!(file_result.is_ok()); let xml_file = file_result.unwrap(); let parser = Parser::default(); let parse_result = parser.parse_string(xml_file.as_bytes()); assert!(parse_result.is_ok()); let doc = parse_result.unwrap(); let doc_str = doc.to_string(); assert_eq!(strip_whitespace(&xml_file), strip_whitespace(&doc_str)); } fn strip_whitespace(string: &str) -> String { string.replace("\r","") .replace("\n", "") .replace(" ", "") } #[test] fn simple_serialization_test01() { serialization_roundtrip("tests/resources/file01.xml"); } #[test] fn simple_serialization_unformatted() { serialization_roundtrip("tests/resources/unformatted.xml"); } #[test] fn simple_serialization_namespaces() { serialization_roundtrip("tests/resources/simple_namespaces.xml"); } #[test] fn serialization_no_empty() { let source_result = std::fs::read_to_string("tests/resources/empty_tags.xml"); assert!(source_result.is_ok()); let source_file = source_result.unwrap(); let result = std::fs::read_to_string("tests/resources/empty_tags_result.xml"); assert!(result.is_ok()); let result_file = result.unwrap(); let options = SaveOptions { no_empty_tags: true, ..SaveOptions::default() }; let parser = Parser::default(); let parse_result = parser.parse_string(source_file.as_bytes()); assert!(parse_result.is_ok()); let doc = parse_result.unwrap(); let doc_str = doc.to_string_with_options(options); assert_eq!(strip_whitespace(&result_file), strip_whitespace(&doc_str)); } 
#[test] fn serialization_as_html() { let source_result = std::fs::read_to_string("tests/resources/as_html.xml"); assert!(source_result.is_ok()); let source_file = source_result.unwrap(); let result = std::fs::read_to_string("tests/resources/as_html_result.xml"); assert!(result.is_ok()); let result_file = result.unwrap(); let options = SaveOptions { as_html: true, ..SaveOptions::default() }; let parser = Parser::default(); let parse_result = parser.parse_string(source_file.as_bytes()); assert!(parse_result.is_ok()); let doc = parse_result.unwrap(); let doc_str = doc.to_string_with_options(options); assert_eq!(strip_whitespace(&result_file), strip_whitespace(&doc_str)); } libxml-0.3.5/tests/codec_tests.rs000064400000000000000000000037371046102023000151410ustar 00000000000000//! BOM parsing tests //! use libxml::parser::{Parser, XmlParseError}; use libxml::tree::Document; use std::fs; use std::io; use std::io::prelude::*; // HELPERS ///Read the entire file to a byte vector. Similar to read_to_string with ///no encoding assumption. fn read_to_end(path: &str) -> io::Result> { let mut buffer = Vec::new(); let mut file = fs::File::open(path)?; file.read_to_end(&mut buffer)?; Ok(buffer) } ///Generate a unittest for a document result from parsing a variant of file01. fn file01_test(doc_result: Result) { assert!(doc_result.is_ok()); let doc = doc_result.unwrap(); let root = doc.get_root_element().unwrap(); // Tests let root_children = root.get_child_nodes(); assert_eq!(root_children.len(), 5, "file01 root has five child nodes"); let mut element_children = root.get_child_elements(); assert_eq!( element_children.len(), 2, "file01 root has two child elements" ); assert_eq!(element_children.pop().unwrap().get_name(), "child"); assert_eq!(element_children.pop().unwrap().get_name(), "child"); assert!(element_children.is_empty()); } ///Run a test for both the file and the path of file01. 
fn run_test(path: &str) { let parser = Parser::default(); file01_test(parser.parse_file(path)); let input = read_to_end(path).unwrap(); file01_test(parser.parse_string(&input)); } // ENCODINGS #[test] fn utf8_test() { run_test("tests/resources/file01.xml"); } #[test] fn utf16le_test() { run_test("tests/resources/file01_utf16le.xml"); } #[test] fn utf16be_test() { run_test("tests/resources/file01_utf16be.xml"); } // BOM #[test] fn utf8_bom_test() { run_test("tests/resources/file01_utf8_bom.xml"); } #[test] fn utf16le_bom_test() { run_test("tests/resources/file01_utf16le_bom.xml"); } #[test] fn utf16be_bom_test() { run_test("tests/resources/file01_utf16be_bom.xml"); } // UNICODE PATHS #[test] fn nonbmp_path_test() { run_test("tests/resources/file01_🔥🔥🔥.xml"); } libxml-0.3.5/tests/mutability_guards.rs000064400000000000000000000031441046102023000163620ustar 00000000000000//! Enforce Rust ownership pragmatics for the underlying libxml2 objects use libxml::parser::Parser; use libxml::tree::set_node_rc_guard; #[test] fn ownership_guards() { // Setup let parser = Parser::default(); let doc_result = parser.parse_file("tests/resources/file01.xml"); assert!(doc_result.is_ok()); let doc = doc_result.unwrap(); let root = doc.get_root_element().unwrap(); let mut first_a = root.get_first_element_child().unwrap(); let first_b = root.get_first_element_child().unwrap(); assert_eq!( first_a.get_attribute("attribute"), Some(String::from("value")) ); assert_eq!( first_b.get_attribute("attribute"), Some(String::from("value")) ); // Setting an attribute will fail and return an error, as there are too many Rc references // to the same node (Rc strong count of 3) // see `Node::node_ptr_mut` for details assert!(first_a.set_attribute("attribute", "newa").is_err()); assert_eq!( first_a.get_attribute("attribute"), Some(String::from("value")) ); assert_eq!( first_b.get_attribute("attribute"), Some(String::from("value")) ); // Try again with guard boosted, which allows the change 
set_node_rc_guard(3); // Setting an attribute will fail and return an error, as there are too many Rc references // to the same node (Rc strong count of 3) // see `Node::node_ptr_mut` for details assert!(first_a.set_attribute("attribute", "newa").is_ok()); assert_eq!( first_a.get_attribute("attribute"), Some(String::from("newa")) ); assert_eq!( first_b.get_attribute("attribute"), Some(String::from("newa")) ); } libxml-0.3.5/tests/readonly_tests.rs000064400000000000000000000030711046102023000156700ustar 00000000000000//! Tree module tests //! use libxml::parser::Parser; use libxml::readonly::RoNode; use libxml::tree::NodeType; fn dfs_node(node: RoNode) -> i32 { 1 + node .get_child_nodes() .into_iter() .map(dfs_node) .sum::() } fn dfs_element(node: RoNode) -> i32 { 1 + node .get_child_elements() .into_iter() .map(dfs_element) .sum::() } #[test] fn readonly_scan_test() { let parser = Parser::default_html(); let doc_result = parser.parse_file("tests/resources/example.html"); assert!(doc_result.is_ok()); let doc = doc_result.unwrap(); let root: RoNode = doc.get_root_readonly().unwrap(); assert_eq!(root.get_name(), "html"); // "get_child_nodes" exhaustivity test, // 33 nodes, including text, comments, etc assert_eq!(dfs_node(root), 33); // "get_element_nodes" exhaustivity test, // 13 named element nodes in example.html assert_eq!(dfs_element(root), 13); let text: RoNode = root.get_first_child().expect("first child is a text node"); assert_eq!(text.get_name(), "text"); let head: RoNode = root .get_first_element_child() .expect("head is first child of html"); assert_eq!(head.get_name(), "head"); let mut sibling: RoNode = head .get_next_sibling() .expect("head should be followed by text"); assert_eq!(sibling.get_name(), "text"); while let Some(next) = sibling.get_next_sibling() { sibling = next; if next.get_type() == Some(NodeType::ElementNode) { break; } } assert_eq!(sibling.get_type(), Some(NodeType::ElementNode)); assert_eq!(sibling.get_name(), "body"); } 
libxml-0.3.5/tests/resources/as_html.xml000064400000000000000000000002001046102023000164350ustar 00000000000000 Page Title

This is a Heading

This is a paragraph.

libxml-0.3.5/tests/resources/as_html_result.xml000064400000000000000000000003031046102023000200370ustar 00000000000000 Page Title

This is a Heading

This is a paragraph.

libxml-0.3.5/tests/resources/empty_tags.xml000064400000000000000000000001221046102023000171650ustar 00000000000000 libxml-0.3.5/tests/resources/empty_tags_result.xml000064400000000000000000000001361046102023000205700ustar 00000000000000 libxml-0.3.5/tests/resources/example.html000064400000000000000000000023661046102023000166240ustar 00000000000000 Example Domain

Example Domain

This domain is established to be used for illustrative examples in documents. You may use this domain in examples without prior coordination or asking for permission.

More information...

libxml-0.3.5/tests/resources/file01.xml000064400000000000000000000002241046102023000160740ustar 00000000000000 some text more text libxml-0.3.5/tests/resources/file01_ns.xml000064400000000000000000000015241046102023000166000ustar 00000000000000 some text more text libxml-0.3.5/tests/resources/file01_utf16be.xml000064400000000000000000000004561046102023000174370ustar 00000000000000<?xml version="1.0" encoding="UTF-16BE"?> <root> <child attribute="value">some text</child> <child attribute="empty">more text</child> </root> libxml-0.3.5/tests/resources/file01_utf16be_bom.xml000064400000000000000000000004601046102023000202670ustar 00000000000000þÿ<?xml version="1.0" encoding="UTF-16BE"?> <root> <child attribute="value">some text</child> <child attribute="empty">more text</child> </root> libxml-0.3.5/tests/resources/file01_utf16le.xml000064400000000000000000000004561046102023000174510ustar 00000000000000<?xml version="1.0" encoding="UTF-16LE"?> <root> <child attribute="value">some text</child> <child attribute="empty">more text</child> </root> libxml-0.3.5/tests/resources/file01_utf16le_bom.xml000064400000000000000000000004601046102023000203010ustar 00000000000000ÿþ<?xml version="1.0" encoding="UTF-16LE"?> <root> <child attribute="value">some text</child> <child attribute="empty">more text</child> </root> libxml-0.3.5/tests/resources/file01_utf8_bom.xml000064400000000000000000000002271046102023000177020ustar 00000000000000 some text more text libxml-0.3.5/tests/resources/file01_🔥🔥🔥.xml000064400000000000000000000002241046102023000223630ustar 00000000000000 some text more text libxml-0.3.5/tests/resources/file02.xml000064400000000000000000000001371046102023000161000ustar 00000000000000

Something

libxml-0.3.5/tests/resources/ids.xml000064400000000000000000000004211046102023000155720ustar 00000000000000

Hello

World!

libxml-0.3.5/tests/resources/schema.xml000064400000000000000000000002251046102023000162550ustar 00000000000000 Tove Jani Reminder Don't forget me this weekend! libxml-0.3.5/tests/resources/schema.xsd000064400000000000000000000006501046102023000162550ustar 00000000000000 libxml-0.3.5/tests/resources/simple_namespaces.xml000064400000000000000000000007261046102023000205130ustar 00000000000000 col 1 col 2 col 3 nested f libxml-0.3.5/tests/resources/unformatted.xml000064400000000000000000000005651046102023000173540ustar 00000000000000col 1col 2col 3 nested flibxml-0.3.5/tests/results/README.md000064400000000000000000000001121046102023000152340ustar 00000000000000# Test results This directory will contain the result files of the tests. libxml-0.3.5/tests/schema_tests.rs000064400000000000000000000124321046102023000153140ustar 00000000000000//! //! Test Schema Loading, XML Validating //! use libxml::schemas::SchemaParserContext; use libxml::schemas::SchemaValidationContext; use libxml::parser::Parser; static NOTE_SCHEMA: &str = r#" "#; static STOCK_SCHEMA: &str = r#" "#; static VALID_NOTE_XML: &str = r#" Tove Jani Reminder Don't forget me this weekend! "#; static INVALID_NOTE_XML: &str = r#" Tove Jani Reminder Don't forget me this weekend! "#; static INVALID_STOCK_XML: &str = r#" 2014-01-01 NOT A NUMBER 2014-01-02 540.98 NOT A DATE 543.93 =2.12, at least not as currently implemented. // while it still reliably succeeds single-threaded, new implementation is needed to use // these in a parallel setting. 
#[test] fn schema_from_string() { let xml = Parser::default() .parse_string(VALID_NOTE_XML) .expect("Expected to be able to parse XML Document from string"); let mut xsdparser = SchemaParserContext::from_buffer(NOTE_SCHEMA); let xsd = SchemaValidationContext::from_parser(&mut xsdparser); if let Err(errors) = xsd { for err in &errors { eprintln!("{}", err.message.as_ref().unwrap()); } panic!("Failed to parse schema with {} errors", errors.len()); } let mut xsdvalidator = xsd.unwrap(); // loop over more than one validation to test for leaks in the error handling callback interactions for _ in 0..5 { if let Err(errors) = xsdvalidator.validate_document(&xml) { for err in &errors { eprintln!("{}", err.message.as_ref().unwrap()); } panic!("Invalid XML accoding to XSD schema"); } } } #[test] fn schema_from_string_generates_errors() { let xml = Parser::default() .parse_string(INVALID_NOTE_XML) .expect("Expected to be able to parse XML Document from string"); let mut xsdparser = SchemaParserContext::from_buffer(NOTE_SCHEMA); let xsd = SchemaValidationContext::from_parser(&mut xsdparser); if let Err(errors) = xsd { for err in &errors { eprintln!("{}", err.message.as_ref().unwrap()); } panic!("Failed to parse schema with {} errors", errors.len()); } let mut xsdvalidator = xsd.unwrap(); for _ in 0..5 { if let Err(errors) = xsdvalidator.validate_document(&xml) { for err in &errors { assert_eq!( "Element 'bad': This element is not expected. 
Expected is ( to ).\n", err.message.as_ref().unwrap() ); } } } } #[test] fn schema_from_string_reports_unique_errors() { let xml = Parser::default() .parse_string(INVALID_STOCK_XML) .expect("Expected to be able to parse XML Document from string"); let mut xsdparser = SchemaParserContext::from_buffer(STOCK_SCHEMA); let xsd = SchemaValidationContext::from_parser(&mut xsdparser); if let Err(errors) = xsd { for err in &errors { eprintln!("{}", err.message.as_ref().unwrap()); } panic!("Failed to parse schema with {} errors", errors.len()); } let mut xsdvalidator = xsd.unwrap(); for _ in 0..5 { if let Err(errors) = xsdvalidator.validate_document(&xml) { assert_eq!(errors.len(), 5); let expected_errors = vec![ "Element 'stock', attribute 'junkAttribute': The attribute 'junkAttribute' is not allowed.\n", "Element 'stock': The attribute 'ticker' is required but missing.\n", "Element 'stock': The attribute 'exchange' is required but missing.\n", "Element 'price': 'NOT A NUMBER' is not a valid value of the atomic type 'xs:float'.\n", "Element 'date': 'NOT A DATE' is not a valid value of the atomic type 'xs:date'.\n" ]; for err_msg in expected_errors { assert!(errors.iter().any(|err| err.message.as_ref().unwrap() == err_msg), "Expected error message {} was not found", err_msg); } } } } libxml-0.3.5/tests/tree_tests.rs000064400000000000000000000506071046102023000150210ustar 00000000000000//! Tree module tests //! 
use libxml::parser::Parser; use libxml::tree::{Document, Namespace, Node, NodeType}; #[test] /// Root node and first child of root node are different /// (There is a tiny chance this might fail for a correct program) fn child_of_root_has_different_hash() { let parser = Parser::default(); { let doc_result = parser.parse_file("tests/resources/file01.xml"); assert!(doc_result.is_ok()); let doc = doc_result.unwrap(); let root = doc.get_root_element().unwrap(); assert!(!root.is_text_node()); if let Some(child) = root.get_first_child() { assert!(root != child); } else { assert!(false); //test failed - child doesn't exist } // same check with last child if let Some(child) = root.get_last_child() { assert!(root != child); } else { assert!(false); //test failed - child doesn't exist } } } #[test] /// Siblings basic unit tests fn node_sibling_accessors() { let mut doc = Document::new().unwrap(); let hello_element_result = Node::new("hello", None, &doc); assert!(hello_element_result.is_ok()); let mut hello_element = hello_element_result.unwrap(); doc.set_root_element(&hello_element); let mut new_sibling = Node::new("sibling", None, &doc).unwrap(); assert!(hello_element.add_prev_sibling(&mut new_sibling).is_ok()); } #[test] fn node_children_accessors() { // Setup let parser = Parser::default(); let doc_result = parser.parse_file("tests/resources/file01.xml"); assert!(doc_result.is_ok()); let doc = doc_result.unwrap(); let root = doc.get_root_element().unwrap(); // Tests let root_children = root.get_child_nodes(); assert_eq!(root_children.len(), 5, "file01 root has five child nodes"); let mut element_children = root.get_child_elements(); assert_eq!( element_children.len(), 2, "file01 root has two child elements" ); assert_eq!(element_children.pop().unwrap().get_name(), "child"); assert_eq!(element_children.pop().unwrap().get_name(), "child"); assert!(element_children.is_empty()); } #[test] fn node_attributes_accessor() { // Setup let parser = Parser::default(); let doc_result = 
parser.parse_file("tests/resources/file01.xml"); assert!(doc_result.is_ok()); let doc = doc_result.unwrap(); let root = doc.get_root_element().unwrap(); let mut root_elements = root.get_child_elements(); let child_opt = root_elements.first_mut(); assert!(child_opt.is_some()); let child = child_opt.unwrap(); // All attributes let attributes = child.get_attributes(); assert_eq!(attributes.len(), 1); assert_eq!(attributes.get("attribute"), Some(&"value".to_string())); // Has assert_eq!(child.has_attribute("attribute"), true); // Get assert_eq!(child.get_attribute("attribute"), Some("value".to_string())); // Get as node let attr_node_opt = child.get_attribute_node("attribute"); assert!(attr_node_opt.is_some()); let attr_node = attr_node_opt.unwrap(); assert_eq!(attr_node.get_name(), "attribute"); assert_eq!(attr_node.get_type(), Some(NodeType::AttributeNode)); // Set assert!(child.set_attribute("attribute", "setter_value").is_ok()); assert_eq!( child.get_attribute("attribute"), Some("setter_value".to_string()) ); // Remove assert!(child.remove_attribute("attribute").is_ok()); assert_eq!(child.get_attribute("attribute"), None); assert_eq!(child.has_attribute("attribute"), false); // Recount let attributes = child.get_attributes(); assert_eq!(attributes.len(), 0); } #[test] fn node_attributes_ns_accessor() { // Setup let parser = Parser::default(); let doc_result = parser.parse_file("tests/resources/file01_ns.xml"); assert!(doc_result.is_ok()); let doc = doc_result.unwrap(); let root = doc.get_root_element().unwrap(); let mut root_elements = root.get_child_elements(); let child_opt = root_elements.first_mut(); assert!(child_opt.is_some()); let child = child_opt.unwrap(); // All attributes let attributes = child.get_attributes_ns(); assert_eq!(attributes.len(), 3); assert_eq!( attributes.get(&("attribute".to_string(), None)), Some(&"value1".to_string()) ); let namespaces = child.get_namespaces(&doc); assert_eq!(namespaces.len(), 2); let foo_ns = namespaces[0].clone(); let 
bar_ns = namespaces[1].clone(); assert_eq!( attributes.get(&("attribute".to_string(), Some(foo_ns.clone()))), Some(&"foo1".to_string()) ); assert_eq!( attributes.get(&("attr".to_string(), Some(bar_ns.clone()))), Some(&"bar1".to_string()) ); // Has assert!(child.has_attribute("attribute")); assert!(child.has_attribute_no_ns("attribute")); assert!(child.has_attribute_ns("attribute", "http://www.example.com/myns"),); assert!(child.has_attribute("attr")); assert!(!child.has_attribute_no_ns("attr")); assert!(child.has_attribute_ns("attr", "http://www.example.com/myns")); // Get assert_eq!( child.get_attribute_no_ns("attribute"), Some("value1".to_string()) ); assert_eq!( child.get_attribute_ns("attribute", "http://www.example.com/myns"), Some("foo1".to_string()) ); assert_eq!( child.get_attribute_ns("attr", "http://www.example.com/myns"), Some("bar1".to_string()) ); // Get as node let attr_node_opt = child.get_attribute_node_no_ns("attribute"); assert!(attr_node_opt.is_some()); let attr_node = attr_node_opt.unwrap(); assert_eq!(attr_node.get_name(), "attribute"); assert_eq!(attr_node.get_type(), Some(NodeType::AttributeNode)); let attr_node_opt = child.get_attribute_node_no_ns("attr"); assert!(attr_node_opt.is_none()); let attr_node_opt = child.get_attribute_node_ns("attr", "http://www.example.com/myns"); assert!(attr_node_opt.is_some()); let attr_node = attr_node_opt.unwrap(); assert_eq!(attr_node.get_name(), "attr"); assert_eq!(attr_node.get_type(), Some(NodeType::AttributeNode)); // Set assert!(child.set_attribute("attribute", "setter_value").is_ok()); assert_eq!( child.get_attribute_no_ns("attribute"), Some("setter_value".to_string()) ); assert!(child .set_attribute_ns("attribute", "foo_value", &foo_ns) .is_ok()); assert_eq!( child.get_attribute_no_ns("attribute"), Some("setter_value".to_string()) ); // Remove assert!(child.has_attribute_no_ns("attribute")); assert!(child.remove_attribute_no_ns("attribute").is_ok()); assert_eq!(child.get_attribute_no_ns("attribute"), 
None); assert!(!child.has_attribute_no_ns("attribute")); // Recount let attributes = child.get_attributes_ns(); assert_eq!(attributes.len(), 2); } #[test] fn namespace_partial_eq() { // Setup let parser = Parser::default(); let doc_result = parser.parse_file("tests/resources/file01_ns.xml"); assert!(doc_result.is_ok()); let doc = doc_result.unwrap(); let root = doc.get_root_element().unwrap(); let mut root_elements = root.get_child_elements(); let child1_opt = root_elements.first_mut(); assert!(child1_opt.is_some()); let child1 = child1_opt.unwrap(); // Child 1 namespaces let namespaces1 = child1.get_namespaces(&doc); assert_eq!(namespaces1.len(), 2); let foo_ns1 = namespaces1[0].clone(); assert_eq!(foo_ns1.get_prefix(), "foo"); assert_eq!(foo_ns1.get_href(), "http://www.example.com/myns"); let bar_ns1 = namespaces1[1].clone(); assert_eq!(bar_ns1.get_prefix(), "bar"); assert_eq!(bar_ns1.get_href(), "http://www.example.com/myns"); // The current implementation of PartialEq for Namespace compares the prefix // and href assert!(foo_ns1 != bar_ns1); // Compare with child2 namespace let child2_opt = child1.get_next_element_sibling(); assert!(child2_opt.is_some()); let child2 = child2_opt.unwrap(); let attributes2 = child2.get_attributes_ns(); assert_eq!(attributes2.len(), 2); let namespaces2 = child2.get_namespaces(&doc); assert_eq!(namespaces2.len(), 1); let foo_ns2 = namespaces2[0].clone(); // The current implementation of PartialEq for Namespace compares the prefix // and href not the pointer assert!(foo_ns1 == foo_ns2); assert_eq!(foo_ns1.get_href(), foo_ns2.get_href()); assert_eq!(foo_ns1.get_prefix(), foo_ns2.get_prefix()); assert_ne!(foo_ns1.ns_ptr(), foo_ns2.ns_ptr()); } #[test] fn attribute_namespace_accessors() { let mut doc = Document::new().unwrap(); let element_result = Node::new("example", None, &doc); assert!(element_result.is_ok()); let mut element = element_result.unwrap(); doc.set_root_element(&element); let ns_result = Namespace::new( "myxml", 
"http://www.w3.org/XML/1998/namespace", &mut element, ); assert!(ns_result.is_ok()); let ns = ns_result.unwrap(); assert!(element.set_attribute_ns("id", "testing", &ns).is_ok()); let id_attr = element.get_attribute_ns("id", "http://www.w3.org/XML/1998/namespace"); assert!(id_attr.is_some()); assert_eq!(id_attr.unwrap(), "testing"); let id_regular = element.get_attribute("id"); assert!(id_regular.is_some()); assert_eq!(id_regular.unwrap(), "testing"); let id_false_ns = element.get_attribute_ns("id", "http://www.foobar.org"); assert!(id_false_ns.is_none()); let fb_ns_result = Namespace::new("fb", "http://www.foobar.org", &mut element); assert!(fb_ns_result.is_ok()); let fb_ns = fb_ns_result.unwrap(); assert!(element.set_attribute_ns("fb", "fb", &fb_ns).is_ok()); assert_eq!( element.get_attribute_ns("fb", "http://www.foobar.org"), Some("fb".to_string()) ); assert!(element .remove_attribute_ns("fb", "http://www.foobar.org") .is_ok()); assert_eq!( element.get_attribute_ns("fb", "http://www.foobar.org"), None ); let ns_prefix = element.lookup_namespace_prefix("http://www.w3.org/XML/1998/namespace"); assert_eq!(ns_prefix, Some("xml".to_string())); // system ns has the global prefix when doing global lookup let fb_prefix = element.lookup_namespace_prefix("http://www.foobar.org"); assert_eq!(fb_prefix, Some("fb".to_string())); // system ns has the global prefix when doing global lookup let ns_uri = element.lookup_namespace_uri("myxml"); assert_eq!( ns_uri, Some("http://www.w3.org/XML/1998/namespace".to_string()) ); // system ns has the global uri when doing global lookup let fb_uri = element.lookup_namespace_uri("fb"); assert_eq!(fb_uri, Some("http://www.foobar.org".to_string())); // system ns has the global prefix when doing global lookup } #[test] fn attribute_no_namespace() { let mut doc = Document::new().unwrap(); let element_result = Node::new("example", None, &doc); assert!(element_result.is_ok()); let mut element = element_result.unwrap(); 
doc.set_root_element(&element); let ns_result = Namespace::new("myns", "https://www.example.com/myns", &mut element); assert!(ns_result.is_ok()); let ns = ns_result.unwrap(); assert!(element.set_attribute_ns("foo", "ns", &ns).is_ok()); let foo_ns_attr = element.get_attribute_ns("foo", "https://www.example.com/myns"); assert!(foo_ns_attr.is_some()); assert_eq!(foo_ns_attr.unwrap(), "ns"); let foo_no_ns_attr = element.get_attribute_no_ns("foo"); assert!(foo_no_ns_attr.is_none()); assert!(element.set_attribute("foo", "no_ns").is_ok()); let foo_no_ns_attr = element.get_attribute_no_ns("foo"); assert!(foo_no_ns_attr.is_some()); assert_eq!(foo_no_ns_attr.unwrap(), "no_ns"); assert!(element.remove_attribute_no_ns("foo").is_ok()); let foo_no_ns_attr = element.get_attribute_no_ns("foo"); assert!(foo_no_ns_attr.is_none()); assert!(element.set_attribute("bar", "bar").is_ok()); let bar_no_ns_attr = element.get_attribute_no_ns("bar"); assert!(bar_no_ns_attr.is_some()); assert_eq!(bar_no_ns_attr.unwrap(), "bar"); } #[test] fn node_can_unbind() { let mut doc = Document::new().unwrap(); let element_result = Node::new("example", None, &doc); assert!(element_result.is_ok()); let mut element = element_result.unwrap(); doc.set_root_element(&element); let mut first_child = Node::new("first", None, &doc).unwrap(); let mut second_child = Node::new("second", None, &doc).unwrap(); let mut third_child = Node::new("third", None, &doc).unwrap(); assert!(element.add_child(&mut first_child).is_ok()); assert!(element.add_child(&mut second_child).is_ok()); assert!(element.add_child(&mut third_child).is_ok()); assert_eq!(element.get_child_nodes().len(), 3); first_child.unbind_node(); assert_eq!(element.get_child_nodes().len(), 2); second_child.unlink_node(); assert_eq!(element.get_child_nodes().len(), 1); third_child.unlink(); assert_eq!(element.get_child_nodes().len(), 0); // Test reparenting via unlink let mut transfer = Node::new("transfer", None, &doc).unwrap(); assert!(element.add_child(&mut 
transfer).is_ok()); assert!(transfer.append_text("test text").is_ok()); let mut receiver = Node::new("receiver", None, &doc).unwrap(); assert!(element.add_child(&mut receiver).is_ok()); assert_eq!(element.get_child_nodes().len(), 2); assert_eq!(transfer.get_child_nodes().len(), 1); assert_eq!(receiver.get_child_nodes().len(), 0); transfer.unlink(); assert_eq!(element.get_child_nodes().len(), 1); assert_eq!(receiver.get_child_nodes().len(), 0); assert!(receiver.add_child(&mut transfer).is_ok()); assert_eq!(receiver.get_child_nodes().len(), 1); assert_eq!(transfer.get_content(), "test text".to_owned()); assert_eq!(transfer.get_parent(), Some(receiver)); } #[test] /// Can mock a node object (useful for defaults that will be overridden) fn can_mock_node() { let doc_mock = Document::new().unwrap(); let node_mock = Node::mock(&doc_mock); assert!(!node_mock.is_text_node()); } #[test] /// Can make a mock node hashable fn can_hash_mock_node() { let doc_mock = Document::new().unwrap(); let node_mock = Node::mock(&doc_mock); assert!(node_mock.to_hashable() > 0); } #[test] /// Can make null nodes and documents, to avoid memory allocations fn can_null_node() { let null_node = Node::null(); let second_null_node = Node::null(); assert!(null_node.is_null()); assert!(second_null_node.is_null()); assert_eq!(null_node, second_null_node); } #[test] /// Can set and get attributes fn can_manage_attributes() { let mut doc = Document::new().unwrap(); let hello_element_result = Node::new("hello", None, &doc); assert!(hello_element_result.is_ok()); let mut hello_element = hello_element_result.unwrap(); doc.set_root_element(&hello_element); let key = "examplekey"; let value = "examplevalue"; let pre_value = hello_element.get_attribute(key); assert_eq!(pre_value, None); let pre_prop_check = hello_element.has_property(key); assert_eq!(pre_prop_check, false); let pre_prop_value = hello_element.get_property(key); assert_eq!(pre_prop_value, None); assert!(hello_element.set_attribute(key, 
value).is_ok()); let new_check = hello_element.has_attribute(key); assert_eq!(new_check, true); let new_value = hello_element.get_attribute(key); assert_eq!(new_value, Some(value.to_owned())); } #[test] /// Can set and get text node content fn can_set_get_text_node_content() { let mut doc = Document::new().unwrap(); let hello_element_result = Node::new("hello", None, &doc); assert!(hello_element_result.is_ok()); let mut hello_element = hello_element_result.unwrap(); doc.set_root_element(&hello_element); assert!(hello_element.get_content().is_empty()); assert!(hello_element.append_text("hello ").is_ok()); assert_eq!(hello_element.get_content(), "hello "); assert!(hello_element.append_text("world!").is_ok()); assert_eq!(hello_element.get_content(), "hello world!"); } #[test] /// Basic namespace workflow fn can_work_with_namespaces() { let mut doc = Document::new().unwrap(); let mut root_node = Node::new("root", None, &doc).unwrap(); doc.set_root_element(&root_node); let initial_namespace_list = root_node.get_namespaces(&doc); assert_eq!(initial_namespace_list.len(), 0); let mock_ns_result = Namespace::new("mock", "http://example.com/ns/mock", &mut root_node); assert!(mock_ns_result.is_ok()); let second_ns_result = Namespace::new("second", "http://example.com/ns/second", &mut root_node); assert!(second_ns_result.is_ok()); // try to attach this namespace to a node assert!(root_node.get_namespace().is_none()); assert!(root_node.set_namespace(&mock_ns_result.unwrap()).is_ok()); let active_ns_opt = root_node.get_namespace(); assert!(active_ns_opt.is_some()); let active_ns = active_ns_opt.unwrap(); assert_eq!(active_ns.get_prefix(), "mock"); assert_eq!(active_ns.get_href(), "http://example.com/ns/mock"); // now get all namespaces for the node and check we have ours let mut namespace_list = root_node.get_namespaces(&doc); assert_eq!(namespace_list.len(), 2); let second_ns = namespace_list.pop().unwrap(); assert_eq!(second_ns.get_prefix(), "second"); 
assert_eq!(second_ns.get_href(), "http://example.com/ns/second"); let first_ns = namespace_list.pop().unwrap(); assert_eq!(first_ns.get_prefix(), "mock"); assert_eq!(first_ns.get_href(), "http://example.com/ns/mock"); } #[test] fn can_work_with_ns_declarations() { let mut doc = Document::new().unwrap(); let mut root_node = Node::new("root", None, &doc).unwrap(); doc.set_root_element(&root_node); let mock_ns_result = Namespace::new("mock1", "http://example.com/ns/mock1", &mut root_node); assert!(mock_ns_result.is_ok()); let second_ns_result = Namespace::new("mock2", "http://example.com/ns/mock2", &mut root_node); assert!(second_ns_result.is_ok()); let declarations = root_node.get_namespace_declarations(); assert_eq!(declarations.len(), 2); } #[test] /// Can view documents as nodes fn can_cast_doc_to_node() { // Setup let parser = Parser::default(); let doc_result = parser.parse_file("tests/resources/file01.xml"); assert!(doc_result.is_ok()); let doc = doc_result.unwrap(); let doc_node = doc.as_node(); assert_eq!(doc_node.get_type(), Some(NodeType::DocumentNode)); let root_node_opt = doc_node.get_first_child(); assert!(root_node_opt.is_some()); let root_node = root_node_opt.unwrap(); assert_eq!(root_node.get_name(), "root"); } #[test] fn can_replace_child() { let mut doc = Document::new().unwrap(); let mut root_node = Node::new("root", None, &doc).unwrap(); doc.set_root_element(&root_node); let mut a = Node::new("a", None, &doc).unwrap(); let mut b = Node::new("b", None, &doc).unwrap(); let mut c = Node::new("c", None, &doc).unwrap(); let mut d = Node::new("d", None, &doc).unwrap(); let mut e = Node::new("e", None, &doc).unwrap(); assert!(root_node.add_child(&mut a).is_ok()); assert!(root_node.add_child(&mut b).is_ok()); assert!(root_node.add_child(&mut c).is_ok()); assert!(root_node.add_child(&mut d).is_ok()); assert!(root_node.add_child(&mut e).is_ok()); assert_eq!( doc.to_string(), "\n\n", "document initialized correctly." 
); // replace first child with new F let f = Node::new("F", None, &doc).unwrap(); let a_result = root_node.replace_child_node(f, a); assert!(a_result.is_ok()); assert_eq!( doc.to_string(), "\n\n", "document initialized correctly." ); // replace last child with new G let g = Node::new("G", None, &doc).unwrap(); assert!(root_node.replace_child_node(g, e).is_ok()); assert_eq!( doc.to_string(), "\n\n", "document initialized correctly." ); // replace middle child with new H let h = Node::new("H", None, &doc).unwrap(); assert!(root_node.replace_child_node(h, c).is_ok()); assert_eq!( doc.to_string(), "\n\n", "document initialized correctly." ); // fail to replace a, as it is already removed. let none = Node::new("none", None, &doc).unwrap(); assert!(root_node .replace_child_node(none, a_result.unwrap()) .is_err()); // no change. assert_eq!( doc.to_string(), "\n\n", "document initialized correctly." ); // replacing with self succeeds without change. assert!(root_node.replace_child_node(b.clone(), b).is_ok()); assert_eq!( doc.to_string(), "\n\n", "document initialized correctly." ); // replacing with parent succeeds without change. assert!(root_node.replace_child_node(root_node.clone(), d).is_ok()); assert_eq!( doc.to_string(), "\n\n", "document initialized correctly." ); } libxml-0.3.5/tests/xpath_tests.rs000064400000000000000000000173511046102023000152050ustar 00000000000000//! xpath module tests //! 
use libxml::parser::Parser; use libxml::xpath::Context; #[test] /// Test the evaluation of an xpath expression yields the correct number of nodes fn xpath_result_number_correct() { let parser = Parser::default(); let doc_result = parser.parse_file("tests/resources/file01.xml"); assert!(doc_result.is_ok()); let doc = doc_result.unwrap(); let context = Context::new(&doc).unwrap(); let result1 = context.evaluate("//child").unwrap(); assert_eq!(result1.get_number_of_nodes(), 2); assert_eq!(result1.get_nodes_as_vec().len(), 2); let result2 = context.evaluate("//nonexistent").unwrap(); assert_eq!(result2.get_number_of_nodes(), 0); assert_eq!(result2.get_nodes_as_vec().len(), 0); } #[test] /// Test xpath with namespaces fn xpath_with_namespaces() { let parser = Parser::default(); let doc_result = parser.parse_file("tests/resources/simple_namespaces.xml"); assert!(doc_result.is_ok()); let doc = doc_result.unwrap(); let context = Context::new(&doc).unwrap(); assert!(context .register_namespace("h", "http://example.com/ns/hello") .is_ok()); assert!(context .register_namespace("f", "http://example.com/ns/farewell") .is_ok()); assert!(context .register_namespace("r", "http://example.com/ns/root") .is_ok()); let result_h_td = context.evaluate("//h:td").unwrap(); assert_eq!(result_h_td.get_number_of_nodes(), 3); assert_eq!(result_h_td.get_nodes_as_vec().len(), 3); let result_h_table = context.evaluate("//h:table").unwrap(); assert_eq!(result_h_table.get_number_of_nodes(), 2); assert_eq!(result_h_table.get_nodes_as_vec().len(), 2); let result_f_footer = context.evaluate("//f:footer").unwrap(); assert_eq!(result_f_footer.get_number_of_nodes(), 2); assert_eq!(result_f_footer.get_nodes_as_vec().len(), 2); let result_r = context.evaluate("//r:*").unwrap(); assert_eq!(result_r.get_number_of_nodes(), 1); assert_eq!(result_r.get_nodes_as_vec().len(), 1); let result_h = context.evaluate("//h:*").unwrap(); assert_eq!(result_h.get_number_of_nodes(), 7); 
assert_eq!(result_h.get_nodes_as_vec().len(), 7); let result_f = context.evaluate("//f:*").unwrap(); assert_eq!(result_f.get_number_of_nodes(), 4); assert_eq!(result_f.get_nodes_as_vec().len(), 4); let result_all = context.evaluate("//*").unwrap(); assert_eq!(result_all.get_number_of_nodes(), 12); assert_eq!(result_all.get_nodes_as_vec().len(), 12); let result_h_table = context.evaluate("//table").unwrap(); assert_eq!(result_h_table.get_number_of_nodes(), 0); assert_eq!(result_h_table.get_nodes_as_vec().len(), 0); assert!(doc.as_node().recursively_remove_namespaces().is_ok()); let result_h_table = context.evaluate("//table").unwrap(); assert_eq!(result_h_table.get_number_of_nodes(), 2); assert_eq!(result_h_table.get_nodes_as_vec().len(), 2); } #[test] /// Test that an xpath expression finds the correct node and /// that the class names are interpreted correctly. fn class_names() { let parser = Parser::default_html(); let doc_result = parser.parse_file("tests/resources/file02.xml"); assert!(doc_result.is_ok()); let doc = doc_result.unwrap(); let context = Context::new(&doc).unwrap(); let p_result = context.evaluate("/html/body/p"); assert!(p_result.is_ok()); let p = p_result.unwrap(); assert_eq!(p.get_number_of_nodes(), 1); let node = &p.get_nodes_as_vec()[0]; let names = node.get_class_names(); assert_eq!(names.len(), 2); assert!(names.contains("paragraph")); assert!(names.contains("important")); assert!(!names.contains("nonsense")); } #[test] /// Test that an xpath string() function processed correctly fn xpath_string_function() { let parser = Parser::default_html(); let doc_result = parser.parse_file("tests/resources/file01.xml"); assert!(doc_result.is_ok()); let doc = doc_result.unwrap(); let context = Context::new(&doc).unwrap(); let p_result = context.evaluate("string(//root//child[1]/@attribute)"); assert!(p_result.is_ok()); let p = p_result.unwrap(); // Not a node really assert_eq!(p.get_number_of_nodes(), 0); let content = p.to_string(); assert_eq!(content, 
"value"); } #[test] /// Test that the dual findnodes interfaces are operational fn findnodes_interfaces() { let parser = Parser::default_html(); let doc_result = parser.parse_file("tests/resources/file02.xml"); assert!(doc_result.is_ok()); let doc = doc_result.unwrap(); // Xpath interface let mut context = Context::new(&doc).unwrap(); let body = context.evaluate("/html/body").unwrap().get_nodes_as_vec(); let p_result = context.findnodes("p", body.first()); assert!(p_result.is_ok()); let p = p_result.unwrap(); assert_eq!(p.len(), 1); // Node interface let body_node = body.first().unwrap(); let p2_result = body_node.findnodes("p"); assert!(p2_result.is_ok()); let p2 = p2_result.unwrap(); assert_eq!(p2.len(), 1); } #[test] /// Clone is safe on Context objects fn safe_context_clone() { let parser = Parser::default_html(); let doc_result = parser.parse_file("tests/resources/file02.xml"); assert!(doc_result.is_ok()); let doc = doc_result.unwrap(); // Xpath interface let context = Context::new(&doc).unwrap(); let body = context.evaluate("/html/body").unwrap().get_nodes_as_vec(); assert_eq!(body.len(), 1); let context2 = context.clone(); let body2 = context2.evaluate("/html/body").unwrap().get_nodes_as_vec(); assert_eq!(body2.len(), 1); } #[test] fn cleanup_safely_unlinked_xpath_nodes() { let p = Parser::default(); let doc_result = p.parse_string(r##" "##); assert!(doc_result.is_ok(), "successfully parsed SVG snippet"); let doc = doc_result.unwrap(); let mut xpath = libxml::xpath::Context::new(&doc).unwrap(); xpath .register_namespace("svg", "http://www.w3.org/2000/svg") .unwrap(); for mut k in xpath.findnodes("//svg:c", None).unwrap() { k.unlink_node(); } drop(xpath); drop(doc); assert!(true, "Drops went OK."); } #[test] fn xpath_find_string_values() { let parser = Parser::default(); let doc_result = parser.parse_file("tests/resources/ids.xml"); assert!(doc_result.is_ok()); let doc = doc_result.unwrap(); let mut xpath = libxml::xpath::Context::new(&doc).unwrap(); if let 
Some(root) = doc.get_root_element() { let tests = root.get_child_elements(); let empty_test = &tests[0]; let ids_test = &tests[1]; let empty_values = xpath.findvalues(".//@xml:id", Some(empty_test)); assert_eq!(empty_values, Ok(Vec::new())); let ids_values = xpath.findvalues(".//@xml:id", Some(ids_test)); let expected_ids = Ok(vec![String::from("start"),String::from("mid"),String::from("end")]); assert_eq!(ids_values, expected_ids); let node_ids_values = ids_test.findvalues(".//@xml:id"); assert_eq!(node_ids_values, expected_ids); } else { panic!("Document fails to obtain root!"); } } /// Tests for checking xpath well-formedness mod compile_tests { use libxml::xpath::is_well_formed_xpath; #[test] fn can_compile_an_xpath() { let compiles = is_well_formed_xpath("//a"); assert_eq!(compiles, true); } #[test] fn invalid_xpath_does_not_compile() { let compiles = is_well_formed_xpath("//a[but invalid]"); assert_eq!(compiles, false); } }