quick-xml-0.27.1/.cargo_vcs_info.json0000644000000001360000000000100130350ustar { "git": { "sha1": "89fa620eb349a774d4cb682b6d5fd61220df0295" }, "path_in_vcs": "" }quick-xml-0.27.1/Cargo.lock0000644000000531730000000000100110210ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 3 [[package]] name = "aho-corasick" version = "0.7.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" dependencies = [ "memchr", ] [[package]] name = "anes" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "async-stream" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dad5c83079eae9969be7fadefe640a1c566901f05ff91ab221de4b6f68d9507e" dependencies = [ "async-stream-impl", "futures-core", ] [[package]] name = "async-stream-impl" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "10f203db73a71dfa2fb6dd22763990fa26f3d2625a6da2da900d23b87d26be27" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "atty" version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ "hermit-abi 0.1.19", "libc", "winapi", ] [[package]] name = "autocfg" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "bitflags" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bumpalo" version = "3.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum 
= "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba" [[package]] name = "bytes" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dfb24e866b15a1af2a1b663f10c6b6b8f397a84aadb828f12e5b289ec23a3a3c" [[package]] name = "cast" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "ciborium" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b0c137568cc60b904a7724001b35ce2630fd00d5d84805fbb608ab89509d788f" dependencies = [ "ciborium-io", "ciborium-ll", "serde", ] [[package]] name = "ciborium-io" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "346de753af073cc87b52b2083a506b38ac176a44cfb05497b622e27be899b369" [[package]] name = "ciborium-ll" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "213030a2b5a4e0c0892b6652260cf6ccac84827b83a85a534e178e3906c4cf1b" dependencies = [ "ciborium-io", "half", ] [[package]] name = "clap" version = "3.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5" dependencies = [ "bitflags", "clap_lex", "indexmap", "textwrap", ] [[package]] name = "clap_lex" version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" dependencies = [ "os_str_bytes", ] [[package]] name = "criterion" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e7c76e09c1aae2bc52b3d2f29e13c6572553b30c4aa1b8a49fd70de6412654cb" dependencies = 
[ "anes", "atty", "cast", "ciborium", "clap", "criterion-plot", "itertools", "lazy_static", "num-traits", "oorandom", "plotters", "rayon", "regex", "serde", "serde_derive", "serde_json", "tinytemplate", "walkdir", ] [[package]] name = "criterion-plot" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" dependencies = [ "cast", "itertools", ] [[package]] name = "crossbeam-channel" version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" dependencies = [ "cfg-if", "crossbeam-utils", ] [[package]] name = "crossbeam-deque" version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc" dependencies = [ "cfg-if", "crossbeam-epoch", "crossbeam-utils", ] [[package]] name = "crossbeam-epoch" version = "0.9.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "01a9af1f4c2ef74bb8aa1f7e19706bc72d03598c8a570bb5de72243c7a9d9d5a" dependencies = [ "autocfg", "cfg-if", "crossbeam-utils", "memoffset", "scopeguard", ] [[package]] name = "crossbeam-utils" version = "0.8.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4fb766fa798726286dbbb842f174001dab8abc7b627a1dd86e0b7222a95d929f" dependencies = [ "cfg-if", ] [[package]] name = "ctor" version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d2301688392eb071b0bf1a37be05c469d3cc4dbbd95df672fe28ab021e6a096" dependencies = [ "quote", "syn", ] [[package]] name = "diff" version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" [[package]] name = "document-features" version = "0.2.7" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "e493c573fce17f00dcab13b6ac057994f3ce17d1af4dc39bfd482b83c6eb6157" dependencies = [ "litrs", ] [[package]] name = "either" version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797" [[package]] name = "encoding_rs" version = "0.8.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9852635589dc9f9ea1b6fe9f05b50ef208c85c834a562f0c6abb1c475736ec2b" dependencies = [ "cfg-if", ] [[package]] name = "futures-core" version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04909a7a7e4633ae6c4a9ab280aeb86da1236243a77b694a49eacd659a4bd3ac" [[package]] name = "half" version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" [[package]] name = "hashbrown" version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" [[package]] name = "hermit-abi" version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" dependencies = [ "libc", ] [[package]] name = "hermit-abi" version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7" dependencies = [ "libc", ] [[package]] name = "indexmap" version = "1.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" dependencies = [ "autocfg", "hashbrown", ] [[package]] name = "itertools" version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" dependencies = [ "either", ] [[package]] name = "itoa" version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fad582f4b9e86b6caa621cabeb0963332d92eea04729ab12892c2533951e6440" [[package]] name = "js-sys" version = "0.3.60" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47" dependencies = [ "wasm-bindgen", ] [[package]] name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" version = "0.2.138" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db6d7e329c562c5dfab7a46a2afabc8b987ab9a4834c9d1ca04dc54c1546cef8" [[package]] name = "litrs" version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9275e0933cf8bb20f008924c0cb07a0692fe54d8064996520bf998de9eb79aa" [[package]] name = "log" version = "0.4.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" dependencies = [ "cfg-if", ] [[package]] name = "memchr" version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" [[package]] name = "memoffset" version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4" dependencies = [ "autocfg", ] [[package]] name = "num-traits" version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" dependencies = [ "autocfg", ] [[package]] name = "num_cpus" version = "1.15.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b" dependencies = [ "hermit-abi 0.2.6", "libc", ] [[package]] name = "once_cell" version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860" [[package]] name = "oorandom" version = "11.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" [[package]] name = "ordered-float" version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7940cf2ca942593318d07fcf2596cdca60a85c9e7fab408a5e21a4f9dcd40d87" dependencies = [ "num-traits", ] [[package]] name = "os_str_bytes" version = "6.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee" [[package]] name = "output_vt100" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "628223faebab4e3e40667ee0b2336d34a5b960ff60ea743ddfdbcf7770bcfb66" dependencies = [ "winapi", ] [[package]] name = "pin-project-lite" version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" [[package]] name = "plotters" version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2538b639e642295546c50fcd545198c9d64ee2a38620a628724a3b266d5fbf97" dependencies = [ "num-traits", "plotters-backend", "plotters-svg", "wasm-bindgen", "web-sys", ] [[package]] name = "plotters-backend" version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "193228616381fecdc1224c62e96946dfbc73ff4384fba576e052ff8c1bea8142" [[package]] name = "plotters-svg" version = "0.3.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "f9a81d2759aae1dae668f783c308bc5c8ebd191ff4184aaa1b37f65a6ae5a56f" dependencies = [ "plotters-backend", ] [[package]] name = "pretty_assertions" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a25e9bcb20aa780fd0bb16b72403a9064d6b3f22f026946029acb941a50af755" dependencies = [ "ctor", "diff", "output_vt100", "yansi", ] [[package]] name = "proc-macro2" version = "1.0.49" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57a8eca9f9c4ffde41714334dee777596264c7825420f521abc92b5b5deb63a5" dependencies = [ "unicode-ident", ] [[package]] name = "quick-xml" version = "0.27.1" dependencies = [ "criterion", "document-features", "encoding_rs", "memchr", "pretty_assertions", "regex", "serde", "serde-value", "tokio", "tokio-test", ] [[package]] name = "quote" version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" dependencies = [ "proc-macro2", ] [[package]] name = "rayon" version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db3a213adf02b3bcfd2d3846bb41cb22857d131789e01df434fb7e7bc0759b7" dependencies = [ "either", "rayon-core", ] [[package]] name = "rayon-core" version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cac410af5d00ab6884528b4ab69d1e8e146e8d471201800fa1b4524126de6ad3" dependencies = [ "crossbeam-channel", "crossbeam-deque", "crossbeam-utils", "num_cpus", ] [[package]] name = "regex" version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e076559ef8e241f2ae3479e36f97bd5741c0330689e217ad51ce2c76808b868a" dependencies = [ "aho-corasick", "memchr", "regex-syntax", ] [[package]] name = "regex-syntax" version = "0.6.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848" [[package]] name = "ryu" version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b4b9743ed687d4b4bcedf9ff5eaa7398495ae14e61cba0a295704edbc7decde" [[package]] name = "same-file" version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" dependencies = [ "winapi-util", ] [[package]] name = "scopeguard" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" [[package]] name = "serde" version = "1.0.151" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97fed41fc1a24994d044e6db6935e69511a1153b52c15eb42493b26fa87feba0" dependencies = [ "serde_derive", ] [[package]] name = "serde-value" version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c" dependencies = [ "ordered-float", "serde", ] [[package]] name = "serde_derive" version = "1.0.151" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "255abe9a125a985c05190d687b320c12f9b1f0b99445e608c21ba0782c719ad8" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "serde_json" version = "1.0.91" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877c235533714907a8c2464236f5c4b2a17262ef1bd71f38f35ea592c8da6883" dependencies = [ "itoa", "ryu", "serde", ] [[package]] name = "syn" version = "1.0.107" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "textwrap" version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" [[package]] name = "tinytemplate" version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" dependencies = [ "serde", "serde_json", ] [[package]] name = "tokio" version = "1.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eab6d665857cc6ca78d6e80303a02cea7a7851e85dfbd77cbdc09bd129f1ef46" dependencies = [ "autocfg", "bytes", "memchr", "pin-project-lite", "tokio-macros", "windows-sys", ] [[package]] name = "tokio-macros" version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d266c00fde287f55d3f1c3e96c500c362a2b8c695076ec180f27918820bc6df8" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "tokio-stream" version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d660770404473ccd7bc9f8b28494a811bc18542b915c0855c51e8f419d5223ce" dependencies = [ "futures-core", "pin-project-lite", "tokio", ] [[package]] name = "tokio-test" version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53474327ae5e166530d17f2d956afcb4f8a004de581b3cae10f12006bc8163e3" dependencies = [ "async-stream", "bytes", "futures-core", "tokio", "tokio-stream", ] [[package]] name = "unicode-ident" version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" [[package]] name = "walkdir" version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" dependencies = [ "same-file", "winapi", "winapi-util", ] [[package]] name = "wasm-bindgen" version = "0.2.83" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268" dependencies = [ "cfg-if", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" version = "0.2.83" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", "syn", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" version = "0.2.83" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810" dependencies = [ "quote", "wasm-bindgen-macro-support", ] [[package]] name = "wasm-bindgen-macro-support" version = "0.2.83" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c" dependencies = [ "proc-macro2", "quote", "syn", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" version = "0.2.83" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f" [[package]] name = "web-sys" version = "0.3.60" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bcda906d8be16e728fd5adc5b729afad4e444e106ab28cd1c7256e54fa61510f" dependencies = [ "js-sys", "wasm-bindgen", ] [[package]] name = "winapi" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" dependencies = [ "winapi-i686-pc-windows-gnu", "winapi-x86_64-pc-windows-gnu", ] [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" version = "0.1.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" dependencies = [ "winapi", ] [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows-sys" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", "windows_i686_gnu", "windows_i686_msvc", "windows_x86_64_gnu", "windows_x86_64_gnullvm", "windows_x86_64_msvc", ] [[package]] name = "windows_aarch64_gnullvm" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e" [[package]] name = "windows_aarch64_msvc" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4" [[package]] name = "windows_i686_gnu" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7" [[package]] name = "windows_i686_msvc" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246" [[package]] name = "windows_x86_64_gnu" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed" [[package]] name = "windows_x86_64_gnullvm" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028" [[package]] name = 
"windows_x86_64_msvc" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5" [[package]] name = "yansi" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" quick-xml-0.27.1/Cargo.toml0000644000000052530000000000100110400ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2018" rust-version = "1.52" name = "quick-xml" version = "0.27.1" include = [ "src/*", "LICENSE-MIT.md", "README.md", ] description = "High performance xml reader and writer" documentation = "https://docs.rs/quick-xml" readme = "README.md" keywords = [ "xml", "serde", "parser", "writer", "html", ] categories = [ "asynchronous", "encoding", "parsing", "parser-implementations", ] license = "MIT" repository = "https://github.com/tafia/quick-xml" [package.metadata.docs.rs] all-features = true rustdoc-args = [ "--cfg", "docs_rs", ] [lib] bench = false [[test]] name = "encodings" path = "tests/encodings.rs" required-features = ["encoding"] [[test]] name = "serde_roundtrip" path = "tests/serde_roundtrip.rs" required-features = ["serialize"] [[test]] name = "serde-de" path = "tests/serde-de.rs" required-features = ["serialize"] [[test]] name = "serde-se" path = "tests/serde-se.rs" required-features = ["serialize"] [[test]] name = "serde-migrated" path = "tests/serde-migrated.rs" required-features = ["serialize"] [[test]] name = "async-tokio" path = 
"tests/async-tokio.rs" required-features = ["async-tokio"] [[bench]] name = "microbenches" path = "benches/microbenches.rs" harness = false [[bench]] name = "macrobenches" path = "benches/macrobenches.rs" harness = false [dependencies.document-features] version = "0.2" optional = true [dependencies.encoding_rs] version = "0.8" optional = true [dependencies.memchr] version = "2.0" [dependencies.serde] version = "1.0.100" optional = true [dependencies.tokio] version = "1.0" features = ["io-util"] optional = true default-features = false [dev-dependencies.criterion] version = "0.4" [dev-dependencies.pretty_assertions] version = "1.3" [dev-dependencies.regex] version = "1" [dev-dependencies.serde] version = "1.0" features = ["derive"] [dev-dependencies.serde-value] version = "0.7" [dev-dependencies.tokio] version = "1.21" features = [ "macros", "rt", ] default-features = false [dev-dependencies.tokio-test] version = "0.4" [features] async-tokio = ["tokio"] default = [] encoding = ["encoding_rs"] escape-html = [] overlapped-lists = [] serde-types = ["serde/derive"] serialize = ["serde"] quick-xml-0.27.1/Cargo.toml.orig000064400000000000000000000162230072674642500145500ustar 00000000000000[package] name = "quick-xml" version = "0.27.1" description = "High performance xml reader and writer" edition = "2018" documentation = "https://docs.rs/quick-xml" repository = "https://github.com/tafia/quick-xml" keywords = ["xml", "serde", "parser", "writer", "html"] categories = ["asynchronous", "encoding", "parsing", "parser-implementations"] license = "MIT" rust-version = "1.52" include = ["src/*", "LICENSE-MIT.md", "README.md"] [dependencies] document-features = { version = "0.2", optional = true } encoding_rs = { version = "0.8", optional = true } serde = { version = "1.0.100", optional = true } tokio = { version = "1.0", optional = true, default-features = false, features = ["io-util"] } memchr = "2.0" [dev-dependencies] criterion = "0.4" pretty_assertions = "1.3" regex = "1" 
serde = { version = "1.0", features = ["derive"] } serde-value = "0.7" tokio = { version = "1.21", default-features = false, features = ["macros", "rt"] } tokio-test = "0.4" [lib] bench = false [[bench]] name = "microbenches" harness = false path = "benches/microbenches.rs" [[bench]] name = "macrobenches" harness = false path = "benches/macrobenches.rs" [features] default = [] ## Enables support for asynchronous reading from `tokio`'s IO-Traits by enabling ## [reading events] from types implementing [`tokio::io::AsyncBufRead`]. ## ## [reading events]: crate::reader::Reader::read_event_into_async async-tokio = ["tokio"] ## Enables support of non-UTF-8 encoded documents. Encoding will be inferred from ## the XML declaration if it will be found, otherwise UTF-8 is assumed. ## ## Currently, only ASCII-compatible encodings are supported, so, for example, ## UTF-16 will not work (therefore, `quick-xml` is not [standard compliant]). ## ## Thus, quick-xml supports all encodings of [`encoding_rs`] except these: ## - [UTF-16BE] ## - [UTF-16LE] ## - [ISO-2022-JP] ## ## You should stop to process document when one of that encoding will be detected, ## because generated events can be wrong and do not reflect a real document structure! 
## ## Because there is only supported encodings that is not ASCII compatible, you can ## check for that to detect them: ## ## ``` ## use quick_xml::events::Event; ## use quick_xml::reader::Reader; ## ## # fn to_utf16le_with_bom(string: &str) -> Vec { ## # let mut bytes = Vec::new(); ## # bytes.extend_from_slice(&[0xFF, 0xFE]); // UTF-16 LE BOM ## # for ch in string.encode_utf16() { ## # bytes.extend_from_slice(&ch.to_le_bytes()); ## # } ## # bytes ## # } ## let xml = to_utf16le_with_bom(r#""#); ## let mut reader = Reader::from_reader(xml.as_ref()); ## reader.trim_text(true); ## ## let mut buf = Vec::new(); ## let mut unsupported = false; ## loop { ## if !reader.decoder().encoding().is_ascii_compatible() { ## unsupported = true; ## break; ## } ## buf.clear(); ## match reader.read_event_into(&mut buf).unwrap() { ## Event::Eof => break, ## _ => {} ## } ## } ## assert_eq!(unsupported, true); ## ``` ## That restriction will be eliminated once issue [#158] is resolved. ## ## [standard compliant]: https://www.w3.org/TR/xml11/#charencoding ## [UTF-16BE]: encoding_rs::UTF_16BE ## [UTF-16LE]: encoding_rs::UTF_16LE ## [ISO-2022-JP]: encoding_rs::ISO_2022_JP ## [#158]: https://github.com/tafia/quick-xml/issues/158 encoding = ["encoding_rs"] ## Enables support for recognizing all [HTML 5 entities] in [`unescape`] and ## [`unescape_with`] functions. The full list of entities also can be found in ## . ## ## [HTML 5 entities]: https://dev.w3.org/html5/html-author/charref ## [`unescape`]: crate::escape::unescape ## [`unescape_with`]: crate::escape::unescape_with escape-html = [] ## This feature for a serde deserializer that enables support for deserializing ## lists where tags are overlapped with tags that do not correspond to the list. 
## ## When this feature is enabled, the XML: ## ```xml ## <any-name> ## <item/> ## <another-item/> ## <item/> ## <item/> ## </any-name> ## ``` ## could be deserialized to a struct: ## ```no_run ## # use serde::Deserialize; ## #[derive(Deserialize)] ## #[serde(rename_all = "kebab-case")] ## struct AnyName { ## item: Vec<()>, ## another_item: (), ## } ## ``` ## ## When this feature is not enabled (default), only the first element will be ## associated with the field, and the deserialized type will report an error ## (duplicated field) when the deserializer encounters a second `<item>`. ## ## Note, that enabling this feature can lead to high and even unlimited memory ## consumption, because deserializer should check all events up to the end of a ## container tag (`</any-name>` in that example) to figure out that there are no ## more items for a field. If `</any-name>` or even EOF is not encountered, the ## parsing will never end which can lead to a denial-of-service (DoS) scenario. ## ## Having several lists and overlapped elements for them in XML could also lead ## to quadratic parsing time, because the deserializer must check the list of ## events as many times as the number of sequence fields present in the schema. ## ## To reduce negative consequences, always [limit] the maximum number of events ## that [`Deserializer`] will buffer. ## ## This feature works only with `serialize` feature and has no effect if `serialize` ## is not enabled. ## ## [limit]: crate::de::Deserializer::event_buffer_size ## [`Deserializer`]: crate::de::Deserializer overlapped-lists = [] ## Enables serialization of some types using [`serde`]. You will probably rarely ## need this feature enabled. ## ## This feature does NOT provide XML serializer or deserializer. You should use ## the `serialize` feature for that instead. # Cannot name "serde" to avoid clash with dependency. # "dep:" prefix only available from Rust 1.60 serde-types = ["serde/derive"] ## Enables support for [`serde`] serialization and deserialization. 
When this ## feature is enabled, quick-xml provides a serializer and deserializer for XML. ## ## This feature does NOT enable serialization of the types inside quick-xml. ## If you need that, use the `serde-types` feature. serialize = ["serde"] # "dep:" prefix only available from Rust 1.60 [package.metadata.docs.rs] # document all features all-features = true # defines the configuration attribute `docs_rs` to enable feature requirements # See https://stackoverflow.com/questions/61417452 rustdoc-args = ["--cfg", "docs_rs"] [[test]] name = "encodings" required-features = ["encoding"] path = "tests/encodings.rs" [[test]] name = "serde_roundtrip" required-features = ["serialize"] path = "tests/serde_roundtrip.rs" [[test]] name = "serde-de" required-features = ["serialize"] path = "tests/serde-de.rs" [[test]] name = "serde-se" required-features = ["serialize"] path = "tests/serde-se.rs" [[test]] name = "serde-migrated" required-features = ["serialize"] path = "tests/serde-migrated.rs" [[test]] name = "async-tokio" required-features = ["async-tokio"] path = "tests/async-tokio.rs" quick-xml-0.27.1/LICENSE-MIT.md000064400000000000000000000021210072674642500137040ustar 00000000000000The MIT License (MIT) Copyright (c) 2016 Johann Tuffe Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. quick-xml-0.27.1/README.md000064400000000000000000000142540072674642500131420ustar 00000000000000# quick-xml ![status](https://github.com/tafia/quick-xml/actions/workflows/rust.yml/badge.svg) [![Crate](https://img.shields.io/crates/v/quick-xml.svg)](https://crates.io/crates/quick-xml) [![docs.rs](https://docs.rs/quick-xml/badge.svg)](https://docs.rs/quick-xml) [![codecov](https://img.shields.io/codecov/c/github/tafia/quick-xml)](https://codecov.io/gh/tafia/quick-xml) [![MSRV](https://img.shields.io/badge/rustc-1.52.0+-ab6000.svg)](https://blog.rust-lang.org/2021/05/06/Rust-1.52.0.html) High performance xml pull reader/writer. The reader: - is almost zero-copy (use of `Cow` whenever possible) - is easy on memory allocation (the API provides a way to reuse buffers) - supports various encodings (with `encoding` feature), namespace resolution, special characters. Syntax is inspired by [xml-rs](https://github.com/netvl/xml-rs). ## Example ### Reader ```rust use quick_xml::events::Event; use quick_xml::reader::Reader; let xml = r#" Test Test 2 "#; let mut reader = Reader::from_str(xml); reader.trim_text(true); let mut count = 0; let mut txt = Vec::new(); let mut buf = Vec::new(); // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s) loop { // NOTE: this is the generic case when we don't know about the input BufRead. 
// when the input is a &str or a &[u8], we don't actually need to use another // buffer, we could directly call `reader.read_event()` match reader.read_event_into(&mut buf) { Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e), // exits the loop when reaching end of file Ok(Event::Eof) => break, Ok(Event::Start(e)) => { match e.name().as_ref() { b"tag1" => println!("attributes values: {:?}", e.attributes().map(|a| a.unwrap().value) .collect::<Vec<_>>()), b"tag2" => count += 1, _ => (), } } Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()), // There are several other `Event`s we do not consider here _ => (), } // if we don't keep a borrow elsewhere, we can clear the buffer to keep memory usage low buf.clear(); } ``` ### Writer ```rust use quick_xml::events::{Event, BytesEnd, BytesStart}; use quick_xml::reader::Reader; use quick_xml::writer::Writer; use std::io::Cursor; let xml = r#"<this_tag k1="v1" k2="v2"><child>text</child></this_tag>"#; let mut reader = Reader::from_str(xml); reader.trim_text(true); let mut writer = Writer::new(Cursor::new(Vec::new())); loop { match reader.read_event() { Ok(Event::Start(e)) if e.name().as_ref() == b"this_tag" => { // creates a new element ... 
alternatively we could reuse `e` by calling // `e.into_owned()` let mut elem = BytesStart::new("my_elem"); // collect existing attributes elem.extend_attributes(e.attributes().map(|attr| attr.unwrap())); // copy existing attributes, adds a new my-key="some value" attribute elem.push_attribute(("my-key", "some value")); // writes the event to the writer assert!(writer.write_event(Event::Start(elem)).is_ok()); }, Ok(Event::End(e)) if e.name().as_ref() == b"this_tag" => { assert!(writer.write_event(Event::End(BytesEnd::new("my_elem"))).is_ok()); }, Ok(Event::Eof) => break, // we can either move or borrow the event to write, depending on your use-case Ok(e) => assert!(writer.write_event(e).is_ok()), Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e), } } let result = writer.into_inner().into_inner(); let expected = r#"<my_elem k1="v1" k2="v2" my-key="some value"><child>text</child></my_elem>"#; assert_eq!(result, expected.as_bytes()); ``` ## Serde When using the `serialize` feature, quick-xml can be used with serde's `Serialize`/`Deserialize` traits. ### Credits This has largely been inspired by [serde-xml-rs](https://github.com/RReverser/serde-xml-rs). quick-xml follows its convention for deserialization, including the [`$value`](https://github.com/RReverser/serde-xml-rs#parsing-the-value-of-a-tag) special name. ### Parsing the "value" of a tag If you have an input of the form `<foo abc="xyz">bar</foo>`, and you want to get at the `bar`, you can use either the special name `$text`, or the special name `$value`: ```rust,ignore struct Foo { pub abc: String, #[serde(rename = "$text")] pub body: String, } ``` Read about the difference in the [documentation](https://docs.rs/quick-xml/latest/quick_xml/de/index.html#difference-between-text-and-value-special-names). ### Performance Note that despite not focusing on performance (there are several unnecessary copies), it remains about 10x faster than serde-xml-rs. 
# Features - `encoding`: support non utf8 xmls - `serialize`: support serde `Serialize`/`Deserialize` ## Performance Benchmarking is hard and the results depend on your input file and your machine. Here on my particular file, quick-xml is around **50 times faster** than [xml-rs](https://crates.io/crates/xml-rs) crate. ``` // quick-xml benches test bench_quick_xml ... bench: 198,866 ns/iter (+/- 9,663) test bench_quick_xml_escaped ... bench: 282,740 ns/iter (+/- 61,625) test bench_quick_xml_namespaced ... bench: 389,977 ns/iter (+/- 32,045) // same bench with xml-rs test bench_xml_rs ... bench: 14,468,930 ns/iter (+/- 321,171) // serde-xml-rs vs serialize feature test bench_serde_quick_xml ... bench: 1,181,198 ns/iter (+/- 138,290) test bench_serde_xml_rs ... bench: 15,039,564 ns/iter (+/- 783,485) ``` For a feature and performance comparison, you can also have a look at RazrFalcon's [parser comparison table](https://github.com/RazrFalcon/roxmltree#parsing). ## Contribute Any PR is welcomed! ## License MIT quick-xml-0.27.1/src/de/escape.rs000064400000000000000000000141250072674642500146450ustar 00000000000000//! 
Serde `Deserializer` module use crate::de::deserialize_bool; use crate::encoding::Decoder; use crate::errors::serialize::DeError; use crate::escape::unescape; use serde::de::{DeserializeSeed, EnumAccess, VariantAccess, Visitor}; use serde::{self, forward_to_deserialize_any, serde_if_integer128}; use std::borrow::Cow; /// A deserializer for a xml escaped and encoded value /// /// # Note /// /// Escaping the value is actually not always necessary, for instance /// when converting to float, we don't expect any escapable character /// anyway #[derive(Clone, Debug)] pub struct EscapedDeserializer<'a> { decoder: Decoder, /// Possible escaped value of text/CDATA or attribute value escaped_value: Cow<'a, [u8]>, /// If `true`, value requires unescaping before using escaped: bool, } impl<'a> EscapedDeserializer<'a> { pub fn new(escaped_value: Cow<'a, [u8]>, decoder: Decoder, escaped: bool) -> Self { EscapedDeserializer { decoder, escaped_value, escaped, } } } macro_rules! deserialize_num { ($method:ident, $visit:ident) => { fn $method(self, visitor: V) -> Result where V: Visitor<'de>, { let value = self.decoder.decode(self.escaped_value.as_ref())?.parse()?; visitor.$visit(value) } }; } impl<'de, 'a> serde::Deserializer<'de> for EscapedDeserializer<'a> { type Error = DeError; fn deserialize_any(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_str(visitor) } fn deserialize_str(self, visitor: V) -> Result where V: Visitor<'de>, { let decoded = self.decoder.decode(&self.escaped_value)?; if self.escaped { match unescape(&decoded)? 
{ Cow::Borrowed(s) => visitor.visit_str(s), Cow::Owned(s) => visitor.visit_string(s), } } else { match decoded { Cow::Borrowed(s) => visitor.visit_str(s), Cow::Owned(s) => visitor.visit_string(s), } } } /// Returns [`DeError::Unsupported`] fn deserialize_bytes(self, _visitor: V) -> Result where V: Visitor<'de>, { Err(DeError::Unsupported( "binary data content is not supported by XML format".into(), )) } /// Forwards deserialization to the [`deserialize_bytes`](#method.deserialize_bytes) fn deserialize_byte_buf(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_bytes(visitor) } fn deserialize_string(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_str(visitor) } fn deserialize_bool(self, visitor: V) -> Result where V: Visitor<'de>, { deserialize_bool(self.escaped_value.as_ref(), self.decoder, visitor) } fn deserialize_char(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_str(visitor) } fn deserialize_unit(self, visitor: V) -> Result where V: Visitor<'de>, { visitor.visit_unit() } fn deserialize_option(self, visitor: V) -> Result where V: Visitor<'de>, { if self.escaped_value.as_ref().is_empty() { visitor.visit_none() } else { visitor.visit_some(self) } } fn deserialize_enum( self, _name: &str, _variants: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_enum(self) } fn deserialize_newtype_struct( self, _name: &'static str, visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_newtype_struct(self) } deserialize_num!(deserialize_i64, visit_i64); deserialize_num!(deserialize_i32, visit_i32); deserialize_num!(deserialize_i16, visit_i16); deserialize_num!(deserialize_i8, visit_i8); deserialize_num!(deserialize_u64, visit_u64); deserialize_num!(deserialize_u32, visit_u32); deserialize_num!(deserialize_u16, visit_u16); deserialize_num!(deserialize_u8, visit_u8); deserialize_num!(deserialize_f64, visit_f64); deserialize_num!(deserialize_f32, visit_f32); 
serde_if_integer128! {
    deserialize_num!(deserialize_i128, visit_i128);
    deserialize_num!(deserialize_u128, visit_u128);
}

// Everything that has no dedicated method above is routed through
// `deserialize_any()`.
forward_to_deserialize_any! {
    unit_struct seq tuple tuple_struct map struct identifier ignored_any
}
}

/// Lets an escaped value select an enum variant: the value itself is
/// deserialized as the variant name.
impl<'de, 'a> EnumAccess<'de> for EscapedDeserializer<'a> {
    type Error = DeError;
    type Variant = Self;

    /// Deserializes the variant name from a clone of this deserializer and
    /// hands the original deserializer back as the variant content accessor.
    fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self), Self::Error>
    where
        V: DeserializeSeed<'de>,
    {
        let name = seed.deserialize(self.clone())?;
        Ok((name, self))
    }
}

/// Access to the content of an enum variant selected by an escaped value.
///
/// Unit variants always succeed and newtype variants are deserialized from
/// the same value. Tuple and struct variants are rejected with a
/// [`DeError::Unsupported`] error.
impl<'de, 'a> VariantAccess<'de> for EscapedDeserializer<'a> {
    type Error = DeError;

    /// Always succeeds; the value is not inspected.
    fn unit_variant(self) -> Result<(), Self::Error> {
        Ok(())
    }

    /// Deserializes the newtype payload using this deserializer.
    fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value, Self::Error>
    where
        T: DeserializeSeed<'de>,
    {
        seed.deserialize(self)
    }

    /// Returns [`DeError::Unsupported`].
    ///
    /// An error is returned instead of panicking, so that input which leads
    /// serde here cannot abort the caller.
    fn tuple_variant<V>(self, _len: usize, _visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        Err(DeError::Unsupported(
            "enum tuple variants are not supported for XML text or attribute values".into(),
        ))
    }

    /// Returns [`DeError::Unsupported`].
    ///
    /// An error is returned instead of panicking, so that input which leads
    /// serde here cannot abort the caller.
    fn struct_variant<V>(
        self,
        _fields: &'static [&'static str],
        _visitor: V,
    ) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        Err(DeError::Unsupported(
            "enum struct variants are not supported for XML text or attribute values".into(),
        ))
    }
}
quick-xml-0.27.1/src/de/key.rs000064400000000000000000000361140072674642500141770ustar 00000000000000
use crate::de::str2bool;
use crate::encoding::Decoder;
use crate::errors::serialize::DeError;
use crate::name::QName;
use serde::de::{DeserializeSeed, Deserializer, EnumAccess, VariantAccess, Visitor};
use serde::{forward_to_deserialize_any, serde_if_integer128};
use std::borrow::Cow;

/// Generates a `Deserializer` method `$method` that parses the stored name
/// as a number and passes the result to the visitor's `$visit` method.
macro_rules! deserialize_num {
    ($method:ident, $visit:ident) => {
        fn $method<V>(self, visitor: V) -> Result<V::Value, Self::Error>
        where
            V: Visitor<'de>,
        {
            visitor.$visit(self.name.parse()?)
        }
    };
}

/// Decodes raw bytes using the deserializer encoding.
/// The method will borrow if encoding is UTF-8 compatible and `name` contains
/// only UTF-8 compatible characters (usually only ASCII characters).
///
/// Only the local part of `name` (see [`QName::local_name`]) is decoded.
#[inline]
fn decode_name<'n>(name: QName<'n>, decoder: Decoder) -> Result<Cow<'n, str>, DeError> {
    let local = name.local_name();
    Ok(decoder.decode(local.into_inner())?)
}

/// A deserializer for xml names of elements and attributes.
/// /// Used for deserializing values from: /// - attribute names (`<... name="..." ...>`) /// - element names (`<name>...</name>`) /// /// Converts a name to an identifier string using the following rules: /// /// - if it is an [`attribute`] name, put `@` in front of the identifier /// - put the decoded [`local_name()`] of a name to the identifier /// /// The final identifier looks like `[@]local_name` /// (where `[]` means optional element). /// /// The deserializer also supports deserializing names as other primitive types: /// - numbers /// - booleans /// - unit (`()`) and unit structs /// - unit variants of the enumerations /// /// Because `serde` does not define on which side type conversion should be /// performed, and because the [`Deserialize`] implementations for those primitives /// in serde do not accept strings, the deserializer will perform the conversion /// by itself. /// /// The deserializer is able to deserialize unit and unit structs, but any name /// will be converted to the same unit instance. This is an asymmetry with the serializer, /// which is not able to serialize those types, because empty names are impossible /// in XML. /// /// `deserialize_any()` returns the same result as `deserialize_identifier()`. 
/// /// # Lifetime /// /// - `'d`: lifetime of a deserializer that holds a buffer with content of events /// /// [`attribute`]: Self::from_attr /// [`local_name()`]: QName::local_name /// [`Deserialize`]: serde::Deserialize pub struct QNameDeserializer<'d> { name: Cow<'d, str>, } impl<'d> QNameDeserializer<'d> { /// Creates deserializer from name of an attribute pub fn from_attr(name: QName<'d>, decoder: Decoder) -> Result { let local = decode_name(name, decoder)?; Ok(Self { name: Cow::Owned(format!("@{local}")), }) } /// Creates deserializer from name of an element pub fn from_elem(name: QName<'d>, decoder: Decoder) -> Result { let local = decode_name(name, decoder)?; Ok(Self { name: local }) } } impl<'de, 'd> Deserializer<'de> for QNameDeserializer<'d> { type Error = DeError; forward_to_deserialize_any! { char str string bytes byte_buf seq tuple tuple_struct map struct ignored_any } /// According to the , /// valid boolean representations are only `"true"`, `"false"`, `"1"`, /// and `"0"`. But this method also handles following: /// /// |`bool` |XML content /// |-------|------------------------------------------------------------- /// |`true` |`"True"`, `"TRUE"`, `"t"`, `"Yes"`, `"YES"`, `"yes"`, `"y"` /// |`false`|`"False"`, `"FALSE"`, `"f"`, `"No"`, `"NO"`, `"no"`, `"n"` fn deserialize_bool(self, visitor: V) -> Result where V: Visitor<'de>, { str2bool(self.name.as_ref(), visitor) } deserialize_num!(deserialize_i8, visit_i8); deserialize_num!(deserialize_i16, visit_i16); deserialize_num!(deserialize_i32, visit_i32); deserialize_num!(deserialize_i64, visit_i64); deserialize_num!(deserialize_u8, visit_u8); deserialize_num!(deserialize_u16, visit_u16); deserialize_num!(deserialize_u32, visit_u32); deserialize_num!(deserialize_u64, visit_u64); serde_if_integer128! 
{ deserialize_num!(deserialize_i128, visit_i128); deserialize_num!(deserialize_u128, visit_u128); } deserialize_num!(deserialize_f32, visit_f32); deserialize_num!(deserialize_f64, visit_f64); /// Calls [`Visitor::visit_unit`] fn deserialize_unit(self, visitor: V) -> Result where V: Visitor<'de>, { visitor.visit_unit() } /// Forwards deserialization to the [`Self::deserialize_unit`] fn deserialize_unit_struct( self, _name: &'static str, visitor: V, ) -> Result where V: Visitor<'de>, { self.deserialize_unit(visitor) } /// Forwards deserialization to the [`Self::deserialize_identifier`] #[inline] fn deserialize_any(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_identifier(visitor) } /// If `name` is an empty string then calls [`Visitor::visit_none`], /// otherwise calls [`Visitor::visit_some`] with itself fn deserialize_option(self, visitor: V) -> Result where V: Visitor<'de>, { if self.name.is_empty() { visitor.visit_none() } else { visitor.visit_some(self) } } fn deserialize_newtype_struct( self, _name: &'static str, visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_newtype_struct(self) } /// Calls a [`Visitor::visit_str`] if [`name`] contains only UTF-8 /// compatible encoded characters and represents an element name and /// a [`Visitor::visit_string`] in all other cases. 
/// /// [`name`]: Self::name fn deserialize_identifier(self, visitor: V) -> Result where V: Visitor<'de>, { match self.name { Cow::Borrowed(name) => visitor.visit_str(name), Cow::Owned(name) => visitor.visit_string(name), } } fn deserialize_enum( self, _name: &str, _variants: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_enum(self) } } impl<'de, 'd> EnumAccess<'de> for QNameDeserializer<'d> { type Error = DeError; type Variant = QNameUnitOnly; fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error> where V: DeserializeSeed<'de>, { let name = seed.deserialize(self)?; Ok((name, QNameUnitOnly)) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Deserializer of variant data, that supports only unit variants. /// Attempt to deserialize newtype, tuple or struct variant will return a /// [`DeError::Unsupported`] error. pub struct QNameUnitOnly; impl<'de> VariantAccess<'de> for QNameUnitOnly { type Error = DeError; #[inline] fn unit_variant(self) -> Result<(), DeError> { Ok(()) } fn newtype_variant_seed(self, _seed: T) -> Result where T: DeserializeSeed<'de>, { Err(DeError::Unsupported( "enum newtype variants are not supported as an XML names".into(), )) } fn tuple_variant(self, _len: usize, _visitor: V) -> Result where V: Visitor<'de>, { Err(DeError::Unsupported( "enum tuple variants are not supported as an XML names".into(), )) } fn struct_variant( self, _fields: &'static [&'static str], _visitor: V, ) -> Result where V: Visitor<'de>, { Err(DeError::Unsupported( "enum struct variants are not supported as an XML names".into(), )) } } //////////////////////////////////////////////////////////////////////////////////////////////////// #[cfg(test)] mod tests { use super::*; use crate::se::key::QNameSerializer; use crate::utils::{ByteBuf, Bytes}; use pretty_assertions::assert_eq; use serde::de::IgnoredAny; use serde::{Deserialize, Serialize}; use 
std::collections::HashMap; #[derive(Debug, Deserialize, Serialize, PartialEq)] struct Unit; #[derive(Debug, Deserialize, Serialize, PartialEq)] struct Newtype(String); #[derive(Debug, Deserialize, Serialize, PartialEq)] struct Struct { key: String, val: usize, } #[derive(Debug, Deserialize, Serialize, PartialEq)] enum Enum { Unit, #[serde(rename = "@Attr")] Attr, Newtype(String), Tuple(String, usize), Struct { key: String, val: usize, }, } #[derive(Debug, Deserialize, PartialEq)] #[serde(field_identifier)] enum Id { Field, } #[derive(Debug, Deserialize)] #[serde(transparent)] struct Any(IgnoredAny); impl PartialEq for Any { fn eq(&self, _other: &Any) -> bool { true } } /// Checks that given `$input` successfully deserializing into given `$result` macro_rules! deserialized_to_only { ($name:ident: $type:ty = $input:literal => $result:expr) => { #[test] fn $name() { let de = QNameDeserializer { name: Cow::Borrowed($input), }; let data: $type = Deserialize::deserialize(de).unwrap(); assert_eq!(data, $result); } }; } /// Checks that given `$input` successfully deserializing into given `$result` macro_rules! deserialized_to { ($name:ident: $type:ty = $input:literal => $result:expr) => { #[test] fn $name() { let de = QNameDeserializer { name: Cow::Borrowed($input), }; let data: $type = Deserialize::deserialize(de).unwrap(); assert_eq!(data, $result); // Roundtrip to ensure that serializer corresponds to deserializer assert_eq!( data.serialize(QNameSerializer { writer: String::new() }) .unwrap(), $input ); } }; } /// Checks that attempt to deserialize given `$input` as a `$type` results to a /// deserialization error `$kind` with `$reason` macro_rules! 
err { ($name:ident: $type:ty = $input:literal => $kind:ident($reason:literal)) => { #[test] fn $name() { let de = QNameDeserializer { name: Cow::Borrowed($input), }; let err = <$type as Deserialize>::deserialize(de).unwrap_err(); match err { DeError::$kind(e) => assert_eq!(e, $reason), _ => panic!( "Expected `{}({})`, found `{:?}`", stringify!($kind), $reason, err ), } } }; } deserialized_to!(false_: bool = "false" => false); deserialized_to!(true_: bool = "true" => true); deserialized_to!(i8_: i8 = "-2" => -2); deserialized_to!(i16_: i16 = "-2" => -2); deserialized_to!(i32_: i32 = "-2" => -2); deserialized_to!(i64_: i64 = "-2" => -2); deserialized_to!(u8_: u8 = "3" => 3); deserialized_to!(u16_: u16 = "3" => 3); deserialized_to!(u32_: u32 = "3" => 3); deserialized_to!(u64_: u64 = "3" => 3); serde_if_integer128! { deserialized_to!(i128_: i128 = "-2" => -2); deserialized_to!(u128_: u128 = "2" => 2); } deserialized_to!(f32_: f32 = "1.23" => 1.23); deserialized_to!(f64_: f64 = "1.23" => 1.23); deserialized_to!(char_unescaped: char = "h" => 'h'); err!(char_escaped: char = "<" => Custom("invalid value: string \"<\", expected a character")); deserialized_to!(string: String = "<escaped string" => "<escaped string"); err!(borrowed_str: &str = "name" => Custom("invalid type: string \"name\", expected a borrowed string")); err!(byte_buf: ByteBuf = "<escaped string" => Custom("invalid type: string \"<escaped string\", expected byte data")); err!(borrowed_bytes: Bytes = "name" => Custom("invalid type: string \"name\", expected borrowed bytes")); deserialized_to!(option_none: Option = "" => None); deserialized_to!(option_some: Option = "name" => Some("name".into())); // Unit structs cannot be represented in some meaningful way, but it meaningful // to use them as a placeholder when we want to deserialize _something_ deserialized_to_only!(unit: () = "anything" => ()); deserialized_to_only!(unit_struct: Unit = "anything" => Unit); deserialized_to!(newtype: Newtype = "<escaped 
string" => Newtype("<escaped string".into())); err!(seq: Vec<()> = "name" => Custom("invalid type: string \"name\", expected a sequence")); err!(tuple: ((), ()) = "name" => Custom("invalid type: string \"name\", expected a tuple of size 2")); err!(tuple_struct: ((), ()) = "name" => Custom("invalid type: string \"name\", expected a tuple of size 2")); err!(map: HashMap<(), ()> = "name" => Custom("invalid type: string \"name\", expected a map")); err!(struct_: Struct = "name" => Custom("invalid type: string \"name\", expected struct Struct")); deserialized_to!(enum_unit: Enum = "Unit" => Enum::Unit); deserialized_to!(enum_unit_for_attr: Enum = "@Attr" => Enum::Attr); err!(enum_newtype: Enum = "Newtype" => Unsupported("enum newtype variants are not supported as an XML names")); err!(enum_tuple: Enum = "Tuple" => Unsupported("enum tuple variants are not supported as an XML names")); err!(enum_struct: Enum = "Struct" => Unsupported("enum struct variants are not supported as an XML names")); // Field identifiers cannot be serialized, and IgnoredAny represented _something_ // which is not concrete deserialized_to_only!(identifier: Id = "Field" => Id::Field); deserialized_to_only!(ignored_any: Any = "any-name" => Any(IgnoredAny)); } quick-xml-0.27.1/src/de/map.rs000064400000000000000000001002550072674642500141620ustar 00000000000000//! 
Serde `Deserializer` module use crate::{ de::key::QNameDeserializer, de::simple_type::SimpleTypeDeserializer, de::{str2bool, DeEvent, Deserializer, XmlRead, TEXT_KEY, VALUE_KEY}, encoding::Decoder, errors::serialize::DeError, events::attributes::IterState, events::BytesStart, name::QName, }; use serde::de::{self, DeserializeSeed, IntoDeserializer, SeqAccess, Visitor}; use serde::serde_if_integer128; use std::borrow::Cow; use std::ops::Range; /// Defines a source that should be used to deserialize a value in the next call /// to [`next_value_seed()`](de::MapAccess::next_value_seed) #[derive(Debug, PartialEq)] enum ValueSource { /// Source are not specified, because [`next_key_seed()`] not yet called. /// This is an initial state and state after deserializing value /// (after call of [`next_value_seed()`]). /// /// Attempt to call [`next_value_seed()`] while accessor in this state would /// return a [`DeError::KeyNotRead`] error. /// /// [`next_key_seed()`]: de::MapAccess::next_key_seed /// [`next_value_seed()`]: de::MapAccess::next_value_seed Unknown, /// Next value should be deserialized from an attribute value; value is located /// at specified span. Attribute(Range), /// Value should be deserialized from the text content of the XML node, which /// represented or by an ordinary text node, or by a CDATA node: /// /// ```xml /// /// text content /// /// /// ``` /// ```xml /// /// /// /// /// ``` Text, /// Next value should be deserialized from an element with an any name, except /// elements with a name matching one of the struct fields. Corresponding tag /// name will always be associated with a field with name [`VALUE_KEY`]. /// /// That state is set when call to [`peek()`] returns a [`Start`] event, which /// [`name()`] is not listed in the [list of known fields] (which for a struct /// is a list of field names, and for a map that is an empty list), _and_ /// struct has a field with a special name [`VALUE_KEY`]. 
/// /// When in this state, next event, returned by [`next()`], will be a [`Start`], /// which represents both a key, and a value. Value would be deserialized from /// the whole element and how is will be done determined by the value deserializer. /// The [`MapAccess`] do not consume any events in that state. /// /// Because in that state any encountered `` is mapped to the [`VALUE_KEY`] /// field, it is possible to use tag name as an enum discriminator, so `enum`s /// can be deserialized from that XMLs: /// /// ```xml /// /// ... /// /// /// /// ``` /// ```xml /// /// ... /// /// /// /// ``` /// /// both can be deserialized into /// /// ```ignore /// enum Enum { /// variant1, /// variant2, /// } /// struct AnyName { /// #[serde(rename = "$value")] /// field: Enum, /// } /// ``` /// /// That is possible, because value deserializer have access to the full content /// of a `...` or `...` node, including /// the tag name. /// /// [`Start`]: DeEvent::Start /// [`peek()`]: Deserializer::peek() /// [`next()`]: Deserializer::next() /// [`name()`]: BytesStart::name() /// [`Text`]: Self::Text /// [list of known fields]: MapAccess::fields Content, /// Next value should be deserialized from an element with a dedicated name. /// If deserialized type is a sequence, then that sequence will collect all /// elements with the same name until it will be filled. If not all elements /// would be consumed, the rest will be ignored. /// /// That state is set when call to [`peek()`] returns a [`Start`] event, which /// [`name()`] represents a field name. That name will be deserialized as a key. /// /// When in this state, next event, returned by [`next()`], will be a [`Start`], /// which represents both a key, and a value. Value would be deserialized from /// the whole element and how is will be done determined by the value deserializer. /// The [`MapAccess`] do not consume any events in that state. 
/// /// An illustration below shows, what data is used to deserialize key and value: /// ```xml /// /// ... /// /// /// /// ``` /// /// Although value deserializer will have access to the full content of a `` /// node (including the tag name), it will not get much benefits from that, /// because tag name will always be fixed for a given map field (equal to a /// field name). So, if the field type is an `enum`, it cannot select its /// variant based on the tag name. If that is needed, then [`Content`] variant /// of this enum should be used. Such usage is enabled by annotating a struct /// field as "content" field, which implemented as given the field a special /// [`VALUE_KEY`] name. /// /// [`Start`]: DeEvent::Start /// [`peek()`]: Deserializer::peek() /// [`next()`]: Deserializer::next() /// [`name()`]: BytesStart::name() /// [`Content`]: Self::Content Nested, } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A deserializer that extracts map-like structures from an XML. This deserializer /// represents a one XML tag: /// /// ```xml /// ... /// ``` /// /// Name of this tag is stored in a [`Self::start`] property. /// /// # Lifetimes /// /// - `'de` lifetime represents a buffer, from which deserialized values can /// borrow their data. Depending on the underlying reader, there can be an /// internal buffer of deserializer (i.e. deserializer itself) or an input /// (in that case it is possible to approach zero-copy deserialization). /// /// - `'a` lifetime represents a parent deserializer, which could own the data /// buffer. pub(crate) struct MapAccess<'de, 'a, R> where R: XmlRead<'de>, { /// Tag -- owner of attributes start: BytesStart<'de>, de: &'a mut Deserializer<'de, R>, /// State of the iterator over attributes. Contains the next position in the /// inner `start` slice, from which next attribute should be parsed. 
iter: IterState, /// Current state of the accessor that determines what next call to API /// methods should return. source: ValueSource, /// List of field names of the struct. It is empty for maps fields: &'static [&'static str], /// If `true`, then the deserialized struct has a field with a special name: /// [`VALUE_KEY`]. That field should be deserialized from the whole content /// of an XML node, including tag name: /// /// ```xml /// value for VALUE_KEY field /// ``` has_value_field: bool, } impl<'de, 'a, R> MapAccess<'de, 'a, R> where R: XmlRead<'de>, { /// Create a new MapAccess pub fn new( de: &'a mut Deserializer<'de, R>, start: BytesStart<'de>, fields: &'static [&'static str], ) -> Result { Ok(MapAccess { de, iter: IterState::new(start.name().as_ref().len(), false), start, source: ValueSource::Unknown, fields, has_value_field: fields.contains(&VALUE_KEY), }) } } impl<'de, 'a, R> de::MapAccess<'de> for MapAccess<'de, 'a, R> where R: XmlRead<'de>, { type Error = DeError; fn next_key_seed>( &mut self, seed: K, ) -> Result, Self::Error> { debug_assert_eq!(self.source, ValueSource::Unknown); // FIXME: There error positions counted from the start of tag name - need global position let slice = &self.start.buf; let decoder = self.de.reader.decoder(); if let Some(a) = self.iter.next(slice).transpose()? { // try getting map from attributes (key= "value") let (key, value) = a.into(); self.source = ValueSource::Attribute(value.unwrap_or_default()); let de = QNameDeserializer::from_attr(QName(&slice[key]), decoder)?; seed.deserialize(de).map(Some) } else { // try getting from events (value) match self.de.peek()? 
{ // We shouldn't have both `$value` and `$text` fields in the same // struct, so if we have `$value` field, the we should deserialize // text content to `$value` DeEvent::Text(_) | DeEvent::CData(_) if self.has_value_field => { self.source = ValueSource::Content; // Deserialize `key` from special attribute name which means // that value should be taken from the text content of the // XML node seed.deserialize(VALUE_KEY.into_deserializer()).map(Some) } DeEvent::Text(_) | DeEvent::CData(_) => { self.source = ValueSource::Text; // Deserialize `key` from special attribute name which means // that value should be taken from the text content of the // XML node seed.deserialize(TEXT_KEY.into_deserializer()).map(Some) } // Used to deserialize collections of enums, like: // // // // // // // into // // enum Enum { A, B, С } // struct Root { // #[serde(rename = "$value")] // items: Vec, // } // TODO: This should be handled by #[serde(flatten)] // See https://github.com/serde-rs/serde/issues/1905 DeEvent::Start(e) if self.has_value_field && not_in(self.fields, e, decoder)? 
=> { self.source = ValueSource::Content; seed.deserialize(VALUE_KEY.into_deserializer()).map(Some) } DeEvent::Start(e) => { self.source = ValueSource::Nested; let de = QNameDeserializer::from_elem(e.name(), decoder)?; seed.deserialize(de).map(Some) } // Stop iteration after reaching a closing tag DeEvent::End(e) if e.name() == self.start.name() => Ok(None), // This is a unmatched closing tag, so the XML is invalid DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())), // We cannot get `Eof` legally, because we always inside of the // opened tag `self.start` DeEvent::Eof => Err(DeError::UnexpectedEof), } } } fn next_value_seed>( &mut self, seed: K, ) -> Result { match std::mem::replace(&mut self.source, ValueSource::Unknown) { ValueSource::Attribute(value) => seed.deserialize(SimpleTypeDeserializer::from_part( &self.start.buf, value, true, self.de.reader.decoder(), )), // This arm processes the following XML shape: // // text value // // The whole map represented by an `` element, the map key // is implicit and equals to the `TEXT_KEY` constant, and the value // is a `Text` or a `CData` event (the value deserializer will see one // of that events) // This case are checked by "xml_schema_lists::element" tests in tests/serde-de.rs ValueSource::Text => match self.de.next()? { DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content( // Comment to prevent auto-formatting e.decode(true)?, )), DeEvent::CData(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content( // Comment to prevent auto-formatting e.decode()?, )), // SAFETY: We set `Text` only when we seen `Text` or `CData` _ => unreachable!(), }, // This arm processes the following XML shape: // // ... 
// // The whole map represented by an `` element, the map key // is implicit and equals to the `VALUE_KEY` constant, and the value // is a `Start` event (the value deserializer will see that event) ValueSource::Content => seed.deserialize(MapValueDeserializer { map: self, allow_start: false, }), // This arm processes the following XML shape: // // ... // // The whole map represented by an `` element, the map key // is a `tag`, and the value is a `Start` event (the value deserializer // will see that event) ValueSource::Nested => seed.deserialize(MapValueDeserializer { map: self, allow_start: true, }), ValueSource::Unknown => Err(DeError::KeyNotRead), } } } //////////////////////////////////////////////////////////////////////////////////////////////////// macro_rules! forward { ( $deserialize:ident $( ($($name:ident : $type:ty),*) )? ) => { #[inline] fn $deserialize>( self, $($($name: $type,)*)? visitor: V ) -> Result { self.map.de.$deserialize($($($name,)*)? visitor) } }; } /// A deserializer for a value of map or struct. That deserializer slightly /// differently processes events for a primitive types and sequences than /// a [`Deserializer`]. struct MapValueDeserializer<'de, 'a, 'm, R> where R: XmlRead<'de>, { /// Access to the map that created this deserializer. Gives access to the /// context, such as list of fields, that current map known about. map: &'m mut MapAccess<'de, 'a, R>, /// Determines, should [`Deserializer::read_string_impl()`] expand the second /// level of tags or not. /// /// If this field is `true`, we process the following XML shape: /// /// ```xml /// /// ... /// /// ``` /// /// The whole map represented by an `` element, the map key is a `tag`, /// and the value starts with is a `Start("tag")` (the value deserializer will /// see that event first) and extended to the matching `End("tag")` event. 
/// In order to deserialize primitives (such as `usize`) we need to allow to /// look inside the one levels of tags, so the /// /// ```xml /// 42 /// ``` /// /// could be deserialized into `42usize` without problems, and at the same time /// /// ```xml /// /// /// /// /// /// ``` /// could be deserialized to a struct. /// /// If this field is `false`, we processes the one of following XML shapes: /// /// ```xml /// /// text value /// /// ``` /// ```xml /// /// /// /// ``` /// ```xml /// /// ... /// /// ``` /// /// The whole map represented by an `` element, the map key is /// implicit and equals to the [`VALUE_KEY`] constant, and the value is /// a [`Text`], a [`CData`], or a [`Start`] event (the value deserializer /// will see one of those events). In the first two cases the value of this /// field do not matter (because we already see the textual event and there /// no reasons to look "inside" something), but in the last case the primitives /// should raise a deserialization error, because that means that you trying /// to deserialize the following struct: /// /// ```ignore /// struct AnyName { /// #[serde(rename = "$text")] /// any_name: String, /// } /// ``` /// which means that `any_name` should get a content of the `` element. /// /// Changing this can be valuable for , /// but those fields should be explicitly marked that they want to get any /// possible markup as a `String` and that mark is different from marking them /// as accepting "text content" which the currently `$text` means. /// /// [`Text`]: DeEvent::Text /// [`CData`]: DeEvent::CData /// [`Start`]: DeEvent::Start allow_start: bool, } impl<'de, 'a, 'm, R> MapValueDeserializer<'de, 'a, 'm, R> where R: XmlRead<'de>, { /// Returns a next string as concatenated content of consequent [`Text`] and /// [`CData`] events, used inside [`deserialize_primitives!()`]. 
/// /// [`Text`]: DeEvent::Text /// [`CData`]: DeEvent::CData #[inline] fn read_string(&mut self, unescape: bool) -> Result, DeError> { self.map.de.read_string_impl(unescape, self.allow_start) } } impl<'de, 'a, 'm, R> de::Deserializer<'de> for MapValueDeserializer<'de, 'a, 'm, R> where R: XmlRead<'de>, { type Error = DeError; deserialize_primitives!(mut); forward!(deserialize_option); forward!(deserialize_unit); forward!(deserialize_unit_struct(name: &'static str)); forward!(deserialize_newtype_struct(name: &'static str)); forward!(deserialize_map); forward!(deserialize_struct( name: &'static str, fields: &'static [&'static str] )); forward!(deserialize_enum( name: &'static str, variants: &'static [&'static str] )); forward!(deserialize_any); forward!(deserialize_ignored_any); /// Tuple representation is the same as [sequences](#method.deserialize_seq). fn deserialize_tuple(self, _len: usize, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_seq(visitor) } /// Named tuple representation is the same as [unnamed tuples](#method.deserialize_tuple). fn deserialize_tuple_struct( self, _name: &'static str, len: usize, visitor: V, ) -> Result where V: Visitor<'de>, { self.deserialize_tuple(len, visitor) } /// Deserializes each `` in /// ```xml /// /// ... /// ... /// ... /// /// ``` /// as a sequence item, where `` represents a Map in a [`Self::map`], /// and a `` is a sequential field of that map. fn deserialize_seq(self, visitor: V) -> Result where V: Visitor<'de>, { let filter = if self.allow_start { match self.map.de.peek()? 
{ // Clone is cheap if event borrows from the input DeEvent::Start(e) => TagFilter::Include(e.clone()), // SAFETY: we use that deserializer with `allow_start == true` // only from the `MapAccess::next_value_seed` and only when we // peeked `Start` event _ => unreachable!(), } } else { TagFilter::Exclude(self.map.fields) }; visitor.visit_seq(MapValueSeqAccess { #[cfg(feature = "overlapped-lists")] checkpoint: self.map.de.skip_checkpoint(), map: self.map, filter, }) } #[inline] fn is_human_readable(&self) -> bool { self.map.de.is_human_readable() } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Check if tag `start` is included in the `fields` list. `decoder` is used to /// get a string representation of a tag. /// /// Returns `true`, if `start` is not in the `fields` list and `false` otherwise. fn not_in( fields: &'static [&'static str], start: &BytesStart, decoder: Decoder, ) -> Result { let tag = decoder.decode(start.name().into_inner())?; Ok(fields.iter().all(|&field| field != tag.as_ref())) } /// A filter that determines, what tags should form a sequence. /// /// There are two types of sequences: /// - sequence where each element represented by tags with the same name /// - sequence where each element can have a different tag /// /// The first variant could represent a collection of structs, the second -- /// a collection of enum variants. /// /// In the second case we don't know what tag name should be expected as a /// sequence element, so we accept any element. Since the sequence are flattened /// into maps, we skip elements which have dedicated fields in a struct by using an /// `Exclude` filter that filters out elements with names matching field names /// from the struct. 
/// /// # Lifetimes /// /// `'de` represents a lifetime of the XML input, when filter stores the /// dedicated tag name #[derive(Debug)] enum TagFilter<'de> { /// A `SeqAccess` interested only in tags with specified name to deserialize /// an XML like this: /// /// ```xml /// <...> /// /// /// /// ... /// /// ``` /// /// The tag name is stored inside (`b"tag"` for that example) Include(BytesStart<'de>), //TODO: Need to store only name instead of a whole tag /// A `SeqAccess` interested in tags with any name, except explicitly listed. /// Excluded tags are used as struct field names and therefore should not /// fall into a `$value` category Exclude(&'static [&'static str]), } impl<'de> TagFilter<'de> { fn is_suitable(&self, start: &BytesStart, decoder: Decoder) -> Result { match self { Self::Include(n) => Ok(n.name() == start.name()), Self::Exclude(fields) => not_in(fields, start, decoder), } } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// An accessor to sequence elements forming a value for struct field. /// Technically, this sequence is flattened out into structure and sequence /// elements are overlapped with other fields of a structure. Each call to /// [`Self::next_element_seed`] consumes a next sub-tree or consequent list /// of [`Text`] and [`CData`] events. /// /// ```xml /// <> /// ... /// The is the one item /// This is item it splitted by comments /// ...and that is the third! /// ... /// /// ``` /// /// Depending on [`Self::filter`], only some of that possible constructs would be /// an element. /// /// [`Text`]: DeEvent::Text /// [`CData`]: DeEvent::CData struct MapValueSeqAccess<'de, 'a, 'm, R> where R: XmlRead<'de>, { /// Accessor to a map that creates this accessor and to a deserializer for /// a sequence items. map: &'m mut MapAccess<'de, 'a, R>, /// Filter that determines whether a tag is a part of this sequence. 
/// /// When feature `overlapped-lists` is not activated, iteration will stop /// when found a tag that does not pass this filter. /// /// When feature `overlapped-lists` is activated, all tags, that not pass /// this check, will be skipped. filter: TagFilter<'de>, /// Checkpoint after which all skipped events should be returned. All events, /// that was skipped before creating this checkpoint, will still stay buffered /// and will not be returned #[cfg(feature = "overlapped-lists")] checkpoint: usize, } #[cfg(feature = "overlapped-lists")] impl<'de, 'a, 'm, R> Drop for MapValueSeqAccess<'de, 'a, 'm, R> where R: XmlRead<'de>, { fn drop(&mut self) { self.map.de.start_replay(self.checkpoint); } } impl<'de, 'a, 'm, R> SeqAccess<'de> for MapValueSeqAccess<'de, 'a, 'm, R> where R: XmlRead<'de>, { type Error = DeError; fn next_element_seed(&mut self, seed: T) -> Result, DeError> where T: DeserializeSeed<'de>, { let decoder = self.map.de.reader.decoder(); loop { break match self.map.de.peek()? { // If we see a tag that we not interested, skip it #[cfg(feature = "overlapped-lists")] DeEvent::Start(e) if !self.filter.is_suitable(e, decoder)? => { self.map.de.skip()?; continue; } // Stop iteration when list elements ends #[cfg(not(feature = "overlapped-lists"))] DeEvent::Start(e) if !self.filter.is_suitable(e, decoder)? => Ok(None), // Stop iteration after reaching a closing tag DeEvent::End(e) if e.name() == self.map.start.name() => Ok(None), // This is a unmatched closing tag, so the XML is invalid DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())), // We cannot get `Eof` legally, because we always inside of the // opened tag `self.map.start` DeEvent::Eof => Err(DeError::UnexpectedEof), // Start(tag), Text, CData _ => seed .deserialize(SeqItemDeserializer { map: self.map }) .map(Some), }; } } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A deserializer for a single item of a sequence. 
struct SeqItemDeserializer<'de, 'a, 'm, R> where R: XmlRead<'de>, { /// Access to the map that created this deserializer. Gives access to the /// context, such as list of fields, that current map known about. map: &'m mut MapAccess<'de, 'a, R>, } impl<'de, 'a, 'm, R> SeqItemDeserializer<'de, 'a, 'm, R> where R: XmlRead<'de>, { /// Returns a next string as concatenated content of consequent [`Text`] and /// [`CData`] events, used inside [`deserialize_primitives!()`]. /// /// [`Text`]: DeEvent::Text /// [`CData`]: DeEvent::CData #[inline] fn read_string(&mut self, unescape: bool) -> Result, DeError> { self.map.de.read_string_impl(unescape, true) } } impl<'de, 'a, 'm, R> de::Deserializer<'de> for SeqItemDeserializer<'de, 'a, 'm, R> where R: XmlRead<'de>, { type Error = DeError; deserialize_primitives!(mut); forward!(deserialize_option); forward!(deserialize_unit); forward!(deserialize_unit_struct(name: &'static str)); forward!(deserialize_newtype_struct(name: &'static str)); forward!(deserialize_map); forward!(deserialize_struct( name: &'static str, fields: &'static [&'static str] )); forward!(deserialize_enum( name: &'static str, variants: &'static [&'static str] )); forward!(deserialize_any); forward!(deserialize_ignored_any); /// Representation of tuples the same as [sequences](#method.deserialize_seq). fn deserialize_tuple(self, _len: usize, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_seq(visitor) } /// Representation of named tuples the same as [unnamed tuples](#method.deserialize_tuple). fn deserialize_tuple_struct( self, _name: &'static str, len: usize, visitor: V, ) -> Result where V: Visitor<'de>, { self.deserialize_tuple(len, visitor) } /// This method deserializes a sequence inside of element that itself is a /// sequence element: /// /// ```xml /// <> /// ... /// inner sequence /// inner sequence /// inner sequence /// ... /// /// ``` fn deserialize_seq(self, visitor: V) -> Result where V: Visitor<'de>, { match self.map.de.next()? 
{ DeEvent::Text(e) => SimpleTypeDeserializer::from_text_content( // Comment to prevent auto-formatting e.decode(true)?, ) .deserialize_seq(visitor), DeEvent::CData(e) => SimpleTypeDeserializer::from_text_content( // Comment to prevent auto-formatting e.decode()?, ) .deserialize_seq(visitor), // This is a sequence element. We cannot treat it as another flatten // sequence if type will require `deserialize_seq` We instead forward // it to `xs:simpleType` implementation DeEvent::Start(e) => { let value = match self.map.de.next()? { DeEvent::Text(e) => SimpleTypeDeserializer::from_text_content( // Comment to prevent auto-formatting e.decode(true)?, ) .deserialize_seq(visitor), DeEvent::CData(e) => SimpleTypeDeserializer::from_text_content( // Comment to prevent auto-formatting e.decode()?, ) .deserialize_seq(visitor), e => Err(DeError::Unsupported( format!("unsupported event {:?}", e).into(), )), }; // TODO: May be assert that here we expect only matching closing tag? self.map.de.read_to_end(e.name())?; value } // SAFETY: we use that deserializer only when Start(element), Text, // or CData event Start(tag), Text, CData was peeked already _ => unreachable!(), } } #[inline] fn is_human_readable(&self) -> bool { self.map.de.is_human_readable() } } //////////////////////////////////////////////////////////////////////////////////////////////////// #[test] fn test_not_in() { let tag = BytesStart::new("tag"); assert_eq!(not_in(&[], &tag, Decoder::utf8()).unwrap(), true); assert_eq!( not_in(&["no", "such", "tags"], &tag, Decoder::utf8()).unwrap(), true ); assert_eq!( not_in(&["some", "tag", "included"], &tag, Decoder::utf8()).unwrap(), false ); } quick-xml-0.27.1/src/de/mod.rs000064400000000000000000003367210072674642500141750ustar 00000000000000//! Serde `Deserializer` module. //! //! Due to the complexity of the XML standard and the fact that serde was developed //! with JSON in mind, not all serde concepts apply smoothly to XML. This leads to //! 
that fact that some XML concepts are inexpressible in terms of serde derives //! and may require manual deserialization. //! //! The most notable restriction is the ability to distinguish between _elements_ //! and _attributes_, as no other format used by serde has such a conception. //! //! Due to that the mapping is performed in a best effort manner. //! //! //! //! Table of Contents //! ================= //! - [Mapping XML to Rust types](#mapping-xml-to-rust-types) //! - [Optional attributes and elements](#optional-attributes-and-elements) //! - [Choices (`xs:choice` XML Schema type)](#choices-xschoice-xml-schema-type) //! - [Sequences (`xs:all` and `xs:sequence` XML Schema types)](#sequences-xsall-and-xssequence-xml-schema-types) //! - [Composition Rules](#composition-rules) //! - [Difference between `$text` and `$value` special names](#difference-between-text-and-value-special-names) //! - [`$text`](#text) //! - [`$value`](#value) //! - [Primitives and sequences of primitives](#primitives-and-sequences-of-primitives) //! - [Structs and sequences of structs](#structs-and-sequences-of-structs) //! - [Enums and sequences of enums](#enums-and-sequences-of-enums) //! - [Frequently Used Patterns](#frequently-used-patterns) //! - [`` lists](#element-lists) //! //! //! //! Mapping XML to Rust types //! ========================= //! //! Type names are never considered when deserializing, so you can name your //! types as you wish. Other general rules: //! - `struct` field name could be represented in XML only as an attribute name //! or an element name; //! - `enum` variant name could be represented in XML only as an attribute name //! or an element name; //! - the unit struct, unit type `()` and unit enum variant can be deserialized //! from any valid XML content: //! - attribute and element names; //! - attribute and element values; //! - text or CDATA content (including mixed text and CDATA content). //! //!
//! //! NOTE: examples marked with `FIXME:` do not work yet -- any PRs that fix //! them are welcome! The message after the marker is the test failure message. //! Also, all those tests are marked with an `ignore` option, although they //! compile. This is intentional, because rustdoc marks such blocks with //! an information icon, unlike `no_run` blocks. //! //!
//! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //!
To parse all these XML's......use that Rust type(s)
//! Content of attributes and text / CDATA content of elements (including mixed //! text and CDATA content): //! //! ```xml //! <... ...="content" /> //! ``` //! ```xml //! <...>content //! ``` //! ```xml //! <...> //! ``` //! ```xml //! <...>texttext //! ``` //!
//! //! Merging of the text / CDATA content is tracked in the issue [#474] and //! will be available in the next release. //!
//!
//! //! You can use any type that can be deserialized from an `&str`, for example: //! - [`String`] and [`&str`] //! - [`Cow`] //! - [`u32`], [`f32`] and other numeric types //! - `enum`s, like //! ```ignore //! // FIXME: #474, merging mixed text / CDATA //! // content does not work yet //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! enum Language { //! Rust, //! Cpp, //! #[serde(other)] //! Other, //! } //! # #[derive(Debug, PartialEq, Deserialize)] //! # struct X { #[serde(rename = "$text")] x: Language } //! # assert_eq!(X { x: Language::Rust }, quick_xml::de::from_str("Rust").unwrap()); //! # assert_eq!(X { x: Language::Cpp }, quick_xml::de::from_str("Cp").unwrap()); //! # assert_eq!(X { x: Language::Other }, quick_xml::de::from_str("").unwrap()); //! ``` //! //!
//! //! NOTE: deserialization to non-owned types (i.e. types that borrow from the input), //! such as `&str`, is possible only if you parse a document in the UTF-8 //! encoding, the content does not contain entity references such as `&amp;` //! or character references such as `&#xD;`, and the text content is represented //! by a single piece of [text] or [CDATA] element. //!
//! //! //! [text]: Event::Text //! [CDATA]: Event::CData //!
//! //! Content of attributes and text / CDATA content of elements (including mixed //! text and CDATA content), which represents a space-delimited lists, as //! specified in the XML Schema specification for [`xs:list`] `simpleType`: //! //! ```xml //! <... ...="element1 element2 ..." /> //! ``` //! ```xml //! <...> //! element1 //! element2 //! ... //! //! ``` //! ```xml //! <...> //! ``` //!
//! //! Merging of the text / CDATA content is tracked in the issue [#474] and //! will be available in the next release. //!
//! //! [`xs:list`]: https://www.w3.org/TR/xmlschema11-2/#list-datatypes //!
//! //! Use any type that deserialized using [`deserialize_seq()`] call, for example: //! //! ``` //! // FIXME: #474, merging mixed text / CDATA //! // content does not work yet //! type List = Vec; //! ``` //! //! See the next row to learn where in your struct definition you should //! use that type. //! //! According to the XML Schema specification, delimiters for elements is one //! or more space (`' '`, `'\r'`, `'\n'`, and `'\t'`) character(s). //! //!
//! //! NOTE: according to the XML Schema restrictions, you cannot escape those //! white-space characters, so list elements will _never_ contain them. //! In practice you will usually use `xs:list`s for lists of numbers or enumerated //! values which look like identifiers in many languages, for example, `item`, //! `some_item` or `some-item`, so that shouldn't be a problem. //! //! NOTE: according to the XML Schema specification, list elements can be //! delimited only by spaces. Other delimiters (for example, commas) are not //! allowed. //! //!
//! //! [`deserialize_seq()`]: de::Deserializer::deserialize_seq //!
//! A typical XML with attributes. The root tag name does not matter: //! //! ```xml //! //! ``` //! //! //! A structure where each XML attribute is mapped to a field with a name //! starting with `@`. Because Rust identifiers do not permit the `@` character, //! you should use the `#[serde(rename = "@...")]` attribute to rename it. //! The name of the struct itself does not matter: //! //! ``` //! # use serde::Deserialize; //! # type T = (); //! # type U = (); //! // Get both attributes //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@one")] //! one: T, //! //! #[serde(rename = "@two")] //! two: U, //! } //! # quick_xml::de::from_str::(r#""#).unwrap(); //! ``` //! ``` //! # use serde::Deserialize; //! # type T = (); //! // Get only the one attribute, ignore the other //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@one")] //! one: T, //! } //! # quick_xml::de::from_str::(r#""#).unwrap(); //! # quick_xml::de::from_str::(r#""#).unwrap(); //! # quick_xml::de::from_str::(r#"..."#).unwrap(); //! ``` //! ``` //! # use serde::Deserialize; //! // Ignore all attributes //! // You can also use the `()` type (unit type) //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName; //! # quick_xml::de::from_str::(r#""#).unwrap(); //! # quick_xml::de::from_str::(r#"..."#).unwrap(); //! # quick_xml::de::from_str::(r#"......"#).unwrap(); //! ``` //! //! All these structs can be used to deserialize from an XML on the //! left side depending on amount of information that you want to get. //! Of course, you can combine them with elements extractor structs (see below). //! //!
//! //! NOTE: XML allows you to have an attribute and an element with the same name //! inside the one element. quick-xml deals with that by prepending a `@` prefix //! to the name of attributes. //!
//!
//! A typical XML with child elements. The root tag name does not matter: //! //! ```xml //! //! ... //! ... //! //! ``` //! //! A structure where an each XML child element are mapped to the field. //! Each element name becomes a name of field. The name of the struct itself //! does not matter: //! //! ``` //! # use serde::Deserialize; //! # type T = (); //! # type U = (); //! // Get both elements //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! one: T, //! two: U, //! } //! # quick_xml::de::from_str::(r#"......"#).unwrap(); //! # //! # quick_xml::de::from_str::(r#""#).unwrap_err(); //! # quick_xml::de::from_str::(r#"..."#).unwrap_err(); //! ``` //! ``` //! # use serde::Deserialize; //! # type T = (); //! // Get only the one element, ignore the other //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! one: T, //! } //! # quick_xml::de::from_str::(r#"......"#).unwrap(); //! # quick_xml::de::from_str::(r#"..."#).unwrap(); //! ``` //! ``` //! # use serde::Deserialize; //! // Ignore all elements //! // You can also use the `()` type (unit type) //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName; //! # quick_xml::de::from_str::(r#""#).unwrap(); //! # quick_xml::de::from_str::(r#"......"#).unwrap(); //! # quick_xml::de::from_str::(r#"..."#).unwrap(); //! # quick_xml::de::from_str::(r#"..."#).unwrap(); //! ``` //! //! All these structs can be used to deserialize from an XML on the //! left side depending on amount of information that you want to get. //! Of course, you can combine them with attributes extractor structs (see above). //! //!
//! //! NOTE: XML allows you to have an attribute and an element with the same name //! inside the one element. quick-xml deals with that by prepending a `@` prefix //! to the name of attributes. //!
//!
//! An XML with an attribute and a child element named equally: //! //! ```xml //! //! ... //! //! ``` //! //! //! You MUST specify `#[serde(rename = "@field")]` on a field that will be used //! for an attribute: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type T = (); //! # type U = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@field")] //! attribute: T, //! field: U, //! } //! # assert_eq!( //! # AnyName { attribute: (), field: () }, //! # quick_xml::de::from_str(r#" //! # //! # ... //! # //! # "#).unwrap(), //! # ); //! ``` //!
//! //! ## Optional attributes and elements //! //!
To parse all these XML's......use that Rust type(s)
//! An optional XML attribute that you want to capture. //! The root tag name does not matter: //! //! ```xml //! //! ``` //! ```xml //! //! ``` //! //! //! A structure with an optional field, renamed according to the requirements //! for attributes: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type T = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@optional")] //! optional: Option, //! } //! # assert_eq!(AnyName { optional: Some(()) }, quick_xml::de::from_str(r#""#).unwrap()); //! # assert_eq!(AnyName { optional: None }, quick_xml::de::from_str(r#""#).unwrap()); //! ``` //! When the XML attribute is present, type `T` will be deserialized from //! an attribute value (which is a string). Note, that if `T = String` or other //! string type, the empty attribute is mapped to a `Some("")`, whereas `None` //! represents the missed attribute: //! ```xml //! //! //! //! ``` //!
//! An optional XML elements that you want to capture. //! The root tag name does not matter: //! //! ```xml //! //! ... //! //! ``` //! ```xml //! //! //! //! ``` //! ```xml //! //! ``` //! //! //! A structure with an optional field: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type T = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! optional: Option, //! } //! # assert_eq!(AnyName { optional: Some(()) }, quick_xml::de::from_str(r#"..."#).unwrap()); //! # assert_eq!(AnyName { optional: None }, quick_xml::de::from_str(r#""#).unwrap()); //! ``` //! When the XML element is present, type `T` will be deserialized from an //! element (which is a string or a multi-mapping -- i.e. mapping which can have //! duplicated keys). //!
//! //! Currently some edge cases exist; they are described in the issue [#497]. //!
//!
//! //! ## Choices (`xs:choice` XML Schema type) //! //!
To parse all these XML's......use that Rust type(s)
//! An XML with different root tag names: //! //! ```xml //! ... //! ``` //! ```xml //! //! ... //! //! ``` //! //! //! An enum where each variant have a name of the possible root tag. The name of //! the enum itself does not matter. //! //! All these structs can be used to deserialize from any XML on the //! left side depending on amount of information that you want to get: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type T = (); //! # type U = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum AnyName { //! One { #[serde(rename = "@field1")] field1: T }, //! Two { field2: U }, //! } //! # assert_eq!(AnyName::One { field1: () }, quick_xml::de::from_str(r#"..."#).unwrap()); //! # assert_eq!(AnyName::Two { field2: () }, quick_xml::de::from_str(r#"..."#).unwrap()); //! ``` //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type T = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct Two { //! field2: T, //! } //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum AnyName { //! // `field1` content discarded //! One, //! Two(Two), //! } //! # assert_eq!(AnyName::One, quick_xml::de::from_str(r#"..."#).unwrap()); //! # assert_eq!(AnyName::Two(Two { field2: () }), quick_xml::de::from_str(r#"..."#).unwrap()); //! ``` //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum AnyName { //! One, //! // the will be mapped to this //! #[serde(other)] //! Other, //! } //! # assert_eq!(AnyName::One, quick_xml::de::from_str(r#"..."#).unwrap()); //! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"..."#).unwrap()); //! ``` //!
//! //! NOTE: You should have variants for all possible tag names in your enum //! or have an `#[serde(other)]` variant. //! //!
//!
//! //! `` embedded in the other element, and at the same time you want //! to get access to other attributes that can appear in the same container //! (``). Also this case can be described, as if you want to choose //! Rust enum variant based on a tag name: //! //! ```xml //! //! ... //! //! ``` //! ```xml //! //! ... //! //! ``` //! //! //! A structure with a field which type is an `enum`. //! //! Names of the enum, struct, and struct field with `Choice` type does not matter: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type T = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum Choice { //! One, //! Two, //! } //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@field")] //! field: T, //! //! #[serde(rename = "$value")] //! any_name: Choice, //! } //! # assert_eq!( //! # AnyName { field: (), any_name: Choice::One }, //! # quick_xml::de::from_str(r#"..."#).unwrap(), //! # ); //! # assert_eq!( //! # AnyName { field: (), any_name: Choice::Two }, //! # quick_xml::de::from_str(r#"..."#).unwrap(), //! # ); //! ``` //!
//! //! `` embedded in the other element, and at the same time you want //! to get access to other elements that can appear in the same container //! (``). Also this case can be described, as if you want to choose //! Rust enum variant based on a tag name: //! //! ```xml //! //! ... //! ... //! //! ``` //! ```xml //! //! ... //! ... //! //! ``` //! //! //! A structure with a field which type is an `enum`. //! //! Names of the enum, struct, and struct field with `Choice` type does not matter: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type T = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum Choice { //! One, //! Two, //! } //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! field: T, //! //! #[serde(rename = "$value")] //! any_name: Choice, //! } //! # assert_eq!( //! # AnyName { field: (), any_name: Choice::One }, //! # quick_xml::de::from_str(r#"......"#).unwrap(), //! # ); //! # assert_eq!( //! # AnyName { field: (), any_name: Choice::Two }, //! # quick_xml::de::from_str(r#"......"#).unwrap(), //! # ); //! ``` //! //!
//! //! NOTE: if your `Choice` enum would contain an `#[serde(other)]` //! variant, element `<field>` will be mapped to the `field` and not to the enum //! variant. //!
//! //!
//! //! `` encapsulated in other element with a fixed name: //! //! ```xml //! //! //! ... //! //! //! ``` //! ```xml //! //! //! ... //! //! //! ``` //! //! //! A structure with a field of an intermediate type with one field of `enum` type. //! Actually, this example is not necessary, because you can construct it by yourself //! using the composition rules that were described above. However the XML construction //! described here is very common, so it is shown explicitly. //! //! Names of the enum and struct does not matter: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type T = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum Choice { //! One, //! Two, //! } //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct Holder { //! #[serde(rename = "$value")] //! any_name: Choice, //! } //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@field")] //! field: T, //! //! choice: Holder, //! } //! # assert_eq!( //! # AnyName { field: (), choice: Holder { any_name: Choice::One } }, //! # quick_xml::de::from_str(r#"..."#).unwrap(), //! # ); //! # assert_eq!( //! # AnyName { field: (), choice: Holder { any_name: Choice::Two } }, //! # quick_xml::de::from_str(r#"..."#).unwrap(), //! # ); //! ``` //!
//! //! `` encapsulated in other element with a fixed name: //! //! ```xml //! //! ... //! //! ... //! //! //! ``` //! ```xml //! //! //! ... //! //! ... //! //! ``` //! //! //! A structure with a field of an intermediate type with one field of `enum` type. //! Actually, this example is not necessary, because you can construct it by yourself //! using the composition rules that were described above. However the XML construction //! described here is very common, so it is shown explicitly. //! //! Names of the enum and struct does not matter: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type T = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum Choice { //! One, //! Two, //! } //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct Holder { //! #[serde(rename = "$value")] //! any_name: Choice, //! } //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! field: T, //! //! choice: Holder, //! } //! # assert_eq!( //! # AnyName { field: (), choice: Holder { any_name: Choice::One } }, //! # quick_xml::de::from_str(r#"......"#).unwrap(), //! # ); //! # assert_eq!( //! # AnyName { field: (), choice: Holder { any_name: Choice::Two } }, //! # quick_xml::de::from_str(r#"......"#).unwrap(), //! # ); //! ``` //!
//! //! ## Sequences (`xs:all` and `xs:sequence` XML Schema types) //! //!
To parse all these XML's......use that Rust type(s)
//! A sequence inside of a tag without a dedicated name: //! //! ```xml //! //! ``` //! ```xml //! //! //! //! ``` //! ```xml //! //! //! //! //! //! ``` //! //! //! A structure with a field which have a sequence type, for example, [`Vec`]. //! Because XML syntax does not distinguish between empty sequences and missed //! elements, we should indicate that on the Rust side, because serde will require //! that field `item` exists. You can do that in two possible ways: //! //! Use the `#[serde(default)]` attribute for a [field] or the entire [struct]: //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type Item = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(default)] //! item: Vec, //! } //! # assert_eq!( //! # AnyName { item: vec![] }, //! # quick_xml::de::from_str(r#""#).unwrap(), //! # ); //! # assert_eq!( //! # AnyName { item: vec![()] }, //! # quick_xml::de::from_str(r#""#).unwrap(), //! # ); //! # assert_eq!( //! # AnyName { item: vec![(), (), ()] }, //! # quick_xml::de::from_str(r#""#).unwrap(), //! # ); //! ``` //! //! Use the [`Option`]. In that case inner array will always contains at least one //! element after deserialization: //! ```ignore //! // FIXME: #510, //! // UnexpectedEnd([97, 110, 121, 45, 116, 97, 103]) //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type Item = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! item: Option>, //! } //! # assert_eq!( //! # AnyName { item: None }, //! # quick_xml::de::from_str(r#""#).unwrap(), //! # ); //! # assert_eq!( //! # AnyName { item: Some(vec![()]) }, //! # quick_xml::de::from_str(r#""#).unwrap(), //! # ); //! # assert_eq!( //! # AnyName { item: Some(vec![(), (), ()]) }, //! # quick_xml::de::from_str(r#""#).unwrap(), //! # ); //! ``` //!
//! //! Currently not working. The bug is tracked in [#510]. //!
//! //! See also [Frequently Used Patterns](#element-lists). //! //! [field]: https://serde.rs/field-attrs.html#default //! [struct]: https://serde.rs/container-attrs.html#default //!
//! A sequence with a strict order, probably with a mixed content //! (text / CDATA and tags): //! //! ```xml //! ... //! text //! //! ... //! ... //! ``` //!
//! //! NOTE: this is just an example for showing mapping. XML does not allow //! multiple root tags -- you should wrap the sequence into a tag. //!
//!
//! //! All elements mapped to the heterogeneous sequential type: tuple or named tuple. //! Each element of the tuple should be able to be deserialized from the nested //! element content (`...`), except the enum types which would be deserialized //! from the full element (`...`), so they could use the element name //! to choose the right variant: //! //! ```ignore //! // FIXME: #474 //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type One = (); //! # type Two = (); //! # /* //! type One = ...; //! type Two = ...; //! # */ //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName(One, String, Two, One); //! # assert_eq!( //! # AnyName((), "text cdata".into(), (), ()), //! # quick_xml::de::from_str(r#"...text ......"#).unwrap(), //! # ); //! ``` //! ```ignore //! // FIXME: #474, Custom("unknown variant `two`, //! // expected `one`") //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum Choice { //! One, //! } //! # type Two = (); //! # /* //! type Two = ...; //! # */ //! type AnyName = (Choice, String, Two, Choice); //! # assert_eq!( //! # (Choice::One, "text cdata".to_string(), (), Choice::One), //! # quick_xml::de::from_str(r#"...text ......"#).unwrap(), //! # ); //! ``` //!
//!
//! NOTE: consecutive text and CDATA nodes are merged into one text node,
//! so you cannot have two adjacent string types in your sequence.
//!
//!
//! //! Merging of the text / CDATA content is tracked in the issue [#474] and //! will be available in the next release. //!
//!
//! A sequence with a non-strict order, probably with a mixed content //! (text / CDATA and tags). //! //! ```xml //! ... //! text //! //! ... //! ... //! ``` //!
//! //! NOTE: this is just an example for showing mapping. XML does not allow //! multiple root tags -- you should wrap the sequence into a tag. //!
//!
//! A homogeneous sequence of elements with a fixed or dynamic size: //! //! ```ignore //! // FIXME: #474 //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum Choice { //! One, //! Two, //! #[serde(other)] //! Other, //! } //! type AnyName = [Choice; 4]; //! # assert_eq!( //! # [Choice::One, Choice::Other, Choice::Two, Choice::One], //! # quick_xml::de::from_str::(r#"...text ......"#).unwrap(), //! # ); //! ``` //! ```ignore //! // FIXME: Custom("unknown variant `text`, expected //! // one of `one`, `two`, `$value`") //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum Choice { //! One, //! Two, //! #[serde(rename = "$value")] //! Other(String), //! } //! type AnyName = Vec; //! # assert_eq!( //! # vec![ //! # Choice::One, //! # Choice::Other("text cdata".into()), //! # Choice::Two, //! # Choice::One, //! # ], //! # quick_xml::de::from_str::(r#"...text ......"#).unwrap(), //! # ); //! ``` //!
//!
//! NOTE: consecutive text and CDATA nodes are merged into one text node,
//! so you cannot have two adjacent string types in your sequence.
//!
//!
//! //! Merging of the text / CDATA content is tracked in the issue [#474] and //! will be available in the next release. //!
//!
//! A sequence with a strict order, probably with a mixed content, //! (text and tags) inside of the other element: //! //! ```xml //! //! ... //! text //! //! ... //! ... //! //! ``` //! //! //! A structure where all child elements mapped to the one field which have //! a heterogeneous sequential type: tuple or named tuple. Each element of the //! tuple should be able to be deserialized from the full element (`...`). //! //! You MUST specify `#[serde(rename = "$value")]` on that field: //! //! ```ignore //! // FIXME: #474, Custom("duplicate field `$value`") //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type One = (); //! # type Two = (); //! # /* //! type One = ...; //! type Two = ...; //! # */ //! //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@attribute")] //! # attribute: (), //! # /* //! attribute: ..., //! # */ //! // Does not (yet?) supported by the serde //! // https://github.com/serde-rs/serde/issues/1905 //! // #[serde(flatten)] //! #[serde(rename = "$value")] //! any_name: (One, String, Two, One), //! } //! # assert_eq!( //! # AnyName { attribute: (), any_name: ((), "text cdata".into(), (), ()) }, //! # quick_xml::de::from_str("\ //! # \ //! # ...\ //! # text \ //! # \ //! # ...\ //! # ...\ //! # " //! # ).unwrap(), //! # ); //! ``` //! ```ignore //! // FIXME: #474, Custom("duplicate field `$value`") //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type One = (); //! # type Two = (); //! # /* //! type One = ...; //! type Two = ...; //! # */ //! //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct NamedTuple(One, String, Two, One); //! //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@attribute")] //! # attribute: (), //! # /* //! attribute: ..., //! # */ //! // Does not (yet?) supported by the serde //! // https://github.com/serde-rs/serde/issues/1905 //! 
// #[serde(flatten)] //! #[serde(rename = "$value")] //! any_name: NamedTuple, //! } //! # assert_eq!( //! # AnyName { attribute: (), any_name: NamedTuple((), "text cdata".into(), (), ()) }, //! # quick_xml::de::from_str("\ //! # \ //! # ...\ //! # text \ //! # \ //! # ...\ //! # ...\ //! # " //! # ).unwrap(), //! # ); //! ``` //!
//!
//! NOTE: consecutive text and CDATA nodes are merged into one text node,
//! so you cannot have two adjacent string types in your sequence.
//!
//!
//! //! Merging of the text / CDATA content is tracked in the issue [#474] and //! will be available in the next release. //!
//!
//! A sequence with a non-strict order, probably with a mixed content //! (text / CDATA and tags) inside of the other element: //! //! ```xml //! //! ... //! text //! //! ... //! ... //! //! ``` //! //! //! A structure where all child elements mapped to the one field which have //! a homogeneous sequential type: array-like container. A container type `T` //! should be able to be deserialized from the nested element content (`...`), //! except if it is an enum type which would be deserialized from the full //! element (`...`). //! //! You MUST specify `#[serde(rename = "$value")]` on that field: //! //! ```ignore //! // FIXME: Custom("unknown variant `text`, expected //! // one of `one`, `two`, `$value`") //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum Choice { //! One, //! Two, //! #[serde(rename = "$value")] //! Other(String), //! } //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@attribute")] //! # attribute: (), //! # /* //! attribute: ..., //! # */ //! // Does not (yet?) supported by the serde //! // https://github.com/serde-rs/serde/issues/1905 //! // #[serde(flatten)] //! #[serde(rename = "$value")] //! any_name: [Choice; 4], //! } //! # assert_eq!( //! # AnyName { attribute: (), any_name: [ //! # Choice::One, //! # Choice::Other("text cdata".into()), //! # Choice::Two, //! # Choice::One, //! # ] }, //! # quick_xml::de::from_str("\ //! # \ //! # ...\ //! # text \ //! # \ //! # ...\ //! # ...\ //! # " //! # ).unwrap(), //! # ); //! ``` //! ```ignore //! // FIXME: Custom("unknown variant `text`, expected //! // one of `one`, `two`, `$value`") //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum Choice { //! One, //! Two, //! 
#[serde(rename = "$value")] //! Other(String), //! } //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@attribute")] //! # attribute: (), //! # /* //! attribute: ..., //! # */ //! // Does not (yet?) supported by the serde //! // https://github.com/serde-rs/serde/issues/1905 //! // #[serde(flatten)] //! #[serde(rename = "$value")] //! any_name: Vec, //! } //! # assert_eq!( //! # AnyName { attribute: (), any_name: vec![ //! # Choice::One, //! # Choice::Other("text cdata".into()), //! # Choice::Two, //! # Choice::One, //! # ] }, //! # quick_xml::de::from_str("\ //! # \ //! # ...\ //! # text \ //! # \ //! # ...\ //! # ...\ //! # " //! # ).unwrap(), //! # ); //! ``` //!
//!
//! NOTE: consecutive text and CDATA nodes are merged into one text node,
//! so you cannot have two adjacent string types in your sequence.
//!
//!
//! //! Merging of the text / CDATA content is tracked in the issue [#474] and //! will be available in the next release. //!
//!
//! //! //! //! Composition Rules //! ================= //! //! XML format is very different from other formats supported by `serde`. //! One such difference it is how data in the serialized form is related to //! the Rust type. Usually each byte in the data can be associated only with //! one field in the data structure. However, XML is an exception. //! //! For example, took this XML: //! //! ```xml //! //! //! //! ``` //! //! and try to deserialize it to the struct `AnyName`: //! //! ```no_run //! # use serde::Deserialize; //! #[derive(Deserialize)] //! struct AnyName { // AnyName calls `deserialize_struct` on `` //! // Used data: ^^^^^^^^^^^^^^^^^^^ //! key: Inner, // Inner calls `deserialize_struct` on `` //! // Used data: ^^^^^^^^^^^^ //! } //! #[derive(Deserialize)] //! struct Inner { //! #[serde(rename = "@attr")] //! attr: String, // String calls `deserialize_string` on `value` //! // Used data: ^^^^^ //! } //! ``` //! //! Comments shows what methods of a [`Deserializer`] called by each struct //! `deserialize` method and which input their seen. **Used data** shows, what //! content is actually used for deserializing. As you see, name of the inner //! `` tag used both as a map key / outer struct field name and as part //! of the inner struct (although _value_ of the tag, i.e. `key` is not used //! by it). //! //! //! //! Difference between `$text` and `$value` special names //! ===================================================== //! //! quick-xml supports two special names for fields -- `$text` and `$value`. //! Although they may seem the same, there is a distinction. Two different //! names is required mostly for serialization, because quick-xml should know //! how you want to serialize certain constructs, which could be represented //! through XML in multiple different ways. //! //! The only difference in how complex types and sequences are serialized. //! If you doubt which one you should select, begin with [`$value`](#value). //! //! ## `$text` //! 
`$text` is used when you want to write your XML as a text or a CDATA content. //! More formally, field with that name represents simple type definition with //! `{variety} = atomic` or `{variety} = union` whose basic members are all atomic, //! as described in the [specification]. //! //! As a result, not all types of such fields can be serialized. Only serialization //! of following types are supported: //! - all primitive types (strings, numbers, booleans) //! - unit variants of enumerations (serializes to a name of a variant) //! - newtypes (delegates serialization to inner type) //! - [`Option`] of above (`None` serializes to nothing) //! - sequences (including tuples and tuple variants of enumerations) of above, //! excluding `None` and empty string elements (because it will not be possible //! to deserialize them back). The elements are separated by space(s) //! - unit type `()` and unit structs (serializes to nothing) //! //! Complex types, such as structs and maps, are not supported in this field. //! If you want them, you should use `$value`. //! //! Sequences serialized to a space-delimited string, that is why only certain //! types are allowed in this mode: //! //! ``` //! # use serde::{Deserialize, Serialize}; //! # use quick_xml::de::from_str; //! # use quick_xml::se::to_string; //! #[derive(Deserialize, Serialize, PartialEq, Debug)] //! struct AnyName { //! #[serde(rename = "$text")] //! field: Vec, //! } //! //! let obj = AnyName { field: vec![1, 2, 3] }; //! let xml = to_string(&obj).unwrap(); //! assert_eq!(xml, "1 2 3"); //! //! let object: AnyName = from_str(&xml).unwrap(); //! assert_eq!(object, obj); //! ``` //! //! ## `$value` //!
//!
//! NOTE: a name `#content` would better explain the purpose of that field,
//! but `$value` is used for compatibility with other XML serde crates, which
//! use that name. This allows you to switch XML crates more smoothly if required.
//!
//! //! Representation of primitive types in `$value` does not differ from their //! representation in `$text` field. The difference is how sequences are serialized. //! `$value` serializes each sequence item as a separate XML element. The name //! of that element is taken from serialized type, and because only `enum`s provide //! such name (their variant name), only they should be used for such fields. //! //! `$value` fields does not support `struct` types with fields, the serialization //! of such types would end with an `Err(Unsupported)`. Unit structs and unit //! type `()` serializing to nothing and can be deserialized from any content. //! //! Serialization and deserialization of `$value` field performed as usual, except //! that name for an XML element will be given by the serialized type, instead of //! field. The latter allow to serialize enumerated types, where variant is encoded //! as a tag name, and, so, represent an XSD `xs:choice` schema by the Rust `enum`. //! //! In the example below, field will be serialized as ``, because elements //! get their names from the field name. It cannot be deserialized, because `Enum` //! expects elements `
`, `` or ``, but `AnyName` looked only for ``: //! //! ```no_run //! # use serde::{Deserialize, Serialize}; //! #[derive(Deserialize, Serialize)] //! enum Enum { A, B, C } //! //! #[derive(Deserialize, Serialize)] //! struct AnyName { //! // //! field: Enum, //! } //! ``` //! //! If you rename field to `$value`, then `field` would be serialized as ``, //! `` or ``, depending on the its content. It is also possible to //! deserialize it from the same elements: //! //! ```no_run //! # use serde::{Deserialize, Serialize}; //! # #[derive(Deserialize, Serialize)] //! # enum Enum { A, B, C } //! # //! #[derive(Deserialize, Serialize)] //! struct AnyName { //! // , or //! #[serde(rename = "$value")] //! field: Enum, //! } //! ``` //! //! ### Primitives and sequences of primitives //! //! Sequences serialized to a list of elements. Note, that types that does not //! produce their own tag (i. e. primitives) are written as is, without delimiters: //! //! ``` //! # use serde::{Deserialize, Serialize}; //! # use quick_xml::de::from_str; //! # use quick_xml::se::to_string; //! #[derive(Deserialize, Serialize, PartialEq, Debug)] //! struct AnyName { //! #[serde(rename = "$value")] //! field: Vec, //! } //! //! let obj = AnyName { field: vec![1, 2, 3] }; //! let xml = to_string(&obj).unwrap(); //! // Note, that types that does not produce their own tag are written as is! //! assert_eq!(xml, "123"); //! //! let object: AnyName = from_str("123").unwrap(); //! assert_eq!(object, AnyName { field: vec![123] }); //! //! // `1 2 3` is mapped to a single `usize` element //! // It is impossible to deserialize list of primitives to such field //! from_str::("1 2 3").unwrap_err(); //! ``` //! //! A particular case of that example is a string `$value` field, which probably //! would be a most used example of that attribute: //! //! ``` //! # use serde::{Deserialize, Serialize}; //! # use quick_xml::de::from_str; //! # use quick_xml::se::to_string; //! 
#[derive(Deserialize, Serialize, PartialEq, Debug)] //! struct AnyName { //! #[serde(rename = "$value")] //! field: String, //! } //! //! let obj = AnyName { field: "content".to_string() }; //! let xml = to_string(&obj).unwrap(); //! assert_eq!(xml, "content"); //! ``` //! //! ### Structs and sequences of structs //! //! Note, that structures does not have serializable name as well (name of the //! type are never used), so it is impossible to serialize non-unit struct or //! sequence of non-unit structs in `$value` field. (sequences of) unit structs //! are serialized as empty string, although, because units itself serializing //! to nothing: //! //! ``` //! # use serde::{Deserialize, Serialize}; //! # use quick_xml::de::from_str; //! # use quick_xml::se::to_string; //! #[derive(Deserialize, Serialize, PartialEq, Debug)] //! struct Unit; //! //! #[derive(Deserialize, Serialize, PartialEq, Debug)] //! struct AnyName { //! // #[serde(default)] is required to deserialization of empty lists //! // This is a general note, not related to $value //! #[serde(rename = "$value", default)] //! field: Vec, //! } //! //! let obj = AnyName { field: vec![Unit, Unit, Unit] }; //! let xml = to_string(&obj).unwrap(); //! assert_eq!(xml, ""); //! //! let object: AnyName = from_str("").unwrap(); //! assert_eq!(object, AnyName { field: vec![] }); //! //! let object: AnyName = from_str("").unwrap(); //! assert_eq!(object, AnyName { field: vec![] }); //! //! let object: AnyName = from_str("").unwrap(); //! assert_eq!(object, AnyName { field: vec![Unit, Unit, Unit] }); //! ``` //! //! ### Enums and sequences of enums //! //! Enumerations uses the variant name as an element name: //! //! ``` //! # use serde::{Deserialize, Serialize}; //! # use quick_xml::de::from_str; //! # use quick_xml::se::to_string; //! #[derive(Deserialize, Serialize, PartialEq, Debug)] //! struct AnyName { //! #[serde(rename = "$value")] //! field: Vec, //! } //! //! 
#[derive(Deserialize, Serialize, PartialEq, Debug)] //! enum Enum { A, B, C } //! //! let obj = AnyName { field: vec![Enum::A, Enum::B, Enum::C] }; //! let xml = to_string(&obj).unwrap(); //! assert_eq!( //! xml, //! "\ //! \ //! \ //! \ //! " //! ); //! //! let object: AnyName = from_str(&xml).unwrap(); //! assert_eq!(object, obj); //! ``` //! //! ---------------------------------------------------------------------------- //! //! You can have either `$text` or `$value` field in your structs. Unfortunately, //! that is not enforced, so you can theoretically have both, but you should //! avoid that. //! //! //! //! Frequently Used Patterns //! ======================== //! //! Some XML constructs used so frequent, that it is worth to document the recommended //! way to represent them in the Rust. The sections below describes them. //! //! ## `` lists //! Many XML formats wrap lists of elements in the additional container, //! although this is not required by the XML rules: //! //! ```xml //! //! //! //! //! //! //! //! //! //! //! ``` //! In this case, there is a great desire to describe this XML in this way: //! ``` //! /// Represents //! type Element = (); //! //! /// Represents ... //! struct AnyName { //! // Incorrect //! list: Vec, //! } //! ``` //! This will not work, because potentially `` element can have attributes //! and other elements inside. You should define the struct for the `` //! explicitly, as you do that in the XSD for that XML: //! ``` //! /// Represents //! type Element = (); //! //! /// Represents ... //! struct AnyName { //! // Correct //! list: List, //! } //! /// Represents ... //! struct List { //! element: Vec, //! } //! ``` //! //! If you want to simplify your API, you could write a simple function for unwrapping //! inner list and apply it via [`deserialize_with`]: //! //! ``` //! # use pretty_assertions::assert_eq; //! use quick_xml::de::from_str; //! use serde::{Deserialize, Deserializer}; //! //! /// Represents //! 
//! type Element = ();
//!
//! /// Represents <any-name>...</any-name>
//! #[derive(Deserialize, Debug, PartialEq)]
//! struct AnyName {
//!     #[serde(deserialize_with = "unwrap_list")]
//!     list: Vec<Element>,
//! }
//!
//! fn unwrap_list<'de, D>(deserializer: D) -> Result<Vec<Element>, D::Error>
//! where
//!     D: Deserializer<'de>,
//! {
//!     /// Represents <list>...</list>
//!     #[derive(Deserialize)]
//!     struct List {
//!         // default allows empty list
//!         #[serde(default)]
//!         element: Vec<Element>,
//!     }
//!     Ok(List::deserialize(deserializer)?.element)
//! }
//!
//! assert_eq!(
//!     AnyName { list: vec![(), (), ()] },
//!     from_str("
//!         <any-name>
//!             <list>
//!                 <element/>
//!                 <element/>
//!                 <element/>
//!             </list>
//!         </any-name>
//!     ").unwrap(),
//! );
//! ```
//!
//! Instead of writing such functions manually, you also could try <https://lib.rs/crates/serde-query>.
//!
//! [specification]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition
//! [`deserialize_with`]: https://serde.rs/field-attrs.html#deserialize_with
//! [#474]: https://github.com/tafia/quick-xml/issues/474
//! [#497]: https://github.com/tafia/quick-xml/issues/497
//! [#510]: https://github.com/tafia/quick-xml/issues/510

// Macros should be defined before the modules that using them
// Also, macros should be imported before using them
use serde::serde_if_integer128;

/// Generates one `deserialize_*` method of a `serde::Deserializer` implementation
/// for a type that is parsed from its textual form.
///
/// - `$deserialize` -- name of the generated method
/// - `$visit` -- name of the `Visitor` method that receives the parsed value
/// - `$mut` -- optional `mut` token placed before `self` in the signature
///
/// The generated method reads the content with `self.read_string(false)`,
/// converts it with [`str::parse`] and passes the result to the visitor.
/// Both a read error and a parse error are propagated with `?`.
macro_rules! deserialize_type {
    ($deserialize:ident => $visit:ident, $($mut:tt)?) => {
        fn $deserialize<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            // No need to unescape because valid integer representations cannot be escaped
            let text = self.read_string(false)?;
            visitor.$visit(text.parse()?)
        }
    };
}

/// Implement deserialization methods for scalar types, such as numbers, strings,
/// byte arrays, booleans and identifiers.
///
/// The optional `$mut` token is forwarded to every generated method that reads
/// the content itself (i.e. calls `self.read_string`); methods that only
/// delegate to another method take plain `self`.
macro_rules! deserialize_primitives {
    ($($mut:tt)?) => {
        deserialize_type!(deserialize_i8 => visit_i8, $($mut)?);
        deserialize_type!(deserialize_i16 => visit_i16, $($mut)?);
        deserialize_type!(deserialize_i32 => visit_i32, $($mut)?);
        deserialize_type!(deserialize_i64 => visit_i64, $($mut)?);

        deserialize_type!(deserialize_u8 => visit_u8, $($mut)?);
        deserialize_type!(deserialize_u16 => visit_u16, $($mut)?);
        deserialize_type!(deserialize_u32 => visit_u32, $($mut)?);
        deserialize_type!(deserialize_u64 => visit_u64, $($mut)?);

        serde_if_integer128! {
            deserialize_type!(deserialize_i128 => visit_i128, $($mut)?);
            deserialize_type!(deserialize_u128 => visit_u128, $($mut)?);
        }

        deserialize_type!(deserialize_f32 => visit_f32, $($mut)?);
        deserialize_type!(deserialize_f64 => visit_f64, $($mut)?);

        /// Booleans are read as text and converted by `str2bool`.
        fn deserialize_bool<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            // No need to unescape because valid boolean representations cannot be escaped
            let text = self.read_string(false)?;

            str2bool(&text, visitor)
        }

        /// Representation of owned strings the same as [non-owned](#method.deserialize_str).
        fn deserialize_string<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_str(visitor)
        }

        /// Character represented as [strings](#method.deserialize_str).
        fn deserialize_char<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_str(visitor)
        }

        /// Strings are read with `self.read_string(true)`; a borrowed result is
        /// handed to the visitor as a borrowed `&'de str`, an owned one as a `String`.
        fn deserialize_str<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            let text = self.read_string(true)?;
            match text {
                Cow::Borrowed(string) => visitor.visit_borrowed_str(string),
                Cow::Owned(string) => visitor.visit_string(string),
            }
        }

        /// Returns [`DeError::Unsupported`]
        fn deserialize_bytes<V>(self, _visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            Err(DeError::Unsupported("binary data content is not supported by XML format".into()))
        }

        /// Forwards deserialization to the [`deserialize_bytes`](#method.deserialize_bytes).
        fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_bytes(visitor)
        }

        /// Identifiers represented as [strings](#method.deserialize_str).
        fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            self.deserialize_str(visitor)
        }
    };
}

mod escape;
mod key;
mod map;
mod simple_type;
mod var;

pub use crate::errors::serialize::DeError;
use crate::{
    encoding::Decoder,
    errors::Error,
    events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
    name::QName,
    reader::Reader,
};
use serde::de::{self, Deserialize, DeserializeOwned, DeserializeSeed, SeqAccess, Visitor};
use std::borrow::Cow;
#[cfg(feature = "overlapped-lists")]
use std::collections::VecDeque;
use std::io::BufRead;
#[cfg(feature = "overlapped-lists")]
use std::num::NonZeroUsize;

/// Data represented by a text node or a CDATA node. XML markup is not expected
pub(crate) const TEXT_KEY: &str = "$text";
/// Data represented by any XML markup inside
pub(crate) const VALUE_KEY: &str = "$value";

/// Simplified event which contains only these variants that used by deserializer
#[derive(Debug, PartialEq, Eq)]
pub enum DeEvent<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Escaped character data between `Start` and `End` element.
    Text(BytesText<'a>),
    /// Unescaped character data between `Start` and `End` element,
    /// stored in `<![CDATA[...]]>`.
    CData(BytesCData<'a>),
    /// End of XML document.
    Eof,
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// A structure that deserializes XML into Rust values.
pub struct Deserializer<'de, R>
where
    R: XmlRead<'de>,
{
    /// An XML reader that streams events into this deserializer
    reader: R,

    /// When deserializing sequences sometimes we have to skip unwanted events.
    /// That events should be stored and then replayed. This is a replay buffer,
    /// that streams events while not empty.
When it exhausted, events will /// requested from [`Self::reader`]. #[cfg(feature = "overlapped-lists")] read: VecDeque>, /// When deserializing sequences sometimes we have to skip events, because XML /// is tolerant to elements order and even if in the XSD order is strictly /// specified (using `xs:sequence`) most of XML parsers allows order violations. /// That means, that elements, forming a sequence, could be overlapped with /// other elements, do not related to that sequence. /// /// In order to support this, deserializer will scan events and skip unwanted /// events, store them here. After call [`Self::start_replay()`] all events /// moved from this to [`Self::read`]. #[cfg(feature = "overlapped-lists")] write: VecDeque>, /// Maximum number of events that can be skipped when processing sequences /// that occur out-of-order. This field is used to prevent potential /// denial-of-service (DoS) attacks which could cause infinite memory /// consumption when parsing a very large amount of XML into a sequence field. #[cfg(feature = "overlapped-lists")] limit: Option, #[cfg(not(feature = "overlapped-lists"))] peek: Option>, } /// Deserialize an instance of type `T` from a string of XML text. pub fn from_str<'de, T>(s: &'de str) -> Result where T: Deserialize<'de>, { let mut de = Deserializer::from_str(s); T::deserialize(&mut de) } /// Deserialize from a reader. This method will do internal copies of data /// readed from `reader`. If you want have a `&str` input and want to borrow /// as much as possible, use [`from_str`]. 
pub fn from_reader(reader: R) -> Result where R: BufRead, T: DeserializeOwned, { let mut de = Deserializer::from_reader(reader); T::deserialize(&mut de) } // TODO: According to the https://www.w3.org/TR/xmlschema-2/#boolean, // valid boolean representations are only "true", "false", "1", and "0" fn str2bool<'de, V>(value: &str, visitor: V) -> Result where V: de::Visitor<'de>, { match value { "true" | "1" | "True" | "TRUE" | "t" | "Yes" | "YES" | "yes" | "y" => { visitor.visit_bool(true) } "false" | "0" | "False" | "FALSE" | "f" | "No" | "NO" | "no" | "n" => { visitor.visit_bool(false) } _ => Err(DeError::InvalidBoolean(value.into())), } } fn deserialize_bool<'de, V>(value: &[u8], decoder: Decoder, visitor: V) -> Result where V: Visitor<'de>, { #[cfg(feature = "encoding")] { let value = decoder.decode(value)?; // No need to unescape because valid boolean representations cannot be escaped str2bool(value.as_ref(), visitor) } #[cfg(not(feature = "encoding"))] { // No need to unescape because valid boolean representations cannot be escaped match value { b"true" | b"1" | b"True" | b"TRUE" | b"t" | b"Yes" | b"YES" | b"yes" | b"y" => { visitor.visit_bool(true) } b"false" | b"0" | b"False" | b"FALSE" | b"f" | b"No" | b"NO" | b"no" | b"n" => { visitor.visit_bool(false) } e => Err(DeError::InvalidBoolean(decoder.decode(e)?.into())), } } } impl<'de, R> Deserializer<'de, R> where R: XmlRead<'de>, { /// Create an XML deserializer from one of the possible quick_xml input sources. 
/// /// Typically it is more convenient to use one of these methods instead: /// /// - [`Deserializer::from_str`] /// - [`Deserializer::from_reader`] fn new(reader: R) -> Self { Deserializer { reader, #[cfg(feature = "overlapped-lists")] read: VecDeque::new(), #[cfg(feature = "overlapped-lists")] write: VecDeque::new(), #[cfg(feature = "overlapped-lists")] limit: None, #[cfg(not(feature = "overlapped-lists"))] peek: None, } } /// Set the maximum number of events that could be skipped during deserialization /// of sequences. /// /// If `` contains more than specified nested elements, `$text` or /// CDATA nodes, then [`DeError::TooManyEvents`] will be returned during /// deserialization of sequence field (any type that uses [`deserialize_seq`] /// for the deserialization, for example, `Vec`). /// /// This method can be used to prevent a [DoS] attack and infinite memory /// consumption when parsing a very large XML to a sequence field. /// /// It is strongly recommended to set limit to some value when you parse data /// from untrusted sources. You should choose a value that your typical XMLs /// can have _between_ different elements that corresponds to the same sequence. /// /// # Examples /// /// Let's imagine, that we deserialize such structure: /// ``` /// struct List { /// item: Vec<()>, /// } /// ``` /// /// The XML that we try to parse look like this: /// ```xml /// /// /// /// /// with text /// /// /// /// /// /// /// /// ``` /// /// There, when we deserialize the `item` field, we need to buffer 7 events, /// before we can deserialize the second ``: /// /// - `` /// - `` /// - `$text(with text)` /// - `` /// - `` (virtual start event) /// - `` (virtual end event) /// - `` /// /// Note, that `` internally represented as 2 events: /// one for the start tag and one for the end tag. In the future this can be /// eliminated, but for now we use [auto-expanding feature] of a reader, /// because this simplifies deserializer code. 
///
    /// [`deserialize_seq`]: serde::Deserializer::deserialize_seq
    /// [DoS]: https://en.wikipedia.org/wiki/Denial-of-service_attack
    /// [auto-expanding feature]: Reader::expand_empty_elements
    #[cfg(feature = "overlapped-lists")]
    pub fn event_buffer_size(&mut self, limit: Option) -> &mut Self {
        self.limit = limit;
        self
    }

    // Returns a reference to the next event without consuming it.
    // If the replay buffer is empty, one event is read from the reader and
    // stored at the front of `self.read`.
    #[cfg(feature = "overlapped-lists")]
    fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
        if self.read.is_empty() {
            self.read.push_front(self.reader.next()?);
        }
        if let Some(event) = self.read.front() {
            return Ok(event);
        }
        // SAFETY: `self.read` was filled in the code above.
        // NOTE: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }`
        // if unsafe code will be allowed
        unreachable!()
    }

    // Returns a reference to the next event without consuming it.
    // The event is cached in `self.peek` until `next()` takes it.
    #[cfg(not(feature = "overlapped-lists"))]
    fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
        if self.peek.is_none() {
            self.peek = Some(self.reader.next()?);
        }
        match self.peek.as_ref() {
            Some(v) => Ok(v),
            // SAFETY: a `None` variant for `self.peek` would have been replaced
            // by a `Some` variant in the code above.
            // TODO: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }`
            // if unsafe code will be allowed
            None => unreachable!(),
        }
    }

    // Returns the next event: a buffered (replayed or peeked) one if present,
    // otherwise a fresh one from the underlying reader.
    fn next(&mut self) -> Result, DeError> {
        // Replay skipped or peeked events
        #[cfg(feature = "overlapped-lists")]
        if let Some(event) = self.read.pop_front() {
            return Ok(event);
        }
        #[cfg(not(feature = "overlapped-lists"))]
        if let Some(e) = self.peek.take() {
            return Ok(e);
        }
        self.reader.next()
    }

    /// Returns the mark after which all events, skipped by [`Self::skip()`] call,
    /// should be replayed after calling [`Self::start_replay()`].
    ///
    /// The mark is the current length of the skip buffer.
    #[cfg(feature = "overlapped-lists")]
    #[inline]
    #[must_use = "returned checkpoint should be used in `start_replay`"]
    fn skip_checkpoint(&self) -> usize {
        self.write.len()
    }

    /// Extracts an XML tree of events from the event stream and stores them in
    /// the skipped events buffer from which they can be retrieved later.
/// You MUST call
    /// [`Self::start_replay()`] after calling this to give access to the skipped
    /// events and release internal buffers.
    #[cfg(feature = "overlapped-lists")]
    fn skip(&mut self) -> Result<(), DeError> {
        let event = self.next()?;
        self.skip_event(event)?;
        match self.write.back() {
            // Skip all subtree, if we skip a start event
            Some(DeEvent::Start(e)) => {
                let end = e.name().as_ref().to_owned();
                // Number of currently open nested elements with the same name
                let mut depth = 0;
                loop {
                    let event = self.next()?;
                    match event {
                        DeEvent::Start(ref e) if e.name().as_ref() == end => {
                            self.skip_event(event)?;
                            depth += 1;
                        }
                        DeEvent::End(ref e) if e.name().as_ref() == end => {
                            self.skip_event(event)?;
                            if depth == 0 {
                                break;
                            }
                            depth -= 1;
                        }
                        // Stop at the end of input, otherwise the loop would
                        // keep buffering `Eof` events
                        DeEvent::Eof => {
                            self.skip_event(event)?;
                            break;
                        }
                        _ => self.skip_event(event)?,
                    }
                }
            }
            _ => (),
        }
        Ok(())
    }

    // Appends `event` to the skip buffer, failing with `TooManyEvents` when
    // the configured limit is already reached.
    #[cfg(feature = "overlapped-lists")]
    #[inline]
    fn skip_event(&mut self, event: DeEvent<'de>) -> Result<(), DeError> {
        if let Some(max) = self.limit {
            if self.write.len() >= max.get() {
                return Err(DeError::TooManyEvents(max));
            }
        }
        self.write.push_back(event);
        Ok(())
    }

    /// Moves buffered events, skipped after given `checkpoint` from [`Self::write`]
    /// skip buffer to [`Self::read`] buffer.
    ///
    /// After calling this method, [`Self::peek()`] and [`Self::next()`] start
    /// returning events that were skipped previously by calling [`Self::skip()`],
    /// and only when all those events are consumed, the deserializer starts
    /// to drain events from underlying reader.
    ///
    /// This method MUST be called if any number of [`Self::skip()`] was called
    /// after [`Self::new()`] or `start_replay()` or you'll lose events.
#[cfg(feature = "overlapped-lists")]
    fn start_replay(&mut self, checkpoint: usize) {
        if checkpoint == 0 {
            // Whole skip buffer is replayed: put already buffered `read`
            // events after it and make the result the new `read` buffer
            self.write.append(&mut self.read);
            std::mem::swap(&mut self.read, &mut self.write);
        } else {
            // Only events skipped after `checkpoint` are replayed,
            // earlier ones stay in `write`
            let mut read = self.write.split_off(checkpoint);
            read.append(&mut self.read);
            self.read = read;
        }
    }

    // Same as `read_string_impl` with `allow_start == true`
    #[inline]
    fn read_string(&mut self, unescape: bool) -> Result, DeError> {
        self.read_string_impl(unescape, true)
    }

    /// Consumes a one XML element or an XML tree, returns associated text or
    /// an empty string.
    ///
    /// If `allow_start` is `false`, then only one event is consumed. If that
    /// event is [`DeEvent::Start`], then [`DeError::UnexpectedStart`] is returned.
    ///
    /// If `allow_start` is `true`, then first text or CDATA event inside it is
    /// returned and all other content is skipped until corresponding end tag
    /// will be consumed.
    ///
    /// # Handling events
    ///
    /// The table below shows how events are handled by this method:
    ///
    /// |Event             |XML                        |Handling
    /// |------------------|---------------------------|----------------------------------------
    /// |[`DeEvent::Start`]|`...`    |if `allow_start == true`, result determined by the second table, otherwise emits [`UnexpectedStart("tag")`](DeError::UnexpectedStart)
    /// |[`DeEvent::End`]  |``                 |Emits [`UnexpectedEnd("any-tag")`](DeError::UnexpectedEnd)
    /// |[`DeEvent::Text`] |`text content`             |Unescapes `text content` and returns it
    /// |[`DeEvent::CData`]|``|Returns `cdata content` unchanged
    /// |[`DeEvent::Eof`]  |                           |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
    ///
    /// Second event, consumed if [`DeEvent::Start`] was received and `allow_start == true`:
    ///
    /// |Event             |XML                        |Handling
    /// |------------------|---------------------------|----------------------------------------------------------------------------------
    /// |[`DeEvent::Start`]|`...`    |Emits [`UnexpectedStart("any-tag")`](DeError::UnexpectedStart)
    /// |[`DeEvent::End`]  |``                   |Returns an empty slice, if close tag matched the open one
    /// |[`DeEvent::End`]  |``                 |Emits [`UnexpectedEnd("any-tag")`](DeError::UnexpectedEnd)
    /// |[`DeEvent::Text`] |`text content`             |Unescapes `text content` and returns it, consumes events up to the matching end tag
    /// |[`DeEvent::CData`]|``|Returns `cdata content` unchanged, consumes events up to the matching end tag
    /// |[`DeEvent::Eof`]  |                           |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
    fn read_string_impl(
        &mut self,
        unescape: bool,
        allow_start: bool,
    ) -> Result, DeError> {
        match self.next()? {
            DeEvent::Text(e) => Ok(e.decode(unescape)?),
            DeEvent::CData(e) => Ok(e.decode()?),
            DeEvent::Start(e) if allow_start => {
                // allow one nested level
                let inner = self.next()?;
                let t = match inner {
                    DeEvent::Text(t) => t.decode(unescape)?,
                    DeEvent::CData(t) => t.decode()?,
                    DeEvent::Start(s) => {
                        return Err(DeError::UnexpectedStart(s.name().as_ref().to_owned()))
                    }
                    // We can get End event in case of `` or `` input
                    // Return empty text in that case
                    DeEvent::End(end) if end.name() == e.name() => {
                        return Ok("".into());
                    }
                    DeEvent::End(end) => {
                        return Err(DeError::UnexpectedEnd(end.name().as_ref().to_owned()))
                    }
                    DeEvent::Eof => return Err(DeError::UnexpectedEof),
                };
                // Skip the rest of the element, including its end tag
                self.read_to_end(e.name())?;
                Ok(t)
            }
            DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
            DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
            DeEvent::Eof => Err(DeError::UnexpectedEof),
        }
    }

    /// Drops all events up to and including the end event with
    /// [name](BytesEnd::name()) `name` that closes the current nesting level.
    /// This method should be called after [`Self::next()`]
    #[cfg(feature = "overlapped-lists")]
    fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
        // Number of nested, still open elements with the same name
        let mut depth = 0;
        loop {
            match self.read.pop_front() {
                Some(DeEvent::Start(e)) if e.name() == name => {
                    depth += 1;
                }
                Some(DeEvent::End(e)) if e.name() == name => {
                    if depth == 0 {
                        break;
                    }
                    depth -= 1;
                }

                // Drop all other skipped events
                Some(_) => continue,

                // If we do not have skipped events, use effective reading that will
                // not allocate memory for events
                None => {
                    // We should close all opened tags, because we could buffer
                    // Start events, but not the corresponding End events. So we
                    // keep reading events until we exit all nested tags.
                    // `read_to_end()` will return an error if an Eof was encountered
                    // prematurely (in case of malformed XML).
// // // ^^^^^^^^^^ - buffered in `self.read`, when `self.read_to_end()` is called, depth = 2 // ^^^^^^ - read by the first call of `self.reader.read_to_end()` // ^^^^^^ - read by the second call of `self.reader.read_to_end()` loop { self.reader.read_to_end(name)?; if depth == 0 { break; } depth -= 1; } break; } } } Ok(()) } #[cfg(not(feature = "overlapped-lists"))] fn read_to_end(&mut self, name: QName) -> Result<(), DeError> { // First one might be in self.peek match self.next()? { DeEvent::Start(e) => self.reader.read_to_end(e.name())?, DeEvent::End(e) if e.name() == name => return Ok(()), _ => (), } self.reader.read_to_end(name) } } impl<'de> Deserializer<'de, SliceReader<'de>> { /// Create new deserializer that will borrow data from the specified string pub fn from_str(s: &'de str) -> Self { let mut reader = Reader::from_str(s); reader .expand_empty_elements(true) .check_end_names(true) .trim_text(true); Self::new(SliceReader { reader }) } } impl<'de, R> Deserializer<'de, IoReader> where R: BufRead, { /// Create new deserializer that will copy data from the specified reader /// into internal buffer. If you already have a string use [`Self::from_str`] /// instead, because it will borrow instead of copy. If you have `&[u8]` which /// is known to represent UTF-8, you can decode it first before using [`from_str`]. pub fn from_reader(reader: R) -> Self { let mut reader = Reader::from_reader(reader); reader .expand_empty_elements(true) .check_end_names(true) .trim_text(true); Self::new(IoReader { reader, buf: Vec::new(), }) } } impl<'de, 'a, R> de::Deserializer<'de> for &'a mut Deserializer<'de, R> where R: XmlRead<'de>, { type Error = DeError; deserialize_primitives!(); fn deserialize_struct( self, _name: &'static str, fields: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { match self.next()? 
{
            DeEvent::Start(e) => {
                // Copy the name, because `e` is moved into the map accessor
                let name = e.name().as_ref().to_vec();
                let map = map::MapAccess::new(self, e, fields)?;
                let value = visitor.visit_map(map)?;
                // Consume everything up to the end tag of this element
                self.read_to_end(QName(&name))?;
                Ok(value)
            }
            DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
            DeEvent::Text(_) | DeEvent::CData(_) => Err(DeError::ExpectedStart),
            DeEvent::Eof => Err(DeError::UnexpectedEof),
        }
    }

    /// Unit represented in XML as a `xs:element` or text/CDATA content.
    /// Any content inside `xs:element` is ignored and skipped.
    ///
    /// Produces unit struct from any of following inputs:
    /// - any `...`
    /// - any ``
    /// - any text content
    /// - any CDATA content
    ///
    /// # Events handling
    ///
    /// |Event             |XML                        |Handling
    /// |------------------|---------------------------|-------------------------------------------
    /// |[`DeEvent::Start`]|`...`    |Calls `visitor.visit_unit()`, consumes all events up to corresponding `End` event
    /// |[`DeEvent::End`]  |``                   |Emits [`UnexpectedEnd("tag")`](DeError::UnexpectedEnd)
    /// |[`DeEvent::Text`] |`text content`             |Calls `visitor.visit_unit()`. Text content is ignored
    /// |[`DeEvent::CData`]|``|Calls `visitor.visit_unit()`. CDATA content is ignored
    /// |[`DeEvent::Eof`]  |                           |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
    fn deserialize_unit(self, visitor: V) -> Result
    where
        V: Visitor<'de>,
    {
        match self.next()?
{
            DeEvent::Start(s) => {
                // Content of the element is skipped before the unit is produced
                self.read_to_end(s.name())?;
                visitor.visit_unit()
            }
            DeEvent::Text(_) | DeEvent::CData(_) => visitor.visit_unit(),
            DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
            DeEvent::Eof => Err(DeError::UnexpectedEof),
        }
    }

    /// Representation of the named units the same as [unnamed units](#method.deserialize_unit)
    fn deserialize_unit_struct(
        self,
        _name: &'static str,
        visitor: V,
    ) -> Result
    where
        V: Visitor<'de>,
    {
        self.deserialize_unit(visitor)
    }

    /// Newtype structs are deserialized as a [tuple](#method.deserialize_tuple)
    /// with one element.
    fn deserialize_newtype_struct(
        self,
        _name: &'static str,
        visitor: V,
    ) -> Result
    where
        V: Visitor<'de>,
    {
        self.deserialize_tuple(1, visitor)
    }

    /// Representation of tuples the same as [sequences](#method.deserialize_seq).
    fn deserialize_tuple(self, _len: usize, visitor: V) -> Result
    where
        V: Visitor<'de>,
    {
        self.deserialize_seq(visitor)
    }

    /// Representation of named tuples the same as [unnamed tuples](#method.deserialize_tuple).
    fn deserialize_tuple_struct(
        self,
        _name: &'static str,
        len: usize,
        visitor: V,
    ) -> Result
    where
        V: Visitor<'de>,
    {
        self.deserialize_tuple(len, visitor)
    }

    /// Hands control to the visitor with an [`var::EnumAccess`] built over
    /// this deserializer.
    fn deserialize_enum(
        self,
        _name: &'static str,
        _variants: &'static [&'static str],
        visitor: V,
    ) -> Result
    where
        V: Visitor<'de>,
    {
        let value = visitor.visit_enum(var::EnumAccess::new(self))?;
        Ok(value)
    }

    /// The deserializer itself acts as the sequence accessor.
    fn deserialize_seq(self, visitor: V) -> Result
    where
        V: Visitor<'de>,
    {
        visitor.visit_seq(self)
    }

    /// Maps are deserialized as structs with an empty name and no known fields.
    fn deserialize_map(self, visitor: V) -> Result
    where
        V: Visitor<'de>,
    {
        self.deserialize_struct("", &[], visitor)
    }

    /// Empty text, empty CDATA and the end of input produce `None`,
    /// everything else produces `Some`. The event is only peeked, not consumed.
    fn deserialize_option(self, visitor: V) -> Result
    where
        V: Visitor<'de>,
    {
        match self.peek()? {
            DeEvent::Text(t) if t.is_empty() => visitor.visit_none(),
            DeEvent::CData(t) if t.is_empty() => visitor.visit_none(),
            DeEvent::Eof => visitor.visit_none(),
            _ => visitor.visit_some(self),
        }
    }

    /// Always call `visitor.visit_unit()` because returned value ignored in any case.
///
    /// This method consumes any single [event][DeEvent] except the [`Start`][DeEvent::Start]
    /// event, in which case all events up to corresponding [`End`][DeEvent::End] event will
    /// be consumed.
    ///
    /// This method returns error if current event is [`End`][DeEvent::End] or [`Eof`][DeEvent::Eof]
    fn deserialize_ignored_any(self, visitor: V) -> Result
    where
        V: Visitor<'de>,
    {
        match self.next()? {
            DeEvent::Start(e) => self.read_to_end(e.name())?,
            DeEvent::End(e) => return Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
            DeEvent::Eof => return Err(DeError::UnexpectedEof),
            // Text and CDATA are consumed by the `next()` call above
            _ => (),
        }
        visitor.visit_unit()
    }

    /// Chooses the representation by the next (peeked) event: elements are
    /// deserialized as maps, text and CDATA as strings.
    fn deserialize_any(self, visitor: V) -> Result
    where
        V: Visitor<'de>,
    {
        match self.peek()? {
            DeEvent::Start(_) => self.deserialize_map(visitor),
            // Redirect to deserialize_unit in order to consume an event and return an appropriate error
            DeEvent::End(_) | DeEvent::Eof => self.deserialize_unit(visitor),
            _ => self.deserialize_string(visitor),
        }
    }
}

/// An accessor to sequence elements forming a value for top-level sequence of XML
/// elements.
///
/// Technically, multiple top-level elements violate the XML rule of only one top-level
/// element, but we consider this as several concatenated XML documents.
impl<'de, 'a, R> SeqAccess<'de> for &'a mut Deserializer<'de, R>
where
    R: XmlRead<'de>,
{
    type Error = DeError;

    fn next_element_seed(&mut self, seed: T) -> Result, Self::Error>
    where
        T: DeserializeSeed<'de>,
    {
        match self.peek()? {
            // End of input ends the sequence
            DeEvent::Eof => Ok(None),

            // Start(tag), End(tag), Text, CData
            _ => seed.deserialize(&mut **self).map(Some),
        }
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Trait used by the deserializer for iterating over input. This is manually
/// "specialized" for iterating over `&[u8]`.
///
/// You do not need to implement this trait, it is needed to abstract from
/// [borrowing](SliceReader) and [copying](IoReader) data sources and reuse code in
/// deserializer
pub trait XmlRead<'i> {
    /// Return an input-borrowing event.
    fn next(&mut self) -> Result, DeError>;

    /// Skips until end element is found. Unlike `next()` it will not allocate
    /// when it cannot satisfy the lifetime.
    fn read_to_end(&mut self, name: QName) -> Result<(), DeError>;

    /// A copy of the reader's decoder used to decode strings.
    fn decoder(&self) -> Decoder;
}

/// XML input source that reads from a std::io input stream.
///
/// You cannot create it, it is created automatically when you call
/// [`Deserializer::from_reader`]
pub struct IoReader {
    reader: Reader,
    // Scratch buffer the reader parses events into; cleared after each event
    buf: Vec,
}

impl<'i, R: BufRead> XmlRead<'i> for IoReader {
    fn next(&mut self) -> Result, DeError> {
        let event = loop {
            let e = self.reader.read_event_into(&mut self.buf)?;
            match e {
                // Events borrow from `self.buf`, so they are converted to
                // owned ones before the buffer is cleared
                Event::Start(e) => break Ok(DeEvent::Start(e.into_owned())),
                Event::End(e) => break Ok(DeEvent::End(e.into_owned())),
                Event::Text(e) => break Ok(DeEvent::Text(e.into_owned())),
                Event::CData(e) => break Ok(DeEvent::CData(e.into_owned())),
                Event::Eof => break Ok(DeEvent::Eof),

                // All other kinds of events are skipped
                _ => self.buf.clear(),
            }
        };

        self.buf.clear();

        event
    }

    fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
        match self.reader.read_to_end_into(name, &mut self.buf) {
            // Report a premature end of input with the deserializer's own error variant
            Err(Error::UnexpectedEof(_)) => Err(DeError::UnexpectedEof),
            Err(e) => Err(e.into()),
            Ok(_) => Ok(()),
        }
    }

    fn decoder(&self) -> Decoder {
        self.reader.decoder()
    }
}

/// XML input source that reads from a slice of bytes and can borrow from it.
///
/// You cannot create it, it is created automatically when you call
/// [`Deserializer::from_str`].
pub struct SliceReader<'de> {
    reader: Reader<&'de [u8]>,
}

impl<'de> XmlRead<'de> for SliceReader<'de> {
    fn next(&mut self) -> Result, DeError> {
        loop {
            let e = self.reader.read_event()?;
            match e {
                Event::Start(e) => break Ok(DeEvent::Start(e)),
                Event::End(e) => break Ok(DeEvent::End(e)),
                Event::Text(e) => break Ok(DeEvent::Text(e)),
                Event::CData(e) => break Ok(DeEvent::CData(e)),
                Event::Eof => break Ok(DeEvent::Eof),

                // All other kinds of events are skipped
                _ => (),
            }
        }
    }

    fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
        match self.reader.read_to_end(name) {
            // Report a premature end of input with the deserializer's own error variant
            Err(Error::UnexpectedEof(_)) => Err(DeError::UnexpectedEof),
            Err(e) => Err(e.into()),
            Ok(_) => Ok(()),
        }
    }

    fn decoder(&self) -> Decoder {
        self.reader.decoder()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    #[cfg(feature = "overlapped-lists")]
    mod skip {
        use super::*;
        use crate::de::DeEvent::*;
        use crate::events::{BytesEnd, BytesText};
        use pretty_assertions::assert_eq;

        /// Checks that `peek()` and `read()` behave correctly after `skip()`
        #[test]
        fn read_and_peek() {
            let mut de = Deserializer::from_str(
                r#" text "#,
            );

            // Initial conditions - both are empty
            assert_eq!(de.read, vec![]);
            assert_eq!(de.write, vec![]);

            assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
            assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("inner")));

            // Mark that start_replay() should begin replay from this point
            let checkpoint = de.skip_checkpoint();
            assert_eq!(checkpoint, 0);

            // Should skip first tree
            de.skip().unwrap();
            assert_eq!(de.read, vec![]);
            assert_eq!(
                de.write,
                vec![
                    Start(BytesStart::new("inner")),
                    Text(BytesText::from_escaped("text")),
                    Start(BytesStart::new("inner")),
                    End(BytesEnd::new("inner")),
                    End(BytesEnd::new("inner")),
                ]
            );

            // Consume . Now unconsumed XML looks like:
            //
            //
            //     text
            //
            //
            //
            //
            assert_eq!(de.next().unwrap(), Start(BytesStart::new("next")));
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("next")));

            // We finish writing.
// Next call to `next()` should start replay that messages:
            //
            //
            //     text
            //
            //
            //
            // and after that stream that messages:
            //
            //
            //
            de.start_replay(checkpoint);
            assert_eq!(
                de.read,
                vec![
                    Start(BytesStart::new("inner")),
                    Text(BytesText::from_escaped("text")),
                    Start(BytesStart::new("inner")),
                    End(BytesEnd::new("inner")),
                    End(BytesEnd::new("inner")),
                ]
            );
            assert_eq!(de.write, vec![]);
            assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));

            // Mark that start_replay() should begin replay from this point
            let checkpoint = de.skip_checkpoint();
            assert_eq!(checkpoint, 0);

            // Skip `$text` node and consume after it
            de.skip().unwrap();
            assert_eq!(
                de.read,
                vec![
                    Start(BytesStart::new("inner")),
                    End(BytesEnd::new("inner")),
                    End(BytesEnd::new("inner")),
                ]
            );
            assert_eq!(
                de.write,
                vec![
                    // This comment here to keep the same formatting of both arrays
                    // otherwise rustfmt suggest one-line it
                    Text(BytesText::from_escaped("text")),
                ]
            );

            assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));

            // We finish writing.
            // Next call to `next()` should start replay messages:
            //
            //     text
            //
            //
            // and after that stream that messages:
            //
            //
            //
            de.start_replay(checkpoint);
            assert_eq!(
                de.read,
                vec![
                    Text(BytesText::from_escaped("text")),
                    End(BytesEnd::new("inner")),
                ]
            );
            assert_eq!(de.write, vec![]);
            assert_eq!(de.next().unwrap(), Text(BytesText::from_escaped("text")));
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));
            assert_eq!(de.next().unwrap(), Start(BytesStart::new("target")));
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("target")));
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
            assert_eq!(de.next().unwrap(), Eof);
        }

        /// Checks that `read_to_end()` behaves correctly after `skip()`
        #[test]
        fn read_to_end() {
            let mut de = Deserializer::from_str(
                r#" text "#,
            );

            // Initial conditions - both are empty
            assert_eq!(de.read, vec![]);
            assert_eq!(de.write, vec![]);

            assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));

            // Mark that start_replay() should begin replay from this point
            let checkpoint = de.skip_checkpoint();
            assert_eq!(checkpoint, 0);

            // Skip the tree
            de.skip().unwrap();
            assert_eq!(de.read, vec![]);
            assert_eq!(
                de.write,
                vec![
                    Start(BytesStart::new("skip")),
                    Text(BytesText::from_escaped("text")),
                    Start(BytesStart::new("skip")),
                    End(BytesEnd::new("skip")),
                    End(BytesEnd::new("skip")),
                ]
            );

            // Drop all events that represent the tree. Now unconsumed XML looks like:
            //
            //
            //     text
            //
            //
            //
            assert_eq!(de.next().unwrap(), Start(BytesStart::new("target")));
            de.read_to_end(QName(b"target")).unwrap();
            // `read_to_end` does not touch the skip buffer
            assert_eq!(de.read, vec![]);
            assert_eq!(
                de.write,
                vec![
                    Start(BytesStart::new("skip")),
                    Text(BytesText::from_escaped("text")),
                    Start(BytesStart::new("skip")),
                    End(BytesEnd::new("skip")),
                    End(BytesEnd::new("skip")),
                ]
            );

            // We finish writing.
            // Next call to `next()` should start replay that messages:
            //
            //
            //     text
            //
            //
            //
            // and after that stream that messages:
            //
            //
            de.start_replay(checkpoint);
            assert_eq!(
                de.read,
                vec![
                    Start(BytesStart::new("skip")),
                    Text(BytesText::from_escaped("text")),
                    Start(BytesStart::new("skip")),
                    End(BytesEnd::new("skip")),
                    End(BytesEnd::new("skip")),
                ]
            );
            assert_eq!(de.write, vec![]);

            assert_eq!(de.next().unwrap(), Start(BytesStart::new("skip")));
            de.read_to_end(QName(b"skip")).unwrap();

            assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
            assert_eq!(de.next().unwrap(), Eof);
        }

        /// Checks that replay replays only part of events
        /// Test for https://github.com/tafia/quick-xml/issues/435
        #[test]
        fn partial_replay() {
            let mut de = Deserializer::from_str(
                r#" "#,
            );

            // Initial conditions - both are empty
            assert_eq!(de.read, vec![]);
            assert_eq!(de.write, vec![]);

            assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));

            // start_replay() should start replay from this point
            let checkpoint1 = de.skip_checkpoint();
            assert_eq!(checkpoint1, 0);

            // Should skip first and second elements
            de.skip().unwrap(); // skipped-1
            de.skip().unwrap(); // skipped-2
            assert_eq!(de.read, vec![]);
            assert_eq!(
                de.write,
                vec![
                    Start(BytesStart::new("skipped-1")),
                    End(BytesEnd::new("skipped-1")),
                    Start(BytesStart::new("skipped-2")),
                    End(BytesEnd::new("skipped-2")),
                ]
            );

            ////////////////////////////////////////////////////////////////////////////////////////

            assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner")));
            assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("skipped-3")));
            assert_eq!(
                de.read,
                vec![
                    // This comment here to keep the same formatting of both arrays
                    // otherwise rustfmt suggest one-line it
                    Start(BytesStart::new("skipped-3")),
                ]
            );
            assert_eq!(
                de.write,
                vec![
                    Start(BytesStart::new("skipped-1")),
                    End(BytesEnd::new("skipped-1")),
                    Start(BytesStart::new("skipped-2")),
                    End(BytesEnd::new("skipped-2")),
                ]
            );

            // start_replay() should start replay from this point
            let
            checkpoint2 = de.skip_checkpoint();
            assert_eq!(checkpoint2, 4);

            // Should skip third and fourth elements
            de.skip().unwrap(); // skipped-3
            de.skip().unwrap(); // skipped-4
            assert_eq!(de.read, vec![]);
            assert_eq!(
                de.write,
                vec![
                    // checkpoint 1
                    Start(BytesStart::new("skipped-1")),
                    End(BytesEnd::new("skipped-1")),
                    Start(BytesStart::new("skipped-2")),
                    End(BytesEnd::new("skipped-2")),
                    // checkpoint 2
                    Start(BytesStart::new("skipped-3")),
                    End(BytesEnd::new("skipped-3")),
                    Start(BytesStart::new("skipped-4")),
                    End(BytesEnd::new("skipped-4")),
                ]
            );
            assert_eq!(de.next().unwrap(), Start(BytesStart::new("target-2")));
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("target-2")));
            assert_eq!(de.peek().unwrap(), &End(BytesEnd::new("inner")));
            assert_eq!(
                de.read,
                vec![
                    // This comment here to keep the same formatting of both arrays
                    // otherwise rustfmt suggest one-line it
                    End(BytesEnd::new("inner")),
                ]
            );
            assert_eq!(
                de.write,
                vec![
                    // checkpoint 1
                    Start(BytesStart::new("skipped-1")),
                    End(BytesEnd::new("skipped-1")),
                    Start(BytesStart::new("skipped-2")),
                    End(BytesEnd::new("skipped-2")),
                    // checkpoint 2
                    Start(BytesStart::new("skipped-3")),
                    End(BytesEnd::new("skipped-3")),
                    Start(BytesStart::new("skipped-4")),
                    End(BytesEnd::new("skipped-4")),
                ]
            );

            // Start replay events from checkpoint 2
            de.start_replay(checkpoint2);
            assert_eq!(
                de.read,
                vec![
                    Start(BytesStart::new("skipped-3")),
                    End(BytesEnd::new("skipped-3")),
                    Start(BytesStart::new("skipped-4")),
                    End(BytesEnd::new("skipped-4")),
                    End(BytesEnd::new("inner")),
                ]
            );
            assert_eq!(
                de.write,
                vec![
                    Start(BytesStart::new("skipped-1")),
                    End(BytesEnd::new("skipped-1")),
                    Start(BytesStart::new("skipped-2")),
                    End(BytesEnd::new("skipped-2")),
                ]
            );

            // Replayed events
            assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-3")));
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-3")));
            assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-4")));
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-4")));

            assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner")));
            assert_eq!(de.read, vec![]);
            assert_eq!(
                de.write,
                vec![
                    Start(BytesStart::new("skipped-1")),
                    End(BytesEnd::new("skipped-1")),
                    Start(BytesStart::new("skipped-2")),
                    End(BytesEnd::new("skipped-2")),
                ]
            );

            ////////////////////////////////////////////////////////////////////////////////////////

            // New events
            assert_eq!(de.next().unwrap(), Start(BytesStart::new("target-1")));
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("target-1")));

            assert_eq!(de.read, vec![]);
            assert_eq!(
                de.write,
                vec![
                    Start(BytesStart::new("skipped-1")),
                    End(BytesEnd::new("skipped-1")),
                    Start(BytesStart::new("skipped-2")),
                    End(BytesEnd::new("skipped-2")),
                ]
            );

            // Start replay events from checkpoint 1
            de.start_replay(checkpoint1);
            assert_eq!(
                de.read,
                vec![
                    Start(BytesStart::new("skipped-1")),
                    End(BytesEnd::new("skipped-1")),
                    Start(BytesStart::new("skipped-2")),
                    End(BytesEnd::new("skipped-2")),
                ]
            );
            assert_eq!(de.write, vec![]);

            // Replayed events
            assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-1")));
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-1")));
            assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-2")));
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-2")));

            assert_eq!(de.read, vec![]);
            assert_eq!(de.write, vec![]);

            // New events
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
            assert_eq!(de.next().unwrap(), Eof);
        }

        /// Checks that limiting buffer size works correctly
        #[test]
        fn limit() {
            use serde::Deserialize;

            #[derive(Debug, Deserialize)]
            #[allow(unused)]
            struct List {
                item: Vec<()>,
            }

            let mut de = Deserializer::from_str(
                r#" with text "#,
            );
            de.event_buffer_size(NonZeroUsize::new(3));

            match List::deserialize(&mut de) {
                Err(DeError::TooManyEvents(count)) => assert_eq!(count.get(), 3),
                e => panic!("Expected `Err(TooManyEvents(3))`, but found {:?}", e),
            }
        }

        /// Without handling Eof in `skip` this test failed with memory allocation
        #[test]
        fn invalid_xml() {
            use
            crate::de::DeEvent::*;

            let mut de = Deserializer::from_str("");

            // Cache all events
            let checkpoint = de.skip_checkpoint();
            de.skip().unwrap();
            de.start_replay(checkpoint);
            assert_eq!(de.read, vec![Start(BytesStart::new("root")), Eof]);
        }
    }

    mod read_to_end {
        use super::*;
        use crate::de::DeEvent::*;
        use pretty_assertions::assert_eq;

        #[test]
        fn complex() {
            let mut de = Deserializer::from_str(
                r#" textcontent "#,
            );

            assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));

            assert_eq!(
                de.next().unwrap(),
                Start(BytesStart::from_content(r#"tag a="1""#, 3))
            );
            assert_eq!(de.read_to_end(QName(b"tag")).unwrap(), ());

            assert_eq!(
                de.next().unwrap(),
                Start(BytesStart::from_content(r#"tag a="2""#, 3))
            );
            assert_eq!(de.next().unwrap(), CData(BytesCData::new("cdata content")));
            assert_eq!(de.next().unwrap(), End(BytesEnd::new("tag")));

            assert_eq!(de.next().unwrap(), Start(BytesStart::new("self-closed")));
            assert_eq!(de.read_to_end(QName(b"self-closed")).unwrap(), ());

            assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
            assert_eq!(de.next().unwrap(), Eof);
        }

        #[test]
        fn invalid_xml() {
            let mut de = Deserializer::from_str("");

            assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag")));
            assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("tag")));

            match de.read_to_end(QName(b"tag")) {
                Err(DeError::UnexpectedEof) => (),
                x => panic!("Expected `Err(UnexpectedEof)`, but found {:?}", x),
            }
            assert_eq!(de.next().unwrap(), Eof);
        }
    }

    /// Checks that `IoReader` and `SliceReader` produce the same events
    /// for the same input
    #[test]
    fn borrowing_reader_parity() {
        let s = r#" Some text "#;

        let mut reader1 = IoReader {
            reader: Reader::from_reader(s.as_bytes()),
            buf: Vec::new(),
        };
        let mut reader2 = SliceReader {
            reader: Reader::from_str(s),
        };

        loop {
            let event1 = reader1.next().unwrap();
            let event2 = reader2.next().unwrap();

            if let (DeEvent::Eof, DeEvent::Eof) = (&event1, &event2) {
                break;
            }

            assert_eq!(event1, event2);
        }
    }

    /// Checks the exact list of events produced by `SliceReader`
    #[test]
    fn borrowing_reader_events() {
        let s = r#" Some text "#;

        let mut reader = SliceReader {
            reader: Reader::from_str(s),
        };

        reader
            .reader
            .trim_text(true)
            .expand_empty_elements(true)
            .check_end_names(true);

        let mut events = Vec::new();

        loop {
            let event = reader.next().unwrap();
            if let DeEvent::Eof = event {
                break;
            }
            events.push(event);
        }

        use crate::de::DeEvent::*;

        assert_eq!(
            events,
            vec![
                Start(BytesStart::from_content(
                    r#"item name="hello" source="world.rs""#,
                    4
                )),
                Text(BytesText::from_escaped("Some text")),
                End(BytesEnd::new("item")),
                Start(BytesStart::from_content("item2", 5)),
                End(BytesEnd::new("item2")),
                Start(BytesStart::from_content("item3", 5)),
                End(BytesEnd::new("item3")),
                Start(BytesStart::from_content(r#"item4 value="world" "#, 5)),
                End(BytesEnd::new("item4")),
            ]
        )
    }

    /// Ensures, that [`Deserializer::read_string()`] never can get an `End` event,
    /// because parser reports error early
    #[test]
    fn read_string() {
        match from_str::(r#""#) {
            Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
                assert_eq!(expected, "");
                assert_eq!(found, "root");
            }
            x => panic!(
                r#"Expected `Err(InvalidXml(EndEventMismatch("", "root")))`, but found {:?}"#,
                x
            ),
        }

        let s: String = from_str(r#""#).unwrap();
        assert_eq!(s, "");

        match from_str::(r#""#) {
            Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
                assert_eq!(expected, "root");
                assert_eq!(found, "other");
            }
            x => panic!(
                r#"Expected `Err(InvalidXml(EndEventMismatch("root", "other")))`, but found {:?}"#,
                x
            ),
        }
    }
}
quick-xml-0.27.1/src/de/simple_type.rs000064400000000000000000001431140072674642500157400ustar 00000000000000//! Contains Serde `Deserializer` for XML [simple types] [as defined] in the XML Schema.
//!
//! [simple types]: https://www.w3schools.com/xml/el_simpletype.asp
//!
//! [as defined]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition

use crate::de::{deserialize_bool, str2bool};
use crate::encoding::Decoder;
use crate::errors::serialize::DeError;
use crate::escape::unescape;
use memchr::memchr;
use serde::de::{DeserializeSeed, Deserializer, EnumAccess, SeqAccess, VariantAccess, Visitor};
use serde::{self, serde_if_integer128};
use std::borrow::Cow;
use std::ops::{Deref, Range};

// Generates a `deserialize_<number>` method that parses the textual content
// and passes the parsed number to the given visitor method.
//
// - `($method, $visit)` parses `self.content.as_str()`
// - `($method => $visit)` parses the result of `self.decode()`
macro_rules! deserialize_num {
    ($method:ident, $visit:ident) => {
        fn $method(self, visitor: V) -> Result
        where
            V: Visitor<'de>,
        {
            visitor.$visit(self.content.as_str().parse()?)
        }
    };
    ($method:ident => $visit:ident) => {
        fn $method(self, visitor: V) -> Result
        where
            V: Visitor<'de>,
        {
            let string = self.decode()?;
            visitor.$visit(string.as_str().parse()?)
        }
    };
}

// Generates a `deserialize_*` method that ignores its arguments and always
// fails with `DeError::Unsupported($message)`.
macro_rules! unsupported {
    (
        $deserialize:ident
        $(
            ($($type:ty),*)
        )?
        => $message:literal
    ) => {
        #[inline]
        fn $deserialize>(
            self,
            $($(_: $type,)*)?
            _visitor: V
        ) -> Result {
            Err(DeError::Unsupported($message.into()))
        }
    };
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// A version of [`Cow`] that can borrow from two different buffers, one of them
/// is a deserializer input, and conceptually contains only part of owned data.
///
/// # Lifetimes
/// - `'de` -- lifetime of the data that deserializer borrow from the parsed input
/// - `'a` -- lifetime of the data that owned by a deserializer
enum Content<'de, 'a> {
    /// An input borrowed from the parsed data
    Input(&'de str),
    /// An input borrowed from the buffer owned by another deserializer
    Slice(&'a str),
    /// An input taken from an external deserializer, owned by that deserializer.
    /// Only part of this data, located after offset represented by `usize`, is used
    /// to deserialize data, the other part is garbage that can't be dropped because
    /// we do not want to make reallocations if they are not required.
    Owned(String, usize),
}
impl<'de, 'a> Content<'de, 'a> {
    /// Returns string representation of the content.
    ///
    /// For the [`Self::Owned`] variant only the part after the stored offset
    /// is returned.
    fn as_str(&self) -> &str {
        match self {
            Content::Input(s) => s,
            Content::Slice(s) => s,
            Content::Owned(s, offset) => s.split_at(*offset).1,
        }
    }

    /// Supply to the visitor a borrowed string, a string slice, or an owned
    /// string depending on the kind of input. Unlike [`Self::deserialize_item`],
    /// the whole [`Self::Owned`] string will be passed to the visitor.
    ///
    /// Calls
    /// - `visitor.visit_borrowed_str` if data borrowed from the input
    /// - `visitor.visit_str` if data borrowed from another source
    /// - `visitor.visit_string` if data owned by this type
    #[inline]
    fn deserialize_all(self, visitor: V) -> Result
    where
        V: Visitor<'de>,
    {
        match self {
            Content::Input(s) => visitor.visit_borrowed_str(s),
            Content::Slice(s) => visitor.visit_str(s),
            // The offset is ignored here: the whole string is passed on
            Content::Owned(s, _) => visitor.visit_string(s),
        }
    }

    /// Supply to the visitor a borrowed string, a string slice, or an owned
    /// string depending on the kind of input. Unlike [`Self::deserialize_all`],
    /// only part of [`Self::Owned`] string will be passed to the visitor.
    ///
    /// Calls
    /// - `visitor.visit_borrowed_str` if data borrowed from the input
    /// - `visitor.visit_str` if data borrowed from another source, or if only
    ///   a part of the owned string (non-zero offset) is used
    /// - `visitor.visit_string` if data owned by this type and the offset is zero
    #[inline]
    fn deserialize_item(self, visitor: V) -> Result
    where
        V: Visitor<'de>,
    {
        match self {
            Content::Input(s) => visitor.visit_borrowed_str(s),
            Content::Slice(s) => visitor.visit_str(s),
            // With zero offset the whole string is the item, so it can be moved
            Content::Owned(s, 0) => visitor.visit_string(s),
            Content::Owned(s, offset) => visitor.visit_str(s.split_at(offset).1),
        }
    }
}

/// A deserializer that handles ordinary [simple type definition][item] with
/// `{variety} = atomic`, or an ordinary [simple type] definition with
/// `{variety} = union` whose basic members are all atomic.
/// /// This deserializer can deserialize only primitive types: /// - numbers /// - booleans /// - strings /// - units /// - options /// - unit variants of enums /// /// Identifiers represented as strings and deserialized accordingly. /// /// Deserialization of all other types returns [`Unsupported`][DeError::Unsupported] error. /// /// The `Owned` variant of the content acts as a storage for data, allocated by /// an external deserializer that pass it via [`ListIter`]. /// /// [item]: https://www.w3.org/TR/xmlschema11-1/#std-item_type_definition /// [simple type]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition struct AtomicDeserializer<'de, 'a> { /// Content of the attribute value, text content or CDATA content content: Content<'de, 'a>, /// If `true`, `content` in an escaped form and should be unescaped before use escaped: bool, } impl<'de, 'a> Deserializer<'de> for AtomicDeserializer<'de, 'a> { type Error = DeError; /// Forwards deserialization to the [`Self::deserialize_str`] fn deserialize_any(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_str(visitor) } /// According to the , /// valid boolean representations are only `"true"`, `"false"`, `"1"`, /// and `"0"`. 
But this method also handles following: /// /// |`bool` |XML content /// |-------|------------------------------------------------------------- /// |`true` |`"True"`, `"TRUE"`, `"t"`, `"Yes"`, `"YES"`, `"yes"`, `"y"` /// |`false`|`"False"`, `"FALSE"`, `"f"`, `"No"`, `"NO"`, `"no"`, `"n"` fn deserialize_bool(self, visitor: V) -> Result where V: Visitor<'de>, { str2bool(self.content.as_str(), visitor) } deserialize_num!(deserialize_i8, visit_i8); deserialize_num!(deserialize_i16, visit_i16); deserialize_num!(deserialize_i32, visit_i32); deserialize_num!(deserialize_i64, visit_i64); deserialize_num!(deserialize_u8, visit_u8); deserialize_num!(deserialize_u16, visit_u16); deserialize_num!(deserialize_u32, visit_u32); deserialize_num!(deserialize_u64, visit_u64); serde_if_integer128! { deserialize_num!(deserialize_i128, visit_i128); deserialize_num!(deserialize_u128, visit_u128); } deserialize_num!(deserialize_f32, visit_f32); deserialize_num!(deserialize_f64, visit_f64); /// Forwards deserialization to the [`Self::deserialize_str`] fn deserialize_char(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_str(visitor) } /// Supply to the visitor borrowed string, string slice, or owned string /// depending on the kind of input and presence of the escaped data. /// /// If string requires unescaping, then calls [`Visitor::visit_string`] with /// new allocated buffer with unescaped data. /// /// Otherwise calls /// - [`Visitor::visit_borrowed_str`] if data borrowed from the input /// - [`Visitor::visit_str`] if data borrowed from other deserializer /// - [`Visitor::visit_string`] if data owned by this deserializer fn deserialize_str(self, visitor: V) -> Result where V: Visitor<'de>, { if self.escaped { match unescape(self.content.as_str())? 
{ Cow::Borrowed(_) => self.content.deserialize_item(visitor), Cow::Owned(s) => visitor.visit_string(s), } } else { self.content.deserialize_item(visitor) } } fn deserialize_string(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_str(visitor) } /// If `content` is an empty string then calls [`Visitor::visit_none`], /// otherwise calls [`Visitor::visit_some`] with itself fn deserialize_option(self, visitor: V) -> Result where V: Visitor<'de>, { if self.content.as_str().is_empty() { visitor.visit_none() } else { visitor.visit_some(self) } } fn deserialize_unit(self, visitor: V) -> Result where V: Visitor<'de>, { visitor.visit_unit() } /// Forwards deserialization to the [`Self::deserialize_unit`] fn deserialize_unit_struct( self, _name: &'static str, visitor: V, ) -> Result where V: Visitor<'de>, { self.deserialize_unit(visitor) } fn deserialize_newtype_struct( self, _name: &'static str, visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_newtype_struct(self) } fn deserialize_enum( self, _name: &'static str, _variants: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_enum(self) } /// Forwards deserialization to the [`Self::deserialize_str`] fn deserialize_identifier(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_str(visitor) } fn deserialize_ignored_any(self, visitor: V) -> Result where V: Visitor<'de>, { visitor.visit_unit() } unsupported!(deserialize_bytes => "byte arrays are not supported as `xs:list` items"); unsupported!(deserialize_byte_buf => "byte arrays are not supported as `xs:list` items"); unsupported!(deserialize_seq => "sequences are not supported as `xs:list` items"); unsupported!(deserialize_tuple(usize) => "tuples are not supported as `xs:list` items"); unsupported!(deserialize_tuple_struct(&'static str, usize) => "tuples are not supported as `xs:list` items"); unsupported!(deserialize_map => "maps are not supported as `xs:list` items"); 
unsupported!(deserialize_struct(&'static str, &'static [&'static str]) => "structures are not supported as `xs:list` items"); } impl<'de, 'a> EnumAccess<'de> for AtomicDeserializer<'de, 'a> { type Error = DeError; type Variant = AtomicUnitOnly; fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant), DeError> where V: DeserializeSeed<'de>, { let name = seed.deserialize(self)?; Ok((name, AtomicUnitOnly)) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Deserializer of variant data, that supports only unit variants. /// Attempt to deserialize newtype, tuple or struct variant will return a /// [`DeError::Unsupported`] error. pub struct AtomicUnitOnly; impl<'de> VariantAccess<'de> for AtomicUnitOnly { type Error = DeError; #[inline] fn unit_variant(self) -> Result<(), DeError> { Ok(()) } fn newtype_variant_seed(self, _seed: T) -> Result where T: DeserializeSeed<'de>, { Err(DeError::Unsupported( "enum newtype variants are not supported as `xs:list` items".into(), )) } fn tuple_variant(self, _len: usize, _visitor: V) -> Result where V: Visitor<'de>, { Err(DeError::Unsupported( "enum tuple variants are not supported as `xs:list` items".into(), )) } fn struct_variant( self, _fields: &'static [&'static str], _visitor: V, ) -> Result where V: Visitor<'de>, { Err(DeError::Unsupported( "enum struct variants are not supported as `xs:list` items".into(), )) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Iterator over string sub-slices delimited by one or several spaces. /// Contains decoded value of the `simpleType`. /// Iteration ends when list contains `None`. 
struct ListIter<'de, 'a> { /// If `Some`, contains unconsumed data of the list content: Option>, /// If `true`, `content` in escaped form and should be unescaped before use escaped: bool, } impl<'de, 'a> SeqAccess<'de> for ListIter<'de, 'a> { type Error = DeError; fn next_element_seed(&mut self, seed: T) -> Result, DeError> where T: DeserializeSeed<'de>, { if let Some(mut content) = self.content.take() { const DELIMITER: u8 = b' '; loop { let string = content.as_str(); if string.is_empty() { return Ok(None); } return match memchr(DELIMITER, string.as_bytes()) { // No delimiters in the `content`, deserialize it as a whole atomic None => seed.deserialize(AtomicDeserializer { content, escaped: self.escaped, }), // `content` started with a space, skip them all Some(0) => { // Skip all spaces let start = string.as_bytes().iter().position(|ch| *ch != DELIMITER); content = match (start, content) { // We cannot find any non-space character, so string contains only spaces (None, _) => return Ok(None), // Borrow result from input or deserializer depending on the initial borrowing (Some(start), Content::Input(s)) => Content::Input(s.split_at(start).1), (Some(start), Content::Slice(s)) => Content::Slice(s.split_at(start).1), // Skip additional bytes if we own data (Some(start), Content::Owned(s, skip)) => { Content::Owned(s, skip + start) } }; continue; } // `content` started from an atomic Some(end) => match content { // Borrow for the next iteration from input or deserializer depending on // the initial borrowing Content::Input(s) => { let (item, rest) = s.split_at(end); self.content = Some(Content::Input(rest)); seed.deserialize(AtomicDeserializer { content: Content::Input(item), escaped: self.escaped, }) } Content::Slice(s) => { let (item, rest) = s.split_at(end); self.content = Some(Content::Slice(rest)); seed.deserialize(AtomicDeserializer { content: Content::Slice(item), escaped: self.escaped, }) } // Skip additional bytes if we own data for next iteration, but 
deserialize from // the borrowed data from our buffer Content::Owned(s, skip) => { let item = s.split_at(skip + end).0; let result = seed.deserialize(AtomicDeserializer { content: Content::Slice(item), escaped: self.escaped, }); self.content = Some(Content::Owned(s, skip + end)); result } }, } .map(Some); } } Ok(None) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A version of [`Cow`] that can borrow from two different buffers, one of them /// is a deserializer input. /// /// # Lifetimes /// - `'de` -- lifetime of the data that deserializer borrow from the parsed input /// - `'a` -- lifetime of the data that owned by a deserializer enum CowRef<'de, 'a> { /// An input borrowed from the parsed data Input(&'de [u8]), /// An input borrowed from the buffer owned by another deserializer Slice(&'a [u8]), /// An input taken from an external deserializer, owned by that deserializer Owned(Vec), } impl<'de, 'a> Deref for CowRef<'de, 'a> { type Target = [u8]; fn deref(&self) -> &[u8] { match self { Self::Input(slice) => slice, Self::Slice(slice) => slice, Self::Owned(ref v) => v, } } } /// A deserializer for an xml probably escaped and encoded value of XSD [simple types]. /// This deserializer will borrow from the input as much as possible. /// /// `deserialize_any()` returns the whole string that deserializer contains. /// /// Escaping the value is actually not always necessary, for instance when /// converting to a float, we don't expect any escapable character anyway. /// In that cases deserializer skips unescaping step. /// /// Used for deserialize values from: /// - attribute values (`<... 
...="value" ...>`) /// - text content (`<...>text`) /// - CDATA content (`<...>`) /// /// [simple types]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition pub struct SimpleTypeDeserializer<'de, 'a> { /// - In case of attribute contains escaped attribute value /// - In case of text contains escaped text value /// - In case of CData contains unescaped cdata value content: CowRef<'de, 'a>, /// If `true`, `content` in escaped form and should be unescaped before use escaped: bool, /// Decoder used to deserialize string data, numeric and boolean data. /// Not used for deserializing raw byte buffers decoder: Decoder, } impl<'de, 'a> SimpleTypeDeserializer<'de, 'a> { /// Creates a deserializer from a value, that possible borrowed from input pub fn from_text_content(value: Cow<'de, str>) -> Self { let content = match value { Cow::Borrowed(slice) => CowRef::Input(slice.as_bytes()), Cow::Owned(content) => CowRef::Owned(content.into_bytes()), }; Self::new(content, false, Decoder::utf8()) } /// Creates a deserializer from a part of value at specified range pub fn from_part( value: &'a Cow<'de, [u8]>, range: Range, escaped: bool, decoder: Decoder, ) -> Self { let content = match value { Cow::Borrowed(slice) => CowRef::Input(&slice[range]), Cow::Owned(slice) => CowRef::Slice(&slice[range]), }; Self::new(content, escaped, decoder) } /// Constructor for tests #[inline] fn new(content: CowRef<'de, 'a>, escaped: bool, decoder: Decoder) -> Self { Self { content, escaped, decoder, } } /// Decodes raw bytes using the encoding specified. /// The method will borrow if has the UTF-8 compatible representation. #[inline] fn decode<'b>(&'b self) -> Result, DeError> { Ok(match self.content { CowRef::Input(content) => match self.decoder.decode(content)? { Cow::Borrowed(content) => Content::Input(content), Cow::Owned(content) => Content::Owned(content, 0), }, CowRef::Slice(content) => match self.decoder.decode(content)? 
{ Cow::Borrowed(content) => Content::Slice(content), Cow::Owned(content) => Content::Owned(content, 0), }, CowRef::Owned(ref content) => match self.decoder.decode(content)? { Cow::Borrowed(content) => Content::Slice(content), Cow::Owned(content) => Content::Owned(content, 0), }, }) } } impl<'de, 'a> Deserializer<'de> for SimpleTypeDeserializer<'de, 'a> { type Error = DeError; /// Forwards deserialization to the [`Self::deserialize_str`] fn deserialize_any(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_str(visitor) } fn deserialize_bool(self, visitor: V) -> Result where V: Visitor<'de>, { deserialize_bool(&self.content, self.decoder, visitor) } deserialize_num!(deserialize_i8 => visit_i8); deserialize_num!(deserialize_i16 => visit_i16); deserialize_num!(deserialize_i32 => visit_i32); deserialize_num!(deserialize_i64 => visit_i64); deserialize_num!(deserialize_u8 => visit_u8); deserialize_num!(deserialize_u16 => visit_u16); deserialize_num!(deserialize_u32 => visit_u32); deserialize_num!(deserialize_u64 => visit_u64); serde_if_integer128! { deserialize_num!(deserialize_i128 => visit_i128); deserialize_num!(deserialize_u128 => visit_u128); } deserialize_num!(deserialize_f32 => visit_f32); deserialize_num!(deserialize_f64 => visit_f64); /// Forwards deserialization to the [`Self::deserialize_str`] fn deserialize_char(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_str(visitor) } fn deserialize_str(self, visitor: V) -> Result where V: Visitor<'de>, { let content = self.decode()?; if self.escaped { match unescape(content.as_str())? 
{ Cow::Borrowed(_) => content.deserialize_all(visitor), Cow::Owned(s) => visitor.visit_string(s), } } else { content.deserialize_all(visitor) } } /// Forwards deserialization to the [`Self::deserialize_str`] fn deserialize_string(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_str(visitor) } /// Returns [`DeError::Unsupported`] fn deserialize_bytes(self, _visitor: V) -> Result where V: Visitor<'de>, { Err(DeError::Unsupported( "binary data content is not supported by XML format".into(), )) } /// Forwards deserialization to the [`Self::deserialize_bytes`] fn deserialize_byte_buf(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_bytes(visitor) } fn deserialize_option(self, visitor: V) -> Result where V: Visitor<'de>, { if self.content.is_empty() { visitor.visit_none() } else { visitor.visit_some(self) } } fn deserialize_unit(self, visitor: V) -> Result where V: Visitor<'de>, { visitor.visit_unit() } /// Forwards deserialization to the [`Self::deserialize_unit`] fn deserialize_unit_struct( self, _name: &'static str, visitor: V, ) -> Result where V: Visitor<'de>, { self.deserialize_unit(visitor) } fn deserialize_newtype_struct( self, _name: &'static str, visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_newtype_struct(self) } fn deserialize_seq(self, visitor: V) -> Result where V: Visitor<'de>, { visitor.visit_seq(ListIter { content: Some(self.decode()?), escaped: self.escaped, }) } /// Representation of tuples the same as [sequences][Self::deserialize_seq]. fn deserialize_tuple(self, _len: usize, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_seq(visitor) } /// Representation of named tuples the same as [unnamed tuples][Self::deserialize_tuple]. 
fn deserialize_tuple_struct( self, _name: &'static str, len: usize, visitor: V, ) -> Result where V: Visitor<'de>, { self.deserialize_tuple(len, visitor) } unsupported!(deserialize_map => "maps are not supported for XSD `simpleType`s"); unsupported!(deserialize_struct(&'static str, &'static [&'static str]) => "structures are not supported for XSD `simpleType`s"); fn deserialize_enum( self, _name: &'static str, _variants: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_enum(self) } /// Forwards deserialization to the [`Self::deserialize_str`] fn deserialize_identifier(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_str(visitor) } fn deserialize_ignored_any(self, visitor: V) -> Result where V: Visitor<'de>, { visitor.visit_unit() } } impl<'de, 'a> EnumAccess<'de> for SimpleTypeDeserializer<'de, 'a> { type Error = DeError; type Variant = SimpleTypeUnitOnly; fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant), DeError> where V: DeserializeSeed<'de>, { let name = seed.deserialize(self)?; Ok((name, SimpleTypeUnitOnly)) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Deserializer of variant data, that supports only unit variants. /// Attempt to deserialize newtype, tuple or struct variant will return a /// [`DeError::Unsupported`] error. 
pub struct SimpleTypeUnitOnly; impl<'de> VariantAccess<'de> for SimpleTypeUnitOnly { type Error = DeError; #[inline] fn unit_variant(self) -> Result<(), DeError> { Ok(()) } fn newtype_variant_seed(self, _seed: T) -> Result where T: DeserializeSeed<'de>, { Err(DeError::Unsupported( "enum newtype variants are not supported for XSD `simpleType`s".into(), )) } fn tuple_variant(self, _len: usize, _visitor: V) -> Result where V: Visitor<'de>, { Err(DeError::Unsupported( "enum tuple variants are not supported for XSD `simpleType`s".into(), )) } fn struct_variant( self, _fields: &'static [&'static str], _visitor: V, ) -> Result where V: Visitor<'de>, { Err(DeError::Unsupported( "enum struct variants are not supported for XSD `simpleType`s".into(), )) } } //////////////////////////////////////////////////////////////////////////////////////////////////// #[cfg(test)] mod tests { use super::*; use crate::se::simple_type::{QuoteTarget, SimpleTypeSerializer}; use crate::se::{Indent, QuoteLevel}; use crate::utils::{ByteBuf, Bytes}; use serde::de::IgnoredAny; use serde::{Deserialize, Serialize}; use std::collections::HashMap; /// Checks that `$xml`, read with the `$encoding` decoder as escaped content, /// is deserialized into `$result` (without a serialization roundtrip) macro_rules! simple_only { ($encoding:ident, $name:ident: $type:ty = $xml:expr => $result:expr) => { #[test] fn $name() { let decoder = Decoder::$encoding(); let xml = $xml; let de = SimpleTypeDeserializer::new(CowRef::Input(xml.as_ref()), true, decoder); let data: $type = Deserialize::deserialize(de).unwrap(); assert_eq!(data, $result); } }; } /// Checks that `$xml` is deserialized into `$result` and that the result /// is serialized back to `$xml` macro_rules! 
simple { ($encoding:ident, $name:ident: $type:ty = $xml:expr => $result:expr) => { #[test] fn $name() { let decoder = Decoder::$encoding(); let xml = $xml; let de = SimpleTypeDeserializer::new(CowRef::Input(xml.as_ref()), true, decoder); let data: $type = Deserialize::deserialize(de).unwrap(); assert_eq!(data, $result); // Roundtrip to ensure that serializer corresponds to deserializer assert_eq!( data.serialize(SimpleTypeSerializer { writer: String::new(), target: QuoteTarget::Text, level: QuoteLevel::Full, indent: Indent::None, }) .unwrap(), xml ); } }; } /// Checks that an attempt to deserialize `$xml` as a `$type` results in a /// deserialization error `$kind` with `$reason` macro_rules! err { ($encoding:ident, $name:ident: $type:ty = $xml:expr => $kind:ident($reason:literal)) => { #[test] fn $name() { let decoder = Decoder::$encoding(); let xml = $xml; let de = SimpleTypeDeserializer::new(CowRef::Input(xml.as_ref()), true, decoder); let err = <$type as Deserialize>::deserialize(de).unwrap_err(); match err { DeError::$kind(e) => assert_eq!(e, $reason), _ => panic!( "Expected `{}({})`, found `{:?}`", stringify!($kind), $reason, err ), } } }; } #[derive(Debug, Deserialize, Serialize, PartialEq)] struct Unit; #[derive(Debug, Deserialize, Serialize, PartialEq)] struct Newtype(String); #[derive(Debug, Deserialize, Serialize, PartialEq)] struct BorrowedNewtype<'a>(&'a str); #[derive(Debug, Deserialize, Serialize, PartialEq)] struct Struct { key: String, val: usize, } #[derive(Debug, Deserialize, Serialize, PartialEq)] enum Enum { Unit, Newtype(String), Tuple(String, usize), Struct { key: String, val: usize }, } #[derive(Debug, Deserialize, PartialEq)] #[serde(field_identifier)] enum Id { Field, } #[derive(Debug, Deserialize)] #[serde(transparent)] struct Any(IgnoredAny); impl PartialEq for Any { fn eq(&self, _other: &Any) -> bool { true } } /// Tests for deserialize atomic and union values, as defined in XSD specification mod atomic { use super::*; use 
crate::se::simple_type::AtomicSerializer; use pretty_assertions::assert_eq; /// Checks that the given `$input` is successfully deserialized into
the given `$result` macro_rules! deserialized_to_only { ($name:ident: $type:ty = $input:literal => $result:expr) => { #[test] fn $name() { let de = AtomicDeserializer { content: Content::Input($input), escaped: true, }; let data: $type = Deserialize::deserialize(de).unwrap(); assert_eq!(data, $result); } }; } /// Checks that the given `$input` is successfully deserialized into the given `$result` /// and the result is serialized back to the `$input` macro_rules! deserialized_to { ($name:ident: $type:ty = $input:literal => $result:expr) => { #[test] fn $name() { let de = AtomicDeserializer { content: Content::Input($input), escaped: true, }; let data: $type = Deserialize::deserialize(de).unwrap(); assert_eq!(data, $result); // Roundtrip to ensure that serializer corresponds to deserializer assert_eq!( data.serialize(AtomicSerializer { writer: String::new(), target: QuoteTarget::Text, level: QuoteLevel::Full, }) .unwrap(), $input ); } }; } /// Checks that an attempt to deserialize the given `$input` as a `$type` results in a /// deserialization error `$kind` with `$reason` macro_rules! err { ($name:ident: $type:ty = $input:literal => $kind:ident($reason:literal)) => { #[test] fn $name() { let de = AtomicDeserializer { content: Content::Input($input), escaped: true, }; let err = <$type as Deserialize>::deserialize(de).unwrap_err(); match err { DeError::$kind(e) => assert_eq!(e, $reason), _ => panic!( "Expected `{}({})`, found `{:?}`", stringify!($kind), $reason, err ), } } }; } deserialized_to!(false_: bool = "false" => false); deserialized_to!(true_: bool = "true" => true); deserialized_to!(i8_: i8 = "-2" => -2); deserialized_to!(i16_: i16 = "-2" => -2); deserialized_to!(i32_: i32 = "-2" => -2); deserialized_to!(i64_: i64 = "-2" => -2); deserialized_to!(u8_: u8 = "3" => 3); deserialized_to!(u16_: u16 = "3" => 3); deserialized_to!(u32_: u32 = "3" => 3); deserialized_to!(u64_: u64 = "3" => 3); serde_if_integer128! 
{ deserialized_to!(i128_: i128 = "-2" => -2); deserialized_to!(u128_: u128 = "2" => 2); } deserialized_to!(f32_: f32 = "1.23" => 1.23); deserialized_to!(f64_: f64 = "1.23" => 1.23); deserialized_to!(char_unescaped: char = "h" => 'h'); deserialized_to!(char_escaped: char = "<" => '<'); deserialized_to!(string: String = "<escaped string" => " "non-escaped string"); err!(escaped_str: &str = "escaped string" => Custom("invalid type: string \"escaped string\", expected a borrowed string")); err!(byte_buf: ByteBuf = "<escaped string" => Unsupported("byte arrays are not supported as `xs:list` items")); err!(borrowed_bytes: Bytes = "non-escaped string" => Unsupported("byte arrays are not supported as `xs:list` items")); deserialized_to!(option_none: Option<&str> = "" => None); deserialized_to!(option_some: Option<&str> = "non-escaped-string" => Some("non-escaped-string")); deserialized_to_only!(unit: () = "anything" => ()); deserialized_to_only!(unit_struct: Unit = "anything" => Unit); deserialized_to!(newtype_owned: Newtype = "<escaped string" => Newtype(" BorrowedNewtype("non-escaped string")); err!(seq: Vec<()> = "non-escaped string" => Unsupported("sequences are not supported as `xs:list` items")); err!(tuple: ((), ()) = "non-escaped string" => Unsupported("tuples are not supported as `xs:list` items")); err!(tuple_struct: ((), ()) = "non-escaped string" => Unsupported("tuples are not supported as `xs:list` items")); err!(map: HashMap<(), ()> = "non-escaped string" => Unsupported("maps are not supported as `xs:list` items")); err!(struct_: Struct = "non-escaped string" => Unsupported("structures are not supported as `xs:list` items")); deserialized_to!(enum_unit: Enum = "Unit" => Enum::Unit); err!(enum_newtype: Enum = "Newtype" => Unsupported("enum newtype variants are not supported as `xs:list` items")); err!(enum_tuple: Enum = "Tuple" => Unsupported("enum tuple variants are not supported as `xs:list` items")); err!(enum_struct: Enum = "Struct" => Unsupported("enum 
struct variants are not supported as `xs:list` items")); err!(enum_other: Enum = "any data" => Custom("unknown variant `any data`, expected one of `Unit`, `Newtype`, `Tuple`, `Struct`")); deserialized_to_only!(identifier: Id = "Field" => Id::Field); deserialized_to_only!(ignored_any: Any = "any data" => Any(IgnoredAny)); /// Checks that deserialization from an owned content is working #[test] #[cfg(feature = "encoding")] fn owned_data() { let de = AtomicDeserializer { content: Content::Owned("string slice".into(), 7), escaped: true, }; assert_eq!(de.content.as_str(), "slice"); let data: String = Deserialize::deserialize(de).unwrap(); assert_eq!(data, "slice"); } /// Checks that deserialization from a content borrowed from some /// buffer other than the input is working #[test] fn borrowed_from_deserializer() { let de = AtomicDeserializer { content: Content::Slice("string slice"), escaped: true, }; assert_eq!(de.content.as_str(), "string slice"); let data: String = Deserialize::deserialize(de).unwrap(); assert_eq!(data, "string slice"); } } /// Module for testing list accessor mod list { use super::*; use pretty_assertions::assert_eq; #[test] fn empty() { let mut seq = ListIter { content: Some(Content::Input("")), escaped: true, }; assert_eq!(seq.next_element::<&str>().unwrap(), None); assert_eq!(seq.next_element::<&str>().unwrap(), None); } #[test] fn only_spaces() { let mut seq = ListIter { content: Some(Content::Input(" ")), escaped: true, }; assert_eq!(seq.next_element::<&str>().unwrap(), None); assert_eq!(seq.next_element::<&str>().unwrap(), None); } #[test] fn one_item() { let mut seq = ListIter { content: Some(Content::Input("abc")), escaped: true, }; assert_eq!(seq.next_element::<&str>().unwrap(), Some("abc")); assert_eq!(seq.next_element::<&str>().unwrap(), None); assert_eq!(seq.next_element::<&str>().unwrap(), None); } #[test] fn two_items() { let mut seq = ListIter { content: Some(Content::Input("abc def")), escaped: true, }; 
assert_eq!(seq.next_element::<&str>().unwrap(), Some("abc")); assert_eq!(seq.next_element::<&str>().unwrap(), Some("def")); assert_eq!(seq.next_element::<&str>().unwrap(), None); assert_eq!(seq.next_element::<&str>().unwrap(), None); } #[test] fn leading_spaces() { let mut seq = ListIter { content: Some(Content::Input(" def")), escaped: true, }; assert_eq!(seq.next_element::<&str>().unwrap(), Some("def")); assert_eq!(seq.next_element::<&str>().unwrap(), None); assert_eq!(seq.next_element::<&str>().unwrap(), None); } #[test] fn trailing_spaces() { let mut seq = ListIter { content: Some(Content::Input("abc ")), escaped: true, }; assert_eq!(seq.next_element::<&str>().unwrap(), Some("abc")); assert_eq!(seq.next_element::<&str>().unwrap(), None); assert_eq!(seq.next_element::<&str>().unwrap(), None); } #[test] fn mixed_types() { let mut seq = ListIter { content: Some(Content::Input("string 1.23 42 true false h Unit")), escaped: true, }; assert_eq!(seq.next_element::<&str>().unwrap(), Some("string")); assert_eq!(seq.next_element::().unwrap(), Some(1.23)); assert_eq!(seq.next_element::().unwrap(), Some(42)); assert_eq!(seq.next_element::().unwrap(), Some(true)); assert_eq!(seq.next_element::().unwrap(), Some(false)); assert_eq!(seq.next_element::().unwrap(), Some('h')); assert_eq!(seq.next_element::().unwrap(), Some(Enum::Unit)); assert_eq!(seq.next_element::<()>().unwrap(), None); assert_eq!(seq.next_element::<()>().unwrap(), None); } } /// Tests for `SimpleTypeDeserializer` created with the `Decoder::utf8()` decoder mod utf8 { use super::*; use pretty_assertions::assert_eq; simple!(utf8, i8_: i8 = "-2" => -2); simple!(utf8, i16_: i16 = "-2" => -2); simple!(utf8, i32_: i32 = "-2" => -2); simple!(utf8, i64_: i64 = "-2" => -2); simple!(utf8, u8_: u8 = "3" => 3); simple!(utf8, u16_: u16 = "3" => 3); simple!(utf8, u32_: u32 = "3" => 3); simple!(utf8, u64_: u64 = "3" => 3); serde_if_integer128! 
{ simple!(utf8, i128_: i128 = "-2" => -2); simple!(utf8, u128_: u128 = "2" => 2); } simple!(utf8, f32_: f32 = "1.23" => 1.23); simple!(utf8, f64_: f64 = "1.23" => 1.23); simple!(utf8, false_: bool = "false" => false); simple!(utf8, true_: bool = "true" => true); simple!(utf8, char_unescaped: char = "h" => 'h'); simple!(utf8, char_escaped: char = "<" => '<'); simple!(utf8, string: String = "<escaped string" => " Unsupported("binary data content is not supported by XML format")); simple!(utf8, borrowed_str: &str = "non-escaped string" => "non-escaped string"); err!(utf8, borrowed_bytes: Bytes = "<escaped string" => Unsupported("binary data content is not supported by XML format")); simple!(utf8, option_none: Option<&str> = "" => None); simple!(utf8, option_some: Option<&str> = "non-escaped string" => Some("non-escaped string")); simple_only!(utf8, unit: () = "any data" => ()); simple_only!(utf8, unit_struct: Unit = "any data" => Unit); // Serializer will not escape space because this is unnecessary. 
// Because borrowing has meaning only for deserializer, no need to test // roundtrip here, it is already tested for strings where compatible list // of escaped characters is used simple_only!(utf8, newtype_owned: Newtype = "<escaped string" => Newtype(" BorrowedNewtype("non-escaped string")); err!(utf8, map: HashMap<(), ()> = "any data" => Unsupported("maps are not supported for XSD `simpleType`s")); err!(utf8, struct_: Struct = "any data" => Unsupported("structures are not supported for XSD `simpleType`s")); simple!(utf8, enum_unit: Enum = "Unit" => Enum::Unit); err!(utf8, enum_newtype: Enum = "Newtype" => Unsupported("enum newtype variants are not supported for XSD `simpleType`s")); err!(utf8, enum_tuple: Enum = "Tuple" => Unsupported("enum tuple variants are not supported for XSD `simpleType`s")); err!(utf8, enum_struct: Enum = "Struct" => Unsupported("enum struct variants are not supported for XSD `simpleType`s")); err!(utf8, enum_other: Enum = "any data" => Custom("unknown variant `any data`, expected one of `Unit`, `Newtype`, `Tuple`, `Struct`")); simple_only!(utf8, identifier: Id = "Field" => Id::Field); simple_only!(utf8, ignored_any: Any = "any data" => Any(IgnoredAny)); } /// Tests for `SimpleTypeDeserializer` created with the `Decoder::utf16()` decoder; /// compiled only with the `encoding` feature #[cfg(feature = "encoding")] mod utf16 { use super::*; use pretty_assertions::assert_eq; /// Encodes `string` as UTF-16 code units written in little-endian byte order fn to_utf16(string: &str) -> Vec { let mut bytes = Vec::new(); for ch in string.encode_utf16() { bytes.extend_from_slice(&ch.to_le_bytes()); } bytes } macro_rules! utf16 { ($name:ident: $type:ty = $xml:literal => $result:expr) => { simple_only!(utf16, $name: $type = to_utf16($xml) => $result); }; } macro_rules! 
unsupported { ($name:ident: $type:ty = $xml:literal => $err:literal) => { err!(utf16, $name: $type = to_utf16($xml) => Unsupported($err)); }; } utf16!(i8_: i8 = "-2" => -2); utf16!(i16_: i16 = "-2" => -2); utf16!(i32_: i32 = "-2" => -2); utf16!(i64_: i64 = "-2" => -2); utf16!(u8_: u8 = "3" => 3); utf16!(u16_: u16 = "3" => 3); utf16!(u32_: u32 = "3" => 3); utf16!(u64_: u64 = "3" => 3); serde_if_integer128! { utf16!(i128_: i128 = "-2" => -2); utf16!(u128_: u128 = "2" => 2); } utf16!(f32_: f32 = "1.23" => 1.23); utf16!(f64_: f64 = "1.23" => 1.23); utf16!(false_: bool = "false" => false); utf16!(true_: bool = "true" => true); utf16!(char_unescaped: char = "h" => 'h'); utf16!(char_escaped: char = "<" => '<'); utf16!(string: String = "<escaped string" => " "binary data content is not supported by XML format"); utf16!(option_none: Option<()> = "" => None); utf16!(option_some: Option<()> = "any data" => Some(())); utf16!(unit: () = "any data" => ()); utf16!(unit_struct: Unit = "any data" => Unit); utf16!(newtype_owned: Newtype = "<escaped string" => Newtype(" Custom("invalid type: string \"non-escaped string\", expected a borrowed string")); unsupported!(map: HashMap<(), ()> = "any data" => "maps are not supported for XSD `simpleType`s"); unsupported!(struct_: Struct = "any data" => "structures are not supported for XSD `simpleType`s"); utf16!(enum_unit: Enum = "Unit" => Enum::Unit); unsupported!(enum_newtype: Enum = "Newtype" => "enum newtype variants are not supported for XSD `simpleType`s"); unsupported!(enum_tuple: Enum = "Tuple" => "enum tuple variants are not supported for XSD `simpleType`s"); unsupported!(enum_struct: Enum = "Struct" => "enum struct variants are not supported for XSD `simpleType`s"); err!(utf16, enum_other: Enum = to_utf16("any data") => Custom("unknown variant `any data`, expected one of `Unit`, `Newtype`, `Tuple`, `Struct`")); utf16!(identifier: Id = "Field" => Id::Field); utf16!(ignored_any: Any = "any data" => Any(IgnoredAny)); } } 
quick-xml-0.27.1/src/de/var.rs000064400000000000000000000053520072674642500141770ustar 00000000000000use crate::{ de::{escape::EscapedDeserializer, DeEvent, Deserializer, XmlRead}, errors::serialize::DeError, }; use serde::de::{self, DeserializeSeed, Deserializer as SerdeDeserializer, Visitor}; use std::borrow::Cow; /// An enum access pub struct EnumAccess<'de, 'a, R> where R: XmlRead<'de>, { de: &'a mut Deserializer<'de, R>, } impl<'de, 'a, R> EnumAccess<'de, 'a, R> where R: XmlRead<'de>, { pub fn new(de: &'a mut Deserializer<'de, R>) -> Self { EnumAccess { de } } } impl<'de, 'a, R> de::EnumAccess<'de> for EnumAccess<'de, 'a, R> where R: XmlRead<'de>, { type Error = DeError; type Variant = VariantAccess<'de, 'a, R>; fn variant_seed(self, seed: V) -> Result<(V::Value, VariantAccess<'de, 'a, R>), DeError> where V: DeserializeSeed<'de>, { let decoder = self.de.reader.decoder(); let de = match self.de.peek()? { DeEvent::Text(t) => EscapedDeserializer::new(Cow::Borrowed(t), decoder, true), // Escape sequences does not processed inside CDATA section DeEvent::CData(t) => EscapedDeserializer::new(Cow::Borrowed(t), decoder, false), DeEvent::Start(e) => { EscapedDeserializer::new(Cow::Borrowed(e.local_name().into_inner()), decoder, false) } _ => { return Err(DeError::Unsupported( "Invalid event for Enum, expecting `Text` or `Start`".into(), )) } }; let name = seed.deserialize(de)?; Ok((name, VariantAccess { de: self.de })) } } pub struct VariantAccess<'de, 'a, R> where R: XmlRead<'de>, { de: &'a mut Deserializer<'de, R>, } impl<'de, 'a, R> de::VariantAccess<'de> for VariantAccess<'de, 'a, R> where R: XmlRead<'de>, { type Error = DeError; fn unit_variant(self) -> Result<(), DeError> { match self.de.next()? 
{ DeEvent::Start(e) => self.de.read_to_end(e.name()), DeEvent::Text(_) | DeEvent::CData(_) => Ok(()), _ => unreachable!(), } } fn newtype_variant_seed(self, seed: T) -> Result where T: DeserializeSeed<'de>, { seed.deserialize(&mut *self.de) } fn tuple_variant(self, len: usize, visitor: V) -> Result where V: Visitor<'de>, { self.de.deserialize_tuple(len, visitor) } fn struct_variant( self, fields: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { self.de.deserialize_struct("", fields, visitor) } } quick-xml-0.27.1/src/encoding.rs000064400000000000000000000134010072674642500145770ustar 00000000000000//! A module for wrappers that encode / decode data. use std::borrow::Cow; #[cfg(feature = "encoding")] use encoding_rs::{Encoding, UTF_16BE, UTF_16LE, UTF_8}; #[cfg(feature = "encoding")] use crate::Error; use crate::Result; /// Unicode "byte order mark" (\u{FEFF}) encoded as UTF-8. /// See pub(crate) const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF]; /// Unicode "byte order mark" (\u{FEFF}) encoded as UTF-16 with little-endian byte order. /// See #[cfg(feature = "encoding")] pub(crate) const UTF16_LE_BOM: &[u8] = &[0xFF, 0xFE]; /// Unicode "byte order mark" (\u{FEFF}) encoded as UTF-16 with big-endian byte order. /// See #[cfg(feature = "encoding")] pub(crate) const UTF16_BE_BOM: &[u8] = &[0xFE, 0xFF]; /// Decoder of byte slices into strings. /// /// If feature `encoding` is enabled, this encoding taken from the `"encoding"` /// XML declaration or assumes UTF-8, if XML has no declaration, encoding /// key is not defined or contains unknown encoding. /// /// The library supports any UTF-8 compatible encodings that crate `encoding_rs` /// is supported. [*UTF-16 and ISO-2022-JP are not supported at the present*][utf16]. /// /// If feature `encoding` is disabled, the decoder is always UTF-8 decoder: /// any XML declarations are ignored. 
/// /// [utf16]: https://github.com/tafia/quick-xml/issues/158 #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct Decoder { #[cfg(feature = "encoding")] pub(crate) encoding: &'static Encoding, } impl Decoder { pub(crate) fn utf8() -> Self { Decoder { #[cfg(feature = "encoding")] encoding: UTF_8, } } #[cfg(all(test, feature = "encoding", feature = "serialize"))] pub(crate) fn utf16() -> Self { Decoder { encoding: UTF_16LE } } } impl Decoder { /// Returns the `Reader`s encoding. /// /// This encoding will be used by [`decode`]. /// /// [`decode`]: Self::decode #[cfg(feature = "encoding")] pub fn encoding(&self) -> &'static Encoding { self.encoding } /// ## Without `encoding` feature /// /// Decodes an UTF-8 slice regardless of XML declaration and ignoring BOM /// if it is present in the `bytes`. /// /// ## With `encoding` feature /// /// Decodes specified bytes using encoding, declared in the XML, if it was /// declared there, or UTF-8 otherwise, and ignoring BOM if it is present /// in the `bytes`. /// /// ---- /// Returns an error in case of malformed sequences in the `bytes`. pub fn decode<'b>(&self, bytes: &'b [u8]) -> Result> { #[cfg(not(feature = "encoding"))] let decoded = Ok(Cow::Borrowed(std::str::from_utf8(bytes)?)); #[cfg(feature = "encoding")] let decoded = decode(bytes, self.encoding); decoded } } /// Decodes the provided bytes using the specified encoding. /// /// Returns an error in case of malformed or non-representable sequences in the `bytes`. #[cfg(feature = "encoding")] pub fn decode<'b>(bytes: &'b [u8], encoding: &'static Encoding) -> Result> { encoding .decode_without_bom_handling_and_without_replacement(bytes) .ok_or(Error::NonDecodable(None)) } /// Automatic encoding detection of XML files based using the /// [recommended algorithm](https://www.w3.org/TR/xml11/#sec-guessing). 
/// /// If encoding is detected, `Some` is returned with an encoding and size of BOM /// in bytes, if detection was performed using BOM, or zero, if detection was /// performed without BOM. /// /// IF encoding was not recognized, `None` is returned. /// /// Because the [`encoding_rs`] crate supports only subset of those encodings, only /// the supported subset are detected, which is UTF-8, UTF-16 BE and UTF-16 LE. /// /// The algorithm suggests examine up to the first 4 bytes to determine encoding /// according to the following table: /// /// | Bytes |Detected encoding /// |-------------|------------------------------------------ /// | **BOM** /// |`FE_FF_##_##`|UTF-16, big-endian /// |`FF FE ## ##`|UTF-16, little-endian /// |`EF BB BF` |UTF-8 /// | **No BOM** /// |`00 3C 00 3F`|UTF-16 BE or ISO-10646-UCS-2 BE or similar 16-bit BE (use declared encoding to find the exact one) /// |`3C 00 3F 00`|UTF-16 LE or ISO-10646-UCS-2 LE or similar 16-bit LE (use declared encoding to find the exact one) /// |`3C 3F 78 6D`|UTF-8, ISO 646, ASCII, some part of ISO 8859, Shift-JIS, EUC, or any other 7-bit, 8-bit, or mixed-width encoding which ensures that the characters of ASCII have their normal positions, width, and values; the actual encoding declaration must be read to detect which of these applies, but since all of these encodings use the same bit patterns for the relevant ASCII characters, the encoding declaration itself may be read reliably #[cfg(feature = "encoding")] pub fn detect_encoding(bytes: &[u8]) -> Option<(&'static Encoding, usize)> { match bytes { // with BOM _ if bytes.starts_with(UTF16_BE_BOM) => Some((UTF_16BE, 2)), _ if bytes.starts_with(UTF16_LE_BOM) => Some((UTF_16LE, 2)), _ if bytes.starts_with(UTF8_BOM) => Some((UTF_8, 3)), // without BOM _ if bytes.starts_with(&[0x00, b'<', 0x00, b'?']) => Some((UTF_16BE, 0)), // Some BE encoding, for example, UTF-16 or ISO-10646-UCS-2 _ if bytes.starts_with(&[b'<', 0x00, b'?', 0x00]) => Some((UTF_16LE, 0)), // Some LE 
encoding, for example, UTF-16 or ISO-10646-UCS-2 _ if bytes.starts_with(&[b'<', b'?', b'x', b'm']) => Some((UTF_8, 0)), // Some ASCII compatible _ => None, } } quick-xml-0.27.1/src/errors.rs000064400000000000000000000266610072674642500143410ustar 00000000000000//! Error management module use crate::escape::EscapeError; use crate::events::attributes::AttrError; use crate::utils::write_byte_string; use std::fmt; use std::io::Error as IoError; use std::str::Utf8Error; use std::string::FromUtf8Error; use std::sync::Arc; /// The error type used by this crate. #[derive(Clone, Debug)] pub enum Error { /// IO error. /// /// `Arc` instead of `IoError` since `IoError` is not `Clone`. Io(Arc), /// Input decoding error. If `encoding` feature is disabled, contains `None`, /// otherwise contains the UTF-8 decoding error NonDecodable(Option), /// Unexpected End of File UnexpectedEof(String), /// End event mismatch EndEventMismatch { /// Expected end event expected: String, /// Found end event found: String, }, /// Unexpected token UnexpectedToken(String), /// Unexpected UnexpectedBang(u8), /// Text not found, expected `Event::Text` TextNotFound, /// `Event::XmlDecl` must start with *version* attribute XmlDeclWithoutVersion(Option), /// Attribute parsing error InvalidAttr(AttrError), /// Escape error EscapeError(EscapeError), /// Specified namespace prefix is unknown, cannot resolve namespace for it UnknownPrefix(Vec), } impl From for Error { /// Creates a new `Error::Io` from the given error #[inline] fn from(error: IoError) -> Error { Error::Io(Arc::new(error)) } } impl From for Error { /// Creates a new `Error::NonDecodable` from the given error #[inline] fn from(error: Utf8Error) -> Error { Error::NonDecodable(Some(error)) } } impl From for Error { /// Creates a new `Error::Utf8` from the given error #[inline] fn from(error: FromUtf8Error) -> Error { error.utf8_error().into() } } impl From for Error { /// Creates a new `Error::EscapeError` from the given error #[inline] fn 
from(error: EscapeError) -> Error { Error::EscapeError(error) } } impl From for Error { #[inline] fn from(error: AttrError) -> Self { Error::InvalidAttr(error) } } /// A specialized `Result` type where the error is hard-wired to [`Error`]. pub type Result = std::result::Result; impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Error::Io(e) => write!(f, "I/O error: {}", e), Error::NonDecodable(None) => write!(f, "Malformed input, decoding impossible"), Error::NonDecodable(Some(e)) => write!(f, "Malformed UTF-8 input: {}", e), Error::UnexpectedEof(e) => write!(f, "Unexpected EOF during reading {}", e), Error::EndEventMismatch { expected, found } => { write!(f, "Expecting found ", expected, found) } Error::UnexpectedToken(e) => write!(f, "Unexpected token '{}'", e), Error::UnexpectedBang(b) => write!( f, "Only Comment (`--`), CDATA (`[CDATA[`) and DOCTYPE (`DOCTYPE`) nodes can start with a '!', but symbol `{}` found", *b as char ), Error::TextNotFound => write!(f, "Cannot read text, expecting Event::Text"), Error::XmlDeclWithoutVersion(e) => write!( f, "XmlDecl must start with 'version' attribute, found {:?}", e ), Error::InvalidAttr(e) => write!(f, "error while parsing attribute: {}", e), Error::EscapeError(e) => write!(f, "{}", e), Error::UnknownPrefix(prefix) => { f.write_str("Unknown namespace prefix '")?; write_byte_string(f, prefix)?; f.write_str("'") } } } } impl std::error::Error for Error { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { match self { Error::Io(e) => Some(e), Error::NonDecodable(Some(e)) => Some(e), Error::InvalidAttr(e) => Some(e), Error::EscapeError(e) => Some(e), _ => None, } } } #[cfg(feature = "serialize")] pub mod serialize { //! 
A module to handle serde (de)serialization errors use super::*; use crate::utils::write_byte_string; use std::borrow::Cow; #[cfg(feature = "overlapped-lists")] use std::num::NonZeroUsize; use std::num::{ParseFloatError, ParseIntError}; /// (De)serialization error #[derive(Clone, Debug)] pub enum DeError { /// Serde custom error Custom(String), /// Xml parsing error InvalidXml(Error), /// Cannot parse to integer InvalidInt(ParseIntError), /// Cannot parse to float InvalidFloat(ParseFloatError), /// Cannot parse specified value to boolean InvalidBoolean(String), /// This error indicates an error in the [`Deserialize`](serde::Deserialize) /// implementation when read a map or a struct: `MapAccess::next_value[_seed]` /// was called before `MapAccess::next_key[_seed]`. /// /// You should check your types, that implements corresponding trait. KeyNotRead, /// Deserializer encounter a start tag with a specified name when it is /// not expecting. This happens when you try to deserialize a primitive /// value (numbers, strings, booleans) from an XML element. UnexpectedStart(Vec), /// Deserializer encounter an end tag with a specified name when it is /// not expecting. Usually that should not be possible, because XML reader /// is not able to produce such stream of events that lead to this error. /// /// If you get this error this likely indicates and error in the `quick_xml`. /// Please open an issue at , provide /// your Rust code and XML input. UnexpectedEnd(Vec), /// The [`Reader`] produced [`Event::Eof`] when it is not expecting, /// for example, after producing [`Event::Start`] but before corresponding /// [`Event::End`]. /// /// [`Reader`]: crate::reader::Reader /// [`Event::Eof`]: crate::events::Event::Eof /// [`Event::Start`]: crate::events::Event::Start /// [`Event::End`]: crate::events::Event::End UnexpectedEof, /// This error indicates that [`deserialize_struct`] was called, but there /// is no any XML element in the input. 
That means that you try to deserialize /// a struct not from an XML element. /// /// [`deserialize_struct`]: serde::de::Deserializer::deserialize_struct ExpectedStart, /// An attempt to deserialize to a type, that is not supported by the XML /// store at current position, for example, attempt to deserialize `struct` /// from attribute or attempt to deserialize binary data. /// /// Serialized type cannot be represented in an XML due to violation of the /// XML rules in the final XML document. For example, attempt to serialize /// a `HashMap<{integer}, ...>` would cause this error because [XML name] /// cannot start from a digit or a hyphen (minus sign). The same result /// would occur if map key is a complex type that cannot be serialized as /// a primitive type (i.e. string, char, bool, unit struct or unit variant). /// /// [XML name]: https://www.w3.org/TR/REC-xml/#sec-common-syn Unsupported(Cow<'static, str>), /// Too many events were skipped while deserializing a sequence, event limit /// exceeded. 
The limit was provided as an argument #[cfg(feature = "overlapped-lists")] TooManyEvents(NonZeroUsize), } impl fmt::Display for DeError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { DeError::Custom(s) => write!(f, "{}", s), DeError::InvalidXml(e) => write!(f, "{}", e), DeError::InvalidInt(e) => write!(f, "{}", e), DeError::InvalidFloat(e) => write!(f, "{}", e), DeError::InvalidBoolean(v) => write!(f, "Invalid boolean value '{}'", v), DeError::KeyNotRead => write!(f, "Invalid `Deserialize` implementation: `MapAccess::next_value[_seed]` was called before `MapAccess::next_key[_seed]`"), DeError::UnexpectedStart(e) => { f.write_str("Unexpected `Event::Start(")?; write_byte_string(f, e)?; f.write_str(")`") } DeError::UnexpectedEnd(e) => { f.write_str("Unexpected `Event::End(")?; write_byte_string(f, e)?; f.write_str(")`") } DeError::UnexpectedEof => write!(f, "Unexpected `Event::Eof`"), DeError::ExpectedStart => write!(f, "Expecting `Event::Start`"), DeError::Unsupported(s) => write!(f, "Unsupported operation: {}", s), #[cfg(feature = "overlapped-lists")] DeError::TooManyEvents(s) => write!(f, "Deserializer buffers {} events, limit exceeded", s), } } } impl ::std::error::Error for DeError { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { match self { DeError::InvalidXml(e) => Some(e), DeError::InvalidInt(e) => Some(e), DeError::InvalidFloat(e) => Some(e), _ => None, } } } impl serde::de::Error for DeError { fn custom(msg: T) -> Self { DeError::Custom(msg.to_string()) } } impl serde::ser::Error for DeError { fn custom(msg: T) -> Self { DeError::Custom(msg.to_string()) } } impl From for DeError { #[inline] fn from(e: Error) -> Self { Self::InvalidXml(e) } } impl From for DeError { #[inline] fn from(e: EscapeError) -> Self { Self::InvalidXml(e.into()) } } impl From for DeError { #[inline] fn from(e: Utf8Error) -> Self { Self::InvalidXml(e.into()) } } impl From for DeError { #[inline] fn from(e: FromUtf8Error) -> Self { 
Self::InvalidXml(e.into()) } } impl From for DeError { #[inline] fn from(e: AttrError) -> Self { Self::InvalidXml(e.into()) } } impl From for DeError { #[inline] fn from(e: ParseIntError) -> Self { Self::InvalidInt(e) } } impl From for DeError { #[inline] fn from(e: ParseFloatError) -> Self { Self::InvalidFloat(e) } } impl From for DeError { #[inline] fn from(e: fmt::Error) -> Self { Self::Custom(e.to_string()) } } } quick-xml-0.27.1/src/escapei.rs000064400000000000000000002061270072674642500144330ustar 00000000000000//! Manage xml character escapes use memchr::memchr2_iter; use std::borrow::Cow; use std::ops::Range; #[cfg(test)] use pretty_assertions::assert_eq; /// Error for XML escape / unescape. #[derive(Clone, Debug)] pub enum EscapeError { /// Entity with Null character EntityWithNull(Range), /// Unrecognized escape symbol UnrecognizedSymbol(Range, String), /// Cannot find `;` after `&` UnterminatedEntity(Range), /// Cannot convert Hexa to utf8 TooLongHexadecimal, /// Character is not a valid hexadecimal value InvalidHexadecimal(char), /// Cannot convert decimal to hexa TooLongDecimal, /// Character is not a valid decimal value InvalidDecimal(char), /// Not a valid unicode codepoint InvalidCodepoint(u32), } impl std::fmt::Display for EscapeError { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { EscapeError::EntityWithNull(e) => write!( f, "Error while escaping character at range {:?}: Null character entity not allowed", e ), EscapeError::UnrecognizedSymbol(rge, res) => write!( f, "Error while escaping character at range {:?}: Unrecognized escape symbol: {:?}", rge, res ), EscapeError::UnterminatedEntity(e) => write!( f, "Error while escaping character at range {:?}: Cannot find ';' after '&'", e ), EscapeError::TooLongHexadecimal => write!(f, "Cannot convert hexadecimal to utf8"), EscapeError::InvalidHexadecimal(e) => { write!(f, "'{}' is not a valid hexadecimal character", e) } EscapeError::TooLongDecimal => write!(f, "Cannot 
convert decimal to utf8"), EscapeError::InvalidDecimal(e) => write!(f, "'{}' is not a valid decimal character", e), EscapeError::InvalidCodepoint(n) => write!(f, "'{}' is not a valid codepoint", n), } } } impl std::error::Error for EscapeError {} /// Escapes an `&str` and replaces all xml special characters (`<`, `>`, `&`, `'`, `"`) /// with their corresponding xml escaped value. /// /// This function performs following replacements: /// /// | Character | Replacement /// |-----------|------------ /// | `<` | `<` /// | `>` | `>` /// | `&` | `&` /// | `'` | `'` /// | `"` | `"` pub fn escape(raw: &str) -> Cow { _escape(raw, |ch| matches!(ch, b'<' | b'>' | b'&' | b'\'' | b'\"')) } /// Escapes an `&str` and replaces xml special characters (`<`, `>`, `&`) /// with their corresponding xml escaped value. /// /// Should only be used for escaping text content. In XML text content, it is allowed /// (though not recommended) to leave the quote special characters `"` and `'` unescaped. /// /// This function performs following replacements: /// /// | Character | Replacement /// |-----------|------------ /// | `<` | `<` /// | `>` | `>` /// | `&` | `&` pub fn partial_escape(raw: &str) -> Cow { _escape(raw, |ch| matches!(ch, b'<' | b'>' | b'&')) } /// Escapes an `&str` and replaces a subset of xml special characters (`<`, `>`, /// `&`, `'`, `"`) with their corresponding xml escaped value. 
pub(crate) fn _escape bool>(raw: &str, escape_chars: F) -> Cow { let bytes = raw.as_bytes(); let mut escaped = None; let mut iter = bytes.iter(); let mut pos = 0; while let Some(i) = iter.position(|&b| escape_chars(b)) { if escaped.is_none() { escaped = Some(Vec::with_capacity(raw.len())); } let escaped = escaped.as_mut().expect("initialized"); let new_pos = pos + i; escaped.extend_from_slice(&bytes[pos..new_pos]); match bytes[new_pos] { b'<' => escaped.extend_from_slice(b"<"), b'>' => escaped.extend_from_slice(b">"), b'\'' => escaped.extend_from_slice(b"'"), b'&' => escaped.extend_from_slice(b"&"), b'"' => escaped.extend_from_slice(b"""), // This set of escapes handles characters that should be escaped // in elements of xs:lists, because those characters works as // delimiters of list elements b'\t' => escaped.extend_from_slice(b" "), b'\n' => escaped.extend_from_slice(b" "), b'\r' => escaped.extend_from_slice(b" "), b' ' => escaped.extend_from_slice(b" "), _ => unreachable!( "Only '<', '>','\', '&', '\"', '\\t', '\\r', '\\n', and ' ' are escaped" ), } pos = new_pos + 1; } if let Some(mut escaped) = escaped { if let Some(raw) = bytes.get(pos..) { escaped.extend_from_slice(raw); } // SAFETY: we operate on UTF-8 input and search for an one byte chars only, // so all slices that was put to the `escaped` is a valid UTF-8 encoded strings // TODO: Can be replaced with `unsafe { String::from_utf8_unchecked() }` // if unsafe code will be allowed Cow::Owned(String::from_utf8(escaped).unwrap()) } else { Cow::Borrowed(raw) } } /// Unescape an `&str` and replaces all xml escaped characters (`&...;`) into /// their corresponding value. /// /// If feature `escape-html` is enabled, then recognizes all [HTML5 escapes]. 
/// /// [HTML5 escapes]: https://dev.w3.org/html5/html-author/charref pub fn unescape(raw: &str) -> Result, EscapeError> { unescape_with(raw, |_| None) } /// Unescape an `&str` and replaces all xml escaped characters (`&...;`) into /// their corresponding value, using a resolver function for custom entities. /// /// If feature `escape-html` is enabled, then recognizes all [HTML5 escapes]. /// /// [HTML5 escapes]: https://dev.w3.org/html5/html-author/charref pub fn unescape_with<'input, 'entity, F>( raw: &'input str, resolve_entity: F, ) -> Result, EscapeError> where // the lifetime of the output comes from a capture or is `'static` F: Fn(&str) -> Option<&'entity str>, { let bytes = raw.as_bytes(); let mut unescaped = None; let mut last_end = 0; let mut iter = memchr2_iter(b'&', b';', bytes); while let Some(start) = iter.by_ref().find(|p| bytes[*p] == b'&') { match iter.next() { Some(end) if bytes[end] == b';' => { // append valid data if unescaped.is_none() { unescaped = Some(String::with_capacity(raw.len())); } let unescaped = unescaped.as_mut().expect("initialized"); unescaped.push_str(&raw[last_end..start]); // search for character correctness let pat = &raw[start + 1..end]; if pat.starts_with('#') { let entity = &pat[1..]; // starts after the # let codepoint = parse_number(entity, start..end)?; unescaped.push_str(codepoint.encode_utf8(&mut [0u8; 4])); } else if let Some(value) = named_entity(pat) { unescaped.push_str(value); } else if let Some(value) = resolve_entity(pat) { unescaped.push_str(value); } else { return Err(EscapeError::UnrecognizedSymbol( start + 1..end, pat.to_string(), )); } last_end = end + 1; } _ => return Err(EscapeError::UnterminatedEntity(start..raw.len())), } } if let Some(mut unescaped) = unescaped { if let Some(raw) = raw.get(last_end..) 
{ unescaped.push_str(raw); } Ok(Cow::Owned(unescaped)) } else { Ok(Cow::Borrowed(raw)) } } #[cfg(not(feature = "escape-html"))] fn named_entity(name: &str) -> Option<&str> { // match over strings are not allowed in const functions let s = match name.as_bytes() { b"lt" => "<", b"gt" => ">", b"amp" => "&", b"apos" => "'", b"quot" => "\"", _ => return None, }; Some(s) } #[cfg(feature = "escape-html")] fn named_entity(name: &str) -> Option<&str> { // imported from https://dev.w3.org/html5/html-author/charref // match over strings are not allowed in const functions //TODO: automate up-to-dating using https://html.spec.whatwg.org/entities.json let s = match name.as_bytes() { b"Tab" => "\u{09}", b"NewLine" => "\u{0A}", b"excl" => "\u{21}", b"quot" | b"QUOT" => "\u{22}", b"num" => "\u{23}", b"dollar" => "\u{24}", b"percnt" => "\u{25}", b"amp" | b"AMP" => "\u{26}", b"apos" => "\u{27}", b"lpar" => "\u{28}", b"rpar" => "\u{29}", b"ast" | b"midast" => "\u{2A}", b"plus" => "\u{2B}", b"comma" => "\u{2C}", b"period" => "\u{2E}", b"sol" => "\u{2F}", b"colon" => "\u{3A}", b"semi" => "\u{3B}", b"lt" | b"LT" => "\u{3C}", b"equals" => "\u{3D}", b"gt" | b"GT" => "\u{3E}", b"quest" => "\u{3F}", b"commat" => "\u{40}", b"lsqb" | b"lbrack" => "\u{5B}", b"bsol" => "\u{5C}", b"rsqb" | b"rbrack" => "\u{5D}", b"Hat" => "\u{5E}", b"lowbar" => "\u{5F}", b"grave" | b"DiacriticalGrave" => "\u{60}", b"lcub" | b"lbrace" => "\u{7B}", b"verbar" | b"vert" | b"VerticalLine" => "\u{7C}", b"rcub" | b"rbrace" => "\u{7D}", b"nbsp" | b"NonBreakingSpace" => "\u{A0}", b"iexcl" => "\u{A1}", b"cent" => "\u{A2}", b"pound" => "\u{A3}", b"curren" => "\u{A4}", b"yen" => "\u{A5}", b"brvbar" => "\u{A6}", b"sect" => "\u{A7}", b"Dot" | b"die" | b"DoubleDot" | b"uml" => "\u{A8}", b"copy" | b"COPY" => "\u{A9}", b"ordf" => "\u{AA}", b"laquo" => "\u{AB}", b"not" => "\u{AC}", b"shy" => "\u{AD}", b"reg" | b"circledR" | b"REG" => "\u{AE}", b"macr" | b"OverBar" | b"strns" => "\u{AF}", b"deg" => "\u{B0}", b"plusmn" | b"pm" | 
b"PlusMinus" => "\u{B1}", b"sup2" => "\u{B2}", b"sup3" => "\u{B3}", b"acute" | b"DiacriticalAcute" => "\u{B4}", b"micro" => "\u{B5}", b"para" => "\u{B6}", b"middot" | b"centerdot" | b"CenterDot" => "\u{B7}", b"cedil" | b"Cedilla" => "\u{B8}", b"sup1" => "\u{B9}", b"ordm" => "\u{BA}", b"raquo" => "\u{BB}", b"frac14" => "\u{BC}", b"frac12" | b"half" => "\u{BD}", b"frac34" => "\u{BE}", b"iquest" => "\u{BF}", b"Agrave" => "\u{C0}", b"Aacute" => "\u{C1}", b"Acirc" => "\u{C2}", b"Atilde" => "\u{C3}", b"Auml" => "\u{C4}", b"Aring" => "\u{C5}", b"AElig" => "\u{C6}", b"Ccedil" => "\u{C7}", b"Egrave" => "\u{C8}", b"Eacute" => "\u{C9}", b"Ecirc" => "\u{CA}", b"Euml" => "\u{CB}", b"Igrave" => "\u{CC}", b"Iacute" => "\u{CD}", b"Icirc" => "\u{CE}", b"Iuml" => "\u{CF}", b"ETH" => "\u{D0}", b"Ntilde" => "\u{D1}", b"Ograve" => "\u{D2}", b"Oacute" => "\u{D3}", b"Ocirc" => "\u{D4}", b"Otilde" => "\u{D5}", b"Ouml" => "\u{D6}", b"times" => "\u{D7}", b"Oslash" => "\u{D8}", b"Ugrave" => "\u{D9}", b"Uacute" => "\u{DA}", b"Ucirc" => "\u{DB}", b"Uuml" => "\u{DC}", b"Yacute" => "\u{DD}", b"THORN" => "\u{DE}", b"szlig" => "\u{DF}", b"agrave" => "\u{E0}", b"aacute" => "\u{E1}", b"acirc" => "\u{E2}", b"atilde" => "\u{E3}", b"auml" => "\u{E4}", b"aring" => "\u{E5}", b"aelig" => "\u{E6}", b"ccedil" => "\u{E7}", b"egrave" => "\u{E8}", b"eacute" => "\u{E9}", b"ecirc" => "\u{EA}", b"euml" => "\u{EB}", b"igrave" => "\u{EC}", b"iacute" => "\u{ED}", b"icirc" => "\u{EE}", b"iuml" => "\u{EF}", b"eth" => "\u{F0}", b"ntilde" => "\u{F1}", b"ograve" => "\u{F2}", b"oacute" => "\u{F3}", b"ocirc" => "\u{F4}", b"otilde" => "\u{F5}", b"ouml" => "\u{F6}", b"divide" | b"div" => "\u{F7}", b"oslash" => "\u{F8}", b"ugrave" => "\u{F9}", b"uacute" => "\u{FA}", b"ucirc" => "\u{FB}", b"uuml" => "\u{FC}", b"yacute" => "\u{FD}", b"thorn" => "\u{FE}", b"yuml" => "\u{FF}", b"Amacr" => "\u{10}", b"amacr" => "\u{10}", b"Abreve" => "\u{10}", b"abreve" => "\u{10}", b"Aogon" => "\u{10}", b"aogon" => "\u{10}", b"Cacute" => 
"\u{10}", b"cacute" => "\u{10}", b"Ccirc" => "\u{10}", b"ccirc" => "\u{10}", b"Cdot" => "\u{10}", b"cdot" => "\u{10}", b"Ccaron" => "\u{10}", b"ccaron" => "\u{10}", b"Dcaron" => "\u{10}", b"dcaron" => "\u{10}", b"Dstrok" => "\u{11}", b"dstrok" => "\u{11}", b"Emacr" => "\u{11}", b"emacr" => "\u{11}", b"Edot" => "\u{11}", b"edot" => "\u{11}", b"Eogon" => "\u{11}", b"eogon" => "\u{11}", b"Ecaron" => "\u{11}", b"ecaron" => "\u{11}", b"Gcirc" => "\u{11}", b"gcirc" => "\u{11}", b"Gbreve" => "\u{11}", b"gbreve" => "\u{11}", b"Gdot" => "\u{12}", b"gdot" => "\u{12}", b"Gcedil" => "\u{12}", b"Hcirc" => "\u{12}", b"hcirc" => "\u{12}", b"Hstrok" => "\u{12}", b"hstrok" => "\u{12}", b"Itilde" => "\u{12}", b"itilde" => "\u{12}", b"Imacr" => "\u{12}", b"imacr" => "\u{12}", b"Iogon" => "\u{12}", b"iogon" => "\u{12}", b"Idot" => "\u{13}", b"imath" | b"inodot" => "\u{13}", b"IJlig" => "\u{13}", b"ijlig" => "\u{13}", b"Jcirc" => "\u{13}", b"jcirc" => "\u{13}", b"Kcedil" => "\u{13}", b"kcedil" => "\u{13}", b"kgreen" => "\u{13}", b"Lacute" => "\u{13}", b"lacute" => "\u{13}", b"Lcedil" => "\u{13}", b"lcedil" => "\u{13}", b"Lcaron" => "\u{13}", b"lcaron" => "\u{13}", b"Lmidot" => "\u{13}", b"lmidot" => "\u{14}", b"Lstrok" => "\u{14}", b"lstrok" => "\u{14}", b"Nacute" => "\u{14}", b"nacute" => "\u{14}", b"Ncedil" => "\u{14}", b"ncedil" => "\u{14}", b"Ncaron" => "\u{14}", b"ncaron" => "\u{14}", b"napos" => "\u{14}", b"ENG" => "\u{14}", b"eng" => "\u{14}", b"Omacr" => "\u{14}", b"omacr" => "\u{14}", b"Odblac" => "\u{15}", b"odblac" => "\u{15}", b"OElig" => "\u{15}", b"oelig" => "\u{15}", b"Racute" => "\u{15}", b"racute" => "\u{15}", b"Rcedil" => "\u{15}", b"rcedil" => "\u{15}", b"Rcaron" => "\u{15}", b"rcaron" => "\u{15}", b"Sacute" => "\u{15}", b"sacute" => "\u{15}", b"Scirc" => "\u{15}", b"scirc" => "\u{15}", b"Scedil" => "\u{15}", b"scedil" => "\u{15}", b"Scaron" => "\u{16}", b"scaron" => "\u{16}", b"Tcedil" => "\u{16}", b"tcedil" => "\u{16}", b"Tcaron" => "\u{16}", b"tcaron" => "\u{16}", 
b"Tstrok" => "\u{16}", b"tstrok" => "\u{16}", b"Utilde" => "\u{16}", b"utilde" => "\u{16}", b"Umacr" => "\u{16}", b"umacr" => "\u{16}", b"Ubreve" => "\u{16}", b"ubreve" => "\u{16}", b"Uring" => "\u{16}", b"uring" => "\u{16}", b"Udblac" => "\u{17}", b"udblac" => "\u{17}", b"Uogon" => "\u{17}", b"uogon" => "\u{17}", b"Wcirc" => "\u{17}", b"wcirc" => "\u{17}", b"Ycirc" => "\u{17}", b"ycirc" => "\u{17}", b"Yuml" => "\u{17}", b"Zacute" => "\u{17}", b"zacute" => "\u{17}", b"Zdot" => "\u{17}", b"zdot" => "\u{17}", b"Zcaron" => "\u{17}", b"zcaron" => "\u{17}", b"fnof" => "\u{19}", b"imped" => "\u{1B}", b"gacute" => "\u{1F}", b"jmath" => "\u{23}", b"circ" => "\u{2C}", b"caron" | b"Hacek" => "\u{2C}", b"breve" | b"Breve" => "\u{2D}", b"dot" | b"DiacriticalDot" => "\u{2D}", b"ring" => "\u{2D}", b"ogon" => "\u{2D}", b"tilde" | b"DiacriticalTilde" => "\u{2D}", b"dblac" | b"DiacriticalDoubleAcute" => "\u{2D}", b"DownBreve" => "\u{31}", b"UnderBar" => "\u{33}", b"Alpha" => "\u{39}", b"Beta" => "\u{39}", b"Gamma" => "\u{39}", b"Delta" => "\u{39}", b"Epsilon" => "\u{39}", b"Zeta" => "\u{39}", b"Eta" => "\u{39}", b"Theta" => "\u{39}", b"Iota" => "\u{39}", b"Kappa" => "\u{39}", b"Lambda" => "\u{39}", b"Mu" => "\u{39}", b"Nu" => "\u{39}", b"Xi" => "\u{39}", b"Omicron" => "\u{39}", b"Pi" => "\u{3A}", b"Rho" => "\u{3A}", b"Sigma" => "\u{3A}", b"Tau" => "\u{3A}", b"Upsilon" => "\u{3A}", b"Phi" => "\u{3A}", b"Chi" => "\u{3A}", b"Psi" => "\u{3A}", b"Omega" => "\u{3A}", b"alpha" => "\u{3B}", b"beta" => "\u{3B}", b"gamma" => "\u{3B}", b"delta" => "\u{3B}", b"epsiv" | b"varepsilon" | b"epsilon" => "\u{3B}", b"zeta" => "\u{3B}", b"eta" => "\u{3B}", b"theta" => "\u{3B}", b"iota" => "\u{3B}", b"kappa" => "\u{3B}", b"lambda" => "\u{3B}", b"mu" => "\u{3B}", b"nu" => "\u{3B}", b"xi" => "\u{3B}", b"omicron" => "\u{3B}", b"pi" => "\u{3C}", b"rho" => "\u{3C}", b"sigmav" | b"varsigma" | b"sigmaf" => "\u{3C}", b"sigma" => "\u{3C}", b"tau" => "\u{3C}", b"upsi" | b"upsilon" => "\u{3C}", b"phi" | b"phiv" | 
b"varphi" => "\u{3C}", b"chi" => "\u{3C}", b"psi" => "\u{3C}", b"omega" => "\u{3C}", b"thetav" | b"vartheta" | b"thetasym" => "\u{3D}", b"Upsi" | b"upsih" => "\u{3D}", b"straightphi" => "\u{3D}", b"piv" | b"varpi" => "\u{3D}", b"Gammad" => "\u{3D}", b"gammad" | b"digamma" => "\u{3D}", b"kappav" | b"varkappa" => "\u{3F}", b"rhov" | b"varrho" => "\u{3F}", b"epsi" | b"straightepsilon" => "\u{3F}", b"bepsi" | b"backepsilon" => "\u{3F}", b"IOcy" => "\u{40}", b"DJcy" => "\u{40}", b"GJcy" => "\u{40}", b"Jukcy" => "\u{40}", b"DScy" => "\u{40}", b"Iukcy" => "\u{40}", b"YIcy" => "\u{40}", b"Jsercy" => "\u{40}", b"LJcy" => "\u{40}", b"NJcy" => "\u{40}", b"TSHcy" => "\u{40}", b"KJcy" => "\u{40}", b"Ubrcy" => "\u{40}", b"DZcy" => "\u{40}", b"Acy" => "\u{41}", b"Bcy" => "\u{41}", b"Vcy" => "\u{41}", b"Gcy" => "\u{41}", b"Dcy" => "\u{41}", b"IEcy" => "\u{41}", b"ZHcy" => "\u{41}", b"Zcy" => "\u{41}", b"Icy" => "\u{41}", b"Jcy" => "\u{41}", b"Kcy" => "\u{41}", b"Lcy" => "\u{41}", b"Mcy" => "\u{41}", b"Ncy" => "\u{41}", b"Ocy" => "\u{41}", b"Pcy" => "\u{41}", b"Rcy" => "\u{42}", b"Scy" => "\u{42}", b"Tcy" => "\u{42}", b"Ucy" => "\u{42}", b"Fcy" => "\u{42}", b"KHcy" => "\u{42}", b"TScy" => "\u{42}", b"CHcy" => "\u{42}", b"SHcy" => "\u{42}", b"SHCHcy" => "\u{42}", b"HARDcy" => "\u{42}", b"Ycy" => "\u{42}", b"SOFTcy" => "\u{42}", b"Ecy" => "\u{42}", b"YUcy" => "\u{42}", b"YAcy" => "\u{42}", b"acy" => "\u{43}", b"bcy" => "\u{43}", b"vcy" => "\u{43}", b"gcy" => "\u{43}", b"dcy" => "\u{43}", b"iecy" => "\u{43}", b"zhcy" => "\u{43}", b"zcy" => "\u{43}", b"icy" => "\u{43}", b"jcy" => "\u{43}", b"kcy" => "\u{43}", b"lcy" => "\u{43}", b"mcy" => "\u{43}", b"ncy" => "\u{43}", b"ocy" => "\u{43}", b"pcy" => "\u{43}", b"rcy" => "\u{44}", b"scy" => "\u{44}", b"tcy" => "\u{44}", b"ucy" => "\u{44}", b"fcy" => "\u{44}", b"khcy" => "\u{44}", b"tscy" => "\u{44}", b"chcy" => "\u{44}", b"shcy" => "\u{44}", b"shchcy" => "\u{44}", b"hardcy" => "\u{44}", b"ycy" => "\u{44}", b"softcy" => "\u{44}", b"ecy" => 
"\u{44}", b"yucy" => "\u{44}", b"yacy" => "\u{44}", b"iocy" => "\u{45}", b"djcy" => "\u{45}", b"gjcy" => "\u{45}", b"jukcy" => "\u{45}", b"dscy" => "\u{45}", b"iukcy" => "\u{45}", b"yicy" => "\u{45}", b"jsercy" => "\u{45}", b"ljcy" => "\u{45}", b"njcy" => "\u{45}", b"tshcy" => "\u{45}", b"kjcy" => "\u{45}", b"ubrcy" => "\u{45}", b"dzcy" => "\u{45}", b"ensp" => "\u{2002}", b"emsp" => "\u{2003}", b"emsp13" => "\u{2004}", b"emsp14" => "\u{2005}", b"numsp" => "\u{2007}", b"puncsp" => "\u{2008}", b"thinsp" | b"ThinSpace" => "\u{2009}", b"hairsp" | b"VeryThinSpace" => "\u{200A}", b"ZeroWidthSpace" | b"NegativeVeryThinSpace" | b"NegativeThinSpace" | b"NegativeMediumSpace" | b"NegativeThickSpace" => "\u{200B}", b"zwnj" => "\u{200C}", b"zwj" => "\u{200D}", b"lrm" => "\u{200E}", b"rlm" => "\u{200F}", b"hyphen" | b"dash" => "\u{2010}", b"ndash" => "\u{2013}", b"mdash" => "\u{2014}", b"horbar" => "\u{2015}", b"Verbar" | b"Vert" => "\u{2016}", b"lsquo" | b"OpenCurlyQuote" => "\u{2018}", b"rsquo" | b"rsquor" | b"CloseCurlyQuote" => "\u{2019}", b"lsquor" | b"sbquo" => "\u{201A}", b"ldquo" | b"OpenCurlyDoubleQuote" => "\u{201C}", b"rdquo" | b"rdquor" | b"CloseCurlyDoubleQuote" => "\u{201D}", b"ldquor" | b"bdquo" => "\u{201E}", b"dagger" => "\u{2020}", b"Dagger" | b"ddagger" => "\u{2021}", b"bull" | b"bullet" => "\u{2022}", b"nldr" => "\u{2025}", b"hellip" | b"mldr" => "\u{2026}", b"permil" => "\u{2030}", b"pertenk" => "\u{2031}", b"prime" => "\u{2032}", b"Prime" => "\u{2033}", b"tprime" => "\u{2034}", b"bprime" | b"backprime" => "\u{2035}", b"lsaquo" => "\u{2039}", b"rsaquo" => "\u{203A}", b"oline" => "\u{203E}", b"caret" => "\u{2041}", b"hybull" => "\u{2043}", b"frasl" => "\u{2044}", b"bsemi" => "\u{204F}", b"qprime" => "\u{2057}", b"MediumSpace" => "\u{205F}", b"NoBreak" => "\u{2060}", b"ApplyFunction" | b"af" => "\u{2061}", b"InvisibleTimes" | b"it" => "\u{2062}", b"InvisibleComma" | b"ic" => "\u{2063}", b"euro" => "\u{20AC}", b"tdot" | b"TripleDot" => "\u{20DB}", b"DotDot" => 
"\u{20DC}", b"Copf" | b"complexes" => "\u{2102}", b"incare" => "\u{2105}", b"gscr" => "\u{210A}", b"hamilt" | b"HilbertSpace" | b"Hscr" => "\u{210B}", b"Hfr" | b"Poincareplane" => "\u{210C}", b"quaternions" | b"Hopf" => "\u{210D}", b"planckh" => "\u{210E}", b"planck" | b"hbar" | b"plankv" | b"hslash" => "\u{210F}", b"Iscr" | b"imagline" => "\u{2110}", b"image" | b"Im" | b"imagpart" | b"Ifr" => "\u{2111}", b"Lscr" | b"lagran" | b"Laplacetrf" => "\u{2112}", b"ell" => "\u{2113}", b"Nopf" | b"naturals" => "\u{2115}", b"numero" => "\u{2116}", b"copysr" => "\u{2117}", b"weierp" | b"wp" => "\u{2118}", b"Popf" | b"primes" => "\u{2119}", b"rationals" | b"Qopf" => "\u{211A}", b"Rscr" | b"realine" => "\u{211B}", b"real" | b"Re" | b"realpart" | b"Rfr" => "\u{211C}", b"reals" | b"Ropf" => "\u{211D}", b"rx" => "\u{211E}", b"trade" | b"TRADE" => "\u{2122}", b"integers" | b"Zopf" => "\u{2124}", b"ohm" => "\u{2126}", b"mho" => "\u{2127}", b"Zfr" | b"zeetrf" => "\u{2128}", b"iiota" => "\u{2129}", b"angst" => "\u{212B}", b"bernou" | b"Bernoullis" | b"Bscr" => "\u{212C}", b"Cfr" | b"Cayleys" => "\u{212D}", b"escr" => "\u{212F}", b"Escr" | b"expectation" => "\u{2130}", b"Fscr" | b"Fouriertrf" => "\u{2131}", b"phmmat" | b"Mellintrf" | b"Mscr" => "\u{2133}", b"order" | b"orderof" | b"oscr" => "\u{2134}", b"alefsym" | b"aleph" => "\u{2135}", b"beth" => "\u{2136}", b"gimel" => "\u{2137}", b"daleth" => "\u{2138}", b"CapitalDifferentialD" | b"DD" => "\u{2145}", b"DifferentialD" | b"dd" => "\u{2146}", b"ExponentialE" | b"exponentiale" | b"ee" => "\u{2147}", b"ImaginaryI" | b"ii" => "\u{2148}", b"frac13" => "\u{2153}", b"frac23" => "\u{2154}", b"frac15" => "\u{2155}", b"frac25" => "\u{2156}", b"frac35" => "\u{2157}", b"frac45" => "\u{2158}", b"frac16" => "\u{2159}", b"frac56" => "\u{215A}", b"frac18" => "\u{215B}", b"frac38" => "\u{215C}", b"frac58" => "\u{215D}", b"frac78" => "\u{215E}", b"larr" | b"leftarrow" | b"LeftArrow" | b"slarr" | b"ShortLeftArrow" => "\u{2190}", b"uarr" | b"uparrow" | 
b"UpArrow" | b"ShortUpArrow" => "\u{2191}", b"rarr" | b"rightarrow" | b"RightArrow" | b"srarr" | b"ShortRightArrow" => "\u{2192}", b"darr" | b"downarrow" | b"DownArrow" | b"ShortDownArrow" => "\u{2193}", b"harr" | b"leftrightarrow" | b"LeftRightArrow" => "\u{2194}", b"varr" | b"updownarrow" | b"UpDownArrow" => "\u{2195}", b"nwarr" | b"UpperLeftArrow" | b"nwarrow" => "\u{2196}", b"nearr" | b"UpperRightArrow" | b"nearrow" => "\u{2197}", b"searr" | b"searrow" | b"LowerRightArrow" => "\u{2198}", b"swarr" | b"swarrow" | b"LowerLeftArrow" => "\u{2199}", b"nlarr" | b"nleftarrow" => "\u{219A}", b"nrarr" | b"nrightarrow" => "\u{219B}", b"rarrw" | b"rightsquigarrow" => "\u{219D}", b"Larr" | b"twoheadleftarrow" => "\u{219E}", b"Uarr" => "\u{219F}", b"Rarr" | b"twoheadrightarrow" => "\u{21A0}", b"Darr" => "\u{21A1}", b"larrtl" | b"leftarrowtail" => "\u{21A2}", b"rarrtl" | b"rightarrowtail" => "\u{21A3}", b"LeftTeeArrow" | b"mapstoleft" => "\u{21A4}", b"UpTeeArrow" | b"mapstoup" => "\u{21A5}", b"map" | b"RightTeeArrow" | b"mapsto" => "\u{21A6}", b"DownTeeArrow" | b"mapstodown" => "\u{21A7}", b"larrhk" | b"hookleftarrow" => "\u{21A9}", b"rarrhk" | b"hookrightarrow" => "\u{21AA}", b"larrlp" | b"looparrowleft" => "\u{21AB}", b"rarrlp" | b"looparrowright" => "\u{21AC}", b"harrw" | b"leftrightsquigarrow" => "\u{21AD}", b"nharr" | b"nleftrightarrow" => "\u{21AE}", b"lsh" | b"Lsh" => "\u{21B0}", b"rsh" | b"Rsh" => "\u{21B1}", b"ldsh" => "\u{21B2}", b"rdsh" => "\u{21B3}", b"crarr" => "\u{21B5}", b"cularr" | b"curvearrowleft" => "\u{21B6}", b"curarr" | b"curvearrowright" => "\u{21B7}", b"olarr" | b"circlearrowleft" => "\u{21BA}", b"orarr" | b"circlearrowright" => "\u{21BB}", b"lharu" | b"LeftVector" | b"leftharpoonup" => "\u{21BC}", b"lhard" | b"leftharpoondown" | b"DownLeftVector" => "\u{21BD}", b"uharr" | b"upharpoonright" | b"RightUpVector" => "\u{21BE}", b"uharl" | b"upharpoonleft" | b"LeftUpVector" => "\u{21BF}", b"rharu" | b"RightVector" | b"rightharpoonup" => "\u{21C0}", b"rhard" 
| b"rightharpoondown" | b"DownRightVector" => "\u{21C1}", b"dharr" | b"RightDownVector" | b"downharpoonright" => "\u{21C2}", b"dharl" | b"LeftDownVector" | b"downharpoonleft" => "\u{21C3}", b"rlarr" | b"rightleftarrows" | b"RightArrowLeftArrow" => "\u{21C4}", b"udarr" | b"UpArrowDownArrow" => "\u{21C5}", b"lrarr" | b"leftrightarrows" | b"LeftArrowRightArrow" => "\u{21C6}", b"llarr" | b"leftleftarrows" => "\u{21C7}", b"uuarr" | b"upuparrows" => "\u{21C8}", b"rrarr" | b"rightrightarrows" => "\u{21C9}", b"ddarr" | b"downdownarrows" => "\u{21CA}", b"lrhar" | b"ReverseEquilibrium" | b"leftrightharpoons" => "\u{21CB}", b"rlhar" | b"rightleftharpoons" | b"Equilibrium" => "\u{21CC}", b"nlArr" | b"nLeftarrow" => "\u{21CD}", b"nhArr" | b"nLeftrightarrow" => "\u{21CE}", b"nrArr" | b"nRightarrow" => "\u{21CF}", b"lArr" | b"Leftarrow" | b"DoubleLeftArrow" => "\u{21D0}", b"uArr" | b"Uparrow" | b"DoubleUpArrow" => "\u{21D1}", b"rArr" | b"Rightarrow" | b"Implies" | b"DoubleRightArrow" => "\u{21D2}", b"dArr" | b"Downarrow" | b"DoubleDownArrow" => "\u{21D3}", b"hArr" | b"Leftrightarrow" | b"DoubleLeftRightArrow" | b"iff" => "\u{21D4}", b"vArr" | b"Updownarrow" | b"DoubleUpDownArrow" => "\u{21D5}", b"nwArr" => "\u{21D6}", b"neArr" => "\u{21D7}", b"seArr" => "\u{21D8}", b"swArr" => "\u{21D9}", b"lAarr" | b"Lleftarrow" => "\u{21DA}", b"rAarr" | b"Rrightarrow" => "\u{21DB}", b"zigrarr" => "\u{21DD}", b"larrb" | b"LeftArrowBar" => "\u{21E4}", b"rarrb" | b"RightArrowBar" => "\u{21E5}", b"duarr" | b"DownArrowUpArrow" => "\u{21F5}", b"loarr" => "\u{21FD}", b"roarr" => "\u{21FE}", b"hoarr" => "\u{21FF}", b"forall" | b"ForAll" => "\u{2200}", b"comp" | b"complement" => "\u{2201}", b"part" | b"PartialD" => "\u{2202}", b"exist" | b"Exists" => "\u{2203}", b"nexist" | b"NotExists" | b"nexists" => "\u{2204}", b"empty" | b"emptyset" | b"emptyv" | b"varnothing" => "\u{2205}", b"nabla" | b"Del" => "\u{2207}", b"isin" | b"isinv" | b"Element" | b"in" => "\u{2208}", b"notin" | b"NotElement" | b"notinva" 
=> "\u{2209}", b"niv" | b"ReverseElement" | b"ni" | b"SuchThat" => "\u{220B}", b"notni" | b"notniva" | b"NotReverseElement" => "\u{220C}", b"prod" | b"Product" => "\u{220F}", b"coprod" | b"Coproduct" => "\u{2210}", b"sum" | b"Sum" => "\u{2211}", b"minus" => "\u{2212}", b"mnplus" | b"mp" | b"MinusPlus" => "\u{2213}", b"plusdo" | b"dotplus" => "\u{2214}", b"setmn" | b"setminus" | b"Backslash" | b"ssetmn" | b"smallsetminus" => "\u{2216}", b"lowast" => "\u{2217}", b"compfn" | b"SmallCircle" => "\u{2218}", b"radic" | b"Sqrt" => "\u{221A}", b"prop" | b"propto" | b"Proportional" | b"vprop" | b"varpropto" => "\u{221D}", b"infin" => "\u{221E}", b"angrt" => "\u{221F}", b"ang" | b"angle" => "\u{2220}", b"angmsd" | b"measuredangle" => "\u{2221}", b"angsph" => "\u{2222}", b"mid" | b"VerticalBar" | b"smid" | b"shortmid" => "\u{2223}", b"nmid" | b"NotVerticalBar" | b"nsmid" | b"nshortmid" => "\u{2224}", b"par" | b"parallel" | b"DoubleVerticalBar" | b"spar" | b"shortparallel" => "\u{2225}", b"npar" | b"nparallel" | b"NotDoubleVerticalBar" | b"nspar" | b"nshortparallel" => { "\u{2226}" } b"and" | b"wedge" => "\u{2227}", b"or" | b"vee" => "\u{2228}", b"cap" => "\u{2229}", b"cup" => "\u{222A}", b"int" | b"Integral" => "\u{222B}", b"Int" => "\u{222C}", b"tint" | b"iiint" => "\u{222D}", b"conint" | b"oint" | b"ContourIntegral" => "\u{222E}", b"Conint" | b"DoubleContourIntegral" => "\u{222F}", b"Cconint" => "\u{2230}", b"cwint" => "\u{2231}", b"cwconint" | b"ClockwiseContourIntegral" => "\u{2232}", b"awconint" | b"CounterClockwiseContourIntegral" => "\u{2233}", b"there4" | b"therefore" | b"Therefore" => "\u{2234}", b"becaus" | b"because" | b"Because" => "\u{2235}", b"ratio" => "\u{2236}", b"Colon" | b"Proportion" => "\u{2237}", b"minusd" | b"dotminus" => "\u{2238}", b"mDDot" => "\u{223A}", b"homtht" => "\u{223B}", b"sim" | b"Tilde" | b"thksim" | b"thicksim" => "\u{223C}", b"bsim" | b"backsim" => "\u{223D}", b"ac" | b"mstpos" => "\u{223E}", b"acd" => "\u{223F}", b"wreath" | 
b"VerticalTilde" | b"wr" => "\u{2240}", b"nsim" | b"NotTilde" => "\u{2241}", b"esim" | b"EqualTilde" | b"eqsim" => "\u{2242}", b"sime" | b"TildeEqual" | b"simeq" => "\u{2243}", b"nsime" | b"nsimeq" | b"NotTildeEqual" => "\u{2244}", b"cong" | b"TildeFullEqual" => "\u{2245}", b"simne" => "\u{2246}", b"ncong" | b"NotTildeFullEqual" => "\u{2247}", b"asymp" | b"ap" | b"TildeTilde" | b"approx" | b"thkap" | b"thickapprox" => "\u{2248}", b"nap" | b"NotTildeTilde" | b"napprox" => "\u{2249}", b"ape" | b"approxeq" => "\u{224A}", b"apid" => "\u{224B}", b"bcong" | b"backcong" => "\u{224C}", b"asympeq" | b"CupCap" => "\u{224D}", b"bump" | b"HumpDownHump" | b"Bumpeq" => "\u{224E}", b"bumpe" | b"HumpEqual" | b"bumpeq" => "\u{224F}", b"esdot" | b"DotEqual" | b"doteq" => "\u{2250}", b"eDot" | b"doteqdot" => "\u{2251}", b"efDot" | b"fallingdotseq" => "\u{2252}", b"erDot" | b"risingdotseq" => "\u{2253}", b"colone" | b"coloneq" | b"Assign" => "\u{2254}", b"ecolon" | b"eqcolon" => "\u{2255}", b"ecir" | b"eqcirc" => "\u{2256}", b"cire" | b"circeq" => "\u{2257}", b"wedgeq" => "\u{2259}", b"veeeq" => "\u{225A}", b"trie" | b"triangleq" => "\u{225C}", b"equest" | b"questeq" => "\u{225F}", b"ne" | b"NotEqual" => "\u{2260}", b"equiv" | b"Congruent" => "\u{2261}", b"nequiv" | b"NotCongruent" => "\u{2262}", b"le" | b"leq" => "\u{2264}", b"ge" | b"GreaterEqual" | b"geq" => "\u{2265}", b"lE" | b"LessFullEqual" | b"leqq" => "\u{2266}", b"gE" | b"GreaterFullEqual" | b"geqq" => "\u{2267}", b"lnE" | b"lneqq" => "\u{2268}", b"gnE" | b"gneqq" => "\u{2269}", b"Lt" | b"NestedLessLess" | b"ll" => "\u{226A}", b"Gt" | b"NestedGreaterGreater" | b"gg" => "\u{226B}", b"twixt" | b"between" => "\u{226C}", b"NotCupCap" => "\u{226D}", b"nlt" | b"NotLess" | b"nless" => "\u{226E}", b"ngt" | b"NotGreater" | b"ngtr" => "\u{226F}", b"nle" | b"NotLessEqual" | b"nleq" => "\u{2270}", b"nge" | b"NotGreaterEqual" | b"ngeq" => "\u{2271}", b"lsim" | b"LessTilde" | b"lesssim" => "\u{2272}", b"gsim" | b"gtrsim" | b"GreaterTilde" 
=> "\u{2273}", b"nlsim" | b"NotLessTilde" => "\u{2274}", b"ngsim" | b"NotGreaterTilde" => "\u{2275}", b"lg" | b"lessgtr" | b"LessGreater" => "\u{2276}", b"gl" | b"gtrless" | b"GreaterLess" => "\u{2277}", b"ntlg" | b"NotLessGreater" => "\u{2278}", b"ntgl" | b"NotGreaterLess" => "\u{2279}", b"pr" | b"Precedes" | b"prec" => "\u{227A}", b"sc" | b"Succeeds" | b"succ" => "\u{227B}", b"prcue" | b"PrecedesSlantEqual" | b"preccurlyeq" => "\u{227C}", b"sccue" | b"SucceedsSlantEqual" | b"succcurlyeq" => "\u{227D}", b"prsim" | b"precsim" | b"PrecedesTilde" => "\u{227E}", b"scsim" | b"succsim" | b"SucceedsTilde" => "\u{227F}", b"npr" | b"nprec" | b"NotPrecedes" => "\u{2280}", b"nsc" | b"nsucc" | b"NotSucceeds" => "\u{2281}", b"sub" | b"subset" => "\u{2282}", b"sup" | b"supset" | b"Superset" => "\u{2283}", b"nsub" => "\u{2284}", b"nsup" => "\u{2285}", b"sube" | b"SubsetEqual" | b"subseteq" => "\u{2286}", b"supe" | b"supseteq" | b"SupersetEqual" => "\u{2287}", b"nsube" | b"nsubseteq" | b"NotSubsetEqual" => "\u{2288}", b"nsupe" | b"nsupseteq" | b"NotSupersetEqual" => "\u{2289}", b"subne" | b"subsetneq" => "\u{228A}", b"supne" | b"supsetneq" => "\u{228B}", b"cupdot" => "\u{228D}", b"uplus" | b"UnionPlus" => "\u{228E}", b"sqsub" | b"SquareSubset" | b"sqsubset" => "\u{228F}", b"sqsup" | b"SquareSuperset" | b"sqsupset" => "\u{2290}", b"sqsube" | b"SquareSubsetEqual" | b"sqsubseteq" => "\u{2291}", b"sqsupe" | b"SquareSupersetEqual" | b"sqsupseteq" => "\u{2292}", b"sqcap" | b"SquareIntersection" => "\u{2293}", b"sqcup" | b"SquareUnion" => "\u{2294}", b"oplus" | b"CirclePlus" => "\u{2295}", b"ominus" | b"CircleMinus" => "\u{2296}", b"otimes" | b"CircleTimes" => "\u{2297}", b"osol" => "\u{2298}", b"odot" | b"CircleDot" => "\u{2299}", b"ocir" | b"circledcirc" => "\u{229A}", b"oast" | b"circledast" => "\u{229B}", b"odash" | b"circleddash" => "\u{229D}", b"plusb" | b"boxplus" => "\u{229E}", b"minusb" | b"boxminus" => "\u{229F}", b"timesb" | b"boxtimes" => "\u{22A0}", b"sdotb" | b"dotsquare" 
=> "\u{22A1}", b"vdash" | b"RightTee" => "\u{22A2}", b"dashv" | b"LeftTee" => "\u{22A3}", b"top" | b"DownTee" => "\u{22A4}", b"bottom" | b"bot" | b"perp" | b"UpTee" => "\u{22A5}", b"models" => "\u{22A7}", b"vDash" | b"DoubleRightTee" => "\u{22A8}", b"Vdash" => "\u{22A9}", b"Vvdash" => "\u{22AA}", b"VDash" => "\u{22AB}", b"nvdash" => "\u{22AC}", b"nvDash" => "\u{22AD}", b"nVdash" => "\u{22AE}", b"nVDash" => "\u{22AF}", b"prurel" => "\u{22B0}", b"vltri" | b"vartriangleleft" | b"LeftTriangle" => "\u{22B2}", b"vrtri" | b"vartriangleright" | b"RightTriangle" => "\u{22B3}", b"ltrie" | b"trianglelefteq" | b"LeftTriangleEqual" => "\u{22B4}", b"rtrie" | b"trianglerighteq" | b"RightTriangleEqual" => "\u{22B5}", b"origof" => "\u{22B6}", b"imof" => "\u{22B7}", b"mumap" | b"multimap" => "\u{22B8}", b"hercon" => "\u{22B9}", b"intcal" | b"intercal" => "\u{22BA}", b"veebar" => "\u{22BB}", b"barvee" => "\u{22BD}", b"angrtvb" => "\u{22BE}", b"lrtri" => "\u{22BF}", b"xwedge" | b"Wedge" | b"bigwedge" => "\u{22C0}", b"xvee" | b"Vee" | b"bigvee" => "\u{22C1}", b"xcap" | b"Intersection" | b"bigcap" => "\u{22C2}", b"xcup" | b"Union" | b"bigcup" => "\u{22C3}", b"diam" | b"diamond" | b"Diamond" => "\u{22C4}", b"sdot" => "\u{22C5}", b"sstarf" | b"Star" => "\u{22C6}", b"divonx" | b"divideontimes" => "\u{22C7}", b"bowtie" => "\u{22C8}", b"ltimes" => "\u{22C9}", b"rtimes" => "\u{22CA}", b"lthree" | b"leftthreetimes" => "\u{22CB}", b"rthree" | b"rightthreetimes" => "\u{22CC}", b"bsime" | b"backsimeq" => "\u{22CD}", b"cuvee" | b"curlyvee" => "\u{22CE}", b"cuwed" | b"curlywedge" => "\u{22CF}", b"Sub" | b"Subset" => "\u{22D0}", b"Sup" | b"Supset" => "\u{22D1}", b"Cap" => "\u{22D2}", b"Cup" => "\u{22D3}", b"fork" | b"pitchfork" => "\u{22D4}", b"epar" => "\u{22D5}", b"ltdot" | b"lessdot" => "\u{22D6}", b"gtdot" | b"gtrdot" => "\u{22D7}", b"Ll" => "\u{22D8}", b"Gg" | b"ggg" => "\u{22D9}", b"leg" | b"LessEqualGreater" | b"lesseqgtr" => "\u{22DA}", b"gel" | b"gtreqless" | b"GreaterEqualLess" => 
"\u{22DB}", b"cuepr" | b"curlyeqprec" => "\u{22DE}", b"cuesc" | b"curlyeqsucc" => "\u{22DF}", b"nprcue" | b"NotPrecedesSlantEqual" => "\u{22E0}", b"nsccue" | b"NotSucceedsSlantEqual" => "\u{22E1}", b"nsqsube" | b"NotSquareSubsetEqual" => "\u{22E2}", b"nsqsupe" | b"NotSquareSupersetEqual" => "\u{22E3}", b"lnsim" => "\u{22E6}", b"gnsim" => "\u{22E7}", b"prnsim" | b"precnsim" => "\u{22E8}", b"scnsim" | b"succnsim" => "\u{22E9}", b"nltri" | b"ntriangleleft" | b"NotLeftTriangle" => "\u{22EA}", b"nrtri" | b"ntriangleright" | b"NotRightTriangle" => "\u{22EB}", b"nltrie" | b"ntrianglelefteq" | b"NotLeftTriangleEqual" => "\u{22EC}", b"nrtrie" | b"ntrianglerighteq" | b"NotRightTriangleEqual" => "\u{22ED}", b"vellip" => "\u{22EE}", b"ctdot" => "\u{22EF}", b"utdot" => "\u{22F0}", b"dtdot" => "\u{22F1}", b"disin" => "\u{22F2}", b"isinsv" => "\u{22F3}", b"isins" => "\u{22F4}", b"isindot" => "\u{22F5}", b"notinvc" => "\u{22F6}", b"notinvb" => "\u{22F7}", b"isinE" => "\u{22F9}", b"nisd" => "\u{22FA}", b"xnis" => "\u{22FB}", b"nis" => "\u{22FC}", b"notnivc" => "\u{22FD}", b"notnivb" => "\u{22FE}", b"barwed" | b"barwedge" => "\u{2305}", b"Barwed" | b"doublebarwedge" => "\u{2306}", b"lceil" | b"LeftCeiling" => "\u{2308}", b"rceil" | b"RightCeiling" => "\u{2309}", b"lfloor" | b"LeftFloor" => "\u{230A}", b"rfloor" | b"RightFloor" => "\u{230B}", b"drcrop" => "\u{230C}", b"dlcrop" => "\u{230D}", b"urcrop" => "\u{230E}", b"ulcrop" => "\u{230F}", b"bnot" => "\u{2310}", b"profline" => "\u{2312}", b"profsurf" => "\u{2313}", b"telrec" => "\u{2315}", b"target" => "\u{2316}", b"ulcorn" | b"ulcorner" => "\u{231C}", b"urcorn" | b"urcorner" => "\u{231D}", b"dlcorn" | b"llcorner" => "\u{231E}", b"drcorn" | b"lrcorner" => "\u{231F}", b"frown" | b"sfrown" => "\u{2322}", b"smile" | b"ssmile" => "\u{2323}", b"cylcty" => "\u{232D}", b"profalar" => "\u{232E}", b"topbot" => "\u{2336}", b"ovbar" => "\u{233D}", b"solbar" => "\u{233F}", b"angzarr" => "\u{237C}", b"lmoust" | b"lmoustache" => "\u{23B0}", 
b"rmoust" | b"rmoustache" => "\u{23B1}", b"tbrk" | b"OverBracket" => "\u{23B4}", b"bbrk" | b"UnderBracket" => "\u{23B5}", b"bbrktbrk" => "\u{23B6}", b"OverParenthesis" => "\u{23DC}", b"UnderParenthesis" => "\u{23DD}", b"OverBrace" => "\u{23DE}", b"UnderBrace" => "\u{23DF}", b"trpezium" => "\u{23E2}", b"elinters" => "\u{23E7}", b"blank" => "\u{2423}", b"oS" | b"circledS" => "\u{24C8}", b"boxh" | b"HorizontalLine" => "\u{2500}", b"boxv" => "\u{2502}", b"boxdr" => "\u{250C}", b"boxdl" => "\u{2510}", b"boxur" => "\u{2514}", b"boxul" => "\u{2518}", b"boxvr" => "\u{251C}", b"boxvl" => "\u{2524}", b"boxhd" => "\u{252C}", b"boxhu" => "\u{2534}", b"boxvh" => "\u{253C}", b"boxH" => "\u{2550}", b"boxV" => "\u{2551}", b"boxdR" => "\u{2552}", b"boxDr" => "\u{2553}", b"boxDR" => "\u{2554}", b"boxdL" => "\u{2555}", b"boxDl" => "\u{2556}", b"boxDL" => "\u{2557}", b"boxuR" => "\u{2558}", b"boxUr" => "\u{2559}", b"boxUR" => "\u{255A}", b"boxuL" => "\u{255B}", b"boxUl" => "\u{255C}", b"boxUL" => "\u{255D}", b"boxvR" => "\u{255E}", b"boxVr" => "\u{255F}", b"boxVR" => "\u{2560}", b"boxvL" => "\u{2561}", b"boxVl" => "\u{2562}", b"boxVL" => "\u{2563}", b"boxHd" => "\u{2564}", b"boxhD" => "\u{2565}", b"boxHD" => "\u{2566}", b"boxHu" => "\u{2567}", b"boxhU" => "\u{2568}", b"boxHU" => "\u{2569}", b"boxvH" => "\u{256A}", b"boxVh" => "\u{256B}", b"boxVH" => "\u{256C}", b"uhblk" => "\u{2580}", b"lhblk" => "\u{2584}", b"block" => "\u{2588}", b"blk14" => "\u{2591}", b"blk12" => "\u{2592}", b"blk34" => "\u{2593}", b"squ" | b"square" | b"Square" => "\u{25A1}", b"squf" | b"squarf" | b"blacksquare" | b"FilledVerySmallSquare" => "\u{25AA}", b"EmptyVerySmallSquare" => "\u{25AB}", b"rect" => "\u{25AD}", b"marker" => "\u{25AE}", b"fltns" => "\u{25B1}", b"xutri" | b"bigtriangleup" => "\u{25B3}", b"utrif" | b"blacktriangle" => "\u{25B4}", b"utri" | b"triangle" => "\u{25B5}", b"rtrif" | b"blacktriangleright" => "\u{25B8}", b"rtri" | b"triangleright" => "\u{25B9}", b"xdtri" | b"bigtriangledown" => 
"\u{25BD}", b"dtrif" | b"blacktriangledown" => "\u{25BE}", b"dtri" | b"triangledown" => "\u{25BF}", b"ltrif" | b"blacktriangleleft" => "\u{25C2}", b"ltri" | b"triangleleft" => "\u{25C3}", b"loz" | b"lozenge" => "\u{25CA}", b"cir" => "\u{25CB}", b"tridot" => "\u{25EC}", b"xcirc" | b"bigcirc" => "\u{25EF}", b"ultri" => "\u{25F8}", b"urtri" => "\u{25F9}", b"lltri" => "\u{25FA}", b"EmptySmallSquare" => "\u{25FB}", b"FilledSmallSquare" => "\u{25FC}", b"starf" | b"bigstar" => "\u{2605}", b"star" => "\u{2606}", b"phone" => "\u{260E}", b"female" => "\u{2640}", b"male" => "\u{2642}", b"spades" | b"spadesuit" => "\u{2660}", b"clubs" | b"clubsuit" => "\u{2663}", b"hearts" | b"heartsuit" => "\u{2665}", b"diams" | b"diamondsuit" => "\u{2666}", b"sung" => "\u{266A}", b"flat" => "\u{266D}", b"natur" | b"natural" => "\u{266E}", b"sharp" => "\u{266F}", b"check" | b"checkmark" => "\u{2713}", b"cross" => "\u{2717}", b"malt" | b"maltese" => "\u{2720}", b"sext" => "\u{2736}", b"VerticalSeparator" => "\u{2758}", b"lbbrk" => "\u{2772}", b"rbbrk" => "\u{2773}", b"lobrk" | b"LeftDoubleBracket" => "\u{27E6}", b"robrk" | b"RightDoubleBracket" => "\u{27E7}", b"lang" | b"LeftAngleBracket" | b"langle" => "\u{27E8}", b"rang" | b"RightAngleBracket" | b"rangle" => "\u{27E9}", b"Lang" => "\u{27EA}", b"Rang" => "\u{27EB}", b"loang" => "\u{27EC}", b"roang" => "\u{27ED}", b"xlarr" | b"longleftarrow" | b"LongLeftArrow" => "\u{27F5}", b"xrarr" | b"longrightarrow" | b"LongRightArrow" => "\u{27F6}", b"xharr" | b"longleftrightarrow" | b"LongLeftRightArrow" => "\u{27F7}", b"xlArr" | b"Longleftarrow" | b"DoubleLongLeftArrow" => "\u{27F8}", b"xrArr" | b"Longrightarrow" | b"DoubleLongRightArrow" => "\u{27F9}", b"xhArr" | b"Longleftrightarrow" | b"DoubleLongLeftRightArrow" => "\u{27FA}", b"xmap" | b"longmapsto" => "\u{27FC}", b"dzigrarr" => "\u{27FF}", b"nvlArr" => "\u{2902}", b"nvrArr" => "\u{2903}", b"nvHarr" => "\u{2904}", b"Map" => "\u{2905}", b"lbarr" => "\u{290C}", b"rbarr" | b"bkarow" => "\u{290D}", 
b"lBarr" => "\u{290E}", b"rBarr" | b"dbkarow" => "\u{290F}", b"RBarr" | b"drbkarow" => "\u{2910}", b"DDotrahd" => "\u{2911}", b"UpArrowBar" => "\u{2912}", b"DownArrowBar" => "\u{2913}", b"Rarrtl" => "\u{2916}", b"latail" => "\u{2919}", b"ratail" => "\u{291A}", b"lAtail" => "\u{291B}", b"rAtail" => "\u{291C}", b"larrfs" => "\u{291D}", b"rarrfs" => "\u{291E}", b"larrbfs" => "\u{291F}", b"rarrbfs" => "\u{2920}", b"nwarhk" => "\u{2923}", b"nearhk" => "\u{2924}", b"searhk" | b"hksearow" => "\u{2925}", b"swarhk" | b"hkswarow" => "\u{2926}", b"nwnear" => "\u{2927}", b"nesear" | b"toea" => "\u{2928}", b"seswar" | b"tosa" => "\u{2929}", b"swnwar" => "\u{292A}", b"rarrc" => "\u{2933}", b"cudarrr" => "\u{2935}", b"ldca" => "\u{2936}", b"rdca" => "\u{2937}", b"cudarrl" => "\u{2938}", b"larrpl" => "\u{2939}", b"curarrm" => "\u{293C}", b"cularrp" => "\u{293D}", b"rarrpl" => "\u{2945}", b"harrcir" => "\u{2948}", b"Uarrocir" => "\u{2949}", b"lurdshar" => "\u{294A}", b"ldrushar" => "\u{294B}", b"LeftRightVector" => "\u{294E}", b"RightUpDownVector" => "\u{294F}", b"DownLeftRightVector" => "\u{2950}", b"LeftUpDownVector" => "\u{2951}", b"LeftVectorBar" => "\u{2952}", b"RightVectorBar" => "\u{2953}", b"RightUpVectorBar" => "\u{2954}", b"RightDownVectorBar" => "\u{2955}", b"DownLeftVectorBar" => "\u{2956}", b"DownRightVectorBar" => "\u{2957}", b"LeftUpVectorBar" => "\u{2958}", b"LeftDownVectorBar" => "\u{2959}", b"LeftTeeVector" => "\u{295A}", b"RightTeeVector" => "\u{295B}", b"RightUpTeeVector" => "\u{295C}", b"RightDownTeeVector" => "\u{295D}", b"DownLeftTeeVector" => "\u{295E}", b"DownRightTeeVector" => "\u{295F}", b"LeftUpTeeVector" => "\u{2960}", b"LeftDownTeeVector" => "\u{2961}", b"lHar" => "\u{2962}", b"uHar" => "\u{2963}", b"rHar" => "\u{2964}", b"dHar" => "\u{2965}", b"luruhar" => "\u{2966}", b"ldrdhar" => "\u{2967}", b"ruluhar" => "\u{2968}", b"rdldhar" => "\u{2969}", b"lharul" => "\u{296A}", b"llhard" => "\u{296B}", b"rharul" => "\u{296C}", b"lrhard" => "\u{296D}", b"udhar" 
| b"UpEquilibrium" => "\u{296E}", b"duhar" | b"ReverseUpEquilibrium" => "\u{296F}", b"RoundImplies" => "\u{2970}", b"erarr" => "\u{2971}", b"simrarr" => "\u{2972}", b"larrsim" => "\u{2973}", b"rarrsim" => "\u{2974}", b"rarrap" => "\u{2975}", b"ltlarr" => "\u{2976}", b"gtrarr" => "\u{2978}", b"subrarr" => "\u{2979}", b"suplarr" => "\u{297B}", b"lfisht" => "\u{297C}", b"rfisht" => "\u{297D}", b"ufisht" => "\u{297E}", b"dfisht" => "\u{297F}", b"lopar" => "\u{2985}", b"ropar" => "\u{2986}", b"lbrke" => "\u{298B}", b"rbrke" => "\u{298C}", b"lbrkslu" => "\u{298D}", b"rbrksld" => "\u{298E}", b"lbrksld" => "\u{298F}", b"rbrkslu" => "\u{2990}", b"langd" => "\u{2991}", b"rangd" => "\u{2992}", b"lparlt" => "\u{2993}", b"rpargt" => "\u{2994}", b"gtlPar" => "\u{2995}", b"ltrPar" => "\u{2996}", b"vzigzag" => "\u{299A}", b"vangrt" => "\u{299C}", b"angrtvbd" => "\u{299D}", b"ange" => "\u{29A4}", b"range" => "\u{29A5}", b"dwangle" => "\u{29A6}", b"uwangle" => "\u{29A7}", b"angmsdaa" => "\u{29A8}", b"angmsdab" => "\u{29A9}", b"angmsdac" => "\u{29AA}", b"angmsdad" => "\u{29AB}", b"angmsdae" => "\u{29AC}", b"angmsdaf" => "\u{29AD}", b"angmsdag" => "\u{29AE}", b"angmsdah" => "\u{29AF}", b"bemptyv" => "\u{29B0}", b"demptyv" => "\u{29B1}", b"cemptyv" => "\u{29B2}", b"raemptyv" => "\u{29B3}", b"laemptyv" => "\u{29B4}", b"ohbar" => "\u{29B5}", b"omid" => "\u{29B6}", b"opar" => "\u{29B7}", b"operp" => "\u{29B9}", b"olcross" => "\u{29BB}", b"odsold" => "\u{29BC}", b"olcir" => "\u{29BE}", b"ofcir" => "\u{29BF}", b"olt" => "\u{29C0}", b"ogt" => "\u{29C1}", b"cirscir" => "\u{29C2}", b"cirE" => "\u{29C3}", b"solb" => "\u{29C4}", b"bsolb" => "\u{29C5}", b"boxbox" => "\u{29C9}", b"trisb" => "\u{29CD}", b"rtriltri" => "\u{29CE}", b"LeftTriangleBar" => "\u{29CF}", b"RightTriangleBar" => "\u{29D0}", b"race" => "\u{29DA}", b"iinfin" => "\u{29DC}", b"infintie" => "\u{29DD}", b"nvinfin" => "\u{29DE}", b"eparsl" => "\u{29E3}", b"smeparsl" => "\u{29E4}", b"eqvparsl" => "\u{29E5}", b"lozf" | 
b"blacklozenge" => "\u{29EB}", b"RuleDelayed" => "\u{29F4}", b"dsol" => "\u{29F6}", b"xodot" | b"bigodot" => "\u{2A00}", b"xoplus" | b"bigoplus" => "\u{2A01}", b"xotime" | b"bigotimes" => "\u{2A02}", b"xuplus" | b"biguplus" => "\u{2A04}", b"xsqcup" | b"bigsqcup" => "\u{2A06}", b"qint" | b"iiiint" => "\u{2A0C}", b"fpartint" => "\u{2A0D}", b"cirfnint" => "\u{2A10}", b"awint" => "\u{2A11}", b"rppolint" => "\u{2A12}", b"scpolint" => "\u{2A13}", b"npolint" => "\u{2A14}", b"pointint" => "\u{2A15}", b"quatint" => "\u{2A16}", b"intlarhk" => "\u{2A17}", b"pluscir" => "\u{2A22}", b"plusacir" => "\u{2A23}", b"simplus" => "\u{2A24}", b"plusdu" => "\u{2A25}", b"plussim" => "\u{2A26}", b"plustwo" => "\u{2A27}", b"mcomma" => "\u{2A29}", b"minusdu" => "\u{2A2A}", b"loplus" => "\u{2A2D}", b"roplus" => "\u{2A2E}", b"Cross" => "\u{2A2F}", b"timesd" => "\u{2A30}", b"timesbar" => "\u{2A31}", b"smashp" => "\u{2A33}", b"lotimes" => "\u{2A34}", b"rotimes" => "\u{2A35}", b"otimesas" => "\u{2A36}", b"Otimes" => "\u{2A37}", b"odiv" => "\u{2A38}", b"triplus" => "\u{2A39}", b"triminus" => "\u{2A3A}", b"tritime" => "\u{2A3B}", b"iprod" | b"intprod" => "\u{2A3C}", b"amalg" => "\u{2A3F}", b"capdot" => "\u{2A40}", b"ncup" => "\u{2A42}", b"ncap" => "\u{2A43}", b"capand" => "\u{2A44}", b"cupor" => "\u{2A45}", b"cupcap" => "\u{2A46}", b"capcup" => "\u{2A47}", b"cupbrcap" => "\u{2A48}", b"capbrcup" => "\u{2A49}", b"cupcup" => "\u{2A4A}", b"capcap" => "\u{2A4B}", b"ccups" => "\u{2A4C}", b"ccaps" => "\u{2A4D}", b"ccupssm" => "\u{2A50}", b"And" => "\u{2A53}", b"Or" => "\u{2A54}", b"andand" => "\u{2A55}", b"oror" => "\u{2A56}", b"orslope" => "\u{2A57}", b"andslope" => "\u{2A58}", b"andv" => "\u{2A5A}", b"orv" => "\u{2A5B}", b"andd" => "\u{2A5C}", b"ord" => "\u{2A5D}", b"wedbar" => "\u{2A5F}", b"sdote" => "\u{2A66}", b"simdot" => "\u{2A6A}", b"congdot" => "\u{2A6D}", b"easter" => "\u{2A6E}", b"apacir" => "\u{2A6F}", b"apE" => "\u{2A70}", b"eplus" => "\u{2A71}", b"pluse" => "\u{2A72}", b"Esim" => 
"\u{2A73}", b"Colone" => "\u{2A74}", b"Equal" => "\u{2A75}", b"eDDot" | b"ddotseq" => "\u{2A77}", b"equivDD" => "\u{2A78}", b"ltcir" => "\u{2A79}", b"gtcir" => "\u{2A7A}", b"ltquest" => "\u{2A7B}", b"gtquest" => "\u{2A7C}", b"les" | b"LessSlantEqual" | b"leqslant" => "\u{2A7D}", b"ges" | b"GreaterSlantEqual" | b"geqslant" => "\u{2A7E}", b"lesdot" => "\u{2A7F}", b"gesdot" => "\u{2A80}", b"lesdoto" => "\u{2A81}", b"gesdoto" => "\u{2A82}", b"lesdotor" => "\u{2A83}", b"gesdotol" => "\u{2A84}", b"lap" | b"lessapprox" => "\u{2A85}", b"gap" | b"gtrapprox" => "\u{2A86}", b"lne" | b"lneq" => "\u{2A87}", b"gne" | b"gneq" => "\u{2A88}", b"lnap" | b"lnapprox" => "\u{2A89}", b"gnap" | b"gnapprox" => "\u{2A8A}", b"lEg" | b"lesseqqgtr" => "\u{2A8B}", b"gEl" | b"gtreqqless" => "\u{2A8C}", b"lsime" => "\u{2A8D}", b"gsime" => "\u{2A8E}", b"lsimg" => "\u{2A8F}", b"gsiml" => "\u{2A90}", b"lgE" => "\u{2A91}", b"glE" => "\u{2A92}", b"lesges" => "\u{2A93}", b"gesles" => "\u{2A94}", b"els" | b"eqslantless" => "\u{2A95}", b"egs" | b"eqslantgtr" => "\u{2A96}", b"elsdot" => "\u{2A97}", b"egsdot" => "\u{2A98}", b"el" => "\u{2A99}", b"eg" => "\u{2A9A}", b"siml" => "\u{2A9D}", b"simg" => "\u{2A9E}", b"simlE" => "\u{2A9F}", b"simgE" => "\u{2AA0}", b"LessLess" => "\u{2AA1}", b"GreaterGreater" => "\u{2AA2}", b"glj" => "\u{2AA4}", b"gla" => "\u{2AA5}", b"ltcc" => "\u{2AA6}", b"gtcc" => "\u{2AA7}", b"lescc" => "\u{2AA8}", b"gescc" => "\u{2AA9}", b"smt" => "\u{2AAA}", b"lat" => "\u{2AAB}", b"smte" => "\u{2AAC}", b"late" => "\u{2AAD}", b"bumpE" => "\u{2AAE}", b"pre" | b"preceq" | b"PrecedesEqual" => "\u{2AAF}", b"sce" | b"succeq" | b"SucceedsEqual" => "\u{2AB0}", b"prE" => "\u{2AB3}", b"scE" => "\u{2AB4}", b"prnE" | b"precneqq" => "\u{2AB5}", b"scnE" | b"succneqq" => "\u{2AB6}", b"prap" | b"precapprox" => "\u{2AB7}", b"scap" | b"succapprox" => "\u{2AB8}", b"prnap" | b"precnapprox" => "\u{2AB9}", b"scnap" | b"succnapprox" => "\u{2ABA}", b"Pr" => "\u{2ABB}", b"Sc" => "\u{2ABC}", b"subdot" => "\u{2ABD}", 
b"supdot" => "\u{2ABE}", b"subplus" => "\u{2ABF}", b"supplus" => "\u{2AC0}", b"submult" => "\u{2AC1}", b"supmult" => "\u{2AC2}", b"subedot" => "\u{2AC3}", b"supedot" => "\u{2AC4}", b"subE" | b"subseteqq" => "\u{2AC5}", b"supE" | b"supseteqq" => "\u{2AC6}", b"subsim" => "\u{2AC7}", b"supsim" => "\u{2AC8}", b"subnE" | b"subsetneqq" => "\u{2ACB}", b"supnE" | b"supsetneqq" => "\u{2ACC}", b"csub" => "\u{2ACF}", b"csup" => "\u{2AD0}", b"csube" => "\u{2AD1}", b"csupe" => "\u{2AD2}", b"subsup" => "\u{2AD3}", b"supsub" => "\u{2AD4}", b"subsub" => "\u{2AD5}", b"supsup" => "\u{2AD6}", b"suphsub" => "\u{2AD7}", b"supdsub" => "\u{2AD8}", b"forkv" => "\u{2AD9}", b"topfork" => "\u{2ADA}", b"mlcp" => "\u{2ADB}", b"Dashv" | b"DoubleLeftTee" => "\u{2AE4}", b"Vdashl" => "\u{2AE6}", b"Barv" => "\u{2AE7}", b"vBar" => "\u{2AE8}", b"vBarv" => "\u{2AE9}", b"Vbar" => "\u{2AEB}", b"Not" => "\u{2AEC}", b"bNot" => "\u{2AED}", b"rnmid" => "\u{2AEE}", b"cirmid" => "\u{2AEF}", b"midcir" => "\u{2AF0}", b"topcir" => "\u{2AF1}", b"nhpar" => "\u{2AF2}", b"parsim" => "\u{2AF3}", b"parsl" => "\u{2AFD}", b"fflig" => "\u{FB00}", b"filig" => "\u{FB01}", b"fllig" => "\u{FB02}", b"ffilig" => "\u{FB03}", b"ffllig" => "\u{FB04}", b"Ascr" => "\u{1D49}", b"Cscr" => "\u{1D49}", b"Dscr" => "\u{1D49}", b"Gscr" => "\u{1D4A}", b"Jscr" => "\u{1D4A}", b"Kscr" => "\u{1D4A}", b"Nscr" => "\u{1D4A}", b"Oscr" => "\u{1D4A}", b"Pscr" => "\u{1D4A}", b"Qscr" => "\u{1D4A}", b"Sscr" => "\u{1D4A}", b"Tscr" => "\u{1D4A}", b"Uscr" => "\u{1D4B}", b"Vscr" => "\u{1D4B}", b"Wscr" => "\u{1D4B}", b"Xscr" => "\u{1D4B}", b"Yscr" => "\u{1D4B}", b"Zscr" => "\u{1D4B}", b"ascr" => "\u{1D4B}", b"bscr" => "\u{1D4B}", b"cscr" => "\u{1D4B}", b"dscr" => "\u{1D4B}", b"fscr" => "\u{1D4B}", b"hscr" => "\u{1D4B}", b"iscr" => "\u{1D4B}", b"jscr" => "\u{1D4B}", b"kscr" => "\u{1D4C}", b"lscr" => "\u{1D4C}", b"mscr" => "\u{1D4C}", b"nscr" => "\u{1D4C}", b"pscr" => "\u{1D4C}", b"qscr" => "\u{1D4C}", b"rscr" => "\u{1D4C}", b"sscr" => "\u{1D4C}", b"tscr" => 
"\u{1D4C}", b"uscr" => "\u{1D4C}", b"vscr" => "\u{1D4C}", b"wscr" => "\u{1D4C}", b"xscr" => "\u{1D4C}", b"yscr" => "\u{1D4C}", b"zscr" => "\u{1D4C}", b"Afr" => "\u{1D50}", b"Bfr" => "\u{1D50}", b"Dfr" => "\u{1D50}", b"Efr" => "\u{1D50}", b"Ffr" => "\u{1D50}", b"Gfr" => "\u{1D50}", b"Jfr" => "\u{1D50}", b"Kfr" => "\u{1D50}", b"Lfr" => "\u{1D50}", b"Mfr" => "\u{1D51}", b"Nfr" => "\u{1D51}", b"Ofr" => "\u{1D51}", b"Pfr" => "\u{1D51}", b"Qfr" => "\u{1D51}", b"Sfr" => "\u{1D51}", b"Tfr" => "\u{1D51}", b"Ufr" => "\u{1D51}", b"Vfr" => "\u{1D51}", b"Wfr" => "\u{1D51}", b"Xfr" => "\u{1D51}", b"Yfr" => "\u{1D51}", b"afr" => "\u{1D51}", b"bfr" => "\u{1D51}", b"cfr" => "\u{1D52}", b"dfr" => "\u{1D52}", b"efr" => "\u{1D52}", b"ffr" => "\u{1D52}", b"gfr" => "\u{1D52}", b"hfr" => "\u{1D52}", b"ifr" => "\u{1D52}", b"jfr" => "\u{1D52}", b"kfr" => "\u{1D52}", b"lfr" => "\u{1D52}", b"mfr" => "\u{1D52}", b"nfr" => "\u{1D52}", b"ofr" => "\u{1D52}", b"pfr" => "\u{1D52}", b"qfr" => "\u{1D52}", b"rfr" => "\u{1D52}", b"sfr" => "\u{1D53}", b"tfr" => "\u{1D53}", b"ufr" => "\u{1D53}", b"vfr" => "\u{1D53}", b"wfr" => "\u{1D53}", b"xfr" => "\u{1D53}", b"yfr" => "\u{1D53}", b"zfr" => "\u{1D53}", b"Aopf" => "\u{1D53}", b"Bopf" => "\u{1D53}", b"Dopf" => "\u{1D53}", b"Eopf" => "\u{1D53}", b"Fopf" => "\u{1D53}", b"Gopf" => "\u{1D53}", b"Iopf" => "\u{1D54}", b"Jopf" => "\u{1D54}", b"Kopf" => "\u{1D54}", b"Lopf" => "\u{1D54}", b"Mopf" => "\u{1D54}", b"Oopf" => "\u{1D54}", b"Sopf" => "\u{1D54}", b"Topf" => "\u{1D54}", b"Uopf" => "\u{1D54}", b"Vopf" => "\u{1D54}", b"Wopf" => "\u{1D54}", b"Xopf" => "\u{1D54}", b"Yopf" => "\u{1D55}", b"aopf" => "\u{1D55}", b"bopf" => "\u{1D55}", b"copf" => "\u{1D55}", b"dopf" => "\u{1D55}", b"eopf" => "\u{1D55}", b"fopf" => "\u{1D55}", b"gopf" => "\u{1D55}", b"hopf" => "\u{1D55}", b"iopf" => "\u{1D55}", b"jopf" => "\u{1D55}", b"kopf" => "\u{1D55}", b"lopf" => "\u{1D55}", b"mopf" => "\u{1D55}", b"nopf" => "\u{1D55}", b"oopf" => "\u{1D56}", b"popf" => "\u{1D56}", b"qopf" => 
"\u{1D56}", b"ropf" => "\u{1D56}", b"sopf" => "\u{1D56}", b"topf" => "\u{1D56}", b"uopf" => "\u{1D56}", b"vopf" => "\u{1D56}", b"wopf" => "\u{1D56}", b"xopf" => "\u{1D56}", b"yopf" => "\u{1D56}", b"zopf" => "\u{1D56}", _ => return None, }; Some(s) } fn parse_number(bytes: &str, range: Range) -> Result { let code = if bytes.starts_with('x') { parse_hexadecimal(&bytes[1..]) } else { parse_decimal(bytes) }?; if code == 0 { return Err(EscapeError::EntityWithNull(range)); } match std::char::from_u32(code) { Some(c) => Ok(c), None => Err(EscapeError::InvalidCodepoint(code)), } } fn parse_hexadecimal(bytes: &str) -> Result { // maximum code is 0x10FFFF => 6 characters if bytes.len() > 6 { return Err(EscapeError::TooLongHexadecimal); } let mut code = 0; for b in bytes.bytes() { code <<= 4; code += match b { b'0'..=b'9' => b - b'0', b'a'..=b'f' => b - b'a' + 10, b'A'..=b'F' => b - b'A' + 10, b => return Err(EscapeError::InvalidHexadecimal(b as char)), } as u32; } Ok(code) } fn parse_decimal(bytes: &str) -> Result { // maximum code is 0x10FFFF = 1114111 => 7 characters if bytes.len() > 7 { return Err(EscapeError::TooLongDecimal); } let mut code = 0; for b in bytes.bytes() { code *= 10; code += match b { b'0'..=b'9' => b - b'0', b => return Err(EscapeError::InvalidDecimal(b as char)), } as u32; } Ok(code) } #[test] fn test_unescape() { assert_eq!(unescape("test").unwrap(), Cow::Borrowed("test")); assert_eq!(unescape("<test>").unwrap(), ""); assert_eq!(unescape("0").unwrap(), "0"); assert_eq!(unescape("0").unwrap(), "0"); assert!(unescape("&foo;").is_err()); } #[test] fn test_unescape_with() { let custom_entities = |ent: &str| match ent { "foo" => Some("BAR"), _ => None, }; assert_eq!( unescape_with("test", custom_entities).unwrap(), Cow::Borrowed("test") ); assert_eq!( unescape_with("<test>", custom_entities).unwrap(), "" ); assert_eq!(unescape_with("0", custom_entities).unwrap(), "0"); assert_eq!(unescape_with("0", custom_entities).unwrap(), "0"); 
assert_eq!(unescape_with("&foo;", custom_entities).unwrap(), "BAR"); assert!(unescape_with("&fop;", custom_entities).is_err()); } #[test] fn test_escape() { assert_eq!(escape("test"), Cow::Borrowed("test")); assert_eq!(escape(""), "<test>"); assert_eq!(escape("\"a\"bc"), ""a"bc"); assert_eq!(escape("\"a\"b&c"), ""a"b&c"); assert_eq!( escape("prefix_\"a\"b&<>c"), "prefix_"a"b&<>c" ); } #[test] fn test_partial_escape() { assert_eq!(partial_escape("test"), Cow::Borrowed("test")); assert_eq!(partial_escape(""), "<test>"); assert_eq!(partial_escape("\"a\"bc"), "\"a\"bc"); assert_eq!(partial_escape("\"a\"b&c"), "\"a\"b&c"); assert_eq!( partial_escape("prefix_\"a\"b&<>c"), "prefix_\"a\"b&<>c" ); } quick-xml-0.27.1/src/events/attributes.rs000064400000000000000000002400310072674642500165040ustar 00000000000000//! Xml Attributes module //! //! Provides an iterator over attributes key/value pairs use crate::errors::Result as XmlResult; use crate::escape::{escape, unescape_with}; use crate::name::QName; use crate::reader::{is_whitespace, Reader}; use crate::utils::{write_byte_string, write_cow_string, Bytes}; use std::fmt::{self, Debug, Display, Formatter}; use std::iter::FusedIterator; use std::{borrow::Cow, ops::Range}; /// A struct representing a key/value XML attribute. /// /// Field `value` stores raw bytes, possibly containing escape-sequences. Most users will likely /// want to access the value using one of the [`unescape_value`] and [`decode_and_unescape_value`] /// functions. /// /// [`unescape_value`]: Self::unescape_value /// [`decode_and_unescape_value`]: Self::decode_and_unescape_value #[derive(Clone, PartialEq)] pub struct Attribute<'a> { /// The key to uniquely define the attribute. /// /// If [`Attributes::with_checks`] is turned off, the key might not be unique. pub key: QName<'a>, /// The raw value of the attribute. pub value: Cow<'a, [u8]>, } impl<'a> Attribute<'a> { /// Decodes using UTF-8 then unescapes the value. 
///
/// This is normally the value you are interested in. Escape sequences such as `&gt;` are
/// replaced with their unescaped equivalents such as `>`.
///
/// This will allocate if the value contains any escape sequences.
///
/// See also [`unescape_value_with()`](Self::unescape_value_with)
///
/// This method is available only if `encoding` feature is **not** enabled.
///
/// # Errors
///
/// Fails if the raw value is not valid UTF-8 or if it contains an escape
/// sequence that cannot be resolved.
#[cfg(any(doc, not(feature = "encoding")))]
pub fn unescape_value(&self) -> XmlResult<Cow<'a, str>> {
    // No custom entities: every lookup through the fallback resolver misses.
    self.unescape_value_with(|_| None)
}

/// Decodes using UTF-8 then unescapes the value, using custom entities.
///
/// This is normally the value you are interested in. Escape sequences such as `&gt;` are
/// replaced with their unescaped equivalents such as `>`.
/// A fallback resolver for additional custom entities can be provided via
/// `resolve_entity`.
///
/// This will allocate if the value contains any escape sequences, or if the
/// attribute owns its value (`Cow::Owned`).
///
/// See also [`unescape_value()`](Self::unescape_value)
///
/// This method is available only if `encoding` feature is **not** enabled.
///
/// # Errors
///
/// Fails if the raw value is not valid UTF-8 or if `unescape_with` rejects
/// the decoded text.
#[cfg(any(doc, not(feature = "encoding")))]
pub fn unescape_value_with<'entity>(
    &self,
    resolve_entity: impl Fn(&str) -> Option<&'entity str>,
) -> XmlResult<Cow<'a, str>> {
    // The value is expected to be UTF-8 here; if it is not, the error is
    // propagated to the caller by `?` instead of panicking.
    let decoded: Cow<'a, str> = match &self.value {
        // Borrowed bytes already live for `'a`, so the text can be borrowed too
        Cow::Borrowed(bytes) => Cow::Borrowed(std::str::from_utf8(bytes)?),
        // Owned bytes are only borrowed from `self`; copy them, because otherwise
        // the resulting `Cow` would be bound to the wrong (shorter) lifetime
        Cow::Owned(bytes) => Cow::Owned(std::str::from_utf8(bytes)?.to_string()),
    };

    match unescape_with(&decoded, resolve_entity)? {
        // A borrowed result means that no replacements were made, so the
        // decoded string can be returned as is
        Cow::Borrowed(_) => Ok(decoded),
        Cow::Owned(unescaped) => Ok(unescaped.into()),
    }
}

/// Decodes then unescapes the value.
///
/// This will allocate if the value contains any escape sequences or in
/// non-UTF-8 encoding.
pub fn decode_and_unescape_value(&self, reader: &Reader) -> XmlResult> { self.decode_and_unescape_value_with(reader, |_| None) } /// Decodes then unescapes the value with custom entities. /// /// This will allocate if the value contains any escape sequences or in /// non-UTF-8 encoding. pub fn decode_and_unescape_value_with<'entity, B>( &self, reader: &Reader, resolve_entity: impl Fn(&str) -> Option<&'entity str>, ) -> XmlResult> { let decoded = match &self.value { Cow::Borrowed(bytes) => reader.decoder().decode(bytes)?, // Convert to owned, because otherwise Cow will be bound with wrong lifetime Cow::Owned(bytes) => reader.decoder().decode(bytes)?.into_owned().into(), }; match unescape_with(&decoded, resolve_entity)? { // Because result is borrowed, no replacements was done and we can use original string Cow::Borrowed(_) => Ok(decoded), Cow::Owned(s) => Ok(s.into()), } } } impl<'a> Debug for Attribute<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "Attribute {{ key: ")?; write_byte_string(f, self.key.as_ref())?; write!(f, ", value: ")?; write_cow_string(f, &self.value)?; write!(f, " }}") } } impl<'a> From<(&'a [u8], &'a [u8])> for Attribute<'a> { /// Creates new attribute from raw bytes. /// Does not apply any transformation to both key and value. /// /// # Examples /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::attributes::Attribute; /// /// let features = Attribute::from(("features".as_bytes(), "Bells & whistles".as_bytes())); /// assert_eq!(features.value, "Bells & whistles".as_bytes()); /// ``` fn from(val: (&'a [u8], &'a [u8])) -> Attribute<'a> { Attribute { key: QName(val.0), value: Cow::from(val.1), } } } impl<'a> From<(&'a str, &'a str)> for Attribute<'a> { /// Creates new attribute from text representation. /// Key is stored as-is, but the value will be escaped. 
/// /// # Examples /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::attributes::Attribute; /// /// let features = Attribute::from(("features", "Bells & whistles")); /// assert_eq!(features.value, "Bells & whistles".as_bytes()); /// ``` fn from(val: (&'a str, &'a str)) -> Attribute<'a> { Attribute { key: QName(val.0.as_bytes()), value: match escape(val.1) { Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()), Cow::Owned(s) => Cow::Owned(s.into_bytes()), }, } } } impl<'a> From> for Attribute<'a> { #[inline] fn from(attr: Attr<&'a [u8]>) -> Self { Self { key: attr.key(), value: Cow::Borrowed(attr.value()), } } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Iterator over XML attributes. /// /// Yields `Result`. An `Err` will be yielded if an attribute is malformed or duplicated. /// The duplicate check can be turned off by calling [`with_checks(false)`]. /// /// [`with_checks(false)`]: Self::with_checks #[derive(Clone, Debug)] pub struct Attributes<'a> { /// Slice of `BytesStart` corresponding to attributes bytes: &'a [u8], /// Iterator state, independent from the actual source of bytes state: IterState, } impl<'a> Attributes<'a> { /// Internal constructor, used by `BytesStart`. Supplies data in reader's encoding #[inline] pub(crate) fn wrap(buf: &'a [u8], pos: usize, html: bool) -> Self { Self { bytes: buf, state: IterState::new(pos, html), } } /// Creates a new attribute iterator from a buffer. pub fn new(buf: &'a str, pos: usize) -> Self { Self::wrap(buf.as_bytes(), pos, false) } /// Creates a new attribute iterator from a buffer, allowing HTML attribute syntax. pub fn html(buf: &'a str, pos: usize) -> Self { Self::wrap(buf.as_bytes(), pos, true) } /// Changes whether attributes should be checked for uniqueness. /// /// The XML specification requires attribute keys in the same element to be unique. This check /// can be disabled to improve performance slightly. 
/// /// (`true` by default) pub fn with_checks(&mut self, val: bool) -> &mut Attributes<'a> { self.state.check_duplicates = val; self } } impl<'a> Iterator for Attributes<'a> { type Item = Result, AttrError>; #[inline] fn next(&mut self) -> Option { match self.state.next(self.bytes) { None => None, Some(Ok(a)) => Some(Ok(a.map(|range| &self.bytes[range]).into())), Some(Err(e)) => Some(Err(e)), } } } impl<'a> FusedIterator for Attributes<'a> {} //////////////////////////////////////////////////////////////////////////////////////////////////// /// Errors that can be raised during parsing attributes. /// /// Recovery position in examples shows the position from which parsing of the /// next attribute will be attempted. #[derive(Clone, Debug, PartialEq, Eq)] pub enum AttrError { /// Attribute key was not followed by `=`, position relative to the start of /// the owning tag is provided. /// /// Example of input that raises this error: /// /// ```xml /// /// /// ``` /// /// This error can be raised only when the iterator is in XML mode. ExpectedEq(usize), /// Attribute value was not found after `=`, position relative to the start /// of the owning tag is provided. /// /// Example of input that raises this error: /// /// ```xml /// /// /// ``` /// /// This error can be returned only for the last attribute in the list, /// because otherwise any content after `=` will be threated as a value. /// The XML /// /// ```xml /// /// /// /// ``` /// /// will be treated as `Attribute { key = b"key", value = b"another-key" }` /// and or [`Attribute`] is returned, or [`AttrError::UnquotedValue`] is raised, /// depending on the parsing mode. ExpectedValue(usize), /// Attribute value is not quoted, position relative to the start of the /// owning tag is provided. /// /// Example of input that raises this error: /// /// ```xml /// /// /// /// ``` /// /// This error can be raised only when the iterator is in XML mode. 
UnquotedValue(usize), /// Attribute value was not finished with a matching quote, position relative /// to the start of owning tag and a quote is provided. That position is always /// a last character in the tag content. /// /// Example of input that raises this error: /// /// ```xml /// /// /// /// /// ``` /// /// This error is returned only when [`Attributes::with_checks()`] is set /// to `true` (that is default behavior). Duplicated(usize, usize), } impl Display for AttrError { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self { Self::ExpectedEq(pos) => write!( f, r#"position {}: attribute key must be directly followed by `=` or space"#, pos ), Self::ExpectedValue(pos) => write!( f, r#"position {}: `=` must be followed by an attribute value"#, pos ), Self::UnquotedValue(pos) => write!( f, r#"position {}: attribute value must be enclosed in `"` or `'`"#, pos ), Self::ExpectedQuote(pos, quote) => write!( f, r#"position {}: missing closing quote `{}` in attribute value"#, pos, *quote as char ), Self::Duplicated(pos1, pos2) => write!( f, r#"position {}: duplicated attribute, previous declaration at position {}"#, pos1, pos2 ), } } } impl std::error::Error for AttrError {} //////////////////////////////////////////////////////////////////////////////////////////////////// /// A struct representing a key/value XML or HTML [attribute]. /// /// [attribute]: https://www.w3.org/TR/xml11/#NT-Attribute #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Attr { /// Attribute with value enclosed in double quotes (`"`). Attribute key and /// value provided. This is a canonical XML-style attribute. DoubleQ(T, T), /// Attribute with value enclosed in single quotes (`'`). Attribute key and /// value provided. This is an XML-style attribute. SingleQ(T, T), /// Attribute with value not enclosed in quotes. Attribute key and value /// provided. This is HTML-style attribute, it can be returned in HTML-mode /// parsing only. 
In an XML mode [`AttrError::UnquotedValue`] will be raised /// instead. /// /// Attribute value can be invalid according to the [HTML specification], /// in particular, it can contain `"`, `'`, `=`, `<`, and ` /// characters. The absence of the `>` character is nevertheless guaranteed, /// since the parser extracts [events] based on them even before the start /// of parsing attributes. /// /// [HTML specification]: https://html.spec.whatwg.org/#unquoted /// [events]: crate::events::Event::Start Unquoted(T, T), /// Attribute without value. Attribute key provided. This is HTML-style attribute, /// it can be returned in HTML-mode parsing only. In XML mode /// [`AttrError::ExpectedEq`] will be raised instead. Empty(T), } impl Attr { /// Maps an `Attr` to `Attr` by applying a function to a contained key and value. #[inline] pub fn map(self, mut f: F) -> Attr where F: FnMut(T) -> U, { match self { Attr::DoubleQ(key, value) => Attr::DoubleQ(f(key), f(value)), Attr::SingleQ(key, value) => Attr::SingleQ(f(key), f(value)), Attr::Empty(key) => Attr::Empty(f(key)), Attr::Unquoted(key, value) => Attr::Unquoted(f(key), f(value)), } } } impl<'a> Attr<&'a [u8]> { /// Returns the key value #[inline] pub fn key(&self) -> QName<'a> { QName(match self { Attr::DoubleQ(key, _) => key, Attr::SingleQ(key, _) => key, Attr::Empty(key) => key, Attr::Unquoted(key, _) => key, }) } /// Returns the attribute value. For [`Self::Empty`] variant an empty slice /// is returned according to the [HTML specification]. 
/// /// [HTML specification]: https://www.w3.org/TR/2012/WD-html-markup-20120329/syntax.html#syntax-attr-empty #[inline] pub fn value(&self) -> &'a [u8] { match self { Attr::DoubleQ(_, value) => value, Attr::SingleQ(_, value) => value, Attr::Empty(_) => &[], Attr::Unquoted(_, value) => value, } } } impl> Debug for Attr { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self { Attr::DoubleQ(key, value) => f .debug_tuple("Attr::DoubleQ") .field(&Bytes(key.as_ref())) .field(&Bytes(value.as_ref())) .finish(), Attr::SingleQ(key, value) => f .debug_tuple("Attr::SingleQ") .field(&Bytes(key.as_ref())) .field(&Bytes(value.as_ref())) .finish(), Attr::Empty(key) => f .debug_tuple("Attr::Empty") // Comment to prevent formatting and keep style consistent .field(&Bytes(key.as_ref())) .finish(), Attr::Unquoted(key, value) => f .debug_tuple("Attr::Unquoted") .field(&Bytes(key.as_ref())) .field(&Bytes(value.as_ref())) .finish(), } } } /// Unpacks attribute key and value into tuple of this two elements. /// `None` value element is returned only for [`Attr::Empty`] variant. impl From> for (T, Option) { #[inline] fn from(attr: Attr) -> Self { match attr { Attr::DoubleQ(key, value) => (key, Some(value)), Attr::SingleQ(key, value) => (key, Some(value)), Attr::Empty(key) => (key, None), Attr::Unquoted(key, value) => (key, Some(value)), } } } //////////////////////////////////////////////////////////////////////////////////////////////////// type AttrResult = Result>, AttrError>; #[derive(Clone, Copy, Debug)] enum State { /// Iteration finished, iterator will return `None` to all [`IterState::next`] /// requests. Done, /// The last attribute returned was deserialized successfully. Contains an /// offset from which next attribute should be searched. Next(usize), /// The last attribute returns [`AttrError::UnquotedValue`], offset pointed /// to the beginning of the value. 
Recover should skip a value SkipValue(usize), /// The last attribute returns [`AttrError::Duplicated`], offset pointed to /// the equal (`=`) sign. Recover should skip it and a value SkipEqValue(usize), } /// External iterator over spans of attribute key and value #[derive(Clone, Debug)] pub(crate) struct IterState { /// Iteration state that determines what actions should be done before the /// actual parsing of the next attribute state: State, /// If `true`, enables ability to parse unquoted values and key-only (empty) /// attributes html: bool, /// If `true`, checks for duplicate names check_duplicates: bool, /// If `check_duplicates` is set, contains the ranges of already parsed attribute /// names. We store a ranges instead of slices to able to report a previous /// attribute position keys: Vec>, } impl IterState { pub fn new(offset: usize, html: bool) -> Self { Self { state: State::Next(offset), html, check_duplicates: true, keys: Vec::new(), } } /// Recover from an error that could have been made on a previous step. /// Returns an offset from which parsing should continue. /// If there no input left, returns `None`. 
fn recover(&self, slice: &[u8]) -> Option { match self.state { State::Done => None, State::Next(offset) => Some(offset), State::SkipValue(offset) => self.skip_value(slice, offset), State::SkipEqValue(offset) => self.skip_eq_value(slice, offset), } } /// Skip all characters up to first space symbol or end-of-input #[inline] fn skip_value(&self, slice: &[u8], offset: usize) -> Option { let mut iter = (offset..).zip(slice[offset..].iter()); match iter.find(|(_, &b)| is_whitespace(b)) { // Input: ` key = value ` // | ^ // offset e Some((e, _)) => Some(e), // Input: ` key = value` // | ^ // offset e = len() None => None, } } /// Skip all characters up to first space symbol or end-of-input #[inline] fn skip_eq_value(&self, slice: &[u8], offset: usize) -> Option { let mut iter = (offset..).zip(slice[offset..].iter()); // Skip all up to the quote and get the quote type let quote = match iter.find(|(_, &b)| !is_whitespace(b)) { // Input: ` key = "` // | ^ // offset Some((_, b'"')) => b'"', // Input: ` key = '` // | ^ // offset Some((_, b'\'')) => b'\'', // Input: ` key = x` // | ^ // offset Some((offset, _)) => return self.skip_value(slice, offset), // Input: ` key = ` // | ^ // offset None => return None, }; match iter.find(|(_, &b)| b == quote) { // Input: ` key = " "` // ^ Some((e, b'"')) => Some(e), // Input: ` key = ' '` // ^ Some((e, _)) => Some(e), // Input: ` key = " ` // Input: ` key = ' ` // ^ // Closing quote not found None => None, } } #[inline] fn check_for_duplicates( &mut self, slice: &[u8], key: Range, ) -> Result, AttrError> { if self.check_duplicates { if let Some(prev) = self .keys .iter() .find(|r| slice[(*r).clone()] == slice[key.clone()]) { return Err(AttrError::Duplicated(key.start, prev.start)); } self.keys.push(key.clone()); } Ok(key) } /// # Parameters /// /// - `slice`: content of the tag, used for checking for duplicates /// - `key`: Range of key in slice, if iterator in HTML mode /// - `offset`: Position of error if iterator in XML mode 
#[inline]
fn key_only(&mut self, slice: &[u8], key: Range<usize>, offset: usize) -> Option<AttrResult> {
    Some(if self.html {
        // Key-only attributes are allowed, but the name still can be duplicated
        self.check_for_duplicates(slice, key).map(Attr::Empty)
    } else {
        Err(AttrError::ExpectedEq(offset))
    })
}

/// Produces an attribute with a double-quoted value and schedules the search
/// for the next attribute right after the closing quote.
#[inline]
fn double_q(&mut self, key: Range<usize>, value: Range<usize>) -> Option<AttrResult> {
    self.state = State::Next(value.end + 1); // +1 for `"`

    Some(Ok(Attr::DoubleQ(key, value)))
}

/// Produces an attribute with a single-quoted value and schedules the search
/// for the next attribute right after the closing quote.
#[inline]
fn single_q(&mut self, key: Range<usize>, value: Range<usize>) -> Option<AttrResult> {
    self.state = State::Next(value.end + 1); // +1 for `'`

    Some(Ok(Attr::SingleQ(key, value)))
}

/// Parses the next attribute from `slice`.
///
/// First recovers from an error reported by the previous call (if any), then
/// searches for a key, an equal sign and a value. Returns `None` when there is
/// nothing left to parse, otherwise spans of the key and the value or an error.
/// Before returning, the state is updated so that the following call knows
/// where to continue and what should be skipped.
pub fn next(&mut self, slice: &[u8]) -> Option<AttrResult> {
    let mut iter = match self.recover(slice) {
        Some(offset) => (offset..).zip(slice[offset..].iter()),
        None => return None,
    };

    // Index where next key started
    let start_key = match iter.find(|(_, &b)| !is_whitespace(b)) {
        // Input: `    key`
        //             ^
        Some((s, _)) => s,
        // Input: `    `
        //             ^
        None => {
            // Because we reach end-of-input, stop iteration on next call
            self.state = State::Done;
            return None;
        }
    };
    // Span of a key
    let (key, offset) = match iter.find(|(_, &b)| b == b'=' || is_whitespace(b)) {
        // Input: `    key=`
        //             |  ^
        //             s  e
        Some((e, b'=')) => (start_key..e, e),

        // Input: `    key `
        //                ^
        Some((e, _)) => match iter.find(|(_, &b)| !is_whitespace(b)) {
            // Input: `    key  =`
            //             |  | ^
            //     start_key  e
            Some((offset, b'=')) => (start_key..e, offset),
            // Input: `    key  x`
            //             |  | ^
            //     start_key  e
            // If HTML-like attributes is allowed, this is the result, otherwise error
            Some((offset, _)) => {
                // In any case, recovering is not required
                self.state = State::Next(offset);
                return self.key_only(slice, start_key..e, offset);
            }
            // Input: `    key  `
            //             |  | ^
            //     start_key  e
            // If HTML-like attributes is allowed, this is the result, otherwise error
            None => {
                // Because we reach end-of-input, stop iteration on next call
                self.state = State::Done;
                return self.key_only(slice, start_key..e, slice.len());
            }
        },

        // Input: `    key`
        //             |  ^
        //             s  e = len()
        // If HTML-like attributes is allowed, this is the result, otherwise error
        None => {
            // Because we reach end-of-input, stop iteration on next call
            self.state = State::Done;
            let e = slice.len();
            return self.key_only(slice, start_key..e, e);
        }
    };

    let key = match self.check_for_duplicates(slice, key) {
        Err(e) => {
            // `offset` is the position of `=`: the next call should skip it and the value
            self.state = State::SkipEqValue(offset);
            return Some(Err(e));
        }
        Ok(key) => key,
    };

    ////////////////////////////////////////////////////////////////////////

    // Gets the position of quote and quote type
    let (start_value, quote) = match iter.find(|(_, &b)| !is_whitespace(b)) {
        // Input: `    key  =  "`
        //                     ^
        Some((s, b'"')) => (s + 1, b'"'),
        // Input: `    key  =  '`
        //                     ^
        Some((s, b'\'')) => (s + 1, b'\''),

        // Input: `    key  =  x`
        //                     ^
        // If HTML-like attributes is allowed, this is the start of the value
        Some((s, _)) if self.html => {
            // We do not check validity of attribute value characters as required
            // according to https://html.spec.whatwg.org/#unquoted. It can be done
            // during validation phase
            let end = match iter.find(|(_, &b)| is_whitespace(b)) {
                // Input: `    key  =  value `
                //                     |    ^
                //                     s    e
                Some((e, _)) => e,
                // Input: `    key  =  value`
                //                     |    ^
                //                     s    e = len()
                None => slice.len(),
            };
            self.state = State::Next(end);
            return Some(Ok(Attr::Unquoted(key, s..end)));
        }
        // Input: `    key  =  x`
        //                     ^
        Some((s, _)) => {
            self.state = State::SkipValue(s);
            return Some(Err(AttrError::UnquotedValue(s)));
        }

        // Input: `    key  =  `
        //                     ^
        None => {
            // Because we reach end-of-input, stop iteration on next call
            self.state = State::Done;
            return Some(Err(AttrError::ExpectedValue(slice.len())));
        }
    };

    match iter.find(|(_, &b)| b == quote) {
        // Input: `    key  =  "   "`
        //                         ^
        Some((e, b'"')) => self.double_q(key, start_value..e),
        // Input: `    key  =  '   '`
        //                         ^
        Some((e, _)) => self.single_q(key, start_value..e),

        // Input: `    key  =  "   `
        // Input: `    key  =  '   `
        //                         ^
        // Closing quote not found
        None => {
            // Because we reach end-of-input, stop iteration on next call
            self.state = State::Done;
            return Some(Err(AttrError::ExpectedQuote(slice.len(), quote)));
        }
    }
}
} // closes the enclosing `impl` block
////////////////////////////////////////////////////////////////////////////////////////////////////

/// Checks, how parsing of XML-style attributes works. Each attribute should
/// have a value, enclosed in single or double quotes.
///
/// Markers in the comments under the inputs show byte offsets inside the
/// input string that are expected in the reported errors.
#[cfg(test)]
mod xml {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Checked attribute is the single attribute
    mod single {
        use super::*;
        use pretty_assertions::assert_eq;

        /// Attribute has a value enclosed in single quotes
        #[test]
        fn single_quoted() {
            let mut iter = Attributes::new(r#"tag key='value'"#, 3);

            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"key"),
                    value: Cow::Borrowed(b"value"),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Attribute has a value enclosed in double quotes
        #[test]
        fn double_quoted() {
            let mut iter = Attributes::new(r#"tag key="value""#, 3);

            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"key"),
                    value: Cow::Borrowed(b"value"),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Attribute has a value, not enclosed in quotes
        #[test]
        fn unquoted() {
            let mut iter = Attributes::new(r#"tag key=value"#, 3);
            //                                0       ^ = 8

            assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(8))));
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Only attribute key is present
        #[test]
        fn key_only() {
            let mut iter = Attributes::new(r#"tag key"#, 3);
            //                                0      ^ = 7

            assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(7))));
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Key is started with an invalid symbol (a single quote in this test).
        /// Because we do not check validity of keys and values during parsing,
        /// that invalid attribute will be returned
        #[test]
        fn key_start_invalid() {
            let mut iter = Attributes::new(r#"tag 'key'='value'"#, 3);

            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"'key'"),
                    value: Cow::Borrowed(b"value"),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Key contains an invalid symbol (an ampersand in this test).
        /// Because we do not check validity of keys and values during parsing,
        /// that invalid attribute will be returned
        #[test]
        fn key_contains_invalid() {
            let mut iter = Attributes::new(r#"tag key&jey='value'"#, 3);

            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"key&jey"),
                    value: Cow::Borrowed(b"value"),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Attribute value is missing after `=`
        #[test]
        fn missed_value() {
            let mut iter = Attributes::new(r#"tag key="#, 3);
            //                                0       ^ = 8

            assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(8))));
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }
    }

    /// Checked attribute is the first attribute in the list of many attributes
    mod first {
        use super::*;
        use pretty_assertions::assert_eq;

        /// Attribute has a value enclosed in single quotes
        #[test]
        fn single_quoted() {
            let mut iter = Attributes::new(r#"tag key='value' regular='attribute'"#, 3);

            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"key"),
                    value: Cow::Borrowed(b"value"),
                }))
            );
            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"regular"),
                    value: Cow::Borrowed(b"attribute"),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Attribute has a value enclosed in double quotes
        #[test]
        fn double_quoted() {
            let mut iter = Attributes::new(r#"tag key="value" regular='attribute'"#, 3);

            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"key"),
                    value: Cow::Borrowed(b"value"),
                }))
            );
            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"regular"),
                    value: Cow::Borrowed(b"attribute"),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Attribute has a value, not enclosed in quotes
        #[test]
        fn unquoted() {
            let mut iter = Attributes::new(r#"tag key=value regular='attribute'"#, 3);
            //                                0       ^ = 8

            assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(8))));
            // check error recovery
            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"regular"),
                    value: Cow::Borrowed(b"attribute"),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Only attribute key is present
        #[test]
        fn key_only() {
            let mut iter = Attributes::new(r#"tag key regular='attribute'"#, 3);
            //                                0       ^ = 8

            assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(8))));
            // check error recovery
            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"regular"),
                    value: Cow::Borrowed(b"attribute"),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Key is started with an invalid symbol (a single quote in this test).
        /// Because we do not check validity of keys and values during parsing,
        /// that invalid attribute will be returned
        #[test]
        fn key_start_invalid() {
            let mut iter = Attributes::new(r#"tag 'key'='value' regular='attribute'"#, 3);

            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"'key'"),
                    value: Cow::Borrowed(b"value"),
                }))
            );
            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"regular"),
                    value: Cow::Borrowed(b"attribute"),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Key contains an invalid symbol (an ampersand in this test).
        /// Because we do not check validity of keys and values during parsing,
        /// that invalid attribute will be returned
        #[test]
        fn key_contains_invalid() {
            let mut iter = Attributes::new(r#"tag key&jey='value' regular='attribute'"#, 3);

            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"key&jey"),
                    value: Cow::Borrowed(b"value"),
                }))
            );
            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"regular"),
                    value: Cow::Borrowed(b"attribute"),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Attribute value is missing after `=`.
        #[test]
        fn missed_value() {
            let mut iter = Attributes::new(r#"tag key= regular='attribute'"#, 3);
            //                                0        ^ = 9

            assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9))));
            // Because we do not check validity of keys and values during parsing,
            // "regular='attribute'" is considered as an unquoted attribute value,
            // so it is skipped during recovery and the iteration is finished
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);

            ////////////////////////////////////////////////////////////////////

            let mut iter = Attributes::new(r#"tag key= regular= 'attribute'"#, 3);
            //                                0        ^ = 9               ^ = 29

            // In that case "regular=" considered as unquoted value
            assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9))));
            // In that case "'attribute'" considered as a key, because we do not check
            // validity of key names
            assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(29))));
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);

            ////////////////////////////////////////////////////////////////////

            let mut iter = Attributes::new(r#"tag key= regular ='attribute'"#, 3);
            //                                0        ^ = 9               ^ = 29

            // In that case "regular" considered as unquoted value
            assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9))));
            // In that case "='attribute'" considered as a key, because we do not check
            // validity of key names
            assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(29))));
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);

            ////////////////////////////////////////////////////////////////////

            let mut iter = Attributes::new(r#"tag key= regular = 'attribute'"#, 3);
            //                                0        ^ = 9     ^ = 19     ^ = 30

            assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9))));
            // In that case second "=" considered as a key, because we do not check
            // validity of key names
            assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(19))));
            // In that case "'attribute'" considered as a key, because we do not check
            // validity of key names
            assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(30))));
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }
    }

    /// Copy of single, but with additional spaces in markup
    mod sparsed {
        use super::*;
        use pretty_assertions::assert_eq;

        /// Attribute has a value enclosed in single quotes
        #[test]
        fn single_quoted() {
            let mut iter = Attributes::new(r#"tag key = 'value' "#, 3);

            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"key"),
                    value: Cow::Borrowed(b"value"),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Attribute has a value enclosed in double quotes
        #[test]
        fn double_quoted() {
            let mut iter = Attributes::new(r#"tag key = "value" "#, 3);

            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"key"),
                    value: Cow::Borrowed(b"value"),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Attribute has a value, not enclosed in quotes
        #[test]
        fn unquoted() {
            let mut iter = Attributes::new(r#"tag key = value "#, 3);
            //                                0         ^ = 10

            assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(10))));
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Only attribute key is present
        #[test]
        fn key_only() {
            let mut iter = Attributes::new(r#"tag key "#, 3);
            //                                0       ^ = 8

            assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(8))));
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Key is started with an invalid symbol (a single quote in this test).
        /// Because we do not check validity of keys and values during parsing,
        /// that invalid attribute will be returned
        #[test]
        fn key_start_invalid() {
            let mut iter = Attributes::new(r#"tag 'key' = 'value' "#, 3);

            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"'key'"),
                    value: Cow::Borrowed(b"value"),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Key contains an invalid symbol (an ampersand in this test).
        /// Because we do not check validity of keys and values during parsing,
        /// that invalid attribute will be returned
        #[test]
        fn key_contains_invalid() {
            let mut iter = Attributes::new(r#"tag key&jey = 'value' "#, 3);

            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"key&jey"),
                    value: Cow::Borrowed(b"value"),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Attribute value is missing after `=`
        #[test]
        fn missed_value() {
            let mut iter = Attributes::new(r#"tag key = "#, 3);
            //                                0         ^ = 10

            assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(10))));
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }
    }

    /// Checks that duplicated attributes correctly reported and recovering is
    /// possible after that
    mod duplicated {
        use super::*;

        mod with_check {
            use super::*;
            use pretty_assertions::assert_eq;

            /// Attribute has a value enclosed in single quotes
            #[test]
            fn single_quoted() {
                let mut iter = Attributes::new(r#"tag key='value' key='dup' another=''"#, 3);
                //                                0   ^ = 4       ^ = 16

                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"key"),
                        value: Cow::Borrowed(b"value"),
                    }))
                );
                assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"another"),
                        value: Cow::Borrowed(b""),
                    }))
                );
                assert_eq!(iter.next(), None);
                assert_eq!(iter.next(), None);
            }

            /// Attribute has a value enclosed in double quotes
            #[test]
            fn double_quoted() {
                let mut iter = Attributes::new(r#"tag key='value' key="dup" another=''"#, 3);
                //                                0   ^ = 4       ^ = 16

                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"key"),
                        value: Cow::Borrowed(b"value"),
                    }))
                );
                assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"another"),
                        value: Cow::Borrowed(b""),
                    }))
                );
                assert_eq!(iter.next(), None);
                assert_eq!(iter.next(), None);
            }

            /// Attribute has a value, not enclosed in quotes
            #[test]
            fn unquoted() {
                let mut iter = Attributes::new(r#"tag key='value' key=dup another=''"#, 3);
                //                                0   ^ = 4       ^ = 16

                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"key"),
                        value: Cow::Borrowed(b"value"),
                    }))
                );
                assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"another"),
                        value: Cow::Borrowed(b""),
                    }))
                );
                assert_eq!(iter.next(), None);
                assert_eq!(iter.next(), None);
            }

            /// Only attribute key is present
            #[test]
            fn key_only() {
                let mut iter = Attributes::new(r#"tag key='value' key another=''"#, 3);
                //                                0                   ^ = 20

                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"key"),
                        value: Cow::Borrowed(b"value"),
                    }))
                );
                assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(20))));
                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"another"),
                        value: Cow::Borrowed(b""),
                    }))
                );
                assert_eq!(iter.next(), None);
                assert_eq!(iter.next(), None);
            }
        }

        /// Check for duplicated names is disabled
        mod without_check {
            use super::*;
            use pretty_assertions::assert_eq;

            /// Attribute has a value enclosed in single quotes
            #[test]
            fn single_quoted() {
                let mut iter = Attributes::new(r#"tag key='value' key='dup' another=''"#, 3);
                iter.with_checks(false);

                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"key"),
                        value: Cow::Borrowed(b"value"),
                    }))
                );
                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"key"),
                        value: Cow::Borrowed(b"dup"),
                    }))
                );
                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"another"),
                        value: Cow::Borrowed(b""),
                    }))
                );
                assert_eq!(iter.next(), None);
                assert_eq!(iter.next(), None);
            }

            /// Attribute has a value enclosed in double quotes
            #[test]
            fn double_quoted() {
                let mut iter = Attributes::new(r#"tag key='value' key="dup" another=''"#, 3);
                iter.with_checks(false);

                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"key"),
                        value: Cow::Borrowed(b"value"),
                    }))
                );
                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"key"),
                        value: Cow::Borrowed(b"dup"),
                    }))
                );
                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"another"),
                        value: Cow::Borrowed(b""),
                    }))
                );
                assert_eq!(iter.next(), None);
                assert_eq!(iter.next(), None);
            }

            /// Attribute has a value, not enclosed in quotes
            #[test]
            fn unquoted() {
                let mut iter = Attributes::new(r#"tag key='value' key=dup another=''"#, 3);
                //                                0                   ^ = 20
                iter.with_checks(false);

                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"key"),
                        value: Cow::Borrowed(b"value"),
                    }))
                );
                assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(20))));
                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"another"),
                        value: Cow::Borrowed(b""),
                    }))
                );
                assert_eq!(iter.next(), None);
                assert_eq!(iter.next(), None);
            }

            /// Only attribute key is present
            #[test]
            fn key_only() {
                let mut iter = Attributes::new(r#"tag key='value' key another=''"#, 3);
                //                                0                   ^ = 20
                iter.with_checks(false);

                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"key"),
                        value: Cow::Borrowed(b"value"),
                    }))
                );
                assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(20))));
                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"another"),
                        value: Cow::Borrowed(b""),
                    }))
                );
                assert_eq!(iter.next(), None);
                assert_eq!(iter.next(), None);
            }
        }
    }

    /// Both kinds of quotes in one tag; a quote of the other kind inside a
    /// value is a part of that value
    #[test]
    fn mixed_quote() {
        let mut iter = Attributes::new(r#"tag a='a' b = "b" c='cc"cc' d="dd'dd""#, 3);

        assert_eq!(
            iter.next(),
            Some(Ok(Attribute {
                key: QName(b"a"),
                value: Cow::Borrowed(b"a"),
            }))
        );
        assert_eq!(
            iter.next(),
            Some(Ok(Attribute {
                key: QName(b"b"),
                value: Cow::Borrowed(b"b"),
            }))
        );
        assert_eq!(
            iter.next(),
            Some(Ok(Attribute {
                key: QName(b"c"),
                value: Cow::Borrowed(br#"cc"cc"#),
            }))
        );
        assert_eq!(
            iter.next(),
            Some(Ok(Attribute {
                key: QName(b"d"),
                value: Cow::Borrowed(b"dd'dd"),
            }))
        );
        assert_eq!(iter.next(), None);
        assert_eq!(iter.next(), None);
    }
}

/// Checks, how parsing of HTML-style attributes works. Each attribute can be
/// in three forms:
/// - XML-like: have a value, enclosed in single or double quotes
/// - have a value, do not enclosed in quotes
/// - without value, key only
#[cfg(test)]
mod html {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Checked attribute is the single attribute
    mod single {
        use super::*;
        use pretty_assertions::assert_eq;

        /// Attribute has a value enclosed in single quotes
        #[test]
        fn single_quoted() {
            let mut iter = Attributes::html(r#"tag key='value'"#, 3);

            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"key"),
                    value: Cow::Borrowed(b"value"),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Attribute has a value enclosed in double quotes
        #[test]
        fn double_quoted() {
            let mut iter = Attributes::html(r#"tag key="value""#, 3);

            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"key"),
                    value: Cow::Borrowed(b"value"),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Attribute has a value, not enclosed in quotes
        #[test]
        fn unquoted() {
            let mut iter = Attributes::html(r#"tag key=value"#, 3);

            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"key"),
                    value: Cow::Borrowed(b"value"),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Only attribute key is present
        #[test]
        fn key_only() {
            let mut iter = Attributes::html(r#"tag key"#, 3);

            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"key"),
                    value: Cow::Borrowed(&[]),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Key is started with an invalid symbol (a single quote in this test).
/// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_start_invalid() { let mut iter = Attributes::html(r#"tag 'key'='value'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"'key'"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Key contains an invalid symbol (an ampersand in this test). /// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_contains_invalid() { let mut iter = Attributes::html(r#"tag key&jey='value'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key&jey"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute value is missing after `=` #[test] fn missed_value() { let mut iter = Attributes::html(r#"tag key="#, 3); // 0 ^ = 8 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(8)))); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } } /// Checked attribute is the first attribute in the list of many attributes mod first { use super::*; use pretty_assertions::assert_eq; /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { let mut iter = Attributes::html(r#"tag key='value' regular='attribute'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"regular"), value: Cow::Borrowed(b"attribute"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value enclosed in double quotes #[test] fn double_quoted() { let mut iter = Attributes::html(r#"tag key="value" regular='attribute'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"regular"), 
value: Cow::Borrowed(b"attribute"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value, not enclosed in quotes #[test] fn unquoted() { let mut iter = Attributes::html(r#"tag key=value regular='attribute'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"regular"), value: Cow::Borrowed(b"attribute"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Only attribute key is present #[test] fn key_only() { let mut iter = Attributes::html(r#"tag key regular='attribute'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(&[]), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"regular"), value: Cow::Borrowed(b"attribute"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Key is started with an invalid symbol (a single quote in this test). /// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_start_invalid() { let mut iter = Attributes::html(r#"tag 'key'='value' regular='attribute'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"'key'"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"regular"), value: Cow::Borrowed(b"attribute"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Key contains an invalid symbol (an ampersand in this test). 
/// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_contains_invalid() { let mut iter = Attributes::html(r#"tag key&jey='value' regular='attribute'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key&jey"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"regular"), value: Cow::Borrowed(b"attribute"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute value is missing after `=` #[test] fn missed_value() { let mut iter = Attributes::html(r#"tag key= regular='attribute'"#, 3); // Because we do not check validity of keys and values during parsing, // "regular='attribute'" is considered as unquoted attribute value assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"regular='attribute'"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); //////////////////////////////////////////////////////////////////// let mut iter = Attributes::html(r#"tag key= regular= 'attribute'"#, 3); // Because we do not check validity of keys and values during parsing, // "regular=" is considered as unquoted attribute value assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"regular="), })) ); // Because we do not check validity of keys and values during parsing, // "'attribute'" is considered as key-only attribute assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"'attribute'"), value: Cow::Borrowed(&[]), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); //////////////////////////////////////////////////////////////////// let mut iter = Attributes::html(r#"tag key= regular ='attribute'"#, 3); // Because we do not check validity of keys and values during parsing, // "regular" is considered as unquoted attribute value assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: 
Cow::Borrowed(b"regular"), })) ); // Because we do not check validity of keys and values during parsing, // "='attribute'" is considered as key-only attribute assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"='attribute'"), value: Cow::Borrowed(&[]), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); //////////////////////////////////////////////////////////////////// let mut iter = Attributes::html(r#"tag key= regular = 'attribute'"#, 3); // 0 ^ = 9 ^ = 19 ^ = 30 // Because we do not check validity of keys and values during parsing, // "regular" is considered as unquoted attribute value assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"regular"), })) ); // Because we do not check validity of keys and values during parsing, // "=" is considered as key-only attribute assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"="), value: Cow::Borrowed(&[]), })) ); // Because we do not check validity of keys and values during parsing, // "'attribute'" is considered as key-only attribute assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"'attribute'"), value: Cow::Borrowed(&[]), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } } /// Copy of single, but with additional spaces in markup mod sparsed { use super::*; use pretty_assertions::assert_eq; /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { let mut iter = Attributes::html(r#"tag key = 'value' "#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value enclosed in double quotes #[test] fn double_quoted() { let mut iter = Attributes::html(r#"tag key = "value" "#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a 
value, not enclosed in quotes #[test] fn unquoted() { let mut iter = Attributes::html(r#"tag key = value "#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Only attribute key is present #[test] fn key_only() { let mut iter = Attributes::html(r#"tag key "#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(&[]), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Key is started with an invalid symbol (a single quote in this test). /// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_start_invalid() { let mut iter = Attributes::html(r#"tag 'key' = 'value' "#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"'key'"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Key contains an invalid symbol (an ampersand in this test). 
/// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_contains_invalid() { let mut iter = Attributes::html(r#"tag key&jey = 'value' "#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key&jey"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute value is missing after `=` #[test] fn missed_value() { let mut iter = Attributes::html(r#"tag key = "#, 3); // 0 ^ = 10 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(10)))); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } } /// Checks that duplicated attributes correctly reported and recovering is /// possible after that mod duplicated { use super::*; mod with_check { use super::*; use pretty_assertions::assert_eq; /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { let mut iter = Attributes::html(r#"tag key='value' key='dup' another=''"#, 3); // 0 ^ = 4 ^ = 16 assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4)))); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value enclosed in double quotes #[test] fn double_quoted() { let mut iter = Attributes::html(r#"tag key='value' key="dup" another=''"#, 3); // 0 ^ = 4 ^ = 16 assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4)))); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value, not enclosed in quotes #[test] fn unquoted() { let mut iter = Attributes::html(r#"tag 
key='value' key=dup another=''"#, 3); // 0 ^ = 4 ^ = 16 assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4)))); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Only attribute key is present #[test] fn key_only() { let mut iter = Attributes::html(r#"tag key='value' key another=''"#, 3); // 0 ^ = 4 ^ = 16 assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4)))); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } } /// Check for duplicated names is disabled mod without_check { use super::*; use pretty_assertions::assert_eq; /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { let mut iter = Attributes::html(r#"tag key='value' key='dup' another=''"#, 3); iter.with_checks(false); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"dup"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value enclosed in double quotes #[test] fn double_quoted() { let mut iter = Attributes::html(r#"tag key='value' key="dup" another=''"#, 3); iter.with_checks(false); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"dup"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: 
QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value, not enclosed in quotes #[test] fn unquoted() { let mut iter = Attributes::html(r#"tag key='value' key=dup another=''"#, 3); iter.with_checks(false); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"dup"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Only attribute key is present #[test] fn key_only() { let mut iter = Attributes::html(r#"tag key='value' key another=''"#, 3); iter.with_checks(false); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(&[]), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } } } #[test] fn mixed_quote() { let mut iter = Attributes::html(r#"tag a='a' b = "b" c='cc"cc' d="dd'dd""#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"a"), value: Cow::Borrowed(b"a"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"b"), value: Cow::Borrowed(b"b"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"c"), value: Cow::Borrowed(br#"cc"cc"#), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"d"), value: Cow::Borrowed(b"dd'dd"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } } quick-xml-0.27.1/src/events/mod.rs000064400000000000000000001025340072674642500151020ustar 00000000000000//! Defines zero-copy XML events used throughout this library. //! //! 
//! An XML event often represents part of an XML element.
//! They occur both during reading and writing and are
//! usually used with the stream-oriented API.
//!
//! For example, the XML element
//! ```xml
//! <name attr="value">Inner text</name>
//! ```
//! consists of the three events `Start`, `Text` and `End`.
//! They can also represent other parts in an XML document like the
//! XML declaration. Each Event usually contains further information,
//! like the tag name, the attribute or the inner text.
//!
//! See [`Event`] for a list of all possible events.
//!
//! # Reading
//! When reading an XML stream, the events are emitted by [`Reader::read_event`]
//! and [`Reader::read_event_into`]. You must listen
//! for the different types of events you are interested in.
//!
//! See [`Reader`] for further information.
//!
//! # Writing
//! When writing the XML document, you must create the XML element
//! by constructing the events it consists of and pass them to the writer
//! sequentially.
//!
//! See [`Writer`] for further information.
//!
//! [`Reader::read_event`]: crate::reader::Reader::read_event
//! [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
//! [`Reader`]: crate::reader::Reader
//! [`Writer`]: crate::writer::Writer
//! [`Event`]: crate::events::Event

pub mod attributes;

#[cfg(feature = "encoding")]
use encoding_rs::Encoding;
use std::borrow::Cow;
use std::fmt::{self, Debug, Formatter};
use std::ops::Deref;
use std::str::from_utf8;

use crate::encoding::Decoder;
use crate::errors::{Error, Result};
use crate::escape::{escape, partial_escape, unescape_with};
use crate::name::{LocalName, QName};
use crate::utils::write_cow_string;
use attributes::{Attribute, Attributes};

/// Opening tag data (`Event::Start`), with optional attributes.
///
/// `<name attr="value">`.
///
/// The name can be accessed using the [`name`] or [`local_name`] methods.
/// An iterator over the attributes is returned by the [`attributes`] method.
/// /// [`name`]: Self::name /// [`local_name`]: Self::local_name /// [`attributes`]: Self::attributes #[derive(Clone, Eq, PartialEq)] pub struct BytesStart<'a> { /// content of the element, before any utf8 conversion pub(crate) buf: Cow<'a, [u8]>, /// end of the element name, the name starts at that the start of `buf` pub(crate) name_len: usize, } impl<'a> BytesStart<'a> { /// Internal constructor, used by `Reader`. Supplies data in reader's encoding #[inline] pub(crate) fn wrap(content: &'a [u8], name_len: usize) -> Self { BytesStart { buf: Cow::Borrowed(content), name_len, } } /// Creates a new `BytesStart` from the given name. /// /// # Warning /// /// `name` must be a valid name. #[inline] pub fn new>>(name: C) -> Self { let buf = str_cow_to_bytes(name); BytesStart { name_len: buf.len(), buf, } } /// Creates a new `BytesStart` from the given content (name + attributes). /// /// # Warning /// /// `&content[..name_len]` must be a valid name, and the remainder of `content` /// must be correctly-formed attributes. Neither are checked, it is possible /// to generate invalid XML if `content` or `name_len` are incorrect. #[inline] pub fn from_content>>(content: C, name_len: usize) -> Self { BytesStart { buf: str_cow_to_bytes(content), name_len, } } /// Converts the event into an owned event. pub fn into_owned(self) -> BytesStart<'static> { BytesStart { buf: Cow::Owned(self.buf.into_owned()), name_len: self.name_len, } } /// Converts the event into an owned event without taking ownership of Event pub fn to_owned(&self) -> BytesStart<'static> { BytesStart { buf: Cow::Owned(self.buf.to_owned().into()), name_len: self.name_len, } } /// Converts the event into a borrowed event. Most useful when paired with [`to_end`]. /// /// # Example /// /// ``` /// use quick_xml::events::{BytesStart, Event}; /// # use quick_xml::writer::Writer; /// # use quick_xml::Error; /// /// struct SomeStruct<'a> { /// attrs: BytesStart<'a>, /// // ... 
/// } /// # impl<'a> SomeStruct<'a> { /// # fn example(&self) -> Result<(), Error> { /// # let mut writer = Writer::new(Vec::new()); /// /// writer.write_event(Event::Start(self.attrs.borrow()))?; /// // ... /// writer.write_event(Event::End(self.attrs.to_end()))?; /// # Ok(()) /// # }} /// ``` /// /// [`to_end`]: Self::to_end pub fn borrow(&self) -> BytesStart { BytesStart { buf: Cow::Borrowed(&self.buf), name_len: self.name_len, } } /// Creates new paired close tag pub fn to_end(&self) -> BytesEnd { BytesEnd::wrap(self.name().into_inner().into()) } /// Gets the undecoded raw tag name, as present in the input stream. #[inline] pub fn name(&self) -> QName { QName(&self.buf[..self.name_len]) } /// Gets the undecoded raw local tag name (excluding namespace) as present /// in the input stream. /// /// All content up to and including the first `:` character is removed from the tag name. #[inline] pub fn local_name(&self) -> LocalName { self.name().into() } /// Edit the name of the BytesStart in-place /// /// # Warning /// /// `name` must be a valid name. pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> { let bytes = self.buf.to_mut(); bytes.splice(..self.name_len, name.iter().cloned()); self.name_len = name.len(); self } } /// Attribute-related methods impl<'a> BytesStart<'a> { /// Consumes `self` and yield a new `BytesStart` with additional attributes from an iterator. /// /// The yielded items must be convertible to [`Attribute`] using `Into`. pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self where I: IntoIterator, I::Item: Into>, { self.extend_attributes(attributes); self } /// Add additional attributes to this tag using an iterator. /// /// The yielded items must be convertible to [`Attribute`] using `Into`. pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a> where I: IntoIterator, I::Item: Into>, { for attr in attributes { self.push_attribute(attr); } self } /// Adds an attribute to this element. 
pub fn push_attribute<'b, A>(&mut self, attr: A) where A: Into>, { let a = attr.into(); let bytes = self.buf.to_mut(); bytes.push(b' '); bytes.extend_from_slice(a.key.as_ref()); bytes.extend_from_slice(b"=\""); bytes.extend_from_slice(a.value.as_ref()); bytes.push(b'"'); } /// Remove all attributes from the ByteStart pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> { self.buf.to_mut().truncate(self.name_len); self } /// Returns an iterator over the attributes of this tag. pub fn attributes(&self) -> Attributes { Attributes::wrap(&self.buf, self.name_len, false) } /// Returns an iterator over the HTML-like attributes of this tag (no mandatory quotes or `=`). pub fn html_attributes(&self) -> Attributes { Attributes::wrap(&self.buf, self.name_len, true) } /// Gets the undecoded raw string with the attributes of this tag as a `&[u8]`, /// including the whitespace after the tag name if there is any. #[inline] pub fn attributes_raw(&self) -> &[u8] { &self.buf[self.name_len..] } /// Try to get an attribute pub fn try_get_attribute + Sized>( &'a self, attr_name: N, ) -> Result>> { for a in self.attributes().with_checks(false) { let a = a?; if a.key.as_ref() == attr_name.as_ref() { return Ok(Some(a)); } } Ok(None) } } impl<'a> Debug for BytesStart<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "BytesStart {{ buf: ")?; write_cow_string(f, &self.buf)?; write!(f, ", name_len: {} }}", self.name_len) } } impl<'a> Deref for BytesStart<'a> { type Target = [u8]; fn deref(&self) -> &[u8] { &self.buf } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// An XML declaration (`Event::Decl`). 
/// /// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd) #[derive(Clone, Debug, Eq, PartialEq)] pub struct BytesDecl<'a> { content: BytesStart<'a>, } impl<'a> BytesDecl<'a> { /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`), /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`) /// attribute. /// /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values. /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since /// the double quote character is not allowed in any of the attribute values. pub fn new( version: &str, encoding: Option<&str>, standalone: Option<&str>, ) -> BytesDecl<'static> { // Compute length of the buffer based on supplied attributes // ' encoding=""' => 12 let encoding_attr_len = if let Some(xs) = encoding { 12 + xs.len() } else { 0 }; // ' standalone=""' => 14 let standalone_attr_len = if let Some(xs) = standalone { 14 + xs.len() } else { 0 }; // 'xml version=""' => 14 let mut buf = String::with_capacity(14 + encoding_attr_len + standalone_attr_len); buf.push_str("xml version=\""); buf.push_str(version); if let Some(encoding_val) = encoding { buf.push_str("\" encoding=\""); buf.push_str(encoding_val); } if let Some(standalone_val) = standalone { buf.push_str("\" standalone=\""); buf.push_str(standalone_val); } buf.push('"'); BytesDecl { content: BytesStart::from_content(buf, 3), } } /// Creates a `BytesDecl` from a `BytesStart` pub fn from_start(start: BytesStart<'a>) -> Self { Self { content: start } } /// Gets xml version, excluding quotes (`'` or `"`). /// /// According to the [grammar], the version *must* be the first thing in the declaration. /// This method tries to extract the first thing in the declaration and return it. /// In case of multiple attributes value of the first one is returned. 
/// /// If version is missed in the declaration, or the first thing is not a version, /// [`Error::XmlDeclWithoutVersion`] will be returned. /// /// # Examples /// /// ``` /// use std::borrow::Cow; /// use quick_xml::Error; /// use quick_xml::events::{BytesDecl, BytesStart}; /// /// // /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0)); /// assert_eq!( /// decl.version().unwrap(), /// Cow::Borrowed(b"1.1".as_ref()) /// ); /// /// // /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.0' version='1.1'", 0)); /// assert_eq!( /// decl.version().unwrap(), /// Cow::Borrowed(b"1.0".as_ref()) /// ); /// /// // /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0)); /// match decl.version() { /// Err(Error::XmlDeclWithoutVersion(Some(key))) => assert_eq!(key, "encoding".to_string()), /// _ => assert!(false), /// } /// /// // /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0)); /// match decl.version() { /// Err(Error::XmlDeclWithoutVersion(Some(key))) => assert_eq!(key, "encoding".to_string()), /// _ => assert!(false), /// } /// /// // /// let decl = BytesDecl::from_start(BytesStart::from_content("", 0)); /// match decl.version() { /// Err(Error::XmlDeclWithoutVersion(None)) => {}, /// _ => assert!(false), /// } /// ``` /// /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl pub fn version(&self) -> Result> { // The version *must* be the first thing in the declaration. match self.content.attributes().with_checks(false).next() { Some(Ok(a)) if a.key.as_ref() == b"version" => Ok(a.value), // first attribute was not "version" Some(Ok(a)) => { let found = from_utf8(a.key.as_ref())?.to_string(); Err(Error::XmlDeclWithoutVersion(Some(found))) } // error parsing attributes Some(Err(e)) => Err(e.into()), // no attributes None => Err(Error::XmlDeclWithoutVersion(None)), } } /// Gets xml encoding, excluding quotes (`'` or `"`). 
/// /// Although according to the [grammar] encoding must appear before `"standalone"` /// and after `"version"`, this method does not check that. The first occurrence /// of the attribute will be returned even if there are several. Also, method does /// not restrict symbols that can forming the encoding, so the returned encoding /// name may not correspond to the grammar. /// /// # Examples /// /// ``` /// use std::borrow::Cow; /// use quick_xml::Error; /// use quick_xml::events::{BytesDecl, BytesStart}; /// /// // /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0)); /// assert!(decl.encoding().is_none()); /// /// // /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0)); /// match decl.encoding() { /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"utf-8"), /// _ => assert!(false), /// } /// /// // /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='something_WRONG' encoding='utf-8'", 0)); /// match decl.encoding() { /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"something_WRONG"), /// _ => assert!(false), /// } /// ``` /// /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl pub fn encoding(&self) -> Option>> { self.content .try_get_attribute("encoding") .map(|a| a.map(|a| a.value)) .transpose() } /// Gets xml standalone, excluding quotes (`'` or `"`). /// /// Although according to the [grammar] standalone flag must appear after `"version"` /// and `"encoding"`, this method does not check that. The first occurrence of the /// attribute will be returned even if there are several. Also, method does not /// restrict symbols that can forming the value, so the returned flag name may not /// correspond to the grammar. 
/// /// # Examples /// /// ``` /// use std::borrow::Cow; /// use quick_xml::Error; /// use quick_xml::events::{BytesDecl, BytesStart}; /// /// // /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0)); /// assert!(decl.standalone().is_none()); /// /// // /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='yes'", 0)); /// match decl.standalone() { /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"yes"), /// _ => assert!(false), /// } /// /// // /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='something_WRONG' encoding='utf-8'", 0)); /// match decl.standalone() { /// Some(Ok(Cow::Borrowed(flag))) => assert_eq!(flag, b"something_WRONG"), /// _ => assert!(false), /// } /// ``` /// /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl pub fn standalone(&self) -> Option>> { self.content .try_get_attribute("standalone") .map(|a| a.map(|a| a.value)) .transpose() } /// Gets the actual encoding using [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get) /// algorithm. /// /// If encoding in not known, or `encoding` key was not found, returns `None`. /// In case of duplicated `encoding` key, encoding, corresponding to the first /// one, is returned. #[cfg(feature = "encoding")] pub fn encoder(&self) -> Option<&'static Encoding> { self.encoding() .and_then(|e| e.ok()) .and_then(|e| Encoding::for_label(&e)) } /// Converts the event into an owned event. pub fn into_owned(self) -> BytesDecl<'static> { BytesDecl { content: self.content.into_owned(), } } /// Converts the event into a borrowed event. 
#[inline] pub fn borrow(&self) -> BytesDecl { BytesDecl { content: self.content.borrow(), } } } impl<'a> Deref for BytesDecl<'a> { type Target = [u8]; fn deref(&self) -> &[u8] { &self.content } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A struct to manage `Event::End` events #[derive(Clone, Eq, PartialEq)] pub struct BytesEnd<'a> { name: Cow<'a, [u8]>, } impl<'a> BytesEnd<'a> { /// Internal constructor, used by `Reader`. Supplies data in reader's encoding #[inline] pub(crate) fn wrap(name: Cow<'a, [u8]>) -> Self { BytesEnd { name } } /// Creates a new `BytesEnd` borrowing a slice. /// /// # Warning /// /// `name` must be a valid name. #[inline] pub fn new>>(name: C) -> Self { Self::wrap(str_cow_to_bytes(name)) } /// Converts the event into an owned event. pub fn into_owned(self) -> BytesEnd<'static> { BytesEnd { name: Cow::Owned(self.name.into_owned()), } } /// Converts the event into a borrowed event. #[inline] pub fn borrow(&self) -> BytesEnd { BytesEnd { name: Cow::Borrowed(&self.name), } } /// Gets the undecoded raw tag name, as present in the input stream. #[inline] pub fn name(&self) -> QName { QName(&self.name) } /// Gets the undecoded raw local tag name (excluding namespace) as present /// in the input stream. /// /// All content up to and including the first `:` character is removed from the tag name. #[inline] pub fn local_name(&self) -> LocalName { self.name().into() } } impl<'a> Debug for BytesEnd<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "BytesEnd {{ name: ")?; write_cow_string(f, &self.name)?; write!(f, " }}") } } impl<'a> Deref for BytesEnd<'a> { type Target = [u8]; fn deref(&self) -> &[u8] { &self.name } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Data from various events (most notably, `Event::Text`) that stored in XML /// in escaped form. 
Internally data is stored in escaped form #[derive(Clone, Eq, PartialEq)] pub struct BytesText<'a> { /// Escaped then encoded content of the event. Content is encoded in the XML /// document encoding when event comes from the reader and should be in the /// document encoding when event passed to the writer content: Cow<'a, [u8]>, /// Encoding in which the `content` is stored inside the event decoder: Decoder, } impl<'a> BytesText<'a> { /// Creates a new `BytesText` from an escaped byte sequence in the specified encoding. #[inline] pub(crate) fn wrap>>(content: C, decoder: Decoder) -> Self { Self { content: content.into(), decoder, } } /// Creates a new `BytesText` from an escaped string. #[inline] pub fn from_escaped>>(content: C) -> Self { Self::wrap(str_cow_to_bytes(content), Decoder::utf8()) } /// Creates a new `BytesText` from a string. The string is expected not to /// be escaped. #[inline] pub fn new(content: &'a str) -> Self { Self::from_escaped(escape(content)) } /// Ensures that all data is owned to extend the object's lifetime if /// necessary. #[inline] pub fn into_owned(self) -> BytesText<'static> { BytesText { content: self.content.into_owned().into(), decoder: self.decoder, } } /// Extracts the inner `Cow` from the `BytesText` event container. #[inline] pub fn into_inner(self) -> Cow<'a, [u8]> { self.content } /// Converts the event into a borrowed event. #[inline] pub fn borrow(&self) -> BytesText { BytesText { content: Cow::Borrowed(&self.content), decoder: self.decoder, } } /// Decodes then unescapes the content of the event. /// /// This will allocate if the value contains any escape sequences or in /// non-UTF-8 encoding. pub fn unescape(&self) -> Result> { self.unescape_with(|_| None) } /// Decodes then unescapes the content of the event with custom entities. /// /// This will allocate if the value contains any escape sequences or in /// non-UTF-8 encoding. 
pub fn unescape_with<'entity>( &self, resolve_entity: impl Fn(&str) -> Option<&'entity str>, ) -> Result> { let decoded = match &self.content { Cow::Borrowed(bytes) => self.decoder.decode(bytes)?, // Convert to owned, because otherwise Cow will be bound with wrong lifetime Cow::Owned(bytes) => self.decoder.decode(bytes)?.into_owned().into(), }; match unescape_with(&decoded, resolve_entity)? { // Because result is borrowed, no replacements was done and we can use original string Cow::Borrowed(_) => Ok(decoded), Cow::Owned(s) => Ok(s.into()), } } /// Gets content of this text buffer in the specified encoding and optionally /// unescapes it. #[cfg(feature = "serialize")] pub(crate) fn decode(&self, unescape: bool) -> Result> { let text = match &self.content { Cow::Borrowed(bytes) => self.decoder.decode(bytes)?, // Convert to owned, because otherwise Cow will be bound with wrong lifetime Cow::Owned(bytes) => self.decoder.decode(bytes)?.into_owned().into(), }; let text = if unescape { //FIXME: need to take into account entities defined in the document match unescape_with(&text, |_| None)? { // Because result is borrowed, no replacements was done and we can use original string Cow::Borrowed(_) => text, Cow::Owned(s) => s.into(), } } else { text }; Ok(text) } } impl<'a> Debug for BytesText<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "BytesText {{ content: ")?; write_cow_string(f, &self.content)?; write!(f, " }}") } } impl<'a> Deref for BytesText<'a> { type Target = [u8]; fn deref(&self) -> &[u8] { &self.content } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// CDATA content contains unescaped data from the reader. 
If you want to write them as a text, /// [convert](Self::escape) it to [`BytesText`] #[derive(Clone, Eq, PartialEq)] pub struct BytesCData<'a> { content: Cow<'a, [u8]>, /// Encoding in which the `content` is stored inside the event decoder: Decoder, } impl<'a> BytesCData<'a> { /// Creates a new `BytesCData` from a byte sequence in the specified encoding. #[inline] pub(crate) fn wrap>>(content: C, decoder: Decoder) -> Self { Self { content: content.into(), decoder, } } /// Creates a new `BytesCData` from a string. /// /// # Warning /// /// `content` must not contain the `]]>` sequence. #[inline] pub fn new>>(content: C) -> Self { Self::wrap(str_cow_to_bytes(content), Decoder::utf8()) } /// Ensures that all data is owned to extend the object's lifetime if /// necessary. #[inline] pub fn into_owned(self) -> BytesCData<'static> { BytesCData { content: self.content.into_owned().into(), decoder: self.decoder, } } /// Extracts the inner `Cow` from the `BytesCData` event container. #[inline] pub fn into_inner(self) -> Cow<'a, [u8]> { self.content } /// Converts the event into a borrowed event. #[inline] pub fn borrow(&self) -> BytesCData { BytesCData { content: Cow::Borrowed(&self.content), decoder: self.decoder, } } /// Converts this CDATA content to an escaped version, that can be written /// as an usual text in XML. /// /// This function performs following replacements: /// /// | Character | Replacement /// |-----------|------------ /// | `<` | `<` /// | `>` | `>` /// | `&` | `&` /// | `'` | `'` /// | `"` | `"` pub fn escape(self) -> Result> { let decoded = self.decode()?; Ok(BytesText::wrap( match escape(&decoded) { // Because result is borrowed, no replacements was done and we can use original content Cow::Borrowed(_) => self.content, Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()), }, Decoder::utf8(), )) } /// Converts this CDATA content to an escaped version, that can be written /// as an usual text in XML. 
/// /// In XML text content, it is allowed (though not recommended) to leave /// the quote special characters `"` and `'` unescaped. /// /// This function performs following replacements: /// /// | Character | Replacement /// |-----------|------------ /// | `<` | `<` /// | `>` | `>` /// | `&` | `&` pub fn partial_escape(self) -> Result> { let decoded = self.decode()?; Ok(BytesText::wrap( match partial_escape(&decoded) { // Because result is borrowed, no replacements was done and we can use original content Cow::Borrowed(_) => self.content, Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()), }, Decoder::utf8(), )) } /// Gets content of this text buffer in the specified encoding pub(crate) fn decode(&self) -> Result> { Ok(match &self.content { Cow::Borrowed(bytes) => self.decoder.decode(bytes)?, // Convert to owned, because otherwise Cow will be bound with wrong lifetime Cow::Owned(bytes) => self.decoder.decode(bytes)?.into_owned().into(), }) } } impl<'a> Debug for BytesCData<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "BytesCData {{ content: ")?; write_cow_string(f, &self.content)?; write!(f, " }}") } } impl<'a> Deref for BytesCData<'a> { type Target = [u8]; fn deref(&self) -> &[u8] { &self.content } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Event emitted by [`Reader::read_event_into`]. /// /// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into #[derive(Clone, Debug, Eq, PartialEq)] pub enum Event<'a> { /// Start tag (with attributes) ``. Start(BytesStart<'a>), /// End tag ``. End(BytesEnd<'a>), /// Empty element tag (with attributes) ``. Empty(BytesStart<'a>), /// Character data between `Start` and `End` element. Text(BytesText<'a>), /// Comment ``. Comment(BytesText<'a>), /// CData ``. CData(BytesCData<'a>), /// XML declaration ``. Decl(BytesDecl<'a>), /// Processing instruction ``. PI(BytesText<'a>), /// Doctype ``. 
DocType(BytesText<'a>), /// End of XML document. Eof, } impl<'a> Event<'a> { /// Converts the event to an owned version, untied to the lifetime of /// buffer used when reading but incurring a new, separate allocation. pub fn into_owned(self) -> Event<'static> { match self { Event::Start(e) => Event::Start(e.into_owned()), Event::End(e) => Event::End(e.into_owned()), Event::Empty(e) => Event::Empty(e.into_owned()), Event::Text(e) => Event::Text(e.into_owned()), Event::Comment(e) => Event::Comment(e.into_owned()), Event::CData(e) => Event::CData(e.into_owned()), Event::Decl(e) => Event::Decl(e.into_owned()), Event::PI(e) => Event::PI(e.into_owned()), Event::DocType(e) => Event::DocType(e.into_owned()), Event::Eof => Event::Eof, } } /// Converts the event into a borrowed event. #[inline] pub fn borrow(&self) -> Event { match self { Event::Start(e) => Event::Start(e.borrow()), Event::End(e) => Event::End(e.borrow()), Event::Empty(e) => Event::Empty(e.borrow()), Event::Text(e) => Event::Text(e.borrow()), Event::Comment(e) => Event::Comment(e.borrow()), Event::CData(e) => Event::CData(e.borrow()), Event::Decl(e) => Event::Decl(e.borrow()), Event::PI(e) => Event::PI(e.borrow()), Event::DocType(e) => Event::DocType(e.borrow()), Event::Eof => Event::Eof, } } } impl<'a> Deref for Event<'a> { type Target = [u8]; fn deref(&self) -> &[u8] { match *self { Event::Start(ref e) | Event::Empty(ref e) => e, Event::End(ref e) => e, Event::Text(ref e) => e, Event::Decl(ref e) => e, Event::PI(ref e) => e, Event::CData(ref e) => e, Event::Comment(ref e) => e, Event::DocType(ref e) => e, Event::Eof => &[], } } } impl<'a> AsRef> for Event<'a> { fn as_ref(&self) -> &Event<'a> { self } } //////////////////////////////////////////////////////////////////////////////////////////////////// #[inline] fn str_cow_to_bytes<'a, C: Into>>(content: C) -> Cow<'a, [u8]> { match content.into() { Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()), Cow::Owned(s) => Cow::Owned(s.into_bytes()), } } #[cfg(test)] 
mod test { use super::*; use pretty_assertions::assert_eq; #[test] fn bytestart_create() { let b = BytesStart::new("test"); assert_eq!(b.len(), 4); assert_eq!(b.name(), QName(b"test")); } #[test] fn bytestart_set_name() { let mut b = BytesStart::new("test"); assert_eq!(b.len(), 4); assert_eq!(b.name(), QName(b"test")); assert_eq!(b.attributes_raw(), b""); b.push_attribute(("x", "a")); assert_eq!(b.len(), 10); assert_eq!(b.attributes_raw(), b" x=\"a\""); b.set_name(b"g"); assert_eq!(b.len(), 7); assert_eq!(b.name(), QName(b"g")); } #[test] fn bytestart_clear_attributes() { let mut b = BytesStart::new("test"); b.push_attribute(("x", "y\"z")); b.push_attribute(("x", "y\"z")); b.clear_attributes(); assert!(b.attributes().next().is_none()); assert_eq!(b.len(), 4); assert_eq!(b.name(), QName(b"test")); } } quick-xml-0.27.1/src/lib.rs000064400000000000000000000045510072674642500135650ustar 00000000000000//! High performance XML reader/writer. //! //! # Description //! //! quick-xml contains two modes of operation: //! //! A streaming API based on the [StAX] model. This is suited for larger XML documents which //! cannot completely read into memory at once. //! //! The user has to explicitly _ask_ for the next XML event, similar to a database cursor. //! This is achieved by the following two structs: //! //! - [`Reader`]: A low level XML pull-reader where buffer allocation/clearing is left to user. //! - [`Writer`]: A XML writer. Can be nested with readers if you want to transform XMLs. //! //! Especially for nested XML elements, the user must keep track _where_ (how deep) //! in the XML document the current event is located. //! //! quick-xml contains optional support of asynchronous reading using [tokio]. //! //! Furthermore, quick-xml also contains optional [Serde] support to directly //! serialize and deserialize from structs, without having to deal with the XML events. //! //! # Examples //! //! - For a reading example see [`Reader`] //! 
- For a writing example see [`Writer`] //! //! # Features //! //! `quick-xml` supports the following features: //! //! [StAX]: https://en.wikipedia.org/wiki/StAX //! [tokio]: https://tokio.rs/ //! [Serde]: https://serde.rs/ #![cfg_attr( feature = "document-features", cfg_attr(doc, doc = ::document_features::document_features!()) )] #![forbid(unsafe_code)] #![deny(missing_docs)] #![recursion_limit = "1024"] // Enable feature requirements in the docs from 1.57 // See https://stackoverflow.com/questions/61417452 #![cfg_attr(docs_rs, feature(doc_auto_cfg))] #[cfg(feature = "serialize")] pub mod de; pub mod encoding; mod errors; mod escapei; pub mod escape { //! Manage xml character escapes pub(crate) use crate::escapei::EscapeError; pub use crate::escapei::{escape, partial_escape, unescape, unescape_with}; } pub mod events; pub mod name; pub mod reader; #[cfg(feature = "serialize")] pub mod se; /// Not an official API, public for integration tests #[doc(hidden)] pub mod utils; pub mod writer; // reexports pub use crate::encoding::Decoder; #[cfg(feature = "serialize")] pub use crate::errors::serialize::DeError; pub use crate::errors::{Error, Result}; pub use crate::reader::{NsReader, Reader}; pub use crate::writer::{ElementWriter, Writer}; quick-xml-0.27.1/src/name.rs000064400000000000000000000772340072674642500137470ustar 00000000000000//! Module for handling names according to the W3C [Namespaces in XML 1.1 (Second Edition)][spec] //! specification //! //! [spec]: https://www.w3.org/TR/xml-names11 use crate::errors::{Error, Result}; use crate::events::attributes::Attribute; use crate::events::BytesStart; use crate::utils::write_byte_string; use memchr::memchr; use std::convert::TryFrom; use std::fmt::{self, Debug, Formatter}; /// A [qualified name] of an element or an attribute, including an optional /// namespace [prefix](Prefix) and a [local name](LocalName). 
/// /// [qualified name]: https://www.w3.org/TR/xml-names11/#dt-qualname #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde-types", derive(serde::Deserialize, serde::Serialize))] pub struct QName<'a>(pub &'a [u8]); impl<'a> QName<'a> { /// Converts this name to an internal slice representation. #[inline(always)] pub fn into_inner(self) -> &'a [u8] { self.0 } /// Returns local part of this qualified name. /// /// All content up to and including the first `:` character is removed from /// the tag name. /// /// # Examples /// /// ``` /// # use quick_xml::name::QName; /// let simple = QName(b"simple-name"); /// assert_eq!(simple.local_name().as_ref(), b"simple-name"); /// /// let qname = QName(b"namespace:simple-name"); /// assert_eq!(qname.local_name().as_ref(), b"simple-name"); /// ``` pub fn local_name(&self) -> LocalName<'a> { LocalName(self.index().map_or(self.0, |i| &self.0[i + 1..])) } /// Returns namespace part of this qualified name or `None` if namespace part /// is not defined (symbol `':'` not found). /// /// # Examples /// /// ``` /// # use std::convert::AsRef; /// # use quick_xml::name::QName; /// let simple = QName(b"simple-name"); /// assert_eq!(simple.prefix(), None); /// /// let qname = QName(b"prefix:simple-name"); /// assert_eq!(qname.prefix().as_ref().map(|n| n.as_ref()), Some(b"prefix".as_ref())); /// ``` pub fn prefix(&self) -> Option> { self.index().map(|i| Prefix(&self.0[..i])) } /// The same as `(qname.local_name(), qname.prefix())`, but does only one /// lookup for a `':'` symbol. pub fn decompose(&self) -> (LocalName<'a>, Option>) { match self.index() { None => (LocalName(self.0), None), Some(i) => (LocalName(&self.0[i + 1..]), Some(Prefix(&self.0[..i]))), } } /// If that `QName` represents `"xmlns"` series of names, returns `Some`, /// otherwise `None` is returned. 
/// /// # Examples /// /// ``` /// # use quick_xml::name::{QName, PrefixDeclaration}; /// let qname = QName(b"xmlns"); /// assert_eq!(qname.as_namespace_binding(), Some(PrefixDeclaration::Default)); /// /// let qname = QName(b"xmlns:prefix"); /// assert_eq!(qname.as_namespace_binding(), Some(PrefixDeclaration::Named(b"prefix"))); /// /// // Be aware that this method does not check the validity of the prefix - it can be empty! /// let qname = QName(b"xmlns:"); /// assert_eq!(qname.as_namespace_binding(), Some(PrefixDeclaration::Named(b""))); /// /// let qname = QName(b"other-name"); /// assert_eq!(qname.as_namespace_binding(), None); /// /// // https://www.w3.org/TR/xml-names11/#xmlReserved /// let qname = QName(b"xmlns-reserved-name"); /// assert_eq!(qname.as_namespace_binding(), None); /// ``` pub fn as_namespace_binding(&self) -> Option> { if self.0.starts_with(b"xmlns") { return match self.0.get(5) { None => Some(PrefixDeclaration::Default), Some(&b':') => Some(PrefixDeclaration::Named(&self.0[6..])), _ => None, }; } None } /// Returns the index in the name where prefix ended #[inline(always)] fn index(&self) -> Option { memchr(b':', self.0) } } impl<'a> Debug for QName<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "QName(")?; write_byte_string(f, self.0)?; write!(f, ")") } } impl<'a> AsRef<[u8]> for QName<'a> { #[inline] fn as_ref(&self) -> &[u8] { self.0 } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A [local (unqualified) name] of an element or an attribute, i.e. a name /// without [prefix](Prefix). /// /// [local (unqualified) name]: https://www.w3.org/TR/xml-names11/#dt-localname #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde-types", derive(serde::Deserialize, serde::Serialize))] pub struct LocalName<'a>(&'a [u8]); impl<'a> LocalName<'a> { /// Converts this name to an internal slice representation. 
#[inline(always)] pub fn into_inner(self) -> &'a [u8] { self.0 } } impl<'a> Debug for LocalName<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "LocalName(")?; write_byte_string(f, self.0)?; write!(f, ")") } } impl<'a> AsRef<[u8]> for LocalName<'a> { #[inline] fn as_ref(&self) -> &[u8] { self.0 } } impl<'a> From> for LocalName<'a> { /// Creates `LocalName` from a [`QName`] /// /// # Examples /// /// ``` /// # use quick_xml::name::{LocalName, QName}; /// /// let local: LocalName = QName(b"unprefixed").into(); /// assert_eq!(local.as_ref(), b"unprefixed"); /// /// let local: LocalName = QName(b"some:prefix").into(); /// assert_eq!(local.as_ref(), b"prefix"); /// ``` #[inline] fn from(name: QName<'a>) -> Self { Self(name.index().map_or(name.0, |i| &name.0[i + 1..])) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A [namespace prefix] part of the [qualified name](QName) of an element tag /// or an attribute: a `prefix` in `` or /// `prefix:local-attribute-name="attribute value"`. 
/// /// [namespace prefix]: https://www.w3.org/TR/xml-names11/#dt-prefix #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde-types", derive(serde::Deserialize, serde::Serialize))] pub struct Prefix<'a>(&'a [u8]); impl<'a> Prefix<'a> { /// Extracts internal slice #[inline(always)] pub fn into_inner(self) -> &'a [u8] { self.0 } } impl<'a> Debug for Prefix<'a> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!(f, "Prefix(")?; write_byte_string(f, self.0)?; write!(f, ")") } } impl<'a> AsRef<[u8]> for Prefix<'a> { #[inline] fn as_ref(&self) -> &[u8] { self.0 } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A namespace prefix declaration, `xmlns` or `xmlns:`, as defined in /// [XML Schema specification](https://www.w3.org/TR/xml-names/#ns-decl) #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum PrefixDeclaration<'a> { /// XML attribute binds a default namespace. Corresponds to `xmlns` in `xmlns="..."` Default, /// XML attribute binds a specified prefix to a namespace. Corresponds to a /// `prefix` in `xmlns:prefix="..."`, which is stored as payload of this variant. Named(&'a [u8]), } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A [namespace name] that is declared in a `xmlns[:prefix]="namespace name"`. /// /// [namespace name]: https://www.w3.org/TR/xml-names11/#dt-NSName #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde-types", derive(serde::Deserialize, serde::Serialize))] pub struct Namespace<'a>(pub &'a [u8]); impl<'a> Namespace<'a> { /// Converts this namespace to an internal slice representation. /// /// This is [non-normalized] attribute value, i.e. any entity references is /// not expanded and space characters are not removed. 
This means, that /// different byte slices, returned from this method, can represent the same /// namespace and would be treated by parser as identical. /// /// For example, if the entity **eacute** has been defined to be **é**, /// the empty tags below all contain namespace declarations binding the /// prefix `p` to the same [IRI reference], `http://example.org/rosé`. /// /// ```xml /// /// /// /// /// /// ``` /// /// This is because XML entity references are expanded during attribute value /// normalization. /// /// [non-normalized]: https://www.w3.org/TR/REC-xml/#AVNormalize /// [IRI reference]: https://datatracker.ietf.org/doc/html/rfc3987 #[inline(always)] pub fn into_inner(self) -> &'a [u8] { self.0 } //TODO: implement value normalization and use it when comparing namespaces } impl<'a> Debug for Namespace<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "Namespace(")?; write_byte_string(f, self.0)?; write!(f, ")") } } impl<'a> AsRef<[u8]> for Namespace<'a> { #[inline] fn as_ref(&self) -> &[u8] { self.0 } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Result of [prefix] resolution which creates by [`NsReader::resolve_attribute`], /// [`NsReader::resolve_element`], [`NsReader::read_resolved_event`] and /// [`NsReader::read_resolved_event_into`] methods. 
///
/// [prefix]: Prefix
/// [`NsReader::resolve_attribute`]: crate::reader::NsReader::resolve_attribute
/// [`NsReader::resolve_element`]: crate::reader::NsReader::resolve_element
/// [`NsReader::read_resolved_event`]: crate::reader::NsReader::read_resolved_event
/// [`NsReader::read_resolved_event_into`]: crate::reader::NsReader::read_resolved_event_into
#[derive(Clone, PartialEq, Eq, Hash)]
pub enum ResolveResult<'ns> {
    /// Qualified name does not contain prefix, and resolver does not define
    /// default namespace, so name is not bound to any namespace
    Unbound,
    /// [`Prefix`] resolved to the specified namespace
    Bound(Namespace<'ns>),
    /// Specified prefix was not found in scope. The payload is a copy of the
    /// bytes of that prefix
    Unknown(Vec<u8>),
}

impl<'ns> Debug for ResolveResult<'ns> {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        match self {
            Self::Unbound => write!(f, "Unbound"),
            Self::Bound(ns) => write!(f, "Bound({:?})", ns),
            Self::Unknown(p) => {
                write!(f, "Unknown(")?;
                write_byte_string(f, p)?;
                write!(f, ")")
            }
        }
    }
}

impl<'ns> TryFrom<ResolveResult<'ns>> for Option<Namespace<'ns>> {
    type Error = Error;

    /// Try to convert this result to an optional namespace and returns
    /// [`Error::UnknownPrefix`] if this result represents unknown prefix
    fn try_from(result: ResolveResult<'ns>) -> Result<Self> {
        use ResolveResult::*;

        match result {
            Unbound => Ok(None),
            Bound(ns) => Ok(Some(ns)),
            Unknown(p) => Err(Error::UnknownPrefix(p)),
        }
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// An entry that contains index into the buffer with namespace bindings.
///
/// Defines a mapping from *[namespace prefix]* to *[namespace name]*.
/// If prefix is empty, defines a *default namespace* binding that applies to
/// unprefixed element names (unprefixed attribute names do not bind to any
/// namespace and their processing is dependent on the element in which they
/// are defined).
/// /// [namespace prefix]: https://www.w3.org/TR/xml-names11/#dt-prefix /// [namespace name]: https://www.w3.org/TR/xml-names11/#dt-NSName #[derive(Debug, Clone)] struct NamespaceEntry { /// Index of the namespace in the buffer start: usize, /// Length of the prefix /// * if greater than zero, then binds this namespace to the slice /// `[start..start + prefix_len]` in the buffer. /// * else defines the current default namespace. prefix_len: usize, /// The length of a namespace name (the URI) of this namespace declaration. /// Name started just after prefix and extend for `value_len` bytes. /// /// The XML standard [specifies] that an empty namespace value 'removes' a namespace declaration /// for the extent of its scope. For prefix declarations that's not very interesting, but it is /// vital for default namespace declarations. With `xmlns=""` you can revert back to the default /// behaviour of leaving unqualified element names unqualified. /// /// [specifies]: https://www.w3.org/TR/xml-names11/#scoping value_len: usize, /// Level of nesting at which this namespace was declared. The declaring element is included, /// i.e., a declaration on the document root has `level = 1`. /// This is used to pop the namespace when the element gets closed. level: i32, } impl NamespaceEntry { /// Get the namespace prefix, bound to this namespace declaration, or `None`, /// if this declaration is for default namespace (`xmlns="..."`). 
#[inline] fn prefix<'b>(&self, ns_buffer: &'b [u8]) -> Option> { if self.prefix_len == 0 { None } else { Some(Prefix(&ns_buffer[self.start..self.start + self.prefix_len])) } } /// Gets the namespace name (the URI) slice out of namespace buffer /// /// Returns `None` if namespace for this prefix was explicitly removed from /// scope, using `xmlns[:prefix]=""` #[inline] fn namespace<'ns>(&self, buffer: &'ns [u8]) -> ResolveResult<'ns> { if self.value_len == 0 { ResolveResult::Unbound } else { let start = self.start + self.prefix_len; ResolveResult::Bound(Namespace(&buffer[start..start + self.value_len])) } } } /// A namespace management buffer. /// /// Holds all internal logic to push/pop namespaces with their levels. #[derive(Debug, Default, Clone)] pub(crate) struct NamespaceResolver { /// A stack of namespace bindings to prefixes that currently in scope bindings: Vec, /// The number of open tags at the moment. We need to keep track of this to know which namespace /// declarations to remove when we encounter an `End` event. nesting_level: i32, } impl NamespaceResolver { /// Begins a new scope and add to it all [namespace bindings] that found in /// the specified start element. /// /// [namespace binding]: https://www.w3.org/TR/xml-names11/#dt-NSDecl pub fn push(&mut self, start: &BytesStart, buffer: &mut Vec) { self.nesting_level += 1; let level = self.nesting_level; // adds new namespaces for attributes starting with 'xmlns:' and for the 'xmlns' // (default namespace) attribute. 
for a in start.attributes().with_checks(false) { if let Ok(Attribute { key: k, value: v }) = a { match k.as_namespace_binding() { Some(PrefixDeclaration::Default) => { let start = buffer.len(); buffer.extend_from_slice(&v); self.bindings.push(NamespaceEntry { start, prefix_len: 0, value_len: v.len(), level, }); } Some(PrefixDeclaration::Named(prefix)) => { let start = buffer.len(); buffer.extend_from_slice(prefix); buffer.extend_from_slice(&v); self.bindings.push(NamespaceEntry { start, prefix_len: prefix.len(), value_len: v.len(), level, }); } None => {} } } else { break; } } } /// Ends a top-most scope by popping all [namespace binding], that was added by /// last call to [`Self::push()`]. /// /// [namespace binding]: https://www.w3.org/TR/xml-names11/#dt-NSDecl pub fn pop(&mut self, buffer: &mut Vec) { self.nesting_level -= 1; let current_level = self.nesting_level; // from the back (most deeply nested scope), look for the first scope that is still valid match self.bindings.iter().rposition(|n| n.level <= current_level) { // none of the namespaces are valid, remove all of them None => { buffer.clear(); self.bindings.clear(); } // drop all namespaces past the last valid namespace Some(last_valid_pos) => { if let Some(len) = self.bindings.get(last_valid_pos + 1).map(|n| n.start) { buffer.truncate(len); self.bindings.truncate(last_valid_pos + 1); } } } } /// Resolves a potentially qualified **element name** or **attribute name** /// into (namespace name, local name). /// /// *Qualified* names have the form `prefix:local-name` where the `prefix` is /// defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`. /// The namespace prefix can be defined on the same element as the element or /// attribute in question. /// /// *Unqualified* attribute names do *not* inherit the current *default namespace*. 
/// /// # Lifetimes /// /// - `'n`: lifetime of an attribute or an element name /// - `'ns`: lifetime of a namespaces buffer, where all found namespaces are stored #[inline] pub fn resolve<'n, 'ns>( &self, name: QName<'n>, buffer: &'ns [u8], use_default: bool, ) -> (ResolveResult<'ns>, LocalName<'n>) { let (local_name, prefix) = name.decompose(); (self.resolve_prefix(prefix, buffer, use_default), local_name) } /// Finds a [namespace name] for a given qualified **element name**, borrow /// it from the specified buffer. /// /// Returns `None`, if: /// - name is unqualified /// - prefix not found in the current scope /// - prefix was [unbound] using `xmlns:prefix=""` /// /// # Lifetimes /// /// - `'ns`: lifetime of a namespaces buffer, where all found namespaces are stored /// /// [namespace name]: https://www.w3.org/TR/xml-names11/#dt-NSName /// [unbound]: https://www.w3.org/TR/xml-names11/#scoping #[inline] pub fn find<'ns>(&self, element_name: QName, buffer: &'ns [u8]) -> ResolveResult<'ns> { self.resolve_prefix(element_name.prefix(), buffer, true) } fn resolve_prefix<'ns>( &self, prefix: Option, buffer: &'ns [u8], use_default: bool, ) -> ResolveResult<'ns> { self.bindings .iter() // Find the last defined binding that corresponds to the given prefix .rev() .find_map(|n| match (n.prefix(buffer), prefix) { // This is default namespace definition and name has no explicit prefix (None, None) if use_default => Some(n.namespace(buffer)), (None, None) => Some(ResolveResult::Unbound), // One part has prefix but other is not -> skip (None, Some(_)) => None, (Some(_), None) => None, // Prefixes does not match -> skip (Some(definition), Some(usage)) if definition != usage => None, // Prefixes the same, entry defines binding reset (corresponds to `xmlns:p=""`) _ if n.value_len == 0 => Some(Self::maybe_unknown(prefix)), // Prefixes the same, returns corresponding namespace _ => Some(n.namespace(buffer)), }) .unwrap_or_else(|| Self::maybe_unknown(prefix)) } #[inline] fn 
maybe_unknown(prefix: Option) -> ResolveResult<'static> { match prefix { Some(p) => ResolveResult::Unknown(p.into_inner().to_vec()), None => ResolveResult::Unbound, } } } #[cfg(test)] mod namespaces { use super::*; use pretty_assertions::assert_eq; use ResolveResult::*; /// Unprefixed attribute names (resolved with `false` flag) never have a namespace /// according to : /// /// > A default namespace declaration applies to all unprefixed element names /// > within its scope. Default namespace declarations do not apply directly /// > to attribute names; the interpretation of unprefixed attributes is /// > determined by the element on which they appear. mod unprefixed { use super::*; use pretty_assertions::assert_eq; /// Basic tests that checks that basic resolver functionality is working #[test] fn basic() { let name = QName(b"simple"); let ns = Namespace(b"default"); let mut resolver = NamespaceResolver::default(); let mut buffer = Vec::new(); resolver.push( &BytesStart::from_content(" xmlns='default'", 0), &mut buffer, ); assert_eq!(buffer, b"default"); // Check that tags without namespaces does not change result resolver.push(&BytesStart::from_content("", 0), &mut buffer); assert_eq!(buffer, b"default"); resolver.pop(&mut buffer); assert_eq!(buffer, b"default"); assert_eq!( resolver.resolve(name, &buffer, true), (Bound(ns), LocalName(b"simple")) ); assert_eq!( resolver.resolve(name, &buffer, false), (Unbound, LocalName(b"simple")) ); assert_eq!(resolver.find(name, &buffer), Bound(ns)); } /// Test adding a second level of namespaces, which replaces the previous binding #[test] fn override_namespace() { let name = QName(b"simple"); let old_ns = Namespace(b"old"); let new_ns = Namespace(b"new"); let mut resolver = NamespaceResolver::default(); let mut buffer = Vec::new(); resolver.push(&BytesStart::from_content(" xmlns='old'", 0), &mut buffer); resolver.push(&BytesStart::from_content(" xmlns='new'", 0), &mut buffer); assert_eq!(buffer, b"oldnew"); assert_eq!( 
resolver.resolve(name, &buffer, true), (Bound(new_ns), LocalName(b"simple")) ); assert_eq!( resolver.resolve(name, &buffer, false), (Unbound, LocalName(b"simple")) ); assert_eq!(resolver.find(name, &buffer), Bound(new_ns)); resolver.pop(&mut buffer); assert_eq!(buffer, b"old"); assert_eq!( resolver.resolve(name, &buffer, true), (Bound(old_ns), LocalName(b"simple")) ); assert_eq!( resolver.resolve(name, &buffer, false), (Unbound, LocalName(b"simple")) ); assert_eq!(resolver.find(name, &buffer), Bound(old_ns)); } /// Test adding a second level of namespaces, which reset the previous binding /// to not bound state by specifying an empty namespace name. /// /// See #[test] fn reset() { let name = QName(b"simple"); let old_ns = Namespace(b"old"); let mut resolver = NamespaceResolver::default(); let mut buffer = Vec::new(); resolver.push(&BytesStart::from_content(" xmlns='old'", 0), &mut buffer); resolver.push(&BytesStart::from_content(" xmlns=''", 0), &mut buffer); assert_eq!(buffer, b"old"); assert_eq!( resolver.resolve(name, &buffer, true), (Unbound, LocalName(b"simple")) ); assert_eq!( resolver.resolve(name, &buffer, false), (Unbound, LocalName(b"simple")) ); assert_eq!(resolver.find(name, &buffer), Unbound); resolver.pop(&mut buffer); assert_eq!(buffer, b"old"); assert_eq!( resolver.resolve(name, &buffer, true), (Bound(old_ns), LocalName(b"simple")) ); assert_eq!( resolver.resolve(name, &buffer, false), (Unbound, LocalName(b"simple")) ); assert_eq!(resolver.find(name, &buffer), Bound(old_ns)); } } mod declared_prefix { use super::*; use pretty_assertions::assert_eq; /// Basic tests that checks that basic resolver functionality is working #[test] fn basic() { let name = QName(b"p:with-declared-prefix"); let ns = Namespace(b"default"); let mut resolver = NamespaceResolver::default(); let mut buffer = Vec::new(); resolver.push( &BytesStart::from_content(" xmlns:p='default'", 0), &mut buffer, ); assert_eq!(buffer, b"pdefault"); // Check that tags without namespaces does 
not change result resolver.push(&BytesStart::from_content("", 0), &mut buffer); assert_eq!(buffer, b"pdefault"); resolver.pop(&mut buffer); assert_eq!(buffer, b"pdefault"); assert_eq!( resolver.resolve(name, &buffer, true), (Bound(ns), LocalName(b"with-declared-prefix")) ); assert_eq!( resolver.resolve(name, &buffer, false), (Bound(ns), LocalName(b"with-declared-prefix")) ); assert_eq!(resolver.find(name, &buffer), Bound(ns)); } /// Test adding a second level of namespaces, which replaces the previous binding #[test] fn override_namespace() { let name = QName(b"p:with-declared-prefix"); let old_ns = Namespace(b"old"); let new_ns = Namespace(b"new"); let mut resolver = NamespaceResolver::default(); let mut buffer = Vec::new(); resolver.push(&BytesStart::from_content(" xmlns:p='old'", 0), &mut buffer); resolver.push(&BytesStart::from_content(" xmlns:p='new'", 0), &mut buffer); assert_eq!(buffer, b"poldpnew"); assert_eq!( resolver.resolve(name, &buffer, true), (Bound(new_ns), LocalName(b"with-declared-prefix")) ); assert_eq!( resolver.resolve(name, &buffer, false), (Bound(new_ns), LocalName(b"with-declared-prefix")) ); assert_eq!(resolver.find(name, &buffer), Bound(new_ns)); resolver.pop(&mut buffer); assert_eq!(buffer, b"pold"); assert_eq!( resolver.resolve(name, &buffer, true), (Bound(old_ns), LocalName(b"with-declared-prefix")) ); assert_eq!( resolver.resolve(name, &buffer, false), (Bound(old_ns), LocalName(b"with-declared-prefix")) ); assert_eq!(resolver.find(name, &buffer), Bound(old_ns)); } /// Test adding a second level of namespaces, which reset the previous binding /// to not bound state by specifying an empty namespace name. 
/// /// See #[test] fn reset() { let name = QName(b"p:with-declared-prefix"); let old_ns = Namespace(b"old"); let mut resolver = NamespaceResolver::default(); let mut buffer = Vec::new(); resolver.push(&BytesStart::from_content(" xmlns:p='old'", 0), &mut buffer); resolver.push(&BytesStart::from_content(" xmlns:p=''", 0), &mut buffer); assert_eq!(buffer, b"poldp"); assert_eq!( resolver.resolve(name, &buffer, true), (Unknown(b"p".to_vec()), LocalName(b"with-declared-prefix")) ); assert_eq!( resolver.resolve(name, &buffer, false), (Unknown(b"p".to_vec()), LocalName(b"with-declared-prefix")) ); assert_eq!(resolver.find(name, &buffer), Unknown(b"p".to_vec())); resolver.pop(&mut buffer); assert_eq!(buffer, b"pold"); assert_eq!( resolver.resolve(name, &buffer, true), (Bound(old_ns), LocalName(b"with-declared-prefix")) ); assert_eq!( resolver.resolve(name, &buffer, false), (Bound(old_ns), LocalName(b"with-declared-prefix")) ); assert_eq!(resolver.find(name, &buffer), Bound(old_ns)); } } #[test] fn undeclared_prefix() { let name = QName(b"unknown:prefix"); let resolver = NamespaceResolver::default(); let buffer = Vec::new(); assert_eq!(buffer, b""); assert_eq!( resolver.resolve(name, &buffer, true), (Unknown(b"unknown".to_vec()), LocalName(b"prefix")) ); assert_eq!( resolver.resolve(name, &buffer, false), (Unknown(b"unknown".to_vec()), LocalName(b"prefix")) ); assert_eq!(resolver.find(name, &buffer), Unknown(b"unknown".to_vec())); } /// Checks how the QName is decomposed to a prefix and a local name #[test] fn prefix_and_local_name() { let name = QName(b"foo:bus"); assert_eq!(name.prefix(), Some(Prefix(b"foo"))); assert_eq!(name.local_name(), LocalName(b"bus")); assert_eq!(name.decompose(), (LocalName(b"bus"), Some(Prefix(b"foo")))); let name = QName(b"foo:"); assert_eq!(name.prefix(), Some(Prefix(b"foo"))); assert_eq!(name.local_name(), LocalName(b"")); assert_eq!(name.decompose(), (LocalName(b""), Some(Prefix(b"foo")))); let name = QName(b":foo"); 
assert_eq!(name.prefix(), Some(Prefix(b""))); assert_eq!(name.local_name(), LocalName(b"foo")); assert_eq!(name.decompose(), (LocalName(b"foo"), Some(Prefix(b"")))); let name = QName(b"foo:bus:baz"); assert_eq!(name.prefix(), Some(Prefix(b"foo"))); assert_eq!(name.local_name(), LocalName(b"bus:baz")); assert_eq!( name.decompose(), (LocalName(b"bus:baz"), Some(Prefix(b"foo"))) ); } } quick-xml-0.27.1/src/reader/async_tokio.rs000064400000000000000000000361650072674642500166110ustar 00000000000000//! This is an implementation of [`Reader`] for reading from a [`AsyncBufRead`] //! as underlying byte stream. This reader fully implements async/await so reading //! can use non-blocking I/O. use std::future::Future; use std::pin::Pin; use tokio::io::{self, AsyncBufRead, AsyncBufReadExt}; use crate::events::Event; use crate::name::{QName, ResolveResult}; use crate::reader::buffered_reader::impl_buffered_source; use crate::reader::{ is_whitespace, BangType, NsReader, ParseState, ReadElementState, Reader, Span, }; use crate::{Error, Result}; /// A struct for read XML asynchronously from an [`AsyncBufRead`]. /// /// Having own struct allows us to implement anything without risk of name conflicts /// and does not suffer from the impossibility of having `async` in traits. struct TokioAdapter<'a, R>(&'a mut R); impl<'a, R: AsyncBufRead + Unpin> TokioAdapter<'a, R> { impl_buffered_source!('b, 0, async, await); } //////////////////////////////////////////////////////////////////////////////////////////////////// impl Reader { /// An asynchronous version of [`read_event_into()`]. Reads the next event into /// given buffer. /// /// > This function should be defined as /// > ```ignore /// > pub async fn read_event_into_async<'b>( /// > &mut self, /// > buf: &'b mut Vec /// > ) -> Result>; /// > ``` /// > but Rust does not allow to write that for recursive asynchronous function /// /// This is the main entry point for reading XML `Event`s when using an async reader. 
/// /// See the documentation of [`read_event_into()`] for more information. /// /// # Examples /// /// ``` /// # tokio_test::block_on(async { /// # use pretty_assertions::assert_eq; /// use quick_xml::events::Event; /// use quick_xml::reader::Reader; /// /// // This explicitly uses `from_reader("...".as_bytes())` to use a buffered /// // reader instead of relying on the zero-copy optimizations for reading /// // from byte slices, which is provides the sync interface anyway. /// let mut reader = Reader::from_reader(r#" /// /// Test /// Test 2 /// /// "#.as_bytes()); /// reader.trim_text(true); /// /// let mut count = 0; /// let mut buf = Vec::new(); /// let mut txt = Vec::new(); /// loop { /// match reader.read_event_into_async(&mut buf).await { /// Ok(Event::Start(_)) => count += 1, /// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()), /// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e), /// Ok(Event::Eof) => break, /// _ => (), /// } /// buf.clear(); /// } /// assert_eq!(count, 3); /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); /// # }) // tokio_test::block_on /// ``` /// /// [`read_event_into()`]: Reader::read_event_into pub fn read_event_into_async<'reader, 'b: 'reader>( &'reader mut self, buf: &'b mut Vec, ) -> Pin>> + 'reader>> { Box::pin(async move { read_event_impl!( self, buf, TokioAdapter(&mut self.reader), read_until_open_async, read_until_close_async, await ) }) } /// An asynchronous version of [`read_to_end_into()`]. /// Reads asynchronously until end element is found using provided buffer as /// intermediate storage for events content. This function is supposed to be /// called after you already read a [`Start`] event. /// /// See the documentation of [`read_to_end_into()`] for more information. /// /// # Examples /// /// This example shows, how you can skip XML content after you read the /// start event. 
/// /// ``` /// # tokio_test::block_on(async { /// # use pretty_assertions::assert_eq; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::reader::Reader; /// /// let mut reader = Reader::from_reader(r#" /// /// /// /// /// /// /// /// /// "#.as_bytes()); /// reader.trim_text(true); /// let mut buf = Vec::new(); /// /// let start = BytesStart::new("outer"); /// let end = start.to_end().into_owned(); /// /// // First, we read a start event... /// assert_eq!(reader.read_event_into_async(&mut buf).await.unwrap(), Event::Start(start)); /// /// // ...then, we could skip all events to the corresponding end event. /// // This call will correctly handle nested elements. /// // Note, however, that this method does not handle namespaces. /// reader.read_to_end_into_async(end.name(), &mut buf).await.unwrap(); /// /// // At the end we should get an Eof event, because we ate the whole XML /// assert_eq!(reader.read_event_into_async(&mut buf).await.unwrap(), Event::Eof); /// # }) // tokio_test::block_on /// ``` /// /// [`read_to_end_into()`]: Self::read_to_end_into /// [`Start`]: Event::Start pub async fn read_to_end_into_async<'n>( &mut self, // We should name that lifetime due to https://github.com/rust-lang/rust/issues/63033` end: QName<'n>, buf: &mut Vec, ) -> Result { Ok(read_to_end!(self, end, buf, read_event_into_async, { buf.clear(); }, await)) } /// Read until '<' is found, moves reader to an `OpenedTag` state and returns a `Text` event. async fn read_until_open_async<'b>(&mut self, buf: &'b mut Vec) -> Result> { read_until_open!(self, buf, TokioAdapter(&mut self.reader), read_event_into_async, await) } /// Private function to read until `>` is found. This function expects that /// it was called just after encounter a `<` symbol. 
async fn read_until_close_async<'b>(&mut self, buf: &'b mut Vec) -> Result> { read_until_close!(self, buf, TokioAdapter(&mut self.reader), await) } } //////////////////////////////////////////////////////////////////////////////////////////////////// impl NsReader { /// An asynchronous version of [`read_event_into()`]. Reads the next event into /// given buffer. /// /// This method manages namespaces but doesn't resolve them automatically. /// You should call [`resolve_element()`] if you want to get a namespace. /// /// You also can use [`read_resolved_event_into_async()`] instead if you want /// to resolve namespace as soon as you get an event. /// /// # Examples /// /// ``` /// # tokio_test::block_on(async { /// # use pretty_assertions::assert_eq; /// use quick_xml::events::Event; /// use quick_xml::name::{Namespace, ResolveResult::*}; /// use quick_xml::reader::NsReader; /// /// let mut reader = NsReader::from_reader(r#" /// /// Test /// Test 2 /// /// "#.as_bytes()); /// reader.trim_text(true); /// /// let mut count = 0; /// let mut buf = Vec::new(); /// let mut txt = Vec::new(); /// loop { /// match reader.read_event_into_async(&mut buf).await.unwrap() { /// Event::Start(e) => { /// count += 1; /// let (ns, local) = reader.resolve_element(e.name()); /// match local.as_ref() { /// b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))), /// b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))), /// _ => unreachable!(), /// } /// } /// Event::Text(e) => { /// txt.push(e.unescape().unwrap().into_owned()) /// } /// Event::Eof => break, /// _ => (), /// } /// buf.clear(); /// } /// assert_eq!(count, 3); /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); /// # }) // tokio_test::block_on /// ``` /// /// [`read_event_into()`]: NsReader::read_event_into /// [`resolve_element()`]: Self::resolve_element /// [`read_resolved_event_into_async()`]: Self::read_resolved_event_into_async pub async fn read_event_into_async<'b>(&mut self, buf: &'b mut Vec) 
-> Result> { self.pop(); let event = self.reader.read_event_into_async(buf).await; self.process_event(event) } /// An asynchronous version of [`read_to_end_into()`]. /// Reads asynchronously until end element is found using provided buffer as /// intermediate storage for events content. This function is supposed to be /// called after you already read a [`Start`] event. /// /// See the documentation of [`read_to_end_into()`] for more information. /// /// # Examples /// /// This example shows, how you can skip XML content after you read the /// start event. /// /// ``` /// # tokio_test::block_on(async { /// # use pretty_assertions::assert_eq; /// use quick_xml::name::{Namespace, ResolveResult}; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::reader::NsReader; /// /// let mut reader = NsReader::from_reader(r#" /// /// /// /// /// /// /// /// /// /// /// /// /// "#.as_bytes()); /// reader.trim_text(true); /// let mut buf = Vec::new(); /// /// let ns = Namespace(b"namespace 1"); /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5); /// let end = start.to_end().into_owned(); /// /// // First, we read a start event... /// assert_eq!( /// reader.read_resolved_event_into_async(&mut buf).await.unwrap(), /// (ResolveResult::Bound(ns), Event::Start(start)) /// ); /// /// // ...then, we could skip all events to the corresponding end event. /// // This call will correctly handle nested elements. /// // Note, however, that this method does not handle namespaces. 
/// reader.read_to_end_into_async(end.name(), &mut buf).await.unwrap(); /// /// // At the end we should get an Eof event, because we ate the whole XML /// assert_eq!( /// reader.read_resolved_event_into_async(&mut buf).await.unwrap(), /// (ResolveResult::Unbound, Event::Eof) /// ); /// # }) // tokio_test::block_on /// ``` /// /// [`read_to_end_into()`]: Self::read_to_end_into /// [`Start`]: Event::Start pub async fn read_to_end_into_async<'n>( &mut self, // We should name that lifetime due to https://github.com/rust-lang/rust/issues/63033` end: QName<'n>, buf: &mut Vec, ) -> Result { // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should // match literally the start name. See `Reader::check_end_names` documentation self.reader.read_to_end_into_async(end, buf).await } /// An asynchronous version of [`read_resolved_event_into()`]. Reads the next /// event into given buffer asynchronously and resolves its namespace (if applicable). /// /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events. /// For all other events the concept of namespace is not defined, so /// a [`ResolveResult::Unbound`] is returned. /// /// If you are not interested in namespaces, you can use [`read_event_into_async()`] /// which will not automatically resolve namespaces for you. 
/// /// # Examples /// /// ``` /// # tokio_test::block_on(async { /// # use pretty_assertions::assert_eq; /// use quick_xml::events::Event; /// use quick_xml::name::{Namespace, QName, ResolveResult::*}; /// use quick_xml::reader::NsReader; /// /// let mut reader = NsReader::from_reader(r#" /// /// Test /// Test 2 /// /// "#.as_bytes()); /// reader.trim_text(true); /// /// let mut count = 0; /// let mut buf = Vec::new(); /// let mut txt = Vec::new(); /// loop { /// match reader.read_resolved_event_into_async(&mut buf).await.unwrap() { /// (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => { /// count += 1; /// assert_eq!(e.local_name(), QName(b"tag1").into()); /// } /// (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => { /// count += 1; /// assert_eq!(e.local_name(), QName(b"tag2").into()); /// } /// (_, Event::Start(_)) => unreachable!(), /// /// (_, Event::Text(e)) => { /// txt.push(e.unescape().unwrap().into_owned()) /// } /// (_, Event::Eof) => break, /// _ => (), /// } /// buf.clear(); /// } /// assert_eq!(count, 3); /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); /// # }) // tokio_test::block_on /// ``` /// /// [`read_resolved_event_into()`]: NsReader::read_resolved_event_into /// [`Start`]: Event::Start /// [`Empty`]: Event::Empty /// [`End`]: Event::End /// [`read_event_into_async()`]: Self::read_event_into_async pub async fn read_resolved_event_into_async<'ns, 'b>( // Name 'ns lifetime, because otherwise we get an error // "implicit elided lifetime not allowed here" on ResolveResult &'ns mut self, buf: &'b mut Vec, ) -> Result<(ResolveResult<'ns>, Event<'b>)> { let event = self.read_event_into_async(buf).await; self.resolve_event(event) } } #[cfg(test)] mod test { use super::TokioAdapter; use crate::reader::test::{check, small_buffers}; check!( #[tokio::test] read_event_into_async, read_until_close_async, TokioAdapter, &mut Vec::new(), async, await ); small_buffers!( #[tokio::test] read_event_into_async: tokio::io::BufReader<_>, 
async, await ); } quick-xml-0.27.1/src/reader/buffered_reader.rs000064400000000000000000000430540072674642500173660ustar 00000000000000//! This is an implementation of [`Reader`] for reading from a [`BufRead`] as //! underlying byte stream. use std::fs::File; use std::io::{self, BufRead, BufReader}; use std::path::Path; use memchr; use crate::errors::{Error, Result}; use crate::events::Event; use crate::name::QName; use crate::reader::{is_whitespace, BangType, ReadElementState, Reader, Span, XmlSource}; macro_rules! impl_buffered_source { ($($lf:lifetime, $reader:tt, $async:ident, $await:ident)?) => { #[cfg(not(feature = "encoding"))] $($async)? fn remove_utf8_bom(&mut self) -> Result<()> { use crate::encoding::UTF8_BOM; loop { break match self $(.$reader)? .fill_buf() $(.$await)? { Ok(n) => { if n.starts_with(UTF8_BOM) { self $(.$reader)? .consume(UTF8_BOM.len()); } Ok(()) }, Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, Err(e) => Err(Error::Io(e.into())), }; } } #[cfg(feature = "encoding")] $($async)? fn detect_encoding(&mut self) -> Result> { loop { break match self $(.$reader)? .fill_buf() $(.$await)? { Ok(n) => if let Some((enc, bom_len)) = crate::encoding::detect_encoding(n) { self $(.$reader)? .consume(bom_len); Ok(Some(enc)) } else { Ok(None) }, Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, Err(e) => Err(Error::Io(e.into())), }; } } #[inline] $($async)? fn read_bytes_until $(<$lf>)? ( &mut self, byte: u8, buf: &'b mut Vec, position: &mut usize, ) -> Result> { // search byte must be within the ascii range debug_assert!(byte.is_ascii()); let mut read = 0; let mut done = false; let start = buf.len(); while !done { let used = { let available = match self $(.$reader)? .fill_buf() $(.$await)? 
{ Ok(n) if n.is_empty() => break, Ok(n) => n, Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, Err(e) => { *position += read; return Err(Error::Io(e.into())); } }; match memchr::memchr(byte, available) { Some(i) => { buf.extend_from_slice(&available[..i]); done = true; i + 1 } None => { buf.extend_from_slice(available); available.len() } } }; self $(.$reader)? .consume(used); read += used; } *position += read; if read == 0 { Ok(None) } else { Ok(Some(&buf[start..])) } } $($async)? fn read_bang_element $(<$lf>)? ( &mut self, buf: &'b mut Vec, position: &mut usize, ) -> Result> { // Peeked one bang ('!') before being called, so it's guaranteed to // start with it. let start = buf.len(); let mut read = 1; buf.push(b'!'); self $(.$reader)? .consume(1); let bang_type = BangType::new(self.peek_one() $(.$await)? ?)?; loop { match self $(.$reader)? .fill_buf() $(.$await)? { // Note: Do not update position, so the error points to // somewhere sane rather than at the EOF Ok(n) if n.is_empty() => return Err(bang_type.to_err()), Ok(available) => { if let Some((consumed, used)) = bang_type.parse(buf, available) { buf.extend_from_slice(consumed); self $(.$reader)? .consume(used); read += used; *position += read; break; } else { buf.extend_from_slice(available); let used = available.len(); self $(.$reader)? .consume(used); read += used; } } Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, Err(e) => { *position += read; return Err(Error::Io(e.into())); } } } if read == 0 { Ok(None) } else { Ok(Some((bang_type, &buf[start..]))) } } #[inline] $($async)? fn read_element $(<$lf>)? ( &mut self, buf: &'b mut Vec, position: &mut usize, ) -> Result> { let mut state = ReadElementState::Elem; let mut read = 0; let start = buf.len(); loop { match self $(.$reader)? .fill_buf() $(.$await)? { Ok(n) if n.is_empty() => break, Ok(available) => { if let Some((consumed, used)) = state.change(available) { buf.extend_from_slice(consumed); self $(.$reader)? 
.consume(used); read += used; *position += read; break; } else { buf.extend_from_slice(available); let used = available.len(); self $(.$reader)? .consume(used); read += used; } } Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, Err(e) => { *position += read; return Err(Error::Io(e.into())); } }; } if read == 0 { Ok(None) } else { Ok(Some(&buf[start..])) } } $($async)? fn skip_whitespace(&mut self, position: &mut usize) -> Result<()> { loop { break match self $(.$reader)? .fill_buf() $(.$await)? { Ok(n) => { let count = n.iter().position(|b| !is_whitespace(*b)).unwrap_or(n.len()); if count > 0 { self $(.$reader)? .consume(count); *position += count; continue; } else { Ok(()) } } Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, Err(e) => Err(Error::Io(e.into())), }; } } $($async)? fn skip_one(&mut self, byte: u8, position: &mut usize) -> Result { // search byte must be within the ascii range debug_assert!(byte.is_ascii()); match self.peek_one() $(.$await)? ? { Some(b) if b == byte => { *position += 1; self $(.$reader)? .consume(1); Ok(true) } _ => Ok(false), } } $($async)? fn peek_one(&mut self) -> Result> { loop { break match self $(.$reader)? .fill_buf() $(.$await)? { Ok(n) if n.is_empty() => Ok(None), Ok(n) => Ok(Some(n[0])), Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, Err(e) => Err(Error::Io(e.into())), }; } } }; } // Make it public for use in async implementations pub(super) use impl_buffered_source; /// Implementation of `XmlSource` for any `BufRead` reader using a user-given /// `Vec` as buffer that will be borrowed by events. impl<'b, R: BufRead> XmlSource<'b, &'b mut Vec> for R { impl_buffered_source!(); } //////////////////////////////////////////////////////////////////////////////////////////////////// /// This is an implementation for reading from a [`BufRead`] as underlying byte stream. impl Reader { /// Reads the next `Event`. /// /// This is the main entry point for reading XML `Event`s. 
/// /// `Event`s borrow `buf` and can be converted to own their data if needed (uses `Cow` /// internally). /// /// Having the possibility to control the internal buffers gives you some additional benefits /// such as: /// /// - Reduce the number of allocations by reusing the same buffer. For constrained systems, /// you can call `buf.clear()` once you are done with processing the event (typically at the /// end of your loop). /// - Reserve the buffer length if you know the file size (using `Vec::with_capacity`). /// /// # Examples /// /// ``` /// use quick_xml::events::Event; /// use quick_xml::reader::Reader; /// /// let xml = r#" /// Test /// Test 2 /// "#; /// let mut reader = Reader::from_str(xml); /// reader.trim_text(true); /// let mut count = 0; /// let mut buf = Vec::new(); /// let mut txt = Vec::new(); /// loop { /// match reader.read_event_into(&mut buf) { /// Ok(Event::Start(_)) => count += 1, /// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()), /// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e), /// Ok(Event::Eof) => break, /// _ => (), /// } /// buf.clear(); /// } /// assert_eq!(count, 3); /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); /// ``` #[inline] pub fn read_event_into<'b>(&mut self, buf: &'b mut Vec) -> Result> { self.read_event_impl(buf) } /// Reads until end element is found using provided buffer as intermediate /// storage for events content. This function is supposed to be called after /// you already read a [`Start`] event. /// /// Returns a span that cover content between `>` of an opening tag and `<` of /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and /// this method was called after reading expanded [`Start`] event. /// /// Manages nested cases where parent and child elements have the _literally_ /// same name. /// /// If corresponding [`End`] event will not be found, the [`Error::UnexpectedEof`] /// will be returned. 
In particularly, that error will be returned if you call /// this method without consuming the corresponding [`Start`] event first. /// /// If your reader created from a string slice or byte array slice, it is /// better to use [`read_to_end()`] method, because it will not copy bytes /// into intermediate buffer. /// /// The provided `buf` buffer will be filled only by one event content at time. /// Before reading of each event the buffer will be cleared. If you know an /// appropriate size of each event, you can preallocate the buffer to reduce /// number of reallocations. /// /// The `end` parameter should contain name of the end element _in the reader /// encoding_. It is good practice to always get that parameter using /// [`BytesStart::to_end()`] method. /// /// The correctness of the skipped events does not checked, if you disabled /// the [`check_end_names`] option. /// /// # Namespaces /// /// While the `Reader` does not support namespace resolution, namespaces /// does not change the algorithm for comparing names. Although the names /// `a:name` and `b:name` where both prefixes `a` and `b` resolves to the /// same namespace, are semantically equivalent, `` cannot close /// ``, because according to [the specification] /// /// > The end of every element that begins with a **start-tag** MUST be marked /// > by an **end-tag** containing a name that echoes the element's type as /// > given in the **start-tag** /// /// # Examples /// /// This example shows, how you can skip XML content after you read the /// start event. /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::reader::Reader; /// /// let mut reader = Reader::from_str(r#" /// /// /// /// /// /// /// /// /// "#); /// reader.trim_text(true); /// let mut buf = Vec::new(); /// /// let start = BytesStart::new("outer"); /// let end = start.to_end().into_owned(); /// /// // First, we read a start event... 
/// assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Start(start)); /// /// // ...then, we could skip all events to the corresponding end event. /// // This call will correctly handle nested elements. /// // Note, however, that this method does not handle namespaces. /// reader.read_to_end_into(end.name(), &mut buf).unwrap(); /// /// // At the end we should get an Eof event, because we ate the whole XML /// assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof); /// ``` /// /// [`Start`]: Event::Start /// [`End`]: Event::End /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end /// [`read_to_end()`]: Self::read_to_end /// [`expand_empty_elements`]: Self::expand_empty_elements /// [`check_end_names`]: Self::check_end_names /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec) -> Result { Ok(read_to_end!(self, end, buf, read_event_impl, { buf.clear(); })) } } impl Reader> { /// Creates an XML reader from a file path. 
pub fn from_file>(path: P) -> Result { let file = File::open(path)?; let reader = BufReader::new(file); Ok(Self::from_reader(reader)) } } #[cfg(test)] mod test { use crate::reader::test::{check, small_buffers}; use crate::reader::XmlSource; /// Default buffer constructor just pass the byte array from the test fn identity(input: T) -> T { input } check!( #[test] read_event_impl, read_until_close, identity, &mut Vec::new() ); small_buffers!( #[test] read_event_into: std::io::BufReader<_> ); #[cfg(feature = "encoding")] mod encoding { use crate::events::Event; use crate::reader::Reader; use encoding_rs::{UTF_16LE, UTF_8, WINDOWS_1251}; use pretty_assertions::assert_eq; /// Checks that encoding is detected by BOM and changed after XML declaration /// BOM indicates UTF-16LE, but XML - windows-1251 #[test] fn bom_detected() { let mut reader = Reader::from_reader(b"\xFF\xFE".as_ref()); let mut buf = Vec::new(); assert_eq!(reader.decoder().encoding(), UTF_8); reader.read_event_into(&mut buf).unwrap(); assert_eq!(reader.decoder().encoding(), WINDOWS_1251); assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof); } /// Checks that encoding is changed by XML declaration, but only once #[test] fn xml_declaration() { let mut reader = Reader::from_reader( b"".as_ref(), ); let mut buf = Vec::new(); assert_eq!(reader.decoder().encoding(), UTF_8); reader.read_event_into(&mut buf).unwrap(); assert_eq!(reader.decoder().encoding(), UTF_16LE); reader.read_event_into(&mut buf).unwrap(); assert_eq!(reader.decoder().encoding(), UTF_16LE); assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof); } } } quick-xml-0.27.1/src/reader/mod.rs000064400000000000000000002333300072674642500150370ustar 00000000000000//! Contains high-level interface for a pull-based XML parser. 
#[cfg(feature = "encoding")] use encoding_rs::Encoding; use std::ops::Range; use crate::encoding::Decoder; use crate::errors::{Error, Result}; use crate::events::Event; use crate::reader::parser::Parser; use memchr; macro_rules! configure_methods { ($($holder:ident)?) => { /// Changes whether empty elements should be split into an `Open` and a `Close` event. /// /// When set to `true`, all [`Empty`] events produced by a self-closing tag like `` are /// expanded into a [`Start`] event followed by an [`End`] event. When set to `false` (the /// default), those tags are represented by an [`Empty`] event instead. /// /// Note, that setting this to `true` will lead to additional allocates that /// needed to store tag name for an [`End`] event. However if [`check_end_names`] /// is also set, only one additional allocation will be performed that support /// both these options. /// /// (`false` by default) /// /// [`Empty`]: Event::Empty /// [`Start`]: Event::Start /// [`End`]: Event::End /// [`check_end_names`]: Self::check_end_names pub fn expand_empty_elements(&mut self, val: bool) -> &mut Self { self $(.$holder)? .parser.expand_empty_elements = val; self } /// Changes whether whitespace before and after character data should be removed. /// /// When set to `true`, all [`Text`] events are trimmed. /// If after that the event is empty it will not be pushed. /// /// Changing this option automatically changes the [`trim_text_end`] option. /// /// (`false` by default) /// /// [`Text`]: Event::Text /// [`trim_text_end`]: Self::trim_text_end pub fn trim_text(&mut self, val: bool) -> &mut Self { self $(.$holder)? .parser.trim_text_start = val; self $(.$holder)? .parser.trim_text_end = val; self } /// Changes whether whitespace after character data should be removed. /// /// When set to `true`, trailing whitespace is trimmed in [`Text`] events. /// If after that the event is empty it will not be pushed. 
/// /// (`false` by default) /// /// [`Text`]: Event::Text pub fn trim_text_end(&mut self, val: bool) -> &mut Self { self $(.$holder)? .parser.trim_text_end = val; self } /// Changes whether trailing whitespaces after the markup name are trimmed in closing tags /// ``. /// /// If true the emitted [`End`] event is stripped of trailing whitespace after the markup name. /// /// Note that if set to `false` and `check_end_names` is true the comparison of markup names is /// going to fail erroneously if a closing tag contains trailing whitespaces. /// /// (`true` by default) /// /// [`End`]: Event::End pub fn trim_markup_names_in_closing_tags(&mut self, val: bool) -> &mut Self { self $(.$holder)? .parser.trim_markup_names_in_closing_tags = val; self } /// Changes whether mismatched closing tag names should be detected. /// /// Note, that start and end tags [should match literally][spec], they cannot /// have different prefixes even if both prefixes resolve to the same namespace. /// The XML /// /// ```xml /// /// /// ``` /// /// is not valid, even though semantically the start tag is the same as the /// end tag. The reason is that namespaces are an extension of the original /// XML specification (without namespaces) and it should be backward-compatible. /// /// When set to `false`, it won't check if a closing tag matches the corresponding opening tag. /// For example, `` will be permitted. /// /// If the XML is known to be sane (already processed, etc.) this saves extra time. /// /// Note that the emitted [`End`] event will not be modified if this is disabled, ie. it will /// contain the data of the mismatched end tag. /// /// Note, that setting this to `true` will lead to additional allocates that /// needed to store tag name for an [`End`] event. However if [`expand_empty_elements`] /// is also set, only one additional allocation will be performed that support /// both these options. 
/// /// (`true` by default) /// /// [spec]: https://www.w3.org/TR/xml11/#dt-etag /// [`End`]: Event::End /// [`expand_empty_elements`]: Self::expand_empty_elements pub fn check_end_names(&mut self, val: bool) -> &mut Self { self $(.$holder)? .parser.check_end_names = val; self } /// Changes whether comments should be validated. /// /// When set to `true`, every [`Comment`] event will be checked for not containing `--`, which /// is not allowed in XML comments. Most of the time we don't want comments at all so we don't /// really care about comment correctness, thus the default value is `false` to improve /// performance. /// /// (`false` by default) /// /// [`Comment`]: Event::Comment pub fn check_comments(&mut self, val: bool) -> &mut Self { self $(.$holder)? .parser.check_comments = val; self } }; } macro_rules! read_event_impl { ( $self:ident, $buf:ident, $reader:expr, $read_until_open:ident, $read_until_close:ident $(, $await:ident)? ) => {{ let event = match $self.parser.state { ParseState::Init => { // If encoding set explicitly, we not need to detect it. For example, // explicit UTF-8 set automatically if Reader was created using `from_str`. // But we still need to remove BOM for consistency with no encoding // feature enabled path #[cfg(feature = "encoding")] if let Some(encoding) = $reader.detect_encoding() $(.$await)? ? { if $self.parser.encoding.can_be_refined() { $self.parser.encoding = crate::reader::EncodingRef::BomDetected(encoding); } } // Removes UTF-8 BOM if it is present #[cfg(not(feature = "encoding"))] $reader.remove_utf8_bom() $(.$await)? ?; $self.$read_until_open($buf) $(.$await)? }, ParseState::ClosedTag => $self.$read_until_open($buf) $(.$await)?, ParseState::OpenedTag => $self.$read_until_close($buf) $(.$await)?, ParseState::Empty => $self.parser.close_expanded_empty(), ParseState::Exit => return Ok(Event::Eof), }; match event { Err(_) | Ok(Event::Eof) => $self.parser.state = ParseState::Exit, _ => {} } event }}; } macro_rules! 
read_until_open { ( $self:ident, $buf:ident, $reader:expr, $read_event:ident $(, $await:ident)? ) => {{ $self.parser.state = ParseState::OpenedTag; if $self.parser.trim_text_start { $reader.skip_whitespace(&mut $self.parser.offset) $(.$await)? ?; } // If we already at the `<` symbol, do not try to return an empty Text event if $reader.skip_one(b'<', &mut $self.parser.offset) $(.$await)? ? { return $self.$read_event($buf) $(.$await)?; } match $reader .read_bytes_until(b'<', $buf, &mut $self.parser.offset) $(.$await)? { Ok(Some(bytes)) => $self.parser.read_text(bytes), Ok(None) => Ok(Event::Eof), Err(e) => Err(e), } }}; } macro_rules! read_until_close { ( $self:ident, $buf:ident, $reader:expr $(, $await:ident)? ) => {{ $self.parser.state = ParseState::ClosedTag; match $reader.peek_one() $(.$await)? { // ` match $reader .read_bang_element($buf, &mut $self.parser.offset) $(.$await)? { Ok(None) => Ok(Event::Eof), Ok(Some((bang_type, bytes))) => $self.parser.read_bang(bang_type, bytes), Err(e) => Err(e), }, // ` match $reader .read_bytes_until(b'>', $buf, &mut $self.parser.offset) $(.$await)? { Ok(None) => Ok(Event::Eof), Ok(Some(bytes)) => $self.parser.read_end(bytes), Err(e) => Err(e), }, // ` match $reader .read_bytes_until(b'>', $buf, &mut $self.parser.offset) $(.$await)? { Ok(None) => Ok(Event::Eof), Ok(Some(bytes)) => $self.parser.read_question_mark(bytes), Err(e) => Err(e), }, // `<...` - opening or self-closed tag Ok(Some(_)) => match $reader .read_element($buf, &mut $self.parser.offset) $(.$await)? { Ok(None) => Ok(Event::Eof), Ok(Some(bytes)) => $self.parser.read_start(bytes), Err(e) => Err(e), }, Ok(None) => Ok(Event::Eof), Err(e) => Err(e), } }}; } /// Generalization of `read_to_end` method for buffered and borrowed readers macro_rules! read_to_end { ( $self:expr, $end:expr, $buf:expr, $read_event:ident, // Code block that performs clearing of internal buffer after read of each event $clear:block $(, $await:ident)? 
) => {{ let start = $self.buffer_position(); let mut depth = 0; loop { $clear let end = $self.buffer_position(); match $self.$read_event($buf) $(.$await)? { Err(e) => return Err(e), Ok(Event::Start(e)) if e.name() == $end => depth += 1, Ok(Event::End(e)) if e.name() == $end => { if depth == 0 { break start..end; } depth -= 1; } Ok(Event::Eof) => { let name = $self.decoder().decode($end.as_ref()); return Err(Error::UnexpectedEof(format!("", name))); } _ => (), } } }}; } #[cfg(feature = "async-tokio")] mod async_tokio; mod buffered_reader; mod ns_reader; mod parser; mod slice_reader; pub use ns_reader::NsReader; /// Range of input in bytes, that corresponds to some piece of XML pub type Span = Range; //////////////////////////////////////////////////////////////////////////////////////////////////// /// Possible reader states. The state transition diagram (`true` and `false` shows /// value of [`Reader::expand_empty_elements()`] option): /// /// ```mermaid /// flowchart LR /// subgraph _ /// direction LR /// /// Init -- "(no event)"\n --> OpenedTag /// OpenedTag -- Decl, DocType, PI\nComment, CData\nStart, Empty, End --> ClosedTag /// ClosedTag -- "#lt;false#gt;\n(no event)"\nText --> OpenedTag /// end /// ClosedTag -- "#lt;true#gt;"\nStart --> Empty /// Empty -- End --> ClosedTag /// _ -. Eof .-> Exit /// ``` #[derive(Clone)] enum ParseState { /// Initial state in which reader stay after creation. Transition from that /// state could produce a `Text`, `Decl`, `Comment` or `Start` event. The next /// state is always `OpenedTag`. The reader will never return to this state. The /// event emitted during transition to `OpenedTag` is a `StartEvent` if the /// first symbol not `<`, otherwise no event are emitted. Init, /// State after seeing the `<` symbol. Depending on the next symbol all other /// events could be generated. /// /// After generating one event the reader moves to the `ClosedTag` state. 
OpenedTag, /// State in which reader searches the `<` symbol of a markup. All bytes before /// that symbol will be returned in the [`Event::Text`] event. After that /// the reader moves to the `OpenedTag` state. ClosedTag, /// This state is used only if option `expand_empty_elements` is set to `true`. /// Reader enters to this state when it is in a `ClosedTag` state and emits an /// [`Event::Start`] event. The next event emitted will be an [`Event::End`], /// after which reader returned to the `ClosedTag` state. Empty, /// Reader enters this state when `Eof` event generated or an error occurred. /// This is the last state, the reader stay in it forever. Exit, } /// A reference to an encoding together with information about how it was retrieved. /// /// The state transition diagram: /// /// ```mermaid /// flowchart LR /// Implicit -- from_str --> Explicit /// Implicit -- BOM --> BomDetected /// Implicit -- "encoding=..." --> XmlDetected /// BomDetected -- "encoding=..." --> XmlDetected /// ``` #[cfg(feature = "encoding")] #[derive(Clone, Copy)] enum EncodingRef { /// Encoding was implicitly assumed to have a specified value. It can be refined /// using BOM or by the XML declaration event (``) Implicit(&'static Encoding), /// Encoding was explicitly set to the desired value. It cannot be changed /// nor by BOM, nor by parsing XML declaration (``) Explicit(&'static Encoding), /// Encoding was detected from a byte order mark (BOM) or by the first bytes /// of the content. It can be refined by the XML declaration event (``) BomDetected(&'static Encoding), /// Encoding was detected using XML declaration event (``). 
/// It can no longer change XmlDetected(&'static Encoding), } #[cfg(feature = "encoding")] impl EncodingRef { #[inline] fn encoding(&self) -> &'static Encoding { match self { Self::Implicit(e) => e, Self::Explicit(e) => e, Self::BomDetected(e) => e, Self::XmlDetected(e) => e, } } #[inline] fn can_be_refined(&self) -> bool { match self { Self::Implicit(_) | Self::BomDetected(_) => true, Self::Explicit(_) | Self::XmlDetected(_) => false, } } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A low level encoding-agnostic XML event reader. /// /// Consumes bytes and streams XML [`Event`]s. /// /// This reader does not manage namespace declarations and not able to resolve /// prefixes. If you want these features, use the [`NsReader`]. /// /// # Examples /// /// ``` /// use quick_xml::events::Event; /// use quick_xml::reader::Reader; /// /// let xml = r#" /// Test /// Test 2 /// "#; /// let mut reader = Reader::from_str(xml); /// reader.trim_text(true); /// /// let mut count = 0; /// let mut txt = Vec::new(); /// let mut buf = Vec::new(); /// /// // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s) /// loop { /// // NOTE: this is the generic case when we don't know about the input BufRead. 
/// // when the input is a &str or a &[u8], we don't actually need to use another /// // buffer, we could directly call `reader.read_event()` /// match reader.read_event_into(&mut buf) { /// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e), /// // exits the loop when reaching end of file /// Ok(Event::Eof) => break, /// /// Ok(Event::Start(e)) => { /// match e.name().as_ref() { /// b"tag1" => println!("attributes values: {:?}", /// e.attributes().map(|a| a.unwrap().value) /// .collect::>()), /// b"tag2" => count += 1, /// _ => (), /// } /// } /// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()), /// /// // There are several other `Event`s we do not consider here /// _ => (), /// } /// // if we don't keep a borrow elsewhere, we can clear the buffer to keep memory usage low /// buf.clear(); /// } /// ``` /// /// [`NsReader`]: crate::reader::NsReader #[derive(Clone)] pub struct Reader { /// Source of data for parse reader: R, /// Configuration and current parse state parser: Parser, } /// Builder methods impl Reader { /// Creates a `Reader` that reads from a given reader. 
pub fn from_reader(reader: R) -> Self { Self { reader, parser: Parser::default(), } } configure_methods!(); } /// Getters impl Reader { /// Consumes `Reader` returning the underlying reader /// /// Can be used to compute line and column of a parsing error position /// /// # Examples /// /// ``` /// # use pretty_assertions::assert_eq; /// use std::{str, io::Cursor}; /// use quick_xml::events::Event; /// use quick_xml::reader::Reader; /// /// let xml = r#" /// Test /// Test 2 /// "#; /// let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes())); /// let mut buf = Vec::new(); /// /// fn into_line_and_column(reader: Reader>) -> (usize, usize) { /// let end_pos = reader.buffer_position(); /// let mut cursor = reader.into_inner(); /// let s = String::from_utf8(cursor.into_inner()[0..end_pos].to_owned()) /// .expect("can't make a string"); /// let mut line = 1; /// let mut column = 0; /// for c in s.chars() { /// if c == '\n' { /// line += 1; /// column = 0; /// } else { /// column += 1; /// } /// } /// (line, column) /// } /// /// loop { /// match reader.read_event_into(&mut buf) { /// Ok(Event::Start(ref e)) => match e.name().as_ref() { /// b"tag1" | b"tag2" => (), /// tag => { /// assert_eq!(b"tag3", tag); /// assert_eq!((3, 22), into_line_and_column(reader)); /// break; /// } /// }, /// Ok(Event::Eof) => unreachable!(), /// _ => (), /// } /// buf.clear(); /// } /// ``` pub fn into_inner(self) -> R { self.reader } /// Gets a reference to the underlying reader. pub fn get_ref(&self) -> &R { &self.reader } /// Gets a mutable reference to the underlying reader. pub fn get_mut(&mut self) -> &mut R { &mut self.reader } /// Gets the current byte position in the input data. /// /// Useful when debugging errors. 
pub fn buffer_position(&self) -> usize { // when internal state is OpenedTag, we have actually read until '<', // which we don't want to show if let ParseState::OpenedTag = self.parser.state { self.parser.offset - 1 } else { self.parser.offset } } /// Get the decoder, used to decode bytes, read by this reader, to the strings. /// /// If `encoding` feature is enabled, the used encoding may change after /// parsing the XML declaration, otherwise encoding is fixed to UTF-8. /// /// If `encoding` feature is enabled and no encoding is specified in declaration, /// defaults to UTF-8. #[inline] pub fn decoder(&self) -> Decoder { self.parser.decoder() } } /// Private sync reading methods impl Reader { /// Read text into the given buffer, and return an event that borrows from /// either that buffer or from the input itself, based on the type of the /// reader. fn read_event_impl<'i, B>(&mut self, buf: B) -> Result> where R: XmlSource<'i, B>, { read_event_impl!(self, buf, self.reader, read_until_open, read_until_close) } /// Read until '<' is found, moves reader to an `OpenedTag` state and returns a `Text` event. fn read_until_open<'i, B>(&mut self, buf: B) -> Result> where R: XmlSource<'i, B>, { read_until_open!(self, buf, self.reader, read_event_impl) } /// Private function to read until `>` is found. This function expects that /// it was called just after encounter a `<` symbol. fn read_until_close<'i, B>(&mut self, buf: B) -> Result> where R: XmlSource<'i, B>, { read_until_close!(self, buf, self.reader) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Represents an input for a reader that can return borrowed data. /// /// There are two implementors of this trait: generic one that read data from /// `Self`, copies some part of it into a provided buffer of type `B` and then /// returns data that borrow from that buffer. 
/// /// The other implementor is for `&[u8]` and instead of copying data returns /// borrowed data from `Self` instead. This implementation allows zero-copy /// deserialization. /// /// # Parameters /// - `'r`: lifetime of a buffer from which events will borrow /// - `B`: a type of a buffer that can be used to store data read from `Self` and /// from which events can borrow trait XmlSource<'r, B> { /// Removes UTF-8 BOM if it is present #[cfg(not(feature = "encoding"))] fn remove_utf8_bom(&mut self) -> Result<()>; /// Determines encoding from the start of input and removes BOM if it is present #[cfg(feature = "encoding")] fn detect_encoding(&mut self) -> Result>; /// Read input until `byte` is found or end of input is reached. /// /// Returns a slice of data read up to `byte`, which does not include into result. /// If input (`Self`) is exhausted, returns `None`. /// /// # Example /// /// ```ignore /// let mut position = 0; /// let mut input = b"abc*def".as_ref(); /// // ^= 4 /// /// assert_eq!( /// input.read_bytes_until(b'*', (), &mut position).unwrap(), /// Some(b"abc".as_ref()) /// ); /// assert_eq!(position, 4); // position after the symbol matched /// ``` /// /// # Parameters /// - `byte`: Byte for search /// - `buf`: Buffer that could be filled from an input (`Self`) and /// from which [events] could borrow their data /// - `position`: Will be increased by amount of bytes consumed /// /// [events]: crate::events::Event fn read_bytes_until( &mut self, byte: u8, buf: B, position: &mut usize, ) -> Result>; /// Read input until comment, CDATA or processing instruction is finished. /// /// This method expect that `<` already was read. /// /// Returns a slice of data read up to end of comment, CDATA or processing /// instruction (`>`), which does not include into result. /// /// If input (`Self`) is exhausted and nothing was read, returns `None`. 
/// /// # Parameters /// - `buf`: Buffer that could be filled from an input (`Self`) and /// from which [events] could borrow their data /// - `position`: Will be increased by amount of bytes consumed /// /// [events]: crate::events::Event fn read_bang_element( &mut self, buf: B, position: &mut usize, ) -> Result>; /// Read input until XML element is closed by approaching a `>` symbol. /// Returns `Some(buffer)` that contains a data between `<` and `>` or /// `None` if end-of-input was reached and nothing was read. /// /// Derived from `read_until`, but modified to handle XML attributes /// using a minimal state machine. /// /// Attribute values are [defined] as follows: /// ```plain /// AttValue := '"' (([^<&"]) | Reference)* '"' /// | "'" (([^<&']) | Reference)* "'" /// ``` /// (`Reference` is something like `"`, but we don't care about /// escaped characters at this level) /// /// # Parameters /// - `buf`: Buffer that could be filled from an input (`Self`) and /// from which [events] could borrow their data /// - `position`: Will be increased by amount of bytes consumed /// /// [defined]: https://www.w3.org/TR/xml11/#NT-AttValue /// [events]: crate::events::Event fn read_element(&mut self, buf: B, position: &mut usize) -> Result>; /// Consume and discard all the whitespace until the next non-whitespace /// character or EOF. /// /// # Parameters /// - `position`: Will be increased by amount of bytes consumed fn skip_whitespace(&mut self, position: &mut usize) -> Result<()>; /// Consume and discard one character if it matches the given byte. Return /// `true` if it matched. /// /// # Parameters /// - `position`: Will be increased by 1 if byte is matched fn skip_one(&mut self, byte: u8, position: &mut usize) -> Result; /// Return one character without consuming it, so that future `read_*` calls /// will still include it. On EOF, return `None`. 
fn peek_one(&mut self) -> Result>; } /// Possible elements started with ` CData, /// Comment, /// DocType, } impl BangType { #[inline(always)] fn new(byte: Option) -> Result { Ok(match byte { Some(b'[') => Self::CData, Some(b'-') => Self::Comment, Some(b'D') | Some(b'd') => Self::DocType, Some(b) => return Err(Error::UnexpectedBang(b)), None => return Err(Error::UnexpectedEof("Bang".to_string())), }) } /// If element is finished, returns its content up to `>` symbol and /// an index of this symbol, otherwise returns `None` /// /// # Parameters /// - `buf`: buffer with data consumed on previous iterations /// - `chunk`: data read on current iteration and not yet consumed from reader #[inline(always)] fn parse<'b>(&self, buf: &[u8], chunk: &'b [u8]) -> Option<(&'b [u8], usize)> { for i in memchr::memchr_iter(b'>', chunk) { match self { // Need to read at least 6 symbols (`!---->`) for properly finished comment // - XML comment // 012345 - i Self::Comment if buf.len() + i > 4 => { if chunk[..i].ends_with(b"--") { // We cannot strip last `--` from the buffer because we need it in case of // check_comments enabled option. 
XML standard requires that comment // will not end with `--->` sequence because this is a special case of // `--` in the comment (https://www.w3.org/TR/xml11/#sec-comments) return Some((&chunk[..i], i + 1)); // +1 for `>` } // End sequence `-|->` was splitted at | // buf --/ \-- chunk if i == 1 && buf.ends_with(b"-") && chunk[0] == b'-' { return Some((&chunk[..i], i + 1)); // +1 for `>` } // End sequence `--|>` was splitted at | // buf --/ \-- chunk if i == 0 && buf.ends_with(b"--") { return Some((&[], i + 1)); // +1 for `>` } } Self::Comment => {} Self::CData => { if chunk[..i].ends_with(b"]]") { return Some((&chunk[..i], i + 1)); // +1 for `>` } // End sequence `]|]>` was splitted at | // buf --/ \-- chunk if i == 1 && buf.ends_with(b"]") && chunk[0] == b']' { return Some((&chunk[..i], i + 1)); // +1 for `>` } // End sequence `]]|>` was splitted at | // buf --/ \-- chunk if i == 0 && buf.ends_with(b"]]") { return Some((&[], i + 1)); // +1 for `>` } } Self::DocType => { let content = &chunk[..i]; let balance = memchr::memchr2_iter(b'<', b'>', content) .map(|p| if content[p] == b'<' { 1i32 } else { -1 }) .sum::(); if balance == 0 { return Some((content, i + 1)); // +1 for `>` } } } } None } #[inline] fn to_err(self) -> Error { let bang_str = match self { Self::CData => "CData", Self::Comment => "Comment", Self::DocType => "DOCTYPE", }; Error::UnexpectedEof(bang_str.to_string()) } } /// State machine for the [`XmlSource::read_element`] #[derive(Clone, Copy)] enum ReadElementState { /// The initial state (inside element, but outside of attribute value) Elem, /// Inside a single-quoted attribute value SingleQ, /// Inside a double-quoted attribute value DoubleQ, } impl ReadElementState { /// Changes state by analyzing part of input. 
/// Returns a tuple with part of chunk up to element closing symbol `>` /// and a position after that symbol or `None` if such symbol was not found #[inline(always)] fn change<'b>(&mut self, chunk: &'b [u8]) -> Option<(&'b [u8], usize)> { for i in memchr::memchr3_iter(b'>', b'\'', b'"', chunk) { *self = match (*self, chunk[i]) { // only allowed to match `>` while we are in state `Elem` (Self::Elem, b'>') => return Some((&chunk[..i], i + 1)), (Self::Elem, b'\'') => Self::SingleQ, (Self::Elem, b'\"') => Self::DoubleQ, // the only end_byte that gets us out if the same character (Self::SingleQ, b'\'') | (Self::DoubleQ, b'"') => Self::Elem, // all other bytes: no state change _ => *self, }; } None } } /// A function to check whether the byte is a whitespace (blank, new line, carriage return or tab) #[inline] pub(crate) fn is_whitespace(b: u8) -> bool { match b { b' ' | b'\r' | b'\n' | b'\t' => true, _ => false, } } //////////////////////////////////////////////////////////////////////////////////////////////////// #[cfg(test)] mod test { /// Checks the internal implementation of the various reader methods macro_rules! check { ( #[$test:meta] $read_event:ident, $read_until_close:ident, // constructor of the XML source on which internal functions will be called $source:path, // constructor of the buffer to which read data will stored $buf:expr $(, $async:ident, $await:ident)? ) => { mod read_bytes_until { use super::*; // Use Bytes for printing bytes as strings for ASCII range use crate::utils::Bytes; use pretty_assertions::assert_eq; /// Checks that search in the empty buffer returns `None` #[$test] $($async)? fn empty() { let buf = $buf; let mut position = 0; let mut input = b"".as_ref(); // ^= 0 assert_eq!( $source(&mut input) .read_bytes_until(b'*', buf, &mut position) $(.$await)? 
.unwrap() .map(Bytes), None ); assert_eq!(position, 0); } /// Checks that search in the buffer non-existent value returns entire buffer /// as a result and set `position` to `len()` #[$test] $($async)? fn non_existent() { let buf = $buf; let mut position = 0; let mut input = b"abcdef".as_ref(); // ^= 6 assert_eq!( $source(&mut input) .read_bytes_until(b'*', buf, &mut position) $(.$await)? .unwrap() .map(Bytes), Some(Bytes(b"abcdef")) ); assert_eq!(position, 6); } /// Checks that search in the buffer an element that is located in the front of /// buffer returns empty slice as a result and set `position` to one symbol /// after match (`1`) #[$test] $($async)? fn at_the_start() { let buf = $buf; let mut position = 0; let mut input = b"*abcdef".as_ref(); // ^= 1 assert_eq!( $source(&mut input) .read_bytes_until(b'*', buf, &mut position) $(.$await)? .unwrap() .map(Bytes), Some(Bytes(b"")) ); assert_eq!(position, 1); // position after the symbol matched } /// Checks that search in the buffer an element that is located in the middle of /// buffer returns slice before that symbol as a result and set `position` to one /// symbol after match #[$test] $($async)? fn inside() { let buf = $buf; let mut position = 0; let mut input = b"abc*def".as_ref(); // ^= 4 assert_eq!( $source(&mut input) .read_bytes_until(b'*', buf, &mut position) $(.$await)? .unwrap() .map(Bytes), Some(Bytes(b"abc")) ); assert_eq!(position, 4); // position after the symbol matched } /// Checks that search in the buffer an element that is located in the end of /// buffer returns slice before that symbol as a result and set `position` to one /// symbol after match (`len()`) #[$test] $($async)? fn in_the_end() { let buf = $buf; let mut position = 0; let mut input = b"abcdef*".as_ref(); // ^= 7 assert_eq!( $source(&mut input) .read_bytes_until(b'*', buf, &mut position) $(.$await)? 
.unwrap() .map(Bytes), Some(Bytes(b"abcdef")) ); assert_eq!(position, 7); // position after the symbol matched } } mod read_bang_element { use super::*; /// Checks that reading CDATA content works correctly mod cdata { use super::*; use crate::errors::Error; use crate::reader::BangType; use crate::utils::Bytes; use pretty_assertions::assert_eq; /// Checks that if input begins like CDATA element, but CDATA start sequence /// is not finished, parsing ends with an error #[$test] #[ignore = "start CDATA sequence fully checked outside of `read_bang_element`"] $($async)? fn not_properly_start() { let buf = $buf; let mut position = 0; let mut input = b"![]]>other content".as_ref(); // ^= 0 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "CData" => {} x => assert!( false, r#"Expected `UnexpectedEof("CData")`, but result is: {:?}"#, x ), } assert_eq!(position, 0); } /// Checks that if CDATA startup sequence was matched, but an end sequence /// is not found, parsing ends with an error #[$test] $($async)? fn not_closed() { let buf = $buf; let mut position = 0; let mut input = b"![CDATA[other content".as_ref(); // ^= 0 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "CData" => {} x => assert!( false, r#"Expected `UnexpectedEof("CData")`, but result is: {:?}"#, x ), } assert_eq!(position, 0); } /// Checks that CDATA element without content inside parsed successfully #[$test] $($async)? fn empty() { let buf = $buf; let mut position = 0; let mut input = b"![CDATA[]]>other content".as_ref(); // ^= 11 assert_eq!( $source(&mut input) .read_bang_element(buf, &mut position) $(.$await)? .unwrap() .map(|(ty, data)| (ty, Bytes(data))), Some((BangType::CData, Bytes(b"![CDATA[]]"))) ); assert_eq!(position, 11); } /// Checks that CDATA element with content parsed successfully. 
/// Additionally checks that sequences inside CDATA that may look like /// a CDATA end sequence do not interrupt CDATA parsing #[$test] $($async)? fn with_content() { let buf = $buf; let mut position = 0; let mut input = b"![CDATA[cdata]] ]>content]]>other content]]>".as_ref(); // ^= 28 assert_eq!( $source(&mut input) .read_bang_element(buf, &mut position) $(.$await)? .unwrap() .map(|(ty, data)| (ty, Bytes(data))), Some((BangType::CData, Bytes(b"![CDATA[cdata]] ]>content]]"))) ); assert_eq!(position, 28); } } /// Checks that reading XML comments works correctly. According to the [specification], /// comment data can contain any sequence except `--`: /// /// ```peg /// comment = '<--' (!'--' char)* '-->'; /// char = [#x1-#x2C] /// / [#x2E-#xD7FF] /// / [#xE000-#xFFFD] /// / [#x10000-#x10FFFF] /// ``` /// /// The presence of this limitation, however, is simply a poorly designed specification /// (maybe for purpose of building of LL(1) XML parser) and quick-xml does not check for /// presence of these sequences by default. This tests allow such content. /// /// [specification]: https://www.w3.org/TR/xml11/#dt-comment mod comment { use super::*; use crate::errors::Error; use crate::reader::BangType; use crate::utils::Bytes; use pretty_assertions::assert_eq; #[$test] #[ignore = "start comment sequence fully checked outside of `read_bang_element`"] $($async)? fn not_properly_start() { let buf = $buf; let mut position = 0; let mut input = b"!- -->other content".as_ref(); // ^= 0 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, r#"Expected `UnexpectedEof("Comment")`, but result is: {:?}"#, x ), } assert_eq!(position, 0); } #[$test] $($async)? fn not_properly_end() { let buf = $buf; let mut position = 0; let mut input = b"!->other content".as_ref(); // ^= 0 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? 
{ Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, r#"Expected `UnexpectedEof("Comment")`, but result is: {:?}"#, x ), } assert_eq!(position, 0); } #[$test] $($async)? fn not_closed1() { let buf = $buf; let mut position = 0; let mut input = b"!--other content".as_ref(); // ^= 0 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, r#"Expected `UnexpectedEof("Comment")`, but result is: {:?}"#, x ), } assert_eq!(position, 0); } #[$test] $($async)? fn not_closed2() { let buf = $buf; let mut position = 0; let mut input = b"!-->other content".as_ref(); // ^= 0 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, r#"Expected `UnexpectedEof("Comment")`, but result is: {:?}"#, x ), } assert_eq!(position, 0); } #[$test] $($async)? fn not_closed3() { let buf = $buf; let mut position = 0; let mut input = b"!--->other content".as_ref(); // ^= 0 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, r#"Expected `UnexpectedEof("Comment")`, but result is: {:?}"#, x ), } assert_eq!(position, 0); } #[$test] $($async)? fn empty() { let buf = $buf; let mut position = 0; let mut input = b"!---->other content".as_ref(); // ^= 6 assert_eq!( $source(&mut input) .read_bang_element(buf, &mut position) $(.$await)? .unwrap() .map(|(ty, data)| (ty, Bytes(data))), Some((BangType::Comment, Bytes(b"!----"))) ); assert_eq!(position, 6); } #[$test] $($async)? fn with_content() { let buf = $buf; let mut position = 0; let mut input = b"!--->comment<--->other content".as_ref(); // ^= 17 assert_eq!( $source(&mut input) .read_bang_element(buf, &mut position) $(.$await)? 
.unwrap() .map(|(ty, data)| (ty, Bytes(data))), Some((BangType::Comment, Bytes(b"!--->comment<---"))) ); assert_eq!(position, 17); } } /// Checks that reading DOCTYPE definition works correctly mod doctype { use super::*; mod uppercase { use super::*; use crate::errors::Error; use crate::reader::BangType; use crate::utils::Bytes; use pretty_assertions::assert_eq; #[$test] $($async)? fn not_properly_start() { let buf = $buf; let mut position = 0; let mut input = b"!D other content".as_ref(); // ^= 0 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#, x ), } assert_eq!(position, 0); } #[$test] $($async)? fn without_space() { let buf = $buf; let mut position = 0; let mut input = b"!DOCTYPEother content".as_ref(); // ^= 0 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#, x ), } assert_eq!(position, 0); } #[$test] $($async)? fn empty() { let buf = $buf; let mut position = 0; let mut input = b"!DOCTYPE>other content".as_ref(); // ^= 9 assert_eq!( $source(&mut input) .read_bang_element(buf, &mut position) $(.$await)? .unwrap() .map(|(ty, data)| (ty, Bytes(data))), Some((BangType::DocType, Bytes(b"!DOCTYPE"))) ); assert_eq!(position, 9); } #[$test] $($async)? fn not_closed() { let buf = $buf; let mut position = 0; let mut input = b"!DOCTYPE other content".as_ref(); // ^= 0 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? 
{ Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#, x ), } assert_eq!(position, 0); } } mod lowercase { use super::*; use crate::errors::Error; use crate::reader::BangType; use crate::utils::Bytes; use pretty_assertions::assert_eq; #[$test] $($async)? fn not_properly_start() { let buf = $buf; let mut position = 0; let mut input = b"!d other content".as_ref(); // ^= 0 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#, x ), } assert_eq!(position, 0); } #[$test] $($async)? fn without_space() { let buf = $buf; let mut position = 0; let mut input = b"!doctypeother content".as_ref(); // ^= 0 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#, x ), } assert_eq!(position, 0); } #[$test] $($async)? fn empty() { let buf = $buf; let mut position = 0; let mut input = b"!doctype>other content".as_ref(); // ^= 9 assert_eq!( $source(&mut input) .read_bang_element(buf, &mut position) $(.$await)? .unwrap() .map(|(ty, data)| (ty, Bytes(data))), Some((BangType::DocType, Bytes(b"!doctype"))) ); assert_eq!(position, 9); } #[$test] $($async)? fn not_closed() { let buf = $buf; let mut position = 0; let mut input = b"!doctype other content".as_ref(); // ^= 0 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#, x ), } assert_eq!(position, 0); } } } } mod read_element { use super::*; use crate::utils::Bytes; use pretty_assertions::assert_eq; /// Checks that nothing was read from empty buffer #[$test] $($async)? 
fn empty() { let buf = $buf; let mut position = 0; let mut input = b"".as_ref(); // ^= 0 assert_eq!( $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), None ); assert_eq!(position, 0); } mod open { use super::*; use crate::utils::Bytes; use pretty_assertions::assert_eq; #[$test] $($async)? fn empty_tag() { let buf = $buf; let mut position = 0; let mut input = b">".as_ref(); // ^= 1 assert_eq!( $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(b"")) ); assert_eq!(position, 1); } #[$test] $($async)? fn normal() { let buf = $buf; let mut position = 0; let mut input = b"tag>".as_ref(); // ^= 4 assert_eq!( $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(b"tag")) ); assert_eq!(position, 4); } #[$test] $($async)? fn empty_ns_empty_tag() { let buf = $buf; let mut position = 0; let mut input = b":>".as_ref(); // ^= 2 assert_eq!( $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(b":")) ); assert_eq!(position, 2); } #[$test] $($async)? fn empty_ns() { let buf = $buf; let mut position = 0; let mut input = b":tag>".as_ref(); // ^= 5 assert_eq!( $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(b":tag")) ); assert_eq!(position, 5); } #[$test] $($async)? fn with_attributes() { let buf = $buf; let mut position = 0; let mut input = br#"tag attr-1=">" attr2 = '>' 3attr>"#.as_ref(); // ^= 38 assert_eq!( $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(br#"tag attr-1=">" attr2 = '>' 3attr"#)) ); assert_eq!(position, 38); } } mod self_closed { use super::*; use crate::utils::Bytes; use pretty_assertions::assert_eq; #[$test] $($async)? fn empty_tag() { let buf = $buf; let mut position = 0; let mut input = b"/>".as_ref(); // ^= 2 assert_eq!( $source(&mut input).read_element(buf, &mut position) $(.$await)? 
.unwrap().map(Bytes), Some(Bytes(b"/")) ); assert_eq!(position, 2); } #[$test] $($async)? fn normal() { let buf = $buf; let mut position = 0; let mut input = b"tag/>".as_ref(); // ^= 5 assert_eq!( $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(b"tag/")) ); assert_eq!(position, 5); } #[$test] $($async)? fn empty_ns_empty_tag() { let buf = $buf; let mut position = 0; let mut input = b":/>".as_ref(); // ^= 3 assert_eq!( $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(b":/")) ); assert_eq!(position, 3); } #[$test] $($async)? fn empty_ns() { let buf = $buf; let mut position = 0; let mut input = b":tag/>".as_ref(); // ^= 6 assert_eq!( $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(b":tag/")) ); assert_eq!(position, 6); } #[$test] $($async)? fn with_attributes() { let buf = $buf; let mut position = 0; let mut input = br#"tag attr-1="/>" attr2 = '/>' 3attr/>"#.as_ref(); // ^= 41 assert_eq!( $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(br#"tag attr-1="/>" attr2 = '/>' 3attr/"#)) ); assert_eq!(position, 41); } } } mod issue_344 { use crate::errors::Error; use crate::reader::Reader; #[$test] $($async)? fn cdata() { let mut reader = Reader::from_str("![]]>"); match reader.$read_until_close($buf) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "CData" => {} x => assert!( false, r#"Expected `UnexpectedEof("CData")`, but result is: {:?}"#, x ), } } #[$test] $($async)? fn comment() { let mut reader = Reader::from_str("!- -->"); match reader.$read_until_close($buf) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, r#"Expected `UnexpectedEof("Comment")`, but result is: {:?}"#, x ), } } #[$test] $($async)? fn doctype_uppercase() { let mut reader = Reader::from_str("!D>"); match reader.$read_until_close($buf) $(.$await)? 
{ Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#, x ), } } #[$test] $($async)? fn doctype_lowercase() { let mut reader = Reader::from_str("!d>"); match reader.$read_until_close($buf) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#, x ), } } } /// Ensures, that no empty `Text` events are generated mod $read_event { use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event}; use crate::reader::Reader; use pretty_assertions::assert_eq; /// When `encoding` feature is enabled, encoding should be detected /// from BOM (UTF-8) and BOM should be stripped. /// /// When `encoding` feature is disabled, UTF-8 is assumed and BOM /// character should be stripped for consistency #[$test] $($async)? fn bom_from_reader() { let mut reader = Reader::from_reader("\u{feff}\u{feff}".as_bytes()); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Text(BytesText::from_escaped("\u{feff}")) ); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Eof ); } /// When parsing from &str, encoding is fixed (UTF-8), so /// - when `encoding` feature is disabled, the behavior the /// same as in `bom_from_reader` text /// - when `encoding` feature is enabled, the behavior should /// stay consistent, so the first BOM character is stripped #[$test] $($async)? fn bom_from_str() { let mut reader = Reader::from_str("\u{feff}\u{feff}"); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Text(BytesText::from_escaped("\u{feff}")) ); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Eof ); } #[$test] $($async)? fn declaration() { let mut reader = Reader::from_str(""); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3))) ); } #[$test] $($async)? 
fn doctype() { let mut reader = Reader::from_str(""); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::DocType(BytesText::from_escaped("x")) ); } #[$test] $($async)? fn processing_instruction() { let mut reader = Reader::from_str(""); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::PI(BytesText::from_escaped("xml-stylesheet")) ); } #[$test] $($async)? fn start() { let mut reader = Reader::from_str(""); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Start(BytesStart::new("tag")) ); } #[$test] $($async)? fn end() { let mut reader = Reader::from_str(""); // Because we expect invalid XML, do not check that // the end name paired with the start name reader.check_end_names(false); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::End(BytesEnd::new("tag")) ); } #[$test] $($async)? fn empty() { let mut reader = Reader::from_str(""); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Empty(BytesStart::new("tag")) ); } #[$test] $($async)? fn text() { let mut reader = Reader::from_str("text"); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Text(BytesText::from_escaped("text")) ); } #[$test] $($async)? fn cdata() { let mut reader = Reader::from_str(""); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::CData(BytesCData::new("")) ); } #[$test] $($async)? fn comment() { let mut reader = Reader::from_str(""); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Comment(BytesText::from_escaped("")) ); } #[$test] $($async)? fn eof() { let mut reader = Reader::from_str(""); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Eof ); } } }; } /// Tests for https://github.com/tafia/quick-xml/issues/469 macro_rules! small_buffers { ( #[$test:meta] $read_event:ident: $BufReader:ty $(, $async:ident, $await:ident)? 
) => { mod small_buffers { use crate::events::{BytesCData, BytesDecl, BytesStart, BytesText, Event}; use crate::reader::Reader; use pretty_assertions::assert_eq; #[$test] $($async)? fn decl() { let xml = ""; // ^^^^^^^ data that fit into buffer let size = xml.match_indices("?>").next().unwrap().0 + 1; let br = <$BufReader>::with_capacity(size, xml.as_bytes()); let mut reader = Reader::from_reader(br); let mut buf = Vec::new(); assert_eq!( reader.$read_event(&mut buf) $(.$await)? .unwrap(), Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3))) ); assert_eq!( reader.$read_event(&mut buf) $(.$await)? .unwrap(), Event::Eof ); } #[$test] $($async)? fn pi() { let xml = ""; // ^^^^^ data that fit into buffer let size = xml.match_indices("?>").next().unwrap().0 + 1; let br = <$BufReader>::with_capacity(size, xml.as_bytes()); let mut reader = Reader::from_reader(br); let mut buf = Vec::new(); assert_eq!( reader.$read_event(&mut buf) $(.$await)? .unwrap(), Event::PI(BytesText::new("pi")) ); assert_eq!( reader.$read_event(&mut buf) $(.$await)? .unwrap(), Event::Eof ); } #[$test] $($async)? fn empty() { let xml = ""; // ^^^^^^^ data that fit into buffer let size = xml.match_indices("/>").next().unwrap().0 + 1; let br = <$BufReader>::with_capacity(size, xml.as_bytes()); let mut reader = Reader::from_reader(br); let mut buf = Vec::new(); assert_eq!( reader.$read_event(&mut buf) $(.$await)? .unwrap(), Event::Empty(BytesStart::new("empty")) ); assert_eq!( reader.$read_event(&mut buf) $(.$await)? .unwrap(), Event::Eof ); } #[$test] $($async)? fn cdata1() { let xml = ""; // ^^^^^^^^^^^^^^^ data that fit into buffer let size = xml.match_indices("]]>").next().unwrap().0 + 1; let br = <$BufReader>::with_capacity(size, xml.as_bytes()); let mut reader = Reader::from_reader(br); let mut buf = Vec::new(); assert_eq!( reader.$read_event(&mut buf) $(.$await)? .unwrap(), Event::CData(BytesCData::new("cdata")) ); assert_eq!( reader.$read_event(&mut buf) $(.$await)? 
.unwrap(), Event::Eof ); } #[$test] $($async)? fn cdata2() { let xml = ""; // ^^^^^^^^^^^^^^^^ data that fit into buffer let size = xml.match_indices("]]>").next().unwrap().0 + 2; let br = <$BufReader>::with_capacity(size, xml.as_bytes()); let mut reader = Reader::from_reader(br); let mut buf = Vec::new(); assert_eq!( reader.$read_event(&mut buf) $(.$await)? .unwrap(), Event::CData(BytesCData::new("cdata")) ); assert_eq!( reader.$read_event(&mut buf) $(.$await)? .unwrap(), Event::Eof ); } #[$test] $($async)? fn comment1() { let xml = ""; // ^^^^^^^^^^^^ data that fit into buffer let size = xml.match_indices("-->").next().unwrap().0 + 1; let br = <$BufReader>::with_capacity(size, xml.as_bytes()); let mut reader = Reader::from_reader(br); let mut buf = Vec::new(); assert_eq!( reader.$read_event(&mut buf) $(.$await)? .unwrap(), Event::Comment(BytesText::new("comment")) ); assert_eq!( reader.$read_event(&mut buf) $(.$await)? .unwrap(), Event::Eof ); } #[$test] $($async)? fn comment2() { let xml = ""; // ^^^^^^^^^^^^^ data that fit into buffer let size = xml.match_indices("-->").next().unwrap().0 + 2; let br = <$BufReader>::with_capacity(size, xml.as_bytes()); let mut reader = Reader::from_reader(br); let mut buf = Vec::new(); assert_eq!( reader.$read_event(&mut buf) $(.$await)? .unwrap(), Event::Comment(BytesText::new("comment")) ); assert_eq!( reader.$read_event(&mut buf) $(.$await)? .unwrap(), Event::Eof ); } } }; } // Export macros for the child modules: // - buffered_reader // - slice_reader pub(super) use check; pub(super) use small_buffers; } quick-xml-0.27.1/src/reader/ns_reader.rs000064400000000000000000001017460072674642500162270ustar 00000000000000//! A reader that manages namespace declarations found in the input and able //! to resolve [qualified names] to [expanded names]. //! //! [qualified names]: https://www.w3.org/TR/xml-names11/#dt-qualname //! 
[expanded names]: https://www.w3.org/TR/xml-names11/#dt-expname use std::borrow::Cow; use std::fs::File; use std::io::{BufRead, BufReader}; use std::ops::Deref; use std::path::Path; use crate::errors::Result; use crate::events::Event; use crate::name::{LocalName, NamespaceResolver, QName, ResolveResult}; use crate::reader::{Reader, Span, XmlSource}; /// A low level encoding-agnostic XML event reader that performs namespace resolution. /// /// Consumes a [`BufRead`] and streams XML `Event`s. pub struct NsReader { /// An XML reader pub(super) reader: Reader, /// Buffer that contains names of namespace prefixes (the part between `xmlns:` /// and an `=`) and namespace values. buffer: Vec, /// A buffer to manage namespaces ns_resolver: NamespaceResolver, /// We cannot pop data from the namespace stack until returned `Empty` or `End` /// event will be processed by the user, so we only mark that we should that /// in the next [`Self::read_event_impl()`] call. pending_pop: bool, } /// Builder methods impl NsReader { /// Creates a `NsReader` that reads from a reader. 
#[inline]
    pub fn from_reader(reader: R) -> Self {
        Self::new(Reader::from_reader(reader))
    }

    configure_methods!(reader);
}

/// Private methods
impl<R> NsReader<R> {
    /// Wraps an already configured `reader` and starts with an empty namespace
    /// buffer, a default resolver and no pending pop.
    #[inline]
    fn new(reader: Reader<R>) -> Self {
        Self {
            reader,
            buffer: Vec::new(),
            ns_resolver: NamespaceResolver::default(),
            pending_pop: false,
        }
    }

    /// Reads the next event from the inner reader and updates the namespace
    /// state accordingly.
    ///
    /// A pop postponed by the previous call is performed first, then the event
    /// is read and passed through [`Self::process_event()`].
    fn read_event_impl<'i, B>(&mut self, buf: B) -> Result<Event<'i>>
    where
        R: XmlSource<'i, B>,
    {
        self.pop();
        let event = self.reader.read_event_impl(buf);
        self.process_event(event)
    }

    /// Pops the innermost namespace scope if the previous event requested that
    /// (see `pending_pop`), and clears the request.
    pub(super) fn pop(&mut self) {
        if self.pending_pop {
            self.ns_resolver.pop(&mut self.buffer);
            self.pending_pop = false;
        }
    }

    /// Updates the namespace state for the given `event` and returns the event
    /// unchanged.
    ///
    /// - `Start` pushes a new namespace scope;
    /// - `Empty` pushes a new scope and schedules its pop for the next read;
    /// - `End` schedules a pop for the next read;
    /// - all other events and errors are passed through untouched.
    pub(super) fn process_event<'i>(&mut self, event: Result<Event<'i>>) -> Result<Event<'i>> {
        match event {
            Ok(Event::Start(e)) => {
                self.ns_resolver.push(&e, &mut self.buffer);
                Ok(Event::Start(e))
            }
            Ok(Event::Empty(e)) => {
                self.ns_resolver.push(&e, &mut self.buffer);
                // notify next `read_event_impl()` invocation that it needs to pop this
                // namespace scope
                self.pending_pop = true;
                Ok(Event::Empty(e))
            }
            Ok(Event::End(e)) => {
                // notify next `read_event_impl()` invocation that it needs to pop this
                // namespace scope
                self.pending_pop = true;
                Ok(Event::End(e))
            }
            e => e,
        }
    }

    /// Pairs the given `event` with the namespace resolution result of its name.
    ///
    /// The name is looked up only for `Start`, `Empty` and `End` events; every
    /// other successfully read event is returned together with
    /// [`ResolveResult::Unbound`]. Errors are passed through.
    pub(super) fn resolve_event<'i>(
        &mut self,
        event: Result<Event<'i>>,
    ) -> Result<(ResolveResult, Event<'i>)> {
        match event {
            Ok(Event::Start(e)) => Ok((
                self.ns_resolver.find(e.name(), &self.buffer),
                Event::Start(e),
            )),
            Ok(Event::Empty(e)) => Ok((
                self.ns_resolver.find(e.name(), &self.buffer),
                Event::Empty(e),
            )),
            Ok(Event::End(e)) => Ok((
                // This comment prevents `cargo fmt` from collapsing the tuple
                self.ns_resolver.find(e.name(), &self.buffer),
                Event::End(e),
            )),
            Ok(e) => Ok((ResolveResult::Unbound, e)),
            Err(e) => Err(e),
        }
    }
}

/// Getters
impl<R> NsReader<R> {
    /// Consumes `NsReader` returning the underlying reader
    ///
    /// See the [`Reader::into_inner`] for examples
    #[inline]
    pub fn into_inner(self) -> R {
        self.reader.into_inner()
    }

    /// Gets a mutable reference to the underlying reader.
pub fn get_mut(&mut self) -> &mut R {
        // Borrow only the wrapped XML reader and delegate to it
        let Self { reader, .. } = self;
        reader.get_mut()
    }

    /// Resolves a potentially qualified **element name** or **attribute name**
    /// into _(namespace name, local name)_.
    ///
    /// _Qualified_ names have the form `prefix:local-name` where the `prefix`
    /// is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
    /// The namespace prefix can be defined on the same element as the name in question.
    ///
    /// The method returns following results depending on the `name` shape,
    /// `attribute` flag and the presence of the default namespace:
    ///
    /// |attribute|`xmlns="..."`|QName              |ResolveResult          |LocalName
    /// |---------|-------------|-------------------|-----------------------|------------
    /// |`true`   |Not defined  |`local-name`       |[`Unbound`]            |`local-name`
    /// |`true`   |Defined      |`local-name`       |[`Unbound`]            |`local-name`
    /// |`true`   |_any_        |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
    /// |`false`  |Not defined  |`local-name`       |[`Unbound`]            |`local-name`
    /// |`false`  |Defined      |`local-name`       |[`Bound`] (default)    |`local-name`
    /// |`false`  |_any_        |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
    ///
    /// If you want to clearly indicate that name that you resolve is an element
    /// or an attribute name, you could use [`resolve_attribute()`] or [`resolve_element()`]
    /// methods.
    ///
    /// # Lifetimes
    ///
    /// - `'n`: lifetime of a name. Returned local name will be bound to the same
    ///   lifetime as the name in question.
/// - returned namespace name will be bound to the reader itself
    ///
    /// [`Bound`]: ResolveResult::Bound
    /// [`Unbound`]: ResolveResult::Unbound
    /// [`Unknown`]: ResolveResult::Unknown
    /// [`resolve_attribute()`]: Self::resolve_attribute()
    /// [`resolve_element()`]: Self::resolve_element()
    #[inline]
    pub fn resolve<'n>(&self, name: QName<'n>, attribute: bool) -> (ResolveResult, LocalName<'n>) {
        // The resolver flag is the inverse of `attribute`: `resolve_element()`
        // passes `true` in this position and `resolve_attribute()` passes `false`
        let as_element = !attribute;
        self.ns_resolver.resolve(name, &self.buffer, as_element)
    }

    /// Resolves a potentially qualified **element name** into _(namespace name, local name)_.
    ///
    /// _Qualified_ element names have the form `prefix:local-name` where the
    /// `prefix` is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
    /// The namespace prefix can be defined on the same element as the element
    /// in question.
    ///
    /// _Unqualified_ elements inherits the current _default namespace_.
    ///
    /// The method returns following results depending on the `name` shape and
    /// the presence of the default namespace:
    ///
    /// |`xmlns="..."`|QName              |ResolveResult          |LocalName
    /// |-------------|-------------------|-----------------------|------------
    /// |Not defined  |`local-name`       |[`Unbound`]            |`local-name`
    /// |Defined      |`local-name`       |[`Bound`] (default)    |`local-name`
    /// |_any_        |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
    ///
    /// # Lifetimes
    ///
    /// - `'n`: lifetime of an element name. Returned local name will be bound
    ///   to the same lifetime as the name in question.
    /// - returned namespace name will be bound to the reader itself
    ///
    /// # Examples
    ///
    /// This example shows how you can resolve qualified name into a namespace.
    /// Note, that in the code like this you do not need to do that manually,
    /// because the namespace resolution result returned by the [`read_resolved_event()`].
///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use quick_xml::events::Event;
    /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
    /// use quick_xml::reader::NsReader;
    ///
    /// let mut reader = NsReader::from_str("<tag xmlns='root namespace'/>");
    ///
    /// match reader.read_event().unwrap() {
    ///     Event::Empty(e) => assert_eq!(
    ///         reader.resolve_element(e.name()),
    ///         (Bound(Namespace(b"root namespace")), QName(b"tag").into())
    ///     ),
    ///     _ => unreachable!(),
    /// }
    /// ```
    ///
    /// [`Bound`]: ResolveResult::Bound
    /// [`Unbound`]: ResolveResult::Unbound
    /// [`Unknown`]: ResolveResult::Unknown
    /// [`read_resolved_event()`]: Self::read_resolved_event
    #[inline]
    pub fn resolve_element<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) {
        // `true`: an unqualified element name inherits the default namespace
        self.ns_resolver.resolve(name, &self.buffer, true)
    }

    /// Resolves a potentially qualified **attribute name** into _(namespace name, local name)_.
    ///
    /// _Qualified_ attribute names have the form `prefix:local-name` where the
    /// `prefix` is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
    /// The namespace prefix can be defined on the same element as the attribute
    /// in question.
    ///
    /// _Unqualified_ attribute names do *not* inherit the current _default namespace_.
    ///
    /// The method returns following results depending on the `name` shape and
    /// the presence of the default namespace:
    ///
    /// |`xmlns="..."`|QName              |ResolveResult          |LocalName
    /// |-------------|-------------------|-----------------------|------------
    /// |Not defined  |`local-name`       |[`Unbound`]            |`local-name`
    /// |Defined      |`local-name`       |[`Unbound`]            |`local-name`
    /// |_any_        |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
    ///
    /// # Lifetimes
    ///
    /// - `'n`: lifetime of an attribute name. Returned local name will be bound
    ///   to the same lifetime as the name in question.
/// - returned namespace name will be bound to the reader itself /// /// # Examples /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::Event; /// use quick_xml::events::attributes::Attribute; /// use quick_xml::name::{Namespace, QName, ResolveResult::*}; /// use quick_xml::reader::NsReader; /// /// let mut reader = NsReader::from_str(" /// /// "); /// reader.trim_text(true); /// /// match reader.read_event().unwrap() { /// Event::Empty(e) => { /// let mut iter = e.attributes(); /// /// // Unlike elements, attributes without explicit namespace /// // not bound to any namespace /// let one = iter.next().unwrap().unwrap(); /// assert_eq!( /// reader.resolve_attribute(one.key), /// (Unbound, QName(b"one").into()) /// ); /// /// let two = iter.next().unwrap().unwrap(); /// assert_eq!( /// reader.resolve_attribute(two.key), /// (Bound(Namespace(b"other namespace")), QName(b"two").into()) /// ); /// } /// _ => unreachable!(), /// } /// ``` /// /// [`Bound`]: ResolveResult::Bound /// [`Unbound`]: ResolveResult::Unbound /// [`Unknown`]: ResolveResult::Unknown #[inline] pub fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) { self.ns_resolver.resolve(name, &self.buffer, false) } } impl NsReader { /// Reads the next event into given buffer. /// /// This method manages namespaces but doesn't resolve them automatically. /// You should call [`resolve_element()`] if you want to get a namespace. /// /// You also can use [`read_resolved_event_into()`] instead if you want to resolve /// namespace as soon as you get an event. 
///
    /// # Examples
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use quick_xml::events::Event;
    /// use quick_xml::name::{Namespace, ResolveResult::*};
    /// use quick_xml::reader::NsReader;
    ///
    /// let mut reader = NsReader::from_str(r#"
    ///     <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
    ///        <y:tag2><!--Test comment-->Test</y:tag2>
    ///        <y:tag2>Test 2</y:tag2>
    ///     </x:tag1>
    /// "#);
    /// reader.trim_text(true);
    ///
    /// let mut count = 0;
    /// let mut buf = Vec::new();
    /// let mut txt = Vec::new();
    /// loop {
    ///     match reader.read_event_into(&mut buf).unwrap() {
    ///         Event::Start(e) => {
    ///             count += 1;
    ///             let (ns, local) = reader.resolve_element(e.name());
    ///             match local.as_ref() {
    ///                 b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))),
    ///                 b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))),
    ///                 _ => unreachable!(),
    ///             }
    ///         }
    ///         Event::Text(e) => {
    ///             txt.push(e.unescape().unwrap().into_owned())
    ///         }
    ///         Event::Eof => break,
    ///         _ => (),
    ///     }
    ///     buf.clear();
    /// }
    /// assert_eq!(count, 3);
    /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
    /// ```
    ///
    /// [`resolve_element()`]: Self::resolve_element
    /// [`read_resolved_event_into()`]: Self::read_resolved_event_into
    #[inline]
    pub fn read_event_into<'b>(&mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>> {
        self.read_event_impl(buf)
    }

    /// Reads the next event into given buffer and resolves its namespace (if applicable).
    ///
    /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events.
    /// For all other events the concept of namespace is not defined, so
    /// a [`ResolveResult::Unbound`] is returned.
    ///
    /// If you are not interested in namespaces, you can use [`read_event_into()`]
    /// which will not automatically resolve namespaces for you.
///
    /// # Examples
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use quick_xml::events::Event;
    /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
    /// use quick_xml::reader::NsReader;
    ///
    /// let mut reader = NsReader::from_str(r#"
    ///     <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
    ///        <y:tag2><!--Test comment-->Test</y:tag2>
    ///        <y:tag2>Test 2</y:tag2>
    ///     </x:tag1>
    /// "#);
    /// reader.trim_text(true);
    ///
    /// let mut count = 0;
    /// let mut buf = Vec::new();
    /// let mut txt = Vec::new();
    /// loop {
    ///     match reader.read_resolved_event_into(&mut buf).unwrap() {
    ///         (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => {
    ///             count += 1;
    ///             assert_eq!(e.local_name(), QName(b"tag1").into());
    ///         }
    ///         (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => {
    ///             count += 1;
    ///             assert_eq!(e.local_name(), QName(b"tag2").into());
    ///         }
    ///         (_, Event::Start(_)) => unreachable!(),
    ///
    ///         (_, Event::Text(e)) => {
    ///             txt.push(e.unescape().unwrap().into_owned())
    ///         }
    ///         (_, Event::Eof) => break,
    ///         _ => (),
    ///     }
    ///     buf.clear();
    /// }
    /// assert_eq!(count, 3);
    /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
    /// ```
    ///
    /// [`Start`]: Event::Start
    /// [`Empty`]: Event::Empty
    /// [`End`]: Event::End
    /// [`read_event_into()`]: Self::read_event_into
    #[inline]
    pub fn read_resolved_event_into<'b>(
        &mut self,
        buf: &'b mut Vec<u8>,
    ) -> Result<(ResolveResult, Event<'b>)> {
        // Read first (this also updates the namespace scopes), then look up
        // the namespace of the event that was just read
        let event = self.read_event_impl(buf);
        self.resolve_event(event)
    }

    /// Reads until end element is found using provided buffer as intermediate
    /// storage for events content. This function is supposed to be called after
    /// you already read a [`Start`] event.
    ///
    /// Returns a span that cover content between `>` of an opening tag and `<` of
    /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and
    /// this method was called after reading expanded [`Start`] event.
    ///
    /// Manages nested cases where parent and child elements have the _literally_
    /// same name.
    ///
    /// If corresponding [`End`] event will not be found, the [`UnexpectedEof`]
    /// will be returned.
In particularly, that error will be returned if you call /// this method without consuming the corresponding [`Start`] event first. /// /// If your reader created from a string slice or byte array slice, it is /// better to use [`read_to_end()`] method, because it will not copy bytes /// into intermediate buffer. /// /// The provided `buf` buffer will be filled only by one event content at time. /// Before reading of each event the buffer will be cleared. If you know an /// appropriate size of each event, you can preallocate the buffer to reduce /// number of reallocations. /// /// The `end` parameter should contain name of the end element _in the reader /// encoding_. It is good practice to always get that parameter using /// [`BytesStart::to_end()`] method. /// /// # Namespaces /// /// While the `NsReader` does namespace resolution, namespaces does not /// change the algorithm for comparing names. Although the names `a:name` /// and `b:name` where both prefixes `a` and `b` resolves to the same namespace, /// are semantically equivalent, `` cannot close ``, because /// according to [the specification] /// /// > The end of every element that begins with a **start-tag** MUST be marked /// > by an **end-tag** containing a name that echoes the element's type as /// > given in the **start-tag** /// /// # Examples /// /// This example shows, how you can skip XML content after you read the /// start event. /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::name::{Namespace, ResolveResult}; /// use quick_xml::reader::NsReader; /// /// let mut reader = NsReader::from_str(r#" /// /// /// /// /// /// /// /// /// /// /// /// /// "#); /// reader.trim_text(true); /// let mut buf = Vec::new(); /// /// let ns = Namespace(b"namespace 1"); /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5); /// let end = start.to_end().into_owned(); /// /// // First, we read a start event... 
/// assert_eq!( /// reader.read_resolved_event_into(&mut buf).unwrap(), /// (ResolveResult::Bound(ns), Event::Start(start)) /// ); /// /// // ...then, we could skip all events to the corresponding end event. /// // This call will correctly handle nested elements. /// // Note, however, that this method does not handle namespaces. /// reader.read_to_end_into(end.name(), &mut buf).unwrap(); /// /// // At the end we should get an Eof event, because we ate the whole XML /// assert_eq!( /// reader.read_resolved_event_into(&mut buf).unwrap(), /// (ResolveResult::Unbound, Event::Eof) /// ); /// ``` /// /// [`Start`]: Event::Start /// [`End`]: Event::End /// [`UnexpectedEof`]: crate::errors::Error::UnexpectedEof /// [`read_to_end()`]: Self::read_to_end /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end /// [`expand_empty_elements`]: Self::expand_empty_elements /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag #[inline] pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec) -> Result { // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should // match literally the start name. See `Self::check_end_names` documentation self.reader.read_to_end_into(end, buf) } } impl NsReader> { /// Creates an XML reader from a file path. pub fn from_file>(path: P) -> Result { Ok(Self::new(Reader::from_file(path)?)) } } impl<'i> NsReader<&'i [u8]> { /// Creates an XML reader from a string slice. #[inline] pub fn from_str(s: &'i str) -> Self { Self::new(Reader::from_str(s)) } /// Reads the next event, borrow its content from the input buffer. /// /// This method manages namespaces but doesn't resolve them automatically. /// You should call [`resolve_element()`] if you want to get a namespace. /// /// You also can use [`read_resolved_event()`] instead if you want to resolve namespace /// as soon as you get an event. 
///
    /// There is no asynchronous `read_event_async()` version of this function,
    /// because it is not necessary -- the contents are already in memory and no IO
    /// is needed, therefore there is no potential for blocking.
    ///
    /// # Examples
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use quick_xml::events::Event;
    /// use quick_xml::name::{Namespace, ResolveResult::*};
    /// use quick_xml::reader::NsReader;
    ///
    /// let mut reader = NsReader::from_str(r#"
    ///     <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
    ///        <y:tag2><!--Test comment-->Test</y:tag2>
    ///        <y:tag2>Test 2</y:tag2>
    ///     </x:tag1>
    /// "#);
    /// reader.trim_text(true);
    ///
    /// let mut count = 0;
    /// let mut txt = Vec::new();
    /// loop {
    ///     match reader.read_event().unwrap() {
    ///         Event::Start(e) => {
    ///             count += 1;
    ///             let (ns, local) = reader.resolve_element(e.name());
    ///             match local.as_ref() {
    ///                 b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))),
    ///                 b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))),
    ///                 _ => unreachable!(),
    ///             }
    ///         }
    ///         Event::Text(e) => {
    ///             txt.push(e.unescape().unwrap().into_owned())
    ///         }
    ///         Event::Eof => break,
    ///         _ => (),
    ///     }
    /// }
    /// assert_eq!(count, 3);
    /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
    /// ```
    ///
    /// [`resolve_element()`]: Self::resolve_element
    /// [`read_resolved_event()`]: Self::read_resolved_event
    #[inline]
    pub fn read_event(&mut self) -> Result<Event<'i>> {
        // `()` is passed in place of a buffer: the event borrows from the input
        self.read_event_impl(())
    }

    /// Reads the next event, borrow its content from the input buffer, and resolves
    /// its namespace (if applicable).
    ///
    /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events.
    /// For all other events the concept of namespace is not defined, so
    /// a [`ResolveResult::Unbound`] is returned.
    ///
    /// If you are not interested in namespaces, you can use [`read_event()`]
    /// which will not automatically resolve namespaces for you.
///
    /// There is no asynchronous `read_resolved_event_async()` version of this function,
    /// because it is not necessary -- the contents are already in memory and no IO
    /// is needed, therefore there is no potential for blocking.
    ///
    /// # Examples
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use quick_xml::events::Event;
    /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
    /// use quick_xml::reader::NsReader;
    ///
    /// let mut reader = NsReader::from_str(r#"
    ///     <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
    ///        <y:tag2><!--Test comment-->Test</y:tag2>
    ///        <y:tag2>Test 2</y:tag2>
    ///     </x:tag1>
    /// "#);
    /// reader.trim_text(true);
    ///
    /// let mut count = 0;
    /// let mut txt = Vec::new();
    /// loop {
    ///     match reader.read_resolved_event().unwrap() {
    ///         (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => {
    ///             count += 1;
    ///             assert_eq!(e.local_name(), QName(b"tag1").into());
    ///         }
    ///         (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => {
    ///             count += 1;
    ///             assert_eq!(e.local_name(), QName(b"tag2").into());
    ///         }
    ///         (_, Event::Start(_)) => unreachable!(),
    ///
    ///         (_, Event::Text(e)) => {
    ///             txt.push(e.unescape().unwrap().into_owned())
    ///         }
    ///         (_, Event::Eof) => break,
    ///         _ => (),
    ///     }
    /// }
    /// assert_eq!(count, 3);
    /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
    /// ```
    ///
    /// [`Start`]: Event::Start
    /// [`Empty`]: Event::Empty
    /// [`End`]: Event::End
    /// [`read_event()`]: Self::read_event
    #[inline]
    pub fn read_resolved_event(&mut self) -> Result<(ResolveResult, Event<'i>)> {
        // Read first (this also updates the namespace scopes), then look up
        // the namespace of the event that was just read
        let event = self.read_event_impl(());
        self.resolve_event(event)
    }

    /// Reads until end element is found. This function is supposed to be called
    /// after you already read a [`Start`] event.
    ///
    /// Returns a span that cover content between `>` of an opening tag and `<` of
    /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and
    /// this method was called after reading expanded [`Start`] event.
    ///
    /// Manages nested cases where parent and child elements have the _literally_
    /// same name.
/// /// If corresponding [`End`] event will not be found, the [`UnexpectedEof`] /// will be returned. In particularly, that error will be returned if you call /// this method without consuming the corresponding [`Start`] event first. /// /// The `end` parameter should contain name of the end element _in the reader /// encoding_. It is good practice to always get that parameter using /// [`BytesStart::to_end()`] method. /// /// There is no asynchronous `read_to_end_async()` version of this function, /// because it is not necessary -- the contents are already in memory and no IO /// is needed, therefore there is no potential for blocking. /// /// # Namespaces /// /// While the `NsReader` does namespace resolution, namespaces does not /// change the algorithm for comparing names. Although the names `a:name` /// and `b:name` where both prefixes `a` and `b` resolves to the same namespace, /// are semantically equivalent, `` cannot close ``, because /// according to [the specification] /// /// > The end of every element that begins with a **start-tag** MUST be marked /// > by an **end-tag** containing a name that echoes the element's type as /// > given in the **start-tag** /// /// # Examples /// /// This example shows, how you can skip XML content after you read the /// start event. /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::name::{Namespace, ResolveResult}; /// use quick_xml::reader::NsReader; /// /// let mut reader = NsReader::from_str(r#" /// /// /// /// /// /// /// /// /// /// /// /// /// "#); /// reader.trim_text(true); /// /// let ns = Namespace(b"namespace 1"); /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5); /// let end = start.to_end().into_owned(); /// /// // First, we read a start event... 
/// assert_eq!( /// reader.read_resolved_event().unwrap(), /// (ResolveResult::Bound(ns), Event::Start(start)) /// ); /// /// // ...then, we could skip all events to the corresponding end event. /// // This call will correctly handle nested elements. /// // Note, however, that this method does not handle namespaces. /// reader.read_to_end(end.name()).unwrap(); /// /// // At the end we should get an Eof event, because we ate the whole XML /// assert_eq!( /// reader.read_resolved_event().unwrap(), /// (ResolveResult::Unbound, Event::Eof) /// ); /// ``` /// /// [`Start`]: Event::Start /// [`End`]: Event::End /// [`UnexpectedEof`]: crate::errors::Error::UnexpectedEof /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end /// [`expand_empty_elements`]: Self::expand_empty_elements /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag #[inline] pub fn read_to_end(&mut self, end: QName) -> Result { // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should // match literally the start name. See `Self::check_end_names` documentation self.reader.read_to_end(end) } /// Reads content between start and end tags, including any markup. This /// function is supposed to be called after you already read a [`Start`] event. /// /// Manages nested cases where parent and child elements have the _literally_ /// same name. /// /// This method does not unescape read data, instead it returns content /// "as is" of the XML document. This is because it has no idea what text /// it reads, and if, for example, it contains CDATA section, attempt to /// unescape it content will spoil data. /// /// Any text will be decoded using the XML current [`decoder()`]. /// /// Actually, this method perform the following code: /// /// ```ignore /// let span = reader.read_to_end(end)?; /// let text = reader.decoder().decode(&reader.inner_slice[span]); /// ``` /// /// # Examples /// /// This example shows, how you can read a HTML content from your XML document. 
/// /// ``` /// # use pretty_assertions::assert_eq; /// # use std::borrow::Cow; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::reader::NsReader; /// /// let mut reader = NsReader::from_str(r#" /// /// This is a HTML text ///

Usual XML rules does not apply inside it ///

For example, elements not needed to be "closed" /// /// "#); /// reader.trim_text(true); /// /// let start = BytesStart::new("html"); /// let end = start.to_end().into_owned(); /// /// // First, we read a start event... /// assert_eq!(reader.read_event().unwrap(), Event::Start(start)); /// // ...and disable checking of end names because we expect HTML further... /// reader.check_end_names(false); /// /// // ...then, we could read text content until close tag. /// // This call will correctly handle nested elements. /// let text = reader.read_text(end.name()).unwrap(); /// assert_eq!(text, Cow::Borrowed(r#" /// This is a HTML text ///

Usual XML rules does not apply inside it ///

For example, elements not needed to be "closed" /// "#)); /// /// // Now we can enable checks again /// reader.check_end_names(true); /// /// // At the end we should get an Eof event, because we ate the whole XML /// assert_eq!(reader.read_event().unwrap(), Event::Eof); /// ``` /// /// [`Start`]: Event::Start /// [`decoder()`]: Reader::decoder() #[inline] pub fn read_text(&mut self, end: QName) -> Result> { self.reader.read_text(end) } } impl Deref for NsReader { type Target = Reader; #[inline] fn deref(&self) -> &Self::Target { &self.reader } } quick-xml-0.27.1/src/reader/parser.rs000064400000000000000000000251470072674642500155610ustar 00000000000000#[cfg(feature = "encoding")] use encoding_rs::UTF_8; use crate::encoding::Decoder; use crate::errors::{Error, Result}; use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event}; #[cfg(feature = "encoding")] use crate::reader::EncodingRef; use crate::reader::{is_whitespace, BangType, ParseState}; use memchr; /// A struct that holds a current parse state and a parser configuration. /// It is independent on a way of reading data: the reader feed data into it and /// get back produced [`Event`]s. #[derive(Clone)] pub(super) struct Parser { /// Number of bytes read from the source of data since the parser was created pub offset: usize, /// Defines how to process next byte pub state: ParseState, /// Expand empty element into an opening and closing element pub expand_empty_elements: bool, /// Trims leading whitespace in Text events, skip the element if text is empty pub trim_text_start: bool, /// Trims trailing whitespace in Text events. 
pub trim_text_end: bool, /// Trims trailing whitespaces from markup names in closing tags `` pub trim_markup_names_in_closing_tags: bool, /// Check if [`Event::End`] nodes match last [`Event::Start`] node pub check_end_names: bool, /// Check if comments contains `--` (false per default) pub check_comments: bool, /// All currently Started elements which didn't have a matching /// End element yet. /// /// For an XML /// /// ```xml /// | /// ``` /// when cursor at the `|` position buffer contains: /// /// ```text /// rootinner /// ^ ^ /// ``` /// /// The `^` symbols shows which positions stored in the [`Self::opened_starts`] /// (0 and 4 in that case). opened_buffer: Vec, /// Opened name start indexes into [`Self::opened_buffer`]. See documentation /// for that field for details opened_starts: Vec, #[cfg(feature = "encoding")] /// Reference to the encoding used to read an XML pub encoding: EncodingRef, } impl Parser { /// Trims whitespaces from `bytes`, if required, and returns a [`Text`] event. 
/// /// # Parameters /// - `bytes`: data from the start of stream to the first `<` or from `>` to `<` /// /// [`Text`]: Event::Text pub fn read_text<'b>(&mut self, bytes: &'b [u8]) -> Result> { let mut content = bytes; if self.trim_text_end { // Skip the ending '<' let len = bytes .iter() .rposition(|&b| !is_whitespace(b)) .map_or_else(|| bytes.len(), |p| p + 1); content = &bytes[..len]; } Ok(Event::Text(BytesText::wrap(content, self.decoder()))) } /// reads `BytesElement` starting with a `!`, /// return `Comment`, `CData` or `DocType` event pub fn read_bang<'b>(&mut self, bang_type: BangType, buf: &'b [u8]) -> Result> { let uncased_starts_with = |string: &[u8], prefix: &[u8]| { string.len() >= prefix.len() && string[..prefix.len()].eq_ignore_ascii_case(prefix) }; let len = buf.len(); match bang_type { BangType::Comment if buf.starts_with(b"!--") => { debug_assert!(buf.ends_with(b"--")); if self.check_comments { // search if '--' not in comments if let Some(p) = memchr::memchr_iter(b'-', &buf[3..len - 2]) .position(|p| buf[3 + p + 1] == b'-') { self.offset += len - p; return Err(Error::UnexpectedToken("--".to_string())); } } Ok(Event::Comment(BytesText::wrap( &buf[3..len - 2], self.decoder(), ))) } BangType::CData if uncased_starts_with(buf, b"![CDATA[") => { debug_assert!(buf.ends_with(b"]]")); Ok(Event::CData(BytesCData::wrap( &buf[8..len - 2], self.decoder(), ))) } BangType::DocType if uncased_starts_with(buf, b"!DOCTYPE") => { let start = buf[8..] .iter() .position(|b| !is_whitespace(*b)) .unwrap_or(len - 8); debug_assert!(start < len - 8, "DocType must have a name"); Ok(Event::DocType(BytesText::wrap( &buf[8 + start..], self.decoder(), ))) } _ => Err(bang_type.to_err()), } } /// Wraps content of `buf` into the [`Event::End`] event. Does the check that /// end name matches the last opened start name if `self.check_end_names` is set. 
pub fn read_end<'b>(&mut self, buf: &'b [u8]) -> Result> { // XML standard permits whitespaces after the markup name in closing tags. // Let's strip them from the buffer before comparing tag names. let name = if self.trim_markup_names_in_closing_tags { if let Some(pos_end_name) = buf[1..].iter().rposition(|&b| !b.is_ascii_whitespace()) { let (name, _) = buf[1..].split_at(pos_end_name + 1); name } else { &buf[1..] } } else { &buf[1..] }; let decoder = self.decoder(); let mismatch_err = |expected: String, found: &[u8], offset: &mut usize| { *offset -= buf.len(); Err(Error::EndEventMismatch { expected, found: decoder.decode(found).unwrap_or_default().into_owned(), }) }; // Get the index in self.opened_buffer of the name of the last opened tag match self.opened_starts.pop() { Some(start) => { if self.check_end_names { let expected = &self.opened_buffer[start..]; if name != expected { let expected = decoder.decode(expected).unwrap_or_default().into_owned(); // #513: In order to allow error recovery we should drop content of the buffer self.opened_buffer.truncate(start); return mismatch_err(expected, name, &mut self.offset); } } self.opened_buffer.truncate(start); } None => { if self.check_end_names { return mismatch_err("".to_string(), &buf[1..], &mut self.offset); } } } Ok(Event::End(BytesEnd::wrap(name.into()))) } /// reads `BytesElement` starting with a `?`, /// return `Decl` or `PI` event pub fn read_question_mark<'b>(&mut self, buf: &'b [u8]) -> Result> { let len = buf.len(); if len > 2 && buf[len - 1] == b'?' 
{ if len > 5 && &buf[1..4] == b"xml" && is_whitespace(buf[4]) { let event = BytesDecl::from_start(BytesStart::wrap(&buf[1..len - 1], 3)); // Try getting encoding from the declaration event #[cfg(feature = "encoding")] if self.encoding.can_be_refined() { if let Some(encoding) = event.encoder() { self.encoding = EncodingRef::XmlDetected(encoding); } } Ok(Event::Decl(event)) } else { Ok(Event::PI(BytesText::wrap(&buf[1..len - 1], self.decoder()))) } } else { self.offset -= len; Err(Error::UnexpectedEof("XmlDecl".to_string())) } } /// reads `BytesElement` starting with any character except `/`, `!` or ``?` /// return `Start` or `Empty` event pub fn read_start<'b>(&mut self, buf: &'b [u8]) -> Result> { // TODO: do this directly when reading bufreader ... let len = buf.len(); let name_end = buf.iter().position(|&b| is_whitespace(b)).unwrap_or(len); if let Some(&b'/') = buf.last() { let end = if name_end < len { name_end } else { len - 1 }; if self.expand_empty_elements { self.state = ParseState::Empty; self.opened_starts.push(self.opened_buffer.len()); self.opened_buffer.extend(&buf[..end]); Ok(Event::Start(BytesStart::wrap(&buf[..len - 1], end))) } else { Ok(Event::Empty(BytesStart::wrap(&buf[..len - 1], end))) } } else { // #514: Always store names event when .check_end_names == false, // because checks can be temporary disabled and when they would be // enabled, we should have that information self.opened_starts.push(self.opened_buffer.len()); self.opened_buffer.extend(&buf[..name_end]); Ok(Event::Start(BytesStart::wrap(buf, name_end))) } } #[inline] pub fn close_expanded_empty(&mut self) -> Result> { self.state = ParseState::ClosedTag; let name = self .opened_buffer .split_off(self.opened_starts.pop().unwrap()); Ok(Event::End(BytesEnd::wrap(name.into()))) } /// Get the decoder, used to decode bytes, read by this reader, to the strings. 
/// /// If `encoding` feature is enabled, the used encoding may change after /// parsing the XML declaration, otherwise encoding is fixed to UTF-8. /// /// If `encoding` feature is enabled and no encoding is specified in declaration, /// defaults to UTF-8. pub fn decoder(&self) -> Decoder { Decoder { #[cfg(feature = "encoding")] encoding: self.encoding.encoding(), } } } impl Default for Parser { fn default() -> Self { Self { offset: 0, state: ParseState::Init, expand_empty_elements: false, trim_text_start: false, trim_text_end: false, trim_markup_names_in_closing_tags: true, check_end_names: true, check_comments: false, opened_buffer: Vec::new(), opened_starts: Vec::new(), #[cfg(feature = "encoding")] encoding: EncodingRef::Implicit(UTF_8), } } } quick-xml-0.27.1/src/reader/slice_reader.rs000064400000000000000000000333430072674642500167030ustar 00000000000000//! This is an implementation of [`Reader`] for reading from a `&[u8]` as //! underlying byte stream. This implementation supports not using an //! intermediate buffer as the byte slice itself can be used to borrow from. use std::borrow::Cow; #[cfg(feature = "encoding")] use crate::reader::EncodingRef; #[cfg(feature = "encoding")] use encoding_rs::{Encoding, UTF_8}; use crate::errors::{Error, Result}; use crate::events::Event; use crate::name::QName; use crate::reader::{is_whitespace, BangType, ReadElementState, Reader, Span, XmlSource}; use memchr; /// This is an implementation for reading from a `&[u8]` as underlying byte stream. /// This implementation supports not using an intermediate buffer as the byte slice /// itself can be used to borrow from. impl<'a> Reader<&'a [u8]> { /// Creates an XML reader from a string slice. 
pub fn from_str(s: &'a str) -> Self { // Rust strings are guaranteed to be UTF-8, so lock the encoding #[cfg(feature = "encoding")] { let mut reader = Self::from_reader(s.as_bytes()); reader.parser.encoding = EncodingRef::Explicit(UTF_8); reader } #[cfg(not(feature = "encoding"))] Self::from_reader(s.as_bytes()) } /// Read an event that borrows from the input rather than a buffer. /// /// There is no asynchronous `read_event_async()` version of this function, /// because it is not necessary -- the contents are already in memory and no IO /// is needed, therefore there is no potential for blocking. /// /// # Examples /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::Event; /// use quick_xml::reader::Reader; /// /// let mut reader = Reader::from_str(r#" /// /// Test /// Test 2 /// /// "#); /// reader.trim_text(true); /// /// let mut count = 0; /// let mut txt = Vec::new(); /// loop { /// match reader.read_event().unwrap() { /// Event::Start(e) => count += 1, /// Event::Text(e) => txt.push(e.unescape().unwrap().into_owned()), /// Event::Eof => break, /// _ => (), /// } /// } /// assert_eq!(count, 3); /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); /// ``` #[inline] pub fn read_event(&mut self) -> Result> { self.read_event_impl(()) } /// Reads until end element is found. This function is supposed to be called /// after you already read a [`Start`] event. /// /// Returns a span that cover content between `>` of an opening tag and `<` of /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and /// this method was called after reading expanded [`Start`] event. /// /// Manages nested cases where parent and child elements have the _literally_ /// same name. /// /// If corresponding [`End`] event will not be found, the [`Error::UnexpectedEof`] /// will be returned. In particularly, that error will be returned if you call /// this method without consuming the corresponding [`Start`] event first. 
/// /// The `end` parameter should contain name of the end element _in the reader /// encoding_. It is good practice to always get that parameter using /// [`BytesStart::to_end()`] method. /// /// The correctness of the skipped events does not checked, if you disabled /// the [`check_end_names`] option. /// /// There is no asynchronous `read_to_end_async()` version of this function, /// because it is not necessary -- the contents are already in memory and no IO /// is needed, therefore there is no potential for blocking. /// /// # Namespaces /// /// While the `Reader` does not support namespace resolution, namespaces /// does not change the algorithm for comparing names. Although the names /// `a:name` and `b:name` where both prefixes `a` and `b` resolves to the /// same namespace, are semantically equivalent, `` cannot close /// ``, because according to [the specification] /// /// > The end of every element that begins with a **start-tag** MUST be marked /// > by an **end-tag** containing a name that echoes the element's type as /// > given in the **start-tag** /// /// # Examples /// /// This example shows, how you can skip XML content after you read the /// start event. /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::reader::Reader; /// /// let mut reader = Reader::from_str(r#" /// /// /// /// /// /// /// /// /// "#); /// reader.trim_text(true); /// /// let start = BytesStart::new("outer"); /// let end = start.to_end().into_owned(); /// /// // First, we read a start event... /// assert_eq!(reader.read_event().unwrap(), Event::Start(start)); /// /// // ...then, we could skip all events to the corresponding end event. /// // This call will correctly handle nested elements. /// // Note, however, that this method does not handle namespaces. 
/// reader.read_to_end(end.name()).unwrap(); /// /// // At the end we should get an Eof event, because we ate the whole XML /// assert_eq!(reader.read_event().unwrap(), Event::Eof); /// ``` /// /// [`Start`]: Event::Start /// [`End`]: Event::End /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end /// [`expand_empty_elements`]: Self::expand_empty_elements /// [`check_end_names`]: Self::check_end_names /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag pub fn read_to_end(&mut self, end: QName) -> Result { Ok(read_to_end!(self, end, (), read_event_impl, {})) } /// Reads content between start and end tags, including any markup. This /// function is supposed to be called after you already read a [`Start`] event. /// /// Manages nested cases where parent and child elements have the _literally_ /// same name. /// /// This method does not unescape read data, instead it returns content /// "as is" of the XML document. This is because it has no idea what text /// it reads, and if, for example, it contains CDATA section, attempt to /// unescape it content will spoil data. /// /// Any text will be decoded using the XML current [`decoder()`]. /// /// Actually, this method perform the following code: /// /// ```ignore /// let span = reader.read_to_end(end)?; /// let text = reader.decoder().decode(&reader.inner_slice[span]); /// ``` /// /// # Examples /// /// This example shows, how you can read a HTML content from your XML document. /// /// ``` /// # use pretty_assertions::assert_eq; /// # use std::borrow::Cow; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::reader::Reader; /// /// let mut reader = Reader::from_str(" /// /// This is a HTML text ///

Usual XML rules does not apply inside it ///

For example, elements not needed to be "closed" /// /// "); /// reader.trim_text(true); /// /// let start = BytesStart::new("html"); /// let end = start.to_end().into_owned(); /// /// // First, we read a start event... /// assert_eq!(reader.read_event().unwrap(), Event::Start(start)); /// // ...and disable checking of end names because we expect HTML further... /// reader.check_end_names(false); /// /// // ...then, we could read text content until close tag. /// // This call will correctly handle nested elements. /// let text = reader.read_text(end.name()).unwrap(); /// assert_eq!(text, Cow::Borrowed(r#" /// This is a HTML text ///

Usual XML rules does not apply inside it ///

For example, elements not needed to be "closed" /// "#)); /// /// // Now we can enable checks again /// reader.check_end_names(true); /// /// // At the end we should get an Eof event, because we ate the whole XML /// assert_eq!(reader.read_event().unwrap(), Event::Eof); /// ``` /// /// [`Start`]: Event::Start /// [`decoder()`]: Self::decoder() pub fn read_text(&mut self, end: QName) -> Result> { // self.reader will be changed, so store original reference let buffer = self.reader; let span = self.read_to_end(end)?; self.decoder().decode(&buffer[0..span.len()]) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Implementation of `XmlSource` for `&[u8]` reader using a `Self` as buffer /// that will be borrowed by events. This implementation provides a zero-copy deserialization impl<'a> XmlSource<'a, ()> for &'a [u8] { #[cfg(not(feature = "encoding"))] fn remove_utf8_bom(&mut self) -> Result<()> { if self.starts_with(crate::encoding::UTF8_BOM) { *self = &self[crate::encoding::UTF8_BOM.len()..]; } Ok(()) } #[cfg(feature = "encoding")] fn detect_encoding(&mut self) -> Result> { if let Some((enc, bom_len)) = crate::encoding::detect_encoding(self) { *self = &self[bom_len..]; return Ok(Some(enc)); } Ok(None) } fn read_bytes_until( &mut self, byte: u8, _buf: (), position: &mut usize, ) -> Result> { // search byte must be within the ascii range debug_assert!(byte.is_ascii()); if self.is_empty() { return Ok(None); } Ok(Some(if let Some(i) = memchr::memchr(byte, self) { *position += i + 1; let bytes = &self[..i]; *self = &self[i + 1..]; bytes } else { *position += self.len(); let bytes = &self[..]; *self = &[]; bytes })) } fn read_bang_element( &mut self, _buf: (), position: &mut usize, ) -> Result> { // Peeked one bang ('!') before being called, so it's guaranteed to // start with it. 
debug_assert_eq!(self[0], b'!'); let bang_type = BangType::new(self[1..].first().copied())?; if let Some((bytes, i)) = bang_type.parse(&[], self) { *position += i; *self = &self[i..]; return Ok(Some((bang_type, bytes))); } // Note: Do not update position, so the error points to // somewhere sane rather than at the EOF Err(bang_type.to_err()) } fn read_element(&mut self, _buf: (), position: &mut usize) -> Result> { if self.is_empty() { return Ok(None); } let mut state = ReadElementState::Elem; if let Some((bytes, i)) = state.change(self) { *position += i; *self = &self[i..]; return Ok(Some(bytes)); } // Note: Do not update position, so the error points to a sane place // rather than at the EOF. Err(Error::UnexpectedEof("Element".to_string())) // FIXME: Figure out why the other one works without UnexpectedEof } fn skip_whitespace(&mut self, position: &mut usize) -> Result<()> { let whitespaces = self .iter() .position(|b| !is_whitespace(*b)) .unwrap_or(self.len()); *position += whitespaces; *self = &self[whitespaces..]; Ok(()) } fn skip_one(&mut self, byte: u8, position: &mut usize) -> Result { // search byte must be within the ascii range debug_assert!(byte.is_ascii()); if self.first() == Some(&byte) { *self = &self[1..]; *position += 1; Ok(true) } else { Ok(false) } } fn peek_one(&mut self) -> Result> { Ok(self.first().copied()) } } #[cfg(test)] mod test { use crate::reader::test::check; use crate::reader::XmlSource; /// Default buffer constructor just pass the byte array from the test fn identity(input: T) -> T { input } check!( #[test] read_event_impl, read_until_close, identity, () ); #[cfg(feature = "encoding")] mod encoding { use crate::events::Event; use crate::reader::Reader; use encoding_rs::UTF_8; use pretty_assertions::assert_eq; /// Checks that XML declaration cannot change the encoding from UTF-8 if /// a `Reader` was created using `from_str` method #[test] fn str_always_has_utf8() { let mut reader = Reader::from_str(""); 
assert_eq!(reader.decoder().encoding(), UTF_8); reader.read_event().unwrap(); assert_eq!(reader.decoder().encoding(), UTF_8); assert_eq!(reader.read_event().unwrap(), Event::Eof); } } } quick-xml-0.27.1/src/se/content.rs000064400000000000000000000776320072674642500151120ustar 00000000000000//! Contains serializer for content of an XML element use crate::errors::serialize::DeError; use crate::se::element::{ElementSerializer, Struct}; use crate::se::simple_type::{QuoteTarget, SimpleTypeSerializer}; use crate::se::{Indent, QuoteLevel, XmlName}; use serde::ser::{ Impossible, Serialize, SerializeSeq, SerializeTuple, SerializeTupleStruct, Serializer, }; use serde::serde_if_integer128; use std::fmt::Write; macro_rules! write_primitive { ($method:ident ( $ty:ty )) => { #[inline] fn $method(self, value: $ty) -> Result { self.into_simple_type_serializer().$method(value) } }; } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A serializer used to serialize content of the element. It does not write /// surrounding tags. /// /// This serializer does the following: /// - primitives (booleans, numbers and strings) serialized as naked strings /// - `None` does not write anything /// - sequences serialized without delimiters. `[1, 2, 3]` would be serialized as `123` /// - units (`()`) and unit structs are not supported /// - structs and maps are not supported /// - unit variants serialized as self-closed `<${variant}/>` /// - tuple variants serialized as sequences where each is wrapped in /// `<${variant}>...` /// - struct variants serialized wrapped `<${variant}>...` /// /// The difference between this serializer and [`SimpleTypeSerializer`] is in how /// sequences and maps are serialized. Unlike `SimpleTypeSerializer` it supports /// any types in sequences and serializes them as list of elements, but that has /// drawbacks. 
Sequence of primitives would be serialized without delimiters and /// it will be impossible to distinguish between them. Even worse, when serializing /// with indent, sequence of strings become one big string with additional content /// and it would be impossible to distinguish between content of the original /// strings and inserted indent characters. pub struct ContentSerializer<'i, W: Write> { pub writer: W, /// Defines which XML characters need to be escaped in text content pub level: QuoteLevel, /// Current indentation level. Note, that `Indent::None` means that there is /// no indentation at all, but `write_indent == false` means only, that indent /// writing is disabled in this instantiation of `ContentSerializer`, but /// child serializers should have access to the actual state of indentation. pub(super) indent: Indent<'i>, /// If `true`, then current indent will be written before writing the content, /// but only if content is not empty. pub write_indent: bool, //TODO: add settings to disallow consequent serialization of primitives } impl<'i, W: Write> ContentSerializer<'i, W> { /// Turns this serializer into serializer of a text content #[inline] pub fn into_simple_type_serializer(self) -> SimpleTypeSerializer<'i, W> { //TODO: Customization point: choose between CDATA and Text representation SimpleTypeSerializer { writer: self.writer, target: QuoteTarget::Text, level: self.level, indent: if self.write_indent { self.indent } else { Indent::None }, } } /// Creates new serializer that shares state with this serializer and /// writes to the same underlying writer #[inline] pub fn new_seq_element_serializer(&mut self) -> ContentSerializer<&mut W> { ContentSerializer { writer: &mut self.writer, level: self.level, indent: self.indent.borrow(), write_indent: self.write_indent, } } /// Writes `name` as self-closed tag #[inline] pub(super) fn write_empty(mut self, name: XmlName) -> Result { self.write_indent()?; self.writer.write_char('<')?; 
self.writer.write_str(name.0)?; self.writer.write_str("/>")?; Ok(self.writer) } /// Writes simple type content between `name` tags pub(super) fn write_wrapped(mut self, name: XmlName, serialize: S) -> Result where S: FnOnce(SimpleTypeSerializer<'i, W>) -> Result, { self.write_indent()?; self.writer.write_char('<')?; self.writer.write_str(name.0)?; self.writer.write_char('>')?; let mut writer = serialize(self.into_simple_type_serializer())?; writer.write_str("')?; Ok(writer) } pub(super) fn write_indent(&mut self) -> Result<(), DeError> { if self.write_indent { self.indent.write_indent(&mut self.writer)?; self.write_indent = false; } Ok(()) } } impl<'i, W: Write> Serializer for ContentSerializer<'i, W> { type Ok = W; type Error = DeError; type SerializeSeq = Self; type SerializeTuple = Self; type SerializeTupleStruct = Self; type SerializeTupleVariant = ElementSerializer<'i, W>; type SerializeMap = Impossible; type SerializeStruct = Impossible; type SerializeStructVariant = Struct<'i, W>; write_primitive!(serialize_bool(bool)); write_primitive!(serialize_i8(i8)); write_primitive!(serialize_i16(i16)); write_primitive!(serialize_i32(i32)); write_primitive!(serialize_i64(i64)); write_primitive!(serialize_u8(u8)); write_primitive!(serialize_u16(u16)); write_primitive!(serialize_u32(u32)); write_primitive!(serialize_u64(u64)); serde_if_integer128! 
{ write_primitive!(serialize_i128(i128)); write_primitive!(serialize_u128(u128)); } write_primitive!(serialize_f32(f32)); write_primitive!(serialize_f64(f64)); write_primitive!(serialize_char(char)); write_primitive!(serialize_bytes(&[u8])); #[inline] fn serialize_str(self, value: &str) -> Result { if value.is_empty() { Ok(self.writer) } else { self.into_simple_type_serializer().serialize_str(value) } } /// Does not write anything #[inline] fn serialize_none(self) -> Result { Ok(self.writer) } fn serialize_some(self, value: &T) -> Result { value.serialize(self) } /// Does not write anything #[inline] fn serialize_unit(self) -> Result { Ok(self.writer) } /// Does not write anything #[inline] fn serialize_unit_struct(self, _name: &'static str) -> Result { Ok(self.writer) } /// Checks `variant` for XML name validity and writes `<${variant}/>` fn serialize_unit_variant( self, _name: &'static str, _variant_index: u32, variant: &'static str, ) -> Result { let name = XmlName::try_from(variant)?; self.write_empty(name) } fn serialize_newtype_struct( self, _name: &'static str, value: &T, ) -> Result { value.serialize(self) } /// Checks `variant` for XML name validity and writes `value` as new element /// with name `variant`. 
fn serialize_newtype_variant( self, _name: &'static str, _variant_index: u32, variant: &'static str, value: &T, ) -> Result { value.serialize(ElementSerializer { key: XmlName::try_from(variant)?, ser: self, }) } #[inline] fn serialize_seq(self, _len: Option) -> Result { Ok(self) } #[inline] fn serialize_tuple(self, len: usize) -> Result { self.serialize_seq(Some(len)) } #[inline] fn serialize_tuple_struct( self, _name: &'static str, len: usize, ) -> Result { self.serialize_tuple(len) } #[inline] fn serialize_tuple_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, len: usize, ) -> Result { let ser = ElementSerializer { key: XmlName::try_from(variant)?, ser: self, }; // `ElementSerializer::serialize_tuple_variant` is the same as // `ElementSerializer::serialize_tuple_struct`, except that it replaces `.key` // to `variant` which is not required here ser.serialize_tuple_struct(name, len) } fn serialize_map(self, _len: Option) -> Result { Err(DeError::Unsupported( format!("serialization of map types is not supported in `$value` field").into(), )) } #[inline] fn serialize_struct( self, name: &'static str, _len: usize, ) -> Result { Err(DeError::Unsupported( format!("serialization of struct `{name}` is not supported in `$value` field").into(), )) } #[inline] fn serialize_struct_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, len: usize, ) -> Result { let ser = ElementSerializer { key: XmlName::try_from(variant)?, ser: self, }; // `ElementSerializer::serialize_struct_variant` is the same as // `ElementSerializer::serialize_struct`, except that it replaces `.key` // to `variant` which is not required here ser.serialize_struct(name, len) } } impl<'i, W: Write> SerializeSeq for ContentSerializer<'i, W> { type Ok = W; type Error = DeError; fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { value.serialize(self.new_seq_element_serializer())?; // Write indent for next 
element self.write_indent = true; Ok(()) } #[inline] fn end(self) -> Result { Ok(self.writer) } } impl<'i, W: Write> SerializeTuple for ContentSerializer<'i, W> { type Ok = W; type Error = DeError; #[inline] fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { ::serialize_element(self, value) } #[inline] fn end(self) -> Result { ::end(self) } } impl<'i, W: Write> SerializeTupleStruct for ContentSerializer<'i, W> { type Ok = W; type Error = DeError; #[inline] fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { ::serialize_element(self, value) } #[inline] fn end(self) -> Result { ::end(self) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Make tests public to reuse types in `elements::tests` module #[cfg(test)] pub(super) mod tests { use super::*; use crate::utils::Bytes; use serde::Serialize; use std::collections::BTreeMap; #[derive(Debug, Serialize, PartialEq)] pub struct Unit; #[derive(Debug, Serialize, PartialEq)] #[serde(rename = "<\"&'>")] pub struct UnitEscaped; #[derive(Debug, Serialize, PartialEq)] pub struct Newtype(pub usize); #[derive(Debug, Serialize, PartialEq)] pub struct Tuple(pub &'static str, pub usize); #[derive(Debug, Serialize, PartialEq)] pub struct Struct { pub key: &'static str, pub val: (usize, usize), } #[derive(Debug, Serialize, PartialEq)] pub struct Text { pub before: &'static str, #[serde(rename = "$text")] pub content: T, pub after: &'static str, } #[derive(Debug, Serialize, PartialEq)] pub struct Value { pub before: &'static str, #[serde(rename = "$value")] pub content: T, pub after: &'static str, } /// Attributes identified by starting with `@` character #[derive(Debug, Serialize, PartialEq)] pub struct Attributes { #[serde(rename = "@key")] pub key: &'static str, #[serde(rename = "@val")] pub val: (usize, usize), } #[derive(Debug, Serialize, PartialEq)] pub struct AttributesBefore 
{ #[serde(rename = "@key")] pub key: &'static str, pub val: usize, } #[derive(Debug, Serialize, PartialEq)] pub struct AttributesAfter { pub key: &'static str, #[serde(rename = "@val")] pub val: usize, } #[derive(Debug, Serialize, PartialEq)] pub enum Enum { Unit, /// Variant name becomes a tag name, but the name of variant is invalid /// XML name. Serialization of this element should be forbidden #[serde(rename = "<\"&'>")] UnitEscaped, Newtype(usize), Tuple(&'static str, usize), Struct { key: &'static str, /// Should be serialized as elements val: (usize, usize), }, Attributes { #[serde(rename = "@key")] key: &'static str, #[serde(rename = "@val")] val: (usize, usize), }, AttributesBefore { #[serde(rename = "@key")] key: &'static str, val: usize, }, AttributesAfter { key: &'static str, #[serde(rename = "@val")] val: usize, }, } #[derive(Debug, Serialize, PartialEq)] pub enum SpecialEnum { Text { before: &'static str, #[serde(rename = "$text")] content: T, after: &'static str, }, Value { before: &'static str, #[serde(rename = "$value")] content: T, after: &'static str, }, } mod without_indent { use super::Struct; use super::*; use pretty_assertions::assert_eq; /// Checks that given `$data` successfully serialized as `$expected` macro_rules! serialize_as { ($name:ident: $data:expr => $expected:literal) => { #[test] fn $name() { let ser = ContentSerializer { writer: String::new(), level: QuoteLevel::Full, indent: Indent::None, write_indent: false, }; let buffer = $data.serialize(ser).unwrap(); assert_eq!(buffer, $expected); } }; } /// Checks that attempt to serialize given `$data` results to a /// serialization error `$kind` with `$reason` macro_rules! 
err { ($name:ident: $data:expr => $kind:ident($reason:literal)) => { #[test] fn $name() { let mut buffer = String::new(); let ser = ContentSerializer { writer: &mut buffer, level: QuoteLevel::Full, indent: Indent::None, write_indent: false, }; match $data.serialize(ser).unwrap_err() { DeError::$kind(e) => assert_eq!(e, $reason), e => panic!( "Expected `{}({})`, found `{:?}`", stringify!($kind), $reason, e ), } // We could write something before fail // assert_eq!(buffer, ""); } }; } // Primitives is serialized in the same way as for SimpleTypeSerializer serialize_as!(false_: false => "false"); serialize_as!(true_: true => "true"); serialize_as!(i8_: -42i8 => "-42"); serialize_as!(i16_: -4200i16 => "-4200"); serialize_as!(i32_: -42000000i32 => "-42000000"); serialize_as!(i64_: -42000000000000i64 => "-42000000000000"); serialize_as!(isize_: -42000000000000isize => "-42000000000000"); serialize_as!(u8_: 42u8 => "42"); serialize_as!(u16_: 4200u16 => "4200"); serialize_as!(u32_: 42000000u32 => "42000000"); serialize_as!(u64_: 42000000000000u64 => "42000000000000"); serialize_as!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! 
{ serialize_as!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); serialize_as!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } serialize_as!(f32_: 4.2f32 => "4.2"); serialize_as!(f64_: 4.2f64 => "4.2"); serialize_as!(char_non_escaped: 'h' => "h"); serialize_as!(char_lt: '<' => "<"); serialize_as!(char_gt: '>' => ">"); serialize_as!(char_amp: '&' => "&"); serialize_as!(char_apos: '\'' => "'"); serialize_as!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content serialize_as!(char_space: ' ' => " "); serialize_as!(str_non_escaped: "non-escaped string" => "non-escaped string"); serialize_as!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Bytes(b"<\"escaped & bytes'>") => Unsupported("`serialize_bytes` not supported yet")); serialize_as!(option_none: Option::::None => ""); serialize_as!(option_some: Some("non-escaped string") => "non-escaped string"); serialize_as!(option_some_empty_str: Some("") => ""); serialize_as!(unit: () => ""); serialize_as!(unit_struct: Unit => ""); serialize_as!(unit_struct_escaped: UnitEscaped => ""); // Unlike SimpleTypeSerializer, enumeration values serialized as tags serialize_as!(enum_unit: Enum::Unit => ""); err!(enum_unit_escaped: Enum::UnitEscaped => Unsupported("character `<` is not allowed at the start of an XML name `<\"&'>`")); // Newtypes recursively applies ContentSerializer serialize_as!(newtype: Newtype(42) => "42"); serialize_as!(enum_newtype: Enum::Newtype(42) => "42"); // Note that sequences of primitives serialized without delimiters! 
serialize_as!(seq: vec![1, 2, 3] => "123"); serialize_as!(seq_empty: Vec::::new() => ""); serialize_as!(tuple: ("<\"&'>", "with\t\r\n spaces", 3usize) => "<"&'>\ with\t\r\n spaces\ 3"); serialize_as!(tuple_struct: Tuple("first", 42) => "first\ 42"); serialize_as!(enum_tuple: Enum::Tuple("first", 42) => "first\ 42"); // Structured types cannot be serialized without surrounding tag, which // only `enum` can provide err!(map: BTreeMap::from([("_1", 2), ("_3", 4)]) => Unsupported("serialization of map types is not supported in `$value` field")); err!(struct_: Struct { key: "answer", val: (42, 42) } => Unsupported("serialization of struct `Struct` is not supported in `$value` field")); serialize_as!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => "\ answer\ 42\ 42\ "); /// Special field name `$text` should be serialized as a text content mod text { use super::*; use pretty_assertions::assert_eq; err!(map: BTreeMap::from([("$text", 2), ("_3", 4)]) => Unsupported("serialization of map types is not supported in `$value` field")); err!(struct_: Text { before: "answer", content: (42, 42), after: "answer", } => Unsupported("serialization of struct `Text` is not supported in `$value` field")); serialize_as!(enum_struct: SpecialEnum::Text { before: "answer", content: (42, 42), after: "answer", } => "\ answer\ 42 42\ answer\ "); } mod attributes { use super::*; use pretty_assertions::assert_eq; err!(map_attr: BTreeMap::from([("@key1", 1), ("@key2", 2)]) => Unsupported("serialization of map types is not supported in `$value` field")); err!(map_mixed: BTreeMap::from([("@key1", 1), ("key2", 2)]) => Unsupported("serialization of map types is not supported in `$value` field")); err!(struct_: Attributes { key: "answer", val: (42, 42) } => Unsupported("serialization of struct `Attributes` is not supported in `$value` field")); err!(struct_before: AttributesBefore { key: "answer", val: 42 } => Unsupported("serialization of struct `AttributesBefore` is not supported in 
`$value` field")); err!(struct_after: AttributesAfter { key: "answer", val: 42 } => Unsupported("serialization of struct `AttributesAfter` is not supported in `$value` field")); serialize_as!(enum_: Enum::Attributes { key: "answer", val: (42, 42) } => r#""#); serialize_as!(enum_before: Enum::AttributesBefore { key: "answer", val: 42 } => r#"42"#); serialize_as!(enum_after: Enum::AttributesAfter { key: "answer", val: 42 } => r#"answer"#); } } mod with_indent { use super::Struct; use super::*; use crate::writer::Indentation; use pretty_assertions::assert_eq; /// Checks that given `$data` successfully serialized as `$expected` macro_rules! serialize_as { ($name:ident: $data:expr => $expected:literal) => { #[test] fn $name() { let ser = ContentSerializer { writer: String::new(), level: QuoteLevel::Full, indent: Indent::Owned(Indentation::new(b' ', 2)), write_indent: false, }; let buffer = $data.serialize(ser).unwrap(); assert_eq!(buffer, $expected); } }; } /// Checks that attempt to serialize given `$data` results to a /// serialization error `$kind` with `$reason` macro_rules! 
err { ($name:ident: $data:expr => $kind:ident($reason:literal)) => { #[test] fn $name() { let mut buffer = String::new(); let ser = ContentSerializer { writer: &mut buffer, level: QuoteLevel::Full, indent: Indent::Owned(Indentation::new(b' ', 2)), write_indent: false, }; match $data.serialize(ser).unwrap_err() { DeError::$kind(e) => assert_eq!(e, $reason), e => panic!( "Expected `{}({})`, found `{:?}`", stringify!($kind), $reason, e ), } // We can write something before fail // assert_eq!(buffer, ""); } }; } serialize_as!(false_: false => "false"); serialize_as!(true_: true => "true"); serialize_as!(i8_: -42i8 => "-42"); serialize_as!(i16_: -4200i16 => "-4200"); serialize_as!(i32_: -42000000i32 => "-42000000"); serialize_as!(i64_: -42000000000000i64 => "-42000000000000"); serialize_as!(isize_: -42000000000000isize => "-42000000000000"); serialize_as!(u8_: 42u8 => "42"); serialize_as!(u16_: 4200u16 => "4200"); serialize_as!(u32_: 42000000u32 => "42000000"); serialize_as!(u64_: 42000000000000u64 => "42000000000000"); serialize_as!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! 
{ serialize_as!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); serialize_as!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } serialize_as!(f32_: 4.2f32 => "4.2"); serialize_as!(f64_: 4.2f64 => "4.2"); serialize_as!(char_non_escaped: 'h' => "h"); serialize_as!(char_lt: '<' => "<"); serialize_as!(char_gt: '>' => ">"); serialize_as!(char_amp: '&' => "&"); serialize_as!(char_apos: '\'' => "'"); serialize_as!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content serialize_as!(char_space: ' ' => " "); serialize_as!(str_non_escaped: "non-escaped string" => "non-escaped string"); serialize_as!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Bytes(b"<\"escaped & bytes'>") => Unsupported("`serialize_bytes` not supported yet")); serialize_as!(option_none: Option::::None => ""); serialize_as!(option_some: Some(Enum::Unit) => ""); serialize_as!(unit: () => ""); serialize_as!(unit_struct: Unit => ""); serialize_as!(unit_struct_escaped: UnitEscaped => ""); // Unlike SimpleTypeSerializer, enumeration values serialized as tags serialize_as!(enum_unit: Enum::Unit => ""); err!(enum_unit_escaped: Enum::UnitEscaped => Unsupported("character `<` is not allowed at the start of an XML name `<\"&'>`")); // Newtypes recursively applies ContentSerializer serialize_as!(newtype: Newtype(42) => "42"); serialize_as!(enum_newtype: Enum::Newtype(42) => "42"); // Note that sequences of primitives serialized without delimiters other that indent! 
serialize_as!(seq: vec![1, 2, 3] => "1\n\ 2\n\ 3"); serialize_as!(seq_empty: Vec::::new() => ""); serialize_as!(tuple: ("<\"&'>", "with\t\r\n spaces", 3usize) => "<"&'>\n\ with\t\r\n spaces\n\ 3"); serialize_as!(tuple_struct: Tuple("first", 42) => "first\n\ 42"); serialize_as!(enum_tuple: Enum::Tuple("first", 42) => "first\n\ 42"); // Structured types cannot be serialized without surrounding tag, which // only `enum` can provide err!(map: BTreeMap::from([("_1", 2), ("_3", 4)]) => Unsupported("serialization of map types is not supported in `$value` field")); err!(struct_: Struct { key: "answer", val: (42, 42) } => Unsupported("serialization of struct `Struct` is not supported in `$value` field")); serialize_as!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => "\n \ answer\n \ 42\n \ 42\n\ "); /// Special field name `$text` should be serialized as text content mod text { use super::*; use pretty_assertions::assert_eq; err!(map: BTreeMap::from([("$text", 2), ("_3", 4)]) => Unsupported("serialization of map types is not supported in `$value` field")); err!(struct_: Text { before: "answer", content: (42, 42), after: "answer", } => Unsupported("serialization of struct `Text` is not supported in `$value` field")); serialize_as!(enum_struct: SpecialEnum::Text { before: "answer", content: (42, 42), after: "answer", } => "\n \ answer\n \ 42 42\n \ answer\n\ "); } mod attributes { use super::*; use pretty_assertions::assert_eq; err!(map_attr: BTreeMap::from([("@key1", 1), ("@key2", 2)]) => Unsupported("serialization of map types is not supported in `$value` field")); err!(map_mixed: BTreeMap::from([("@key1", 1), ("key2", 2)]) => Unsupported("serialization of map types is not supported in `$value` field")); err!(struct_: Attributes { key: "answer", val: (42, 42) } => Unsupported("serialization of struct `Attributes` is not supported in `$value` field")); err!(struct_before: AttributesBefore { key: "answer", val: 42 } => Unsupported("serialization of struct 
`AttributesBefore` is not supported in `$value` field")); err!(struct_after: AttributesAfter { key: "answer", val: 42 } => Unsupported("serialization of struct `AttributesAfter` is not supported in `$value` field")); serialize_as!(enum_: Enum::Attributes { key: "answer", val: (42, 42) } => r#""#); serialize_as!(enum_before: Enum::AttributesBefore { key: "answer", val: 42 } => "\n \ 42\n\ "); serialize_as!(enum_after: Enum::AttributesAfter { key: "answer", val: 42 } => "\n \ answer\n\ "); } } } quick-xml-0.27.1/src/se/element.rs000064400000000000000000003247620072674642500150700ustar 00000000000000//! Contains serializer for an XML element use crate::de::{TEXT_KEY, VALUE_KEY}; use crate::errors::serialize::DeError; use crate::se::content::ContentSerializer; use crate::se::key::QNameSerializer; use crate::se::simple_type::{QuoteTarget, SimpleTypeSerializer}; use crate::se::{Indent, XmlName}; use serde::ser::{ Serialize, SerializeMap, SerializeSeq, SerializeStruct, SerializeStructVariant, SerializeTuple, SerializeTupleStruct, SerializeTupleVariant, Serializer, }; use serde::serde_if_integer128; use std::fmt::Write; macro_rules! write_primitive { ($method:ident ( $ty:ty )) => { fn $method(self, value: $ty) -> Result { self.ser.write_wrapped(self.key, |ser| ser.$method(value)) } }; } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A serializer used to serialize element with specified name. 
pub struct ElementSerializer<'k, W: Write> { pub ser: ContentSerializer<'k, W>, /// Tag name used to wrap serialized types except enum variants which uses the variant name pub(super) key: XmlName<'k>, } impl<'k, W: Write> Serializer for ElementSerializer<'k, W> { type Ok = W; type Error = DeError; type SerializeSeq = Self; type SerializeTuple = Self; type SerializeTupleStruct = Self; type SerializeTupleVariant = Self; type SerializeMap = Map<'k, W>; type SerializeStruct = Struct<'k, W>; type SerializeStructVariant = Struct<'k, W>; write_primitive!(serialize_bool(bool)); write_primitive!(serialize_i8(i8)); write_primitive!(serialize_i16(i16)); write_primitive!(serialize_i32(i32)); write_primitive!(serialize_i64(i64)); write_primitive!(serialize_u8(u8)); write_primitive!(serialize_u16(u16)); write_primitive!(serialize_u32(u32)); write_primitive!(serialize_u64(u64)); serde_if_integer128! { write_primitive!(serialize_i128(i128)); write_primitive!(serialize_u128(u128)); } write_primitive!(serialize_f32(f32)); write_primitive!(serialize_f64(f64)); write_primitive!(serialize_char(char)); write_primitive!(serialize_bytes(&[u8])); fn serialize_str(self, value: &str) -> Result { if value.is_empty() { self.ser.write_empty(self.key) } else { self.ser .write_wrapped(self.key, |ser| ser.serialize_str(value)) } } /// By serde contract we should serialize key of [`None`] values. If someone /// wants to skip the field entirely, he should use /// `#[serde(skip_serializing_if = "Option::is_none")]`. 
/// /// In XML when we serialize field, we write field name as: /// - element name, or /// - attribute name /// /// and field value as /// - content of the element, or /// - attribute value /// /// So serialization of `None` works the same as [serialization of `()`](#method.serialize_unit) fn serialize_none(self) -> Result { self.serialize_unit() } fn serialize_some(self, value: &T) -> Result { value.serialize(self) } fn serialize_unit(self) -> Result { self.ser.write_empty(self.key) } fn serialize_unit_struct(self, _name: &'static str) -> Result { self.ser.write_empty(self.key) } fn serialize_unit_variant( self, _name: &'static str, _variant_index: u32, variant: &'static str, ) -> Result { let name = XmlName::try_from(variant)?; self.ser.write_empty(name) } fn serialize_newtype_struct( self, _name: &'static str, value: &T, ) -> Result { value.serialize(self) } fn serialize_newtype_variant( mut self, _name: &'static str, _variant_index: u32, variant: &'static str, value: &T, ) -> Result { self.key = XmlName::try_from(variant)?; value.serialize(self) } #[inline] fn serialize_seq(self, _len: Option) -> Result { Ok(self) } #[inline] fn serialize_tuple(self, len: usize) -> Result { self.serialize_seq(Some(len)) } #[inline] fn serialize_tuple_struct( self, _name: &'static str, len: usize, ) -> Result { self.serialize_tuple(len) } #[inline] fn serialize_tuple_variant( mut self, name: &'static str, _variant_index: u32, variant: &'static str, len: usize, ) -> Result { self.key = XmlName::try_from(variant)?; self.serialize_tuple_struct(name, len) } fn serialize_map(self, _len: Option) -> Result { Ok(Map { ser: self.serialize_struct("", 0)?, key: None, }) } #[inline] fn serialize_struct( mut self, _name: &'static str, _len: usize, ) -> Result { self.ser.write_indent()?; self.ser.indent.increase(); self.ser.writer.write_char('<')?; self.ser.writer.write_str(self.key.0)?; Ok(Struct { ser: self, children: String::new(), }) } #[inline] fn serialize_struct_variant( mut self, 
name: &'static str, _variant_index: u32, variant: &'static str, len: usize, ) -> Result { self.key = XmlName::try_from(variant)?; self.serialize_struct(name, len) } } impl<'k, W: Write> SerializeSeq for ElementSerializer<'k, W> { type Ok = W; type Error = DeError; fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { value.serialize(ElementSerializer { ser: self.ser.new_seq_element_serializer(), key: self.key, })?; // Write indent for next element self.ser.write_indent = true; Ok(()) } #[inline] fn end(self) -> Result { Ok(self.ser.writer) } } impl<'k, W: Write> SerializeTuple for ElementSerializer<'k, W> { type Ok = W; type Error = DeError; #[inline] fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { ::serialize_element(self, value) } #[inline] fn end(self) -> Result { ::end(self) } } impl<'k, W: Write> SerializeTupleStruct for ElementSerializer<'k, W> { type Ok = W; type Error = DeError; #[inline] fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { ::serialize_element(self, value) } #[inline] fn end(self) -> Result { ::end(self) } } impl<'k, W: Write> SerializeTupleVariant for ElementSerializer<'k, W> { type Ok = W; type Error = DeError; #[inline] fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { ::serialize_element(self, value) } #[inline] fn end(self) -> Result { ::end(self) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A serializer for struct variants, which serializes the struct contents inside /// of wrapping tags (`<${tag}>...`). 
/// /// Serialization of each field depends on it representation: /// - attributes written directly to the higher serializer /// - elements buffered into internal buffer and at the end written into higher /// serializer pub struct Struct<'k, W: Write> { ser: ElementSerializer<'k, W>, /// Buffer to store serialized elements // TODO: Customization point: allow direct writing of elements, but all // attributes should be listed first. Fail, if attribute encountered after // element. Use feature to configure children: String, } impl<'k, W: Write> Struct<'k, W> { #[inline] fn write_field(&mut self, key: &str, value: &T) -> Result<(), DeError> where T: ?Sized + Serialize, { //TODO: Customization point: allow user to determine if field is attribute or not if let Some(key) = key.strip_prefix('@') { let key = XmlName::try_from(key)?; self.write_attribute(key, value) } else { self.write_element(key, value) } } /// Writes `value` as an attribute #[inline] fn write_attribute(&mut self, key: XmlName, value: &T) -> Result<(), DeError> where T: ?Sized + Serialize, { //TODO: Customization point: each attribute on new line self.ser.ser.writer.write_char(' ')?; self.ser.ser.writer.write_str(key.0)?; self.ser.ser.writer.write_char('=')?; //TODO: Customization point: preferred quote style self.ser.ser.writer.write_char('"')?; value.serialize(SimpleTypeSerializer { writer: &mut self.ser.ser.writer, target: QuoteTarget::DoubleQAttr, level: self.ser.ser.level, indent: Indent::None, })?; self.ser.ser.writer.write_char('"')?; Ok(()) } /// Writes `value` either as a text content, or as an element. /// /// If `key` has a magic value [`TEXT_KEY`], then `value` serialized as a /// [simple type]. /// /// If `key` has a magic value [`VALUE_KEY`], then `value` serialized as a /// [content] without wrapping in tags, otherwise it is wrapped in /// `<${key}>...`. 
/// /// [simple type]: SimpleTypeSerializer /// [content]: ContentSerializer fn write_element(&mut self, key: &str, value: &T) -> Result<(), DeError> where T: ?Sized + Serialize, { let ser = ContentSerializer { writer: &mut self.children, level: self.ser.ser.level, indent: self.ser.ser.indent.borrow(), write_indent: true, }; if key == TEXT_KEY { value.serialize(ser.into_simple_type_serializer())?; } else if key == VALUE_KEY { value.serialize(ser)?; } else { value.serialize(ElementSerializer { key: XmlName::try_from(key)?, ser, })?; } Ok(()) } } impl<'k, W: Write> SerializeStruct for Struct<'k, W> { type Ok = W; type Error = DeError; fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { self.write_field(key, value) } fn end(mut self) -> Result { self.ser.ser.indent.decrease(); if self.children.is_empty() { self.ser.ser.writer.write_str("/>")?; } else { self.ser.ser.writer.write_char('>')?; self.ser.ser.writer.write_str(&self.children)?; self.ser.ser.indent.write_indent(&mut self.ser.ser.writer)?; self.ser.ser.writer.write_str("')?; } Ok(self.ser.ser.writer) } } impl<'k, W: Write> SerializeStructVariant for Struct<'k, W> { type Ok = W; type Error = DeError; #[inline] fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { ::serialize_field(self, key, value) } #[inline] fn end(self) -> Result { ::end(self) } } //////////////////////////////////////////////////////////////////////////////////////////////////// pub struct Map<'k, W: Write> { ser: Struct<'k, W>, /// Key, serialized by `QNameSerializer` if consumer uses `serialize_key` + /// `serialize_value` calls instead of `serialize_entry` key: Option, } impl<'k, W: Write> Map<'k, W> { fn make_key(&mut self, key: &T) -> Result where T: ?Sized + Serialize, { key.serialize(QNameSerializer { writer: String::new(), }) } } impl<'k, W: Write> SerializeMap for Map<'k, W> { type Ok = W; type Error = 
DeError; fn serialize_key(&mut self, key: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { if let Some(_) = self.key.take() { return Err(DeError::Custom( "calling `serialize_key` twice without `serialize_value`".to_string(), )); } self.key = Some(self.make_key(key)?); Ok(()) } fn serialize_value(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { if let Some(key) = self.key.take() { return self.ser.write_field(&key, value); } Err(DeError::Custom( "calling `serialize_value` without call of `serialize_key`".to_string(), )) } fn serialize_entry(&mut self, key: &K, value: &V) -> Result<(), Self::Error> where K: ?Sized + Serialize, V: ?Sized + Serialize, { let key = self.make_key(key)?; self.ser.write_field(&key, value) } fn end(mut self) -> Result { if let Some(key) = self.key.take() { return Err(DeError::Custom(format!( "calling `end` without call of `serialize_value` for key `{key}`" ))); } SerializeStruct::end(self.ser) } } //////////////////////////////////////////////////////////////////////////////////////////////////// #[cfg(test)] mod tests { use super::*; use crate::se::content::tests::*; use crate::se::{Indent, QuoteLevel}; use crate::utils::Bytes; use serde::Serialize; use std::collections::BTreeMap; #[derive(Debug, Serialize, PartialEq)] struct OptionalElements { a: Option<&'static str>, #[serde(skip_serializing_if = "Option::is_none")] b: Option<&'static str>, } #[derive(Debug, Serialize, PartialEq)] struct OptionalAttributes { #[serde(rename = "@a")] a: Option<&'static str>, #[serde(rename = "@b")] #[serde(skip_serializing_if = "Option::is_none")] b: Option<&'static str>, } mod without_indent { use super::*; use crate::se::content::tests::Struct; use pretty_assertions::assert_eq; /// Checks that given `$data` successfully serialized as `$expected` macro_rules! 
serialize_as { ($name:ident: $data:expr => $expected:expr) => { #[test] fn $name() { let ser = ElementSerializer { ser: ContentSerializer { writer: String::new(), level: QuoteLevel::Full, indent: Indent::None, write_indent: false, }, key: XmlName("root"), }; let buffer = $data.serialize(ser).unwrap(); assert_eq!(buffer, $expected); } }; } /// Checks that attempt to serialize given `$data` results to a /// serialization error `$kind` with `$reason` macro_rules! err { ($name:ident: $data:expr => $kind:ident($reason:literal)) => { #[test] fn $name() { let mut buffer = String::new(); let ser = ElementSerializer { ser: ContentSerializer { writer: &mut buffer, level: QuoteLevel::Full, indent: Indent::None, write_indent: false, }, key: XmlName("root"), }; match $data.serialize(ser).unwrap_err() { DeError::$kind(e) => assert_eq!(e, $reason), e => panic!( "Expected `{}({})`, found `{:?}`", stringify!($kind), $reason, e ), } // We can write something before fail // assert_eq!(buffer, ""); } }; } serialize_as!(false_: false => "false"); serialize_as!(true_: true => "true"); serialize_as!(i8_: -42i8 => "-42"); serialize_as!(i16_: -4200i16 => "-4200"); serialize_as!(i32_: -42000000i32 => "-42000000"); serialize_as!(i64_: -42000000000000i64 => "-42000000000000"); serialize_as!(isize_: -42000000000000isize => "-42000000000000"); serialize_as!(u8_: 42u8 => "42"); serialize_as!(u16_: 4200u16 => "4200"); serialize_as!(u32_: 42000000u32 => "42000000"); serialize_as!(u64_: 42000000000000u64 => "42000000000000"); serialize_as!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! 
{ serialize_as!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); serialize_as!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } serialize_as!(f32_: 4.2f32 => "4.2"); serialize_as!(f64_: 4.2f64 => "4.2"); serialize_as!(char_non_escaped: 'h' => "h"); serialize_as!(char_lt: '<' => "<"); serialize_as!(char_gt: '>' => ">"); serialize_as!(char_amp: '&' => "&"); serialize_as!(char_apos: '\'' => "'"); serialize_as!(char_quot: '"' => """); serialize_as!(str_non_escaped: "non-escaped string" => "non-escaped string"); serialize_as!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Bytes(b"<\"escaped & bytes'>") => Unsupported("`serialize_bytes` not supported yet")); serialize_as!(option_none: Option::<&str>::None => ""); serialize_as!(option_some: Some("non-escaped string") => "non-escaped string"); serialize_as!(option_some_empty_str: Some("") => ""); serialize_as!(unit: () => ""); serialize_as!(unit_struct: Unit => ""); serialize_as!(unit_struct_escaped: UnitEscaped => ""); serialize_as!(enum_unit: Enum::Unit => ""); err!(enum_unit_escaped: Enum::UnitEscaped => Unsupported("character `<` is not allowed at the start of an XML name `<\"&'>`")); serialize_as!(newtype: Newtype(42) => "42"); serialize_as!(enum_newtype: Enum::Newtype(42) => "42"); serialize_as!(seq: vec![1, 2, 3] => "1\ 2\ 3"); serialize_as!(seq_empty: Vec::::new() => ""); serialize_as!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'>\ with\t\n\r spaces\ 3"); serialize_as!(tuple_struct: Tuple("first", 42) => "first\ 42"); serialize_as!(enum_tuple: Enum::Tuple("first", 42) => "first\ 42"); serialize_as!(map: BTreeMap::from([("_1", 2), ("_3", 4)]) => "\ <_1>2\ <_3>4\ "); serialize_as!(struct_: Struct { key: "answer", val: (42, 42) } => "\ answer\ 42\ 42\ "); serialize_as!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => "\ answer\ 42\ 42\ "); /// Special field name `$text` should be serialized as text 
content. /// Sequences serialized as an `xs:list` content mod text { use super::*; /// `$text` key in a map mod map { use super::*; use pretty_assertions::assert_eq; macro_rules! text { ($name:ident: $data:expr) => { serialize_as!($name: BTreeMap::from([("$text", $data)]) => ""); }; ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: BTreeMap::from([("$text", $data)]) => concat!("", $expected,"")); }; } text!(false_: false => "false"); text!(true_: true => "true"); text!(i8_: -42i8 => "-42"); text!(i16_: -4200i16 => "-4200"); text!(i32_: -42000000i32 => "-42000000"); text!(i64_: -42000000000000i64 => "-42000000000000"); text!(isize_: -42000000000000isize => "-42000000000000"); text!(u8_: 42u8 => "42"); text!(u16_: 4200u16 => "4200"); text!(u32_: 42000000u32 => "42000000"); text!(u64_: 42000000000000u64 => "42000000000000"); text!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! { text!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); text!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } text!(f32_: 4.2f32 => "4.2"); text!(f64_: 4.2f64 => "4.2"); text!(char_non_escaped: 'h' => "h"); text!(char_lt: '<' => "<"); text!(char_gt: '>' => ">"); text!(char_amp: '&' => "&"); text!(char_apos: '\'' => "'"); text!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content text!(char_space: ' ' => " "); text!(str_non_escaped: "non-escaped string" => "non-escaped string"); text!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Text { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); text!(option_none: Option::<&str>::None); text!(option_some: Some("non-escaped string") => "non-escaped string"); text!(option_some_empty_str: Some("")); text!(unit: ()); text!(unit_struct: Unit); 
text!(unit_struct_escaped: UnitEscaped); text!(enum_unit: Enum::Unit => "Unit"); text!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); text!(newtype: Newtype(42) => "42"); // We have no space where name of a variant can be stored err!(enum_newtype: Text { before: "answer", content: Enum::Newtype(42), after: "answer", } => Unsupported("enum newtype variant `Enum::Newtype` cannot be serialized as an attribute or text content value")); // Sequences are serialized separated by spaces, all spaces inside are escaped text!(seq: vec![1, 2, 3] => "1 2 3"); text!(seq_empty: Vec::::new()); text!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'> \ with spaces \ 3"); text!(tuple_struct: Tuple("first", 42) => "first 42"); // We have no space where name of a variant can be stored err!(enum_tuple: Text { before: "answer", content: Enum::Tuple("first", 42), after: "answer", } => Unsupported("enum tuple variant `Enum::Tuple` cannot be serialized as an attribute or text content value")); // Complex types cannot be serialized in `$text` field err!(map: Text { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("map cannot be serialized as an attribute or text content value")); err!(struct_: Text { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("struct `Struct` cannot be serialized as an attribute or text content value")); err!(enum_struct: Text { before: "answer", content: Enum::Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("enum struct variant `Enum::Struct` cannot be serialized as an attribute or text content value")); } /// `$text` field inside a struct mod struct_ { use super::*; use pretty_assertions::assert_eq; macro_rules! 
text { ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: Text { before: "answer", content: $data, after: "answer", } => concat!( "answer", $expected, "answer", )); }; } text!(false_: false => "false"); text!(true_: true => "true"); text!(i8_: -42i8 => "-42"); text!(i16_: -4200i16 => "-4200"); text!(i32_: -42000000i32 => "-42000000"); text!(i64_: -42000000000000i64 => "-42000000000000"); text!(isize_: -42000000000000isize => "-42000000000000"); text!(u8_: 42u8 => "42"); text!(u16_: 4200u16 => "4200"); text!(u32_: 42000000u32 => "42000000"); text!(u64_: 42000000000000u64 => "42000000000000"); text!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! { text!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); text!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } text!(f32_: 4.2f32 => "4.2"); text!(f64_: 4.2f64 => "4.2"); text!(char_non_escaped: 'h' => "h"); text!(char_lt: '<' => "<"); text!(char_gt: '>' => ">"); text!(char_amp: '&' => "&"); text!(char_apos: '\'' => "'"); text!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content text!(char_space: ' ' => " "); text!(str_non_escaped: "non-escaped string" => "non-escaped string"); text!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Text { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); text!(option_none: Option::<&str>::None => ""); text!(option_some: Some("non-escaped string") => "non-escaped string"); text!(option_some_empty_str: Some("") => ""); text!(unit: () => ""); text!(unit_struct: Unit => ""); text!(unit_struct_escaped: UnitEscaped => ""); text!(enum_unit: Enum::Unit => "Unit"); text!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); text!(newtype: Newtype(42) => "42"); // We have no space where name of a 
variant can be stored err!(enum_newtype: Text { before: "answer", content: Enum::Newtype(42), after: "answer", } => Unsupported("enum newtype variant `Enum::Newtype` cannot be serialized as an attribute or text content value")); // Sequences are serialized separated by spaces, all spaces inside are escaped text!(seq: vec![1, 2, 3] => "1 2 3"); text!(seq_empty: Vec::::new() => ""); text!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'> \ with spaces \ 3"); text!(tuple_struct: Tuple("first", 42) => "first 42"); // We have no space where name of a variant can be stored err!(enum_tuple: Text { before: "answer", content: Enum::Tuple("first", 42), after: "answer", } => Unsupported("enum tuple variant `Enum::Tuple` cannot be serialized as an attribute or text content value")); // Complex types cannot be serialized in `$text` field err!(map: Text { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("map cannot be serialized as an attribute or text content value")); err!(struct_: Text { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("struct `Struct` cannot be serialized as an attribute or text content value")); err!(enum_struct: Text { before: "answer", content: Enum::Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("enum struct variant `Enum::Struct` cannot be serialized as an attribute or text content value")); } /// `$text` field inside a struct variant of an enum mod enum_struct { use super::*; use pretty_assertions::assert_eq; macro_rules! 
text { ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: SpecialEnum::Text { before: "answer", content: $data, after: "answer", } => concat!( "answer", $expected, "answer", )); }; } text!(false_: false => "false"); text!(true_: true => "true"); text!(i8_: -42i8 => "-42"); text!(i16_: -4200i16 => "-4200"); text!(i32_: -42000000i32 => "-42000000"); text!(i64_: -42000000000000i64 => "-42000000000000"); text!(isize_: -42000000000000isize => "-42000000000000"); text!(u8_: 42u8 => "42"); text!(u16_: 4200u16 => "4200"); text!(u32_: 42000000u32 => "42000000"); text!(u64_: 42000000000000u64 => "42000000000000"); text!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! { text!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); text!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } text!(f32_: 4.2f32 => "4.2"); text!(f64_: 4.2f64 => "4.2"); text!(char_non_escaped: 'h' => "h"); text!(char_lt: '<' => "<"); text!(char_gt: '>' => ">"); text!(char_amp: '&' => "&"); text!(char_apos: '\'' => "'"); text!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content text!(char_space: ' ' => " "); text!(str_non_escaped: "non-escaped string" => "non-escaped string"); text!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: SpecialEnum::Text { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); text!(option_none: Option::<&str>::None => ""); text!(option_some: Some("non-escaped string") => "non-escaped string"); text!(option_some_empty_str: Some("") => ""); text!(unit: () => ""); text!(unit_struct: Unit => ""); text!(unit_struct_escaped: UnitEscaped => ""); text!(enum_unit: Enum::Unit => "Unit"); text!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); text!(newtype: Newtype(42) => "42"); // We have no 
space where name of a variant can be stored err!(enum_newtype: SpecialEnum::Text { before: "answer", content: Enum::Newtype(42), after: "answer", } => Unsupported("enum newtype variant `Enum::Newtype` cannot be serialized as an attribute or text content value")); // Sequences are serialized separated by spaces, all spaces inside are escaped text!(seq: vec![1, 2, 3] => "1 2 3"); text!(seq_empty: Vec::::new() => ""); text!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'> \ with spaces \ 3"); text!(tuple_struct: Tuple("first", 42) => "first 42"); // We have no space where name of a variant can be stored err!(enum_tuple: SpecialEnum::Text { before: "answer", content: Enum::Tuple("first", 42), after: "answer", } => Unsupported("enum tuple variant `Enum::Tuple` cannot be serialized as an attribute or text content value")); // Complex types cannot be serialized in `$text` field err!(map: SpecialEnum::Text { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("map cannot be serialized as an attribute or text content value")); err!(struct_: SpecialEnum::Text { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("struct `Struct` cannot be serialized as an attribute or text content value")); err!(enum_struct: SpecialEnum::Text { before: "answer", content: Enum::Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("enum struct variant `Enum::Struct` cannot be serialized as an attribute or text content value")); } } /// Special field name `$value` should be serialized using name, provided /// by the type of value instead of a key. Sequences serialized as a list /// of tags with that name (each element can have their own name) mod value { use super::*; /// `$value` key in a map mod map { use super::*; use pretty_assertions::assert_eq; macro_rules! 
value { ($name:ident: $data:expr) => { serialize_as!($name: BTreeMap::from([("$value", $data)]) => ""); }; ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: BTreeMap::from([("$value", $data)]) => concat!("", $expected,"")); }; } value!(false_: false => "false"); value!(true_: true => "true"); value!(i8_: -42i8 => "-42"); value!(i16_: -4200i16 => "-4200"); value!(i32_: -42000000i32 => "-42000000"); value!(i64_: -42000000000000i64 => "-42000000000000"); value!(isize_: -42000000000000isize => "-42000000000000"); value!(u8_: 42u8 => "42"); value!(u16_: 4200u16 => "4200"); value!(u32_: 42000000u32 => "42000000"); value!(u64_: 42000000000000u64 => "42000000000000"); value!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! { value!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); value!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } value!(f32_: 4.2f32 => "4.2"); value!(f64_: 4.2f64 => "4.2"); value!(char_non_escaped: 'h' => "h"); value!(char_lt: '<' => "<"); value!(char_gt: '>' => ">"); value!(char_amp: '&' => "&"); value!(char_apos: '\'' => "'"); value!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content value!(char_space: ' ' => " "); value!(str_non_escaped: "non-escaped string" => "non-escaped string"); value!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: BTreeMap::from([("$value", Bytes(b"<\"escaped & bytes'>"))]) => Unsupported("`serialize_bytes` not supported yet")); value!(option_none: Option::<&str>::None); value!(option_some: Some("non-escaped string") => "non-escaped string"); value!(option_some_empty_str: Some("")); value!(unit: ()); value!(unit_struct: Unit); value!(unit_struct_escaped: UnitEscaped); value!(enum_unit: Enum::Unit => ""); err!(enum_unit_escaped: BTreeMap::from([("$value", Enum::UnitEscaped)]) => Unsupported("character 
`<` is not allowed at the start of an XML name `<\"&'>`")); value!(newtype: Newtype(42) => "42"); value!(enum_newtype: Enum::Newtype(42) => "42"); // Note that sequences of primitives serialized without delimiters! value!(seq: vec![1, 2, 3] => "123"); value!(seq_empty: Vec::::new()); value!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'>\ with\t\n\r spaces\ 3"); value!(tuple_struct: Tuple("first", 42) => "first42"); value!(enum_tuple: Enum::Tuple("first", 42) => "first\ 42"); // We cannot wrap map or struct in any container and should not // flatten it, so it is impossible to serialize maps and structs err!(map: BTreeMap::from([("$value", BTreeMap::from([("_1", 2), ("_3", 4)]))]) => Unsupported("serialization of map types is not supported in `$value` field")); err!(struct_: BTreeMap::from([("$value", Struct { key: "answer", val: (42, 42) })]) => Unsupported("serialization of struct `Struct` is not supported in `$value` field")); value!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => "\ answer\ 42\ 42\ "); } /// `$value` field inside a struct mod struct_ { use super::*; use pretty_assertions::assert_eq; macro_rules! value { ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: Value { before: "answer", content: $data, after: "answer", } => concat!( "answer", $expected, "answer", )); }; } value!(false_: false => "false"); value!(true_: true => "true"); value!(i8_: -42i8 => "-42"); value!(i16_: -4200i16 => "-4200"); value!(i32_: -42000000i32 => "-42000000"); value!(i64_: -42000000000000i64 => "-42000000000000"); value!(isize_: -42000000000000isize => "-42000000000000"); value!(u8_: 42u8 => "42"); value!(u16_: 4200u16 => "4200"); value!(u32_: 42000000u32 => "42000000"); value!(u64_: 42000000000000u64 => "42000000000000"); value!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! 
{ value!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); value!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } value!(f32_: 4.2f32 => "4.2"); value!(f64_: 4.2f64 => "4.2"); value!(char_non_escaped: 'h' => "h"); value!(char_lt: '<' => "<"); value!(char_gt: '>' => ">"); value!(char_amp: '&' => "&"); value!(char_apos: '\'' => "'"); value!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content value!(char_space: ' ' => " "); value!(str_non_escaped: "non-escaped string" => "non-escaped string"); value!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Value { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); value!(option_none: Option::<&str>::None => ""); value!(option_some: Some("non-escaped string") => "non-escaped string"); value!(option_some_empty_str: Some("") => ""); value!(unit: () => ""); value!(unit_struct: Unit => ""); value!(unit_struct_escaped: UnitEscaped => ""); value!(enum_unit: Enum::Unit => ""); err!(enum_unit_escaped: Value { before: "answer", content: Enum::UnitEscaped, after: "answer", } => Unsupported("character `<` is not allowed at the start of an XML name `<\"&'>`")); value!(newtype: Newtype(42) => "42"); value!(enum_newtype: Enum::Newtype(42) => "42"); // Note that sequences of primitives serialized without delimiters! 
value!(seq: vec![1, 2, 3] => "123"); value!(seq_empty: Vec::::new() => ""); value!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'>\ with\t\n\r spaces\ 3"); value!(tuple_struct: Tuple("first", 42) => "first42"); value!(enum_tuple: Enum::Tuple("first", 42) => "first\ 42"); // We cannot wrap map or struct in any container and should not // flatten it, so it is impossible to serialize maps and structs err!(map: Value { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("serialization of map types is not supported in `$value` field")); err!(struct_: Value { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("serialization of struct `Struct` is not supported in `$value` field")); value!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => "\ answer\ 42\ 42\ "); } /// `$value` field inside a struct variant of an enum mod enum_struct { use super::*; use pretty_assertions::assert_eq; macro_rules! value { ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: SpecialEnum::Value { before: "answer", content: $data, after: "answer", } => concat!( "answer", $expected, "answer", )); }; } value!(false_: false => "false"); value!(true_: true => "true"); value!(i8_: -42i8 => "-42"); value!(i16_: -4200i16 => "-4200"); value!(i32_: -42000000i32 => "-42000000"); value!(i64_: -42000000000000i64 => "-42000000000000"); value!(isize_: -42000000000000isize => "-42000000000000"); value!(u8_: 42u8 => "42"); value!(u16_: 4200u16 => "4200"); value!(u32_: 42000000u32 => "42000000"); value!(u64_: 42000000000000u64 => "42000000000000"); value!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! 
{ value!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); value!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } value!(f32_: 4.2f32 => "4.2"); value!(f64_: 4.2f64 => "4.2"); value!(char_non_escaped: 'h' => "h"); value!(char_lt: '<' => "<"); value!(char_gt: '>' => ">"); value!(char_amp: '&' => "&"); value!(char_apos: '\'' => "'"); value!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content value!(char_space: ' ' => " "); value!(str_non_escaped: "non-escaped string" => "non-escaped string"); value!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: SpecialEnum::Value { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); value!(option_none: Option::<&str>::None => ""); value!(option_some: Some("non-escaped string") => "non-escaped string"); value!(option_some_empty_str: Some("") => ""); value!(unit: () => ""); value!(unit_struct: Unit => ""); value!(unit_struct_escaped: UnitEscaped => ""); value!(enum_unit: Enum::Unit => ""); err!(enum_unit_escaped: SpecialEnum::Value { before: "answer", content: Enum::UnitEscaped, after: "answer", } => Unsupported("character `<` is not allowed at the start of an XML name `<\"&'>`")); value!(newtype: Newtype(42) => "42"); value!(enum_newtype: Enum::Newtype(42) => "42"); // Note that sequences of primitives serialized without delimiters! 
value!(seq: vec![1, 2, 3] => "123"); value!(seq_empty: Vec::::new() => ""); value!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'>\ with\t\n\r spaces\ 3"); value!(tuple_struct: Tuple("first", 42) => "first42"); value!(enum_tuple: Enum::Tuple("first", 42) => "first\ 42"); // We cannot wrap map or struct in any container and should not // flatten it, so it is impossible to serialize maps and structs err!(map: SpecialEnum::Value { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("serialization of map types is not supported in `$value` field")); err!(struct_: SpecialEnum::Value { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("serialization of struct `Struct` is not supported in `$value` field")); value!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => "\ answer\ 42\ 42\ "); } } mod attributes { use super::*; use pretty_assertions::assert_eq; serialize_as!(map_attr: BTreeMap::from([("@key1", 1), ("@key2", 2)]) => r#""#); serialize_as!(map_mixed: BTreeMap::from([("@key1", 1), ("key2", 2)]) => r#"2"#); serialize_as!(struct_: Attributes { key: "answer", val: (42, 42) } => r#""#); serialize_as!(struct_before: AttributesBefore { key: "answer", val: 42 } => r#"42"#); serialize_as!(struct_after: AttributesAfter { key: "answer", val: 42 } => r#"answer"#); serialize_as!(enum_: Enum::Attributes { key: "answer", val: (42, 42) } => r#""#); serialize_as!(enum_before: Enum::AttributesBefore { key: "answer", val: 42 } => r#"42"#); serialize_as!(enum_after: Enum::AttributesAfter { key: "answer", val: 42 } => r#"answer"#); /// Test for https://github.com/tafia/quick-xml/issues/252 mod optional { use super::*; use pretty_assertions::assert_eq; serialize_as!(none: OptionalAttributes { a: None, b: None } => r#""#); serialize_as!(some_empty_str: OptionalAttributes { a: Some(""), b: Some(""), } => r#""#); serialize_as!(some_non_empty: OptionalAttributes { a: 
Some("1"), b: Some("2"), } => r#""#); } } /// Test for https://github.com/tafia/quick-xml/issues/252 mod optional { use super::*; use pretty_assertions::assert_eq; serialize_as!(none: OptionalElements { a: None, b: None } => "\ \ "); serialize_as!(some_empty_str: OptionalElements { a: Some(""), b: Some(""), } => "\ \ \ "); serialize_as!(some_non_empty: OptionalElements { a: Some("1"), b: Some("2"), } => "\ 1\ 2\ "); } } mod with_indent { use super::*; use crate::se::content::tests::Struct; use crate::writer::Indentation; use pretty_assertions::assert_eq; /// Checks that given `$data` successfully serialized as `$expected` macro_rules! serialize_as { ($name:ident: $data:expr => $expected:expr) => { #[test] fn $name() { let ser = ElementSerializer { ser: ContentSerializer { writer: String::new(), level: QuoteLevel::Full, indent: Indent::Owned(Indentation::new(b' ', 2)), write_indent: false, }, key: XmlName("root"), }; let buffer = $data.serialize(ser).unwrap(); assert_eq!(buffer, $expected); } }; } /// Checks that attempt to serialize given `$data` results to a /// serialization error `$kind` with `$reason` macro_rules! 
err { ($name:ident: $data:expr => $kind:ident($reason:literal)) => { #[test] fn $name() { let mut buffer = String::new(); let ser = ElementSerializer { ser: ContentSerializer { writer: &mut buffer, level: QuoteLevel::Full, indent: Indent::Owned(Indentation::new(b' ', 2)), write_indent: false, }, key: XmlName("root"), }; match $data.serialize(ser).unwrap_err() { DeError::$kind(e) => assert_eq!(e, $reason), e => panic!( "Expected `{}({})`, found `{:?}`", stringify!($kind), $reason, e ), } // We can write something before fail // assert_eq!(buffer, ""); } }; } serialize_as!(false_: false => "false"); serialize_as!(true_: true => "true"); serialize_as!(i8_: -42i8 => "-42"); serialize_as!(i16_: -4200i16 => "-4200"); serialize_as!(i32_: -42000000i32 => "-42000000"); serialize_as!(i64_: -42000000000000i64 => "-42000000000000"); serialize_as!(isize_: -42000000000000isize => "-42000000000000"); serialize_as!(u8_: 42u8 => "42"); serialize_as!(u16_: 4200u16 => "4200"); serialize_as!(u32_: 42000000u32 => "42000000"); serialize_as!(u64_: 42000000000000u64 => "42000000000000"); serialize_as!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! 
{ serialize_as!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); serialize_as!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } serialize_as!(f32_: 4.2f32 => "4.2"); serialize_as!(f64_: 4.2f64 => "4.2"); serialize_as!(char_non_escaped: 'h' => "h"); serialize_as!(char_lt: '<' => "<"); serialize_as!(char_gt: '>' => ">"); serialize_as!(char_amp: '&' => "&"); serialize_as!(char_apos: '\'' => "'"); serialize_as!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content serialize_as!(char_space: ' ' => " "); serialize_as!(str_non_escaped: "non-escaped string" => "non-escaped string"); serialize_as!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Bytes(b"<\"escaped & bytes'>") => Unsupported("`serialize_bytes` not supported yet")); serialize_as!(option_none: Option::<&str>::None => ""); serialize_as!(option_some: Some("non-escaped string") => "non-escaped string"); serialize_as!(option_some_empty: Some("") => ""); serialize_as!(unit: () => ""); serialize_as!(unit_struct: Unit => ""); serialize_as!(unit_struct_escaped: UnitEscaped => ""); serialize_as!(enum_unit: Enum::Unit => ""); err!(enum_unit_escaped: Enum::UnitEscaped => Unsupported("character `<` is not allowed at the start of an XML name `<\"&'>`")); serialize_as!(newtype: Newtype(42) => "42"); serialize_as!(enum_newtype: Enum::Newtype(42) => "42"); serialize_as!(seq: vec![1, 2, 3] => "1\n\ 2\n\ 3"); serialize_as!(seq_empty: Vec::::new() => ""); serialize_as!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'>\n\ with\t\n\r spaces\n\ 3"); serialize_as!(tuple_struct: Tuple("first", 42) => "first\n\ 42"); serialize_as!(enum_tuple: Enum::Tuple("first", 42) => "first\n\ 42"); serialize_as!(map: BTreeMap::from([("_1", 2), ("_3", 4)]) => "\n \ <_1>2\n \ <_3>4\n\ "); serialize_as!(struct_: Struct { key: "answer", val: (42, 42) } => "\n \ answer\n 
\ 42\n \ 42\n\ "); serialize_as!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => "\n \ answer\n \ 42\n \ 42\n\ "); /// Special field name `$text` should be serialized as text content. /// Sequences serialized as an `xs:list` content mod text { use super::*; /// `$text` key in a map mod map { use super::*; use pretty_assertions::assert_eq; macro_rules! text { ($name:ident: $data:expr) => { serialize_as!($name: BTreeMap::from([("$text", $data)]) => ""); }; ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: BTreeMap::from([("$text", $data)]) => concat!("\n ", $expected,"\n")); }; } text!(false_: false => "false"); text!(true_: true => "true"); text!(i8_: -42i8 => "-42"); text!(i16_: -4200i16 => "-4200"); text!(i32_: -42000000i32 => "-42000000"); text!(i64_: -42000000000000i64 => "-42000000000000"); text!(isize_: -42000000000000isize => "-42000000000000"); text!(u8_: 42u8 => "42"); text!(u16_: 4200u16 => "4200"); text!(u32_: 42000000u32 => "42000000"); text!(u64_: 42000000000000u64 => "42000000000000"); text!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! 
{ text!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); text!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } text!(f32_: 4.2f32 => "4.2"); text!(f64_: 4.2f64 => "4.2"); text!(char_non_escaped: 'h' => "h"); text!(char_lt: '<' => "<"); text!(char_gt: '>' => ">"); text!(char_amp: '&' => "&"); text!(char_apos: '\'' => "'"); text!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content text!(char_space: ' ' => " "); text!(str_non_escaped: "non-escaped string" => "non-escaped string"); text!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Text { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); text!(option_none: Option::<&str>::None); text!(option_some: Some("non-escaped string") => "non-escaped string"); text!(option_some_empty_str: Some("")); text!(unit: ()); text!(unit_struct: Unit); text!(unit_struct_escaped: UnitEscaped); text!(enum_unit: Enum::Unit => "Unit"); text!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); text!(newtype: Newtype(42) => "42"); // We have no space where name of a variant can be stored err!(enum_newtype: Text { before: "answer", content: Enum::Newtype(42), after: "answer", } => Unsupported("enum newtype variant `Enum::Newtype` cannot be serialized as an attribute or text content value")); // Sequences are serialized separated by spaces, all spaces inside are escaped text!(seq: vec![1, 2, 3] => "1 2 3"); text!(seq_empty: Vec::::new()); text!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'> \ with spaces \ 3"); text!(tuple_struct: Tuple("first", 42) => "first 42"); // We have no space where name of a variant can be stored err!(enum_tuple: Text { before: "answer", content: Enum::Tuple("first", 42), after: "answer", } => Unsupported("enum tuple variant `Enum::Tuple` cannot be 
serialized as an attribute or text content value")); // Complex types cannot be serialized in `$text` field err!(map: Text { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("map cannot be serialized as an attribute or text content value")); err!(struct_: Text { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("struct `Struct` cannot be serialized as an attribute or text content value")); err!(enum_struct: Text { before: "answer", content: Enum::Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("enum struct variant `Enum::Struct` cannot be serialized as an attribute or text content value")); } /// `$text` field inside a struct mod struct_ { use super::*; use pretty_assertions::assert_eq; macro_rules! text { ($name:ident: $data:expr) => { serialize_as!($name: Text { before: "answer", content: $data, after: "answer", } => "\n \ answer\n \ answer\n\ "); }; ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: Text { before: "answer", content: $data, after: "answer", } => concat!( "\n answer\n ", $expected, "\n answer\n", )); }; } text!(false_: false => "false"); text!(true_: true => "true"); text!(i8_: -42i8 => "-42"); text!(i16_: -4200i16 => "-4200"); text!(i32_: -42000000i32 => "-42000000"); text!(i64_: -42000000000000i64 => "-42000000000000"); text!(isize_: -42000000000000isize => "-42000000000000"); text!(u8_: 42u8 => "42"); text!(u16_: 4200u16 => "4200"); text!(u32_: 42000000u32 => "42000000"); text!(u64_: 42000000000000u64 => "42000000000000"); text!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! 
{ text!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); text!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } text!(f32_: 4.2f32 => "4.2"); text!(f64_: 4.2f64 => "4.2"); text!(char_non_escaped: 'h' => "h"); text!(char_lt: '<' => "<"); text!(char_gt: '>' => ">"); text!(char_amp: '&' => "&"); text!(char_apos: '\'' => "'"); text!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content text!(char_space: ' ' => " "); text!(str_non_escaped: "non-escaped string" => "non-escaped string"); text!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Text { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); text!(option_none: Option::<&str>::None); text!(option_some: Some("non-escaped string") => "non-escaped string"); text!(option_some_empty_str: Some("")); text!(unit: ()); text!(unit_struct: Unit); text!(unit_struct_escaped: UnitEscaped); text!(enum_unit: Enum::Unit => "Unit"); text!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); text!(newtype: Newtype(42) => "42"); // We have no space where name of a variant can be stored err!(enum_newtype: Text { before: "answer", content: Enum::Newtype(42), after: "answer", } => Unsupported("enum newtype variant `Enum::Newtype` cannot be serialized as an attribute or text content value")); // Sequences are serialized separated by spaces, all spaces inside are escaped text!(seq: vec![1, 2, 3] => "1 2 3"); text!(seq_empty: Vec::::new()); text!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'> \ with spaces \ 3"); text!(tuple_struct: Tuple("first", 42) => "first 42"); // We have no space where name of a variant can be stored err!(enum_tuple: Text { before: "answer", content: Enum::Tuple("first", 42), after: "answer", } => Unsupported("enum tuple variant `Enum::Tuple` cannot be 
serialized as an attribute or text content value")); // Complex types cannot be serialized in `$text` field err!(map: Text { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("map cannot be serialized as an attribute or text content value")); err!(struct_: Text { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("struct `Struct` cannot be serialized as an attribute or text content value")); err!(enum_struct: Text { before: "answer", content: Enum::Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("enum struct variant `Enum::Struct` cannot be serialized as an attribute or text content value")); } /// `$text` field inside a struct variant of an enum mod enum_struct { use super::*; use pretty_assertions::assert_eq; macro_rules! text { ($name:ident: $data:expr) => { serialize_as!($name: SpecialEnum::Text { before: "answer", content: $data, after: "answer", } => "\n \ answer\n \ answer\n\ "); }; ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: SpecialEnum::Text { before: "answer", content: $data, after: "answer", } => concat!( "\n answer\n ", $expected, "\n answer\n", )); }; } text!(false_: false => "false"); text!(true_: true => "true"); text!(i8_: -42i8 => "-42"); text!(i16_: -4200i16 => "-4200"); text!(i32_: -42000000i32 => "-42000000"); text!(i64_: -42000000000000i64 => "-42000000000000"); text!(isize_: -42000000000000isize => "-42000000000000"); text!(u8_: 42u8 => "42"); text!(u16_: 4200u16 => "4200"); text!(u32_: 42000000u32 => "42000000"); text!(u64_: 42000000000000u64 => "42000000000000"); text!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! 
{ text!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); text!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } text!(f32_: 4.2f32 => "4.2"); text!(f64_: 4.2f64 => "4.2"); text!(char_non_escaped: 'h' => "h"); text!(char_lt: '<' => "<"); text!(char_gt: '>' => ">"); text!(char_amp: '&' => "&"); text!(char_apos: '\'' => "'"); text!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content text!(char_space: ' ' => " "); text!(str_non_escaped: "non-escaped string" => "non-escaped string"); text!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: SpecialEnum::Text { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); text!(option_none: Option::<&str>::None); text!(option_some: Some("non-escaped string") => "non-escaped string"); text!(option_some_empty_str: Some("")); text!(unit: ()); text!(unit_struct: Unit); text!(unit_struct_escaped: UnitEscaped); text!(enum_unit: Enum::Unit => "Unit"); text!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); text!(newtype: Newtype(42) => "42"); // We have no space where name of a variant can be stored err!(enum_newtype: SpecialEnum::Text { before: "answer", content: Enum::Newtype(42), after: "answer", } => Unsupported("enum newtype variant `Enum::Newtype` cannot be serialized as an attribute or text content value")); // Sequences are serialized separated by spaces, all spaces inside are escaped text!(seq: vec![1, 2, 3] => "1 2 3"); text!(seq_empty: Vec::::new()); text!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'> \ with spaces \ 3"); text!(tuple_struct: Tuple("first", 42) => "first 42"); // We have no space where name of a variant can be stored err!(enum_tuple: SpecialEnum::Text { before: "answer", content: Enum::Tuple("first", 42), after: "answer", } => Unsupported("enum 
tuple variant `Enum::Tuple` cannot be serialized as an attribute or text content value")); // Complex types cannot be serialized in `$text` field err!(map: SpecialEnum::Text { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("map cannot be serialized as an attribute or text content value")); err!(struct_: SpecialEnum::Text { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("struct `Struct` cannot be serialized as an attribute or text content value")); err!(enum_struct: SpecialEnum::Text { before: "answer", content: Enum::Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("enum struct variant `Enum::Struct` cannot be serialized as an attribute or text content value")); } } /// Special field name `$value` should be serialized using name, provided /// by the type of value instead of a key. Sequences serialized as a list /// of tags with that name (each element can have their own name) mod value { use super::*; /// `$value` key in a map mod map { use super::*; use pretty_assertions::assert_eq; macro_rules! value { ($name:ident: $data:expr) => { serialize_as!($name: BTreeMap::from([("$value", $data)]) => ""); }; ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: BTreeMap::from([("$value", $data)]) => concat!("\n ", $expected,"\n")); }; } value!(false_: false => "false"); value!(true_: true => "true"); value!(i8_: -42i8 => "-42"); value!(i16_: -4200i16 => "-4200"); value!(i32_: -42000000i32 => "-42000000"); value!(i64_: -42000000000000i64 => "-42000000000000"); value!(isize_: -42000000000000isize => "-42000000000000"); value!(u8_: 42u8 => "42"); value!(u16_: 4200u16 => "4200"); value!(u32_: 42000000u32 => "42000000"); value!(u64_: 42000000000000u64 => "42000000000000"); value!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! 
{ value!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); value!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } value!(f32_: 4.2f32 => "4.2"); value!(f64_: 4.2f64 => "4.2"); value!(char_non_escaped: 'h' => "h"); value!(char_lt: '<' => "<"); value!(char_gt: '>' => ">"); value!(char_amp: '&' => "&"); value!(char_apos: '\'' => "'"); value!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content value!(char_space: ' ' => " "); value!(str_non_escaped: "non-escaped string" => "non-escaped string"); value!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: BTreeMap::from([("$value", Bytes(b"<\"escaped & bytes'>"))]) => Unsupported("`serialize_bytes` not supported yet")); value!(option_none: Option::<&str>::None); value!(option_some: Some("non-escaped string") => "non-escaped string"); value!(option_some_empty_str: Some("")); value!(unit: ()); value!(unit_struct: Unit); value!(unit_struct_escaped: UnitEscaped); value!(enum_unit: Enum::Unit => ""); err!(enum_unit_escaped: BTreeMap::from([("$value", Enum::UnitEscaped)]) => Unsupported("character `<` is not allowed at the start of an XML name `<\"&'>`")); value!(newtype: Newtype(42) => "42"); value!(enum_newtype: Enum::Newtype(42) => "42"); value!(seq: vec![1, 2, 3] => "1\n 2\n 3"); value!(seq_empty: Vec::::new()); value!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'>\n \ with\t\n\r spaces\n \ 3"); value!(tuple_struct: Tuple("first", 42) => "first\n 42"); value!(enum_tuple: Enum::Tuple("first", 42) => "first\n \ 42"); // We cannot wrap map or struct in any container and should not // flatten it, so it is impossible to serialize maps and structs err!(map: BTreeMap::from([("$value", BTreeMap::from([("_1", 2), ("_3", 4)]))]) => Unsupported("serialization of map types is not supported in `$value` field")); err!(struct_: BTreeMap::from([("$value", 
Struct { key: "answer", val: (42, 42) })]) => Unsupported("serialization of struct `Struct` is not supported in `$value` field")); value!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => "\n \ answer\n \ 42\n \ 42\n \ "); } /// `$value` field inside a struct mod struct_ { use super::*; use pretty_assertions::assert_eq; macro_rules! value { ($name:ident: $data:expr) => { serialize_as!($name: Value { before: "answer", content: $data, after: "answer", } => "\n \ answer\n \ answer\n\ "); }; ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: Value { before: "answer", content: $data, after: "answer", } => concat!( "\n answer\n ", $expected, "\n answer\n", )); }; } value!(false_: false => "false"); value!(true_: true => "true"); value!(i8_: -42i8 => "-42"); value!(i16_: -4200i16 => "-4200"); value!(i32_: -42000000i32 => "-42000000"); value!(i64_: -42000000000000i64 => "-42000000000000"); value!(isize_: -42000000000000isize => "-42000000000000"); value!(u8_: 42u8 => "42"); value!(u16_: 4200u16 => "4200"); value!(u32_: 42000000u32 => "42000000"); value!(u64_: 42000000000000u64 => "42000000000000"); value!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! 
{ value!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); value!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } value!(f32_: 4.2f32 => "4.2"); value!(f64_: 4.2f64 => "4.2"); value!(char_non_escaped: 'h' => "h"); value!(char_lt: '<' => "<"); value!(char_gt: '>' => ">"); value!(char_amp: '&' => "&"); value!(char_apos: '\'' => "'"); value!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content value!(char_space: ' ' => " "); value!(str_non_escaped: "non-escaped string" => "non-escaped string"); value!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Value { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); value!(option_none: Option::<&str>::None); value!(option_some: Some("non-escaped string") => "non-escaped string"); value!(option_some_empty_str: Some("")); value!(unit: ()); value!(unit_struct: Unit); value!(unit_struct_escaped: UnitEscaped); value!(enum_unit: Enum::Unit => ""); err!(enum_unit_escaped: Value { before: "answer", content: Enum::UnitEscaped, after: "answer", } => Unsupported("character `<` is not allowed at the start of an XML name `<\"&'>`")); value!(newtype: Newtype(42) => "42"); value!(enum_newtype: Enum::Newtype(42) => "42"); // Note that sequences of primitives serialized without delimiters! 
value!(seq: vec![1, 2, 3] => "1\n 2\n 3"); value!(seq_empty: Vec::::new()); value!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'>\n \ with\t\n\r spaces\n \ 3"); value!(tuple_struct: Tuple("first", 42) => "first\n 42"); value!(enum_tuple: Enum::Tuple("first", 42) => "first\n \ 42"); // We cannot wrap map or struct in any container and should not // flatten it, so it is impossible to serialize maps and structs err!(map: Value { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("serialization of map types is not supported in `$value` field")); err!(struct_: Value { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("serialization of struct `Struct` is not supported in `$value` field")); value!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => "\n \ answer\n \ 42\n \ 42\n \ "); } /// `$value` field inside a struct variant of an enum mod enum_struct { use super::*; use pretty_assertions::assert_eq; macro_rules! value { ($name:ident: $data:expr) => { serialize_as!($name: SpecialEnum::Value { before: "answer", content: $data, after: "answer", } => "\n \ answer\n \ answer\n\ "); }; ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: SpecialEnum::Value { before: "answer", content: $data, after: "answer", } => concat!( "\n answer\n ", $expected, "\n answer\n", )); }; } value!(false_: false => "false"); value!(true_: true => "true"); value!(i8_: -42i8 => "-42"); value!(i16_: -4200i16 => "-4200"); value!(i32_: -42000000i32 => "-42000000"); value!(i64_: -42000000000000i64 => "-42000000000000"); value!(isize_: -42000000000000isize => "-42000000000000"); value!(u8_: 42u8 => "42"); value!(u16_: 4200u16 => "4200"); value!(u32_: 42000000u32 => "42000000"); value!(u64_: 42000000000000u64 => "42000000000000"); value!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! 
{ value!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); value!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } value!(f32_: 4.2f32 => "4.2"); value!(f64_: 4.2f64 => "4.2"); value!(char_non_escaped: 'h' => "h"); value!(char_lt: '<' => "<"); value!(char_gt: '>' => ">"); value!(char_amp: '&' => "&"); value!(char_apos: '\'' => "'"); value!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content value!(char_space: ' ' => " "); value!(str_non_escaped: "non-escaped string" => "non-escaped string"); value!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: SpecialEnum::Value { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); value!(option_none: Option::<&str>::None); value!(option_some: Some("non-escaped string") => "non-escaped string"); value!(option_some_empty_str: Some("")); value!(unit: ()); value!(unit_struct: Unit); value!(unit_struct_escaped: UnitEscaped); value!(enum_unit: Enum::Unit => ""); err!(enum_unit_escaped: SpecialEnum::Value { before: "answer", content: Enum::UnitEscaped, after: "answer", } => Unsupported("character `<` is not allowed at the start of an XML name `<\"&'>`")); value!(newtype: Newtype(42) => "42"); value!(enum_newtype: Enum::Newtype(42) => "42"); // Note that sequences of primitives serialized without delimiters! 
value!(seq: vec![1, 2, 3] => "1\n 2\n 3"); value!(seq_empty: Vec::::new()); value!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'>\n \ with\t\n\r spaces\n \ 3"); value!(tuple_struct: Tuple("first", 42) => "first\n 42"); value!(enum_tuple: Enum::Tuple("first", 42) => "first\n \ 42"); // We cannot wrap map or struct in any container and should not // flatten it, so it is impossible to serialize maps and structs err!(map: SpecialEnum::Value { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("serialization of map types is not supported in `$value` field")); err!(struct_: SpecialEnum::Value { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("serialization of struct `Struct` is not supported in `$value` field")); value!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => "\n \ answer\n \ 42\n \ 42\n \ "); } } mod attributes { use super::*; use pretty_assertions::assert_eq; serialize_as!(map_attr: BTreeMap::from([("@key1", 1), ("@key2", 2)]) => r#""#); serialize_as!(map_mixed: BTreeMap::from([("@key1", 1), ("key2", 2)]) => "\n \ 2\n\ "); serialize_as!(struct_: Attributes { key: "answer", val: (42, 42) } => r#""#); serialize_as!(struct_before: AttributesBefore { key: "answer", val: 42 } => "\n \ 42\n\ "); serialize_as!(struct_after: AttributesAfter { key: "answer", val: 42 } => "\n \ answer\n\ "); serialize_as!(enum_: Enum::Attributes { key: "answer", val: (42, 42) } => r#""#); serialize_as!(enum_before: Enum::AttributesBefore { key: "answer", val: 42 } => "\n \ 42\n\ "); serialize_as!(enum_after: Enum::AttributesAfter { key: "answer", val: 42 } => "\n \ answer\n\ "); /// Test for https://github.com/tafia/quick-xml/issues/252 mod optional { use super::*; use pretty_assertions::assert_eq; serialize_as!(none: OptionalAttributes { a: None, b: None } => r#""#); serialize_as!(some_empty_str: OptionalAttributes { a: Some(""), b: Some("") } => r#""#); 
serialize_as!(some_non_empty: OptionalAttributes { a: Some("a"), b: Some("b") } => r#""#);
}
}

// NOTE(review): the expected-value literals in this trailing part of the
// previous file's test module look like they lost their XML markup during
// extraction; they are kept exactly as found — restore them from upstream.
/// Test for https://github.com/tafia/quick-xml/issues/252
mod optional {
use super::*;
use pretty_assertions::assert_eq;

serialize_as!(none: OptionalElements { a: None, b: None } => "\n \ \n\ ");
serialize_as!(some_empty_str: OptionalElements { a: Some(""), b: Some("") } => "\n \ \n \ \n\ ");
serialize_as!(some_non_empty: OptionalElements { a: Some("a"), b: Some("b") } => "\n \ a\n \ b\n\ ");
}
}
}
quick-xml-0.27.1/src/se/key.rs000064400000000000000000000253020072674642500142130ustar 00000000000000
use crate::errors::serialize::DeError;
use serde::ser::{Impossible, Serialize, Serializer};
use serde::serde_if_integer128;
use std::fmt::Write;

/// A serializer, that ensures, that only plain types can be serialized,
/// so result can be used as an XML tag or attribute name.
///
/// This serializer does not check that name does not contain characters that
/// [not allowed] in XML names, because in some cases it should pass names
/// that would be filtered on higher level.
///
/// [not allowed]: https://www.w3.org/TR/REC-xml/#sec-common-syn
pub struct QNameSerializer<W: Write> {
    /// Writer to which this serializer writes content
    pub writer: W,
}

impl<W: Write> QNameSerializer<W> {
    /// Appends `value` to the underlying writer without any escaping,
    /// converting a formatting failure into a [`DeError`].
    #[inline]
    fn write_str(&mut self, value: &str) -> Result<(), DeError> {
        Ok(self.writer.write_str(value)?)
    }
}

impl<W: Write> Serializer for QNameSerializer<W> {
    /// On success the writer is handed back to the caller.
    type Ok = W;
    type Error = DeError;

    // Compound types can never be a name, so none of the compound
    // serializers can ever be constructed.
    type SerializeSeq = Impossible<Self::Ok, Self::Error>;
    type SerializeTuple = Impossible<Self::Ok, Self::Error>;
    type SerializeTupleStruct = Impossible<Self::Ok, Self::Error>;
    type SerializeTupleVariant = Impossible<Self::Ok, Self::Error>;
    type SerializeMap = Impossible<Self::Ok, Self::Error>;
    type SerializeStruct = Impossible<Self::Ok, Self::Error>;
    type SerializeStructVariant = Impossible<Self::Ok, Self::Error>;

    // Primitive, `char`, bytes, option, unit variant and newtype struct
    // methods come from the shared macro.
    write_primitive!();

    /// Writes the string as is (no escaping) and returns the writer.
    fn serialize_str(mut self, value: &str) -> Result<Self::Ok, Self::Error> {
        self.write_str(value)?;
        Ok(self.writer)
    }

    /// Because unit type can be represented only by empty string which is not
    /// a valid XML name, serialization of unit returns `Err(Unsupported)`
    fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
        Err(DeError::Unsupported(
            "unit type `()` cannot be serialized as an XML tag name".into(),
        ))
    }

    /// Because unit struct can be represented only by empty string which is not
    /// a valid XML name, serialization of unit struct returns `Err(Unsupported)`
    fn serialize_unit_struct(self, name: &'static str) -> Result<Self::Ok, Self::Error> {
        Err(DeError::Unsupported(
            format!(
                "unit struct `{}` cannot be serialized as an XML tag name",
                name
            )
            .into(),
        ))
    }

    /// We cannot store both a variant discriminant and a variant value,
    /// so serialization of enum newtype variant returns `Err(Unsupported)`
    fn serialize_newtype_variant<T: ?Sized + Serialize>(
        self,
        name: &'static str,
        _variant_index: u32,
        variant: &'static str,
        _value: &T,
    ) -> Result<Self::Ok, Self::Error> {
        Err(DeError::Unsupported(
            format!(
                "enum newtype variant `{}::{}` cannot be serialized as an XML tag name",
                name, variant
            )
            .into(),
        ))
    }

    /// Sequences are rejected with `Err(Unsupported)`.
    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
        Err(DeError::Unsupported(
            "sequence cannot be serialized as an XML tag name".into(),
        ))
    }

    /// Tuples are rejected with `Err(Unsupported)`.
    fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
        Err(DeError::Unsupported(
            "tuple cannot be serialized as an XML tag name".into(),
        ))
    }

    /// Tuple structs are rejected with `Err(Unsupported)`; the message names the type.
    fn serialize_tuple_struct(
        self,
        name: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeTupleStruct, Self::Error> {
        Err(DeError::Unsupported(
            format!(
                "tuple struct `{}` cannot be serialized as an XML tag name",
                name
            )
            .into(),
        ))
    }

    /// Enum tuple variants are rejected with `Err(Unsupported)`; the message
    /// names the enum and the variant.
    fn serialize_tuple_variant(
        self,
        name: &'static str,
        _variant_index: u32,
        variant: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeTupleVariant, Self::Error> {
        Err(DeError::Unsupported(
            format!(
                "enum tuple variant `{}::{}` cannot be serialized as an XML tag name",
                name, variant
            )
            .into(),
        ))
    }

    /// Maps are rejected with `Err(Unsupported)`.
    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
        Err(DeError::Unsupported(
            "map cannot be serialized as an XML tag name".into(),
        ))
    }

    /// Structs are rejected with `Err(Unsupported)`; the message names the type.
    fn serialize_struct(
        self,
        name: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeStruct, Self::Error> {
        Err(DeError::Unsupported(
            format!("struct `{}` cannot be serialized as an XML tag name", name).into(),
        ))
    }

    /// Enum struct variants are rejected with `Err(Unsupported)`; the message
    /// names the enum and the variant.
    fn serialize_struct_variant(
        self,
        name: &'static str,
        _variant_index: u32,
        variant: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeStructVariant, Self::Error> {
        Err(DeError::Unsupported(
            format!(
                "enum struct variant `{}::{}` cannot be serialized as an XML tag name",
                name, variant
            )
            .into(),
        ))
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::utils::Bytes;
    use pretty_assertions::assert_eq;
    use serde::Serialize;
    use std::collections::BTreeMap;

    #[derive(Debug, Serialize, PartialEq)]
    struct Unit;

    #[derive(Debug, Serialize, PartialEq)]
    struct Newtype(bool);

    #[derive(Debug, Serialize, PartialEq)]
    struct Tuple(&'static str, usize);

    #[derive(Debug, Serialize, PartialEq)]
    struct Struct {
        key: &'static str,
        val: usize,
    }

    #[derive(Debug, Serialize, PartialEq)]
    enum Enum {
        Unit,
        #[serde(rename = "<\"&'>")]
        UnitEscaped,
        Newtype(bool),
        Tuple(&'static str, usize),
        Struct {
            key: &'static str,
            val: usize,
        },
    }

    /// Checks that given `$data` successfully serialized as `$expected`
    macro_rules! serialize_as {
        ($name:ident: $data:expr => $expected:literal) => {
            #[test]
            fn $name() {
                let ser = QNameSerializer {
                    writer: String::new(),
                };

                let buffer = $data.serialize(ser).unwrap();
                assert_eq!(buffer, $expected);
            }
        };
    }

    /// Checks that attempt to serialize given `$data` results to a
    /// serialization error `$kind` with `$reason`
    macro_rules! err {
        ($name:ident: $data:expr => $kind:ident($reason:literal)) => {
            #[test]
            fn $name() {
                let mut buffer = String::new();
                let ser = QNameSerializer {
                    writer: &mut buffer,
                };

                match $data.serialize(ser).unwrap_err() {
                    DeError::$kind(e) => assert_eq!(e, $reason),
                    e => panic!(
                        "Expected `{}({})`, found `{:?}`",
                        stringify!($kind),
                        $reason,
                        e
                    ),
                }
                // Nothing may be written when serialization fails
                assert_eq!(buffer, "");
            }
        };
    }

    serialize_as!(false_: false => "false");
    serialize_as!(true_: true => "true");

    serialize_as!(i8_: -42i8 => "-42");
    serialize_as!(i16_: -4200i16 => "-4200");
    serialize_as!(i32_: -42000000i32 => "-42000000");
    serialize_as!(i64_: -42000000000000i64 => "-42000000000000");
    serialize_as!(isize_: -42000000000000isize => "-42000000000000");

    serialize_as!(u8_: 42u8 => "42");
    serialize_as!(u16_: 4200u16 => "4200");
    serialize_as!(u32_: 42000000u32 => "42000000");
    serialize_as!(u64_: 42000000000000u64 => "42000000000000");
    serialize_as!(usize_: 42000000000000usize => "42000000000000");

    serde_if_integer128! {
        serialize_as!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000");
        serialize_as!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000");
    }

    serialize_as!(f32_: 4.2f32 => "4.2");
    serialize_as!(f64_: 4.2f64 => "4.2");

    // Names are written verbatim: special characters are not escaped here
    serialize_as!(char_non_escaped: 'h' => "h");
    serialize_as!(char_lt: '<' => "<");
    serialize_as!(char_gt: '>' => ">");
    serialize_as!(char_amp: '&' => "&");
    serialize_as!(char_apos: '\'' => "'");
    serialize_as!(char_quot: '"' => "\"");

    serialize_as!(str_valid_name: "valid-name" => "valid-name");
    serialize_as!(str_space: "string with spaces" => "string with spaces");
    serialize_as!(str_lt: "string<" => "string<");
    serialize_as!(str_gt: "string>" => "string>");
    serialize_as!(str_amp: "string&" => "string&");
    serialize_as!(str_apos: "string'" => "string'");
    serialize_as!(str_quot: "string\"" => "string\"");

    err!(bytes: Bytes(b"<\"escaped & bytes'>")
        => Unsupported("`serialize_bytes` not supported yet"));

    serialize_as!(option_none: Option::<&str>::None => "");
    serialize_as!(option_some: Some("non-escaped-string") => "non-escaped-string");

    err!(unit: ()
        => Unsupported("unit type `()` cannot be serialized as an XML tag name"));
    err!(unit_struct: Unit
        => Unsupported("unit struct `Unit` cannot be serialized as an XML tag name"));

    serialize_as!(enum_unit: Enum::Unit => "Unit");
    serialize_as!(enum_unit_escaped: Enum::UnitEscaped => "<\"&'>");

    serialize_as!(newtype: Newtype(true) => "true");
    err!(enum_newtype: Enum::Newtype(false)
        => Unsupported("enum newtype variant `Enum::Newtype` cannot be serialized as an XML tag name"));

    err!(seq: vec![1, 2, 3]
        => Unsupported("sequence cannot be serialized as an XML tag name"));
    err!(tuple: ("<\"&'>", "with\t\r\n spaces", 3usize)
        => Unsupported("tuple cannot be serialized as an XML tag name"));
    err!(tuple_struct: Tuple("first", 42)
        => Unsupported("tuple struct `Tuple` cannot be serialized as an XML tag name"));
    err!(enum_tuple: Enum::Tuple("first", 42)
        => Unsupported("enum tuple variant `Enum::Tuple` cannot be serialized as an XML tag name"));

    err!(map: BTreeMap::from([("_1", 2), ("_3", 4)])
        => Unsupported("map cannot be serialized as an XML tag name"));
    err!(struct_: Struct { key: "answer", val: 42 }
        => Unsupported("struct `Struct` cannot be serialized as an XML tag name"));
    err!(enum_struct: Enum::Struct { key: "answer", val: 42 }
        => Unsupported("enum struct variant `Enum::Struct` cannot be serialized as an XML tag name"));
}
quick-xml-0.27.1/src/se/mod.rs000064400000000000000000000431020072674642500142000ustar 00000000000000
//! Module to handle custom serde `Serializer`

/// Implements writing primitives to the underlying writer.
/// Implementor must provide `write_str(self, &str) -> Result<(), DeError>` method
macro_rules!
write_primitive { ($method:ident ( $ty:ty )) => { fn $method(mut self, value: $ty) -> Result { self.write_str(&value.to_string())?; Ok(self.writer) } }; () => { fn serialize_bool(mut self, value: bool) -> Result { self.write_str(if value { "true" } else { "false" })?; Ok(self.writer) } write_primitive!(serialize_i8(i8)); write_primitive!(serialize_i16(i16)); write_primitive!(serialize_i32(i32)); write_primitive!(serialize_i64(i64)); write_primitive!(serialize_u8(u8)); write_primitive!(serialize_u16(u16)); write_primitive!(serialize_u32(u32)); write_primitive!(serialize_u64(u64)); serde_if_integer128! { write_primitive!(serialize_i128(i128)); write_primitive!(serialize_u128(u128)); } write_primitive!(serialize_f32(f32)); write_primitive!(serialize_f64(f64)); fn serialize_char(self, value: char) -> Result { self.serialize_str(&value.to_string()) } fn serialize_bytes(self, _value: &[u8]) -> Result { //TODO: customization point - allow user to decide how to encode bytes Err(DeError::Unsupported( "`serialize_bytes` not supported yet".into(), )) } fn serialize_none(self) -> Result { Ok(self.writer) } fn serialize_some(self, value: &T) -> Result { value.serialize(self) } fn serialize_unit_variant( self, _name: &'static str, _variant_index: u32, variant: &'static str, ) -> Result { self.serialize_str(variant) } fn serialize_newtype_struct( self, _name: &'static str, value: &T, ) -> Result { value.serialize(self) } }; } //////////////////////////////////////////////////////////////////////////////////////////////////// mod content; mod element; pub(crate) mod key; pub(crate) mod simple_type; use self::content::ContentSerializer; use self::element::ElementSerializer; use crate::errors::serialize::DeError; use crate::writer::Indentation; use serde::ser::{self, Serialize}; use serde::serde_if_integer128; use std::fmt::Write; use std::str::from_utf8; /// Serialize struct into a `Write`r pub fn to_writer(writer: W, value: &S) -> Result { value.serialize(Serializer::new(writer)) 
} /// Serialize struct into a `String` pub fn to_string(value: &S) -> Result { to_writer(String::new(), value) } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Defines which characters would be escaped in [`Text`] events and attribute /// values. /// /// [`Text`]: crate::events::Event::Text #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum QuoteLevel { /// Performs escaping, escape all characters that could have special meaning /// in the XML. This mode is compatible with SGML specification. /// /// Characters that will be replaced: /// /// Original | Replacement /// ---------|------------ /// `<` | `<` /// `>` | `>` /// `&` | `&` /// `"` | `"` /// `'` | `'` Full, /// Performs escaping that is compatible with SGML specification. /// /// This level adds escaping of `>` to the `Minimal` level, which is [required] /// for compatibility with SGML. /// /// Characters that will be replaced: /// /// Original | Replacement /// ---------|------------ /// `<` | `<` /// `>` | `>` /// `&` | `&` /// /// [required]: https://www.w3.org/TR/xml11/#syntax Partial, /// Performs the minimal possible escaping, escape only strictly necessary /// characters. /// /// Characters that will be replaced: /// /// Original | Replacement /// ---------|------------ /// `<` | `<` /// `&` | `&` Minimal, } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Implements serialization method by forwarding it to the serializer created by /// the helper method [`Serializer::ser`]. macro_rules! forward { ($name:ident($ty:ty)) => { fn $name(self, value: $ty) -> Result { self.ser(&concat!("`", stringify!($ty), "`"))?.$name(value) } }; } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Almost all characters can form a name. Citation from : /// /// > The overall philosophy of names has changed since XML 1.0. 
Whereas XML 1.0 /// > provided a rigid definition of names, wherein everything that was not permitted /// > was forbidden, XML 1.1 names are designed so that everything that is not /// > forbidden (for a specific reason) is permitted. Since Unicode will continue /// > to grow past version 4.0, further changes to XML can be avoided by allowing /// > almost any character, including those not yet assigned, in names. /// /// const fn is_xml11_name_start_char(ch: char) -> bool { match ch { ':' | 'A'..='Z' | '_' | 'a'..='z' | '\u{00C0}'..='\u{00D6}' | '\u{00D8}'..='\u{00F6}' | '\u{00F8}'..='\u{02FF}' | '\u{0370}'..='\u{037D}' | '\u{037F}'..='\u{1FFF}' | '\u{200C}'..='\u{200D}' | '\u{2070}'..='\u{218F}' | '\u{2C00}'..='\u{2FEF}' | '\u{3001}'..='\u{D7FF}' | '\u{F900}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}' | '\u{10000}'..='\u{EFFFF}' => true, _ => false, } } /// const fn is_xml11_name_char(ch: char) -> bool { match ch { '-' | '.' | '0'..='9' | '\u{00B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}' => { true } _ => is_xml11_name_start_char(ch), } } /// Helper struct to self-defense from errors #[derive(Clone, Copy, Debug, PartialEq)] pub(self) struct XmlName<'n>(&'n str); impl<'n> XmlName<'n> { /// Checks correctness of the XML name according to [XML 1.1 specification] /// /// [XML 1.1 specification]: https://www.w3.org/TR/REC-xml/#NT-Name pub fn try_from(name: &'n str) -> Result, DeError> { //TODO: Customization point: allow user to decide if he want to reject or encode the name match name.chars().next() { Some(ch) if !is_xml11_name_start_char(ch) => Err(DeError::Unsupported( format!("character `{ch}` is not allowed at the start of an XML name `{name}`") .into(), )), _ => match name.matches(|ch| !is_xml11_name_char(ch)).next() { Some(s) => Err(DeError::Unsupported( format!("character `{s}` is not allowed in an XML name `{name}`").into(), )), None => Ok(XmlName(name)), }, } } } 
//////////////////////////////////////////////////////////////////////////////////////////////////// pub(crate) enum Indent<'i> { None, Owned(Indentation), Borrow(&'i mut Indentation), } impl<'i> Indent<'i> { pub fn borrow(&mut self) -> Indent { match self { Self::None => Indent::None, Self::Owned(ref mut i) => Indent::Borrow(i), Self::Borrow(i) => Indent::Borrow(i), } } pub fn increase(&mut self) { match self { Self::None => {} Self::Owned(i) => i.grow(), Self::Borrow(i) => i.grow(), } } pub fn decrease(&mut self) { match self { Self::None => {} Self::Owned(i) => i.shrink(), Self::Borrow(i) => i.shrink(), } } pub fn write_indent(&mut self, mut writer: W) -> Result<(), DeError> { match self { Self::None => {} Self::Owned(i) => { writer.write_char('\n')?; writer.write_str(from_utf8(i.current())?)?; } Self::Borrow(i) => { writer.write_char('\n')?; writer.write_str(from_utf8(i.current())?)?; } } Ok(()) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A Serializer pub struct Serializer<'r, W: Write> { ser: ContentSerializer<'r, W>, /// Name of the root tag. If not specified, deduced from the structure name root_tag: Option>, } impl<'r, W: Write> Serializer<'r, W> { /// Creates a new `Serializer` that uses struct name as a root tag name. /// /// Note, that attempt to serialize a non-struct (including unit structs /// and newtype structs) will end up to an error. Use `with_root` to create /// serializer with explicitly defined root element name pub fn new(writer: W) -> Self { Self { ser: ContentSerializer { writer, level: QuoteLevel::Full, indent: Indent::None, write_indent: false, }, root_tag: None, } } /// Creates a new `Serializer` that uses specified root tag name. `name` should /// be valid [XML name], otherwise error is returned. 
/// /// # Examples /// /// When serializing a primitive type, only its representation will be written: /// /// ``` /// # use pretty_assertions::assert_eq; /// # use serde::Serialize; /// # use quick_xml::se::Serializer; /// /// let ser = Serializer::with_root(String::new(), Some("root")).unwrap(); /// /// assert_eq!( /// "node".serialize(ser).unwrap(), /// "node" /// ); /// ``` /// /// When serializing a struct, newtype struct, unit struct or tuple `root_tag` /// is used as tag name of root(s) element(s): /// /// ``` /// # use pretty_assertions::assert_eq; /// # use serde::Serialize; /// # use quick_xml::se::Serializer; /// /// #[derive(Debug, PartialEq, Serialize)] /// struct Struct { /// question: String, /// answer: u32, /// } /// /// let ser = Serializer::with_root(String::new(), Some("root")).unwrap(); /// /// let data = Struct { /// question: "The Ultimate Question of Life, the Universe, and Everything".into(), /// answer: 42, /// }; /// /// assert_eq!( /// data.serialize(ser).unwrap(), /// "\ /// The Ultimate Question of Life, the Universe, and Everything\ /// 42\ /// " /// ); /// ``` /// /// [XML name]: https://www.w3.org/TR/REC-xml/#NT-Name pub fn with_root(writer: W, root_tag: Option<&'r str>) -> Result { Ok(Self { ser: ContentSerializer { writer, level: QuoteLevel::Full, indent: Indent::None, write_indent: false, }, root_tag: root_tag.map(|tag| XmlName::try_from(tag)).transpose()?, }) } /// Configure indent for a serializer pub fn indent(&mut self, indent_char: char, indent_size: usize) -> &mut Self { self.ser.indent = Indent::Owned(Indentation::new(indent_char as u8, indent_size)); self } /// Creates actual serializer or returns an error if root tag is not defined. /// In that case `err` contains the name of type that cannot be serialized. 
fn ser(self, err: &str) -> Result<ElementSerializer<'r, W>, DeError> {
        match self.root_tag {
            Some(key) => Ok(ElementSerializer { ser: self.ser, key }),
            None => Err(DeError::Unsupported(
                format!("cannot serialize {} without defined root tag", err).into(),
            )),
        }
    }

    /// Creates actual serializer using root tag or a specified `key` if root tag
    /// is not defined. Returns an error if root tag is not defined and a `key`
    /// does not conform [XML rules](XmlName::try_from) for names.
    fn ser_name(self, key: &'static str) -> Result<ElementSerializer<'r, W>, DeError> {
        // An explicitly configured root tag always wins over the type name
        let key = match self.root_tag {
            Some(root) => root,
            None => XmlName::try_from(key)?,
        };
        Ok(ElementSerializer { ser: self.ser, key })
    }
}

/// Every method delegates to an [`ElementSerializer`] obtained from
/// [`Serializer::ser`] (types without a name of their own) or
/// [`Serializer::ser_name`] (named types), except `serialize_none`, which
/// returns the writer untouched, and `serialize_some`, which serializes the
/// inner value with this same serializer.
impl<'r, W: Write> ser::Serializer for Serializer<'r, W> {
    type Ok = W;
    type Error = DeError;

    type SerializeSeq = <ElementSerializer<'r, W> as ser::Serializer>::SerializeSeq;
    type SerializeTuple = <ElementSerializer<'r, W> as ser::Serializer>::SerializeTuple;
    type SerializeTupleStruct =
        <ElementSerializer<'r, W> as ser::Serializer>::SerializeTupleStruct;
    type SerializeTupleVariant =
        <ElementSerializer<'r, W> as ser::Serializer>::SerializeTupleVariant;
    type SerializeMap = <ElementSerializer<'r, W> as ser::Serializer>::SerializeMap;
    type SerializeStruct = <ElementSerializer<'r, W> as ser::Serializer>::SerializeStruct;
    type SerializeStructVariant =
        <ElementSerializer<'r, W> as ser::Serializer>::SerializeStructVariant;

    forward!(serialize_bool(bool));

    forward!(serialize_i8(i8));
    forward!(serialize_i16(i16));
    forward!(serialize_i32(i32));
    forward!(serialize_i64(i64));

    forward!(serialize_u8(u8));
    forward!(serialize_u16(u16));
    forward!(serialize_u32(u32));
    forward!(serialize_u64(u64));

    serde_if_integer128! {
        forward!(serialize_i128(i128));
        forward!(serialize_u128(u128));
    }

    forward!(serialize_f32(f32));
    forward!(serialize_f64(f64));

    forward!(serialize_char(char));
    forward!(serialize_str(&str));
    forward!(serialize_bytes(&[u8]));

    fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
        Ok(self.ser.writer)
    }

    fn serialize_some<T: ?Sized + Serialize>(self, value: &T) -> Result<Self::Ok, Self::Error> {
        value.serialize(self)
    }

    fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
        self.ser("`()`")?.serialize_unit()
    }

    fn serialize_unit_struct(self, name: &'static str) -> Result<Self::Ok, Self::Error> {
        self.ser_name(name)?.serialize_unit_struct(name)
    }

    fn serialize_unit_variant(
        self,
        name: &'static str,
        variant_index: u32,
        variant: &'static str,
    ) -> Result<Self::Ok, Self::Error> {
        self.ser_name(name)?
            .serialize_unit_variant(name, variant_index, variant)
    }

    fn serialize_newtype_struct<T: ?Sized + Serialize>(
        self,
        name: &'static str,
        value: &T,
    ) -> Result<Self::Ok, Self::Error> {
        self.ser_name(name)?.serialize_newtype_struct(name, value)
    }

    fn serialize_newtype_variant<T: ?Sized + Serialize>(
        self,
        name: &'static str,
        variant_index: u32,
        variant: &'static str,
        value: &T,
    ) -> Result<Self::Ok, Self::Error> {
        self.ser_name(name)?
            .serialize_newtype_variant(name, variant_index, variant, value)
    }

    fn serialize_seq(self, len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
        self.ser("sequence")?.serialize_seq(len)
    }

    fn serialize_tuple(self, len: usize) -> Result<Self::SerializeTuple, Self::Error> {
        self.ser("unnamed tuple")?.serialize_tuple(len)
    }

    fn serialize_tuple_struct(
        self,
        name: &'static str,
        len: usize,
    ) -> Result<Self::SerializeTupleStruct, Self::Error> {
        self.ser_name(name)?.serialize_tuple_struct(name, len)
    }

    fn serialize_tuple_variant(
        self,
        name: &'static str,
        variant_index: u32,
        variant: &'static str,
        len: usize,
    ) -> Result<Self::SerializeTupleVariant, Self::Error> {
        self.ser_name(name)?
            .serialize_tuple_variant(name, variant_index, variant, len)
    }

    fn serialize_map(self, len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
        self.ser("map")?.serialize_map(len)
    }

    fn serialize_struct(
        self,
        name: &'static str,
        len: usize,
    ) -> Result<Self::SerializeStruct, Self::Error> {
        self.ser_name(name)?.serialize_struct(name, len)
    }

    fn serialize_struct_variant(
        self,
        name: &'static str,
        variant_index: u32,
        variant: &'static str,
        len: usize,
    ) -> Result<Self::SerializeStructVariant, Self::Error> {
        self.ser_name(name)?
            .serialize_struct_variant(name, variant_index, variant, len)
    }
}
quick-xml-0.27.1/src/se/simple_type.rs000064400000000000000000001123240072674642500157560ustar 00000000000000
//! Contains Serde `Serializer` for XML [simple types] [as defined] in the XML Schema.
//!
//! [simple types]: https://www.w3schools.com/xml/el_simpletype.asp
//! [as defined]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition

use crate::errors::serialize::DeError;
use crate::escapei::_escape;
use crate::se::{Indent, QuoteLevel};
use serde::ser::{
    Impossible, Serialize, SerializeSeq, SerializeTuple, SerializeTupleStruct, Serializer,
};
use serde::serde_if_integer128;
use std::borrow::Cow;
use std::fmt::Write;

/// Place in the document for which a value is escaped; selects which quote
/// character (if any) is always escaped.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QuoteTarget {
    /// Escape data for a text content. No additional escape symbols
    Text,
    /// Escape data for a double-quoted attribute. `"` always escaped
    DoubleQAttr,
    /// Escape data for a single-quoted attribute. `'` always escaped
    SingleQAttr,
}

/// Escapes atomic value that could be part of a `xs:list`.
All whitespace characters /// additionally escaped fn escape_item(value: &str, target: QuoteTarget, level: QuoteLevel) -> Cow { use QuoteLevel::*; use QuoteTarget::*; match (target, level) { (_, Full) => _escape(value, |ch| match ch { // Spaces used as delimiters of list items, cannot be used in the item b' ' | b'\r' | b'\n' | b'\t' => true, // Required characters to escape b'&' | b'<' | b'>' | b'\'' | b'\"' => true, _ => false, }), //---------------------------------------------------------------------- (Text, Partial) => _escape(value, |ch| match ch { // Spaces used as delimiters of list items, cannot be used in the item b' ' | b'\r' | b'\n' | b'\t' => true, // Required characters to escape b'&' | b'<' | b'>' => true, _ => false, }), (Text, Minimal) => _escape(value, |ch| match ch { // Spaces used as delimiters of list items, cannot be used in the item b' ' | b'\r' | b'\n' | b'\t' => true, // Required characters to escape b'&' | b'<' => true, _ => false, }), //---------------------------------------------------------------------- (DoubleQAttr, Partial) => _escape(value, |ch| match ch { // Spaces used as delimiters of list items, cannot be used in the item b' ' | b'\r' | b'\n' | b'\t' => true, // Required characters to escape b'&' | b'<' | b'>' => true, // Double quoted attribute should escape quote b'"' => true, _ => false, }), (DoubleQAttr, Minimal) => _escape(value, |ch| match ch { // Spaces used as delimiters of list items, cannot be used in the item b' ' | b'\r' | b'\n' | b'\t' => true, // Required characters to escape b'&' | b'<' => true, // Double quoted attribute should escape quote b'"' => true, _ => false, }), //---------------------------------------------------------------------- (SingleQAttr, Partial) => _escape(value, |ch| match ch { // Spaces used as delimiters of list items b' ' | b'\r' | b'\n' | b'\t' => true, // Required characters to escape b'&' | b'<' | b'>' => true, // Single quoted attribute should escape quote b'\'' => true, _ => false, }), 
(SingleQAttr, Minimal) => _escape(value, |ch| match ch { // Spaces used as delimiters of list items b' ' | b'\r' | b'\n' | b'\t' => true, // Required characters to escape b'&' | b'<' => true, // Single quoted attribute should escape quote b'\'' => true, _ => false, }), } } /// Escapes XSD simple type value fn escape_list(value: &str, target: QuoteTarget, level: QuoteLevel) -> Cow { use QuoteLevel::*; use QuoteTarget::*; match (target, level) { (_, Full) => _escape(value, |ch| match ch { // Required characters to escape b'&' | b'<' | b'>' | b'\'' | b'\"' => true, _ => false, }), //---------------------------------------------------------------------- (Text, Partial) => _escape(value, |ch| match ch { // Required characters to escape b'&' | b'<' | b'>' => true, _ => false, }), (Text, Minimal) => _escape(value, |ch| match ch { // Required characters to escape b'&' | b'<' => true, _ => false, }), //---------------------------------------------------------------------- (DoubleQAttr, Partial) => _escape(value, |ch| match ch { // Required characters to escape b'&' | b'<' | b'>' => true, // Double quoted attribute should escape quote b'"' => true, _ => false, }), (DoubleQAttr, Minimal) => _escape(value, |ch| match ch { // Required characters to escape b'&' | b'<' => true, // Double quoted attribute should escape quote b'"' => true, _ => false, }), //---------------------------------------------------------------------- (SingleQAttr, Partial) => _escape(value, |ch| match ch { // Required characters to escape b'&' | b'<' | b'>' => true, // Single quoted attribute should escape quote b'\'' => true, _ => false, }), (SingleQAttr, Minimal) => _escape(value, |ch| match ch { // Required characters to escape b'&' | b'<' => true, // Single quoted attribute should escape quote b'\'' => true, _ => false, }), } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A serializer that handles ordinary [simple type definition][item] with 
/// `{variety} = atomic`, or an ordinary [simple type] definition with /// `{variety} = union` whose basic members are all atomic. /// /// This serializer can serialize only primitive types: /// - numbers /// - booleans /// - strings /// - units /// - options /// - unit variants of enums /// /// Identifiers represented as strings and serialized accordingly. /// /// Serialization of all other types returns [`Unsupported`][DeError::Unsupported] error. /// /// [item]: https://www.w3.org/TR/xmlschema11-1/#std-item_type_definition /// [simple type]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition pub struct AtomicSerializer { pub writer: W, pub target: QuoteTarget, /// Defines which XML characters need to be escaped pub level: QuoteLevel, } impl AtomicSerializer { fn write_str(&mut self, value: &str) -> Result<(), DeError> { Ok(self .writer .write_str(&escape_item(value, self.target, self.level))?) } } impl Serializer for AtomicSerializer { type Ok = W; type Error = DeError; type SerializeSeq = Impossible; type SerializeTuple = Impossible; type SerializeTupleStruct = Impossible; type SerializeTupleVariant = Impossible; type SerializeMap = Impossible; type SerializeStruct = Impossible; type SerializeStructVariant = Impossible; write_primitive!(); fn serialize_str(mut self, value: &str) -> Result { self.write_str(value)?; Ok(self.writer) } /// We cannot store anything, so the absence of a unit and presence of it /// does not differ, so serialization of unit returns `Err(Unsupported)` fn serialize_unit(self) -> Result { Err(DeError::Unsupported( "unit type `()` cannot be serialized as an `xs:list` item".into(), )) } /// We cannot store anything, so the absence of a unit and presence of it /// does not differ, so serialization of unit returns `Err(Unsupported)` fn serialize_unit_struct(self, name: &'static str) -> Result { Err(DeError::Unsupported( format!( "unit struct `{}` cannot be serialized as an `xs:list` item", name ) .into(), )) } /// We cannot store 
both a variant discriminant and a variant value, /// so serialization of enum newtype variant returns `Err(Unsupported)` fn serialize_newtype_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _value: &T, ) -> Result { Err(DeError::Unsupported( format!( "enum newtype variant `{}::{}` cannot be serialized as an `xs:list` item", name, variant ) .into(), )) } fn serialize_seq(self, _len: Option) -> Result { Err(DeError::Unsupported( "sequence cannot be serialized as an `xs:list` item".into(), )) } fn serialize_tuple(self, _len: usize) -> Result { Err(DeError::Unsupported( "tuple cannot be serialized as an `xs:list` item".into(), )) } fn serialize_tuple_struct( self, name: &'static str, _len: usize, ) -> Result { Err(DeError::Unsupported( format!( "tuple struct `{}` cannot be serialized as an `xs:list` item", name ) .into(), )) } fn serialize_tuple_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _len: usize, ) -> Result { Err(DeError::Unsupported( format!( "enum tuple variant `{}::{}` cannot be serialized as an `xs:list` item", name, variant ) .into(), )) } fn serialize_map(self, _len: Option) -> Result { Err(DeError::Unsupported( "map cannot be serialized as an `xs:list` item".into(), )) } fn serialize_struct( self, name: &'static str, _len: usize, ) -> Result { Err(DeError::Unsupported( format!( "struct `{}` cannot be serialized as an `xs:list` item", name ) .into(), )) } fn serialize_struct_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _len: usize, ) -> Result { Err(DeError::Unsupported( format!( "enum struct variant `{}::{}` cannot be serialized as an `xs:list` item", name, variant ) .into(), )) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A serializer for a values representing XSD [simple types], which used in: /// - attribute values (`<... 
...="value" ...>`) /// - text content (`<...>text`) /// - CDATA content (`<...>`) /// /// [simple types]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition pub struct SimpleTypeSerializer<'i, W: Write> { /// Writer to which this serializer writes content pub writer: W, /// Target for which element is serializing. Affects additional characters to escape. pub target: QuoteTarget, /// Defines which XML characters need to be escaped pub level: QuoteLevel, /// Indent that should be written before the content if content is not an empty string pub(crate) indent: Indent<'i>, } impl<'i, W: Write> SimpleTypeSerializer<'i, W> { fn write_str(&mut self, value: &str) -> Result<(), DeError> { self.indent.write_indent(&mut self.writer)?; Ok(self .writer .write_str(&escape_list(value, self.target, self.level))?) } } impl<'i, W: Write> Serializer for SimpleTypeSerializer<'i, W> { type Ok = W; type Error = DeError; type SerializeSeq = SimpleSeq<'i, W>; type SerializeTuple = SimpleSeq<'i, W>; type SerializeTupleStruct = SimpleSeq<'i, W>; type SerializeTupleVariant = Impossible; type SerializeMap = Impossible; type SerializeStruct = Impossible; type SerializeStructVariant = Impossible; write_primitive!(); fn serialize_str(mut self, value: &str) -> Result { if value.is_empty() { self.indent = Indent::None; } self.write_str(value)?; Ok(self.writer) } /// Does not write anything fn serialize_unit(self) -> Result { Ok(self.writer) } /// Does not write anything fn serialize_unit_struct(self, _name: &'static str) -> Result { Ok(self.writer) } /// We cannot store both a variant discriminant and a variant value, /// so serialization of enum newtype variant returns `Err(Unsupported)` fn serialize_newtype_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _value: &T, ) -> Result { Err(DeError::Unsupported( format!("enum newtype variant `{}::{}` cannot be serialized as an attribute or text content value", name, variant).into(), )) } #[inline] fn 
serialize_seq(self, _len: Option) -> Result { Ok(SimpleSeq { writer: self.writer, target: self.target, level: self.level, first: true, indent: self.indent, }) } #[inline] fn serialize_tuple(self, _len: usize) -> Result { self.serialize_seq(None) } #[inline] fn serialize_tuple_struct( self, _name: &'static str, _len: usize, ) -> Result { self.serialize_seq(None) } fn serialize_tuple_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _len: usize, ) -> Result { Err(DeError::Unsupported( format!("enum tuple variant `{}::{}` cannot be serialized as an attribute or text content value", name, variant).into(), )) } fn serialize_map(self, _len: Option) -> Result { Err(DeError::Unsupported( "map cannot be serialized as an attribute or text content value".into(), )) } fn serialize_struct( self, name: &'static str, _len: usize, ) -> Result { Err(DeError::Unsupported( format!( "struct `{}` cannot be serialized as an attribute or text content value", name ) .into(), )) } fn serialize_struct_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _len: usize, ) -> Result { Err(DeError::Unsupported( format!("enum struct variant `{}::{}` cannot be serialized as an attribute or text content value", name, variant).into(), )) } } /// Serializer for a sequence of atomic values delimited by space pub struct SimpleSeq<'i, W: Write> { writer: W, target: QuoteTarget, level: QuoteLevel, /// If `true`, nothing was written yet first: bool, /// Indent that should be written before the content if content is not an empty string indent: Indent<'i>, } impl<'i, W: Write> SerializeSeq for SimpleSeq<'i, W> { type Ok = W; type Error = DeError; fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { // Write indent for the first element and delimiter for others //FIXME: sequence with only empty strings will be serialized as indent only + delimiters if self.first { self.indent.write_indent(&mut self.writer)?; 
} else { self.writer.write_char(' ')?; } self.first = false; value.serialize(AtomicSerializer { writer: &mut self.writer, target: self.target, level: self.level, })?; Ok(()) } #[inline] fn end(self) -> Result { Ok(self.writer) } } impl<'i, W: Write> SerializeTuple for SimpleSeq<'i, W> { type Ok = W; type Error = DeError; #[inline] fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { ::serialize_element(self, value) } #[inline] fn end(self) -> Result { ::end(self) } } impl<'i, W: Write> SerializeTupleStruct for SimpleSeq<'i, W> { type Ok = W; type Error = DeError; #[inline] fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { ::serialize_element(self, value) } #[inline] fn end(self) -> Result { ::end(self) } } //////////////////////////////////////////////////////////////////////////////////////////////////// #[cfg(test)] mod tests { use super::*; use crate::utils::Bytes; use serde::Serialize; use std::collections::BTreeMap; #[derive(Debug, Serialize, PartialEq)] struct Unit; #[derive(Debug, Serialize, PartialEq)] struct Newtype(usize); #[derive(Debug, Serialize, PartialEq)] struct Tuple(&'static str, usize); #[derive(Debug, Serialize, PartialEq)] struct Struct { key: &'static str, val: usize, } #[derive(Debug, Serialize, PartialEq)] enum Enum { Unit, #[serde(rename = "<\"&'>")] UnitEscaped, Newtype(usize), Tuple(&'static str, usize), Struct { key: &'static str, val: usize, }, } mod escape_item { use super::*; mod full { use super::*; use pretty_assertions::assert_eq; #[test] fn text() { assert_eq!( escape_item("text<\"'&> \t\n\rtext", QuoteTarget::Text, QuoteLevel::Full), "text<"'&> text" ); } #[test] fn double_quote_attr() { assert_eq!( escape_item( "text<\"'&> \t\n\rtext", QuoteTarget::DoubleQAttr, QuoteLevel::Full ), "text<"'&> text" ); } #[test] fn single_quote_attr() { assert_eq!( escape_item( "text<\"'&> \t\n\rtext", QuoteTarget::SingleQAttr, QuoteLevel::Full ), 
"text<"'&> text" ); } } mod partial { use super::*; use pretty_assertions::assert_eq; #[test] fn text() { assert_eq!( escape_item( "text<\"'&> \t\n\rtext", QuoteTarget::Text, QuoteLevel::Partial ), "text<\"'&> text" ); } #[test] fn double_quote_attr() { assert_eq!( escape_item( "text<\"'&> \t\n\rtext", QuoteTarget::DoubleQAttr, QuoteLevel::Partial ), "text<"'&> text" ); } #[test] fn single_quote_attr() { assert_eq!( escape_item( "text<\"'&> \t\n\rtext", QuoteTarget::SingleQAttr, QuoteLevel::Partial ), "text<\"'&> text" ); } } mod minimal { use super::*; use pretty_assertions::assert_eq; #[test] fn text() { assert_eq!( escape_item( "text<\"'&> \t\n\rtext", QuoteTarget::Text, QuoteLevel::Minimal ), "text<\"'&> text" ); } #[test] fn double_quote_attr() { assert_eq!( escape_item( "text<\"'&> \t\n\rtext", QuoteTarget::DoubleQAttr, QuoteLevel::Minimal ), "text<"'&> text" ); } #[test] fn single_quote_attr() { assert_eq!( escape_item( "text<\"'&> \t\n\rtext", QuoteTarget::SingleQAttr, QuoteLevel::Minimal ), "text<\"'&> text" ); } } } mod escape_list { use super::*; mod full { use super::*; use pretty_assertions::assert_eq; #[test] fn text() { assert_eq!( escape_list("text<\"'&> \t\n\rtext", QuoteTarget::Text, QuoteLevel::Full), "text<"'&> \t\n\rtext" ); } #[test] fn double_quote_attr() { assert_eq!( escape_list( "text<\"'&> \t\n\rtext", QuoteTarget::DoubleQAttr, QuoteLevel::Full ), "text<"'&> \t\n\rtext" ); } #[test] fn single_quote_attr() { assert_eq!( escape_list( "text<\"'&> \t\n\rtext", QuoteTarget::SingleQAttr, QuoteLevel::Full ), "text<"'&> \t\n\rtext" ); } } mod partial { use super::*; use pretty_assertions::assert_eq; #[test] fn text() { assert_eq!( escape_list( "text<\"'&> \t\n\rtext", QuoteTarget::Text, QuoteLevel::Partial ), "text<\"'&> \t\n\rtext" ); } #[test] fn double_quote_attr() { assert_eq!( escape_list( "text<\"'&> \t\n\rtext", QuoteTarget::DoubleQAttr, QuoteLevel::Partial ), "text<"'&> \t\n\rtext" ); } #[test] fn single_quote_attr() { assert_eq!( 
escape_list( "text<\"'&> \t\n\rtext", QuoteTarget::SingleQAttr, QuoteLevel::Partial ), "text<\"'&> \t\n\rtext" ); } } mod minimal { use super::*; use pretty_assertions::assert_eq; #[test] fn text() { assert_eq!( escape_list( "text<\"'&> \t\n\rtext", QuoteTarget::Text, QuoteLevel::Minimal ), "text<\"'&> \t\n\rtext" ); } #[test] fn double_quote_attr() { assert_eq!( escape_list( "text<\"'&> \t\n\rtext", QuoteTarget::DoubleQAttr, QuoteLevel::Minimal ), "text<"'&> \t\n\rtext" ); } #[test] fn single_quote_attr() { assert_eq!( escape_list( "text<\"'&> \t\n\rtext", QuoteTarget::SingleQAttr, QuoteLevel::Minimal ), "text<\"'&> \t\n\rtext" ); } } } /// Tests for serialize atomic and union values, as defined in XSD specification mod atomic { use super::*; use pretty_assertions::assert_eq; /// Checks that given `$data` successfully serialized as `$expected` macro_rules! serialize_as { ($name:ident: $data:expr => $expected:literal) => { #[test] fn $name() { let ser = AtomicSerializer { writer: String::new(), target: QuoteTarget::Text, level: QuoteLevel::Full, }; let buffer = $data.serialize(ser).unwrap(); assert_eq!(buffer, $expected); } }; } /// Checks that attempt to serialize given `$data` results to a /// serialization error `$kind` with `$reason` macro_rules! 
err { ($name:ident: $data:expr => $kind:ident($reason:literal)) => { #[test] fn $name() { let mut buffer = String::new(); let ser = AtomicSerializer { writer: &mut buffer, target: QuoteTarget::Text, level: QuoteLevel::Full, }; match $data.serialize(ser).unwrap_err() { DeError::$kind(e) => assert_eq!(e, $reason), e => panic!( "Expected `{}({})`, found `{:?}`", stringify!($kind), $reason, e ), } assert_eq!(buffer, ""); } }; } serialize_as!(false_: false => "false"); serialize_as!(true_: true => "true"); serialize_as!(i8_: -42i8 => "-42"); serialize_as!(i16_: -4200i16 => "-4200"); serialize_as!(i32_: -42000000i32 => "-42000000"); serialize_as!(i64_: -42000000000000i64 => "-42000000000000"); serialize_as!(isize_: -42000000000000isize => "-42000000000000"); serialize_as!(u8_: 42u8 => "42"); serialize_as!(u16_: 4200u16 => "4200"); serialize_as!(u32_: 42000000u32 => "42000000"); serialize_as!(u64_: 42000000000000u64 => "42000000000000"); serialize_as!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! 
{ serialize_as!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); serialize_as!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } serialize_as!(f32_: 4.2f32 => "4.2"); serialize_as!(f64_: 4.2f64 => "4.2"); serialize_as!(char_non_escaped: 'h' => "h"); serialize_as!(char_lt: '<' => "<"); serialize_as!(char_gt: '>' => ">"); serialize_as!(char_amp: '&' => "&"); serialize_as!(char_apos: '\'' => "'"); serialize_as!(char_quot: '"' => """); serialize_as!(str_non_escaped: "non-escaped-string" => "non-escaped-string"); serialize_as!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Bytes(b"<\"escaped & bytes'>") => Unsupported("`serialize_bytes` not supported yet")); serialize_as!(option_none: Option::<&str>::None => ""); serialize_as!(option_some: Some("non-escaped-string") => "non-escaped-string"); err!(unit: () => Unsupported("unit type `()` cannot be serialized as an `xs:list` item")); err!(unit_struct: Unit => Unsupported("unit struct `Unit` cannot be serialized as an `xs:list` item")); serialize_as!(enum_unit: Enum::Unit => "Unit"); serialize_as!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); serialize_as!(newtype: Newtype(42) => "42"); err!(enum_newtype: Enum::Newtype(42) => Unsupported("enum newtype variant `Enum::Newtype` cannot be serialized as an `xs:list` item")); err!(seq: vec![1, 2, 3] => Unsupported("sequence cannot be serialized as an `xs:list` item")); err!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => Unsupported("tuple cannot be serialized as an `xs:list` item")); err!(tuple_struct: Tuple("first", 42) => Unsupported("tuple struct `Tuple` cannot be serialized as an `xs:list` item")); err!(enum_tuple: Enum::Tuple("first", 42) => Unsupported("enum tuple variant `Enum::Tuple` cannot be serialized as an `xs:list` item")); err!(map: BTreeMap::from([(1, 2), (3, 4)]) => Unsupported("map cannot be serialized as an `xs:list` item")); err!(struct_: Struct { key: 
"answer", val: 42 } => Unsupported("struct `Struct` cannot be serialized as an `xs:list` item")); err!(enum_struct: Enum::Struct { key: "answer", val: 42 } => Unsupported("enum struct variant `Enum::Struct` cannot be serialized as an `xs:list` item")); } mod simple_type { use super::*; use pretty_assertions::assert_eq; /// Checks that given `$data` successfully serialized as `$expected` macro_rules! serialize_as { ($name:ident: $data:expr => $expected:literal) => { #[test] fn $name() { let ser = SimpleTypeSerializer { writer: String::new(), target: QuoteTarget::Text, level: QuoteLevel::Full, indent: Indent::None, }; let buffer = $data.serialize(ser).unwrap(); assert_eq!(buffer, $expected); } }; } /// Checks that attempt to serialize given `$data` results to a /// serialization error `$kind` with `$reason` macro_rules! err { ($name:ident: $data:expr => $kind:ident($reason:literal)) => { #[test] fn $name() { let mut buffer = String::new(); let ser = SimpleTypeSerializer { writer: &mut buffer, target: QuoteTarget::Text, level: QuoteLevel::Full, indent: Indent::None, }; match $data.serialize(ser).unwrap_err() { DeError::$kind(e) => assert_eq!(e, $reason), e => panic!( "Expected `{}({})`, found `{:?}`", stringify!($kind), $reason, e ), } assert_eq!(buffer, ""); } }; } serialize_as!(false_: false => "false"); serialize_as!(true_: true => "true"); serialize_as!(i8_: -42i8 => "-42"); serialize_as!(i16_: -4200i16 => "-4200"); serialize_as!(i32_: -42000000i32 => "-42000000"); serialize_as!(i64_: -42000000000000i64 => "-42000000000000"); serialize_as!(isize_: -42000000000000isize => "-42000000000000"); serialize_as!(u8_: 42u8 => "42"); serialize_as!(u16_: 4200u16 => "4200"); serialize_as!(u32_: 42000000u32 => "42000000"); serialize_as!(u64_: 42000000000000u64 => "42000000000000"); serialize_as!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! 
{ serialize_as!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); serialize_as!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } serialize_as!(f32_: 4.2f32 => "4.2"); serialize_as!(f64_: 4.2f64 => "4.2"); serialize_as!(char_non_escaped: 'h' => "h"); serialize_as!(char_lt: '<' => "<"); serialize_as!(char_gt: '>' => ">"); serialize_as!(char_amp: '&' => "&"); serialize_as!(char_apos: '\'' => "'"); serialize_as!(char_quot: '"' => """); serialize_as!(str_non_escaped: "non-escaped string" => "non-escaped string"); serialize_as!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Bytes(b"<\"escaped & bytes'>") => Unsupported("`serialize_bytes` not supported yet")); serialize_as!(option_none: Option::<&str>::None => ""); serialize_as!(option_some: Some("non-escaped string") => "non-escaped string"); serialize_as!(unit: () => ""); serialize_as!(unit_struct: Unit => ""); serialize_as!(enum_unit: Enum::Unit => "Unit"); serialize_as!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); serialize_as!(newtype: Newtype(42) => "42"); err!(enum_newtype: Enum::Newtype(42) => Unsupported("enum newtype variant `Enum::Newtype` cannot be serialized as an attribute or text content value")); serialize_as!(seq: vec![1, 2, 3] => "1 2 3"); serialize_as!(seq_empty: Vec::::new() => ""); serialize_as!(seq_with_1_empty_str: vec![""] => ""); serialize_as!(seq_with_2_empty_strs: vec!["", ""] => " "); serialize_as!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'> with spaces 3"); serialize_as!(tuple_struct: Tuple("first", 42) => "first 42"); err!(enum_tuple: Enum::Tuple("first", 42) => Unsupported("enum tuple variant `Enum::Tuple` cannot be serialized as an attribute or text content value")); err!(map: BTreeMap::from([(1, 2), (3, 4)]) => Unsupported("map cannot be serialized as an attribute or text content value")); err!(struct_: Struct { key: "answer", val: 42 } => Unsupported("struct `Struct` cannot be 
serialized as an attribute or text content value")); err!(enum_struct: Enum::Struct { key: "answer", val: 42 } => Unsupported("enum struct variant `Enum::Struct` cannot be serialized as an attribute or text content value")); } } quick-xml-0.27.1/src/utils.rs000064400000000000000000000122420072674642500141530ustar 00000000000000use std::borrow::Cow; use std::fmt::{self, Debug, Formatter}; #[cfg(feature = "serialize")] use serde::de::{Deserialize, Deserializer, Error, Visitor}; #[cfg(feature = "serialize")] use serde::ser::{Serialize, Serializer}; pub fn write_cow_string(f: &mut Formatter, cow_string: &Cow<[u8]>) -> fmt::Result { match cow_string { Cow::Owned(s) => { write!(f, "Owned(")?; write_byte_string(f, &s)?; } Cow::Borrowed(s) => { write!(f, "Borrowed(")?; write_byte_string(f, s)?; } } write!(f, ")") } pub fn write_byte_string(f: &mut Formatter, byte_string: &[u8]) -> fmt::Result { write!(f, "\"")?; for b in byte_string { match *b { 32..=33 | 35..=126 => write!(f, "{}", *b as char)?, 34 => write!(f, "\\\"")?, _ => write!(f, "{:#02X}", b)?, } } write!(f, "\"")?; Ok(()) } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Wrapper around `Vec` that has a human-readable debug representation: /// printable ASCII symbols output as is, all other output in HEX notation. 
/// /// Also, when `serialize` feature is on, this type deserialized using /// [`deserialize_byte_buf`](serde::Deserializer::deserialize_byte_buf) instead /// of vector's generic [`deserialize_seq`](serde::Deserializer::deserialize_seq) #[derive(PartialEq, Eq)] pub struct ByteBuf(pub Vec); impl Debug for ByteBuf { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write_byte_string(f, &self.0) } } #[cfg(feature = "serialize")] impl<'de> Deserialize<'de> for ByteBuf { fn deserialize(d: D) -> Result where D: Deserializer<'de>, { struct ValueVisitor; impl<'de> Visitor<'de> for ValueVisitor { type Value = ByteBuf; fn expecting(&self, f: &mut Formatter) -> fmt::Result { f.write_str("byte data") } fn visit_bytes(self, v: &[u8]) -> Result { Ok(ByteBuf(v.to_vec())) } fn visit_byte_buf(self, v: Vec) -> Result { Ok(ByteBuf(v)) } } d.deserialize_byte_buf(ValueVisitor) } } #[cfg(feature = "serialize")] impl Serialize for ByteBuf { fn serialize(&self, serializer: S) -> Result where S: Serializer, { serializer.serialize_bytes(&self.0) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Wrapper around `&[u8]` that has a human-readable debug representation: /// printable ASCII symbols output as is, all other output in HEX notation. 
/// /// Also, when `serialize` feature is on, this type deserialized using /// [`deserialize_bytes`](serde::Deserializer::deserialize_bytes) instead /// of vector's generic [`deserialize_seq`](serde::Deserializer::deserialize_seq) #[derive(PartialEq, Eq)] pub struct Bytes<'de>(pub &'de [u8]); impl<'de> Debug for Bytes<'de> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write_byte_string(f, self.0) } } #[cfg(feature = "serialize")] impl<'de> Deserialize<'de> for Bytes<'de> { fn deserialize(d: D) -> Result where D: Deserializer<'de>, { struct ValueVisitor; impl<'de> Visitor<'de> for ValueVisitor { type Value = Bytes<'de>; fn expecting(&self, f: &mut Formatter) -> fmt::Result { f.write_str("borrowed bytes") } fn visit_borrowed_bytes(self, v: &'de [u8]) -> Result { Ok(Bytes(v)) } } d.deserialize_bytes(ValueVisitor) } } #[cfg(feature = "serialize")] impl<'de> Serialize for Bytes<'de> { fn serialize(&self, serializer: S) -> Result where S: Serializer, { serializer.serialize_bytes(self.0) } } //////////////////////////////////////////////////////////////////////////////////////////////////// #[cfg(test)] mod tests { use super::*; use pretty_assertions::assert_eq; #[test] fn write_byte_string0() { let bytes = ByteBuf(vec![10, 32, 32, 32, 32, 32, 32, 32, 32]); assert_eq!(format!("{:?}", bytes), "\"0xA \"".to_owned()); } #[test] fn write_byte_string1() { let bytes = ByteBuf(vec![ 104, 116, 116, 112, 58, 47, 47, 119, 119, 119, 46, 119, 51, 46, 111, 114, 103, 47, 50, 48, 48, 50, 47, 48, 55, 47, 111, 119, 108, 35, ]); assert_eq!( format!("{:?}", bytes), r##""http://www.w3.org/2002/07/owl#""##.to_owned() ); } #[test] fn write_byte_string3() { let bytes = ByteBuf(vec![ 67, 108, 97, 115, 115, 32, 73, 82, 73, 61, 34, 35, 66, 34, ]); assert_eq!(format!("{:?}", bytes), r##""Class IRI=\"#B\"""##.to_owned()); } } quick-xml-0.27.1/src/writer.rs000064400000000000000000000505350072674642500143360ustar 00000000000000//! Contains high-level interface for an events-based XML emitter. 
use std::io::Write; use crate::encoding::UTF8_BOM; use crate::errors::Result; use crate::events::{attributes::Attribute, BytesCData, BytesStart, BytesText, Event}; /// XML writer. Writes XML [`Event`]s to a [`std::io::Write`] implementor. /// /// # Examples /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::{Event, BytesEnd, BytesStart}; /// use quick_xml::reader::Reader; /// use quick_xml::writer::Writer; /// use std::io::Cursor; /// /// let xml = r#"text"#; /// let mut reader = Reader::from_str(xml); /// reader.trim_text(true); /// let mut writer = Writer::new(Cursor::new(Vec::new())); /// loop { /// match reader.read_event() { /// Ok(Event::Start(e)) if e.name().as_ref() == b"this_tag" => { /// /// // crates a new element ... alternatively we could reuse `e` by calling /// // `e.into_owned()` /// let mut elem = BytesStart::new("my_elem"); /// /// // collect existing attributes /// elem.extend_attributes(e.attributes().map(|attr| attr.unwrap())); /// /// // copy existing attributes, adds a new my-key="some value" attribute /// elem.push_attribute(("my-key", "some value")); /// /// // writes the event to the writer /// assert!(writer.write_event(Event::Start(elem)).is_ok()); /// }, /// Ok(Event::End(e)) if e.name().as_ref() == b"this_tag" => { /// assert!(writer.write_event(Event::End(BytesEnd::new("my_elem"))).is_ok()); /// }, /// Ok(Event::Eof) => break, /// // we can either move or borrow the event to write, depending on your use-case /// Ok(e) => assert!(writer.write_event(e).is_ok()), /// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e), /// } /// } /// /// let result = writer.into_inner().into_inner(); /// let expected = r#"text"#; /// assert_eq!(result, expected.as_bytes()); /// ``` #[derive(Clone)] pub struct Writer { /// underlying writer writer: W, indent: Option, } impl Writer { /// Creates a `Writer` from a generic writer. 
pub fn new(inner: W) -> Writer { Writer { writer: inner, indent: None, } } /// Creates a `Writer` with configured whitespace indents from a generic writer. pub fn new_with_indent(inner: W, indent_char: u8, indent_size: usize) -> Writer { Writer { writer: inner, indent: Some(Indentation::new(indent_char, indent_size)), } } /// Consumes this `Writer`, returning the underlying writer. pub fn into_inner(self) -> W { self.writer } /// Get inner writer, keeping ownership pub fn inner(&mut self) -> &mut W { &mut self.writer } /// Write a [Byte-Order-Mark] character to the document. /// /// # Example /// /// ```rust /// # use quick_xml::Result; /// # fn main() -> Result<()> { /// use quick_xml::events::{BytesStart, BytesText, Event}; /// use quick_xml::writer::Writer; /// use quick_xml::Error; /// use std::io::Cursor; /// /// let mut buffer = Vec::new(); /// let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); /// /// writer.write_bom()?; /// writer /// .create_element("empty") /// .with_attribute(("attr1", "value1")) /// .write_empty() /// .expect("failure"); /// /// assert_eq!( /// std::str::from_utf8(&buffer).unwrap(), /// "\u{FEFF}" /// ); /// # Ok(()) /// # } /// ``` /// [Byte-Order-Mark]: https://unicode.org/faq/utf_bom.html#BOM pub fn write_bom(&mut self) -> Result<()> { self.write(UTF8_BOM) } /// Writes the given event to the underlying writer. 
pub fn write_event<'a, E: AsRef>>(&mut self, event: E) -> Result<()> { let mut next_should_line_break = true; let result = match *event.as_ref() { Event::Start(ref e) => { let result = self.write_wrapped(b"<", e, b">"); if let Some(i) = self.indent.as_mut() { i.grow(); } result } Event::End(ref e) => { if let Some(i) = self.indent.as_mut() { i.shrink(); } self.write_wrapped(b"") } Event::Empty(ref e) => self.write_wrapped(b"<", e, b"/>"), Event::Text(ref e) => { next_should_line_break = false; self.write(e) } Event::Comment(ref e) => self.write_wrapped(b""), Event::CData(ref e) => { next_should_line_break = false; self.write(b"") } Event::Decl(ref e) => self.write_wrapped(b""), Event::PI(ref e) => self.write_wrapped(b""), Event::DocType(ref e) => self.write_wrapped(b""), Event::Eof => Ok(()), }; if let Some(i) = self.indent.as_mut() { i.should_line_break = next_should_line_break; } result } /// Writes bytes #[inline] pub(crate) fn write(&mut self, value: &[u8]) -> Result<()> { self.writer.write_all(value).map_err(Into::into) } #[inline] fn write_wrapped(&mut self, before: &[u8], value: &[u8], after: &[u8]) -> Result<()> { if let Some(ref i) = self.indent { if i.should_line_break { self.writer.write_all(b"\n")?; self.writer.write_all(i.current())?; } } self.write(before)?; self.write(value)?; self.write(after)?; Ok(()) } /// Manually write a newline and indentation at the proper level. /// /// This can be used when the heuristic to line break and indent after any /// [`Event`] apart from [`Text`] fails such as when a [`Start`] occurs directly /// after [`Text`]. /// /// This method will do nothing if `Writer` was not constructed with [`new_with_indent`]. 
/// /// [`Text`]: Event::Text /// [`Start`]: Event::Start /// [`new_with_indent`]: Self::new_with_indent pub fn write_indent(&mut self) -> Result<()> { if let Some(ref i) = self.indent { self.writer.write_all(b"\n")?; self.writer.write_all(i.current())?; } Ok(()) } /// Provides a simple, high-level API for writing XML elements. /// /// Returns an [`ElementWriter`] that simplifies setting attributes and writing /// content inside the element. /// /// # Example /// /// ```rust /// # use quick_xml::Result; /// # fn main() -> Result<()> { /// use quick_xml::events::{BytesStart, BytesText, Event}; /// use quick_xml::writer::Writer; /// use quick_xml::Error; /// use std::io::Cursor; /// /// let mut writer = Writer::new(Cursor::new(Vec::new())); /// /// // writes /// writer.create_element("tag") /// .with_attribute(("attr1", "value1")) // chain `with_attribute()` calls to add many attributes /// .write_empty()?; /// /// // writes with some text inside /// writer.create_element("tag") /// .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()) // or add attributes from an iterator /// .write_text_content(BytesText::new("with some text inside"))?; /// /// // writes appleorange /// writer.create_element("tag") /// .write_inner_content(|writer| { /// let fruits = ["apple", "orange"]; /// for (quant, item) in fruits.iter().enumerate() { /// writer /// .create_element("fruit") /// .with_attribute(("quantity", quant.to_string().as_str())) /// .write_text_content(BytesText::new(item))?; /// } /// Ok(()) /// })?; /// # Ok(()) /// # } /// ``` #[must_use] pub fn create_element<'a, N>(&'a mut self, name: &'a N) -> ElementWriter where N: 'a + AsRef + ?Sized, { ElementWriter { writer: self, start_tag: BytesStart::new(name.as_ref()), } } } /// A struct to write an element. 
Contains methods to add attributes and inner /// elements to the element pub struct ElementWriter<'a, W: Write> { writer: &'a mut Writer, start_tag: BytesStart<'a>, } impl<'a, W: Write> ElementWriter<'a, W> { /// Adds an attribute to this element. pub fn with_attribute<'b, I>(mut self, attr: I) -> Self where I: Into>, { self.start_tag.push_attribute(attr); self } /// Add additional attributes to this element using an iterator. /// /// The yielded items must be convertible to [`Attribute`] using `Into`. pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self where I: IntoIterator, I::Item: Into>, { self.start_tag.extend_attributes(attributes); self } /// Write some text inside the current element. pub fn write_text_content(self, text: BytesText) -> Result<&'a mut Writer> { self.writer .write_event(Event::Start(self.start_tag.borrow()))?; self.writer.write_event(Event::Text(text))?; self.writer .write_event(Event::End(self.start_tag.to_end()))?; Ok(self.writer) } /// Write a CData event `` inside the current element. pub fn write_cdata_content(self, text: BytesCData) -> Result<&'a mut Writer> { self.writer .write_event(Event::Start(self.start_tag.borrow()))?; self.writer.write_event(Event::CData(text))?; self.writer .write_event(Event::End(self.start_tag.to_end()))?; Ok(self.writer) } /// Write a processing instruction `` inside the current element. pub fn write_pi_content(self, text: BytesText) -> Result<&'a mut Writer> { self.writer .write_event(Event::Start(self.start_tag.borrow()))?; self.writer.write_event(Event::PI(text))?; self.writer .write_event(Event::End(self.start_tag.to_end()))?; Ok(self.writer) } /// Write an empty (self-closing) tag. pub fn write_empty(self) -> Result<&'a mut Writer> { self.writer.write_event(Event::Empty(self.start_tag))?; Ok(self.writer) } /// Create a new scope for writing XML inside the current element. 
pub fn write_inner_content(self, closure: F) -> Result<&'a mut Writer> where F: Fn(&mut Writer) -> Result<()>, { self.writer .write_event(Event::Start(self.start_tag.borrow()))?; closure(self.writer)?; self.writer .write_event(Event::End(self.start_tag.to_end()))?; Ok(self.writer) } } #[derive(Clone)] pub(crate) struct Indentation { should_line_break: bool, indent_char: u8, indent_size: usize, indents: Vec, indents_len: usize, } impl Indentation { pub fn new(indent_char: u8, indent_size: usize) -> Self { Self { should_line_break: false, indent_char, indent_size, indents: vec![indent_char; 128], indents_len: 0, } } /// Increase indentation by one level pub fn grow(&mut self) { self.indents_len += self.indent_size; if self.indents_len > self.indents.len() { self.indents.resize(self.indents_len, self.indent_char); } } /// Decrease indentation by one level. Do nothing, if level already zero pub fn shrink(&mut self) { self.indents_len = self.indents_len.saturating_sub(self.indent_size); } /// Returns indent string for current level pub fn current(&self) -> &[u8] { &self.indents[..self.indents_len] } } #[cfg(test)] mod indentation { use super::*; use crate::events::*; use pretty_assertions::assert_eq; #[test] fn self_closed() { let mut buffer = Vec::new(); let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); let tag = BytesStart::new("self-closed") .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); writer .write_event(Event::Empty(tag)) .expect("write tag failed"); assert_eq!( std::str::from_utf8(&buffer).unwrap(), r#""# ); } #[test] fn empty_paired() { let mut buffer = Vec::new(); let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); let start = BytesStart::new("paired") .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); let end = start.to_end(); writer .write_event(Event::Start(start.clone())) .expect("write start tag failed"); writer .write_event(Event::End(end)) .expect("write end tag 
failed"); assert_eq!( std::str::from_utf8(&buffer).unwrap(), r#" "# ); } #[test] fn paired_with_inner() { let mut buffer = Vec::new(); let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); let start = BytesStart::new("paired") .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); let end = start.to_end(); let inner = BytesStart::new("inner"); writer .write_event(Event::Start(start.clone())) .expect("write start tag failed"); writer .write_event(Event::Empty(inner)) .expect("write inner tag failed"); writer .write_event(Event::End(end)) .expect("write end tag failed"); assert_eq!( std::str::from_utf8(&buffer).unwrap(), r#" "# ); } #[test] fn paired_with_text() { let mut buffer = Vec::new(); let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); let start = BytesStart::new("paired") .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); let end = start.to_end(); let text = BytesText::new("text"); writer .write_event(Event::Start(start.clone())) .expect("write start tag failed"); writer .write_event(Event::Text(text)) .expect("write text failed"); writer .write_event(Event::End(end)) .expect("write end tag failed"); assert_eq!( std::str::from_utf8(&buffer).unwrap(), r#"text"# ); } #[test] fn mixed_content() { let mut buffer = Vec::new(); let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); let start = BytesStart::new("paired") .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); let end = start.to_end(); let text = BytesText::new("text"); let inner = BytesStart::new("inner"); writer .write_event(Event::Start(start.clone())) .expect("write start tag failed"); writer .write_event(Event::Text(text)) .expect("write text failed"); writer .write_event(Event::Empty(inner)) .expect("write inner tag failed"); writer .write_event(Event::End(end)) .expect("write end tag failed"); assert_eq!( std::str::from_utf8(&buffer).unwrap(), r#"text "# ); } #[test] fn nested() { let mut 
buffer = Vec::new(); let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); let start = BytesStart::new("paired") .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); let end = start.to_end(); let inner = BytesStart::new("inner"); writer .write_event(Event::Start(start.clone())) .expect("write start 1 tag failed"); writer .write_event(Event::Start(start.clone())) .expect("write start 2 tag failed"); writer .write_event(Event::Empty(inner)) .expect("write inner tag failed"); writer .write_event(Event::End(end.clone())) .expect("write end tag 2 failed"); writer .write_event(Event::End(end)) .expect("write end tag 1 failed"); assert_eq!( std::str::from_utf8(&buffer).unwrap(), r#" "# ); } #[test] fn element_writer_empty() { let mut buffer = Vec::new(); let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); writer .create_element("empty") .with_attribute(("attr1", "value1")) .with_attribute(("attr2", "value2")) .write_empty() .expect("failure"); assert_eq!( std::str::from_utf8(&buffer).unwrap(), r#""# ); } #[test] fn element_writer_text() { let mut buffer = Vec::new(); let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); writer .create_element("paired") .with_attribute(("attr1", "value1")) .with_attribute(("attr2", "value2")) .write_text_content(BytesText::new("text")) .expect("failure"); assert_eq!( std::str::from_utf8(&buffer).unwrap(), r#"text"# ); } #[test] fn element_writer_nested() { let mut buffer = Vec::new(); let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); writer .create_element("outer") .with_attribute(("attr1", "value1")) .with_attribute(("attr2", "value2")) .write_inner_content(|writer| { let fruits = ["apple", "orange", "banana"]; for (quant, item) in fruits.iter().enumerate() { writer .create_element("fruit") .with_attribute(("quantity", quant.to_string().as_str())) .write_text_content(BytesText::new(item))?; } writer .create_element("inner") .write_inner_content(|writer| { 
writer.create_element("empty").write_empty()?; Ok(()) })?; Ok(()) }) .expect("failure"); assert_eq!( std::str::from_utf8(&buffer).unwrap(), r#" apple orange banana "# ); } } quick-xml-0.27.1/tests/README.md000064400000000000000000000030260072674642500142770ustar 00000000000000# Document descrptions document.xml medium length, mostly empty tags, a few short attributes per element, no escaping html5.html html5.txt libreoffice_document.fodt long, mix of attributes and text, not much escaping, lots of non-ascii characters, lots of namespaces linescore.xml medium length, lots of attributes, short attributes, few escapes opennews_all.rss players.xml long, lots of attributes, short attributes, no text, no escapes rpm_filelists.xml long, mostly medium-length text elements, not much escaping rpm_other.xml long, mix of attributes and text, lots of escaping (both entity and char literal), long attributes rpm_primary.xml long, mix of attributes and text, not much escaping, mix of attribute lengths, some namespaces rpm_primary2.xml long, mix of attributes and text, not much escaping, mix of attribute lengths, some namespaces sample_1.xml short, mix of attributes and text, lots of escapes sample_1_short.txt sample_1_full.txt sample_2.xml sample_2_short.txt sample_3.xml sample_2_full.txt sample_3_short.txt sample_3_full.txt sample_4.xml sample_4_short.txt sample_4_full.txt sample_5_short.txt sample_5_utf16bom.xml sample_5_full.txt sample_ns_short.txt sample_ns.xml short, lots of namespaces, no escapes sample_rss.xml long, few attributes, mix of attribute lengths, escapes in text content test_writer_indent_cdata.xml test_writer_indent.xml medium length, lots of namespaces, no escaping test_writer.xml utf16be.xml utf16le.xml