quick-xml-0.36.1/.cargo_vcs_info.json0000644000000001360000000000100130350ustar { "git": { "sha1": "2f3824a1d265ac0b683aa485683ad00ddbc5c092" }, "path_in_vcs": "" }quick-xml-0.36.1/Cargo.lock0000644000000560460000000000100110230ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 3 [[package]] name = "addr2line" version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678" dependencies = [ "gimli", ] [[package]] name = "adler" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "aho-corasick" version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", ] [[package]] name = "anes" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "arbitrary" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110" dependencies = [ "derive_arbitrary", ] [[package]] name = "async-stream" version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd56dd203fef61ac097dd65721a419ddccb106b2d2b70ba60a6b529f03961a51" dependencies = [ "async-stream-impl", "futures-core", "pin-project-lite", ] [[package]] name = "async-stream-impl" version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "atty" version = "0.2.14" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ "hermit-abi", "libc", "winapi", ] [[package]] name = "autocfg" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" [[package]] name = "backtrace" version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17c6a35df3749d2e8bb1b7b21a976d82b15548788d2735b9d82f329268f71a11" dependencies = [ "addr2line", "cc", "cfg-if", "libc", "miniz_oxide", "object", "rustc-demangle", ] [[package]] name = "bitflags" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bumpalo" version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "bytes" version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" [[package]] name = "cast" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" version = "1.0.99" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96c51067fd44124faa7f870b4b1c969379ad32b2ba805aa959430ceaa384f695" [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "ciborium" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" 
dependencies = [ "ciborium-io", "ciborium-ll", "serde", ] [[package]] name = "ciborium-io" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" [[package]] name = "ciborium-ll" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" dependencies = [ "ciborium-io", "half", ] [[package]] name = "clap" version = "3.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" dependencies = [ "bitflags", "clap_lex", "indexmap", "textwrap", ] [[package]] name = "clap_lex" version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" dependencies = [ "os_str_bytes", ] [[package]] name = "criterion" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e7c76e09c1aae2bc52b3d2f29e13c6572553b30c4aa1b8a49fd70de6412654cb" dependencies = [ "anes", "atty", "cast", "ciborium", "clap", "criterion-plot", "itertools", "lazy_static", "num-traits", "oorandom", "plotters", "rayon", "regex", "serde", "serde_derive", "serde_json", "tinytemplate", "walkdir", ] [[package]] name = "criterion-plot" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" dependencies = [ "cast", "itertools", ] [[package]] name = "crossbeam-deque" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" dependencies = [ "crossbeam-epoch", "crossbeam-utils", ] [[package]] name = "crossbeam-epoch" version = "0.9.18" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" dependencies = [ "crossbeam-utils", ] [[package]] name = "crossbeam-utils" version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" [[package]] name = "crunchy" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" [[package]] name = "derive_arbitrary" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "diff" version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" [[package]] name = "document-features" version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef5282ad69563b5fc40319526ba27e0e7363d552a896f0297d54f767717f9b95" dependencies = [ "litrs", ] [[package]] name = "either" version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" [[package]] name = "encoding_rs" version = "0.8.34" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" dependencies = [ "cfg-if", ] [[package]] name = "futures-core" version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" [[package]] name = "gimli" version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" [[package]] name = "half" version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" dependencies = [ "cfg-if", "crunchy", ] [[package]] name = "hashbrown" version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" [[package]] name = "hermit-abi" version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" dependencies = [ "libc", ] [[package]] name = "indexmap" version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", "hashbrown", ] [[package]] name = "itertools" version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" dependencies = [ "either", ] [[package]] name = "itoa" version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "js-sys" version = "0.3.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" dependencies = [ "wasm-bindgen", ] [[package]] name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" version = "0.2.155" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" [[package]] name = "litrs" version = 
"0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4ce301924b7887e9d637144fdade93f9dfff9b60981d4ac161db09720d39aa5" [[package]] name = "log" version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" [[package]] name = "memchr" version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" [[package]] name = "miniz_oxide" version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae" dependencies = [ "adler", ] [[package]] name = "num-traits" version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", ] [[package]] name = "object" version = "0.35.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8ec7ab813848ba4522158d5517a6093db1ded27575b070f4177b8d12b41db5e" dependencies = [ "memchr", ] [[package]] name = "once_cell" version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "oorandom" version = "11.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" [[package]] name = "ordered-float" version = "2.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" dependencies = [ "num-traits", ] [[package]] name = "os_str_bytes" version = "6.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" [[package]] name = "pin-project-lite" version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" [[package]] name = "plotters" version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a15b6eccb8484002195a3e44fe65a4ce8e93a625797a063735536fd59cb01cf3" dependencies = [ "num-traits", "plotters-backend", "plotters-svg", "wasm-bindgen", "web-sys", ] [[package]] name = "plotters-backend" version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "414cec62c6634ae900ea1c56128dfe87cf63e7caece0852ec76aba307cebadb7" [[package]] name = "plotters-svg" version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81b30686a7d9c3e010b84284bdd26a29f2138574f52f5eb6f794fc0ad924e705" dependencies = [ "plotters-backend", ] [[package]] name = "pretty_assertions" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66" dependencies = [ "diff", "yansi", ] [[package]] name = "proc-macro2" version = "1.0.85" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23" dependencies = [ "unicode-ident", ] [[package]] name = "quick-xml" version = "0.36.1" dependencies = [ "arbitrary", "criterion", "document-features", "encoding_rs", "memchr", "pretty_assertions", "regex", "serde", "serde-value", "serde_derive", "tokio", "tokio-test", ] [[package]] name = "quote" version = "1.0.36" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" dependencies = [ "proc-macro2", ] [[package]] name = "rayon" version = "1.10.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" dependencies = [ "either", "rayon-core", ] [[package]] name = "rayon-core" version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" dependencies = [ "crossbeam-deque", "crossbeam-utils", ] [[package]] name = "regex" version = "1.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" dependencies = [ "aho-corasick", "memchr", "regex-automata", "regex-syntax", ] [[package]] name = "regex-automata" version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" dependencies = [ "aho-corasick", "memchr", "regex-syntax", ] [[package]] name = "regex-syntax" version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" [[package]] name = "rustc-demangle" version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" [[package]] name = "ryu" version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" [[package]] name = "same-file" version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" dependencies = [ "winapi-util", ] [[package]] name = "serde" version = "1.0.203" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" dependencies = [ "serde_derive", ] [[package]] 
name = "serde-value" version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c" dependencies = [ "ordered-float", "serde", ] [[package]] name = "serde_derive" version = "1.0.203" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "serde_json" version = "1.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" dependencies = [ "itoa", "ryu", "serde", ] [[package]] name = "syn" version = "2.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "textwrap" version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" [[package]] name = "tinytemplate" version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" dependencies = [ "serde", "serde_json", ] [[package]] name = "tokio" version = "1.38.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a" dependencies = [ "backtrace", "bytes", "pin-project-lite", "tokio-macros", ] [[package]] name = "tokio-macros" version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "tokio-stream" version = "0.1.15" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "267ac89e0bec6e691e5813911606935d77c476ff49024f98abcea3e7b15e37af" dependencies = [ "futures-core", "pin-project-lite", "tokio", ] [[package]] name = "tokio-test" version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2468baabc3311435b55dd935f702f42cd1b8abb7e754fb7dfb16bd36aa88f9f7" dependencies = [ "async-stream", "bytes", "futures-core", "tokio", "tokio-stream", ] [[package]] name = "unicode-ident" version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "walkdir" version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" dependencies = [ "same-file", "winapi-util", ] [[package]] name = "wasm-bindgen" version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" dependencies = [ "cfg-if", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", "syn", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" dependencies = [ "quote", "wasm-bindgen-macro-support", ] [[package]] name = "wasm-bindgen-macro-support" version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", "syn", 
"wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" [[package]] name = "web-sys" version = "0.3.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" dependencies = [ "js-sys", "wasm-bindgen", ] [[package]] name = "winapi" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" dependencies = [ "winapi-i686-pc-windows-gnu", "winapi-x86_64-pc-windows-gnu", ] [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" dependencies = [ "windows-sys", ] [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows-sys" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ "windows-targets", ] [[package]] name = "windows-targets" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", "windows_i686_gnu", "windows_i686_gnullvm", "windows_i686_msvc", "windows_x86_64_gnu", 
"windows_x86_64_gnullvm", "windows_x86_64_msvc", ] [[package]] name = "windows_aarch64_gnullvm" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" [[package]] name = "windows_aarch64_msvc" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" [[package]] name = "windows_i686_gnu" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" [[package]] name = "windows_i686_gnullvm" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" [[package]] name = "windows_i686_msvc" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" [[package]] name = "windows_x86_64_gnu" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" [[package]] name = "windows_x86_64_gnullvm" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" [[package]] name = "windows_x86_64_msvc" version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" [[package]] name = "yansi" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" quick-xml-0.36.1/Cargo.toml0000644000000063000000000000100110320ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When 
uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.56" name = "quick-xml" version = "0.36.1" include = [ "src/*", "LICENSE-MIT.md", "README.md", ] description = "High performance xml reader and writer" documentation = "https://docs.rs/quick-xml" readme = "README.md" keywords = [ "xml", "serde", "parser", "writer", "html", ] categories = [ "asynchronous", "encoding", "parsing", "parser-implementations", ] license = "MIT" repository = "https://github.com/tafia/quick-xml" [package.metadata.docs.rs] all-features = true [lib] bench = false [[example]] name = "read_nodes_serde" path = "examples/read_nodes_serde.rs" required-features = ["serialize"] [[example]] name = "flattened_enum" path = "examples/flattened_enum.rs" required-features = ["serialize"] [[test]] name = "encodings" path = "tests/encodings.rs" required-features = ["encoding"] [[test]] name = "serde_roundtrip" path = "tests/serde_roundtrip.rs" required-features = ["serialize"] [[test]] name = "serde-de" path = "tests/serde-de.rs" required-features = ["serialize"] [[test]] name = "serde-de-enum" path = "tests/serde-de-enum.rs" required-features = ["serialize"] [[test]] name = "serde-de-seq" path = "tests/serde-de-seq.rs" required-features = ["serialize"] [[test]] name = "serde-se" path = "tests/serde-se.rs" required-features = ["serialize"] [[test]] name = "serde-migrated" path = "tests/serde-migrated.rs" required-features = ["serialize"] [[test]] name = "serde-issues" path = "tests/serde-issues.rs" required-features = ["serialize"] [[test]] name = "async-tokio" path = "tests/async-tokio.rs" required-features = 
["async-tokio"] [[bench]] name = "microbenches" path = "benches/microbenches.rs" harness = false [[bench]] name = "macrobenches" path = "benches/macrobenches.rs" harness = false [dependencies.arbitrary] version = "1" features = ["derive"] optional = true [dependencies.document-features] version = "0.2" optional = true [dependencies.encoding_rs] version = "0.8" optional = true [dependencies.memchr] version = "2.1" [dependencies.serde] version = ">=1.0.139" optional = true [dependencies.tokio] version = "1.10" features = ["io-util"] optional = true default-features = false [dev-dependencies.criterion] version = "0.4" [dev-dependencies.pretty_assertions] version = "1.4" [dev-dependencies.regex] version = "1" [dev-dependencies.serde-value] version = "0.7" [dev-dependencies.serde_derive] version = "1.0.79" [dev-dependencies.tokio] version = "1.21" features = [ "macros", "rt", ] default-features = false [dev-dependencies.tokio-test] version = "0.4" [features] async-tokio = ["tokio"] default = [] encoding = ["encoding_rs"] escape-html = [] overlapped-lists = [] serde-types = ["serde/derive"] serialize = ["serde"] quick-xml-0.36.1/Cargo.toml.orig000064400000000000000000000203700072674642500145460ustar 00000000000000[package] name = "quick-xml" version = "0.36.1" description = "High performance xml reader and writer" edition = "2021" documentation = "https://docs.rs/quick-xml" repository = "https://github.com/tafia/quick-xml" keywords = ["xml", "serde", "parser", "writer", "html"] categories = ["asynchronous", "encoding", "parsing", "parser-implementations"] license = "MIT" rust-version = "1.56" # We exclude tests & examples & benches to reduce the size of a package. 
# Unfortunately, this is a source of warnings in the latest cargo when packaging: # > warning: ignoring {context} `{name}` as `{path}` is not included in the published package # That may become unnecessary once https://github.com/rust-lang/cargo/issues/13491 # will be resolved include = ["src/*", "LICENSE-MIT.md", "README.md"] [dependencies] document-features = { version = "0.2", optional = true } encoding_rs = { version = "0.8", optional = true } serde = { version = ">=1.0.139", optional = true } tokio = { version = "1.10", optional = true, default-features = false, features = ["io-util"] } memchr = "2.1" arbitrary = { version = "1", features = ["derive"], optional = true } [dev-dependencies] criterion = "0.4" pretty_assertions = "1.4" regex = "1" # #[serde(other)] allowed not only inside field_identifier since 1.0.79 # serde does not follow semver in numbering and their dependencies, so we specify the patch version here serde_derive = { version = "1.0.79" } serde-value = "0.7" tokio = { version = "1.21", default-features = false, features = ["macros", "rt"] } tokio-test = "0.4" [lib] bench = false [[bench]] name = "microbenches" harness = false path = "benches/microbenches.rs" [[bench]] name = "macrobenches" harness = false path = "benches/macrobenches.rs" [features] default = [] ## Enables support for asynchronous reading and writing from `tokio`'s IO-Traits by enabling ## [reading events] from types implementing [`tokio::io::AsyncBufRead`]. ## ## [reading events]: crate::reader::Reader::read_event_into_async async-tokio = ["tokio"] ## Enables support of non-UTF-8 encoded documents. Encoding will be inferred from ## the XML declaration if it is found, otherwise UTF-8 is assumed. ## ## Currently, only ASCII-compatible encodings are supported. For example, ## UTF-16 will not work (therefore, `quick-xml` is not [standard compliant]).
## ## Thus, quick-xml supports all encodings of [`encoding_rs`] except these: ## - [UTF-16BE] ## - [UTF-16LE] ## - [ISO-2022-JP] ## ## You should stop processing a document when one of these encodings is detected, ## because generated events can be wrong and do not reflect a real document structure! ## ## Because these are the only supported encodings that are not ASCII compatible, you can ## check for them: ## ## ``` ## use quick_xml::events::Event; ## use quick_xml::reader::Reader; ## ## # fn to_utf16le_with_bom(string: &str) -> Vec<u8> { ## # let mut bytes = Vec::new(); ## # bytes.extend_from_slice(&[0xFF, 0xFE]); // UTF-16 LE BOM ## # for ch in string.encode_utf16() { ## # bytes.extend_from_slice(&ch.to_le_bytes()); ## # } ## # bytes ## # } ## let xml = to_utf16le_with_bom(r#"<?xml encoding='UTF-16'?><element/>"#); ## let mut reader = Reader::from_reader(xml.as_ref()); ## reader.config_mut().trim_text(true); ## ## let mut buf = Vec::new(); ## let mut unsupported = false; ## loop { ## if !reader.decoder().encoding().is_ascii_compatible() { ## unsupported = true; ## break; ## } ## buf.clear(); ## match reader.read_event_into(&mut buf).unwrap() { ## Event::Eof => break, ## _ => {} ## } ## } ## assert_eq!(unsupported, true); ## ``` ## This restriction will be eliminated once issue [#158] is resolved. ## ## [standard compliant]: https://www.w3.org/TR/xml11/#charencoding ## [UTF-16BE]: encoding_rs::UTF_16BE ## [UTF-16LE]: encoding_rs::UTF_16LE ## [ISO-2022-JP]: encoding_rs::ISO_2022_JP ## [#158]: https://github.com/tafia/quick-xml/issues/158 encoding = ["encoding_rs"] ## Enables support for recognizing all [HTML 5 entities] in [`unescape`] ## function. The full list of entities also can be found in ## <https://html.spec.whatwg.org/entities.json>. ## ## [HTML 5 entities]: https://dev.w3.org/html5/html-author/charref ## [`unescape`]: crate::escape::unescape escape-html = [] ## This feature is for the Serde deserializer that enables support for deserializing ## lists where tags are overlapped with tags that do not correspond to the list.
## ## When this feature is enabled, the XML: ## ```xml ## <any-name> ## <item/> ## <another-item/> ## <item/> ## <item/> ## </any-name> ## ``` ## could be deserialized to a struct: ## ```no_run ## # use serde::Deserialize; ## #[derive(Deserialize)] ## #[serde(rename_all = "kebab-case")] ## struct AnyName { ## item: Vec<()>, ## another_item: (), ## } ## ``` ## ## When this feature is not enabled (default), only the first element will be ## associated with the field, and the deserialized type will report an error ## (duplicated field) when the deserializer encounters a second `<item/>`. ## ## Note that enabling this feature can lead to high and even unlimited memory ## consumption, because the deserializer needs to check all events up to the end of a ## container tag (`<any-name>` in this example) to figure out that there are no ## more items for a field. If `</any-name>` or even EOF is not encountered, the ## parsing will never end which can lead to a denial-of-service (DoS) scenario. ## ## Having several lists and overlapped elements for them in XML could also lead ## to quadratic parsing time, because the deserializer must check the list of ## events as many times as the number of sequence fields present in the schema. ## ## To reduce negative consequences, always [limit] the maximum number of events ## that [`Deserializer`] will buffer. ## ## This feature works only with the `serialize` feature and has no effect if `serialize` ## is not enabled. ## ## [limit]: crate::de::Deserializer::event_buffer_size ## [`Deserializer`]: crate::de::Deserializer overlapped-lists = [] ## Enables serialization of some quick-xml types using [`serde`]. This feature ## is rarely needed. ## ## This feature does NOT provide an XML serializer or deserializer. You should use ## the `serialize` feature for that instead. # Cannot name "serde" to avoid clash with dependency. # "dep:" prefix only available from Rust 1.60 serde-types = ["serde/derive"] ## Enables support for [`serde`] serialization and deserialization.
When this ## feature is enabled, quick-xml provides serializer and deserializer for XML. ## ## This feature does NOT enable serialization of the types inside quick-xml. ## If you need that, use the `serde-types` feature. serialize = ["serde"] # "dep:" prefix only available from Rust 1.60 [package.metadata.docs.rs] # document all features all-features = true # Tests, benchmarks and examples are not included in the package on crates.io, # so we need to specify a path, otherwise `cargo package` complains # That may become unnecessary once https://github.com/rust-lang/cargo/issues/13491 # will be resolved [[test]] name = "encodings" required-features = ["encoding"] path = "tests/encodings.rs" [[test]] name = "serde_roundtrip" required-features = ["serialize"] path = "tests/serde_roundtrip.rs" [[test]] name = "serde-de" required-features = ["serialize"] path = "tests/serde-de.rs" [[test]] name = "serde-de-enum" required-features = ["serialize"] path = "tests/serde-de-enum.rs" [[test]] name = "serde-de-seq" required-features = ["serialize"] path = "tests/serde-de-seq.rs" [[test]] name = "serde-se" required-features = ["serialize"] path = "tests/serde-se.rs" [[test]] name = "serde-migrated" required-features = ["serialize"] path = "tests/serde-migrated.rs" [[test]] name = "serde-issues" required-features = ["serialize"] path = "tests/serde-issues.rs" [[test]] name = "async-tokio" required-features = ["async-tokio"] path = "tests/async-tokio.rs" [[example]] name = "read_nodes_serde" required-features = ["serialize"] path = "examples/read_nodes_serde.rs" [[example]] name = "flattened_enum" required-features = ["serialize"] path = "examples/flattened_enum.rs" quick-xml-0.36.1/LICENSE-MIT.md000064400000000000000000000021210072674642500137040ustar 00000000000000The MIT License (MIT) Copyright (c) 2016 Johann Tuffe Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software
without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. quick-xml-0.36.1/README.md000064400000000000000000000147560072674642500131510ustar 00000000000000# quick-xml ![status](https://github.com/tafia/quick-xml/actions/workflows/rust.yml/badge.svg) [![Crate](https://img.shields.io/crates/v/quick-xml.svg)](https://crates.io/crates/quick-xml) [![docs.rs](https://docs.rs/quick-xml/badge.svg)](https://docs.rs/quick-xml) [![codecov](https://img.shields.io/codecov/c/github/tafia/quick-xml)](https://codecov.io/gh/tafia/quick-xml) [![MSRV](https://img.shields.io/badge/rustc-1.56.0+-ab6000.svg)](https://blog.rust-lang.org/2021/10/21/Rust-1.56.0.html) High performance xml pull reader/writer. The reader: - is almost zero-copy (use of `Cow` whenever possible) - is easy on memory allocation (the API provides a way to reuse buffers) - support various encoding (with `encoding` feature), namespaces resolution, special characters. Syntax is inspired by [xml-rs](https://github.com/netvl/xml-rs). 
## Example ### Reader ```rust use quick_xml::events::Event; use quick_xml::reader::Reader; let xml = r#" Test Test 2 "#; let mut reader = Reader::from_str(xml); reader.config_mut().trim_text(true); let mut count = 0; let mut txt = Vec::new(); let mut buf = Vec::new(); // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s) loop { // NOTE: this is the generic case when we don't know about the input BufRead. // when the input is a &str or a &[u8], we don't actually need to use another // buffer, we could directly call `reader.read_event()` match reader.read_event_into(&mut buf) { Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e), // exits the loop when reaching end of file Ok(Event::Eof) => break, Ok(Event::Start(e)) => { match e.name().as_ref() { b"tag1" => println!("attributes values: {:?}", e.attributes().map(|a| a.unwrap().value) .collect::<Vec<_>>()), b"tag2" => count += 1, _ => (), } } Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()), // There are several other `Event`s we do not consider here _ => (), } // if we don't keep a borrow elsewhere, we can clear the buffer to keep memory usage low buf.clear(); } ``` ### Writer ```rust use quick_xml::events::{Event, BytesEnd, BytesStart}; use quick_xml::reader::Reader; use quick_xml::writer::Writer; use std::io::Cursor; let xml = r#"text"#; let mut reader = Reader::from_str(xml); reader.config_mut().trim_text(true); let mut writer = Writer::new(Cursor::new(Vec::new())); loop { match reader.read_event() { Ok(Event::Start(e)) if e.name().as_ref() == b"this_tag" => { // creates a new element ...
alternatively we could reuse `e` by calling // `e.into_owned()` let mut elem = BytesStart::new("my_elem"); // collect existing attributes elem.extend_attributes(e.attributes().map(|attr| attr.unwrap())); // copy existing attributes, adds a new my-key="some value" attribute elem.push_attribute(("my-key", "some value")); // writes the event to the writer assert!(writer.write_event(Event::Start(elem)).is_ok()); }, Ok(Event::End(e)) if e.name().as_ref() == b"this_tag" => { assert!(writer.write_event(Event::End(BytesEnd::new("my_elem"))).is_ok()); }, Ok(Event::Eof) => break, // we can either move or borrow the event to write, depending on your use-case Ok(e) => assert!(writer.write_event(e).is_ok()), Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e), } } let result = writer.into_inner().into_inner(); let expected = r#"text"#; assert_eq!(result, expected.as_bytes()); ``` ## Serde When using the `serialize` feature, quick-xml can be used with serde's `Serialize`/`Deserialize` traits. The mapping between XML and Rust types, and in particular the syntax that allows you to specify the distinction between *elements* and *attributes*, is described in detail in the documentation for [deserialization](https://docs.rs/quick-xml/latest/quick_xml/de/). ### Credits This has largely been inspired by [serde-xml-rs](https://github.com/RReverser/serde-xml-rs). quick-xml follows its convention for deserialization, including the [`$value`](https://github.com/RReverser/serde-xml-rs#parsing-the-value-of-a-tag) special name. 
### Parsing the "value" of a tag If you have an input of the form `<foo abc="xyz">bar</foo>`, and you want to get at the `bar`, you can use either the special name `$text`, or the special name `$value`: ```rust,ignore struct Foo { #[serde(rename = "@abc")] pub abc: String, #[serde(rename = "$text")] pub body: String, } ``` Read about the difference in the [documentation](https://docs.rs/quick-xml/latest/quick_xml/de/index.html#difference-between-text-and-value-special-names). ### Performance Note that despite not focusing on performance (there are several unnecessary copies), it remains about 10x faster than serde-xml-rs. # Features - `encoding`: support for non-UTF-8 XML documents - `serialize`: support serde `Serialize`/`Deserialize` ## Performance Benchmarking is hard and the results depend on your input file and your machine. Here on my particular file, quick-xml is around **50 times faster** than [xml-rs](https://crates.io/crates/xml-rs) crate. ``` // quick-xml benches test bench_quick_xml ... bench: 198,866 ns/iter (+/- 9,663) test bench_quick_xml_escaped ... bench: 282,740 ns/iter (+/- 61,625) test bench_quick_xml_namespaced ... bench: 389,977 ns/iter (+/- 32,045) // same bench with xml-rs test bench_xml_rs ... bench: 14,468,930 ns/iter (+/- 321,171) // serde-xml-rs vs serialize feature test bench_serde_quick_xml ... bench: 1,181,198 ns/iter (+/- 138,290) test bench_serde_xml_rs ... bench: 15,039,564 ns/iter (+/- 783,485) ``` For a feature and performance comparison, you can also have a look at RazrFalcon's [parser comparison table](https://github.com/RazrFalcon/roxmltree#parsing). ## Contribute Any PR is welcome!
## License MIT quick-xml-0.36.1/src/de/key.rs000064400000000000000000000366530072674642500142070ustar 00000000000000use crate::de::simple_type::UnitOnly; use crate::de::str2bool; use crate::encoding::Decoder; use crate::errors::serialize::DeError; use crate::name::QName; use crate::utils::CowRef; use serde::de::{DeserializeSeed, Deserializer, EnumAccess, Visitor}; use serde::{forward_to_deserialize_any, serde_if_integer128}; use std::borrow::Cow; macro_rules! deserialize_num { ($method:ident, $visit:ident) => { fn $method(self, visitor: V) -> Result where V: Visitor<'de>, { visitor.$visit(self.name.parse()?) } }; } /// Decodes raw bytes using the deserializer encoding. /// The method will borrow if encoding is UTF-8 compatible and `name` contains /// only UTF-8 compatible characters (usually only ASCII characters). #[inline] fn decode_name<'n>(name: QName<'n>, decoder: Decoder) -> Result, DeError> { let local = name.local_name(); Ok(decoder.decode(local.into_inner())?) } /// A deserializer for xml names of elements and attributes. /// /// Used for deserializing values from: /// - attribute names (`<... name="..." ...>`) /// - element names (`...`) /// /// Converts a name to an identifier string using the following rules: /// /// - if it is an [`attribute`] name, put `@` in front of the identifier /// - if it is a namespace binding (`xmlns` or `xmlns:xxx`) put the decoded name /// to the identifier /// - put the decoded [`local_name()`] of a name to the identifier /// /// The final identifier looks like `[@]local_name`, or `@xmlns`, or `@xmlns:binding` /// (where `[]` means optional element). 
/// /// The deserializer also supports deserializing names as other primitive types: /// - numbers /// - booleans /// - unit (`()`) and unit structs /// - unit variants of the enumerations /// /// Because `serde` does not define on which side type conversion should be /// performed, and because [`Deserialize`] implementation for that primitives /// in serde does not accept strings, the deserializer will perform conversion /// by itself. /// /// The deserializer is able to deserialize unit and unit structs, but any name /// will be converted to the same unit instance. This is asymmetry with a serializer, /// which not able to serialize those types, because empty names are impossible /// in XML. /// /// `deserialize_any()` returns the same result as `deserialize_identifier()`. /// /// # Lifetimes /// /// - `'i`: lifetime of the data that the deserializer borrows from the parsed input /// - `'d`: lifetime of a deserializer that holds a buffer with content of events /// /// [`attribute`]: Self::from_attr /// [`local_name()`]: QName::local_name /// [`Deserialize`]: serde::Deserialize pub struct QNameDeserializer<'i, 'd> { name: CowRef<'i, 'd, str>, } impl<'i, 'd> QNameDeserializer<'i, 'd> { /// Creates deserializer from name of an attribute pub fn from_attr( name: QName<'d>, decoder: Decoder, key_buf: &'d mut String, ) -> Result { key_buf.clear(); key_buf.push('@'); // https://github.com/tafia/quick-xml/issues/537 // Namespace bindings (xmlns:xxx) map to `@xmlns:xxx` instead of `@xxx` if name.as_namespace_binding().is_some() { decoder.decode_into(name.into_inner(), key_buf)?; } else { let local = name.local_name(); decoder.decode_into(local.into_inner(), key_buf)?; }; Ok(Self { name: CowRef::Slice(key_buf), }) } /// Creates deserializer from name of an element pub fn from_elem(name: CowRef<'i, 'd, [u8]>, decoder: Decoder) -> Result { let local = match name { CowRef::Input(borrowed) => match decode_name(QName(borrowed), decoder)? 
{ Cow::Borrowed(borrowed) => CowRef::Input(borrowed), Cow::Owned(owned) => CowRef::Owned(owned), }, CowRef::Slice(borrowed) => match decode_name(QName(borrowed), decoder)? { Cow::Borrowed(borrowed) => CowRef::Slice(borrowed), Cow::Owned(owned) => CowRef::Owned(owned), }, CowRef::Owned(owned) => match decode_name(QName(&owned), decoder)? { // SAFETY: Because result is borrowed, no changes was done // and we can safely unwrap here Cow::Borrowed(_) => CowRef::Owned(String::from_utf8(owned).unwrap()), Cow::Owned(owned) => CowRef::Owned(owned), }, }; Ok(Self { name: local }) } } impl<'de, 'd> Deserializer<'de> for QNameDeserializer<'de, 'd> { type Error = DeError; forward_to_deserialize_any! { char str string bytes byte_buf seq tuple tuple_struct map struct ignored_any } /// According to the , /// valid boolean representations are only `"true"`, `"false"`, `"1"`, /// and `"0"`. But this method also handles following: /// /// |`bool` |XML content /// |-------|------------------------------------------------------------- /// |`true` |`"True"`, `"TRUE"`, `"t"`, `"Yes"`, `"YES"`, `"yes"`, `"y"` /// |`false`|`"False"`, `"FALSE"`, `"f"`, `"No"`, `"NO"`, `"no"`, `"n"` fn deserialize_bool(self, visitor: V) -> Result where V: Visitor<'de>, { str2bool(self.name.as_ref(), visitor) } deserialize_num!(deserialize_i8, visit_i8); deserialize_num!(deserialize_i16, visit_i16); deserialize_num!(deserialize_i32, visit_i32); deserialize_num!(deserialize_i64, visit_i64); deserialize_num!(deserialize_u8, visit_u8); deserialize_num!(deserialize_u16, visit_u16); deserialize_num!(deserialize_u32, visit_u32); deserialize_num!(deserialize_u64, visit_u64); serde_if_integer128! 
{ deserialize_num!(deserialize_i128, visit_i128); deserialize_num!(deserialize_u128, visit_u128); } deserialize_num!(deserialize_f32, visit_f32); deserialize_num!(deserialize_f64, visit_f64); /// Calls [`Visitor::visit_unit`] fn deserialize_unit(self, visitor: V) -> Result where V: Visitor<'de>, { visitor.visit_unit() } /// Forwards deserialization to the [`Self::deserialize_unit`] fn deserialize_unit_struct( self, _name: &'static str, visitor: V, ) -> Result where V: Visitor<'de>, { self.deserialize_unit(visitor) } /// Forwards deserialization to the [`Self::deserialize_identifier`] #[inline] fn deserialize_any(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_identifier(visitor) } /// If `name` is an empty string then calls [`Visitor::visit_none`], /// otherwise calls [`Visitor::visit_some`] with itself fn deserialize_option(self, visitor: V) -> Result where V: Visitor<'de>, { if self.name.is_empty() { visitor.visit_none() } else { visitor.visit_some(self) } } fn deserialize_newtype_struct( self, _name: &'static str, visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_newtype_struct(self) } /// Calls a [`Visitor::visit_str`] if [`name`] contains only UTF-8 /// compatible encoded characters and represents an element name and /// a [`Visitor::visit_string`] in all other cases. 
/// /// [`name`]: Self::name fn deserialize_identifier(self, visitor: V) -> Result where V: Visitor<'de>, { match self.name { CowRef::Input(name) => visitor.visit_borrowed_str(name), CowRef::Slice(name) => visitor.visit_str(name), CowRef::Owned(name) => visitor.visit_string(name), } } fn deserialize_enum( self, _name: &str, _variants: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_enum(self) } } impl<'de, 'd> EnumAccess<'de> for QNameDeserializer<'de, 'd> { type Error = DeError; type Variant = UnitOnly; fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error> where V: DeserializeSeed<'de>, { let name = seed.deserialize(self)?; Ok((name, UnitOnly)) } } //////////////////////////////////////////////////////////////////////////////////////////////////// #[cfg(test)] mod tests { use super::*; use crate::se::key::QNameSerializer; use crate::utils::{ByteBuf, Bytes}; use pretty_assertions::assert_eq; use serde::de::IgnoredAny; use serde::{Deserialize, Serialize}; use std::collections::HashMap; #[derive(Debug, Deserialize, Serialize, PartialEq)] struct Unit; #[derive(Debug, Deserialize, Serialize, PartialEq)] struct Newtype(String); #[derive(Debug, Deserialize, Serialize, PartialEq)] struct Tuple((), ()); #[derive(Debug, Deserialize, Serialize, PartialEq)] struct Struct { key: String, val: usize, } #[derive(Debug, Deserialize, Serialize, PartialEq)] enum Enum { Unit, #[serde(rename = "@Attr")] Attr, Newtype(String), Tuple(String, usize), Struct { key: String, val: usize, }, } #[derive(Debug, Deserialize, PartialEq)] #[serde(field_identifier)] enum Id { Field, } #[derive(Debug, Deserialize)] #[serde(transparent)] struct Any(IgnoredAny); impl PartialEq for Any { fn eq(&self, _other: &Any) -> bool { true } } /// Checks that given `$input` successfully deserializing into given `$result` macro_rules! 
deserialized_to_only { ($name:ident: $type:ty = $input:literal => $result:expr) => { #[test] fn $name() { let de = QNameDeserializer { name: CowRef::Input($input), }; let data: $type = Deserialize::deserialize(de).unwrap(); assert_eq!(data, $result); } }; } /// Checks that given `$input` successfully deserializing into given `$result` macro_rules! deserialized_to { ($name:ident: $type:ty = $input:literal => $result:expr) => { #[test] fn $name() { let de = QNameDeserializer { name: CowRef::Input($input), }; let data: $type = Deserialize::deserialize(de).unwrap(); assert_eq!(data, $result); // Roundtrip to ensure that serializer corresponds to deserializer assert_eq!( data.serialize(QNameSerializer { writer: String::new() }) .unwrap(), $input ); } }; } /// Checks that attempt to deserialize given `$input` as a `$type` results to a /// deserialization error `$kind` with `$reason` macro_rules! err { ($name:ident: $type:ty = $input:literal => $kind:ident($reason:literal)) => { #[test] fn $name() { let de = QNameDeserializer { name: CowRef::Input($input), }; let err = <$type as Deserialize>::deserialize(de).unwrap_err(); match err { DeError::$kind(e) => assert_eq!(e, $reason), _ => panic!( "Expected `Err({}({}))`, but got `{:?}`", stringify!($kind), $reason, err ), } } }; } deserialized_to!(false_: bool = "false" => false); deserialized_to!(true_: bool = "true" => true); deserialized_to!(i8_: i8 = "-2" => -2); deserialized_to!(i16_: i16 = "-2" => -2); deserialized_to!(i32_: i32 = "-2" => -2); deserialized_to!(i64_: i64 = "-2" => -2); deserialized_to!(u8_: u8 = "3" => 3); deserialized_to!(u16_: u16 = "3" => 3); deserialized_to!(u32_: u32 = "3" => 3); deserialized_to!(u64_: u64 = "3" => 3); serde_if_integer128! 
{ deserialized_to!(i128_: i128 = "-2" => -2); deserialized_to!(u128_: u128 = "2" => 2); } deserialized_to!(f32_: f32 = "1.23" => 1.23); deserialized_to!(f64_: f64 = "1.23" => 1.23); deserialized_to!(char_unescaped: char = "h" => 'h'); err!(char_escaped: char = "<" => Custom("invalid value: string \"<\", expected a character")); deserialized_to!(string: String = "<escaped string" => "<escaped string"); deserialized_to!(borrowed_str: &str = "name" => "name"); err!(byte_buf: ByteBuf = "<escaped string" => Custom("invalid type: string \"<escaped string\", expected byte data")); err!(borrowed_bytes: Bytes = "name" => Custom("invalid type: string \"name\", expected borrowed bytes")); deserialized_to!(option_none: Option = "" => None); deserialized_to!(option_some: Option = "name" => Some("name".into())); // Unit structs cannot be represented in some meaningful way, but it meaningful // to use them as a placeholder when we want to deserialize _something_ deserialized_to_only!(unit: () = "anything" => ()); deserialized_to_only!(unit_struct: Unit = "anything" => Unit); deserialized_to!(newtype: Newtype = "<escaped string" => Newtype("<escaped string".into())); err!(seq: Vec<()> = "name" => Custom("invalid type: string \"name\", expected a sequence")); err!(tuple: ((), ()) = "name" => Custom("invalid type: string \"name\", expected a tuple of size 2")); err!(tuple_struct: Tuple = "name" => Custom("invalid type: string \"name\", expected tuple struct Tuple")); err!(map: HashMap<(), ()> = "name" => Custom("invalid type: string \"name\", expected a map")); err!(struct_: Struct = "name" => Custom("invalid type: string \"name\", expected struct Struct")); deserialized_to!(enum_unit: Enum = "Unit" => Enum::Unit); deserialized_to!(enum_unit_for_attr: Enum = "@Attr" => Enum::Attr); err!(enum_newtype: Enum = "Newtype" => Custom("invalid type: unit value, expected a string")); err!(enum_tuple: Enum = "Tuple" => Custom("invalid type: unit value, expected tuple variant Enum::Tuple")); 
err!(enum_struct: Enum = "Struct" => Custom("invalid type: unit value, expected struct variant Enum::Struct")); // Field identifiers cannot be serialized, and IgnoredAny represented _something_ // which is not concrete deserialized_to_only!(identifier: Id = "Field" => Id::Field); deserialized_to_only!(ignored_any: Any = "any-name" => Any(IgnoredAny)); } quick-xml-0.36.1/src/de/map.rs000064400000000000000000001307520072674642500141670ustar 00000000000000//! Serde `Deserializer` module use crate::{ de::key::QNameDeserializer, de::resolver::EntityResolver, de::simple_type::SimpleTypeDeserializer, de::text::TextDeserializer, de::{str2bool, DeEvent, Deserializer, XmlRead, TEXT_KEY, VALUE_KEY}, encoding::Decoder, errors::serialize::DeError, errors::Error, events::attributes::IterState, events::BytesStart, name::QName, }; use serde::de::value::BorrowedStrDeserializer; use serde::de::{self, DeserializeSeed, Deserializer as _, MapAccess, SeqAccess, Visitor}; use serde::serde_if_integer128; use std::borrow::Cow; use std::ops::Range; /// Defines a source that should be used to deserialize a value in the next call /// to [`next_value_seed()`](MapAccess::next_value_seed) #[derive(Debug, PartialEq)] enum ValueSource { /// Source are not specified, because [`next_key_seed()`] not yet called. /// This is an initial state and state after deserializing value /// (after call of [`next_value_seed()`]). /// /// Attempt to call [`next_value_seed()`] while accessor in this state would /// return a [`DeError::KeyNotRead`] error. /// /// [`next_key_seed()`]: MapAccess::next_key_seed /// [`next_value_seed()`]: MapAccess::next_value_seed Unknown, /// Next value should be deserialized from an attribute value; value is located /// at specified span. 
Attribute(Range), /// Value should be deserialized from the text content of the XML node, which /// represented or by an ordinary text node, or by a CDATA node: /// /// ```xml /// /// text content /// /// /// ``` /// ```xml /// /// /// /// /// ``` Text, /// Next value should be deserialized from an element with an any name, except /// elements with a name matching one of the struct fields. Corresponding tag /// name will always be associated with a field with name [`VALUE_KEY`]. /// /// That state is set when call to [`peek()`] returns a [`Start`] event, which /// [`name()`] is not listed in the [list of known fields] (which for a struct /// is a list of field names, and for a map that is an empty list), _and_ /// struct has a field with a special name [`VALUE_KEY`]. /// /// When in this state, next event, returned by [`next()`], will be a [`Start`], /// which represents both a key, and a value. Value would be deserialized from /// the whole element and how is will be done determined by the value deserializer. /// The [`ElementMapAccess`] do not consume any events in that state. /// /// Because in that state any encountered `` is mapped to the [`VALUE_KEY`] /// field, it is possible to use tag name as an enum discriminator, so `enum`s /// can be deserialized from that XMLs: /// /// ```xml /// /// ... /// /// /// /// ``` /// ```xml /// /// ... /// /// /// /// ``` /// /// both can be deserialized into /// /// ```ignore /// enum Enum { /// variant1, /// variant2, /// } /// struct AnyName { /// #[serde(rename = "$value")] /// field: Enum, /// } /// ``` /// /// That is possible, because value deserializer have access to the full content /// of a `...` or `...` node, including /// the tag name. 
/// /// [`Start`]: DeEvent::Start /// [`peek()`]: Deserializer::peek() /// [`next()`]: Deserializer::next() /// [`name()`]: BytesStart::name() /// [`Text`]: Self::Text /// [list of known fields]: ElementMapAccess::fields Content, /// Next value should be deserialized from an element with a dedicated name. /// If deserialized type is a sequence, then that sequence will collect all /// elements with the same name until it will be filled. If not all elements /// would be consumed, the rest will be ignored. /// /// That state is set when call to [`peek()`] returns a [`Start`] event, which /// [`name()`] represents a field name. That name will be deserialized as a key. /// /// When in this state, next event, returned by [`next()`], will be a [`Start`], /// which represents both a key, and a value. Value would be deserialized from /// the whole element and how is will be done determined by the value deserializer. /// The [`ElementMapAccess`] do not consume any events in that state. /// /// An illustration below shows, what data is used to deserialize key and value: /// ```xml /// /// ... /// /// /// /// ``` /// /// Although value deserializer will have access to the full content of a `` /// node (including the tag name), it will not get much benefits from that, /// because tag name will always be fixed for a given map field (equal to a /// field name). So, if the field type is an `enum`, it cannot select its /// variant based on the tag name. If that is needed, then [`Content`] variant /// of this enum should be used. Such usage is enabled by annotating a struct /// field as "content" field, which implemented as given the field a special /// [`VALUE_KEY`] name. 
/// /// [`Start`]: DeEvent::Start /// [`peek()`]: Deserializer::peek() /// [`next()`]: Deserializer::next() /// [`name()`]: BytesStart::name() /// [`Content`]: Self::Content Nested, } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A deserializer that extracts map-like structures from an XML. This deserializer /// represents a one XML tag: /// /// ```xml /// ... /// ``` /// /// Name of this tag is stored in a [`Self::start`] property. /// /// # Lifetimes /// /// - `'de` lifetime represents a buffer, from which deserialized values can /// borrow their data. Depending on the underlying reader, there can be an /// internal buffer of deserializer (i.e. deserializer itself) or an input /// (in that case it is possible to approach zero-copy deserialization). /// /// - `'d` lifetime represents a parent deserializer, which could own the data /// buffer. pub(crate) struct ElementMapAccess<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { /// Tag -- owner of attributes start: BytesStart<'de>, de: &'d mut Deserializer<'de, R, E>, /// State of the iterator over attributes. Contains the next position in the /// inner `start` slice, from which next attribute should be parsed. iter: IterState, /// Current state of the accessor that determines what next call to API /// methods should return. source: ValueSource, /// List of field names of the struct. It is empty for maps fields: &'static [&'static str], /// If `true`, then the deserialized struct has a field with a special name: /// [`VALUE_KEY`]. 
That field should be deserialized from the whole content /// of an XML node, including tag name: /// /// ```xml /// value for VALUE_KEY field /// ``` has_value_field: bool, } impl<'de, 'd, R, E> ElementMapAccess<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { /// Create a new ElementMapAccess pub fn new( de: &'d mut Deserializer<'de, R, E>, start: BytesStart<'de>, fields: &'static [&'static str], ) -> Result { Ok(Self { de, iter: IterState::new(start.name().as_ref().len(), false), start, source: ValueSource::Unknown, fields, has_value_field: fields.contains(&VALUE_KEY), }) } } impl<'de, 'd, R, E> MapAccess<'de> for ElementMapAccess<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { type Error = DeError; fn next_key_seed>( &mut self, seed: K, ) -> Result, Self::Error> { debug_assert_eq!(self.source, ValueSource::Unknown); // FIXME: There error positions counted from the start of tag name - need global position let slice = &self.start.buf; let decoder = self.de.reader.decoder(); if let Some(a) = self.iter.next(slice).transpose()? { // try getting map from attributes (key= "value") let (key, value) = a.into(); self.source = ValueSource::Attribute(value.unwrap_or_default()); let de = QNameDeserializer::from_attr(QName(&slice[key]), decoder, &mut self.de.key_buf)?; seed.deserialize(de).map(Some) } else { // try getting from events (value) match self.de.peek()? 
{ // We shouldn't have both `$value` and `$text` fields in the same // struct, so if we have `$value` field, the we should deserialize // text content to `$value` DeEvent::Text(_) if self.has_value_field => { self.source = ValueSource::Content; // Deserialize `key` from special attribute name which means // that value should be taken from the text content of the // XML node let de = BorrowedStrDeserializer::::new(VALUE_KEY); seed.deserialize(de).map(Some) } DeEvent::Text(_) => { self.source = ValueSource::Text; // Deserialize `key` from special attribute name which means // that value should be taken from the text content of the // XML node let de = BorrowedStrDeserializer::::new(TEXT_KEY); seed.deserialize(de).map(Some) } // Used to deserialize collections of enums, like: // // // // // // // into // // enum Enum { A, B, С } // struct Root { // #[serde(rename = "$value")] // items: Vec, // } // TODO: This should be handled by #[serde(flatten)] // See https://github.com/serde-rs/serde/issues/1905 DeEvent::Start(e) if self.has_value_field && not_in(self.fields, e, decoder)? 
=> { self.source = ValueSource::Content; let de = BorrowedStrDeserializer::::new(VALUE_KEY); seed.deserialize(de).map(Some) } DeEvent::Start(e) => { self.source = ValueSource::Nested; let de = QNameDeserializer::from_elem(e.raw_name(), decoder)?; seed.deserialize(de).map(Some) } // Stop iteration after reaching a closing tag // The matching tag name is guaranteed by the reader if our // deserializer implementation is correct DeEvent::End(e) => { debug_assert_eq!(self.start.name(), e.name()); // Consume End self.de.next()?; Ok(None) } // We cannot get `Eof` legally, because we always inside of the // opened tag `self.start` DeEvent::Eof => Err(Error::missed_end(self.start.name(), decoder).into()), } } } fn next_value_seed>( &mut self, seed: K, ) -> Result { match std::mem::replace(&mut self.source, ValueSource::Unknown) { ValueSource::Attribute(value) => seed.deserialize(SimpleTypeDeserializer::from_part( &self.start.buf, value, true, self.de.reader.decoder(), )), // This arm processes the following XML shape: // // text value // // The whole map represented by an `` element, the map key // is implicit and equals to the `TEXT_KEY` constant, and the value // is a `Text` event (the value deserializer will see that event) // This case are checked by "xml_schema_lists::element" tests in tests/serde-de.rs ValueSource::Text => match self.de.next()? { DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content(e)), // SAFETY: We set `Text` only when we seen `Text` _ => unreachable!(), }, // This arm processes the following XML shape: // // ... // // The whole map represented by an `` element, the map key // is implicit and equals to the `VALUE_KEY` constant, and the value // is a `Start` event (the value deserializer will see that event) ValueSource::Content => seed.deserialize(MapValueDeserializer { map: self, fixed_name: false, }), // This arm processes the following XML shape: // // ... 
// // The whole map represented by an `` element, the map key // is a `tag`, and the value is a `Start` event (the value deserializer // will see that event) ValueSource::Nested => seed.deserialize(MapValueDeserializer { map: self, fixed_name: true, }), ValueSource::Unknown => Err(DeError::KeyNotRead), } } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A deserializer for a value of map or struct. That deserializer slightly /// differently processes events for a primitive types and sequences than /// a [`Deserializer`]. /// /// This deserializer used to deserialize two kinds of fields: /// - usual fields with a dedicated name, such as `field_one` or `field_two`, in /// that case field [`Self::fixed_name`] is `true`; /// - the special `$value` field which represents any tag or a textual content /// in the XML which would be found in the document, in that case field /// [`Self::fixed_name`] is `false`. /// /// This deserializer can see two kind of events at the start: /// - [`DeEvent::Text`] /// - [`DeEvent::Start`] /// /// which represents two possible variants of items: /// ```xml /// A tag item /// A text item /// /// ``` /// /// This deserializer are very similar to a [`ElementDeserializer`]. The only difference /// in the `deserialize_seq` method. This deserializer will act as an iterator /// over tags / text within it's parent tag, whereas the [`ElementDeserializer`] /// will represent sequences as an `xs:list`. /// /// This deserializer processes items as following: /// - primitives (numbers, booleans, strings, characters) are deserialized either /// from a text content, or unwrapped from a one level of a tag. 
So, `123` and /// `123` both can be deserialized into an `u32`; /// - `Option`: /// - empty text of [`DeEvent::Text`] is deserialized as `None`; /// - everything else are deserialized as `Some` using the same deserializer, /// including `` or ``; /// - units (`()`) and unit structs consumes the whole text or element subtree; /// - newtype structs are deserialized by forwarding deserialization of inner type /// with the same deserializer; /// - sequences, tuples and tuple structs are deserialized by iterating within the /// parent tag and deserializing each tag or text content using [`ElementDeserializer`]; /// - structs and maps are deserialized using new instance of [`ElementMapAccess`]; /// - enums: /// - in case of [`DeEvent::Text`] event the text content is deserialized as /// a `$text` variant. Enum content is deserialized from the text using /// [`SimpleTypeDeserializer`]; /// - in case of [`DeEvent::Start`] event the tag name is deserialized as /// an enum tag, and the content inside are deserialized as an enum content. /// Depending on a variant kind deserialization is performed as: /// - unit variants: consuming text content or a subtree; /// - newtype variants: forward deserialization to the inner type using /// this deserializer; /// - tuple variants: call [`deserialize_tuple`] of this deserializer; /// - struct variants: call [`deserialize_struct`] of this deserializer. /// /// [`deserialize_tuple`]: #method.deserialize_tuple /// [`deserialize_struct`]: #method.deserialize_struct struct MapValueDeserializer<'de, 'd, 'm, R, E> where R: XmlRead<'de>, E: EntityResolver, { /// Access to the map that created this deserializer. Gives access to the /// context, such as list of fields, that current map known about. map: &'m mut ElementMapAccess<'de, 'd, R, E>, /// Whether this deserializer was created for deserialization from an element /// with fixed name, or the elements with different names or even text are allowed. 
/// /// If this field is `true`, we process the `<tag>` element in the following XML shape: /// /// ```xml /// <any-tag> /// <tag>...</tag> /// </any-tag> /// ``` /// /// The whole map is represented by an `<any-tag>` element, the map key is a `tag`, /// and the value starts with a `Start("tag")` event (the value deserializer will /// see that event first) and extends to the matching `End("tag")` event. /// In order to deserialize primitives (such as `usize`) we need to allow looking /// inside one level of tags, so that /// /// ```xml /// <tag>42</tag> /// ``` /// /// could be deserialized into `42usize` without problems, and at the same time /// /// ```xml /// <tag> /// <key1/> /// <key2/> /// <!--...--> /// </tag> /// ``` /// could be deserialized to a struct. /// /// If this field is `false`, we process one of the following XML shapes: /// /// ```xml /// <any-tag> /// text value /// </any-tag> /// ``` /// ```xml /// <any-tag> /// <![CDATA[cdata value]]> /// </any-tag> /// ``` /// ```xml /// <any-tag> /// <any>...</any> /// </any-tag> /// ``` /// /// The whole map is represented by an `<any-tag>` element, the map key is /// implicit and equal to the [`VALUE_KEY`] constant, and the value is /// a [`Text`] or a [`Start`] event (the value deserializer will see one of /// those events). In the first two cases the value of this field does not matter /// (because we already see the textual event and there is no reason to look /// "inside" something), but in the last case the primitives should raise /// a deserialization error, because that means that you are trying to deserialize /// the following struct: /// /// ```ignore /// struct AnyName { /// #[serde(rename = "$value")] /// any_name: String, /// } /// ``` /// which means that `any_name` should get the content of the `<any-tag>` element. /// /// Changing this can be valuable for some use cases, /// but those fields should be explicitly marked that they want to get any /// possible markup as a `String`, and that mark is different from marking them /// as accepting "text content", which is what `$text` currently means. 
/// /// [`Text`]: DeEvent::Text /// [`Start`]: DeEvent::Start fixed_name: bool, } impl<'de, 'd, 'm, R, E> MapValueDeserializer<'de, 'd, 'm, R, E> where R: XmlRead<'de>, E: EntityResolver, { /// Returns a next string as concatenated content of consequent [`Text`] and /// [`CData`] events, used inside [`deserialize_primitives!()`]. /// /// [`Text`]: crate::events::Event::Text /// [`CData`]: crate::events::Event::CData #[inline] fn read_string(&mut self) -> Result, DeError> { // TODO: Read the whole content to fix https://github.com/tafia/quick-xml/issues/483 self.map.de.read_string_impl(self.fixed_name) } } impl<'de, 'd, 'm, R, E> de::Deserializer<'de> for MapValueDeserializer<'de, 'd, 'm, R, E> where R: XmlRead<'de>, E: EntityResolver, { type Error = DeError; deserialize_primitives!(mut); #[inline] fn deserialize_unit(self, visitor: V) -> Result where V: Visitor<'de>, { self.map.de.deserialize_unit(visitor) } fn deserialize_option(self, visitor: V) -> Result where V: Visitor<'de>, { match self.map.de.peek()? { DeEvent::Text(t) if t.is_empty() => visitor.visit_none(), _ => visitor.visit_some(self), } } /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`] /// with the same deserializer. fn deserialize_newtype_struct( self, _name: &'static str, visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_newtype_struct(self) } /// Deserializes each `` in /// ```xml /// /// ... /// ... /// ... /// /// ``` /// as a sequence item, where `` represents a Map in a [`Self::map`], /// and a `` is a sequential field of that map. fn deserialize_seq(self, visitor: V) -> Result where V: Visitor<'de>, { let filter = if self.fixed_name { match self.map.de.peek()? 
{ // Clone is cheap if event borrows from the input DeEvent::Start(e) => TagFilter::Include(e.clone()), // SAFETY: we use that deserializer with `fixed_name == true` // only from the `ElementMapAccess::next_value_seed` and only when we // peeked `Start` event _ => unreachable!(), } } else { TagFilter::Exclude(self.map.fields) }; visitor.visit_seq(MapValueSeqAccess { #[cfg(feature = "overlapped-lists")] checkpoint: self.map.de.skip_checkpoint(), map: self.map, filter, }) } #[inline] fn deserialize_struct( self, name: &'static str, fields: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { self.map.de.deserialize_struct(name, fields, visitor) } fn deserialize_enum( self, _name: &'static str, _variants: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { if self.fixed_name { match self.map.de.next()? { // Handles UnitEnumVariant DeEvent::Start(e) => { // skip , read text after it and ensure that it is ended by let text = self.map.de.read_text(e.name())?; if text.is_empty() { // Map empty text () to a special `$text` variant visitor.visit_enum(SimpleTypeDeserializer::from_text(TEXT_KEY.into())) } else { visitor.visit_enum(SimpleTypeDeserializer::from_text(text)) } } // SAFETY: we use that deserializer with `fixed_name == true` // only from the `MapAccess::next_value_seed` and only when we // peeked `Start` event _ => unreachable!(), } } else { visitor.visit_enum(self) } } fn deserialize_any(self, visitor: V) -> Result where V: Visitor<'de>, { match self.map.de.peek()? 
{ DeEvent::Text(_) => self.deserialize_str(visitor), _ => self.deserialize_map(visitor), } } } impl<'de, 'd, 'm, R, E> de::EnumAccess<'de> for MapValueDeserializer<'de, 'd, 'm, R, E> where R: XmlRead<'de>, E: EntityResolver, { type Error = DeError; type Variant = MapValueVariantAccess<'de, 'd, 'm, R, E>; fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error> where V: DeserializeSeed<'de>, { let decoder = self.map.de.reader.decoder(); let (name, is_text) = match self.map.de.peek()? { DeEvent::Start(e) => ( seed.deserialize(QNameDeserializer::from_elem(e.raw_name(), decoder)?)?, false, ), DeEvent::Text(_) => ( seed.deserialize(BorrowedStrDeserializer::::new(TEXT_KEY))?, true, ), // SAFETY: we use that deserializer only when we peeked `Start` or `Text` event _ => unreachable!(), }; Ok(( name, MapValueVariantAccess { map: self.map, is_text, }, )) } } struct MapValueVariantAccess<'de, 'd, 'm, R, E> where R: XmlRead<'de>, E: EntityResolver, { /// Access to the map that created this enum accessor. Gives access to the /// context, such as list of fields, that current map known about. map: &'m mut ElementMapAccess<'de, 'd, R, E>, /// `true` if variant should be deserialized from a textual content /// and `false` if from tag is_text: bool, } impl<'de, 'd, 'm, R, E> de::VariantAccess<'de> for MapValueVariantAccess<'de, 'd, 'm, R, E> where R: XmlRead<'de>, E: EntityResolver, { type Error = DeError; fn unit_variant(self) -> Result<(), Self::Error> { match self.map.de.next()? 
{ // Consume subtree DeEvent::Start(e) => self.map.de.read_to_end(e.name()), // Does not needed to deserialize using SimpleTypeDeserializer, because // it returns `()` when `deserialize_unit()` is requested DeEvent::Text(_) => Ok(()), // SAFETY: the other events are filtered in `variant_seed()` _ => unreachable!("Only `Start` or `Text` events are possible here"), } } fn newtype_variant_seed(self, seed: T) -> Result where T: DeserializeSeed<'de>, { if self.is_text { match self.map.de.next()? { DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content(e)), // SAFETY: the other events are filtered in `variant_seed()` _ => unreachable!("Only `Text` events are possible here"), } } else { seed.deserialize(MapValueDeserializer { map: self.map, // Because element name already was either mapped to a field name, // or to a variant name, we should not treat it as variable fixed_name: true, }) } } fn tuple_variant(self, len: usize, visitor: V) -> Result where V: Visitor<'de>, { if self.is_text { match self.map.de.next()? { DeEvent::Text(e) => { SimpleTypeDeserializer::from_text_content(e).deserialize_tuple(len, visitor) } // SAFETY: the other events are filtered in `variant_seed()` _ => unreachable!("Only `Text` events are possible here"), } } else { MapValueDeserializer { map: self.map, // Because element name already was either mapped to a field name, // or to a variant name, we should not treat it as variable fixed_name: true, } .deserialize_tuple(len, visitor) } } fn struct_variant( self, fields: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { match self.map.de.next()? 
{ DeEvent::Start(e) => visitor.visit_map(ElementMapAccess::new(self.map.de, e, fields)?), DeEvent::Text(e) => { SimpleTypeDeserializer::from_text_content(e).deserialize_struct("", fields, visitor) } // SAFETY: the other events are filtered in `variant_seed()` _ => unreachable!("Only `Start` or `Text` events are possible here"), } } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Check if tag `start` is included in the `fields` list. `decoder` is used to /// get a string representation of a tag. /// /// Returns `true`, if `start` is not in the `fields` list and `false` otherwise. fn not_in( fields: &'static [&'static str], start: &BytesStart, decoder: Decoder, ) -> Result { let tag = decoder.decode(start.local_name().into_inner())?; Ok(fields.iter().all(|&field| field != tag.as_ref())) } /// A filter that determines, what tags should form a sequence. /// /// There are two types of sequences: /// - sequence where each element represented by tags with the same name /// - sequence where each element can have a different tag /// /// The first variant could represent a collection of structs, the second -- /// a collection of enum variants. /// /// In the second case we don't know what tag name should be expected as a /// sequence element, so we accept any element. Since the sequence are flattened /// into maps, we skip elements which have dedicated fields in a struct by using an /// `Exclude` filter that filters out elements with names matching field names /// from the struct. /// /// # Lifetimes /// /// `'de` represents a lifetime of the XML input, when filter stores the /// dedicated tag name #[derive(Debug)] enum TagFilter<'de> { /// A `SeqAccess` interested only in tags with specified name to deserialize /// an XML like this: /// /// ```xml /// <...> /// /// /// /// ... 
/// /// ``` /// /// The tag name is stored inside (`b"tag"` for that example) Include(BytesStart<'de>), //TODO: Need to store only name instead of a whole tag /// A `SeqAccess` interested in tags with any name, except explicitly listed. /// Excluded tags are used as struct field names and therefore should not /// fall into a `$value` category Exclude(&'static [&'static str]), } impl<'de> TagFilter<'de> { fn is_suitable(&self, start: &BytesStart, decoder: Decoder) -> Result { match self { Self::Include(n) => Ok(n.name() == start.name()), Self::Exclude(fields) => not_in(fields, start, decoder), } } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// An accessor to sequence elements forming a value for struct field. /// Technically, this sequence is flattened out into structure and sequence /// elements are overlapped with other fields of a structure. Each call to /// [`Self::next_element_seed`] consumes a next sub-tree or consequent list /// of [`Text`] and [`CData`] events. /// /// ```xml /// <> /// ... /// The is the one item /// This is item it splitted by comments /// ...and that is the third! /// ... /// /// ``` /// /// Depending on [`Self::filter`], only some of that possible constructs would be /// an element. /// /// [`Text`]: crate::events::Event::Text /// [`CData`]: crate::events::Event::CData struct MapValueSeqAccess<'de, 'd, 'm, R, E> where R: XmlRead<'de>, E: EntityResolver, { /// Accessor to a map that creates this accessor and to a deserializer for /// a sequence items. map: &'m mut ElementMapAccess<'de, 'd, R, E>, /// Filter that determines whether a tag is a part of this sequence. /// /// When feature [`overlapped-lists`] is not activated, iteration will stop /// when found a tag that does not pass this filter. /// /// When feature [`overlapped-lists`] is activated, all tags, that not pass /// this check, will be skipped. 
/// /// [`overlapped-lists`]: ../../index.html#overlapped-lists filter: TagFilter<'de>, /// Checkpoint after which all skipped events should be returned. All events, /// that was skipped before creating this checkpoint, will still stay buffered /// and will not be returned #[cfg(feature = "overlapped-lists")] checkpoint: usize, } #[cfg(feature = "overlapped-lists")] impl<'de, 'd, 'm, R, E> Drop for MapValueSeqAccess<'de, 'd, 'm, R, E> where R: XmlRead<'de>, E: EntityResolver, { fn drop(&mut self) { self.map.de.start_replay(self.checkpoint); } } impl<'de, 'd, 'm, R, E> SeqAccess<'de> for MapValueSeqAccess<'de, 'd, 'm, R, E> where R: XmlRead<'de>, E: EntityResolver, { type Error = DeError; fn next_element_seed(&mut self, seed: T) -> Result, DeError> where T: DeserializeSeed<'de>, { let decoder = self.map.de.reader.decoder(); loop { break match self.map.de.peek()? { // If we see a tag that we not interested, skip it #[cfg(feature = "overlapped-lists")] DeEvent::Start(e) if !self.filter.is_suitable(e, decoder)? => { self.map.de.skip()?; continue; } // Stop iteration when list elements ends #[cfg(not(feature = "overlapped-lists"))] DeEvent::Start(e) if !self.filter.is_suitable(e, decoder)? => Ok(None), // Stop iteration after reaching a closing tag // The matching tag name is guaranteed by the reader DeEvent::End(e) => { debug_assert_eq!(self.map.start.name(), e.name()); Ok(None) } // We cannot get `Eof` legally, because we always inside of the // opened tag `self.map.start` DeEvent::Eof => Err(Error::missed_end(self.map.start.name(), decoder).into()), DeEvent::Text(_) => match self.map.de.next()? { DeEvent::Text(e) => seed.deserialize(TextDeserializer(e)).map(Some), // SAFETY: we just checked that the next event is Text _ => unreachable!(), }, DeEvent::Start(_) => match self.map.de.next()? 
{ DeEvent::Start(start) => seed .deserialize(ElementDeserializer { start, de: self.map.de, }) .map(Some), // SAFETY: we just checked that the next event is Start _ => unreachable!(), }, }; } } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A deserializer for a single tag item of a mixed sequence of tags and text. /// /// This deserializer are very similar to a [`MapValueDeserializer`] (when it /// processes the [`DeEvent::Start`] event). The only difference in the /// [`deserialize_seq`] method. This deserializer will perform deserialization /// from the textual content between start and end events, whereas the /// [`MapValueDeserializer`] will iterate over tags / text within it's parent tag. /// /// This deserializer processes items as following: /// - numbers are parsed from a text content between tags using [`FromStr`]. So, /// `123` can be deserialized into an `u32`; /// - booleans converted from a text content between tags according to the XML /// [specification]: /// - `"true"` and `"1"` converted to `true`; /// - `"false"` and `"0"` converted to `false`; /// - strings returned as a text content between tags; /// - characters also returned as strings. 
If string contain more than one character /// or empty, it is responsibility of a type to return an error; /// - `Option` are always deserialized as `Some` using the same deserializer, /// including `` or ``; /// - units (`()`) and unit structs consumes the whole element subtree; /// - newtype structs forwards deserialization to the inner type using /// [`SimpleTypeDeserializer`]; /// - sequences, tuples and tuple structs are deserialized using [`SimpleTypeDeserializer`] /// (this is the difference): text content between tags is passed to /// [`SimpleTypeDeserializer`]; /// - structs and maps are deserialized using new instance of [`ElementMapAccess`]; /// - enums: /// - the variant name is deserialized using [`QNameDeserializer`] from the element name; /// - the content is deserialized using the same deserializer: /// - unit variants: consuming a subtree and return `()`; /// - newtype variants forwards deserialization to the inner type using /// this deserializer; /// - tuple variants: call [`deserialize_tuple`] of this deserializer; /// - struct variants: call [`deserialize_struct`] of this deserializer. /// /// [`deserialize_seq`]: #method.deserialize_seq /// [`FromStr`]: std::str::FromStr /// [specification]: https://www.w3.org/TR/xmlschema11-2/#boolean /// [`deserialize_tuple`]: #method.deserialize_tuple /// [`deserialize_struct`]: #method.deserialize_struct struct ElementDeserializer<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { start: BytesStart<'de>, de: &'d mut Deserializer<'de, R, E>, } impl<'de, 'd, R, E> ElementDeserializer<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { /// Returns a next string as concatenated content of consequent [`Text`] and /// [`CData`] events, used inside [`deserialize_primitives!()`]. 
/// /// [`Text`]: crate::events::Event::Text /// [`CData`]: crate::events::Event::CData #[inline] fn read_string(&mut self) -> Result, DeError> { self.de.read_text(self.start.name()) } } impl<'de, 'd, R, E> de::Deserializer<'de> for ElementDeserializer<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { type Error = DeError; deserialize_primitives!(mut); fn deserialize_unit(self, visitor: V) -> Result where V: Visitor<'de>, { // Consume subtree self.de.read_to_end(self.start.name())?; visitor.visit_unit() } fn deserialize_option(self, visitor: V) -> Result where V: Visitor<'de>, { visitor.visit_some(self) } /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`] /// with this deserializer. fn deserialize_newtype_struct( self, _name: &'static str, visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_newtype_struct(self) } /// This method deserializes a sequence inside of element that itself is a /// sequence element: /// /// ```xml /// <> /// ... /// inner sequence /// inner sequence /// inner sequence /// ... /// /// ``` fn deserialize_seq(mut self, visitor: V) -> Result where V: Visitor<'de>, { let text = self.read_string()?; SimpleTypeDeserializer::from_text(text).deserialize_seq(visitor) } fn deserialize_struct( self, _name: &'static str, fields: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_map(ElementMapAccess::new(self.de, self.start, fields)?) 
} fn deserialize_enum( self, _name: &'static str, _variants: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_enum(self) } #[inline] fn deserialize_any(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_map(visitor) } } impl<'de, 'd, R, E> de::EnumAccess<'de> for ElementDeserializer<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { type Error = DeError; type Variant = Self; fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error> where V: DeserializeSeed<'de>, { let name = seed.deserialize(QNameDeserializer::from_elem( self.start.raw_name(), self.de.reader.decoder(), )?)?; Ok((name, self)) } } impl<'de, 'd, R, E> de::VariantAccess<'de> for ElementDeserializer<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { type Error = DeError; fn unit_variant(self) -> Result<(), Self::Error> { // Consume subtree self.de.read_to_end(self.start.name()) } fn newtype_variant_seed(self, seed: T) -> Result where T: DeserializeSeed<'de>, { seed.deserialize(self) } #[inline] fn tuple_variant(self, len: usize, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_tuple(len, visitor) } #[inline] fn struct_variant( self, fields: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { self.deserialize_struct("", fields, visitor) } } //////////////////////////////////////////////////////////////////////////////////////////////////// #[test] fn test_not_in() { use pretty_assertions::assert_eq; let tag = BytesStart::new("tag"); assert_eq!(not_in(&[], &tag, Decoder::utf8()).unwrap(), true); assert_eq!( not_in(&["no", "such", "tags"], &tag, Decoder::utf8()).unwrap(), true ); assert_eq!( not_in(&["some", "tag", "included"], &tag, Decoder::utf8()).unwrap(), false ); let tag_ns = BytesStart::new("ns1:tag"); assert_eq!( not_in(&["no", "such", "tags"], &tag_ns, Decoder::utf8()).unwrap(), true ); assert_eq!( not_in(&["some", "tag", "included"], &tag_ns, 
Decoder::utf8()).unwrap(), false ); assert_eq!( not_in(&["some", "namespace", "ns1:tag"], &tag_ns, Decoder::utf8()).unwrap(), true ); } quick-xml-0.36.1/src/de/mod.rs000064400000000000000000005201360072674642500141700ustar 00000000000000//! Serde `Deserializer` module. //! //! Due to the complexity of the XML standard and the fact that Serde was developed //! with JSON in mind, not all Serde concepts apply smoothly to XML. This leads to //! that fact that some XML concepts are inexpressible in terms of Serde derives //! and may require manual deserialization. //! //! The most notable restriction is the ability to distinguish between _elements_ //! and _attributes_, as no other format used by serde has such a conception. //! //! Due to that the mapping is performed in a best effort manner. //! //! //! //! Table of Contents //! ================= //! - [Mapping XML to Rust types](#mapping-xml-to-rust-types) //! - [Basics](#basics) //! - [Optional attributes and elements](#optional-attributes-and-elements) //! - [Choices (`xs:choice` XML Schema type)](#choices-xschoice-xml-schema-type) //! - [Sequences (`xs:all` and `xs:sequence` XML Schema types)](#sequences-xsall-and-xssequence-xml-schema-types) //! - [Generate Rust types from XML](#generate-rust-types-from-xml) //! - [Composition Rules](#composition-rules) //! - [Enum Representations](#enum-representations) //! - [Normal enum variant](#normal-enum-variant) //! - [`$text` enum variant](#text-enum-variant) //! - [Difference between `$text` and `$value` special names](#difference-between-text-and-value-special-names) //! - [`$text`](#text) //! - [`$value`](#value) //! - [Primitives and sequences of primitives](#primitives-and-sequences-of-primitives) //! - [Structs and sequences of structs](#structs-and-sequences-of-structs) //! - [Enums and sequences of enums](#enums-and-sequences-of-enums) //! - [Frequently Used Patterns](#frequently-used-patterns) //! - [`` lists](#element-lists) //! 
- [Overlapped (Out-of-Order) Elements](#overlapped-out-of-order-elements) //! - [Internally Tagged Enums](#internally-tagged-enums) //! //! //! //! Mapping XML to Rust types //! ========================= //! //! Type names are never considered when deserializing, so you can name your //! types as you wish. Other general rules: //! - `struct` field name could be represented in XML only as an attribute name //! or an element name; //! - `enum` variant name could be represented in XML only as an attribute name //! or an element name; //! - the unit struct, unit type `()` and unit enum variant can be deserialized //! from any valid XML content: //! - attribute and element names; //! - attribute and element values; //! - text or CDATA content (including mixed text and CDATA content). //! //!
//! //! NOTE: All tests are marked with an `ignore` option, even though they do //! compile. This is because rustdoc marks such blocks with an information //! icon unlike `no_run` blocks. //! //!
//! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //!
//! //! ## Basics //! //!
To parse all these XML's... | ...use these Rust type(s)
//! Content of attributes and text / CDATA content of elements (including mixed //! text and CDATA content): //! //! ```xml //! <... ...="content" /> //! ``` //! ```xml //! <...>content</...> //! ``` //! ```xml //! <...><![CDATA[content]]></...> //! ``` //! ```xml //! <...>text<![CDATA[cdata]]>text</...> //! ``` //! Mixed text / CDATA content represents one logical string, `"textcdatatext"` in that case. //! //! //! You can use any type that can be deserialized from an `&str`, for example: //! - [`String`] and [`&str`] //! - [`Cow<str>`] //! - [`u32`], [`f32`] and other numeric types //! - `enum`s, like //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! enum Language { //! Rust, //! Cpp, //! #[serde(other)] //! Other, //! } //! # #[derive(Debug, PartialEq, Deserialize)] //! # struct X { #[serde(rename = "$text")] x: Language } //! # assert_eq!(X { x: Language::Rust }, quick_xml::de::from_str("<x>Rust</x>").unwrap()); //! # assert_eq!(X { x: Language::Cpp }, quick_xml::de::from_str("<x>C<![CDATA[p]]>p</x>").unwrap()); //! # assert_eq!(X { x: Language::Other }, quick_xml::de::from_str("<x><![CDATA[other]]></x>").unwrap()); //! ``` //! //!
//! //! NOTE: deserialization to non-owned types (i.e. types that borrow from the input), //! such as `&str`, is possible only if you parse a document in the UTF-8 //! encoding, the content does not contain entity references such as `&amp;` //! or character references such as `&#xD;`, and the text content is represented //! by a single piece of [text] or [CDATA]. //!
//! //! //! [text]: Event::Text //! [CDATA]: Event::CData //!
//! //! Content of attributes and text / CDATA content of elements (including mixed //! text and CDATA content), which represent space-delimited lists, as //! specified in the XML Schema specification for [`xs:list`] `simpleType`: //! //! ```xml //! <... ...="element1 element2 ..." /> //! ``` //! ```xml //! <...> //! element1 //! element2 //! ... //! </...> //! ``` //! ```xml //! <...><![CDATA[element1 element2 ...]]></...> //! ``` //! //! [`xs:list`]: https://www.w3.org/TR/xmlschema11-2/#list-datatypes //! //! //! Use any type that is deserialized using a [`deserialize_seq()`] call, for example: //! //! ``` //! type List = Vec<u32>; //! ``` //! //! See the next row to learn where in your struct definition you should //! use that type. //! //! According to the XML Schema specification, the delimiter between elements is one //! or more whitespace characters (`' '`, `'\r'`, `'\n'`, and `'\t'`). //! //!
//! //! NOTE: according to the XML Schema restrictions, you cannot escape those //! white-space characters, so list elements will _never_ contain them. //! In practice you will usually use `xs:list`s for lists of numbers or enumerated //! values which look like identifiers in many languages, for example, `item`, //! `some_item` or `some-item`, so that shouldn't be a problem. //! //! NOTE: according to the XML Schema specification, list elements can be //! delimited only by spaces. Other delimiters (for example, commas) are not //! allowed. //! //!
//! //! [`deserialize_seq()`]: de::Deserializer::deserialize_seq //!
//! A typical XML with attributes. The root tag name does not matter: //! //! ```xml //! //! ``` //! //! //! A structure where each XML attribute is mapped to a field with a name //! starting with `@`. Because Rust identifiers do not permit the `@` character, //! you should use the `#[serde(rename = "@...")]` attribute to rename it. //! The name of the struct itself does not matter: //! //! ``` //! # use serde::Deserialize; //! # type T = (); //! # type U = (); //! // Get both attributes //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@one")] //! one: T, //! //! #[serde(rename = "@two")] //! two: U, //! } //! # quick_xml::de::from_str::(r#""#).unwrap(); //! ``` //! ``` //! # use serde::Deserialize; //! # type T = (); //! // Get only the one attribute, ignore the other //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@one")] //! one: T, //! } //! # quick_xml::de::from_str::(r#""#).unwrap(); //! # quick_xml::de::from_str::(r#""#).unwrap(); //! # quick_xml::de::from_str::(r#"..."#).unwrap(); //! ``` //! ``` //! # use serde::Deserialize; //! // Ignore all attributes //! // You can also use the `()` type (unit type) //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName; //! # quick_xml::de::from_str::(r#""#).unwrap(); //! # quick_xml::de::from_str::(r#"..."#).unwrap(); //! # quick_xml::de::from_str::(r#"......"#).unwrap(); //! ``` //! //! All these structs can be used to deserialize from an XML on the //! left side depending on amount of information that you want to get. //! Of course, you can combine them with elements extractor structs (see below). //! //!
//! //! NOTE: XML allows you to have an attribute and an element with the same name //! inside the same element. quick-xml deals with that by prepending a `@` prefix //! to the name of attributes. //!
//!
//! A typical XML with child elements. The root tag name does not matter: //! //! ```xml //! //! ... //! ... //! //! ``` //! //! A structure where each XML child element is mapped to the field. //! Each element name becomes a name of field. The name of the struct itself //! does not matter: //! //! ``` //! # use serde::Deserialize; //! # type T = (); //! # type U = (); //! // Get both elements //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! one: T, //! two: U, //! } //! # quick_xml::de::from_str::(r#"......"#).unwrap(); //! # //! # quick_xml::de::from_str::(r#""#).unwrap_err(); //! # quick_xml::de::from_str::(r#"..."#).unwrap_err(); //! ``` //! ``` //! # use serde::Deserialize; //! # type T = (); //! // Get only the one element, ignore the other //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! one: T, //! } //! # quick_xml::de::from_str::(r#"......"#).unwrap(); //! # quick_xml::de::from_str::(r#"..."#).unwrap(); //! ``` //! ``` //! # use serde::Deserialize; //! // Ignore all elements //! // You can also use the `()` type (unit type) //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName; //! # quick_xml::de::from_str::(r#""#).unwrap(); //! # quick_xml::de::from_str::(r#"......"#).unwrap(); //! # quick_xml::de::from_str::(r#"..."#).unwrap(); //! # quick_xml::de::from_str::(r#"..."#).unwrap(); //! ``` //! //! All these structs can be used to deserialize from an XML on the //! left side depending on amount of information that you want to get. //! Of course, you can combine them with attributes extractor structs (see above). //! //!
//! //! NOTE: XML allows you to have an attribute and an element with the same name //! inside the same element. quick-xml deals with that by prepending a `@` prefix //! to the name of attributes. //!
//!
//! An XML with an attribute and a child element named equally: //! //! ```xml //! <any-tag field="..."> //! <field>...</field> //! </any-tag> //! ``` //! //! //! You MUST specify `#[serde(rename = "@field")]` on a field that will be used //! for an attribute: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type T = (); //! # type U = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@field")] //! attribute: T, //! field: U, //! } //! # assert_eq!( //! # AnyName { attribute: (), field: () }, //! # quick_xml::de::from_str(r#" //! # <any-tag field="..."> //! # <field>...</field> //! # </any-tag> //! # "#).unwrap(), //! # ); //! ``` //!
//! //! ## Optional attributes and elements //! //!
To parse all these XML's......use these Rust type(s)
//! An optional XML attribute that you want to capture. //! The root tag name does not matter: //! //! ```xml //! //! ``` //! ```xml //! //! ``` //! //! //! A structure with an optional field, renamed according to the requirements //! for attributes: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type T = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@optional")] //! optional: Option, //! } //! # assert_eq!(AnyName { optional: Some(()) }, quick_xml::de::from_str(r#""#).unwrap()); //! # assert_eq!(AnyName { optional: None }, quick_xml::de::from_str(r#""#).unwrap()); //! ``` //! When the XML attribute is present, type `T` will be deserialized from //! an attribute value (which is a string). Note, that if `T = String` or other //! string type, the empty attribute is mapped to a `Some("")`, whereas `None` //! represents the missed attribute: //! ```xml //! //! //! //! ``` //!
//! An optional XML elements that you want to capture. //! The root tag name does not matter: //! //! ```xml //! //! ... //! //! ``` //! ```xml //! //! //! //! ``` //! ```xml //! //! ``` //! //! //! A structure with an optional field: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type T = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! optional: Option, //! } //! # assert_eq!(AnyName { optional: Some(()) }, quick_xml::de::from_str(r#"..."#).unwrap()); //! # assert_eq!(AnyName { optional: None }, quick_xml::de::from_str(r#""#).unwrap()); //! ``` //! When the XML element is present, type `T` will be deserialized from an //! element (which is a string or a multi-mapping -- i.e. mapping which can have //! duplicated keys). //!
//! //! Currently some edge cases exist; they are described in issue [#497]. //!
//!
//! //! ## Choices (`xs:choice` XML Schema type) //! //!
To parse all these XML's......use these Rust type(s)
//! An XML with different root tag names, as well as text / CDATA content: //! //! ```xml //! ... //! ``` //! ```xml //! //! ... //! //! ``` //! ```xml //! Text content //! ``` //! //! //! An enum where each variant has the name of a possible root tag. The name of //! the enum itself does not matter. //! //! If you need to get the textual content, mark a variant with `#[serde(rename = "$text")]`. //! //! All these structs can be used to deserialize from any XML on the //! left side depending on amount of information that you want to get: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type T = (); //! # type U = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum AnyName { //! One { #[serde(rename = "@field1")] field1: T }, //! Two { field2: U }, //! //! /// Use unit variant, if you do not care of a content. //! /// You can use tuple variant if you want to parse //! /// textual content as an xs:list. //! /// Struct variants are will pass a string to the //! /// struct enum variant visitor, which typically //! /// returns Err(Custom) //! #[serde(rename = "$text")] //! Text(String), //! } //! # assert_eq!(AnyName::One { field1: () }, quick_xml::de::from_str(r#"..."#).unwrap()); //! # assert_eq!(AnyName::Two { field2: () }, quick_xml::de::from_str(r#"..."#).unwrap()); //! # assert_eq!(AnyName::Text("text cdata ".into()), quick_xml::de::from_str(r#"text "#).unwrap()); //! ``` //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type T = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct Two { //! field2: T, //! } //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum AnyName { //! // `field1` content discarded //! One, //! Two(Two), //! #[serde(rename = "$text")] //! Text, //! } //! # assert_eq!(AnyName::One, quick_xml::de::from_str(r#"..."#).unwrap()); //! 
# assert_eq!(AnyName::Two(Two { field2: () }), quick_xml::de::from_str(r#"..."#).unwrap()); //! # assert_eq!(AnyName::Text, quick_xml::de::from_str(r#"text "#).unwrap()); //! ``` //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum AnyName { //! One, //! // the and textual content will be mapped to this //! #[serde(other)] //! Other, //! } //! # assert_eq!(AnyName::One, quick_xml::de::from_str(r#"..."#).unwrap()); //! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"..."#).unwrap()); //! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"text "#).unwrap()); //! ``` //!
//! //! NOTE: You should have variants for all possible tag names in your enum //! or have an `#[serde(other)]` variant. //! //!
//!
//! //! `` embedded in the other element, and at the same time you want //! to get access to other attributes that can appear in the same container //! (``). Also this case can be described, as if you want to choose //! Rust enum variant based on a tag name: //! //! ```xml //! //! ... //! //! ``` //! ```xml //! //! ... //! //! ``` //! ```xml //! //! Text content //! //! ``` //! //! //! A structure with a field which type is an `enum`. //! //! If you need to get a textual content, mark a variant with `#[serde(rename = "$text")]`. //! //! Names of the enum, struct, and struct field with `Choice` type does not matter: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type T = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum Choice { //! One, //! Two, //! //! /// Use unit variant, if you do not care of a content. //! /// You can use tuple variant if you want to parse //! /// textual content as an xs:list. //! /// Struct variants are will pass a string to the //! /// struct enum variant visitor, which typically //! /// returns Err(Custom) //! #[serde(rename = "$text")] //! Text(String), //! } //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@field")] //! field: T, //! //! #[serde(rename = "$value")] //! any_name: Choice, //! } //! # assert_eq!( //! # AnyName { field: (), any_name: Choice::One }, //! # quick_xml::de::from_str(r#"..."#).unwrap(), //! # ); //! # assert_eq!( //! # AnyName { field: (), any_name: Choice::Two }, //! # quick_xml::de::from_str(r#"..."#).unwrap(), //! # ); //! # assert_eq!( //! # AnyName { field: (), any_name: Choice::Text("text cdata ".into()) }, //! # quick_xml::de::from_str(r#"text "#).unwrap(), //! # ); //! ``` //!
//! //! `` embedded in the other element, and at the same time you want //! to get access to other elements that can appear in the same container //! (``). Also this case can be described, as if you want to choose //! Rust enum variant based on a tag name: //! //! ```xml //! //! ... //! ... //! //! ``` //! ```xml //! //! ... //! ... //! //! ``` //! //! //! A structure with a field which type is an `enum`. //! //! Names of the enum, struct, and struct field with `Choice` type does not matter: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type T = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum Choice { //! One, //! Two, //! } //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! field: T, //! //! #[serde(rename = "$value")] //! any_name: Choice, //! } //! # assert_eq!( //! # AnyName { field: (), any_name: Choice::One }, //! # quick_xml::de::from_str(r#"......"#).unwrap(), //! # ); //! # assert_eq!( //! # AnyName { field: (), any_name: Choice::Two }, //! # quick_xml::de::from_str(r#"......"#).unwrap(), //! # ); //! ``` //! //!
//! //! NOTE: if your `Choice` enum would contain an `#[serde(other)]` //! variant, element `<field>` will be mapped to the `field` and not to the enum //! variant. //!
//! //!
//! //! `` encapsulated in other element with a fixed name: //! //! ```xml //! //! //! ... //! //! //! ``` //! ```xml //! //! //! ... //! //! //! ``` //! //! //! A structure with a field of an intermediate type with one field of `enum` type. //! Actually, this example is not necessary, because you can construct it by yourself //! using the composition rules that were described above. However the XML construction //! described here is very common, so it is shown explicitly. //! //! Names of the enum and struct does not matter: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type T = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum Choice { //! One, //! Two, //! } //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct Holder { //! #[serde(rename = "$value")] //! any_name: Choice, //! } //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@field")] //! field: T, //! //! choice: Holder, //! } //! # assert_eq!( //! # AnyName { field: (), choice: Holder { any_name: Choice::One } }, //! # quick_xml::de::from_str(r#"..."#).unwrap(), //! # ); //! # assert_eq!( //! # AnyName { field: (), choice: Holder { any_name: Choice::Two } }, //! # quick_xml::de::from_str(r#"..."#).unwrap(), //! # ); //! ``` //!
//! //! `` encapsulated in other element with a fixed name: //! //! ```xml //! //! ... //! //! ... //! //! //! ``` //! ```xml //! //! //! ... //! //! ... //! //! ``` //! //! //! A structure with a field of an intermediate type with one field of `enum` type. //! Actually, this example is not necessary, because you can construct it by yourself //! using the composition rules that were described above. However the XML construction //! described here is very common, so it is shown explicitly. //! //! Names of the enum and struct does not matter: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type T = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum Choice { //! One, //! Two, //! } //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct Holder { //! #[serde(rename = "$value")] //! any_name: Choice, //! } //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! field: T, //! //! choice: Holder, //! } //! # assert_eq!( //! # AnyName { field: (), choice: Holder { any_name: Choice::One } }, //! # quick_xml::de::from_str(r#"......"#).unwrap(), //! # ); //! # assert_eq!( //! # AnyName { field: (), choice: Holder { any_name: Choice::Two } }, //! # quick_xml::de::from_str(r#"......"#).unwrap(), //! # ); //! ``` //!
//! //! ## Sequences (`xs:all` and `xs:sequence` XML Schema types) //! //!
To parse all these XML's......use these Rust type(s)
//! A sequence inside of a tag without a dedicated name: //! //! ```xml //! //! ``` //! ```xml //! //! //! //! ``` //! ```xml //! //! //! //! //! //! ``` //! //! //! A structure with a field which is a sequence type, for example, [`Vec`]. //! Because XML syntax does not distinguish between empty sequences and missed //! elements, we should indicate that on the Rust side, because serde will require //! that field `item` exists. You can do that in two possible ways: //! //! Use the `#[serde(default)]` attribute for a [field] or the entire [struct]: //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type Item = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(default)] //! item: Vec, //! } //! # assert_eq!( //! # AnyName { item: vec![] }, //! # quick_xml::de::from_str(r#""#).unwrap(), //! # ); //! # assert_eq!( //! # AnyName { item: vec![()] }, //! # quick_xml::de::from_str(r#""#).unwrap(), //! # ); //! # assert_eq!( //! # AnyName { item: vec![(), (), ()] }, //! # quick_xml::de::from_str(r#""#).unwrap(), //! # ); //! ``` //! //! Use the [`Option`]. In that case inner array will always contains at least one //! element after deserialization: //! ```ignore //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type Item = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! item: Option>, //! } //! # assert_eq!( //! # AnyName { item: None }, //! # quick_xml::de::from_str(r#""#).unwrap(), //! # ); //! # assert_eq!( //! # AnyName { item: Some(vec![()]) }, //! # quick_xml::de::from_str(r#""#).unwrap(), //! # ); //! # assert_eq!( //! # AnyName { item: Some(vec![(), (), ()]) }, //! # quick_xml::de::from_str(r#""#).unwrap(), //! # ); //! ``` //! //! See also [Frequently Used Patterns](#element-lists). //! //! [field]: https://serde.rs/field-attrs.html#default //! [struct]: https://serde.rs/container-attrs.html#default //!
//! A sequence with a strict order, probably with mixed content //! (text / CDATA and tags): //! //! ```xml //! ... //! text //! //! ... //! ... //! ``` //!
//! //! NOTE: this is just an example for showing mapping. XML does not allow //! multiple root tags -- you should wrap the sequence into a tag. //!
//!
//! //! All elements mapped to the heterogeneous sequential type: tuple or named tuple. //! Each element of the tuple should be able to be deserialized from the nested //! element content (`...`), except the enum types which would be deserialized //! from the full element (`...`), so they could use the element name //! to choose the right variant: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type One = (); //! # type Two = (); //! # /* //! type One = ...; //! type Two = ...; //! # */ //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName(One, String, Two, One); //! # assert_eq!( //! # AnyName((), "text cdata".into(), (), ()), //! # quick_xml::de::from_str(r#"...text ......"#).unwrap(), //! # ); //! ``` //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum Choice { //! One, //! } //! # type Two = (); //! # /* //! type Two = ...; //! # */ //! type AnyName = (Choice, String, Two, Choice); //! # assert_eq!( //! # (Choice::One, "text cdata".to_string(), (), Choice::One), //! # quick_xml::de::from_str(r#"...text ......"#).unwrap(), //! # ); //! ``` //!
//! //! NOTE: consecutive text and CDATA nodes are merged into one text node, //! so you cannot have two adjacent string types in your sequence. //! //! NOTE: If the list might contain tags that are interleaved with //! tags that do not belong to the list, you should enable the [`overlapped-lists`] feature. //!
//!
//! A sequence with a non-strict order, probably with a mixed content //! (text / CDATA and tags). //! //! ```xml //! ... //! text //! //! ... //! ... //! ``` //!
//! //! NOTE: this is just an example for showing mapping. XML does not allow //! multiple root tags -- you should wrap the sequence into a tag. //!
//!
//! A homogeneous sequence of elements with a fixed or dynamic size: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum Choice { //! One, //! Two, //! #[serde(other)] //! Other, //! } //! type AnyName = [Choice; 4]; //! # assert_eq!( //! # [Choice::One, Choice::Other, Choice::Two, Choice::One], //! # quick_xml::de::from_str::(r#"...text ......"#).unwrap(), //! # ); //! ``` //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum Choice { //! One, //! Two, //! #[serde(rename = "$text")] //! Other(String), //! } //! type AnyName = Vec; //! # assert_eq!( //! # vec![ //! # Choice::One, //! # Choice::Other("text cdata".into()), //! # Choice::Two, //! # Choice::One, //! # ], //! # quick_xml::de::from_str::(r#"...text ......"#).unwrap(), //! # ); //! ``` //!
//! //! NOTE: consecutive text and CDATA nodes are merged into one text node, //! so you cannot have two adjacent string types in your sequence. //!
//!
//! A sequence with a strict order, probably with a mixed content, //! (text and tags) inside of the other element: //! //! ```xml //! //! ... //! text //! //! ... //! ... //! //! ``` //! //! //! A structure where all child elements mapped to the one field which have //! a heterogeneous sequential type: tuple or named tuple. Each element of the //! tuple should be able to be deserialized from the full element (`...`). //! //! You MUST specify `#[serde(rename = "$value")]` on that field: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type One = (); //! # type Two = (); //! # /* //! type One = ...; //! type Two = ...; //! # */ //! //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@attribute")] //! # attribute: (), //! # /* //! attribute: ..., //! # */ //! // Does not (yet?) supported by the serde //! // https://github.com/serde-rs/serde/issues/1905 //! // #[serde(flatten)] //! #[serde(rename = "$value")] //! any_name: (One, String, Two, One), //! } //! # assert_eq!( //! # AnyName { attribute: (), any_name: ((), "text cdata".into(), (), ()) }, //! # quick_xml::de::from_str("\ //! # \ //! # ...\ //! # text \ //! # \ //! # ...\ //! # ...\ //! # " //! # ).unwrap(), //! # ); //! ``` //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type One = (); //! # type Two = (); //! # /* //! type One = ...; //! type Two = ...; //! # */ //! //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct NamedTuple(One, String, Two, One); //! //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@attribute")] //! # attribute: (), //! # /* //! attribute: ..., //! # */ //! // Does not (yet?) supported by the serde //! // https://github.com/serde-rs/serde/issues/1905 //! // #[serde(flatten)] //! #[serde(rename = "$value")] //! any_name: NamedTuple, //! } //! # assert_eq!( //! 
# AnyName { attribute: (), any_name: NamedTuple((), "text cdata".into(), (), ()) }, //! # quick_xml::de::from_str("\ //! # \ //! # ...\ //! # text \ //! # \ //! # ...\ //! # ...\ //! # " //! # ).unwrap(), //! # ); //! ``` //!
//! //! NOTE: consecutive text and CDATA nodes are merged into one text node, //! so you cannot have two adjacent string types in your sequence. //!
//!
//! A sequence with a non-strict order, probably with a mixed content //! (text / CDATA and tags) inside of the other element: //! //! ```xml //! //! ... //! text //! //! ... //! ... //! //! ``` //! //! //! A structure where all child elements mapped to the one field which have //! a homogeneous sequential type: array-like container. A container type `T` //! should be able to be deserialized from the nested element content (`...`), //! except if it is an enum type which would be deserialized from the full //! element (`...`). //! //! You MUST specify `#[serde(rename = "$value")]` on that field: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum Choice { //! One, //! Two, //! #[serde(rename = "$text")] //! Other(String), //! } //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@attribute")] //! # attribute: (), //! # /* //! attribute: ..., //! # */ //! // Does not (yet?) supported by the serde //! // https://github.com/serde-rs/serde/issues/1905 //! // #[serde(flatten)] //! #[serde(rename = "$value")] //! any_name: [Choice; 4], //! } //! # assert_eq!( //! # AnyName { attribute: (), any_name: [ //! # Choice::One, //! # Choice::Other("text cdata".into()), //! # Choice::Two, //! # Choice::One, //! # ] }, //! # quick_xml::de::from_str("\ //! # \ //! # ...\ //! # text \ //! # \ //! # ...\ //! # ...\ //! # " //! # ).unwrap(), //! # ); //! ``` //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum Choice { //! One, //! Two, //! #[serde(rename = "$text")] //! Other(String), //! } //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@attribute")] //! # attribute: (), //! # /* //! attribute: ..., //! 
# */ //! // Does not (yet?) supported by the serde //! // https://github.com/serde-rs/serde/issues/1905 //! // #[serde(flatten)] //! #[serde(rename = "$value")] //! any_name: Vec, //! } //! # assert_eq!( //! # AnyName { attribute: (), any_name: vec![ //! # Choice::One, //! # Choice::Other("text cdata".into()), //! # Choice::Two, //! # Choice::One, //! # ] }, //! # quick_xml::de::from_str("\ //! # \ //! # ...\ //! # text \ //! # \ //! # ...\ //! # ...\ //! # " //! # ).unwrap(), //! # ); //! ``` //!
//! //! NOTE: consecutive text and CDATA nodes are merged into one text node, //! so you cannot have two adjacent string types in your sequence. //!
//!
//! //! //! Generate Rust types from XML //! ============================ //! //! To speed up the creation of Rust types that represent a given XML file you can //! use the [xml_schema_generator](https://github.com/Thomblin/xml_schema_generator). //! It provides a standalone binary and a Rust library that parses one or more XML files //! and generates a collection of structs that are compatible with quick_xml::de. //! //! //! //! Composition Rules //! ================= //! //! The XML format is very different from other formats supported by `serde`. //! One such difference it is how data in the serialized form is related to //! the Rust type. Usually each byte in the data can be associated only with //! one field in the data structure. However, XML is an exception. //! //! For example, took this XML: //! //! ```xml //! //! //! //! ``` //! //! and try to deserialize it to the struct `AnyName`: //! //! ```no_run //! # use serde::Deserialize; //! #[derive(Deserialize)] //! struct AnyName { // AnyName calls `deserialize_struct` on `` //! // Used data: ^^^^^^^^^^^^^^^^^^^ //! key: Inner, // Inner calls `deserialize_struct` on `` //! // Used data: ^^^^^^^^^^^^ //! } //! #[derive(Deserialize)] //! struct Inner { //! #[serde(rename = "@attr")] //! attr: String, // String calls `deserialize_string` on `value` //! // Used data: ^^^^^ //! } //! ``` //! //! Comments shows what methods of a [`Deserializer`] called by each struct //! `deserialize` method and which input their seen. **Used data** shows, what //! content is actually used for deserializing. As you see, name of the inner //! `` tag used both as a map key / outer struct field name and as part //! of the inner struct (although _value_ of the tag, i.e. `key` is not used //! by it). //! //! //! //! Enum Representations //! ==================== //! //! `quick-xml` represents enums differently in normal fields, `$text` fields and //! `$value` fields. A normal representation is compatible with serde's adjacent //! 
and internal tags feature -- tag for adjacently and internally tagged enums //! are serialized using [`Serializer::serialize_unit_variant`] and deserialized //! using [`Deserializer::deserialize_enum`]. //! //! Use those simple rules to remember, how enum would be represented in XML: //! - In `$value` field the representation is always the same as top-level representation; //! - In `$text` field the representation is always the same as in normal field, //! but surrounding tags with field name are removed; //! - In normal field the representation is always contains a tag with a field name. //! //! Normal enum variant //! ------------------- //! //! To model an `xs:choice` XML construct use `$value` field. //! To model a top-level `xs:choice` just use the enum type. //! //! |Kind |Top-level and in `$value` field |In normal field |In `$text` field | //! |-------|-----------------------------------------|---------------------|---------------------| //! |Unit |`` |`Unit`|`Unit` | //! |Newtype|`42` |Err(Custom) [^0] |Err(Custom) [^0] | //! |Tuple |`42answer` |Err(Custom) [^0] |Err(Custom) [^0] | //! |Struct |`42
answer`|Err(Custom) [^0] |Err(Custom) [^0] | //! //! `$text` enum variant //! -------------------- //! //! |Kind |Top-level and in `$value` field |In normal field |In `$text` field | //! |-------|-----------------------------------------|---------------------|---------------------| //! |Unit |_(empty)_ |`` |_(empty)_ | //! |Newtype|`42` |Err(Custom) [^0] [^1]|Err(Custom) [^0] [^2]| //! |Tuple |`42 answer` |Err(Custom) [^0] [^3]|Err(Custom) [^0] [^4]| //! |Struct |Err(Custom) [^0] |Err(Custom) [^0] |Err(Custom) [^0] | //! //! [^0]: Error is returned by the deserialized type. In case of derived implementation a `Custom` //! error will be returned, but custom deserialize implementation can successfully deserialize //! value from a string which will be passed to it. //! //! [^1]: If this serialize as `42` then it will be ambiguity during deserialization, //! because it clash with `Unit` representation in normal field. //! //! [^2]: If this serialize as `42` then it will be ambiguity during deserialization, //! because it clash with `Unit` representation in `$text` field. //! //! [^3]: If this serialize as `42 answer` then it will be ambiguity during deserialization, //! because it clash with `Unit` representation in normal field. //! //! [^4]: If this serialize as `42 answer` then it will be ambiguity during deserialization, //! because it clash with `Unit` representation in `$text` field. //! //! //! //! Difference between `$text` and `$value` special names //! ===================================================== //! //! quick-xml supports two special names for fields -- `$text` and `$value`. //! Although they may seem the same, there is a distinction. Two different //! names is required mostly for serialization, because quick-xml should know //! how you want to serialize certain constructs, which could be represented //! through XML in multiple different ways. //! //! The only difference is in how complex types and sequences are serialized. //! 
If you doubt which one you should select, begin with [`$value`](#value). //! //! ## `$text` //! `$text` is used when you want to write your XML as a text or a CDATA content. //! More formally, field with that name represents simple type definition with //! `{variety} = atomic` or `{variety} = union` whose basic members are all atomic, //! as described in the [specification]. //! //! As a result, not all types of such fields can be serialized. Only serialization //! of following types are supported: //! - all primitive types (strings, numbers, booleans) //! - unit variants of enumerations (serializes to a name of a variant) //! - newtypes (delegates serialization to inner type) //! - [`Option`] of above (`None` serializes to nothing) //! - sequences (including tuples and tuple variants of enumerations) of above, //! excluding `None` and empty string elements (because it will not be possible //! to deserialize them back). The elements are separated by space(s) //! - unit type `()` and unit structs (serializes to nothing) //! //! Complex types, such as structs and maps, are not supported in this field. //! If you want them, you should use `$value`. //! //! Sequences serialized to a space-delimited string, that is why only certain //! types are allowed in this mode: //! //! ``` //! # use serde::{Deserialize, Serialize}; //! # use quick_xml::de::from_str; //! # use quick_xml::se::to_string; //! #[derive(Deserialize, Serialize, PartialEq, Debug)] //! struct AnyName { //! #[serde(rename = "$text")] //! field: Vec, //! } //! //! let obj = AnyName { field: vec![1, 2, 3] }; //! let xml = to_string(&obj).unwrap(); //! assert_eq!(xml, "1 2 3"); //! //! let object: AnyName = from_str(&xml).unwrap(); //! assert_eq!(object, obj); //! ``` //! //! ## `$value` //!
//! //! NOTE: a name `#content` would better explain the purpose of that field, //! but `$value` is used for compatibility with other XML serde crates, which //! use that name. This will allow you to switch XML crates more smoothly if required. //!
//! //! Representation of primitive types in `$value` does not differ from their //! representation in `$text` field. The difference is how sequences are serialized. //! `$value` serializes each sequence item as a separate XML element. The name //! of that element is taken from the serialized type, and because only `enum`s provide //! such a name (their variant name), only they should be used for such fields. //! //! `$value` fields do not support `struct` types with fields; the serialization //! of such types ends with an `Err(Unsupported)`. Unit structs and the unit //! type `()` serialize to nothing and can be deserialized from any content. //! //! Serialization and deserialization of a `$value` field is performed as usual, except //! that the name for an XML element will be given by the serialized type, instead of //! the field. The latter allows serializing enumerated types, where the variant is encoded //! as a tag name, and, so, representing an XSD `xs:choice` schema by the Rust `enum`. //! //! In the example below, field will be serialized as `<field>A</field>`, because elements //! get their names from the field name. It cannot be deserialized, because `Enum` //! expects elements `<A/>`, `<B/>` or `<C/>`, but `AnyName` looked only for `<field>`: //! //! ``` //! # use serde::{Deserialize, Serialize}; //! # use pretty_assertions::assert_eq; //! # #[derive(PartialEq, Debug)] //! #[derive(Deserialize, Serialize)] //! enum Enum { A, B, C } //! //! # #[derive(PartialEq, Debug)] //! #[derive(Deserialize, Serialize)] //! struct AnyName { //! // <field>A</field>, <field>B</field>, or <field>C</field> //! field: Enum, //! } //! # assert_eq!( //! # quick_xml::se::to_string(&AnyName { field: Enum::A }).unwrap(), //! # "<AnyName><field>A</field></AnyName>", //! # ); //! # assert_eq!( //! # AnyName { field: Enum::B }, //! # quick_xml::de::from_str("<root><field>B</field></root>").unwrap(), //! # ); //! ``` //! //! If you rename field to `$value`, then `field` would be serialized as `<A/>`, //! `<B/>` or `<C/>`, depending on its content. It is also possible to //! deserialize it from the same elements: //! //! ``` //! 
# use serde::{Deserialize, Serialize}; //! # use pretty_assertions::assert_eq; //! # #[derive(Deserialize, Serialize, PartialEq, Debug)] //! # enum Enum { A, B, C } //! # //! # #[derive(PartialEq, Debug)] //! #[derive(Deserialize, Serialize)] //! struct AnyName { //! // , or //! #[serde(rename = "$value")] //! field: Enum, //! } //! # assert_eq!( //! # quick_xml::se::to_string(&AnyName { field: Enum::A }).unwrap(), //! # "", //! # ); //! # assert_eq!( //! # AnyName { field: Enum::B }, //! # quick_xml::de::from_str("").unwrap(), //! # ); //! ``` //! //! ### Primitives and sequences of primitives //! //! Sequences serialized to a list of elements. Note, that types that does not //! produce their own tag (i. e. primitives) are written as is, without delimiters: //! //! ``` //! # use serde::{Deserialize, Serialize}; //! # use pretty_assertions::assert_eq; //! # use quick_xml::de::from_str; //! # use quick_xml::se::to_string; //! #[derive(Deserialize, Serialize, PartialEq, Debug)] //! struct AnyName { //! #[serde(rename = "$value")] //! field: Vec, //! } //! //! let obj = AnyName { field: vec![1, 2, 3] }; //! let xml = to_string(&obj).unwrap(); //! // Note, that types that does not produce their own tag are written as is! //! assert_eq!(xml, "123"); //! //! let object: AnyName = from_str("123").unwrap(); //! assert_eq!(object, AnyName { field: vec![123] }); //! //! // `1 2 3` is mapped to a single `usize` element //! // It is impossible to deserialize list of primitives to such field //! from_str::("1 2 3").unwrap_err(); //! ``` //! //! A particular case of that example is a string `$value` field, which probably //! would be a most used example of that attribute: //! //! ``` //! # use serde::{Deserialize, Serialize}; //! # use pretty_assertions::assert_eq; //! # use quick_xml::de::from_str; //! # use quick_xml::se::to_string; //! #[derive(Deserialize, Serialize, PartialEq, Debug)] //! struct AnyName { //! #[serde(rename = "$value")] //! field: String, //! } //! //! 
let obj = AnyName { field: "content".to_string() }; //! let xml = to_string(&obj).unwrap(); //! assert_eq!(xml, "content"); //! ``` //! //! ### Structs and sequences of structs //! //! Note, that structures do not have a serializable name as well (name of the //! type is never used), so it is impossible to serialize non-unit struct or //! sequence of non-unit structs in `$value` field. (sequences of) unit structs //! are serialized as empty string, because units itself serializing //! to nothing: //! //! ``` //! # use serde::{Deserialize, Serialize}; //! # use pretty_assertions::assert_eq; //! # use quick_xml::de::from_str; //! # use quick_xml::se::to_string; //! #[derive(Deserialize, Serialize, PartialEq, Debug)] //! struct Unit; //! //! #[derive(Deserialize, Serialize, PartialEq, Debug)] //! struct AnyName { //! // #[serde(default)] is required to deserialization of empty lists //! // This is a general note, not related to $value //! #[serde(rename = "$value", default)] //! field: Vec, //! } //! //! let obj = AnyName { field: vec![Unit, Unit, Unit] }; //! let xml = to_string(&obj).unwrap(); //! assert_eq!(xml, ""); //! //! let object: AnyName = from_str("").unwrap(); //! assert_eq!(object, AnyName { field: vec![] }); //! //! let object: AnyName = from_str("").unwrap(); //! assert_eq!(object, AnyName { field: vec![] }); //! //! let object: AnyName = from_str("").unwrap(); //! assert_eq!(object, AnyName { field: vec![Unit, Unit, Unit] }); //! ``` //! //! ### Enums and sequences of enums //! //! Enumerations uses the variant name as an element name: //! //! ``` //! # use serde::{Deserialize, Serialize}; //! # use pretty_assertions::assert_eq; //! # use quick_xml::de::from_str; //! # use quick_xml::se::to_string; //! #[derive(Deserialize, Serialize, PartialEq, Debug)] //! struct AnyName { //! #[serde(rename = "$value")] //! field: Vec, //! } //! //! #[derive(Deserialize, Serialize, PartialEq, Debug)] //! enum Enum { A, B, C } //! //! 
let obj = AnyName { field: vec![Enum::A, Enum::B, Enum::C] }; //! let xml = to_string(&obj).unwrap(); //! assert_eq!( //! xml, //! "\ //! \ //! \ //! \ //! " //! ); //! //! let object: AnyName = from_str(&xml).unwrap(); //! assert_eq!(object, obj); //! ``` //! //! ---------------------------------------------------------------------------- //! //! You can have either `$text` or `$value` field in your structs. Unfortunately, //! that is not enforced, so you can theoretically have both, but you should //! avoid that. //! //! //! //! Frequently Used Patterns //! ======================== //! //! Some XML constructs used so frequent, that it is worth to document the recommended //! way to represent them in the Rust. The sections below describes them. //! //! `` lists //! ----------------- //! Many XML formats wrap lists of elements in the additional container, //! although this is not required by the XML rules: //! //! ```xml //! //! //! //! //! //! //! //! //! //! //! ``` //! In this case, there is a great desire to describe this XML in this way: //! ``` //! /// Represents //! type Element = (); //! //! /// Represents ... //! struct AnyName { //! // Incorrect //! list: Vec, //! } //! ``` //! This will not work, because potentially `` element can have attributes //! and other elements inside. You should define the struct for the `` //! explicitly, as you do that in the XSD for that XML: //! ``` //! /// Represents //! type Element = (); //! //! /// Represents ... //! struct AnyName { //! // Correct //! list: List, //! } //! /// Represents ... //! struct List { //! element: Vec, //! } //! ``` //! //! If you want to simplify your API, you could write a simple function for unwrapping //! inner list and apply it via [`deserialize_with`]: //! //! ``` //! # use pretty_assertions::assert_eq; //! use quick_xml::de::from_str; //! use serde::{Deserialize, Deserializer}; //! //! /// Represents //! type Element = (); //! //! /// Represents ... //! 
#[derive(Deserialize, Debug, PartialEq)] //! struct AnyName { //! #[serde(deserialize_with = "unwrap_list")] //! list: Vec, //! } //! //! fn unwrap_list<'de, D>(deserializer: D) -> Result, D::Error> //! where //! D: Deserializer<'de>, //! { //! /// Represents ... //! #[derive(Deserialize)] //! struct List { //! // default allows empty list //! #[serde(default)] //! element: Vec, //! } //! Ok(List::deserialize(deserializer)?.element) //! } //! //! assert_eq!( //! AnyName { list: vec![(), (), ()] }, //! from_str(" //! //! //! //! //! //! //! //! ").unwrap(), //! ); //! ``` //! //! Instead of writing such functions manually, you also could try . //! //! Overlapped (Out-of-Order) Elements //! ---------------------------------- //! In the case that the list might contain tags that are overlapped with //! tags that do not correspond to the list (this is a usual case in XML //! documents) like this: //! ```xml //! //! //! //! //! //! //! ``` //! you should enable the [`overlapped-lists`] feature to make it possible //! to deserialize this to: //! ```no_run //! # use serde::Deserialize; //! #[derive(Deserialize)] //! #[serde(rename_all = "kebab-case")] //! struct AnyName { //! item: Vec<()>, //! another_item: (), //! } //! ``` //! //! //! Internally Tagged Enums //! ----------------------- //! [Tagged enums] are currently not supported because of an issue in the Serde //! design (see [serde#1183] and [quick-xml#586]) and missing optimizations in //! Serde which could be useful for XML parsing ([serde#1495]). This can be worked //! around by manually implementing deserialize with `#[serde(deserialize_with = "func")]` //! or implementing [`Deserialize`], but this can get very tedious very fast for //! files with large amounts of tagged enums. To help with this issue quick-xml //! provides a macro [`impl_deserialize_for_internally_tagged_enum!`]. See the //! macro documentation for details. //! //! //! [`overlapped-lists`]: ../index.html#overlapped-lists //! 
[specification]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition //! [`deserialize_with`]: https://serde.rs/field-attrs.html#deserialize_with //! [#497]: https://github.com/tafia/quick-xml/issues/497 //! [`Serializer::serialize_unit_variant`]: serde::Serializer::serialize_unit_variant //! [`Deserializer::deserialize_enum`]: serde::Deserializer::deserialize_enum //! [Tagged enums]: https://serde.rs/enum-representations.html#internally-tagged //! [serde#1183]: https://github.com/serde-rs/serde/issues/1183 //! [serde#1495]: https://github.com/serde-rs/serde/issues/1495 //! [quick-xml#586]: https://github.com/tafia/quick-xml/issues/586 //! [`impl_deserialize_for_internally_tagged_enum!`]: crate::impl_deserialize_for_internally_tagged_enum // Macros should be defined before the modules that using them // Also, macros should be imported before using them use serde::serde_if_integer128; macro_rules! deserialize_type { ($deserialize:ident => $visit:ident, $($mut:tt)?) => { fn $deserialize($($mut)? self, visitor: V) -> Result where V: Visitor<'de>, { // No need to unescape because valid integer representations cannot be escaped let text = self.read_string()?; visitor.$visit(text.parse()?) } }; } /// Implement deserialization methods for scalar types, such as numbers, strings, /// byte arrays, booleans and identifiers. macro_rules! deserialize_primitives { ($($mut:tt)?) => { deserialize_type!(deserialize_i8 => visit_i8, $($mut)?); deserialize_type!(deserialize_i16 => visit_i16, $($mut)?); deserialize_type!(deserialize_i32 => visit_i32, $($mut)?); deserialize_type!(deserialize_i64 => visit_i64, $($mut)?); deserialize_type!(deserialize_u8 => visit_u8, $($mut)?); deserialize_type!(deserialize_u16 => visit_u16, $($mut)?); deserialize_type!(deserialize_u32 => visit_u32, $($mut)?); deserialize_type!(deserialize_u64 => visit_u64, $($mut)?); serde_if_integer128! 
{ deserialize_type!(deserialize_i128 => visit_i128, $($mut)?); deserialize_type!(deserialize_u128 => visit_u128, $($mut)?); } deserialize_type!(deserialize_f32 => visit_f32, $($mut)?); deserialize_type!(deserialize_f64 => visit_f64, $($mut)?); fn deserialize_bool($($mut)? self, visitor: V) -> Result where V: Visitor<'de>, { let text = self.read_string()?; str2bool(&text, visitor) } /// Character represented as [strings](#method.deserialize_str). #[inline] fn deserialize_char(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_str(visitor) } fn deserialize_str($($mut)? self, visitor: V) -> Result where V: Visitor<'de>, { let text = self.read_string()?; match text { Cow::Borrowed(string) => visitor.visit_borrowed_str(string), Cow::Owned(string) => visitor.visit_string(string), } } /// Representation of owned strings the same as [non-owned](#method.deserialize_str). #[inline] fn deserialize_string(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_str(visitor) } /// Forwards deserialization to the [`deserialize_any`](#method.deserialize_any). #[inline] fn deserialize_bytes(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_any(visitor) } /// Forwards deserialization to the [`deserialize_bytes`](#method.deserialize_bytes). #[inline] fn deserialize_byte_buf(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_bytes(visitor) } /// Representation of the named units the same as [unnamed units](#method.deserialize_unit). #[inline] fn deserialize_unit_struct( self, _name: &'static str, visitor: V, ) -> Result where V: Visitor<'de>, { self.deserialize_unit(visitor) } /// Representation of tuples the same as [sequences](#method.deserialize_seq). #[inline] fn deserialize_tuple(self, _len: usize, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_seq(visitor) } /// Representation of named tuples the same as [unnamed tuples](#method.deserialize_tuple). 
#[inline] fn deserialize_tuple_struct( self, _name: &'static str, len: usize, visitor: V, ) -> Result where V: Visitor<'de>, { self.deserialize_tuple(len, visitor) } /// Forwards deserialization to the [`deserialize_struct`](#method.deserialize_struct) /// with empty name and fields. #[inline] fn deserialize_map(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_struct("", &[], visitor) } /// Identifiers represented as [strings](#method.deserialize_str). #[inline] fn deserialize_identifier(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_str(visitor) } /// Forwards deserialization to the [`deserialize_unit`](#method.deserialize_unit). #[inline] fn deserialize_ignored_any(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_unit(visitor) } }; } mod key; mod map; mod resolver; mod simple_type; mod text; mod var; pub use crate::errors::serialize::DeError; pub use resolver::{EntityResolver, PredefinedEntityResolver}; use crate::{ de::map::ElementMapAccess, encoding::Decoder, errors::Error, events::{BytesCData, BytesEnd, BytesStart, BytesText, Event}, name::QName, reader::Reader, }; use serde::de::{self, Deserialize, DeserializeOwned, DeserializeSeed, SeqAccess, Visitor}; use std::borrow::Cow; #[cfg(feature = "overlapped-lists")] use std::collections::VecDeque; use std::io::BufRead; use std::mem::replace; #[cfg(feature = "overlapped-lists")] use std::num::NonZeroUsize; use std::ops::Deref; /// Data represented by a text node or a CDATA node. XML markup is not expected pub(crate) const TEXT_KEY: &str = "$text"; /// Data represented by any XML markup inside pub(crate) const VALUE_KEY: &str = "$value"; /// Decoded and concatenated content of consequent [`Text`] and [`CData`] /// events. _Consequent_ means that events should follow each other or be /// delimited only by (any count of) [`Comment`] or [`PI`] events. /// /// Internally text is stored in `Cow`. 
Cloning of text is cheap while it /// is borrowed and makes copies of data when it is owned. /// /// [`Text`]: Event::Text /// [`CData`]: Event::CData /// [`Comment`]: Event::Comment /// [`PI`]: Event::PI #[derive(Clone, Debug, PartialEq, Eq)] pub struct Text<'a> { text: Cow<'a, str>, } impl<'a> Deref for Text<'a> { type Target = str; #[inline] fn deref(&self) -> &Self::Target { self.text.deref() } } impl<'a> From<&'a str> for Text<'a> { #[inline] fn from(text: &'a str) -> Self { Self { text: Cow::Borrowed(text), } } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Simplified event which contains only these variants that used by deserializer #[derive(Clone, Debug, PartialEq, Eq)] pub enum DeEvent<'a> { /// Start tag (with attributes) ``. Start(BytesStart<'a>), /// End tag ``. End(BytesEnd<'a>), /// Decoded and concatenated content of consequent [`Text`] and [`CData`] /// events. _Consequent_ means that events should follow each other or be /// delimited only by (any count of) [`Comment`] or [`PI`] events. /// /// [`Text`]: Event::Text /// [`CData`]: Event::CData /// [`Comment`]: Event::Comment /// [`PI`]: Event::PI Text(Text<'a>), /// End of XML document. Eof, } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Simplified event which contains only these variants that used by deserializer, /// but [`Text`] events not yet fully processed. /// /// [`Text`] events should be trimmed if they does not surrounded by the other /// [`Text`] or [`CData`] events. This event contains intermediate state of [`Text`] /// event, where they are trimmed from the start, but not from the end. To trim /// end spaces we should lookahead by one deserializer event (i. e. skip all /// comments and processing instructions). 
/// /// [`Text`]: Event::Text /// [`CData`]: Event::CData #[derive(Clone, Debug, PartialEq, Eq)] pub enum PayloadEvent<'a> { /// Start tag (with attributes) ``. Start(BytesStart<'a>), /// End tag ``. End(BytesEnd<'a>), /// Escaped character data between tags. Text(BytesText<'a>), /// Unescaped character data stored in ``. CData(BytesCData<'a>), /// Document type definition data (DTD) stored in ``. DocType(BytesText<'a>), /// End of XML document. Eof, } impl<'a> PayloadEvent<'a> { /// Ensures that all data is owned to extend the object's lifetime if necessary. #[inline] fn into_owned(self) -> PayloadEvent<'static> { match self { PayloadEvent::Start(e) => PayloadEvent::Start(e.into_owned()), PayloadEvent::End(e) => PayloadEvent::End(e.into_owned()), PayloadEvent::Text(e) => PayloadEvent::Text(e.into_owned()), PayloadEvent::CData(e) => PayloadEvent::CData(e.into_owned()), PayloadEvent::DocType(e) => PayloadEvent::DocType(e.into_owned()), PayloadEvent::Eof => PayloadEvent::Eof, } } } /// An intermediate reader that consumes [`PayloadEvent`]s and produces final [`DeEvent`]s. /// [`PayloadEvent::Text`] events, that followed by any event except /// [`PayloadEvent::Text`] or [`PayloadEvent::CData`], are trimmed from the end. struct XmlReader<'i, R: XmlRead<'i>, E: EntityResolver = PredefinedEntityResolver> { /// A source of low-level XML events reader: R, /// Intermediate event, that could be returned by the next call to `next()`. /// If that is the `Text` event then leading spaces already trimmed, but /// trailing spaces is not. Before the event will be returned, trimming of /// the spaces could be necessary lookahead: Result, DeError>, /// Used to resolve unknown entities that would otherwise cause the parser /// to return an [`EscapeError::UnrecognizedEntity`] error. 
/// /// [`EscapeError::UnrecognizedEntity`]: crate::escape::EscapeError::UnrecognizedEntity entity_resolver: E, } impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> { fn new(mut reader: R, entity_resolver: E) -> Self { // Lookahead by one event immediately, so we do not need to check in the // loop if we need lookahead or not let lookahead = reader.next(); Self { reader, lookahead, entity_resolver, } } /// Returns `true` if all events was consumed const fn is_empty(&self) -> bool { matches!(self.lookahead, Ok(PayloadEvent::Eof)) } /// Read next event and put it in lookahead, return the current lookahead #[inline(always)] fn next_impl(&mut self) -> Result, DeError> { replace(&mut self.lookahead, self.reader.next()) } /// Returns `true` when next event is not a text event in any form. #[inline(always)] const fn current_event_is_last_text(&self) -> bool { // If next event is a text or CDATA, we should not trim trailing spaces !matches!( self.lookahead, Ok(PayloadEvent::Text(_)) | Ok(PayloadEvent::CData(_)) ) } /// Read all consequent [`Text`] and [`CData`] events until non-text event /// occurs. Content of all events would be appended to `result` and returned /// as [`DeEvent::Text`]. /// /// [`Text`]: PayloadEvent::Text /// [`CData`]: PayloadEvent::CData fn drain_text(&mut self, mut result: Cow<'i, str>) -> Result, DeError> { loop { if self.current_event_is_last_text() { break; } match self.next_impl()? { PayloadEvent::Text(mut e) => { if self.current_event_is_last_text() { // FIXME: Actually, we should trim after decoding text, but now we trim before e.inplace_trim_end(); } result .to_mut() .push_str(&e.unescape_with(|entity| self.entity_resolver.resolve(entity))?); } PayloadEvent::CData(e) => result.to_mut().push_str(&e.decode()?), // SAFETY: current_event_is_last_text checks that event is Text or CData _ => unreachable!("Only `Text` and `CData` events can come here"), } } Ok(DeEvent::Text(Text { text: result })) } /// Return an input-borrowing event. 
fn next(&mut self) -> Result, DeError> { loop { return match self.next_impl()? { PayloadEvent::Start(e) => Ok(DeEvent::Start(e)), PayloadEvent::End(e) => Ok(DeEvent::End(e)), PayloadEvent::Text(mut e) => { if self.current_event_is_last_text() && e.inplace_trim_end() { // FIXME: Actually, we should trim after decoding text, but now we trim before continue; } self.drain_text(e.unescape_with(|entity| self.entity_resolver.resolve(entity))?) } PayloadEvent::CData(e) => self.drain_text(e.decode()?), PayloadEvent::DocType(e) => { self.entity_resolver .capture(e) .map_err(|err| DeError::Custom(format!("cannot parse DTD: {}", err)))?; continue; } PayloadEvent::Eof => Ok(DeEvent::Eof), }; } } #[inline] fn read_to_end(&mut self, name: QName) -> Result<(), DeError> { match self.lookahead { // We pre-read event with the same name that is required to be skipped. // First call of `read_to_end` will end out pre-read event, the second // will consume other events Ok(PayloadEvent::Start(ref e)) if e.name() == name => { let result1 = self.reader.read_to_end(name); let result2 = self.reader.read_to_end(name); // In case of error `next_impl` returns `Eof` let _ = self.next_impl(); result1?; result2?; } // We pre-read event with the same name that is required to be skipped. // Because this is end event, we already consume the whole tree, so // nothing to do, just update lookahead Ok(PayloadEvent::End(ref e)) if e.name() == name => { let _ = self.next_impl(); } Ok(_) => { let result = self.reader.read_to_end(name); // In case of error `next_impl` returns `Eof` let _ = self.next_impl(); result?; } // Read next lookahead event, unpack error from the current lookahead Err(_) => { self.next_impl()?; } } Ok(()) } #[inline] fn decoder(&self) -> Decoder { self.reader.decoder() } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Deserialize an instance of type `T` from a string of XML text. 
pub fn from_str<'de, T>(s: &'de str) -> Result<T, DeError>
where
    T: Deserialize<'de>,
{
    let mut de = Deserializer::from_str(s);
    T::deserialize(&mut de)
}

/// Deserialize from a reader. This method will do internal copies of data
/// read from `reader`. If you have a `&str` input and want to borrow
/// as much as possible, use [`from_str`].
pub fn from_reader<R, T>(reader: R) -> Result<T, DeError>
where
    R: BufRead,
    T: DeserializeOwned,
{
    let mut de = Deserializer::from_reader(reader);
    T::deserialize(&mut de)
}

/// Interprets `value` as a boolean and passes the result to `visitor.visit_bool`.
///
/// Besides `true`/`false`/`1`/`0` a number of common spellings (`True`, `yes`,
/// `n`, ...) are accepted; the match is exact and case-sensitive, anything else
/// gives [`DeError::InvalidBoolean`].
// TODO: According to the https://www.w3.org/TR/xmlschema11-2/#boolean,
//       valid boolean representations are only "true", "false", "1", and "0"
fn str2bool<'de, V>(value: &str, visitor: V) -> Result<V::Value, DeError>
where
    V: de::Visitor<'de>,
{
    match value {
        "true" | "1" | "True" | "TRUE" | "t" | "Yes" | "YES" | "yes" | "y" => {
            visitor.visit_bool(true)
        }
        "false" | "0" | "False" | "FALSE" | "f" | "No" | "NO" | "no" | "n" => {
            visitor.visit_bool(false)
        }
        _ => Err(DeError::InvalidBoolean(value.into())),
    }
}

/// Byte-oriented counterpart of [`str2bool`].
///
/// With the `encoding` feature the bytes are decoded first and then passed to
/// [`str2bool`]. Without it the raw bytes are matched against the same list of
/// spellings and `decoder` is used only to build the error message.
fn deserialize_bool<'de, V>(value: &[u8], decoder: Decoder, visitor: V) -> Result<V::Value, DeError>
where
    V: Visitor<'de>,
{
    #[cfg(feature = "encoding")]
    {
        let value = decoder.decode(value)?;
        // No need to unescape because valid boolean representations cannot be escaped
        str2bool(value.as_ref(), visitor)
    }

    #[cfg(not(feature = "encoding"))]
    {
        // No need to unescape because valid boolean representations cannot be escaped
        match value {
            b"true" | b"1" | b"True" | b"TRUE" | b"t" | b"Yes" | b"YES" | b"yes" | b"y" => {
                visitor.visit_bool(true)
            }
            b"false" | b"0" | b"False" | b"FALSE" | b"f" | b"No" | b"NO" | b"no" | b"n" => {
                visitor.visit_bool(false)
            }
            e => Err(DeError::InvalidBoolean(decoder.decode(e)?.into())),
        }
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// A structure that deserializes XML into Rust values.
pub struct Deserializer<'de, R, E: EntityResolver = PredefinedEntityResolver> where R: XmlRead<'de>, { /// An XML reader that streams events into this deserializer reader: XmlReader<'de, R, E>, /// When deserializing sequences sometimes we have to skip unwanted events. /// That events should be stored and then replayed. This is a replay buffer, /// that streams events while not empty. When it exhausted, events will /// requested from [`Self::reader`]. #[cfg(feature = "overlapped-lists")] read: VecDeque>, /// When deserializing sequences sometimes we have to skip events, because XML /// is tolerant to elements order and even if in the XSD order is strictly /// specified (using `xs:sequence`) most of XML parsers allows order violations. /// That means, that elements, forming a sequence, could be overlapped with /// other elements, do not related to that sequence. /// /// In order to support this, deserializer will scan events and skip unwanted /// events, store them here. After call [`Self::start_replay()`] all events /// moved from this to [`Self::read`]. #[cfg(feature = "overlapped-lists")] write: VecDeque>, /// Maximum number of events that can be skipped when processing sequences /// that occur out-of-order. This field is used to prevent potential /// denial-of-service (DoS) attacks which could cause infinite memory /// consumption when parsing a very large amount of XML into a sequence field. #[cfg(feature = "overlapped-lists")] limit: Option, #[cfg(not(feature = "overlapped-lists"))] peek: Option>, /// Buffer to store attribute name as a field name exposed to serde consumers key_buf: String, } impl<'de, R, E> Deserializer<'de, R, E> where R: XmlRead<'de>, E: EntityResolver, { /// Create an XML deserializer from one of the possible quick_xml input sources. 
/// /// Typically it is more convenient to use one of these methods instead: /// /// - [`Deserializer::from_str`] /// - [`Deserializer::from_reader`] fn new(reader: R, entity_resolver: E) -> Self { Self { reader: XmlReader::new(reader, entity_resolver), #[cfg(feature = "overlapped-lists")] read: VecDeque::new(), #[cfg(feature = "overlapped-lists")] write: VecDeque::new(), #[cfg(feature = "overlapped-lists")] limit: None, #[cfg(not(feature = "overlapped-lists"))] peek: None, key_buf: String::new(), } } /// Returns `true` if all events was consumed. pub fn is_empty(&self) -> bool { #[cfg(feature = "overlapped-lists")] if self.read.is_empty() { return self.reader.is_empty(); } #[cfg(not(feature = "overlapped-lists"))] if self.peek.is_none() { return self.reader.is_empty(); } false } /// Returns the underlying XML reader. /// /// ``` /// # use pretty_assertions::assert_eq; /// use serde::Deserialize; /// use quick_xml::de::Deserializer; /// use quick_xml::Reader; /// /// #[derive(Deserialize)] /// struct SomeStruct { /// field1: String, /// field2: String, /// } /// /// // Try to deserialize from broken XML /// let mut de = Deserializer::from_str( /// "" /// // 0 ^= 28 ^= 41 /// ); /// /// let err = SomeStruct::deserialize(&mut de); /// assert!(err.is_err()); /// /// let reader: &Reader<_> = de.get_ref().get_ref(); /// /// assert_eq!(reader.error_position(), 28); /// assert_eq!(reader.buffer_position(), 41); /// ``` pub const fn get_ref(&self) -> &R { &self.reader.reader } /// Set the maximum number of events that could be skipped during deserialization /// of sequences. /// /// If `` contains more than specified nested elements, `$text` or /// CDATA nodes, then [`DeError::TooManyEvents`] will be returned during /// deserialization of sequence field (any type that uses [`deserialize_seq`] /// for the deserialization, for example, `Vec`). 
///
    /// This method can be used to prevent a [DoS] attack and infinite memory
    /// consumption when parsing a very large XML to a sequence field.
    ///
    /// It is strongly recommended to set limit to some value when you parse data
    /// from untrusted sources. You should choose a value that your typical XMLs
    /// can have _between_ different elements that corresponds to the same sequence.
    ///
    /// # Examples
    ///
    /// Let's imagine, that we deserialize such structure:
    /// ```
    /// struct List {
    ///   item: Vec<()>,
    /// }
    /// ```
    ///
    /// The XML that we try to parse look like this:
    /// ```xml
    /// <any-name>
    ///   <item/>
    ///   <!-- Bufferization starts at this point -->
    ///   <another-item>
    ///     <some-element>with text</some-element>
    ///     <yet-another-element/>
    ///   </another-item>
    ///   <!-- Buffer will be emptied at this point; 7 events were buffered -->
    ///   <item/>
    ///   <!-- There is nothing to buffer, because elements with the same name follow each other -->
    ///   <item/>
    /// </any-name>
    /// ```
    ///
    /// There, when we deserialize the `item` field, we need to buffer 7 events,
    /// before we can deserialize the second `<item/>`:
    ///
    /// - `<another-item>`
    /// - `<some-element>`
    /// - `$text(with text)`
    /// - `</some-element>`
    /// - `<yet-another-element/>` (virtual start event)
    /// - `<yet-another-element/>` (virtual end event)
    /// - `</another-item>`
    ///
    /// Note, that `<yet-another-element/>` internally represented as 2 events:
    /// one for the start tag and one for the end tag. In the future this can be
    /// eliminated, but for now we use [auto-expanding feature] of a reader,
    /// because this simplifies deserializer code.
    ///
    /// [`deserialize_seq`]: serde::Deserializer::deserialize_seq
    /// [DoS]: https://en.wikipedia.org/wiki/Denial-of-service_attack
    /// [auto-expanding feature]: crate::reader::Config::expand_empty_elements
    #[cfg(feature = "overlapped-lists")]
    pub fn event_buffer_size(&mut self, limit: Option<NonZeroUsize>) -> &mut Self {
        self.limit = limit;
        self
    }

    /// Returns a reference to the next event without consuming it.
    ///
    /// If the replay buffer is empty, one event is pulled from the reader and
    /// stored at the front of the buffer so a following `next()` returns it.
    #[cfg(feature = "overlapped-lists")]
    fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
        if self.read.is_empty() {
            self.read.push_front(self.reader.next()?);
        }
        if let Some(event) = self.read.front() {
            return Ok(event);
        }
        // SAFETY: `self.read` was filled in the code above.
        // NOTE: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }`
        // if unsafe code will be allowed
        unreachable!()
    }

    /// Returns a reference to the next event without consuming it.
    ///
    /// The event is cached in `self.peek` until `next()` takes it.
    #[cfg(not(feature = "overlapped-lists"))]
    fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
        if self.peek.is_none() {
            self.peek = Some(self.reader.next()?);
        }
        match self.peek.as_ref() {
            Some(v) => Ok(v),
            // SAFETY: a `None` variant for `self.peek` would have been replaced
            // by a `Some` variant in the code above.
            // TODO: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }`
            // if unsafe code will be allowed
            None => unreachable!(),
        }
    }

    /// Returns the next event, preferring buffered (replayed or peeked) events
    /// over reading a new one from the underlying reader.
    fn next(&mut self) -> Result<DeEvent<'de>, DeError> {
        // Replay skipped or peeked events
        #[cfg(feature = "overlapped-lists")]
        if let Some(event) = self.read.pop_front() {
            return Ok(event);
        }
        #[cfg(not(feature = "overlapped-lists"))]
        if let Some(e) = self.peek.take() {
            return Ok(e);
        }
        self.reader.next()
    }

    /// Returns the mark after which all events, skipped by [`Self::skip()`] call,
    /// should be replayed after calling [`Self::start_replay()`].
    #[cfg(feature = "overlapped-lists")]
    #[inline]
    #[must_use = "returned checkpoint should be used in `start_replay`"]
    fn skip_checkpoint(&self) -> usize {
        self.write.len()
    }

    /// Extracts XML tree of events from and stores them in the skipped events
    /// buffer from which they can be retrieved later. You MUST call
    /// [`Self::start_replay()`] after calling this to give access to the skipped
    /// events and release internal buffers.
#[cfg(feature = "overlapped-lists")] fn skip(&mut self) -> Result<(), DeError> { let event = self.next()?; self.skip_event(event)?; match self.write.back() { // Skip all subtree, if we skip a start event Some(DeEvent::Start(e)) => { let end = e.name().as_ref().to_owned(); let mut depth = 0; loop { let event = self.next()?; match event { DeEvent::Start(ref e) if e.name().as_ref() == end => { self.skip_event(event)?; depth += 1; } DeEvent::End(ref e) if e.name().as_ref() == end => { self.skip_event(event)?; if depth == 0 { break; } depth -= 1; } DeEvent::Eof => { self.skip_event(event)?; break; } _ => self.skip_event(event)?, } } } _ => (), } Ok(()) } #[cfg(feature = "overlapped-lists")] #[inline] fn skip_event(&mut self, event: DeEvent<'de>) -> Result<(), DeError> { if let Some(max) = self.limit { if self.write.len() >= max.get() { return Err(DeError::TooManyEvents(max)); } } self.write.push_back(event); Ok(()) } /// Moves buffered events, skipped after given `checkpoint` from [`Self::write`] /// skip buffer to [`Self::read`] buffer. /// /// After calling this method, [`Self::peek()`] and [`Self::next()`] starts /// return events that was skipped previously by calling [`Self::skip()`], /// and only when all that events will be consumed, the deserializer starts /// to drain events from underlying reader. /// /// This method MUST be called if any number of [`Self::skip()`] was called /// after [`Self::new()`] or `start_replay()` or you'll lost events. #[cfg(feature = "overlapped-lists")] fn start_replay(&mut self, checkpoint: usize) { if checkpoint == 0 { self.write.append(&mut self.read); std::mem::swap(&mut self.read, &mut self.write); } else { let mut read = self.write.split_off(checkpoint); read.append(&mut self.read); self.read = read; } } #[inline] fn read_string(&mut self) -> Result, DeError> { self.read_string_impl(true) } /// Consumes consequent [`Text`] and [`CData`] (both a referred below as a _text_) /// events, merge them into one string. 
If there are no such events, returns /// an empty string. /// /// If `allow_start` is `false`, then only text events are consumed, for other /// events an error is returned (see table below). /// /// If `allow_start` is `true`, then two or three events are expected: /// - [`DeEvent::Start`]; /// - _(optional)_ [`DeEvent::Text`] which content is returned; /// - [`DeEvent::End`]. If text event was missed, an empty string is returned. /// /// Corresponding events are consumed. /// /// # Handling events /// /// The table below shows how events is handled by this method: /// /// |Event |XML |Handling /// |------------------|---------------------------|---------------------------------------- /// |[`DeEvent::Start`]|`...` |if `allow_start == true`, result determined by the second table, otherwise emits [`UnexpectedStart("tag")`](DeError::UnexpectedStart) /// |[`DeEvent::End`] |`` |This is impossible situation, the method will panic if it happens /// |[`DeEvent::Text`] |`text content` or `` (probably mixed)|Returns event content unchanged /// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof) /// /// Second event, consumed if [`DeEvent::Start`] was received and `allow_start == true`: /// /// |Event |XML |Handling /// |------------------|---------------------------|---------------------------------------------------------------------------------- /// |[`DeEvent::Start`]|`...` |Emits [`UnexpectedStart("any-tag")`](DeError::UnexpectedStart) /// |[`DeEvent::End`] |`
` |Returns an empty slice. The reader guarantee that tag will match the open one /// |[`DeEvent::Text`] |`text content` or `` (probably mixed)|Returns event content unchanged, expects the `
` after that /// |[`DeEvent::Eof`] | |Emits [`InvalidXml(IllFormed(MissingEndTag))`](DeError::InvalidXml) /// /// [`Text`]: Event::Text /// [`CData`]: Event::CData fn read_string_impl(&mut self, allow_start: bool) -> Result, DeError> { match self.next()? { DeEvent::Text(e) => Ok(e.text), // allow one nested level DeEvent::Start(e) if allow_start => self.read_text(e.name()), DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())), // SAFETY: The reader is guaranteed that we don't have unmatched tags // If we here, then out deserializer has a bug DeEvent::End(e) => unreachable!("{:?}", e), DeEvent::Eof => Err(DeError::UnexpectedEof), } } /// Consumes one [`DeEvent::Text`] event and ensures that it is followed by the /// [`DeEvent::End`] event. /// /// # Parameters /// - `name`: name of a tag opened before reading text. The corresponding end tag /// should present in input just after the text fn read_text(&mut self, name: QName) -> Result, DeError> { match self.next()? { DeEvent::Text(e) => match self.next()? { // The matching tag name is guaranteed by the reader DeEvent::End(_) => Ok(e.text), // SAFETY: Cannot be two consequent Text events, they would be merged into one DeEvent::Text(_) => unreachable!(), DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())), DeEvent::Eof => Err(Error::missed_end(name, self.reader.decoder()).into()), }, // We can get End event in case of `` or `` input // Return empty text in that case // The matching tag name is guaranteed by the reader DeEvent::End(_) => Ok("".into()), DeEvent::Start(s) => Err(DeError::UnexpectedStart(s.name().as_ref().to_owned())), DeEvent::Eof => Err(Error::missed_end(name, self.reader.decoder()).into()), } } /// Drops all events until event with [name](BytesEnd::name()) `name` won't be /// dropped. 
This method should be called after [`Self::next()`] #[cfg(feature = "overlapped-lists")] fn read_to_end(&mut self, name: QName) -> Result<(), DeError> { let mut depth = 0; loop { match self.read.pop_front() { Some(DeEvent::Start(e)) if e.name() == name => { depth += 1; } Some(DeEvent::End(e)) if e.name() == name => { if depth == 0 { break; } depth -= 1; } // Drop all other skipped events Some(_) => continue, // If we do not have skipped events, use effective reading that will // not allocate memory for events None => { // We should close all opened tags, because we could buffer // Start events, but not the corresponding End events. So we // keep reading events until we exit all nested tags. // `read_to_end()` will return an error if an Eof was encountered // preliminary (in case of malformed XML). // // // ^^^^^^^^^^ - buffered in `self.read`, when `self.read_to_end()` is called, depth = 2 // ^^^^^^ - read by the first call of `self.reader.read_to_end()` // ^^^^^^ - read by the second call of `self.reader.read_to_end()` loop { self.reader.read_to_end(name)?; if depth == 0 { break; } depth -= 1; } break; } } } Ok(()) } #[cfg(not(feature = "overlapped-lists"))] fn read_to_end(&mut self, name: QName) -> Result<(), DeError> { // First one might be in self.peek match self.next()? { DeEvent::Start(e) => self.reader.read_to_end(e.name())?, DeEvent::End(e) if e.name() == name => return Ok(()), _ => (), } self.reader.read_to_end(name) } } impl<'de> Deserializer<'de, SliceReader<'de>> { /// Create new deserializer that will borrow data from the specified string. /// /// Deserializer created with this method will not resolve custom entities. 
#[allow(clippy::should_implement_trait)]
    pub fn from_str(source: &'de str) -> Self {
        Self::from_str_with_resolver(source, PredefinedEntityResolver)
    }
}

impl<'de, E> Deserializer<'de, SliceReader<'de>, E>
where
    E: EntityResolver,
{
    /// Create new deserializer that will borrow data from the specified string
    /// and use specified entity resolver.
    pub fn from_str_with_resolver(source: &'de str, entity_resolver: E) -> Self {
        let mut reader = Reader::from_str(source);
        // The deserializer relies on `<tag/>` being reported as a start event
        // followed by an end event
        let config = reader.config_mut();
        config.expand_empty_elements = true;

        Self::new(
            SliceReader {
                reader,
                start_trimmer: StartTrimmer::default(),
            },
            entity_resolver,
        )
    }
}

impl<'de, R> Deserializer<'de, IoReader<R>>
where
    R: BufRead,
{
    /// Create new deserializer that will copy data from the specified reader
    /// into internal buffer.
    ///
    /// If you already have a string use [`Self::from_str`] instead, because it
    /// will borrow instead of copy. If you have `&[u8]` which is known to represent
    /// UTF-8, you can decode it first before using [`from_str`].
    ///
    /// Deserializer created with this method will not resolve custom entities.
    pub fn from_reader(reader: R) -> Self {
        Self::with_resolver(reader, PredefinedEntityResolver)
    }
}

impl<'de, R, E> Deserializer<'de, IoReader<R>, E>
where
    R: BufRead,
    E: EntityResolver,
{
    /// Create new deserializer that will copy data from the specified reader
    /// into internal buffer and use specified entity resolver.
    ///
    /// If you already have a string use [`Self::from_str`] instead, because it
    /// will borrow instead of copy. If you have `&[u8]` which is known to represent
    /// UTF-8, you can decode it first before using [`from_str`].
pub fn with_resolver(reader: R, entity_resolver: E) -> Self {
        let mut reader = Reader::from_reader(reader);
        // The deserializer relies on `<tag/>` being reported as a start event
        // followed by an end event
        let config = reader.config_mut();
        config.expand_empty_elements = true;

        Self::new(
            IoReader {
                reader,
                start_trimmer: StartTrimmer::default(),
                buf: Vec::new(),
            },
            entity_resolver,
        )
    }
}

impl<'de, 'a, R, E> de::Deserializer<'de> for &'a mut Deserializer<'de, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    type Error = DeError;

    deserialize_primitives!();

    /// Deserializes a struct from the next element, or passes text content to
    /// the visitor if the next event is text.
    fn deserialize_struct<V>(
        self,
        _name: &'static str,
        fields: &'static [&'static str],
        visitor: V,
    ) -> Result<V::Value, DeError>
    where
        V: Visitor<'de>,
    {
        match self.next()? {
            DeEvent::Start(e) => visitor.visit_map(ElementMapAccess::new(self, e, fields)?),
            // SAFETY: The reader is guaranteed that we don't have unmatched tags
            // If we here, then out deserializer has a bug
            DeEvent::End(e) => unreachable!("{:?}", e),
            // Deserializer methods are only hints, if deserializer could not satisfy
            // request, it should return the data that it has. It is responsibility
            // of a Visitor to return an error if it does not understand the data
            DeEvent::Text(e) => match e.text {
                Cow::Borrowed(s) => visitor.visit_borrowed_str(s),
                Cow::Owned(s) => visitor.visit_string(s),
            },
            DeEvent::Eof => Err(DeError::UnexpectedEof),
        }
    }

    /// Unit represented in XML as a `xs:element` or text/CDATA content.
    /// Any content inside `xs:element` is ignored and skipped.
///
    /// Produces unit struct from any of following inputs:
    /// - any `<tag ...>...</tag>`
    /// - any `<tag .../>`
    /// - any consequent text / CDATA content (can consist of several parts
    ///   delimited by comments and processing instructions)
    ///
    /// # Events handling
    ///
    /// |Event             |XML                        |Handling
    /// |------------------|---------------------------|-------------------------------------------
    /// |[`DeEvent::Start`]|`<tag>...</tag>`           |Calls `visitor.visit_unit()`, consumes all events up to and including corresponding `End` event
    /// |[`DeEvent::End`]  |`</tag>`                   |This is impossible situation, the method will panic if it happens
    /// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Calls `visitor.visit_unit()`. The content is ignored
    /// |[`DeEvent::Eof`]  |                           |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
    fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, DeError>
    where
        V: Visitor<'de>,
    {
        match self.next()? {
            DeEvent::Start(s) => {
                self.read_to_end(s.name())?;
                visitor.visit_unit()
            }
            DeEvent::Text(_) => visitor.visit_unit(),
            // SAFETY: The reader is guaranteed that we don't have unmatched tags
            // If we here, then out deserializer has a bug
            DeEvent::End(e) => unreachable!("{:?}", e),
            DeEvent::Eof => Err(DeError::UnexpectedEof),
        }
    }

    /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
    /// with the same deserializer.
    fn deserialize_newtype_struct<V>(
        self,
        _name: &'static str,
        visitor: V,
    ) -> Result<V::Value, DeError>
    where
        V: Visitor<'de>,
    {
        visitor.visit_newtype_struct(self)
    }

    /// Hands the deserializer over to [`var::EnumAccess`], which drives the
    /// visitor's `visit_enum`.
    fn deserialize_enum<V>(
        self,
        _name: &'static str,
        _variants: &'static [&'static str],
        visitor: V,
    ) -> Result<V::Value, DeError>
    where
        V: Visitor<'de>,
    {
        visitor.visit_enum(var::EnumAccess::new(self))
    }

    /// Deserializes a top-level sequence; the deserializer itself is used as
    /// the sequence accessor.
    fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, DeError>
    where
        V: Visitor<'de>,
    {
        visitor.visit_seq(self)
    }

    /// Produces `None` for an empty text event or the end of input, and
    /// `Some` (deserialized from the same events) otherwise. The event is
    /// only peeked, not consumed.
    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, DeError>
    where
        V: Visitor<'de>,
    {
        match self.peek()? {
            DeEvent::Text(t) if t.is_empty() => visitor.visit_none(),
            DeEvent::Eof => visitor.visit_none(),
            _ => visitor.visit_some(self),
        }
    }

    /// Deserializes text as a string and anything else as a map.
    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, DeError>
    where
        V: Visitor<'de>,
    {
        match self.peek()? {
            DeEvent::Text(_) => self.deserialize_str(visitor),
            _ => self.deserialize_map(visitor),
        }
    }
}

/// An accessor to sequence elements forming a value for top-level sequence of XML
/// elements.
///
/// Technically, multiple top-level elements violates XML rule of only one top-level
/// element, but we consider this as several concatenated XML documents.
impl<'de, 'a, R, E> SeqAccess<'de> for &'a mut Deserializer<'de, R, E>
where
    R: XmlRead<'de>,
    E: EntityResolver,
{
    type Error = DeError;

    fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, Self::Error>
    where
        T: DeserializeSeed<'de>,
    {
        match self.peek()? {
            DeEvent::Eof => {
                // We need to consume event in order to self.is_empty() worked
                self.next()?;
                Ok(None)
            }

            // Start(tag), End(tag), Text
            _ => seed.deserialize(&mut **self).map(Some),
        }
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Helper struct that contains a state for an algorithm of converting events
/// from raw events to semi-trimmed events that is independent from a way of
/// events reading.
struct StartTrimmer {
    /// If `true`, then leading whitespace will be removed from next returned
    /// [`Event::Text`]. This field is set to `true` after reading each event
    /// except [`Event::Text`] and [`Event::CData`], so [`Event::Text`] events
    /// read right after them does not trimmed.
    trim_start: bool,
}

impl StartTrimmer {
    /// Converts raw reader's event into a payload event.
    /// Returns `None`, if event should be skipped.
#[inline(always)] fn trim<'a>(&mut self, event: Event<'a>) -> Option> { let (event, trim_next_event) = match event { Event::DocType(e) => (PayloadEvent::DocType(e), true), Event::Start(e) => (PayloadEvent::Start(e), true), Event::End(e) => (PayloadEvent::End(e), true), Event::Eof => (PayloadEvent::Eof, true), // Do not trim next text event after Text or CDATA event Event::CData(e) => (PayloadEvent::CData(e), false), Event::Text(mut e) => { // If event is empty after trimming, skip it if self.trim_start && e.inplace_trim_start() { return None; } (PayloadEvent::Text(e), false) } _ => return None, }; self.trim_start = trim_next_event; Some(event) } } impl Default for StartTrimmer { #[inline] fn default() -> Self { Self { trim_start: true } } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Trait used by the deserializer for iterating over input. This is manually /// "specialized" for iterating over `&[u8]`. /// /// You do not need to implement this trait, it is needed to abstract from /// [borrowing](SliceReader) and [copying](IoReader) data sources and reuse code in /// deserializer pub trait XmlRead<'i> { /// Return an input-borrowing event. fn next(&mut self) -> Result, DeError>; /// Skips until end element is found. Unlike `next()` it will not allocate /// when it cannot satisfy the lifetime. fn read_to_end(&mut self, name: QName) -> Result<(), DeError>; /// A copy of the reader's decoder used to decode strings. fn decoder(&self) -> Decoder; } /// XML input source that reads from a std::io input stream. /// /// You cannot create it, it is created automatically when you call /// [`Deserializer::from_reader`] pub struct IoReader { reader: Reader, start_trimmer: StartTrimmer, buf: Vec, } impl IoReader { /// Returns the underlying XML reader. 
/// /// ``` /// # use pretty_assertions::assert_eq; /// use serde::Deserialize; /// use std::io::Cursor; /// use quick_xml::de::Deserializer; /// use quick_xml::Reader; /// /// #[derive(Deserialize)] /// struct SomeStruct { /// field1: String, /// field2: String, /// } /// /// // Try to deserialize from broken XML /// let mut de = Deserializer::from_reader(Cursor::new( /// "" /// // 0 ^= 28 ^= 41 /// )); /// /// let err = SomeStruct::deserialize(&mut de); /// assert!(err.is_err()); /// /// let reader: &Reader> = de.get_ref().get_ref(); /// /// assert_eq!(reader.error_position(), 28); /// assert_eq!(reader.buffer_position(), 41); /// ``` pub const fn get_ref(&self) -> &Reader { &self.reader } } impl<'i, R: BufRead> XmlRead<'i> for IoReader { fn next(&mut self) -> Result, DeError> { loop { self.buf.clear(); let event = self.reader.read_event_into(&mut self.buf)?; if let Some(event) = self.start_trimmer.trim(event) { return Ok(event.into_owned()); } } } fn read_to_end(&mut self, name: QName) -> Result<(), DeError> { match self.reader.read_to_end_into(name, &mut self.buf) { Err(e) => Err(e.into()), Ok(_) => Ok(()), } } fn decoder(&self) -> Decoder { self.reader.decoder() } } /// XML input source that reads from a slice of bytes and can borrow from it. /// /// You cannot create it, it is created automatically when you call /// [`Deserializer::from_str`]. pub struct SliceReader<'de> { reader: Reader<&'de [u8]>, start_trimmer: StartTrimmer, } impl<'de> SliceReader<'de> { /// Returns the underlying XML reader. 
/// /// ``` /// # use pretty_assertions::assert_eq; /// use serde::Deserialize; /// use quick_xml::de::Deserializer; /// use quick_xml::Reader; /// /// #[derive(Deserialize)] /// struct SomeStruct { /// field1: String, /// field2: String, /// } /// /// // Try to deserialize from broken XML /// let mut de = Deserializer::from_str( /// "" /// // 0 ^= 28 ^= 41 /// ); /// /// let err = SomeStruct::deserialize(&mut de); /// assert!(err.is_err()); /// /// let reader: &Reader<&[u8]> = de.get_ref().get_ref(); /// /// assert_eq!(reader.error_position(), 28); /// assert_eq!(reader.buffer_position(), 41); /// ``` pub const fn get_ref(&self) -> &Reader<&'de [u8]> { &self.reader } } impl<'de> XmlRead<'de> for SliceReader<'de> { fn next(&mut self) -> Result, DeError> { loop { let event = self.reader.read_event()?; if let Some(event) = self.start_trimmer.trim(event) { return Ok(event); } } } fn read_to_end(&mut self, name: QName) -> Result<(), DeError> { match self.reader.read_to_end(name) { Err(e) => Err(e.into()), Ok(_) => Ok(()), } } fn decoder(&self) -> Decoder { self.reader.decoder() } } #[cfg(test)] mod tests { use super::*; use crate::errors::IllFormedError; use pretty_assertions::assert_eq; fn make_de<'de>(source: &'de str) -> Deserializer<'de, SliceReader<'de>> { dbg!(source); Deserializer::from_str(source) } #[cfg(feature = "overlapped-lists")] mod skip { use super::*; use crate::de::DeEvent::*; use crate::events::BytesEnd; use pretty_assertions::assert_eq; /// Checks that `peek()` and `read()` behaves correctly after `skip()` #[test] fn read_and_peek() { let mut de = make_de( r#" text "#, ); // Initial conditions - both are empty assert_eq!(de.read, vec![]); assert_eq!(de.write, vec![]); assert_eq!(de.next().unwrap(), Start(BytesStart::new("root"))); assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("inner"))); // Mark that start_replay() should begin replay from this point let checkpoint = de.skip_checkpoint(); assert_eq!(checkpoint, 0); // Should skip first tree 
de.skip().unwrap(); assert_eq!(de.read, vec![]); assert_eq!( de.write, vec![ Start(BytesStart::new("inner")), Text("text".into()), Start(BytesStart::new("inner")), End(BytesEnd::new("inner")), End(BytesEnd::new("inner")), ] ); // Consume . Now unconsumed XML looks like: // // // text // // // // assert_eq!(de.next().unwrap(), Start(BytesStart::new("next"))); assert_eq!(de.next().unwrap(), End(BytesEnd::new("next"))); // We finish writing. Next call to `next()` should start replay that messages: // // // text // // // // and after that stream that messages: // // // de.start_replay(checkpoint); assert_eq!( de.read, vec![ Start(BytesStart::new("inner")), Text("text".into()), Start(BytesStart::new("inner")), End(BytesEnd::new("inner")), End(BytesEnd::new("inner")), ] ); assert_eq!(de.write, vec![]); assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner"))); // Mark that start_replay() should begin replay from this point let checkpoint = de.skip_checkpoint(); assert_eq!(checkpoint, 0); // Skip `$text` node and consume after it de.skip().unwrap(); assert_eq!( de.read, vec![ Start(BytesStart::new("inner")), End(BytesEnd::new("inner")), End(BytesEnd::new("inner")), ] ); assert_eq!( de.write, vec![ // This comment here to keep the same formatting of both arrays // otherwise rustfmt suggest one-line it Text("text".into()), ] ); assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner"))); assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner"))); // We finish writing. 
Next call to `next()` should start replay messages: // // text // // // and after that stream that messages: // // // de.start_replay(checkpoint); assert_eq!( de.read, vec![ // This comment here to keep the same formatting as others // otherwise rustfmt suggest one-line it Text("text".into()), End(BytesEnd::new("inner")), ] ); assert_eq!(de.write, vec![]); assert_eq!(de.next().unwrap(), Text("text".into())); assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner"))); assert_eq!(de.next().unwrap(), Start(BytesStart::new("target"))); assert_eq!(de.next().unwrap(), End(BytesEnd::new("target"))); assert_eq!(de.next().unwrap(), End(BytesEnd::new("root"))); assert_eq!(de.next().unwrap(), Eof); } /// Checks that `read_to_end()` behaves correctly after `skip()` #[test] fn read_to_end() { let mut de = make_de( r#" text "#, ); // Initial conditions - both are empty assert_eq!(de.read, vec![]); assert_eq!(de.write, vec![]); assert_eq!(de.next().unwrap(), Start(BytesStart::new("root"))); // Mark that start_replay() should begin replay from this point let checkpoint = de.skip_checkpoint(); assert_eq!(checkpoint, 0); // Skip the tree de.skip().unwrap(); assert_eq!(de.read, vec![]); assert_eq!( de.write, vec![ Start(BytesStart::new("skip")), Text("text".into()), Start(BytesStart::new("skip")), End(BytesEnd::new("skip")), End(BytesEnd::new("skip")), ] ); // Drop all events that represents tree. Now unconsumed XML looks like: // // // text // // // assert_eq!(de.next().unwrap(), Start(BytesStart::new("target"))); de.read_to_end(QName(b"target")).unwrap(); assert_eq!(de.read, vec![]); assert_eq!( de.write, vec![ Start(BytesStart::new("skip")), Text("text".into()), Start(BytesStart::new("skip")), End(BytesEnd::new("skip")), End(BytesEnd::new("skip")), ] ); // We finish writing. 
Next call to `next()` should start replay that messages: // // // text // // // // and after that stream that messages: // // de.start_replay(checkpoint); assert_eq!( de.read, vec![ Start(BytesStart::new("skip")), Text("text".into()), Start(BytesStart::new("skip")), End(BytesEnd::new("skip")), End(BytesEnd::new("skip")), ] ); assert_eq!(de.write, vec![]); assert_eq!(de.next().unwrap(), Start(BytesStart::new("skip"))); de.read_to_end(QName(b"skip")).unwrap(); assert_eq!(de.next().unwrap(), End(BytesEnd::new("root"))); assert_eq!(de.next().unwrap(), Eof); } /// Checks that replay replayes only part of events /// Test for https://github.com/tafia/quick-xml/issues/435 #[test] fn partial_replay() { let mut de = make_de( r#" "#, ); // Initial conditions - both are empty assert_eq!(de.read, vec![]); assert_eq!(de.write, vec![]); assert_eq!(de.next().unwrap(), Start(BytesStart::new("root"))); // start_replay() should start replay from this point let checkpoint1 = de.skip_checkpoint(); assert_eq!(checkpoint1, 0); // Should skip first and second elements de.skip().unwrap(); // skipped-1 de.skip().unwrap(); // skipped-2 assert_eq!(de.read, vec![]); assert_eq!( de.write, vec![ Start(BytesStart::new("skipped-1")), End(BytesEnd::new("skipped-1")), Start(BytesStart::new("skipped-2")), End(BytesEnd::new("skipped-2")), ] ); //////////////////////////////////////////////////////////////////////////////////////// assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner"))); assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("skipped-3"))); assert_eq!( de.read, vec![ // This comment here to keep the same formatting of both arrays // otherwise rustfmt suggest one-line it Start(BytesStart::new("skipped-3")), ] ); assert_eq!( de.write, vec![ Start(BytesStart::new("skipped-1")), End(BytesEnd::new("skipped-1")), Start(BytesStart::new("skipped-2")), End(BytesEnd::new("skipped-2")), ] ); // start_replay() should start replay from this point let checkpoint2 = de.skip_checkpoint(); 
assert_eq!(checkpoint2, 4); // Should skip third and forth elements de.skip().unwrap(); // skipped-3 de.skip().unwrap(); // skipped-4 assert_eq!(de.read, vec![]); assert_eq!( de.write, vec![ // checkpoint 1 Start(BytesStart::new("skipped-1")), End(BytesEnd::new("skipped-1")), Start(BytesStart::new("skipped-2")), End(BytesEnd::new("skipped-2")), // checkpoint 2 Start(BytesStart::new("skipped-3")), End(BytesEnd::new("skipped-3")), Start(BytesStart::new("skipped-4")), End(BytesEnd::new("skipped-4")), ] ); assert_eq!(de.next().unwrap(), Start(BytesStart::new("target-2"))); assert_eq!(de.next().unwrap(), End(BytesEnd::new("target-2"))); assert_eq!(de.peek().unwrap(), &End(BytesEnd::new("inner"))); assert_eq!( de.read, vec![ // This comment here to keep the same formatting of both arrays // otherwise rustfmt suggest one-line it End(BytesEnd::new("inner")), ] ); assert_eq!( de.write, vec![ // checkpoint 1 Start(BytesStart::new("skipped-1")), End(BytesEnd::new("skipped-1")), Start(BytesStart::new("skipped-2")), End(BytesEnd::new("skipped-2")), // checkpoint 2 Start(BytesStart::new("skipped-3")), End(BytesEnd::new("skipped-3")), Start(BytesStart::new("skipped-4")), End(BytesEnd::new("skipped-4")), ] ); // Start replay events from checkpoint 2 de.start_replay(checkpoint2); assert_eq!( de.read, vec![ Start(BytesStart::new("skipped-3")), End(BytesEnd::new("skipped-3")), Start(BytesStart::new("skipped-4")), End(BytesEnd::new("skipped-4")), End(BytesEnd::new("inner")), ] ); assert_eq!( de.write, vec![ Start(BytesStart::new("skipped-1")), End(BytesEnd::new("skipped-1")), Start(BytesStart::new("skipped-2")), End(BytesEnd::new("skipped-2")), ] ); // Replayed events assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-3"))); assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-3"))); assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-4"))); assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-4"))); assert_eq!(de.next().unwrap(), 
End(BytesEnd::new("inner"))); assert_eq!(de.read, vec![]); assert_eq!( de.write, vec![ Start(BytesStart::new("skipped-1")), End(BytesEnd::new("skipped-1")), Start(BytesStart::new("skipped-2")), End(BytesEnd::new("skipped-2")), ] ); //////////////////////////////////////////////////////////////////////////////////////// // New events assert_eq!(de.next().unwrap(), Start(BytesStart::new("target-1"))); assert_eq!(de.next().unwrap(), End(BytesEnd::new("target-1"))); assert_eq!(de.read, vec![]); assert_eq!( de.write, vec![ Start(BytesStart::new("skipped-1")), End(BytesEnd::new("skipped-1")), Start(BytesStart::new("skipped-2")), End(BytesEnd::new("skipped-2")), ] ); // Start replay events from checkpoint 1 de.start_replay(checkpoint1); assert_eq!( de.read, vec![ Start(BytesStart::new("skipped-1")), End(BytesEnd::new("skipped-1")), Start(BytesStart::new("skipped-2")), End(BytesEnd::new("skipped-2")), ] ); assert_eq!(de.write, vec![]); // Replayed events assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-1"))); assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-1"))); assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-2"))); assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-2"))); assert_eq!(de.read, vec![]); assert_eq!(de.write, vec![]); // New events assert_eq!(de.next().unwrap(), End(BytesEnd::new("root"))); assert_eq!(de.next().unwrap(), Eof); } /// Checks that limiting buffer size works correctly #[test] fn limit() { use serde::Deserialize; #[derive(Debug, Deserialize)] #[allow(unused)] struct List { item: Vec<()>, } let mut de = make_de( r#" with text "#, ); de.event_buffer_size(NonZeroUsize::new(3)); match List::deserialize(&mut de) { Err(DeError::TooManyEvents(count)) => assert_eq!(count.get(), 3), e => panic!("Expected `Err(TooManyEvents(3))`, but got `{:?}`", e), } } /// Without handling Eof in `skip` this test failed with memory allocation #[test] fn invalid_xml() { use crate::de::DeEvent::*; let mut de = make_de(""); // 
Cache all events let checkpoint = de.skip_checkpoint(); de.skip().unwrap(); de.start_replay(checkpoint); assert_eq!(de.read, vec![Start(BytesStart::new("root")), Eof]); } } mod read_to_end { use super::*; use crate::de::DeEvent::*; use pretty_assertions::assert_eq; #[test] fn complex() { let mut de = make_de( r#" textcontent "#, ); assert_eq!(de.next().unwrap(), Start(BytesStart::new("root"))); assert_eq!( de.next().unwrap(), Start(BytesStart::from_content(r#"tag a="1""#, 3)) ); assert_eq!(de.read_to_end(QName(b"tag")).unwrap(), ()); assert_eq!( de.next().unwrap(), Start(BytesStart::from_content(r#"tag a="2""#, 3)) ); assert_eq!(de.next().unwrap(), Text("cdata content".into())); assert_eq!(de.next().unwrap(), End(BytesEnd::new("tag"))); assert_eq!(de.next().unwrap(), Start(BytesStart::new("self-closed"))); assert_eq!(de.read_to_end(QName(b"self-closed")).unwrap(), ()); assert_eq!(de.next().unwrap(), End(BytesEnd::new("root"))); assert_eq!(de.next().unwrap(), Eof); } #[test] fn invalid_xml1() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag"))); assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("tag"))); match de.read_to_end(QName(b"tag")) { Err(DeError::InvalidXml(Error::IllFormed(cause))) => { assert_eq!(cause, IllFormedError::MissingEndTag("tag".into())) } x => panic!( "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`", x ), } assert_eq!(de.next().unwrap(), Eof); } #[test] fn invalid_xml2() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag"))); assert_eq!(de.peek().unwrap(), &Text("".into())); match de.read_to_end(QName(b"tag")) { Err(DeError::InvalidXml(Error::IllFormed(cause))) => { assert_eq!(cause, IllFormedError::MissingEndTag("tag".into())) } x => panic!( "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`", x ), } assert_eq!(de.next().unwrap(), Eof); } } #[test] fn borrowing_reader_parity() { let s = r#" Some text "#; let mut reader1 = IoReader { reader: 
Reader::from_reader(s.as_bytes()), start_trimmer: StartTrimmer::default(), buf: Vec::new(), }; let mut reader2 = SliceReader { reader: Reader::from_str(s), start_trimmer: StartTrimmer::default(), }; loop { let event1 = reader1.next().unwrap(); let event2 = reader2.next().unwrap(); if let (PayloadEvent::Eof, PayloadEvent::Eof) = (&event1, &event2) { break; } assert_eq!(event1, event2); } } #[test] fn borrowing_reader_events() { let s = r#" Some text "#; let mut reader = SliceReader { reader: Reader::from_str(s), start_trimmer: StartTrimmer::default(), }; let config = reader.reader.config_mut(); config.expand_empty_elements = true; let mut events = Vec::new(); loop { let event = reader.next().unwrap(); if let PayloadEvent::Eof = event { break; } events.push(event); } use crate::de::PayloadEvent::*; assert_eq!( events, vec![ Start(BytesStart::from_content( r#"item name="hello" source="world.rs""#, 4 )), Text(BytesText::from_escaped("Some text")), End(BytesEnd::new("item")), Start(BytesStart::from_content("item2", 5)), End(BytesEnd::new("item2")), Start(BytesStart::from_content("item3", 5)), End(BytesEnd::new("item3")), Start(BytesStart::from_content(r#"item4 value="world" "#, 5)), End(BytesEnd::new("item4")), ] ) } /// Ensures, that [`Deserializer::read_string()`] never can get an `End` event, /// because parser reports error early #[test] fn read_string() { match from_str::(r#""#) { Err(DeError::InvalidXml(Error::IllFormed(cause))) => { assert_eq!(cause, IllFormedError::UnmatchedEndTag("root".into())); } x => panic!( "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`", x ), } let s: String = from_str(r#""#).unwrap(); assert_eq!(s, ""); match from_str::(r#""#) { Err(DeError::InvalidXml(Error::IllFormed(cause))) => assert_eq!( cause, IllFormedError::MismatchedEndTag { expected: "root".into(), found: "other".into(), } ), x => panic!("Expected `Err(InvalidXml(IllFormed(_))`, but got `{:?}`", x), } } /// Tests for https://github.com/tafia/quick-xml/issues/474. 
/// /// That tests ensures that comments and processed instructions is ignored /// and can split one logical string in pieces. mod merge_text { use super::*; use pretty_assertions::assert_eq; #[test] fn text() { let mut de = make_de("text"); assert_eq!(de.next().unwrap(), DeEvent::Text("text".into())); } #[test] fn cdata() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Text("cdata".into())); } #[test] fn text_and_cdata() { let mut de = make_de("text and "); assert_eq!(de.next().unwrap(), DeEvent::Text("text and cdata".into())); } #[test] fn text_and_empty_cdata() { let mut de = make_de("text and "); assert_eq!(de.next().unwrap(), DeEvent::Text("text and ".into())); } #[test] fn cdata_and_text() { let mut de = make_de(" and text"); assert_eq!(de.next().unwrap(), DeEvent::Text("cdata and text".into())); } #[test] fn empty_cdata_and_text() { let mut de = make_de(" and text"); assert_eq!(de.next().unwrap(), DeEvent::Text(" and text".into())); } #[test] fn cdata_and_cdata() { let mut de = make_de( "\ \ cdata]]>\ ", ); assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into())); } mod comment_between { use super::*; use pretty_assertions::assert_eq; #[test] fn text() { let mut de = make_de( "\ text \ \ text\ ", ); assert_eq!(de.next().unwrap(), DeEvent::Text("text text".into())); } #[test] fn cdata() { let mut de = make_de( "\ \ \ cdata]]>\ ", ); assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into())); } #[test] fn text_and_cdata() { let mut de = make_de( "\ text \ \ \ ", ); assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata".into())); } #[test] fn text_and_empty_cdata() { let mut de = make_de( "\ text \ \ \ ", ); assert_eq!(de.next().unwrap(), DeEvent::Text("text ".into())); } #[test] fn cdata_and_text() { let mut de = make_de( "\ \ \ text \ ", ); assert_eq!(de.next().unwrap(), DeEvent::Text("cdata text".into())); } #[test] fn empty_cdata_and_text() { let mut de = make_de( "\ \ \ text \ ", ); 
assert_eq!(de.next().unwrap(), DeEvent::Text(" text".into())); } #[test] fn cdata_and_cdata() { let mut de = make_de( "\ \ \ cdata]]>\ ", ); assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into())); } } mod pi_between { use super::*; use pretty_assertions::assert_eq; #[test] fn text() { let mut de = make_de( "\ text \ \ text\ ", ); assert_eq!(de.next().unwrap(), DeEvent::Text("text text".into())); } #[test] fn cdata() { let mut de = make_de( "\ \ \ cdata]]>\ ", ); assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into())); } #[test] fn text_and_cdata() { let mut de = make_de( "\ text \ \ \ ", ); assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata".into())); } #[test] fn text_and_empty_cdata() { let mut de = make_de( "\ text \ \ \ ", ); assert_eq!(de.next().unwrap(), DeEvent::Text("text ".into())); } #[test] fn cdata_and_text() { let mut de = make_de( "\ \ \ text \ ", ); assert_eq!(de.next().unwrap(), DeEvent::Text("cdata text".into())); } #[test] fn empty_cdata_and_text() { let mut de = make_de( "\ \ \ text \ ", ); assert_eq!(de.next().unwrap(), DeEvent::Text(" text".into())); } #[test] fn cdata_and_cdata() { let mut de = make_de( "\ \ \ cdata]]>\ ", ); assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into())); } } } /// Tests for https://github.com/tafia/quick-xml/issues/474. /// /// This tests ensures that any combination of payload data is processed /// as expected. mod triples { use super::*; use pretty_assertions::assert_eq; mod start { use super::*; /// ... 
mod start { use super::*; use pretty_assertions::assert_eq; #[test] fn start() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1"))); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag3"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); } /// Not matching end tag will result to error #[test] fn end() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1"))); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag2"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn text() { let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1"))); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); // Text is trimmed from both sides assert_eq!(de.next().unwrap(), DeEvent::Text("text".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn cdata() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1"))); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn eof() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1"))); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); assert_eq!(de.next().unwrap(), DeEvent::Eof); } } /// ... 
mod end { use super::*; use pretty_assertions::assert_eq; #[test] fn start() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn end() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); match de.next() { Err(DeError::InvalidXml(Error::IllFormed(cause))) => { assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag2".into())); } x => panic!( "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`", x ), } assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn text() { let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); // Text is trimmed from both sides assert_eq!(de.next().unwrap(), DeEvent::Text("text".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn cdata() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn eof() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); assert_eq!(de.next().unwrap(), DeEvent::Eof); } } /// text ... 
mod text { use super::*; use pretty_assertions::assert_eq; #[test] fn start() { let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); // Text is trimmed from both sides assert_eq!(de.next().unwrap(), DeEvent::Text("text".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn end() { let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); // Text is trimmed from both sides assert_eq!(de.next().unwrap(), DeEvent::Text("text".into())); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); } // start::text::text has no difference from start::text #[test] fn cdata() { let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); // Text is trimmed from the start assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn eof() { let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); // Text is trimmed from both sides assert_eq!(de.next().unwrap(), DeEvent::Text("text".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); assert_eq!(de.next().unwrap(), DeEvent::Eof); } } /// ... 
mod cdata { use super::*; use pretty_assertions::assert_eq; #[test] fn start() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn end() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn text() { let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); // Text is trimmed from the end assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn cdata() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn eof() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); assert_eq!(de.next().unwrap(), DeEvent::Eof); } } } /// Start from End event will always generate an error #[test] fn end() { let mut de = make_de(""); match de.next() { Err(DeError::InvalidXml(Error::IllFormed(cause))) => { assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into())); } x => panic!( "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`", x ), } assert_eq!(de.next().unwrap(), DeEvent::Eof); } mod text { use super::*; use pretty_assertions::assert_eq; mod start { use super::*; use pretty_assertions::assert_eq; #[test] fn start() { let mut de = make_de(" text "); // 
Text is trimmed from both sides assert_eq!(de.next().unwrap(), DeEvent::Text("text".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1"))); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); } /// Not matching end tag will result in error #[test] fn end() { let mut de = make_de(" text "); // Text is trimmed from both sides assert_eq!(de.next().unwrap(), DeEvent::Text("text".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn text() { let mut de = make_de(" text text2 "); // Text is trimmed from both sides assert_eq!(de.next().unwrap(), DeEvent::Text("text".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); // Text is trimmed from both sides assert_eq!(de.next().unwrap(), DeEvent::Text("text2".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn cdata() { let mut de = make_de(" text "); // Text is trimmed from both sides assert_eq!(de.next().unwrap(), DeEvent::Text("text".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn eof() { // Text is trimmed from both sides let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Text("text".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); assert_eq!(de.next().unwrap(), DeEvent::Eof); } } /// End event without corresponding start event will always generate an error #[test] fn end() { let mut de = make_de(" text "); // Text is trimmed from both sides assert_eq!(de.next().unwrap(), DeEvent::Text("text".into())); match de.next() { Err(DeError::InvalidXml(Error::IllFormed(cause))) => { 
assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into())); } x => panic!( "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`", x ), } assert_eq!(de.next().unwrap(), DeEvent::Eof); } // text::text::something is equivalent to text::something mod cdata { use super::*; use pretty_assertions::assert_eq; #[test] fn start() { let mut de = make_de(" text "); // Text is trimmed from the start assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn end() { let mut de = make_de(" text "); // Text is trimmed from the start assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata ".into())); match de.next() { Err(DeError::InvalidXml(Error::IllFormed(cause))) => { assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into())); } x => panic!( "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`", x ), } assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn text() { let mut de = make_de(" text text2 "); // Text is trimmed from the start and from the end assert_eq!( de.next().unwrap(), DeEvent::Text("text cdata text2".into()) ); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn cdata() { let mut de = make_de(" text "); // Text is trimmed from the start assert_eq!( de.next().unwrap(), DeEvent::Text("text cdata cdata2 ".into()) ); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn eof() { let mut de = make_de(" text "); // Text is trimmed from the start assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); assert_eq!(de.next().unwrap(), DeEvent::Eof); } } } mod cdata { use super::*; use pretty_assertions::assert_eq; mod start { use super::*; use pretty_assertions::assert_eq; #[test] fn start() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), 
DeEvent::Start(BytesStart::new("tag1"))); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); } /// Not matching end tag will result in error #[test] fn end() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn text() { let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); // Text is trimmed from both sides assert_eq!(de.next().unwrap(), DeEvent::Text("text".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn cdata() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata2 ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn eof() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); assert_eq!(de.next().unwrap(), DeEvent::Eof); } } /// End event without corresponding start event will always generate an error #[test] fn end() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); match de.next() { Err(DeError::InvalidXml(Error::IllFormed(cause))) => { assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into())); } x => panic!( "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`", x ), } assert_eq!(de.next().unwrap(), DeEvent::Eof); } mod text { use super::*; use pretty_assertions::assert_eq; #[test] fn start() { let mut de = make_de(" text "); // Text is 
trimmed from the end assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn end() { let mut de = make_de(" text "); // Text is trimmed from the end assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text".into())); match de.next() { Err(DeError::InvalidXml(Error::IllFormed(cause))) => { assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into())); } x => panic!( "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`", x ), } assert_eq!(de.next().unwrap(), DeEvent::Eof); } // cdata::text::text is equivalent to cdata::text #[test] fn cdata() { let mut de = make_de(" text "); assert_eq!( de.next().unwrap(), DeEvent::Text(" cdata text cdata2 ".into()) ); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn eof() { let mut de = make_de(" text "); // Text is trimmed from the end assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); assert_eq!(de.next().unwrap(), DeEvent::Eof); } } mod cdata { use super::*; use pretty_assertions::assert_eq; #[test] fn start() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn end() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into())); match de.next() { Err(DeError::InvalidXml(Error::IllFormed(cause))) => { assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into())); } x => panic!( "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`", x ), } assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn text() { let mut de = make_de(" text "); // Text is trimmed from the end assert_eq!( de.next().unwrap(), DeEvent::Text(" cdata cdata2 text".into()) ); assert_eq!(de.next().unwrap(), 
DeEvent::Eof); } #[test] fn cdata() { let mut de = make_de(""); assert_eq!( de.next().unwrap(), DeEvent::Text(" cdata cdata2 cdata3 ".into()) ); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn eof() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); assert_eq!(de.next().unwrap(), DeEvent::Eof); } } } } } quick-xml-0.36.1/src/de/resolver.rs000064400000000000000000000067070072674642500152550ustar 00000000000000//! Entity resolver module use std::convert::Infallible; use std::error::Error; use crate::escape::resolve_predefined_entity; use crate::events::BytesText; /// Used to resolve unknown entities while parsing /// /// # Example /// /// ``` /// # use serde::Deserialize; /// # use pretty_assertions::assert_eq; /// use regex::bytes::Regex; /// use std::collections::BTreeMap; /// use std::string::FromUtf8Error; /// use quick_xml::de::{Deserializer, EntityResolver}; /// use quick_xml::events::BytesText; /// /// struct DocTypeEntityResolver { /// re: Regex, /// map: BTreeMap, /// } /// /// impl Default for DocTypeEntityResolver { /// fn default() -> Self { /// Self { /// // We do not focus on true parsing in this example /// // You should use special libraries to parse DTD /// re: Regex::new(r#""#).unwrap(), /// map: BTreeMap::new(), /// } /// } /// } /// /// impl EntityResolver for DocTypeEntityResolver { /// type Error = FromUtf8Error; /// /// fn capture(&mut self, doctype: BytesText) -> Result<(), Self::Error> { /// for cap in self.re.captures_iter(&doctype) { /// self.map.insert( /// String::from_utf8(cap[1].to_vec())?, /// String::from_utf8(cap[2].to_vec())?, /// ); /// } /// Ok(()) /// } /// /// fn resolve(&self, entity: &str) -> Option<&str> { /// self.map.get(entity).map(|s| s.as_str()) /// } /// } /// /// let xml_reader = br#" /// ]> /// /// &e1; /// /// "#.as_ref(); /// /// let mut de = Deserializer::with_resolver( /// xml_reader, /// 
DocTypeEntityResolver::default(), /// ); /// let data: BTreeMap = BTreeMap::deserialize(&mut de).unwrap(); /// /// assert_eq!(data.get("entity_one"), Some(&"entity 1".to_string())); /// ``` pub trait EntityResolver { /// The error type that represents DTD parse error type Error: Error; /// Called on contents of [`Event::DocType`] to capture declared entities. /// Can be called multiple times, for each parsed `` declaration. /// /// [`Event::DocType`]: crate::events::Event::DocType fn capture(&mut self, doctype: BytesText) -> Result<(), Self::Error>; /// Called when an entity needs to be resolved. /// /// `None` is returned if a suitable value can not be found. /// In that case an [`EscapeError::UnrecognizedEntity`] will be returned by /// a deserializer. /// /// [`EscapeError::UnrecognizedEntity`]: crate::escape::EscapeError::UnrecognizedEntity fn resolve(&self, entity: &str) -> Option<&str>; } /// An [`EntityResolver`] that resolves only predefined entities: /// /// | Entity | Resolution /// |--------|------------ /// |`<` | `<` /// |`>` | `>` /// |`&` | `&` /// |`'`| `'` /// |`"`| `"` #[derive(Default, Copy, Clone)] pub struct PredefinedEntityResolver; impl EntityResolver for PredefinedEntityResolver { type Error = Infallible; #[inline] fn capture(&mut self, _doctype: BytesText) -> Result<(), Self::Error> { Ok(()) } #[inline] fn resolve(&self, entity: &str) -> Option<&str> { resolve_predefined_entity(entity) } } quick-xml-0.36.1/src/de/simple_type.rs000064400000000000000000001417440072674642500157470ustar 00000000000000//! Contains Serde `Deserializer` for XML [simple types] [as defined] in the XML Schema. //! //! [simple types]: https://www.w3schools.com/xml/el_simpletype.asp //! 
[as defined]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition use crate::de::{deserialize_bool, str2bool, Text}; use crate::encoding::Decoder; use crate::errors::serialize::DeError; use crate::escape::unescape; use crate::utils::CowRef; use memchr::memchr; use serde::de::value::UnitDeserializer; use serde::de::{DeserializeSeed, Deserializer, EnumAccess, SeqAccess, VariantAccess, Visitor}; use serde::serde_if_integer128; use std::borrow::Cow; use std::ops::Range; macro_rules! deserialize_num { ($method:ident, $visit:ident) => { fn $method(self, visitor: V) -> Result where V: Visitor<'de>, { visitor.$visit(self.content.as_str().parse()?) } }; ($method:ident => $visit:ident) => { fn $method(self, visitor: V) -> Result where V: Visitor<'de>, { let string = self.decode()?; visitor.$visit(string.as_str().parse()?) } }; } macro_rules! unsupported { ( $deserialize:ident $( ($($type:ty),*) )? ) => { #[inline] fn $deserialize>( self, $($(_: $type,)*)? visitor: V ) -> Result { // Deserializer methods are only hints, if deserializer could not satisfy // request, it should return the data that it has. It is responsibility // of a Visitor to return an error if it does not understand the data self.deserialize_str(visitor) } }; } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A version of [`Cow`] that can borrow from two different buffers, one of them /// is a deserializer input, and conceptually contains only part of owned data. /// /// # Lifetimes /// - `'de` -- lifetime of the data that deserializer borrow from the parsed input /// - `'a` -- lifetime of the data that owned by a deserializer enum Content<'de, 'a> { /// An input borrowed from the parsed data Input(&'de str), /// An input borrowed from the buffer owned by another deserializer Slice(&'a str), /// An input taken from an external deserializer, owned by that deserializer. 
/// Only part of this data, located after offset represented by `usize`, used /// to deserialize data, the other is a garbage that can't be dropped because /// we do not want to make reallocations if they will not required. Owned(String, usize), } impl<'de, 'a> Content<'de, 'a> { /// Returns string representation of the content fn as_str(&self) -> &str { match self { Content::Input(s) => s, Content::Slice(s) => s, Content::Owned(s, offset) => s.split_at(*offset).1, } } /// Supply to the visitor a borrowed string, a string slice, or an owned /// string depending on the kind of input. Unlike [`Self::deserialize_item`], /// the whole [`Self::Owned`] string will be passed to the visitor. /// /// Calls /// - `visitor.visit_borrowed_str` if data borrowed from the input /// - `visitor.visit_str` if data borrowed from another source /// - `visitor.visit_string` if data owned by this type #[inline] fn deserialize_all(self, visitor: V) -> Result where V: Visitor<'de>, { match self { Content::Input(s) => visitor.visit_borrowed_str(s), Content::Slice(s) => visitor.visit_str(s), Content::Owned(s, _) => visitor.visit_string(s), } } /// Supply to the visitor a borrowed string, a string slice, or an owned /// string depending on the kind of input. Unlike [`Self::deserialize_all`], /// only part of [`Self::Owned`] string will be passed to the visitor. 
/// /// Calls /// - `visitor.visit_borrowed_str` if data borrowed from the input /// - `visitor.visit_str` if data borrowed from another source /// - `visitor.visit_string` if data owned by this type #[inline] fn deserialize_item(self, visitor: V) -> Result where V: Visitor<'de>, { match self { Content::Input(s) => visitor.visit_borrowed_str(s), Content::Slice(s) => visitor.visit_str(s), Content::Owned(s, 0) => visitor.visit_string(s), Content::Owned(s, offset) => visitor.visit_str(s.split_at(offset).1), } } } /// A deserializer that handles ordinary [simple type definition][item] with /// `{variety} = atomic`, or an ordinary [simple type] definition with /// `{variety} = union` whose basic members are all atomic. /// /// This deserializer can deserialize only primitive types: /// - numbers /// - booleans /// - strings /// - units /// - options /// - unit variants of enums /// /// Identifiers represented as strings and deserialized accordingly. /// /// Deserialization of all other types will provide a string and in most cases /// the deserialization will fail because visitor does not expect that. /// /// The `Owned` variant of the content acts as a storage for data, allocated by /// an external deserializer that pass it via [`ListIter`]. 
///
/// [item]: https://www.w3.org/TR/xmlschema11-1/#std-item_type_definition
/// [simple type]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition
struct AtomicDeserializer<'de, 'a> {
    /// Content of the attribute value, text content or CDATA content
    content: Content<'de, 'a>,
    /// If `true`, `content` in an escaped form and should be unescaped before use
    escaped: bool,
}

impl<'de, 'a> Deserializer<'de> for AtomicDeserializer<'de, 'a> {
    type Error = DeError;

    /// Forwards deserialization to the [`Self::deserialize_str`]
    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.deserialize_str(visitor)
    }

    /// According to the <https://www.w3.org/TR/xmlschema11-2/#boolean>,
    /// valid boolean representations are only `"true"`, `"false"`, `"1"`,
    /// and `"0"`. But this method also handles following:
    ///
    /// |`bool` |XML content
    /// |-------|-------------------------------------------------------------
    /// |`true` |`"True"`,  `"TRUE"`,  `"t"`, `"Yes"`, `"YES"`, `"yes"`, `"y"`
    /// |`false`|`"False"`, `"FALSE"`, `"f"`, `"No"`,  `"NO"`,  `"no"`,  `"n"`
    fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        str2bool(self.content.as_str(), visitor)
    }

    deserialize_num!(deserialize_i8, visit_i8);
    deserialize_num!(deserialize_i16, visit_i16);
    deserialize_num!(deserialize_i32, visit_i32);
    deserialize_num!(deserialize_i64, visit_i64);

    deserialize_num!(deserialize_u8, visit_u8);
    deserialize_num!(deserialize_u16, visit_u16);
    deserialize_num!(deserialize_u32, visit_u32);
    deserialize_num!(deserialize_u64, visit_u64);

    serde_if_integer128! {
        deserialize_num!(deserialize_i128, visit_i128);
        deserialize_num!(deserialize_u128, visit_u128);
    }

    deserialize_num!(deserialize_f32, visit_f32);
    deserialize_num!(deserialize_f64, visit_f64);

    /// Forwards deserialization to the [`Self::deserialize_str`]
    fn deserialize_char<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.deserialize_str(visitor)
    }

    /// Supply to the visitor borrowed string, string slice, or owned string
    /// depending on the kind of input and presence of the escaped data.
    ///
    /// If string requires unescaping, then calls [`Visitor::visit_string`] with
    /// new allocated buffer with unescaped data.
    ///
    /// Otherwise calls
    /// - [`Visitor::visit_borrowed_str`] if data borrowed from the input
    /// - [`Visitor::visit_str`] if data borrowed from other deserializer
    /// - [`Visitor::visit_string`] if data owned by this deserializer
    fn deserialize_str<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        if self.escaped {
            match unescape(self.content.as_str())? {
                // Nothing was unescaped, so the original content can be handed
                // over with its original borrowing kind
                Cow::Borrowed(_) => self.content.deserialize_item(visitor),
                Cow::Owned(s) => visitor.visit_string(s),
            }
        } else {
            self.content.deserialize_item(visitor)
        }
    }

    /// Forwards deserialization to the [`Self::deserialize_str`]
    fn deserialize_string<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.deserialize_str(visitor)
    }

    /// If `content` is an empty string then calls [`Visitor::visit_none`],
    /// otherwise calls [`Visitor::visit_some`] with itself
    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        if self.content.as_str().is_empty() {
            visitor.visit_none()
        } else {
            visitor.visit_some(self)
        }
    }

    /// Always calls [`Visitor::visit_unit`], the content is not inspected
    fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        visitor.visit_unit()
    }

    /// Forwards deserialization to the [`Self::deserialize_unit`]
    fn deserialize_unit_struct<V>(
        self,
        _name: &'static str,
        visitor: V,
    ) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.deserialize_unit(visitor)
    }

    /// Calls [`Visitor::visit_newtype_struct`] with itself
    fn deserialize_newtype_struct<V>(
        self,
        _name: &'static str,
        visitor: V,
    ) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        visitor.visit_newtype_struct(self)
    }

    /// Calls [`Visitor::visit_enum`] with itself as an [`EnumAccess`]
    fn deserialize_enum<V>(
        self,
        _name: &'static str,
        _variants: &'static [&'static str],
        visitor: V,
    ) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        visitor.visit_enum(self)
    }

    /// Forwards deserialization to the [`Self::deserialize_str`]
    fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.deserialize_str(visitor)
    }

    /// Always calls [`Visitor::visit_unit`], the content is not inspected
    fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        visitor.visit_unit()
    }

    unsupported!(deserialize_bytes);
    unsupported!(deserialize_byte_buf);
    unsupported!(deserialize_seq);
    unsupported!(deserialize_tuple(usize));
    unsupported!(deserialize_tuple_struct(&'static str, usize));
    unsupported!(deserialize_map);
    unsupported!(deserialize_struct(&'static str, &'static [&'static str]));
}

impl<'de, 'a> EnumAccess<'de> for AtomicDeserializer<'de, 'a> {
    type Error = DeError;
    type Variant = UnitOnly;

    /// Deserializes the variant name from the content using this deserializer;
    /// the variant data is always represented by [`UnitOnly`].
    fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant), DeError>
    where
        V: DeserializeSeed<'de>,
    {
        let name = seed.deserialize(self)?;
        Ok((name, UnitOnly))
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Deserializer of variant data, that supports only unit variants.
/// Attempt to deserialize newtype will provide [`UnitDeserializer`].
/// Attempt to deserialize tuple or struct variant will result to call of
/// [`Visitor::visit_unit`].
pub struct UnitOnly; impl<'de> VariantAccess<'de> for UnitOnly { type Error = DeError; #[inline] fn unit_variant(self) -> Result<(), Self::Error> { Ok(()) } fn newtype_variant_seed(self, seed: T) -> Result where T: DeserializeSeed<'de>, { seed.deserialize(UnitDeserializer::::new()) } #[inline] fn tuple_variant(self, _len: usize, visitor: V) -> Result where V: Visitor<'de>, { visitor.visit_unit() } #[inline] fn struct_variant( self, _fields: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_unit() } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Iterator over string sub-slices delimited by one or several spaces. /// Contains decoded value of the `simpleType`. /// Iteration ends when list contains `None`. struct ListIter<'de, 'a> { /// If `Some`, contains unconsumed data of the list content: Option>, /// If `true`, `content` in escaped form and should be unescaped before use escaped: bool, } impl<'de, 'a> SeqAccess<'de> for ListIter<'de, 'a> { type Error = DeError; fn next_element_seed(&mut self, seed: T) -> Result, DeError> where T: DeserializeSeed<'de>, { if let Some(mut content) = self.content.take() { const DELIMITER: u8 = b' '; loop { let string = content.as_str(); if string.is_empty() { return Ok(None); } return match memchr(DELIMITER, string.as_bytes()) { // No delimiters in the `content`, deserialize it as a whole atomic None => seed.deserialize(AtomicDeserializer { content, escaped: self.escaped, }), // `content` started with a space, skip them all Some(0) => { // Skip all spaces let start = string.as_bytes().iter().position(|ch| *ch != DELIMITER); content = match (start, content) { // We cannot find any non-space character, so string contains only spaces (None, _) => return Ok(None), // Borrow result from input or deserializer depending on the initial borrowing (Some(start), Content::Input(s)) => Content::Input(s.split_at(start).1), (Some(start), 
Content::Slice(s)) => Content::Slice(s.split_at(start).1), // Skip additional bytes if we own data (Some(start), Content::Owned(s, skip)) => { Content::Owned(s, skip + start) } }; continue; } // `content` started from an atomic Some(end) => match content { // Borrow for the next iteration from input or deserializer depending on // the initial borrowing Content::Input(s) => { let (item, rest) = s.split_at(end); self.content = Some(Content::Input(rest)); seed.deserialize(AtomicDeserializer { content: Content::Input(item), escaped: self.escaped, }) } Content::Slice(s) => { let (item, rest) = s.split_at(end); self.content = Some(Content::Slice(rest)); seed.deserialize(AtomicDeserializer { content: Content::Slice(item), escaped: self.escaped, }) } // Skip additional bytes if we own data for next iteration, but deserialize from // the borrowed data from our buffer Content::Owned(s, skip) => { let item = s.split_at(skip + end).0; let result = seed.deserialize(AtomicDeserializer { content: Content::Slice(item), escaped: self.escaped, }); self.content = Some(Content::Owned(s, skip + end)); result } }, } .map(Some); } } Ok(None) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A deserializer for an xml probably escaped and encoded value of XSD [simple types]. /// This deserializer will borrow from the input as much as possible. /// /// `deserialize_any()` returns the whole string that deserializer contains. /// /// Escaping the value is actually not always necessary, for instance when /// converting to a float, we don't expect any escapable character anyway. /// In that cases deserializer skips unescaping step. /// /// Used for deserialize values from: /// - attribute values (`<... 
...="value" ...>`) /// - mixed text / CDATA content (`<...>text`) /// /// This deserializer processes items as following: /// - numbers are parsed from a text content using [`FromStr`]; /// - booleans converted from the text according to the XML [specification]: /// - `"true"` and `"1"` converted to `true`; /// - `"false"` and `"0"` converted to `false`; /// - strings returned as is; /// - characters also returned as strings. If string contain more than one character /// or empty, it is responsibility of a type to return an error; /// - `Option` always deserialized as `Some` using the same deserializer. /// If attribute or text content is missed, then the deserializer even wouldn't /// be used, so if it is used, then the value should be; /// - units (`()`) and unit structs always deserialized successfully; /// - newtype structs forwards deserialization to the inner type using the same /// deserializer; /// - sequences, tuples and tuple structs are deserialized as `xs:list`s. Only /// sequences of primitive types is possible to deserialize this way and they /// should be delimited by a space (` `, `\t`, `\r`, or `\n`); /// - structs and maps delegates to [`Self::deserialize_str`]; /// - enums: /// - unit variants: just return `()`; /// - newtype variants: deserialize from [`UnitDeserializer`]; /// - tuple and struct variants: call [`Visitor::visit_unit`]; /// - identifiers are deserialized as strings. /// /// [simple types]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition /// [`FromStr`]: std::str::FromStr /// [specification]: https://www.w3.org/TR/xmlschema11-2/#boolean pub struct SimpleTypeDeserializer<'de, 'a> { /// - In case of attribute contains escaped attribute value /// - In case of text contains unescaped text value content: CowRef<'de, 'a, [u8]>, /// If `true`, `content` in escaped form and should be unescaped before use escaped: bool, /// Decoder used to deserialize string data, numeric and boolean data. 
/// Not used for deserializing raw byte buffers decoder: Decoder, } impl<'de, 'a> SimpleTypeDeserializer<'de, 'a> { /// Creates a deserializer from a value, that possible borrowed from input pub fn from_text(text: Cow<'de, str>) -> Self { let content = match text { Cow::Borrowed(slice) => CowRef::Input(slice.as_bytes()), Cow::Owned(content) => CowRef::Owned(content.into_bytes()), }; Self::new(content, false, Decoder::utf8()) } /// Creates a deserializer from a value, that possible borrowed from input pub fn from_text_content(value: Text<'de>) -> Self { Self::from_text(value.text) } /// Creates a deserializer from a part of value at specified range #[allow(clippy::ptr_arg)] pub fn from_part( value: &'a Cow<'de, [u8]>, range: Range, escaped: bool, decoder: Decoder, ) -> Self { let content = match value { Cow::Borrowed(slice) => CowRef::Input(&slice[range]), Cow::Owned(slice) => CowRef::Slice(&slice[range]), }; Self::new(content, escaped, decoder) } /// Constructor for tests #[inline] const fn new(content: CowRef<'de, 'a, [u8]>, escaped: bool, decoder: Decoder) -> Self { Self { content, escaped, decoder, } } /// Decodes raw bytes using the encoding specified. /// The method will borrow if has the UTF-8 compatible representation. #[inline] fn decode<'b>(&'b self) -> Result, DeError> { Ok(match self.content { CowRef::Input(content) => match self.decoder.decode(content)? { Cow::Borrowed(content) => Content::Input(content), Cow::Owned(content) => Content::Owned(content, 0), }, CowRef::Slice(content) => match self.decoder.decode(content)? { Cow::Borrowed(content) => Content::Slice(content), Cow::Owned(content) => Content::Owned(content, 0), }, CowRef::Owned(ref content) => match self.decoder.decode(content)? 
{ Cow::Borrowed(content) => Content::Slice(content), Cow::Owned(content) => Content::Owned(content, 0), }, }) } } impl<'de, 'a> Deserializer<'de> for SimpleTypeDeserializer<'de, 'a> { type Error = DeError; /// Forwards deserialization to the [`Self::deserialize_str`] fn deserialize_any(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_str(visitor) } fn deserialize_bool(self, visitor: V) -> Result where V: Visitor<'de>, { deserialize_bool(&self.content, self.decoder, visitor) } deserialize_num!(deserialize_i8 => visit_i8); deserialize_num!(deserialize_i16 => visit_i16); deserialize_num!(deserialize_i32 => visit_i32); deserialize_num!(deserialize_i64 => visit_i64); deserialize_num!(deserialize_u8 => visit_u8); deserialize_num!(deserialize_u16 => visit_u16); deserialize_num!(deserialize_u32 => visit_u32); deserialize_num!(deserialize_u64 => visit_u64); serde_if_integer128! { deserialize_num!(deserialize_i128 => visit_i128); deserialize_num!(deserialize_u128 => visit_u128); } deserialize_num!(deserialize_f32 => visit_f32); deserialize_num!(deserialize_f64 => visit_f64); /// Forwards deserialization to the [`Self::deserialize_str`] #[inline] fn deserialize_char(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_str(visitor) } fn deserialize_str(self, visitor: V) -> Result where V: Visitor<'de>, { let content = self.decode()?; if self.escaped { match unescape(content.as_str())? 
{ Cow::Borrowed(_) => content.deserialize_all(visitor), Cow::Owned(s) => visitor.visit_string(s), } } else { content.deserialize_all(visitor) } } /// Forwards deserialization to the [`Self::deserialize_str`] #[inline] fn deserialize_string(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_str(visitor) } /// Forwards deserialization to the [`Self::deserialize_str`] #[inline] fn deserialize_bytes(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_str(visitor) } /// Forwards deserialization to the [`Self::deserialize_str`] #[inline] fn deserialize_byte_buf(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_bytes(visitor) } fn deserialize_option(self, visitor: V) -> Result where V: Visitor<'de>, { visitor.visit_some(self) } #[inline] fn deserialize_unit(self, visitor: V) -> Result where V: Visitor<'de>, { visitor.visit_unit() } /// Forwards deserialization to the [`Self::deserialize_unit`] #[inline] fn deserialize_unit_struct( self, _name: &'static str, visitor: V, ) -> Result where V: Visitor<'de>, { self.deserialize_unit(visitor) } fn deserialize_newtype_struct( self, _name: &'static str, visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_newtype_struct(self) } fn deserialize_seq(self, visitor: V) -> Result where V: Visitor<'de>, { visitor.visit_seq(ListIter { content: Some(self.decode()?), escaped: self.escaped, }) } /// Representation of tuples the same as [sequences][Self::deserialize_seq]. #[inline] fn deserialize_tuple(self, _len: usize, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_seq(visitor) } /// Representation of named tuples the same as [unnamed tuples][Self::deserialize_tuple]. 
#[inline] fn deserialize_tuple_struct( self, _name: &'static str, len: usize, visitor: V, ) -> Result where V: Visitor<'de>, { self.deserialize_tuple(len, visitor) } unsupported!(deserialize_map); unsupported!(deserialize_struct(&'static str, &'static [&'static str])); fn deserialize_enum( self, _name: &'static str, _variants: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_enum(self) } /// Forwards deserialization to the [`Self::deserialize_str`] #[inline] fn deserialize_identifier(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_str(visitor) } #[inline] fn deserialize_ignored_any(self, visitor: V) -> Result where V: Visitor<'de>, { visitor.visit_unit() } } impl<'de, 'a> EnumAccess<'de> for SimpleTypeDeserializer<'de, 'a> { type Error = DeError; type Variant = UnitOnly; fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant), DeError> where V: DeserializeSeed<'de>, { let name = seed.deserialize(self)?; Ok((name, UnitOnly)) } } //////////////////////////////////////////////////////////////////////////////////////////////////// #[cfg(test)] mod tests { use super::*; use crate::se::simple_type::{QuoteTarget, SimpleTypeSerializer}; use crate::se::{Indent, QuoteLevel}; use crate::utils::{ByteBuf, Bytes}; use serde::de::IgnoredAny; use serde::{Deserialize, Serialize}; use std::collections::HashMap; macro_rules! simple_only { ($encoding:ident, $name:ident: $type:ty = $xml:expr => $result:expr) => { #[test] fn $name() { let decoder = Decoder::$encoding(); let xml = $xml; let de = SimpleTypeDeserializer::new(CowRef::Input(xml.as_ref()), true, decoder); let data: $type = Deserialize::deserialize(de).unwrap(); assert_eq!(data, $result); } }; } macro_rules! 
simple { ($encoding:ident, $name:ident: $type:ty = $xml:expr => $result:expr) => { #[test] fn $name() { let decoder = Decoder::$encoding(); let xml = $xml; let de = SimpleTypeDeserializer::new(CowRef::Input(xml.as_ref()), true, decoder); let data: $type = Deserialize::deserialize(de).unwrap(); assert_eq!(data, $result); // Roundtrip to ensure that serializer corresponds to deserializer assert_eq!( data.serialize(SimpleTypeSerializer { writer: String::new(), target: QuoteTarget::Text, level: QuoteLevel::Full, indent: Indent::None, }) .unwrap(), xml ); } }; } macro_rules! err { ($encoding:ident, $name:ident: $type:ty = $xml:expr => $kind:ident($reason:literal)) => { #[test] fn $name() { let decoder = Decoder::$encoding(); let xml = $xml; let de = SimpleTypeDeserializer::new(CowRef::Input(xml.as_ref()), true, decoder); let err = <$type as Deserialize>::deserialize(de).unwrap_err(); match err { DeError::$kind(e) => assert_eq!(e, $reason), _ => panic!( "Expected `Err({}({}))`, but got `{:?}`", stringify!($kind), $reason, err ), } } }; } #[derive(Debug, Deserialize, Serialize, PartialEq)] struct Unit; #[derive(Debug, Deserialize, Serialize, PartialEq)] struct Newtype(String); #[derive(Debug, Deserialize, Serialize, PartialEq)] struct Tuple((), ()); #[derive(Debug, Deserialize, Serialize, PartialEq)] struct BorrowedNewtype<'a>(&'a str); #[derive(Debug, Deserialize, Serialize, PartialEq)] struct Struct { key: String, val: usize, } #[derive(Debug, Deserialize, Serialize, PartialEq)] enum Enum { Unit, Newtype(String), Tuple(String, usize), Struct { key: String, val: usize }, } #[derive(Debug, Deserialize, PartialEq)] #[serde(field_identifier)] enum Id { Field, } #[derive(Debug, Deserialize)] #[serde(transparent)] struct Any(IgnoredAny); impl PartialEq for Any { fn eq(&self, _other: &Any) -> bool { true } } /// Tests for deserialize atomic and union values, as defined in XSD specification mod atomic { use super::*; use crate::se::simple_type::AtomicSerializer; use 
pretty_assertions::assert_eq; /// Checks that given `$input` successfully deserializing into given `$result` macro_rules! deserialized_to_only { ($name:ident: $type:ty = $input:literal => $result:expr) => { #[test] fn $name() { let de = AtomicDeserializer { content: Content::Input($input), escaped: true, }; let data: $type = Deserialize::deserialize(de).unwrap(); assert_eq!(data, $result); } }; } /// Checks that given `$input` successfully deserializing into given `$result` /// and the result is serialized back to the `$input` macro_rules! deserialized_to { ($name:ident: $type:ty = $input:literal => $result:expr) => { #[test] fn $name() { let de = AtomicDeserializer { content: Content::Input($input), escaped: true, }; let data: $type = Deserialize::deserialize(de).unwrap(); assert_eq!(data, $result); // Roundtrip to ensure that serializer corresponds to deserializer let mut buffer = String::new(); let has_written = data .serialize(AtomicSerializer { writer: &mut buffer, target: QuoteTarget::Text, level: QuoteLevel::Full, indent: Some(Indent::None), }) .unwrap(); assert_eq!(buffer, $input); assert_eq!(has_written, !buffer.is_empty()); } }; } /// Checks that attempt to deserialize given `$input` as a `$type` results to a /// deserialization error `$kind` with `$reason` macro_rules! 
err { ($name:ident: $type:ty = $input:literal => $kind:ident($reason:literal)) => { #[test] fn $name() { let de = AtomicDeserializer { content: Content::Input($input), escaped: true, }; let err = <$type as Deserialize>::deserialize(de).unwrap_err(); match err { DeError::$kind(e) => assert_eq!(e, $reason), _ => panic!( "Expected `Err({}({}))`, but got `{:?}`", stringify!($kind), $reason, err ), } } }; } deserialized_to!(false_: bool = "false" => false); deserialized_to!(true_: bool = "true" => true); deserialized_to!(i8_: i8 = "-2" => -2); deserialized_to!(i16_: i16 = "-2" => -2); deserialized_to!(i32_: i32 = "-2" => -2); deserialized_to!(i64_: i64 = "-2" => -2); deserialized_to!(u8_: u8 = "3" => 3); deserialized_to!(u16_: u16 = "3" => 3); deserialized_to!(u32_: u32 = "3" => 3); deserialized_to!(u64_: u64 = "3" => 3); serde_if_integer128! { deserialized_to!(i128_: i128 = "-2" => -2); deserialized_to!(u128_: u128 = "2" => 2); } deserialized_to!(f32_: f32 = "1.23" => 1.23); deserialized_to!(f64_: f64 = "1.23" => 1.23); deserialized_to!(char_unescaped: char = "h" => 'h'); deserialized_to!(char_escaped: char = "<" => '<'); deserialized_to!(string: String = "<escaped string" => " "non-escaped string"); err!(escaped_str: &str = "escaped string" => Custom("invalid type: string \"escaped string\", expected a borrowed string")); err!(byte_buf: ByteBuf = "<escaped string" => Custom("invalid type: string \" Custom("invalid type: string \"non-escaped string\", expected borrowed bytes")); deserialized_to!(option_none: Option<&str> = "" => None); deserialized_to!(option_some: Option<&str> = "non-escaped-string" => Some("non-escaped-string")); deserialized_to_only!(unit: () = "anything" => ()); deserialized_to_only!(unit_struct: Unit = "anything" => Unit); deserialized_to!(newtype_owned: Newtype = "<escaped string" => Newtype(" BorrowedNewtype("non-escaped string")); err!(seq: Vec<()> = "non-escaped string" => Custom("invalid type: string \"non-escaped string\", expected a 
sequence")); err!(tuple: ((), ()) = "non-escaped string" => Custom("invalid type: string \"non-escaped string\", expected a tuple of size 2")); err!(tuple_struct: Tuple = "non-escaped string" => Custom("invalid type: string \"non-escaped string\", expected tuple struct Tuple")); err!(map: HashMap<(), ()> = "non-escaped string" => Custom("invalid type: string \"non-escaped string\", expected a map")); err!(struct_: Struct = "non-escaped string" => Custom("invalid type: string \"non-escaped string\", expected struct Struct")); deserialized_to!(enum_unit: Enum = "Unit" => Enum::Unit); err!(enum_newtype: Enum = "Newtype" => Custom("invalid type: unit value, expected a string")); err!(enum_tuple: Enum = "Tuple" => Custom("invalid type: unit value, expected tuple variant Enum::Tuple")); err!(enum_struct: Enum = "Struct" => Custom("invalid type: unit value, expected struct variant Enum::Struct")); err!(enum_other: Enum = "any data" => Custom("unknown variant `any data`, expected one of `Unit`, `Newtype`, `Tuple`, `Struct`")); deserialized_to_only!(identifier: Id = "Field" => Id::Field); deserialized_to_only!(ignored_any: Any = "any data" => Any(IgnoredAny)); /// Checks that deserialization from an owned content is working #[test] #[cfg(feature = "encoding")] fn owned_data() { let de = AtomicDeserializer { content: Content::Owned("string slice".into(), 7), escaped: true, }; assert_eq!(de.content.as_str(), "slice"); let data: String = Deserialize::deserialize(de).unwrap(); assert_eq!(data, "slice"); } /// Checks that deserialization from a content borrowed from some /// buffer other that input is working #[test] fn borrowed_from_deserializer() { let de = AtomicDeserializer { content: Content::Slice("string slice"), escaped: true, }; assert_eq!(de.content.as_str(), "string slice"); let data: String = Deserialize::deserialize(de).unwrap(); assert_eq!(data, "string slice"); } } /// Module for testing list accessor mod list { use super::*; use pretty_assertions::assert_eq; 
#[test] fn empty() { let mut seq = ListIter { content: Some(Content::Input("")), escaped: true, }; assert_eq!(seq.next_element::<&str>().unwrap(), None); assert_eq!(seq.next_element::<&str>().unwrap(), None); } #[test] fn only_spaces() { let mut seq = ListIter { content: Some(Content::Input(" ")), escaped: true, }; assert_eq!(seq.next_element::<&str>().unwrap(), None); assert_eq!(seq.next_element::<&str>().unwrap(), None); } #[test] fn one_item() { let mut seq = ListIter { content: Some(Content::Input("abc")), escaped: true, }; assert_eq!(seq.next_element::<&str>().unwrap(), Some("abc")); assert_eq!(seq.next_element::<&str>().unwrap(), None); assert_eq!(seq.next_element::<&str>().unwrap(), None); } #[test] fn two_items() { let mut seq = ListIter { content: Some(Content::Input("abc def")), escaped: true, }; assert_eq!(seq.next_element::<&str>().unwrap(), Some("abc")); assert_eq!(seq.next_element::<&str>().unwrap(), Some("def")); assert_eq!(seq.next_element::<&str>().unwrap(), None); assert_eq!(seq.next_element::<&str>().unwrap(), None); } #[test] fn leading_spaces() { let mut seq = ListIter { content: Some(Content::Input(" def")), escaped: true, }; assert_eq!(seq.next_element::<&str>().unwrap(), Some("def")); assert_eq!(seq.next_element::<&str>().unwrap(), None); assert_eq!(seq.next_element::<&str>().unwrap(), None); } #[test] fn trailing_spaces() { let mut seq = ListIter { content: Some(Content::Input("abc ")), escaped: true, }; assert_eq!(seq.next_element::<&str>().unwrap(), Some("abc")); assert_eq!(seq.next_element::<&str>().unwrap(), None); assert_eq!(seq.next_element::<&str>().unwrap(), None); } #[test] fn mixed_types() { let mut seq = ListIter { content: Some(Content::Input("string 1.23 42 true false h Unit")), escaped: true, }; assert_eq!(seq.next_element::<&str>().unwrap(), Some("string")); assert_eq!(seq.next_element::().unwrap(), Some(1.23)); assert_eq!(seq.next_element::().unwrap(), Some(42)); assert_eq!(seq.next_element::().unwrap(), Some(true)); 
assert_eq!(seq.next_element::().unwrap(), Some(false)); assert_eq!(seq.next_element::().unwrap(), Some('h')); assert_eq!(seq.next_element::().unwrap(), Some(Enum::Unit)); assert_eq!(seq.next_element::<()>().unwrap(), None); assert_eq!(seq.next_element::<()>().unwrap(), None); } } mod utf8 { use super::*; use pretty_assertions::assert_eq; simple!(utf8, i8_: i8 = "-2" => -2); simple!(utf8, i16_: i16 = "-2" => -2); simple!(utf8, i32_: i32 = "-2" => -2); simple!(utf8, i64_: i64 = "-2" => -2); simple!(utf8, u8_: u8 = "3" => 3); simple!(utf8, u16_: u16 = "3" => 3); simple!(utf8, u32_: u32 = "3" => 3); simple!(utf8, u64_: u64 = "3" => 3); serde_if_integer128! { simple!(utf8, i128_: i128 = "-2" => -2); simple!(utf8, u128_: u128 = "2" => 2); } simple!(utf8, f32_: f32 = "1.23" => 1.23); simple!(utf8, f64_: f64 = "1.23" => 1.23); simple!(utf8, false_: bool = "false" => false); simple!(utf8, true_: bool = "true" => true); simple!(utf8, char_unescaped: char = "h" => 'h'); simple!(utf8, char_escaped: char = "<" => '<'); simple!(utf8, string: String = "<escaped string" => " Custom("invalid type: string \" "non-escaped string"); err!(utf8, borrowed_bytes: Bytes = "<escaped string" => Custom("invalid type: string \" = "" => Some("")); simple!(utf8, option_some: Option<&str> = "non-escaped string" => Some("non-escaped string")); simple_only!(utf8, unit: () = "any data" => ()); simple_only!(utf8, unit_struct: Unit = "any data" => Unit); // Serializer will not escape space because this is unnecessary. 
// Because borrowing has meaning only for deserializer, no need to test // roundtrip here, it is already tested for strings where compatible list // of escaped characters is used simple_only!(utf8, newtype_owned: Newtype = "<escaped string" => Newtype(" BorrowedNewtype("non-escaped string")); err!(utf8, map: HashMap<(), ()> = "any data" => Custom("invalid type: string \"any data\", expected a map")); err!(utf8, struct_: Struct = "any data" => Custom("invalid type: string \"any data\", expected struct Struct")); simple!(utf8, enum_unit: Enum = "Unit" => Enum::Unit); err!(utf8, enum_newtype: Enum = "Newtype" => Custom("invalid type: unit value, expected a string")); err!(utf8, enum_tuple: Enum = "Tuple" => Custom("invalid type: unit value, expected tuple variant Enum::Tuple")); err!(utf8, enum_struct: Enum = "Struct" => Custom("invalid type: unit value, expected struct variant Enum::Struct")); err!(utf8, enum_other: Enum = "any data" => Custom("unknown variant `any data`, expected one of `Unit`, `Newtype`, `Tuple`, `Struct`")); simple_only!(utf8, identifier: Id = "Field" => Id::Field); simple_only!(utf8, ignored_any: Any = "any data" => Any(IgnoredAny)); } #[cfg(feature = "encoding")] mod utf16 { use super::*; use pretty_assertions::assert_eq; fn to_utf16(string: &str) -> Vec { let mut bytes = Vec::new(); for ch in string.encode_utf16() { bytes.extend_from_slice(&ch.to_le_bytes()); } bytes } macro_rules! utf16 { ($name:ident: $type:ty = $xml:literal => $result:expr) => { simple_only!(utf16, $name: $type = to_utf16($xml) => $result); }; } macro_rules! unsupported { ($name:ident: $type:ty = $xml:literal => $err:literal) => { err!(utf16, $name: $type = to_utf16($xml) => Custom($err)); }; } utf16!(i8_: i8 = "-2" => -2); utf16!(i16_: i16 = "-2" => -2); utf16!(i32_: i32 = "-2" => -2); utf16!(i64_: i64 = "-2" => -2); utf16!(u8_: u8 = "3" => 3); utf16!(u16_: u16 = "3" => 3); utf16!(u32_: u32 = "3" => 3); utf16!(u64_: u64 = "3" => 3); serde_if_integer128! 
{ utf16!(i128_: i128 = "-2" => -2); utf16!(u128_: u128 = "2" => 2); } utf16!(f32_: f32 = "1.23" => 1.23); utf16!(f64_: f64 = "1.23" => 1.23); utf16!(false_: bool = "false" => false); utf16!(true_: bool = "true" => true); utf16!(char_unescaped: char = "h" => 'h'); utf16!(char_escaped: char = "<" => '<'); utf16!(string: String = "<escaped string" => " "invalid type: string \" = "" => Some(())); utf16!(option_some: Option<()> = "any data" => Some(())); utf16!(unit: () = "any data" => ()); utf16!(unit_struct: Unit = "any data" => Unit); utf16!(newtype_owned: Newtype = "<escaped string" => Newtype(" "invalid type: string \"non-escaped string\", expected a borrowed string"); unsupported!(map: HashMap<(), ()> = "any data" => "invalid type: string \"any data\", expected a map"); unsupported!(struct_: Struct = "any data" => "invalid type: string \"any data\", expected struct Struct"); utf16!(enum_unit: Enum = "Unit" => Enum::Unit); unsupported!(enum_newtype: Enum = "Newtype" => "invalid type: unit value, expected a string"); unsupported!(enum_tuple: Enum = "Tuple" => "invalid type: unit value, expected tuple variant Enum::Tuple"); unsupported!(enum_struct: Enum = "Struct" => "invalid type: unit value, expected struct variant Enum::Struct"); unsupported!(enum_other: Enum = "any data" => "unknown variant `any data`, expected one of `Unit`, `Newtype`, `Tuple`, `Struct`"); utf16!(identifier: Id = "Field" => Id::Field); utf16!(ignored_any: Any = "any data" => Any(IgnoredAny)); } } quick-xml-0.36.1/src/de/text.rs000064400000000000000000000146510072674642500143750ustar 00000000000000use crate::{ de::simple_type::SimpleTypeDeserializer, de::{str2bool, Text, TEXT_KEY}, errors::serialize::DeError, }; use serde::de::value::BorrowedStrDeserializer; use serde::de::{DeserializeSeed, Deserializer, EnumAccess, VariantAccess, Visitor}; use serde::serde_if_integer128; use std::borrow::Cow; /// A deserializer for a single text node of a mixed sequence of tags and text. 
///
/// This deserializer is very similar to a [`MapValueDeserializer`] (when it
/// processes the [`DeEvent::Text`] event). The only difference is in the
/// `deserialize_seq` method. This deserializer will perform deserialization
/// from a textual content, whereas the [`MapValueDeserializer`] will iterate
/// over tags / text within its parent tag.
///
/// This deserializer processes items as follows:
/// - numbers are parsed from a text content using [`FromStr`];
/// - booleans are converted from the text according to the XML [specification]:
///   - `"true"` and `"1"` are converted to `true`;
///   - `"false"` and `"0"` are converted to `false`;
/// - strings are returned as is;
/// - characters are also returned as strings. If the string contains more than one
///   character or is empty, it is the responsibility of the type to return an error;
/// - `Option`:
///   - empty text is deserialized as `None`;
///   - everything else is deserialized as `Some` using the same deserializer;
/// - units (`()`) and unit structs are always deserialized successfully;
/// - newtype structs forward deserialization to the inner type using the same
///   deserializer;
/// - sequences, tuples and tuple structs are deserialized using [`SimpleTypeDeserializer`]
///   (this is the difference): text content is passed to the deserializer directly;
/// - structs and maps call [`Visitor::visit_borrowed_str`] or [`Visitor::visit_string`];
///   it is the responsibility of the type to return an error if it is not able to
///   process this data;
/// - enums:
///   - the variant name is deserialized as `$text`;
///   - the content is deserialized using the same deserializer:
///     - unit variants: just return `()`;
///     - newtype variants forward deserialization to the inner type using the
///       same deserializer;
///     - tuple and struct variants are deserialized using [`SimpleTypeDeserializer`].
/// /// [`MapValueDeserializer`]: ../map/struct.MapValueDeserializer.html /// [`DeEvent::Text`]: crate::de::DeEvent::Text /// [`FromStr`]: std::str::FromStr /// [specification]: https://www.w3.org/TR/xmlschema11-2/#boolean pub struct TextDeserializer<'de>(pub Text<'de>); impl<'de> TextDeserializer<'de> { /// Returns a next string as concatenated content of consequent [`Text`] and /// [`CData`] events, used inside [`deserialize_primitives!()`]. /// /// [`Text`]: crate::events::Event::Text /// [`CData`]: crate::events::Event::CData #[inline] fn read_string(self) -> Result, DeError> { Ok(self.0.text) } } impl<'de> Deserializer<'de> for TextDeserializer<'de> { type Error = DeError; deserialize_primitives!(); fn deserialize_unit(self, visitor: V) -> Result where V: Visitor<'de>, { visitor.visit_unit() } fn deserialize_option(self, visitor: V) -> Result where V: Visitor<'de>, { if self.0.is_empty() { visitor.visit_none() } else { visitor.visit_some(self) } } /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`] /// with this deserializer. fn deserialize_newtype_struct( self, _name: &'static str, visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_newtype_struct(self) } /// This method deserializes a sequence inside of element that itself is a /// sequence element: /// /// ```xml /// <> /// ... /// inner sequence as xs:list /// ... /// /// ``` fn deserialize_seq(self, visitor: V) -> Result where V: Visitor<'de>, { SimpleTypeDeserializer::from_text_content(self.0).deserialize_seq(visitor) } #[inline] fn deserialize_struct( self, _name: &'static str, _fields: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { // Deserializer methods are only hints, if deserializer could not satisfy // request, it should return the data that it has. 
It is responsibility // of a Visitor to return an error if it does not understand the data self.deserialize_str(visitor) } fn deserialize_enum( self, _name: &'static str, _variants: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_enum(self) } #[inline] fn deserialize_any(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_str(visitor) } } impl<'de> EnumAccess<'de> for TextDeserializer<'de> { type Error = DeError; type Variant = Self; fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error> where V: DeserializeSeed<'de>, { let name = seed.deserialize(BorrowedStrDeserializer::::new(TEXT_KEY))?; Ok((name, self)) } } impl<'de> VariantAccess<'de> for TextDeserializer<'de> { type Error = DeError; #[inline] fn unit_variant(self) -> Result<(), Self::Error> { Ok(()) } fn newtype_variant_seed(self, seed: T) -> Result where T: DeserializeSeed<'de>, { seed.deserialize(self) } #[inline] fn tuple_variant(self, len: usize, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_tuple(len, visitor) } #[inline] fn struct_variant( self, fields: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { self.deserialize_struct("", fields, visitor) } } quick-xml-0.36.1/src/de/var.rs000064400000000000000000000112100072674642500141650ustar 00000000000000use crate::{ de::key::QNameDeserializer, de::map::ElementMapAccess, de::resolver::EntityResolver, de::simple_type::SimpleTypeDeserializer, de::{DeEvent, Deserializer, XmlRead, TEXT_KEY}, errors::serialize::DeError, }; use serde::de::value::BorrowedStrDeserializer; use serde::de::{self, DeserializeSeed, Deserializer as _, Visitor}; /// An enum access pub struct EnumAccess<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { de: &'d mut Deserializer<'de, R, E>, } impl<'de, 'd, R, E> EnumAccess<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { pub fn new(de: &'d mut Deserializer<'de, R, E>) -> Self { EnumAccess { de } } } 
impl<'de, 'd, R, E> de::EnumAccess<'de> for EnumAccess<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { type Error = DeError; type Variant = VariantAccess<'de, 'd, R, E>; fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error> where V: DeserializeSeed<'de>, { let decoder = self.de.reader.decoder(); let (name, is_text) = match self.de.peek()? { DeEvent::Start(e) => ( seed.deserialize(QNameDeserializer::from_elem(e.raw_name(), decoder)?)?, false, ), DeEvent::Text(_) => ( seed.deserialize(BorrowedStrDeserializer::::new(TEXT_KEY))?, true, ), // SAFETY: The reader is guaranteed that we don't have unmatched tags // If we here, then out deserializer has a bug DeEvent::End(e) => unreachable!("{:?}", e), DeEvent::Eof => return Err(DeError::UnexpectedEof), }; Ok(( name, VariantAccess { de: self.de, is_text, }, )) } } pub struct VariantAccess<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { de: &'d mut Deserializer<'de, R, E>, /// `true` if variant should be deserialized from a textual content /// and `false` if from tag is_text: bool, } impl<'de, 'd, R, E> de::VariantAccess<'de> for VariantAccess<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { type Error = DeError; fn unit_variant(self) -> Result<(), Self::Error> { match self.de.next()? { // Consume subtree DeEvent::Start(e) => self.de.read_to_end(e.name()), // Does not needed to deserialize using SimpleTypeDeserializer, because // it returns `()` when `deserialize_unit()` is requested DeEvent::Text(_) => Ok(()), // SAFETY: the other events are filtered in `variant_seed()` _ => unreachable!("Only `Start` or `Text` events are possible here"), } } fn newtype_variant_seed(self, seed: T) -> Result where T: DeserializeSeed<'de>, { if self.is_text { match self.de.next()? 
{ DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content(e)), // SAFETY: the other events are filtered in `variant_seed()` _ => unreachable!("Only `Text` events are possible here"), } } else { seed.deserialize(self.de) } } fn tuple_variant(self, len: usize, visitor: V) -> Result where V: Visitor<'de>, { if self.is_text { match self.de.next()? { DeEvent::Text(e) => { SimpleTypeDeserializer::from_text_content(e).deserialize_tuple(len, visitor) } // SAFETY: the other events are filtered in `variant_seed()` _ => unreachable!("Only `Text` events are possible here"), } } else { self.de.deserialize_tuple(len, visitor) } } fn struct_variant( self, fields: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { match self.de.next()? { DeEvent::Start(e) => visitor.visit_map(ElementMapAccess::new(self.de, e, fields)?), DeEvent::Text(e) => { SimpleTypeDeserializer::from_text_content(e).deserialize_struct("", fields, visitor) } // SAFETY: the other events are filtered in `variant_seed()` _ => unreachable!("Only `Start` or `Text` events are possible here"), } } } quick-xml-0.36.1/src/encoding.rs000064400000000000000000000172010072674642500146010ustar 00000000000000//! A module for wrappers that encode / decode data. use std::borrow::Cow; #[cfg(feature = "encoding")] use encoding_rs::{DecoderResult, Encoding, UTF_16BE, UTF_16LE, UTF_8}; #[cfg(feature = "encoding")] use crate::Error; use crate::Result; /// Unicode "byte order mark" (\u{FEFF}) encoded as UTF-8. /// See pub(crate) const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF]; /// Unicode "byte order mark" (\u{FEFF}) encoded as UTF-16 with little-endian byte order. /// See #[cfg(feature = "encoding")] pub(crate) const UTF16_LE_BOM: &[u8] = &[0xFF, 0xFE]; /// Unicode "byte order mark" (\u{FEFF}) encoded as UTF-16 with big-endian byte order. /// See #[cfg(feature = "encoding")] pub(crate) const UTF16_BE_BOM: &[u8] = &[0xFE, 0xFF]; /// Decoder of byte slices into strings. 
///
/// If feature [`encoding`] is enabled, this encoding taken from the `"encoding"`
/// XML declaration or assumes UTF-8, if XML has no declaration, encoding
/// key is not defined or contains unknown encoding.
///
/// The library supports any UTF-8 compatible encodings that crate `encoding_rs`
/// is supported. [*UTF-16 and ISO-2022-JP are not supported at the present*][utf16].
///
/// If feature [`encoding`] is disabled, the decoder is always UTF-8 decoder:
/// any XML declarations are ignored.
///
/// [utf16]: https://github.com/tafia/quick-xml/issues/158
/// [`encoding`]: ../index.html#encoding
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Decoder {
    #[cfg(feature = "encoding")]
    pub(crate) encoding: &'static Encoding,
}

impl Decoder {
    /// Creates a decoder that treats the input as UTF-8.
    pub(crate) fn utf8() -> Self {
        Decoder {
            #[cfg(feature = "encoding")]
            encoding: UTF_8,
        }
    }

    /// Creates a decoder that uses UTF-16 LE. Available only in tests.
    #[cfg(all(test, feature = "encoding", feature = "serialize"))]
    pub(crate) fn utf16() -> Self {
        Decoder { encoding: UTF_16LE }
    }
}

impl Decoder {
    /// Returns the `Reader`s encoding.
    ///
    /// This encoding will be used by [`decode`].
    ///
    /// [`decode`]: Self::decode
    #[cfg(feature = "encoding")]
    pub const fn encoding(&self) -> &'static Encoding {
        self.encoding
    }

    /// ## Without `encoding` feature
    ///
    /// Decodes an UTF-8 slice regardless of XML declaration and ignoring BOM
    /// if it is present in the `bytes`.
    ///
    /// ## With `encoding` feature
    ///
    /// Decodes specified bytes using encoding, declared in the XML, if it was
    /// declared there, or UTF-8 otherwise, and ignoring BOM if it is present
    /// in the `bytes`.
    ///
    /// ----
    /// Returns an error in case of malformed sequences in the `bytes`.
    pub fn decode<'b>(&self, bytes: &'b [u8]) -> Result<Cow<'b, str>> {
        #[cfg(not(feature = "encoding"))]
        let decoded = Ok(Cow::Borrowed(std::str::from_utf8(bytes)?));

        #[cfg(feature = "encoding")]
        let decoded = decode(bytes, self.encoding);

        decoded
    }

    /// Like [`decode`][Self::decode] but using a pre-allocated buffer.
    pub fn decode_into(&self, bytes: &[u8], buf: &mut String) -> Result<()> {
        #[cfg(not(feature = "encoding"))]
        buf.push_str(std::str::from_utf8(bytes)?);

        #[cfg(feature = "encoding")]
        decode_into(bytes, self.encoding, buf)?;

        Ok(())
    }

    /// Decodes the `Cow` buffer, preserves the lifetime
    pub(crate) fn decode_cow<'b>(&self, bytes: &Cow<'b, [u8]>) -> Result<Cow<'b, str>> {
        match bytes {
            Cow::Borrowed(bytes) => self.decode(bytes),
            // Convert to owned, because otherwise Cow will be bound with wrong lifetime
            Cow::Owned(bytes) => Ok(self.decode(bytes)?.into_owned().into()),
        }
    }
}

/// Decodes the provided bytes using the specified encoding.
///
/// Returns an error in case of malformed or non-representable sequences in the `bytes`.
#[cfg(feature = "encoding")]
pub fn decode<'b>(bytes: &'b [u8], encoding: &'static Encoding) -> Result<Cow<'b, str>> {
    encoding
        .decode_without_bom_handling_and_without_replacement(bytes)
        .ok_or(Error::NonDecodable(None))
}

/// Like [`decode`] but using a pre-allocated buffer.
#[cfg(feature = "encoding")]
pub fn decode_into(bytes: &[u8], encoding: &'static Encoding, buf: &mut String) -> Result<()> {
    // UTF-8 input only needs validation, no transcoding
    if encoding == UTF_8 {
        buf.push_str(std::str::from_utf8(bytes)?);
        return Ok(());
    }

    let mut decoder = encoding.new_decoder_without_bom_handling();
    buf.reserve(
        decoder
            .max_utf8_buffer_length_without_replacement(bytes.len())
            // SAFETY: None can be returned only if required size will overflow usize,
            // but in that case String::reserve also panics
            .unwrap(),
    );
    let (result, read) = decoder.decode_to_string_without_replacement(bytes, buf, true);
    match result {
        DecoderResult::InputEmpty => {
            debug_assert_eq!(read, bytes.len());
            Ok(())
        }
        DecoderResult::Malformed(_, _) => Err(Error::NonDecodable(None)),
        // SAFETY: We allocate enough space above
        DecoderResult::OutputFull => unreachable!(),
    }
}

/// Automatic encoding detection of XML files based using the
/// [recommended algorithm](https://www.w3.org/TR/xml11/#sec-guessing).
/// /// If encoding is detected, `Some` is returned with an encoding and size of BOM /// in bytes, if detection was performed using BOM, or zero, if detection was /// performed without BOM. /// /// IF encoding was not recognized, `None` is returned. /// /// Because the [`encoding_rs`] crate supports only subset of those encodings, only /// the supported subset are detected, which is UTF-8, UTF-16 BE and UTF-16 LE. /// /// The algorithm suggests examine up to the first 4 bytes to determine encoding /// according to the following table: /// /// | Bytes |Detected encoding /// |-------------|------------------------------------------ /// | **BOM** /// |`FE_FF_##_##`|UTF-16, big-endian /// |`FF FE ## ##`|UTF-16, little-endian /// |`EF BB BF` |UTF-8 /// | **No BOM** /// |`00 3C 00 3F`|UTF-16 BE or ISO-10646-UCS-2 BE or similar 16-bit BE (use declared encoding to find the exact one) /// |`3C 00 3F 00`|UTF-16 LE or ISO-10646-UCS-2 LE or similar 16-bit LE (use declared encoding to find the exact one) /// |`3C 3F 78 6D`|UTF-8, ISO 646, ASCII, some part of ISO 8859, Shift-JIS, EUC, or any other 7-bit, 8-bit, or mixed-width encoding which ensures that the characters of ASCII have their normal positions, width, and values; the actual encoding declaration must be read to detect which of these applies, but since all of these encodings use the same bit patterns for the relevant ASCII characters, the encoding declaration itself may be read reliably #[cfg(feature = "encoding")] pub fn detect_encoding(bytes: &[u8]) -> Option<(&'static Encoding, usize)> { match bytes { // with BOM _ if bytes.starts_with(UTF16_BE_BOM) => Some((UTF_16BE, 2)), _ if bytes.starts_with(UTF16_LE_BOM) => Some((UTF_16LE, 2)), _ if bytes.starts_with(UTF8_BOM) => Some((UTF_8, 3)), // without BOM _ if bytes.starts_with(&[0x00, b'<', 0x00, b'?']) => Some((UTF_16BE, 0)), // Some BE encoding, for example, UTF-16 or ISO-10646-UCS-2 _ if bytes.starts_with(&[b'<', 0x00, b'?', 0x00]) => Some((UTF_16LE, 0)), // Some LE 
encoding, for example, UTF-16 or ISO-10646-UCS-2 _ if bytes.starts_with(&[b'<', b'?', b'x', b'm']) => Some((UTF_8, 0)), // Some ASCII compatible _ => None, } } quick-xml-0.36.1/src/errors.rs000064400000000000000000000432540072674642500143360ustar 00000000000000//! Error management module use crate::encoding::Decoder; use crate::escape::EscapeError; use crate::events::attributes::AttrError; use crate::name::QName; use crate::utils::write_byte_string; use std::fmt; use std::io::Error as IoError; use std::str::Utf8Error; use std::string::FromUtf8Error; use std::sync::Arc; /// An error returned if parsed document does not correspond to the XML grammar, /// for example, a tag opened by `<` not closed with `>`. This error does not /// represent invalid XML constructs, for example, tags `<>` and `` a well-formed /// from syntax point-of-view. #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum SyntaxError { /// The parser started to parse `` sequence was found. UnclosedPIOrXmlDecl, /// The parser started to parse comment (`` sequence was found. UnclosedComment, /// The parser started to parse DTD (`` character was found. UnclosedDoctype, /// The parser started to parse `` sequence was found. UnclosedCData, /// The parser started to parse tag content, but the input ended /// before the closing `>` character was found. 
UnclosedTag, } impl fmt::Display for SyntaxError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::InvalidBangMarkup => f.write_str("unknown or missed symbol in markup"), Self::UnclosedPIOrXmlDecl => { f.write_str("processing instruction or xml declaration not closed: `?>` not found before end of input") } Self::UnclosedComment => { f.write_str("comment not closed: `-->` not found before end of input") } Self::UnclosedDoctype => { f.write_str("DOCTYPE not closed: `>` not found before end of input") } Self::UnclosedCData => { f.write_str("CDATA not closed: `]]>` not found before end of input") } Self::UnclosedTag => f.write_str("tag not closed: `>` not found before end of input"), } } } impl std::error::Error for SyntaxError {} //////////////////////////////////////////////////////////////////////////////////////////////////// /// An error returned if parsed document is not [well-formed], for example, /// an opened tag is not closed before end of input. /// /// Those errors are not fatal: after encountering an error you can continue /// parsing the document. /// /// [well-formed]: https://www.w3.org/TR/xml11/#dt-wellformed #[derive(Clone, Debug, PartialEq, Eq)] pub enum IllFormedError { /// A `version` attribute was not found in an XML declaration or is not the /// first attribute. /// /// According to the [specification], the XML declaration (``) MUST contain /// a `version` attribute and it MUST be the first attribute. This error indicates, /// that the declaration does not contain attributes at all (if contains `None`) /// or either `version` attribute is not present or not the first attribute in /// the declaration. In the last case it contains the name of the found attribute. /// /// [specification]: https://www.w3.org/TR/xml11/#sec-prolog-dtd MissingDeclVersion(Option), /// A document type definition (DTD) does not contain a name of a root element. 
/// /// According to the [specification], document type definition (``) /// MUST contain a name which defines a document type (`foo`). If that name /// is missed, this error is returned. /// /// [specification]: https://www.w3.org/TR/xml11/#NT-doctypedecl MissingDoctypeName, /// The end tag was not found during reading of a sub-tree of elements due to /// encountering an EOF from the underlying reader. This error is returned from /// [`Reader::read_to_end`]. /// /// [`Reader::read_to_end`]: crate::reader::Reader::read_to_end MissingEndTag(String), /// The specified end tag was encountered without corresponding open tag at the /// same level of hierarchy UnmatchedEndTag(String), /// The specified end tag does not match the start tag at that nesting level. MismatchedEndTag { /// Name of open tag, that is expected to be closed expected: String, /// Name of actually closed tag found: String, }, /// A comment contains forbidden double-hyphen (`--`) sequence inside. /// /// According to the [specification], for compatibility, comments MUST NOT contain /// double-hyphen (`--`) sequence, in particular, they cannot end by `--->`. /// /// The quick-xml by default does not check that, because this restriction is /// mostly artificial, but you can enable it in the [configuration]. 
/// /// [specification]: https://www.w3.org/TR/xml11/#sec-comments /// [configuration]: crate::reader::Config::check_comments DoubleHyphenInComment, } impl fmt::Display for IllFormedError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::MissingDeclVersion(None) => { write!(f, "an XML declaration does not contain `version` attribute") } Self::MissingDeclVersion(Some(attr)) => { write!(f, "an XML declaration must start with `version` attribute, but in starts with `{}`", attr) } Self::MissingDoctypeName => write!( f, "`` declaration does not contain a name of a document type" ), Self::MissingEndTag(tag) => write!( f, "start tag not closed: `` not found before end of input", tag, ), Self::UnmatchedEndTag(tag) => { write!(f, "close tag `` does not match any open tag", tag) } Self::MismatchedEndTag { expected, found } => write!( f, "expected ``, but `` was found", expected, found, ), Self::DoubleHyphenInComment => { write!(f, "forbidden string `--` was found in a comment") } } } } impl std::error::Error for IllFormedError {} //////////////////////////////////////////////////////////////////////////////////////////////////// /// The error type used by this crate. #[derive(Clone, Debug)] pub enum Error { /// XML document cannot be read from or written to underlying source. /// /// Contains the reference-counted I/O error to make the error type `Clone`able. Io(Arc), /// The document does not corresponds to the XML grammar. Syntax(SyntaxError), /// The document is not [well-formed](https://www.w3.org/TR/xml11/#dt-wellformed). IllFormed(IllFormedError), /// Input decoding error. 
If [`encoding`] feature is disabled, contains `None`, /// otherwise contains the UTF-8 decoding error /// /// [`encoding`]: index.html#encoding NonDecodable(Option), /// Attribute parsing error InvalidAttr(AttrError), /// Escape error EscapeError(EscapeError), /// Specified namespace prefix is unknown, cannot resolve namespace for it UnknownPrefix(Vec), /// Error for when a reserved namespace is set incorrectly. /// /// This error returned in following cases: /// - the XML document attempts to bind `xml` prefix to something other than /// `http://www.w3.org/XML/1998/namespace` /// - the XML document attempts to bind `xmlns` prefix /// - the XML document attempts to bind some prefix (except `xml`) to /// `http://www.w3.org/XML/1998/namespace` /// - the XML document attempts to bind some prefix to /// `http://www.w3.org/2000/xmlns/` InvalidPrefixBind { /// The prefix that is tried to be bound prefix: Vec, /// Namespace to which prefix tried to be bound namespace: Vec, }, } impl Error { pub(crate) fn missed_end(name: QName, decoder: Decoder) -> Self { match decoder.decode(name.as_ref()) { Ok(name) => IllFormedError::MissingEndTag(name.into()).into(), Err(err) => err.into(), } } } impl From for Error { /// Creates a new `Error::Io` from the given error #[inline] fn from(error: IoError) -> Error { Error::Io(Arc::new(error)) } } impl From for Error { /// Creates a new `Error::Syntax` from the given error #[inline] fn from(error: SyntaxError) -> Self { Self::Syntax(error) } } impl From for Error { /// Creates a new `Error::IllFormed` from the given error #[inline] fn from(error: IllFormedError) -> Self { Self::IllFormed(error) } } impl From for Error { /// Creates a new `Error::NonDecodable` from the given error #[inline] fn from(error: Utf8Error) -> Error { Error::NonDecodable(Some(error)) } } impl From for Error { /// Creates a new `Error::Utf8` from the given error #[inline] fn from(error: FromUtf8Error) -> Error { error.utf8_error().into() } } impl From for Error { 
/// Creates a new `Error::EscapeError` from the given error #[inline] fn from(error: EscapeError) -> Error { Error::EscapeError(error) } } impl From for Error { #[inline] fn from(error: AttrError) -> Self { Error::InvalidAttr(error) } } /// A specialized `Result` type where the error is hard-wired to [`Error`]. pub type Result = std::result::Result; impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Error::Io(e) => write!(f, "I/O error: {}", e), Error::Syntax(e) => write!(f, "syntax error: {}", e), Error::IllFormed(e) => write!(f, "ill-formed document: {}", e), Error::NonDecodable(None) => write!(f, "Malformed input, decoding impossible"), Error::NonDecodable(Some(e)) => write!(f, "Malformed UTF-8 input: {}", e), Error::InvalidAttr(e) => write!(f, "error while parsing attribute: {}", e), Error::EscapeError(e) => write!(f, "{}", e), Error::UnknownPrefix(prefix) => { f.write_str("Unknown namespace prefix '")?; write_byte_string(f, prefix)?; f.write_str("'") } Error::InvalidPrefixBind { prefix, namespace } => { f.write_str("The namespace prefix '")?; write_byte_string(f, prefix)?; f.write_str("' cannot be bound to '")?; write_byte_string(f, namespace)?; f.write_str("'") } } } } impl std::error::Error for Error { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { match self { Error::Io(e) => Some(e), Error::Syntax(e) => Some(e), Error::IllFormed(e) => Some(e), Error::NonDecodable(Some(e)) => Some(e), Error::InvalidAttr(e) => Some(e), Error::EscapeError(e) => Some(e), _ => None, } } } #[cfg(feature = "serialize")] pub mod serialize { //! 
A module to handle serde (de)serialization errors use super::*; use std::borrow::Cow; #[cfg(feature = "overlapped-lists")] use std::num::NonZeroUsize; use std::num::{ParseFloatError, ParseIntError}; /// (De)serialization error #[derive(Clone, Debug)] pub enum DeError { /// Serde custom error Custom(String), /// Xml parsing error InvalidXml(Error), /// Cannot parse to integer InvalidInt(ParseIntError), /// Cannot parse to float InvalidFloat(ParseFloatError), /// Cannot parse specified value to boolean InvalidBoolean(String), /// This error indicates an error in the [`Deserialize`](serde::Deserialize) /// implementation when read a map or a struct: `MapAccess::next_value[_seed]` /// was called before `MapAccess::next_key[_seed]`. /// /// You should check your types, that implements corresponding trait. KeyNotRead, /// Deserializer encounter a start tag with a specified name when it is /// not expecting. This happens when you try to deserialize a primitive /// value (numbers, strings, booleans) from an XML element. UnexpectedStart(Vec), /// The [`Reader`] produced [`Event::Eof`] when it is not expecting, /// for example, after producing [`Event::Start`] but before corresponding /// [`Event::End`]. /// /// [`Reader`]: crate::reader::Reader /// [`Event::Eof`]: crate::events::Event::Eof /// [`Event::Start`]: crate::events::Event::Start /// [`Event::End`]: crate::events::Event::End UnexpectedEof, /// An attempt to deserialize to a type, that is not supported by the XML /// store at current position, for example, attempt to deserialize `struct` /// from attribute or attempt to deserialize binary data. /// /// Serialized type cannot be represented in an XML due to violation of the /// XML rules in the final XML document. For example, attempt to serialize /// a `HashMap<{integer}, ...>` would cause this error because [XML name] /// cannot start from a digit or a hyphen (minus sign). 
The same result /// would occur if map key is a complex type that cannot be serialized as /// a primitive type (i.e. string, char, bool, unit struct or unit variant). /// /// [XML name]: https://www.w3.org/TR/xml11/#sec-common-syn Unsupported(Cow<'static, str>), /// Too many events were skipped while deserializing a sequence, event limit /// exceeded. The limit was provided as an argument #[cfg(feature = "overlapped-lists")] TooManyEvents(NonZeroUsize), } impl fmt::Display for DeError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { DeError::Custom(s) => write!(f, "{}", s), DeError::InvalidXml(e) => write!(f, "{}", e), DeError::InvalidInt(e) => write!(f, "{}", e), DeError::InvalidFloat(e) => write!(f, "{}", e), DeError::InvalidBoolean(v) => write!(f, "Invalid boolean value '{}'", v), DeError::KeyNotRead => write!(f, "Invalid `Deserialize` implementation: `MapAccess::next_value[_seed]` was called before `MapAccess::next_key[_seed]`"), DeError::UnexpectedStart(e) => { f.write_str("Unexpected `Event::Start(")?; write_byte_string(f, e)?; f.write_str(")`") } DeError::UnexpectedEof => write!(f, "Unexpected `Event::Eof`"), DeError::Unsupported(s) => write!(f, "Unsupported operation: {}", s), #[cfg(feature = "overlapped-lists")] DeError::TooManyEvents(s) => write!(f, "Deserializer buffers {} events, limit exceeded", s), } } } impl ::std::error::Error for DeError { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { match self { DeError::InvalidXml(e) => Some(e), DeError::InvalidInt(e) => Some(e), DeError::InvalidFloat(e) => Some(e), _ => None, } } } impl serde::de::Error for DeError { fn custom(msg: T) -> Self { DeError::Custom(msg.to_string()) } } impl serde::ser::Error for DeError { fn custom(msg: T) -> Self { DeError::Custom(msg.to_string()) } } impl From for DeError { #[inline] fn from(e: Error) -> Self { Self::InvalidXml(e) } } impl From for DeError { #[inline] fn from(e: EscapeError) -> Self { Self::InvalidXml(e.into()) } } impl From 
for DeError { #[inline] fn from(e: Utf8Error) -> Self { Self::InvalidXml(e.into()) } } impl From for DeError { #[inline] fn from(e: FromUtf8Error) -> Self { Self::InvalidXml(e.into()) } } impl From for DeError { #[inline] fn from(e: AttrError) -> Self { Self::InvalidXml(e.into()) } } impl From for DeError { #[inline] fn from(e: ParseIntError) -> Self { Self::InvalidInt(e) } } impl From for DeError { #[inline] fn from(e: ParseFloatError) -> Self { Self::InvalidFloat(e) } } impl From for DeError { #[inline] fn from(e: fmt::Error) -> Self { Self::Custom(e.to_string()) } } } quick-xml-0.36.1/src/escape.rs000064400000000000000000002125100072674642500142530ustar 00000000000000//! Manage xml character escapes use memchr::memchr2_iter; use std::borrow::Cow; use std::num::ParseIntError; use std::ops::Range; /// Error of parsing character reference (`&#;` or `&#x;`). #[derive(Clone, Debug, PartialEq)] pub enum ParseCharRefError { /// Number contains sign character (`+` or `-`) which is not allowed. UnexpectedSign, /// Number cannot be parsed due to non-number characters or a numeric overflow. InvalidNumber(ParseIntError), /// Character reference represents not a valid unicode codepoint. InvalidCodepoint(u32), /// Character reference expanded to a not permitted character for an XML. /// /// Currently, only `0x0` character produces this error. IllegalCharacter(u32), } impl std::fmt::Display for ParseCharRefError { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { Self::UnexpectedSign => f.write_str("unexpected number sign"), Self::InvalidNumber(e) => e.fmt(f), Self::InvalidCodepoint(n) => write!(f, "`{}` is not a valid codepoint", n), Self::IllegalCharacter(n) => write!(f, "0x{:x} character is not permitted in XML", n), } } } impl std::error::Error for ParseCharRefError { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { match self { Self::InvalidNumber(e) => Some(e), _ => None, } } } /// Error for XML escape / unescape. 
#[derive(Clone, Debug, PartialEq)] pub enum EscapeError { /// Referenced entity in unknown to the parser. UnrecognizedEntity(Range, String), /// Cannot find `;` after `&` UnterminatedEntity(Range), /// Attempt to parse character reference (`&#;` or `&#x;`) /// was unsuccessful, not all characters are decimal or hexadecimal numbers. InvalidCharRef(ParseCharRefError), } impl std::fmt::Display for EscapeError { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { EscapeError::UnrecognizedEntity(rge, res) => { write!(f, "at {:?}: unrecognized entity `{}`", rge, res) } EscapeError::UnterminatedEntity(e) => write!( f, "Error while escaping character at range {:?}: Cannot find ';' after '&'", e ), EscapeError::InvalidCharRef(e) => { write!(f, "invalid character reference: {}", e) } } } } impl std::error::Error for EscapeError { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { match self { Self::InvalidCharRef(e) => Some(e), _ => None, } } } /// Escapes an `&str` and replaces all xml special characters (`<`, `>`, `&`, `'`, `"`) /// with their corresponding xml escaped value. /// /// This function performs following replacements: /// /// | Character | Replacement /// |-----------|------------ /// | `<` | `<` /// | `>` | `>` /// | `&` | `&` /// | `'` | `'` /// | `"` | `"` /// /// This function performs following replacements: /// /// | Character | Replacement /// |-----------|------------ /// | `<` | `<` /// | `>` | `>` /// | `&` | `&` /// | `'` | `'` /// | `"` | `"` pub fn escape(raw: &str) -> Cow { _escape(raw, |ch| matches!(ch, b'<' | b'>' | b'&' | b'\'' | b'\"')) } /// Escapes an `&str` and replaces xml special characters (`<`, `>`, `&`) /// with their corresponding xml escaped value. /// /// Should only be used for escaping text content. In XML text content, it is allowed /// (though not recommended) to leave the quote special characters `"` and `'` unescaped. 
/// /// This function performs following replacements: /// /// | Character | Replacement /// |-----------|------------ /// | `<` | `<` /// | `>` | `>` /// | `&` | `&` /// /// This function performs following replacements: /// /// | Character | Replacement /// |-----------|------------ /// | `<` | `<` /// | `>` | `>` /// | `&` | `&` pub fn partial_escape(raw: &str) -> Cow { _escape(raw, |ch| matches!(ch, b'<' | b'>' | b'&')) } /// XML standard [requires] that only `<` and `&` was escaped in text content or /// attribute value. All other characters not necessary to be escaped, although /// for compatibility with SGML they also should be escaped. Practically, escaping /// only those characters is enough. /// /// This function performs following replacements: /// /// | Character | Replacement /// |-----------|------------ /// | `<` | `<` /// | `&` | `&` /// /// [requires]: https://www.w3.org/TR/xml11/#syntax pub fn minimal_escape(raw: &str) -> Cow { _escape(raw, |ch| matches!(ch, b'<' | b'&')) } /// Escapes an `&str` and replaces a subset of xml special characters (`<`, `>`, /// `&`, `'`, `"`) with their corresponding xml escaped value. 
pub(crate) fn _escape bool>(raw: &str, escape_chars: F) -> Cow { let bytes = raw.as_bytes(); let mut escaped = None; let mut iter = bytes.iter(); let mut pos = 0; while let Some(i) = iter.position(|&b| escape_chars(b)) { if escaped.is_none() { escaped = Some(Vec::with_capacity(raw.len())); } let escaped = escaped.as_mut().expect("initialized"); let new_pos = pos + i; escaped.extend_from_slice(&bytes[pos..new_pos]); match bytes[new_pos] { b'<' => escaped.extend_from_slice(b"<"), b'>' => escaped.extend_from_slice(b">"), b'\'' => escaped.extend_from_slice(b"'"), b'&' => escaped.extend_from_slice(b"&"), b'"' => escaped.extend_from_slice(b"""), // This set of escapes handles characters that should be escaped // in elements of xs:lists, because those characters works as // delimiters of list elements b'\t' => escaped.extend_from_slice(b" "), b'\n' => escaped.extend_from_slice(b" "), b'\r' => escaped.extend_from_slice(b" "), b' ' => escaped.extend_from_slice(b" "), _ => unreachable!( "Only '<', '>','\', '&', '\"', '\\t', '\\r', '\\n', and ' ' are escaped" ), } pos = new_pos + 1; } if let Some(mut escaped) = escaped { if let Some(raw) = bytes.get(pos..) { escaped.extend_from_slice(raw); } // SAFETY: we operate on UTF-8 input and search for an one byte chars only, // so all slices that was put to the `escaped` is a valid UTF-8 encoded strings // TODO: Can be replaced with `unsafe { String::from_utf8_unchecked() }` // if unsafe code will be allowed Cow::Owned(String::from_utf8(escaped).unwrap()) } else { Cow::Borrowed(raw) } } /// Unescape an `&str` and replaces all xml escaped characters (`&...;`) into /// their corresponding value. /// /// If feature [`escape-html`] is enabled, then recognizes all [HTML5 escapes]. 
/// /// [`escape-html`]: ../index.html#escape-html /// [HTML5 escapes]: https://dev.w3.org/html5/html-author/charref pub fn unescape(raw: &str) -> Result, EscapeError> { unescape_with(raw, resolve_predefined_entity) } /// Unescape an `&str` and replaces all xml escaped characters (`&...;`) into /// their corresponding value, using a resolver function for custom entities. /// /// If feature [`escape-html`] is enabled, then recognizes all [HTML5 escapes]. /// /// Predefined entities will be resolved _after_ trying to resolve with `resolve_entity`, /// which allows you to override default behavior which required in some XML dialects. /// /// Character references (`&#hh;`) cannot be overridden, they are resolved before /// calling `resolve_entity`. /// /// Note, that entities will not be resolved recursively. In order to satisfy the /// XML [requirements] you should unescape nested entities by yourself. /// /// # Example /// /// ``` /// use quick_xml::escape::resolve_xml_entity; /// # use quick_xml::escape::unescape_with; /// # use pretty_assertions::assert_eq; /// let override_named_entities = |entity: &str| match entity { /// // Override standard entities /// "lt" => Some("FOO"), /// "gt" => Some("BAR"), /// // Resolve custom entities /// "baz" => Some("<"), /// // Delegate other entities to the default implementation /// _ => resolve_xml_entity(entity), /// }; /// /// assert_eq!( /// unescape_with("&<test>&baz;", override_named_entities).unwrap(), /// "&FOOtestBAR<" /// ); /// ``` /// /// [`escape-html`]: ../index.html#escape-html /// [HTML5 escapes]: https://dev.w3.org/html5/html-author/charref /// [requirements]: https://www.w3.org/TR/xml11/#intern-replacement pub fn unescape_with<'input, 'entity, F>( raw: &'input str, mut resolve_entity: F, ) -> Result, EscapeError> where // the lifetime of the output comes from a capture or is `'static` F: FnMut(&str) -> Option<&'entity str>, { let bytes = raw.as_bytes(); let mut unescaped = None; let mut last_end = 0; let mut 
iter = memchr2_iter(b'&', b';', bytes); while let Some(start) = iter.by_ref().find(|p| bytes[*p] == b'&') { match iter.next() { Some(end) if bytes[end] == b';' => { // append valid data if unescaped.is_none() { unescaped = Some(String::with_capacity(raw.len())); } let unescaped = unescaped.as_mut().expect("initialized"); unescaped.push_str(&raw[last_end..start]); // search for character correctness let pat = &raw[start + 1..end]; if let Some(entity) = pat.strip_prefix('#') { let codepoint = parse_number(entity).map_err(EscapeError::InvalidCharRef)?; unescaped.push_str(codepoint.encode_utf8(&mut [0u8; 4])); } else if let Some(value) = resolve_entity(pat) { unescaped.push_str(value); } else { return Err(EscapeError::UnrecognizedEntity( start + 1..end, pat.to_string(), )); } last_end = end + 1; } _ => return Err(EscapeError::UnterminatedEntity(start..raw.len())), } } if let Some(mut unescaped) = unescaped { if let Some(raw) = raw.get(last_end..) { unescaped.push_str(raw); } Ok(Cow::Owned(unescaped)) } else { Ok(Cow::Borrowed(raw)) } } /// Resolves predefined XML entities or all HTML5 entities depending on the feature /// [`escape-html`](https://docs.rs/quick-xml/latest/quick_xml/#escape-html). /// /// Behaves like [`resolve_xml_entity`] if feature is not enabled and as /// [`resolve_html5_entity`] if enabled. #[inline] pub const fn resolve_predefined_entity(entity: &str) -> Option<&'static str> { #[cfg(not(feature = "escape-html"))] { resolve_xml_entity(entity) } #[cfg(feature = "escape-html")] { resolve_html5_entity(entity) } } /// Resolves predefined XML entities. If specified entity is not a predefined XML /// entity, `None` is returned. /// /// The complete list of predefined entities are defined in the [specification]. 
/// /// ``` /// # use quick_xml::escape::resolve_xml_entity; /// # use pretty_assertions::assert_eq; /// assert_eq!(resolve_xml_entity("lt"), Some("<")); /// assert_eq!(resolve_xml_entity("gt"), Some(">")); /// assert_eq!(resolve_xml_entity("amp"), Some("&")); /// assert_eq!(resolve_xml_entity("apos"), Some("'")); /// assert_eq!(resolve_xml_entity("quot"), Some("\"")); /// /// assert_eq!(resolve_xml_entity("foo"), None); /// ``` /// /// [specification]: https://www.w3.org/TR/xml11/#sec-predefined-ent pub const fn resolve_xml_entity(entity: &str) -> Option<&'static str> { // match over strings are not allowed in const functions let s = match entity.as_bytes() { b"lt" => "<", b"gt" => ">", b"amp" => "&", b"apos" => "'", b"quot" => "\"", _ => return None, }; Some(s) } /// Resolves all HTML5 entities. For complete list see . #[cfg(feature = "escape-html")] pub const fn resolve_html5_entity(entity: &str) -> Option<&'static str> { // imported from https://dev.w3.org/html5/html-author/charref // match over strings are not allowed in const functions //TODO: automate up-to-dating using https://html.spec.whatwg.org/entities.json //TODO: building this function increases compilation time by 10+ seconds (or 5x times) // Maybe this is because of very long match // See https://github.com/tafia/quick-xml/issues/763 let s = match entity.as_bytes() { b"Tab" => "\u{09}", b"NewLine" => "\u{0A}", b"excl" => "\u{21}", b"quot" | b"QUOT" => "\u{22}", b"num" => "\u{23}", b"dollar" => "\u{24}", b"percnt" => "\u{25}", b"amp" | b"AMP" => "\u{26}", b"apos" => "\u{27}", b"lpar" => "\u{28}", b"rpar" => "\u{29}", b"ast" | b"midast" => "\u{2A}", b"plus" => "\u{2B}", b"comma" => "\u{2C}", b"period" => "\u{2E}", b"sol" => "\u{2F}", b"colon" => "\u{3A}", b"semi" => "\u{3B}", b"lt" | b"LT" => "\u{3C}", b"equals" => "\u{3D}", b"gt" | b"GT" => "\u{3E}", b"quest" => "\u{3F}", b"commat" => "\u{40}", b"lsqb" | b"lbrack" => "\u{5B}", b"bsol" => "\u{5C}", b"rsqb" | b"rbrack" => "\u{5D}", b"Hat" => "\u{5E}", 
b"lowbar" => "\u{5F}", b"grave" | b"DiacriticalGrave" => "\u{60}", b"lcub" | b"lbrace" => "\u{7B}", b"verbar" | b"vert" | b"VerticalLine" => "\u{7C}", b"rcub" | b"rbrace" => "\u{7D}", b"nbsp" | b"NonBreakingSpace" => "\u{A0}", b"iexcl" => "\u{A1}", b"cent" => "\u{A2}", b"pound" => "\u{A3}", b"curren" => "\u{A4}", b"yen" => "\u{A5}", b"brvbar" => "\u{A6}", b"sect" => "\u{A7}", b"Dot" | b"die" | b"DoubleDot" | b"uml" => "\u{A8}", b"copy" | b"COPY" => "\u{A9}", b"ordf" => "\u{AA}", b"laquo" => "\u{AB}", b"not" => "\u{AC}", b"shy" => "\u{AD}", b"reg" | b"circledR" | b"REG" => "\u{AE}", b"macr" | b"OverBar" | b"strns" => "\u{AF}", b"deg" => "\u{B0}", b"plusmn" | b"pm" | b"PlusMinus" => "\u{B1}", b"sup2" => "\u{B2}", b"sup3" => "\u{B3}", b"acute" | b"DiacriticalAcute" => "\u{B4}", b"micro" => "\u{B5}", b"para" => "\u{B6}", b"middot" | b"centerdot" | b"CenterDot" => "\u{B7}", b"cedil" | b"Cedilla" => "\u{B8}", b"sup1" => "\u{B9}", b"ordm" => "\u{BA}", b"raquo" => "\u{BB}", b"frac14" => "\u{BC}", b"frac12" | b"half" => "\u{BD}", b"frac34" => "\u{BE}", b"iquest" => "\u{BF}", b"Agrave" => "\u{C0}", b"Aacute" => "\u{C1}", b"Acirc" => "\u{C2}", b"Atilde" => "\u{C3}", b"Auml" => "\u{C4}", b"Aring" => "\u{C5}", b"AElig" => "\u{C6}", b"Ccedil" => "\u{C7}", b"Egrave" => "\u{C8}", b"Eacute" => "\u{C9}", b"Ecirc" => "\u{CA}", b"Euml" => "\u{CB}", b"Igrave" => "\u{CC}", b"Iacute" => "\u{CD}", b"Icirc" => "\u{CE}", b"Iuml" => "\u{CF}", b"ETH" => "\u{D0}", b"Ntilde" => "\u{D1}", b"Ograve" => "\u{D2}", b"Oacute" => "\u{D3}", b"Ocirc" => "\u{D4}", b"Otilde" => "\u{D5}", b"Ouml" => "\u{D6}", b"times" => "\u{D7}", b"Oslash" => "\u{D8}", b"Ugrave" => "\u{D9}", b"Uacute" => "\u{DA}", b"Ucirc" => "\u{DB}", b"Uuml" => "\u{DC}", b"Yacute" => "\u{DD}", b"THORN" => "\u{DE}", b"szlig" => "\u{DF}", b"agrave" => "\u{E0}", b"aacute" => "\u{E1}", b"acirc" => "\u{E2}", b"atilde" => "\u{E3}", b"auml" => "\u{E4}", b"aring" => "\u{E5}", b"aelig" => "\u{E6}", b"ccedil" => "\u{E7}", b"egrave" => "\u{E8}", 
b"eacute" => "\u{E9}", b"ecirc" => "\u{EA}", b"euml" => "\u{EB}", b"igrave" => "\u{EC}", b"iacute" => "\u{ED}", b"icirc" => "\u{EE}", b"iuml" => "\u{EF}", b"eth" => "\u{F0}", b"ntilde" => "\u{F1}", b"ograve" => "\u{F2}", b"oacute" => "\u{F3}", b"ocirc" => "\u{F4}", b"otilde" => "\u{F5}", b"ouml" => "\u{F6}", b"divide" | b"div" => "\u{F7}", b"oslash" => "\u{F8}", b"ugrave" => "\u{F9}", b"uacute" => "\u{FA}", b"ucirc" => "\u{FB}", b"uuml" => "\u{FC}", b"yacute" => "\u{FD}", b"thorn" => "\u{FE}", b"yuml" => "\u{FF}", b"Amacr" => "\u{10}", b"amacr" => "\u{10}", b"Abreve" => "\u{10}", b"abreve" => "\u{10}", b"Aogon" => "\u{10}", b"aogon" => "\u{10}", b"Cacute" => "\u{10}", b"cacute" => "\u{10}", b"Ccirc" => "\u{10}", b"ccirc" => "\u{10}", b"Cdot" => "\u{10}", b"cdot" => "\u{10}", b"Ccaron" => "\u{10}", b"ccaron" => "\u{10}", b"Dcaron" => "\u{10}", b"dcaron" => "\u{10}", b"Dstrok" => "\u{11}", b"dstrok" => "\u{11}", b"Emacr" => "\u{11}", b"emacr" => "\u{11}", b"Edot" => "\u{11}", b"edot" => "\u{11}", b"Eogon" => "\u{11}", b"eogon" => "\u{11}", b"Ecaron" => "\u{11}", b"ecaron" => "\u{11}", b"Gcirc" => "\u{11}", b"gcirc" => "\u{11}", b"Gbreve" => "\u{11}", b"gbreve" => "\u{11}", b"Gdot" => "\u{12}", b"gdot" => "\u{12}", b"Gcedil" => "\u{12}", b"Hcirc" => "\u{12}", b"hcirc" => "\u{12}", b"Hstrok" => "\u{12}", b"hstrok" => "\u{12}", b"Itilde" => "\u{12}", b"itilde" => "\u{12}", b"Imacr" => "\u{12}", b"imacr" => "\u{12}", b"Iogon" => "\u{12}", b"iogon" => "\u{12}", b"Idot" => "\u{13}", b"imath" | b"inodot" => "\u{13}", b"IJlig" => "\u{13}", b"ijlig" => "\u{13}", b"Jcirc" => "\u{13}", b"jcirc" => "\u{13}", b"Kcedil" => "\u{13}", b"kcedil" => "\u{13}", b"kgreen" => "\u{13}", b"Lacute" => "\u{13}", b"lacute" => "\u{13}", b"Lcedil" => "\u{13}", b"lcedil" => "\u{13}", b"Lcaron" => "\u{13}", b"lcaron" => "\u{13}", b"Lmidot" => "\u{13}", b"lmidot" => "\u{14}", b"Lstrok" => "\u{14}", b"lstrok" => "\u{14}", b"Nacute" => "\u{14}", b"nacute" => "\u{14}", b"Ncedil" => "\u{14}", b"ncedil" 
=> "\u{14}", b"Ncaron" => "\u{14}", b"ncaron" => "\u{14}", b"napos" => "\u{14}", b"ENG" => "\u{14}", b"eng" => "\u{14}", b"Omacr" => "\u{14}", b"omacr" => "\u{14}", b"Odblac" => "\u{15}", b"odblac" => "\u{15}", b"OElig" => "\u{15}", b"oelig" => "\u{15}", b"Racute" => "\u{15}", b"racute" => "\u{15}", b"Rcedil" => "\u{15}", b"rcedil" => "\u{15}", b"Rcaron" => "\u{15}", b"rcaron" => "\u{15}", b"Sacute" => "\u{15}", b"sacute" => "\u{15}", b"Scirc" => "\u{15}", b"scirc" => "\u{15}", b"Scedil" => "\u{15}", b"scedil" => "\u{15}", b"Scaron" => "\u{16}", b"scaron" => "\u{16}", b"Tcedil" => "\u{16}", b"tcedil" => "\u{16}", b"Tcaron" => "\u{16}", b"tcaron" => "\u{16}", b"Tstrok" => "\u{16}", b"tstrok" => "\u{16}", b"Utilde" => "\u{16}", b"utilde" => "\u{16}", b"Umacr" => "\u{16}", b"umacr" => "\u{16}", b"Ubreve" => "\u{16}", b"ubreve" => "\u{16}", b"Uring" => "\u{16}", b"uring" => "\u{16}", b"Udblac" => "\u{17}", b"udblac" => "\u{17}", b"Uogon" => "\u{17}", b"uogon" => "\u{17}", b"Wcirc" => "\u{17}", b"wcirc" => "\u{17}", b"Ycirc" => "\u{17}", b"ycirc" => "\u{17}", b"Yuml" => "\u{17}", b"Zacute" => "\u{17}", b"zacute" => "\u{17}", b"Zdot" => "\u{17}", b"zdot" => "\u{17}", b"Zcaron" => "\u{17}", b"zcaron" => "\u{17}", b"fnof" => "\u{19}", b"imped" => "\u{1B}", b"gacute" => "\u{1F}", b"jmath" => "\u{23}", b"circ" => "\u{2C}", b"caron" | b"Hacek" => "\u{2C}", b"breve" | b"Breve" => "\u{2D}", b"dot" | b"DiacriticalDot" => "\u{2D}", b"ring" => "\u{2D}", b"ogon" => "\u{2D}", b"tilde" | b"DiacriticalTilde" => "\u{2D}", b"dblac" | b"DiacriticalDoubleAcute" => "\u{2D}", b"DownBreve" => "\u{31}", b"UnderBar" => "\u{33}", b"Alpha" => "\u{39}", b"Beta" => "\u{39}", b"Gamma" => "\u{39}", b"Delta" => "\u{39}", b"Epsilon" => "\u{39}", b"Zeta" => "\u{39}", b"Eta" => "\u{39}", b"Theta" => "\u{39}", b"Iota" => "\u{39}", b"Kappa" => "\u{39}", b"Lambda" => "\u{39}", b"Mu" => "\u{39}", b"Nu" => "\u{39}", b"Xi" => "\u{39}", b"Omicron" => "\u{39}", b"Pi" => "\u{3A}", b"Rho" => "\u{3A}", b"Sigma" => 
"\u{3A}", b"Tau" => "\u{3A}", b"Upsilon" => "\u{3A}", b"Phi" => "\u{3A}", b"Chi" => "\u{3A}", b"Psi" => "\u{3A}", b"Omega" => "\u{3A}", b"alpha" => "\u{3B}", b"beta" => "\u{3B}", b"gamma" => "\u{3B}", b"delta" => "\u{3B}", b"epsiv" | b"varepsilon" | b"epsilon" => "\u{3B}", b"zeta" => "\u{3B}", b"eta" => "\u{3B}", b"theta" => "\u{3B}", b"iota" => "\u{3B}", b"kappa" => "\u{3B}", b"lambda" => "\u{3B}", b"mu" => "\u{3B}", b"nu" => "\u{3B}", b"xi" => "\u{3B}", b"omicron" => "\u{3B}", b"pi" => "\u{3C}", b"rho" => "\u{3C}", b"sigmav" | b"varsigma" | b"sigmaf" => "\u{3C}", b"sigma" => "\u{3C}", b"tau" => "\u{3C}", b"upsi" | b"upsilon" => "\u{3C}", b"phi" | b"phiv" | b"varphi" => "\u{3C}", b"chi" => "\u{3C}", b"psi" => "\u{3C}", b"omega" => "\u{3C}", b"thetav" | b"vartheta" | b"thetasym" => "\u{3D}", b"Upsi" | b"upsih" => "\u{3D}", b"straightphi" => "\u{3D}", b"piv" | b"varpi" => "\u{3D}", b"Gammad" => "\u{3D}", b"gammad" | b"digamma" => "\u{3D}", b"kappav" | b"varkappa" => "\u{3F}", b"rhov" | b"varrho" => "\u{3F}", b"epsi" | b"straightepsilon" => "\u{3F}", b"bepsi" | b"backepsilon" => "\u{3F}", b"IOcy" => "\u{40}", b"DJcy" => "\u{40}", b"GJcy" => "\u{40}", b"Jukcy" => "\u{40}", b"DScy" => "\u{40}", b"Iukcy" => "\u{40}", b"YIcy" => "\u{40}", b"Jsercy" => "\u{40}", b"LJcy" => "\u{40}", b"NJcy" => "\u{40}", b"TSHcy" => "\u{40}", b"KJcy" => "\u{40}", b"Ubrcy" => "\u{40}", b"DZcy" => "\u{40}", b"Acy" => "\u{41}", b"Bcy" => "\u{41}", b"Vcy" => "\u{41}", b"Gcy" => "\u{41}", b"Dcy" => "\u{41}", b"IEcy" => "\u{41}", b"ZHcy" => "\u{41}", b"Zcy" => "\u{41}", b"Icy" => "\u{41}", b"Jcy" => "\u{41}", b"Kcy" => "\u{41}", b"Lcy" => "\u{41}", b"Mcy" => "\u{41}", b"Ncy" => "\u{41}", b"Ocy" => "\u{41}", b"Pcy" => "\u{41}", b"Rcy" => "\u{42}", b"Scy" => "\u{42}", b"Tcy" => "\u{42}", b"Ucy" => "\u{42}", b"Fcy" => "\u{42}", b"KHcy" => "\u{42}", b"TScy" => "\u{42}", b"CHcy" => "\u{42}", b"SHcy" => "\u{42}", b"SHCHcy" => "\u{42}", b"HARDcy" => "\u{42}", b"Ycy" => "\u{42}", b"SOFTcy" => "\u{42}", 
b"Ecy" => "\u{42}", b"YUcy" => "\u{42}", b"YAcy" => "\u{42}", b"acy" => "\u{43}", b"bcy" => "\u{43}", b"vcy" => "\u{43}", b"gcy" => "\u{43}", b"dcy" => "\u{43}", b"iecy" => "\u{43}", b"zhcy" => "\u{43}", b"zcy" => "\u{43}", b"icy" => "\u{43}", b"jcy" => "\u{43}", b"kcy" => "\u{43}", b"lcy" => "\u{43}", b"mcy" => "\u{43}", b"ncy" => "\u{43}", b"ocy" => "\u{43}", b"pcy" => "\u{43}", b"rcy" => "\u{44}", b"scy" => "\u{44}", b"tcy" => "\u{44}", b"ucy" => "\u{44}", b"fcy" => "\u{44}", b"khcy" => "\u{44}", b"tscy" => "\u{44}", b"chcy" => "\u{44}", b"shcy" => "\u{44}", b"shchcy" => "\u{44}", b"hardcy" => "\u{44}", b"ycy" => "\u{44}", b"softcy" => "\u{44}", b"ecy" => "\u{44}", b"yucy" => "\u{44}", b"yacy" => "\u{44}", b"iocy" => "\u{45}", b"djcy" => "\u{45}", b"gjcy" => "\u{45}", b"jukcy" => "\u{45}", b"dscy" => "\u{45}", b"iukcy" => "\u{45}", b"yicy" => "\u{45}", b"jsercy" => "\u{45}", b"ljcy" => "\u{45}", b"njcy" => "\u{45}", b"tshcy" => "\u{45}", b"kjcy" => "\u{45}", b"ubrcy" => "\u{45}", b"dzcy" => "\u{45}", b"ensp" => "\u{2002}", b"emsp" => "\u{2003}", b"emsp13" => "\u{2004}", b"emsp14" => "\u{2005}", b"numsp" => "\u{2007}", b"puncsp" => "\u{2008}", b"thinsp" | b"ThinSpace" => "\u{2009}", b"hairsp" | b"VeryThinSpace" => "\u{200A}", b"ZeroWidthSpace" | b"NegativeVeryThinSpace" | b"NegativeThinSpace" | b"NegativeMediumSpace" | b"NegativeThickSpace" => "\u{200B}", b"zwnj" => "\u{200C}", b"zwj" => "\u{200D}", b"lrm" => "\u{200E}", b"rlm" => "\u{200F}", b"hyphen" | b"dash" => "\u{2010}", b"ndash" => "\u{2013}", b"mdash" => "\u{2014}", b"horbar" => "\u{2015}", b"Verbar" | b"Vert" => "\u{2016}", b"lsquo" | b"OpenCurlyQuote" => "\u{2018}", b"rsquo" | b"rsquor" | b"CloseCurlyQuote" => "\u{2019}", b"lsquor" | b"sbquo" => "\u{201A}", b"ldquo" | b"OpenCurlyDoubleQuote" => "\u{201C}", b"rdquo" | b"rdquor" | b"CloseCurlyDoubleQuote" => "\u{201D}", b"ldquor" | b"bdquo" => "\u{201E}", b"dagger" => "\u{2020}", b"Dagger" | b"ddagger" => "\u{2021}", b"bull" | b"bullet" => "\u{2022}", 
b"nldr" => "\u{2025}", b"hellip" | b"mldr" => "\u{2026}", b"permil" => "\u{2030}", b"pertenk" => "\u{2031}", b"prime" => "\u{2032}", b"Prime" => "\u{2033}", b"tprime" => "\u{2034}", b"bprime" | b"backprime" => "\u{2035}", b"lsaquo" => "\u{2039}", b"rsaquo" => "\u{203A}", b"oline" => "\u{203E}", b"caret" => "\u{2041}", b"hybull" => "\u{2043}", b"frasl" => "\u{2044}", b"bsemi" => "\u{204F}", b"qprime" => "\u{2057}", b"MediumSpace" => "\u{205F}", b"NoBreak" => "\u{2060}", b"ApplyFunction" | b"af" => "\u{2061}", b"InvisibleTimes" | b"it" => "\u{2062}", b"InvisibleComma" | b"ic" => "\u{2063}", b"euro" => "\u{20AC}", b"tdot" | b"TripleDot" => "\u{20DB}", b"DotDot" => "\u{20DC}", b"Copf" | b"complexes" => "\u{2102}", b"incare" => "\u{2105}", b"gscr" => "\u{210A}", b"hamilt" | b"HilbertSpace" | b"Hscr" => "\u{210B}", b"Hfr" | b"Poincareplane" => "\u{210C}", b"quaternions" | b"Hopf" => "\u{210D}", b"planckh" => "\u{210E}", b"planck" | b"hbar" | b"plankv" | b"hslash" => "\u{210F}", b"Iscr" | b"imagline" => "\u{2110}", b"image" | b"Im" | b"imagpart" | b"Ifr" => "\u{2111}", b"Lscr" | b"lagran" | b"Laplacetrf" => "\u{2112}", b"ell" => "\u{2113}", b"Nopf" | b"naturals" => "\u{2115}", b"numero" => "\u{2116}", b"copysr" => "\u{2117}", b"weierp" | b"wp" => "\u{2118}", b"Popf" | b"primes" => "\u{2119}", b"rationals" | b"Qopf" => "\u{211A}", b"Rscr" | b"realine" => "\u{211B}", b"real" | b"Re" | b"realpart" | b"Rfr" => "\u{211C}", b"reals" | b"Ropf" => "\u{211D}", b"rx" => "\u{211E}", b"trade" | b"TRADE" => "\u{2122}", b"integers" | b"Zopf" => "\u{2124}", b"ohm" => "\u{2126}", b"mho" => "\u{2127}", b"Zfr" | b"zeetrf" => "\u{2128}", b"iiota" => "\u{2129}", b"angst" => "\u{212B}", b"bernou" | b"Bernoullis" | b"Bscr" => "\u{212C}", b"Cfr" | b"Cayleys" => "\u{212D}", b"escr" => "\u{212F}", b"Escr" | b"expectation" => "\u{2130}", b"Fscr" | b"Fouriertrf" => "\u{2131}", b"phmmat" | b"Mellintrf" | b"Mscr" => "\u{2133}", b"order" | b"orderof" | b"oscr" => "\u{2134}", b"alefsym" | b"aleph" => 
"\u{2135}", b"beth" => "\u{2136}", b"gimel" => "\u{2137}", b"daleth" => "\u{2138}", b"CapitalDifferentialD" | b"DD" => "\u{2145}", b"DifferentialD" | b"dd" => "\u{2146}", b"ExponentialE" | b"exponentiale" | b"ee" => "\u{2147}", b"ImaginaryI" | b"ii" => "\u{2148}", b"frac13" => "\u{2153}", b"frac23" => "\u{2154}", b"frac15" => "\u{2155}", b"frac25" => "\u{2156}", b"frac35" => "\u{2157}", b"frac45" => "\u{2158}", b"frac16" => "\u{2159}", b"frac56" => "\u{215A}", b"frac18" => "\u{215B}", b"frac38" => "\u{215C}", b"frac58" => "\u{215D}", b"frac78" => "\u{215E}", b"larr" | b"leftarrow" | b"LeftArrow" | b"slarr" | b"ShortLeftArrow" => "\u{2190}", b"uarr" | b"uparrow" | b"UpArrow" | b"ShortUpArrow" => "\u{2191}", b"rarr" | b"rightarrow" | b"RightArrow" | b"srarr" | b"ShortRightArrow" => "\u{2192}", b"darr" | b"downarrow" | b"DownArrow" | b"ShortDownArrow" => "\u{2193}", b"harr" | b"leftrightarrow" | b"LeftRightArrow" => "\u{2194}", b"varr" | b"updownarrow" | b"UpDownArrow" => "\u{2195}", b"nwarr" | b"UpperLeftArrow" | b"nwarrow" => "\u{2196}", b"nearr" | b"UpperRightArrow" | b"nearrow" => "\u{2197}", b"searr" | b"searrow" | b"LowerRightArrow" => "\u{2198}", b"swarr" | b"swarrow" | b"LowerLeftArrow" => "\u{2199}", b"nlarr" | b"nleftarrow" => "\u{219A}", b"nrarr" | b"nrightarrow" => "\u{219B}", b"rarrw" | b"rightsquigarrow" => "\u{219D}", b"Larr" | b"twoheadleftarrow" => "\u{219E}", b"Uarr" => "\u{219F}", b"Rarr" | b"twoheadrightarrow" => "\u{21A0}", b"Darr" => "\u{21A1}", b"larrtl" | b"leftarrowtail" => "\u{21A2}", b"rarrtl" | b"rightarrowtail" => "\u{21A3}", b"LeftTeeArrow" | b"mapstoleft" => "\u{21A4}", b"UpTeeArrow" | b"mapstoup" => "\u{21A5}", b"map" | b"RightTeeArrow" | b"mapsto" => "\u{21A6}", b"DownTeeArrow" | b"mapstodown" => "\u{21A7}", b"larrhk" | b"hookleftarrow" => "\u{21A9}", b"rarrhk" | b"hookrightarrow" => "\u{21AA}", b"larrlp" | b"looparrowleft" => "\u{21AB}", b"rarrlp" | b"looparrowright" => "\u{21AC}", b"harrw" | b"leftrightsquigarrow" => "\u{21AD}", 
b"nharr" | b"nleftrightarrow" => "\u{21AE}", b"lsh" | b"Lsh" => "\u{21B0}", b"rsh" | b"Rsh" => "\u{21B1}", b"ldsh" => "\u{21B2}", b"rdsh" => "\u{21B3}", b"crarr" => "\u{21B5}", b"cularr" | b"curvearrowleft" => "\u{21B6}", b"curarr" | b"curvearrowright" => "\u{21B7}", b"olarr" | b"circlearrowleft" => "\u{21BA}", b"orarr" | b"circlearrowright" => "\u{21BB}", b"lharu" | b"LeftVector" | b"leftharpoonup" => "\u{21BC}", b"lhard" | b"leftharpoondown" | b"DownLeftVector" => "\u{21BD}", b"uharr" | b"upharpoonright" | b"RightUpVector" => "\u{21BE}", b"uharl" | b"upharpoonleft" | b"LeftUpVector" => "\u{21BF}", b"rharu" | b"RightVector" | b"rightharpoonup" => "\u{21C0}", b"rhard" | b"rightharpoondown" | b"DownRightVector" => "\u{21C1}", b"dharr" | b"RightDownVector" | b"downharpoonright" => "\u{21C2}", b"dharl" | b"LeftDownVector" | b"downharpoonleft" => "\u{21C3}", b"rlarr" | b"rightleftarrows" | b"RightArrowLeftArrow" => "\u{21C4}", b"udarr" | b"UpArrowDownArrow" => "\u{21C5}", b"lrarr" | b"leftrightarrows" | b"LeftArrowRightArrow" => "\u{21C6}", b"llarr" | b"leftleftarrows" => "\u{21C7}", b"uuarr" | b"upuparrows" => "\u{21C8}", b"rrarr" | b"rightrightarrows" => "\u{21C9}", b"ddarr" | b"downdownarrows" => "\u{21CA}", b"lrhar" | b"ReverseEquilibrium" | b"leftrightharpoons" => "\u{21CB}", b"rlhar" | b"rightleftharpoons" | b"Equilibrium" => "\u{21CC}", b"nlArr" | b"nLeftarrow" => "\u{21CD}", b"nhArr" | b"nLeftrightarrow" => "\u{21CE}", b"nrArr" | b"nRightarrow" => "\u{21CF}", b"lArr" | b"Leftarrow" | b"DoubleLeftArrow" => "\u{21D0}", b"uArr" | b"Uparrow" | b"DoubleUpArrow" => "\u{21D1}", b"rArr" | b"Rightarrow" | b"Implies" | b"DoubleRightArrow" => "\u{21D2}", b"dArr" | b"Downarrow" | b"DoubleDownArrow" => "\u{21D3}", b"hArr" | b"Leftrightarrow" | b"DoubleLeftRightArrow" | b"iff" => "\u{21D4}", b"vArr" | b"Updownarrow" | b"DoubleUpDownArrow" => "\u{21D5}", b"nwArr" => "\u{21D6}", b"neArr" => "\u{21D7}", b"seArr" => "\u{21D8}", b"swArr" => "\u{21D9}", b"lAarr" | b"Lleftarrow" => 
"\u{21DA}", b"rAarr" | b"Rrightarrow" => "\u{21DB}", b"zigrarr" => "\u{21DD}", b"larrb" | b"LeftArrowBar" => "\u{21E4}", b"rarrb" | b"RightArrowBar" => "\u{21E5}", b"duarr" | b"DownArrowUpArrow" => "\u{21F5}", b"loarr" => "\u{21FD}", b"roarr" => "\u{21FE}", b"hoarr" => "\u{21FF}", b"forall" | b"ForAll" => "\u{2200}", b"comp" | b"complement" => "\u{2201}", b"part" | b"PartialD" => "\u{2202}", b"exist" | b"Exists" => "\u{2203}", b"nexist" | b"NotExists" | b"nexists" => "\u{2204}", b"empty" | b"emptyset" | b"emptyv" | b"varnothing" => "\u{2205}", b"nabla" | b"Del" => "\u{2207}", b"isin" | b"isinv" | b"Element" | b"in" => "\u{2208}", b"notin" | b"NotElement" | b"notinva" => "\u{2209}", b"niv" | b"ReverseElement" | b"ni" | b"SuchThat" => "\u{220B}", b"notni" | b"notniva" | b"NotReverseElement" => "\u{220C}", b"prod" | b"Product" => "\u{220F}", b"coprod" | b"Coproduct" => "\u{2210}", b"sum" | b"Sum" => "\u{2211}", b"minus" => "\u{2212}", b"mnplus" | b"mp" | b"MinusPlus" => "\u{2213}", b"plusdo" | b"dotplus" => "\u{2214}", b"setmn" | b"setminus" | b"Backslash" | b"ssetmn" | b"smallsetminus" => "\u{2216}", b"lowast" => "\u{2217}", b"compfn" | b"SmallCircle" => "\u{2218}", b"radic" | b"Sqrt" => "\u{221A}", b"prop" | b"propto" | b"Proportional" | b"vprop" | b"varpropto" => "\u{221D}", b"infin" => "\u{221E}", b"angrt" => "\u{221F}", b"ang" | b"angle" => "\u{2220}", b"angmsd" | b"measuredangle" => "\u{2221}", b"angsph" => "\u{2222}", b"mid" | b"VerticalBar" | b"smid" | b"shortmid" => "\u{2223}", b"nmid" | b"NotVerticalBar" | b"nsmid" | b"nshortmid" => "\u{2224}", b"par" | b"parallel" | b"DoubleVerticalBar" | b"spar" | b"shortparallel" => "\u{2225}", b"npar" | b"nparallel" | b"NotDoubleVerticalBar" | b"nspar" | b"nshortparallel" => { "\u{2226}" } b"and" | b"wedge" => "\u{2227}", b"or" | b"vee" => "\u{2228}", b"cap" => "\u{2229}", b"cup" => "\u{222A}", b"int" | b"Integral" => "\u{222B}", b"Int" => "\u{222C}", b"tint" | b"iiint" => "\u{222D}", b"conint" | b"oint" | 
b"ContourIntegral" => "\u{222E}", b"Conint" | b"DoubleContourIntegral" => "\u{222F}", b"Cconint" => "\u{2230}", b"cwint" => "\u{2231}", b"cwconint" | b"ClockwiseContourIntegral" => "\u{2232}", b"awconint" | b"CounterClockwiseContourIntegral" => "\u{2233}", b"there4" | b"therefore" | b"Therefore" => "\u{2234}", b"becaus" | b"because" | b"Because" => "\u{2235}", b"ratio" => "\u{2236}", b"Colon" | b"Proportion" => "\u{2237}", b"minusd" | b"dotminus" => "\u{2238}", b"mDDot" => "\u{223A}", b"homtht" => "\u{223B}", b"sim" | b"Tilde" | b"thksim" | b"thicksim" => "\u{223C}", b"bsim" | b"backsim" => "\u{223D}", b"ac" | b"mstpos" => "\u{223E}", b"acd" => "\u{223F}", b"wreath" | b"VerticalTilde" | b"wr" => "\u{2240}", b"nsim" | b"NotTilde" => "\u{2241}", b"esim" | b"EqualTilde" | b"eqsim" => "\u{2242}", b"sime" | b"TildeEqual" | b"simeq" => "\u{2243}", b"nsime" | b"nsimeq" | b"NotTildeEqual" => "\u{2244}", b"cong" | b"TildeFullEqual" => "\u{2245}", b"simne" => "\u{2246}", b"ncong" | b"NotTildeFullEqual" => "\u{2247}", b"asymp" | b"ap" | b"TildeTilde" | b"approx" | b"thkap" | b"thickapprox" => "\u{2248}", b"nap" | b"NotTildeTilde" | b"napprox" => "\u{2249}", b"ape" | b"approxeq" => "\u{224A}", b"apid" => "\u{224B}", b"bcong" | b"backcong" => "\u{224C}", b"asympeq" | b"CupCap" => "\u{224D}", b"bump" | b"HumpDownHump" | b"Bumpeq" => "\u{224E}", b"bumpe" | b"HumpEqual" | b"bumpeq" => "\u{224F}", b"esdot" | b"DotEqual" | b"doteq" => "\u{2250}", b"eDot" | b"doteqdot" => "\u{2251}", b"efDot" | b"fallingdotseq" => "\u{2252}", b"erDot" | b"risingdotseq" => "\u{2253}", b"colone" | b"coloneq" | b"Assign" => "\u{2254}", b"ecolon" | b"eqcolon" => "\u{2255}", b"ecir" | b"eqcirc" => "\u{2256}", b"cire" | b"circeq" => "\u{2257}", b"wedgeq" => "\u{2259}", b"veeeq" => "\u{225A}", b"trie" | b"triangleq" => "\u{225C}", b"equest" | b"questeq" => "\u{225F}", b"ne" | b"NotEqual" => "\u{2260}", b"equiv" | b"Congruent" => "\u{2261}", b"nequiv" | b"NotCongruent" => "\u{2262}", b"le" | b"leq" => 
"\u{2264}", b"ge" | b"GreaterEqual" | b"geq" => "\u{2265}", b"lE" | b"LessFullEqual" | b"leqq" => "\u{2266}", b"gE" | b"GreaterFullEqual" | b"geqq" => "\u{2267}", b"lnE" | b"lneqq" => "\u{2268}", b"gnE" | b"gneqq" => "\u{2269}", b"Lt" | b"NestedLessLess" | b"ll" => "\u{226A}", b"Gt" | b"NestedGreaterGreater" | b"gg" => "\u{226B}", b"twixt" | b"between" => "\u{226C}", b"NotCupCap" => "\u{226D}", b"nlt" | b"NotLess" | b"nless" => "\u{226E}", b"ngt" | b"NotGreater" | b"ngtr" => "\u{226F}", b"nle" | b"NotLessEqual" | b"nleq" => "\u{2270}", b"nge" | b"NotGreaterEqual" | b"ngeq" => "\u{2271}", b"lsim" | b"LessTilde" | b"lesssim" => "\u{2272}", b"gsim" | b"gtrsim" | b"GreaterTilde" => "\u{2273}", b"nlsim" | b"NotLessTilde" => "\u{2274}", b"ngsim" | b"NotGreaterTilde" => "\u{2275}", b"lg" | b"lessgtr" | b"LessGreater" => "\u{2276}", b"gl" | b"gtrless" | b"GreaterLess" => "\u{2277}", b"ntlg" | b"NotLessGreater" => "\u{2278}", b"ntgl" | b"NotGreaterLess" => "\u{2279}", b"pr" | b"Precedes" | b"prec" => "\u{227A}", b"sc" | b"Succeeds" | b"succ" => "\u{227B}", b"prcue" | b"PrecedesSlantEqual" | b"preccurlyeq" => "\u{227C}", b"sccue" | b"SucceedsSlantEqual" | b"succcurlyeq" => "\u{227D}", b"prsim" | b"precsim" | b"PrecedesTilde" => "\u{227E}", b"scsim" | b"succsim" | b"SucceedsTilde" => "\u{227F}", b"npr" | b"nprec" | b"NotPrecedes" => "\u{2280}", b"nsc" | b"nsucc" | b"NotSucceeds" => "\u{2281}", b"sub" | b"subset" => "\u{2282}", b"sup" | b"supset" | b"Superset" => "\u{2283}", b"nsub" => "\u{2284}", b"nsup" => "\u{2285}", b"sube" | b"SubsetEqual" | b"subseteq" => "\u{2286}", b"supe" | b"supseteq" | b"SupersetEqual" => "\u{2287}", b"nsube" | b"nsubseteq" | b"NotSubsetEqual" => "\u{2288}", b"nsupe" | b"nsupseteq" | b"NotSupersetEqual" => "\u{2289}", b"subne" | b"subsetneq" => "\u{228A}", b"supne" | b"supsetneq" => "\u{228B}", b"cupdot" => "\u{228D}", b"uplus" | b"UnionPlus" => "\u{228E}", b"sqsub" | b"SquareSubset" | b"sqsubset" => "\u{228F}", b"sqsup" | b"SquareSuperset" | 
b"sqsupset" => "\u{2290}", b"sqsube" | b"SquareSubsetEqual" | b"sqsubseteq" => "\u{2291}", b"sqsupe" | b"SquareSupersetEqual" | b"sqsupseteq" => "\u{2292}", b"sqcap" | b"SquareIntersection" => "\u{2293}", b"sqcup" | b"SquareUnion" => "\u{2294}", b"oplus" | b"CirclePlus" => "\u{2295}", b"ominus" | b"CircleMinus" => "\u{2296}", b"otimes" | b"CircleTimes" => "\u{2297}", b"osol" => "\u{2298}", b"odot" | b"CircleDot" => "\u{2299}", b"ocir" | b"circledcirc" => "\u{229A}", b"oast" | b"circledast" => "\u{229B}", b"odash" | b"circleddash" => "\u{229D}", b"plusb" | b"boxplus" => "\u{229E}", b"minusb" | b"boxminus" => "\u{229F}", b"timesb" | b"boxtimes" => "\u{22A0}", b"sdotb" | b"dotsquare" => "\u{22A1}", b"vdash" | b"RightTee" => "\u{22A2}", b"dashv" | b"LeftTee" => "\u{22A3}", b"top" | b"DownTee" => "\u{22A4}", b"bottom" | b"bot" | b"perp" | b"UpTee" => "\u{22A5}", b"models" => "\u{22A7}", b"vDash" | b"DoubleRightTee" => "\u{22A8}", b"Vdash" => "\u{22A9}", b"Vvdash" => "\u{22AA}", b"VDash" => "\u{22AB}", b"nvdash" => "\u{22AC}", b"nvDash" => "\u{22AD}", b"nVdash" => "\u{22AE}", b"nVDash" => "\u{22AF}", b"prurel" => "\u{22B0}", b"vltri" | b"vartriangleleft" | b"LeftTriangle" => "\u{22B2}", b"vrtri" | b"vartriangleright" | b"RightTriangle" => "\u{22B3}", b"ltrie" | b"trianglelefteq" | b"LeftTriangleEqual" => "\u{22B4}", b"rtrie" | b"trianglerighteq" | b"RightTriangleEqual" => "\u{22B5}", b"origof" => "\u{22B6}", b"imof" => "\u{22B7}", b"mumap" | b"multimap" => "\u{22B8}", b"hercon" => "\u{22B9}", b"intcal" | b"intercal" => "\u{22BA}", b"veebar" => "\u{22BB}", b"barvee" => "\u{22BD}", b"angrtvb" => "\u{22BE}", b"lrtri" => "\u{22BF}", b"xwedge" | b"Wedge" | b"bigwedge" => "\u{22C0}", b"xvee" | b"Vee" | b"bigvee" => "\u{22C1}", b"xcap" | b"Intersection" | b"bigcap" => "\u{22C2}", b"xcup" | b"Union" | b"bigcup" => "\u{22C3}", b"diam" | b"diamond" | b"Diamond" => "\u{22C4}", b"sdot" => "\u{22C5}", b"sstarf" | b"Star" => "\u{22C6}", b"divonx" | b"divideontimes" => "\u{22C7}", 
b"bowtie" => "\u{22C8}", b"ltimes" => "\u{22C9}", b"rtimes" => "\u{22CA}", b"lthree" | b"leftthreetimes" => "\u{22CB}", b"rthree" | b"rightthreetimes" => "\u{22CC}", b"bsime" | b"backsimeq" => "\u{22CD}", b"cuvee" | b"curlyvee" => "\u{22CE}", b"cuwed" | b"curlywedge" => "\u{22CF}", b"Sub" | b"Subset" => "\u{22D0}", b"Sup" | b"Supset" => "\u{22D1}", b"Cap" => "\u{22D2}", b"Cup" => "\u{22D3}", b"fork" | b"pitchfork" => "\u{22D4}", b"epar" => "\u{22D5}", b"ltdot" | b"lessdot" => "\u{22D6}", b"gtdot" | b"gtrdot" => "\u{22D7}", b"Ll" => "\u{22D8}", b"Gg" | b"ggg" => "\u{22D9}", b"leg" | b"LessEqualGreater" | b"lesseqgtr" => "\u{22DA}", b"gel" | b"gtreqless" | b"GreaterEqualLess" => "\u{22DB}", b"cuepr" | b"curlyeqprec" => "\u{22DE}", b"cuesc" | b"curlyeqsucc" => "\u{22DF}", b"nprcue" | b"NotPrecedesSlantEqual" => "\u{22E0}", b"nsccue" | b"NotSucceedsSlantEqual" => "\u{22E1}", b"nsqsube" | b"NotSquareSubsetEqual" => "\u{22E2}", b"nsqsupe" | b"NotSquareSupersetEqual" => "\u{22E3}", b"lnsim" => "\u{22E6}", b"gnsim" => "\u{22E7}", b"prnsim" | b"precnsim" => "\u{22E8}", b"scnsim" | b"succnsim" => "\u{22E9}", b"nltri" | b"ntriangleleft" | b"NotLeftTriangle" => "\u{22EA}", b"nrtri" | b"ntriangleright" | b"NotRightTriangle" => "\u{22EB}", b"nltrie" | b"ntrianglelefteq" | b"NotLeftTriangleEqual" => "\u{22EC}", b"nrtrie" | b"ntrianglerighteq" | b"NotRightTriangleEqual" => "\u{22ED}", b"vellip" => "\u{22EE}", b"ctdot" => "\u{22EF}", b"utdot" => "\u{22F0}", b"dtdot" => "\u{22F1}", b"disin" => "\u{22F2}", b"isinsv" => "\u{22F3}", b"isins" => "\u{22F4}", b"isindot" => "\u{22F5}", b"notinvc" => "\u{22F6}", b"notinvb" => "\u{22F7}", b"isinE" => "\u{22F9}", b"nisd" => "\u{22FA}", b"xnis" => "\u{22FB}", b"nis" => "\u{22FC}", b"notnivc" => "\u{22FD}", b"notnivb" => "\u{22FE}", b"barwed" | b"barwedge" => "\u{2305}", b"Barwed" | b"doublebarwedge" => "\u{2306}", b"lceil" | b"LeftCeiling" => "\u{2308}", b"rceil" | b"RightCeiling" => "\u{2309}", b"lfloor" | b"LeftFloor" => "\u{230A}", 
b"rfloor" | b"RightFloor" => "\u{230B}", b"drcrop" => "\u{230C}", b"dlcrop" => "\u{230D}", b"urcrop" => "\u{230E}", b"ulcrop" => "\u{230F}", b"bnot" => "\u{2310}", b"profline" => "\u{2312}", b"profsurf" => "\u{2313}", b"telrec" => "\u{2315}", b"target" => "\u{2316}", b"ulcorn" | b"ulcorner" => "\u{231C}", b"urcorn" | b"urcorner" => "\u{231D}", b"dlcorn" | b"llcorner" => "\u{231E}", b"drcorn" | b"lrcorner" => "\u{231F}", b"frown" | b"sfrown" => "\u{2322}", b"smile" | b"ssmile" => "\u{2323}", b"cylcty" => "\u{232D}", b"profalar" => "\u{232E}", b"topbot" => "\u{2336}", b"ovbar" => "\u{233D}", b"solbar" => "\u{233F}", b"angzarr" => "\u{237C}", b"lmoust" | b"lmoustache" => "\u{23B0}", b"rmoust" | b"rmoustache" => "\u{23B1}", b"tbrk" | b"OverBracket" => "\u{23B4}", b"bbrk" | b"UnderBracket" => "\u{23B5}", b"bbrktbrk" => "\u{23B6}", b"OverParenthesis" => "\u{23DC}", b"UnderParenthesis" => "\u{23DD}", b"OverBrace" => "\u{23DE}", b"UnderBrace" => "\u{23DF}", b"trpezium" => "\u{23E2}", b"elinters" => "\u{23E7}", b"blank" => "\u{2423}", b"oS" | b"circledS" => "\u{24C8}", b"boxh" | b"HorizontalLine" => "\u{2500}", b"boxv" => "\u{2502}", b"boxdr" => "\u{250C}", b"boxdl" => "\u{2510}", b"boxur" => "\u{2514}", b"boxul" => "\u{2518}", b"boxvr" => "\u{251C}", b"boxvl" => "\u{2524}", b"boxhd" => "\u{252C}", b"boxhu" => "\u{2534}", b"boxvh" => "\u{253C}", b"boxH" => "\u{2550}", b"boxV" => "\u{2551}", b"boxdR" => "\u{2552}", b"boxDr" => "\u{2553}", b"boxDR" => "\u{2554}", b"boxdL" => "\u{2555}", b"boxDl" => "\u{2556}", b"boxDL" => "\u{2557}", b"boxuR" => "\u{2558}", b"boxUr" => "\u{2559}", b"boxUR" => "\u{255A}", b"boxuL" => "\u{255B}", b"boxUl" => "\u{255C}", b"boxUL" => "\u{255D}", b"boxvR" => "\u{255E}", b"boxVr" => "\u{255F}", b"boxVR" => "\u{2560}", b"boxvL" => "\u{2561}", b"boxVl" => "\u{2562}", b"boxVL" => "\u{2563}", b"boxHd" => "\u{2564}", b"boxhD" => "\u{2565}", b"boxHD" => "\u{2566}", b"boxHu" => "\u{2567}", b"boxhU" => "\u{2568}", b"boxHU" => "\u{2569}", b"boxvH" => 
"\u{256A}", b"boxVh" => "\u{256B}", b"boxVH" => "\u{256C}", b"uhblk" => "\u{2580}", b"lhblk" => "\u{2584}", b"block" => "\u{2588}", b"blk14" => "\u{2591}", b"blk12" => "\u{2592}", b"blk34" => "\u{2593}", b"squ" | b"square" | b"Square" => "\u{25A1}", b"squf" | b"squarf" | b"blacksquare" | b"FilledVerySmallSquare" => "\u{25AA}", b"EmptyVerySmallSquare" => "\u{25AB}", b"rect" => "\u{25AD}", b"marker" => "\u{25AE}", b"fltns" => "\u{25B1}", b"xutri" | b"bigtriangleup" => "\u{25B3}", b"utrif" | b"blacktriangle" => "\u{25B4}", b"utri" | b"triangle" => "\u{25B5}", b"rtrif" | b"blacktriangleright" => "\u{25B8}", b"rtri" | b"triangleright" => "\u{25B9}", b"xdtri" | b"bigtriangledown" => "\u{25BD}", b"dtrif" | b"blacktriangledown" => "\u{25BE}", b"dtri" | b"triangledown" => "\u{25BF}", b"ltrif" | b"blacktriangleleft" => "\u{25C2}", b"ltri" | b"triangleleft" => "\u{25C3}", b"loz" | b"lozenge" => "\u{25CA}", b"cir" => "\u{25CB}", b"tridot" => "\u{25EC}", b"xcirc" | b"bigcirc" => "\u{25EF}", b"ultri" => "\u{25F8}", b"urtri" => "\u{25F9}", b"lltri" => "\u{25FA}", b"EmptySmallSquare" => "\u{25FB}", b"FilledSmallSquare" => "\u{25FC}", b"starf" | b"bigstar" => "\u{2605}", b"star" => "\u{2606}", b"phone" => "\u{260E}", b"female" => "\u{2640}", b"male" => "\u{2642}", b"spades" | b"spadesuit" => "\u{2660}", b"clubs" | b"clubsuit" => "\u{2663}", b"hearts" | b"heartsuit" => "\u{2665}", b"diams" | b"diamondsuit" => "\u{2666}", b"sung" => "\u{266A}", b"flat" => "\u{266D}", b"natur" | b"natural" => "\u{266E}", b"sharp" => "\u{266F}", b"check" | b"checkmark" => "\u{2713}", b"cross" => "\u{2717}", b"malt" | b"maltese" => "\u{2720}", b"sext" => "\u{2736}", b"VerticalSeparator" => "\u{2758}", b"lbbrk" => "\u{2772}", b"rbbrk" => "\u{2773}", b"lobrk" | b"LeftDoubleBracket" => "\u{27E6}", b"robrk" | b"RightDoubleBracket" => "\u{27E7}", b"lang" | b"LeftAngleBracket" | b"langle" => "\u{27E8}", b"rang" | b"RightAngleBracket" | b"rangle" => "\u{27E9}", b"Lang" => "\u{27EA}", b"Rang" => "\u{27EB}", 
b"loang" => "\u{27EC}", b"roang" => "\u{27ED}", b"xlarr" | b"longleftarrow" | b"LongLeftArrow" => "\u{27F5}", b"xrarr" | b"longrightarrow" | b"LongRightArrow" => "\u{27F6}", b"xharr" | b"longleftrightarrow" | b"LongLeftRightArrow" => "\u{27F7}", b"xlArr" | b"Longleftarrow" | b"DoubleLongLeftArrow" => "\u{27F8}", b"xrArr" | b"Longrightarrow" | b"DoubleLongRightArrow" => "\u{27F9}", b"xhArr" | b"Longleftrightarrow" | b"DoubleLongLeftRightArrow" => "\u{27FA}", b"xmap" | b"longmapsto" => "\u{27FC}", b"dzigrarr" => "\u{27FF}", b"nvlArr" => "\u{2902}", b"nvrArr" => "\u{2903}", b"nvHarr" => "\u{2904}", b"Map" => "\u{2905}", b"lbarr" => "\u{290C}", b"rbarr" | b"bkarow" => "\u{290D}", b"lBarr" => "\u{290E}", b"rBarr" | b"dbkarow" => "\u{290F}", b"RBarr" | b"drbkarow" => "\u{2910}", b"DDotrahd" => "\u{2911}", b"UpArrowBar" => "\u{2912}", b"DownArrowBar" => "\u{2913}", b"Rarrtl" => "\u{2916}", b"latail" => "\u{2919}", b"ratail" => "\u{291A}", b"lAtail" => "\u{291B}", b"rAtail" => "\u{291C}", b"larrfs" => "\u{291D}", b"rarrfs" => "\u{291E}", b"larrbfs" => "\u{291F}", b"rarrbfs" => "\u{2920}", b"nwarhk" => "\u{2923}", b"nearhk" => "\u{2924}", b"searhk" | b"hksearow" => "\u{2925}", b"swarhk" | b"hkswarow" => "\u{2926}", b"nwnear" => "\u{2927}", b"nesear" | b"toea" => "\u{2928}", b"seswar" | b"tosa" => "\u{2929}", b"swnwar" => "\u{292A}", b"rarrc" => "\u{2933}", b"cudarrr" => "\u{2935}", b"ldca" => "\u{2936}", b"rdca" => "\u{2937}", b"cudarrl" => "\u{2938}", b"larrpl" => "\u{2939}", b"curarrm" => "\u{293C}", b"cularrp" => "\u{293D}", b"rarrpl" => "\u{2945}", b"harrcir" => "\u{2948}", b"Uarrocir" => "\u{2949}", b"lurdshar" => "\u{294A}", b"ldrushar" => "\u{294B}", b"LeftRightVector" => "\u{294E}", b"RightUpDownVector" => "\u{294F}", b"DownLeftRightVector" => "\u{2950}", b"LeftUpDownVector" => "\u{2951}", b"LeftVectorBar" => "\u{2952}", b"RightVectorBar" => "\u{2953}", b"RightUpVectorBar" => "\u{2954}", b"RightDownVectorBar" => "\u{2955}", b"DownLeftVectorBar" => "\u{2956}", 
b"DownRightVectorBar" => "\u{2957}", b"LeftUpVectorBar" => "\u{2958}", b"LeftDownVectorBar" => "\u{2959}", b"LeftTeeVector" => "\u{295A}", b"RightTeeVector" => "\u{295B}", b"RightUpTeeVector" => "\u{295C}", b"RightDownTeeVector" => "\u{295D}", b"DownLeftTeeVector" => "\u{295E}", b"DownRightTeeVector" => "\u{295F}", b"LeftUpTeeVector" => "\u{2960}", b"LeftDownTeeVector" => "\u{2961}", b"lHar" => "\u{2962}", b"uHar" => "\u{2963}", b"rHar" => "\u{2964}", b"dHar" => "\u{2965}", b"luruhar" => "\u{2966}", b"ldrdhar" => "\u{2967}", b"ruluhar" => "\u{2968}", b"rdldhar" => "\u{2969}", b"lharul" => "\u{296A}", b"llhard" => "\u{296B}", b"rharul" => "\u{296C}", b"lrhard" => "\u{296D}", b"udhar" | b"UpEquilibrium" => "\u{296E}", b"duhar" | b"ReverseUpEquilibrium" => "\u{296F}", b"RoundImplies" => "\u{2970}", b"erarr" => "\u{2971}", b"simrarr" => "\u{2972}", b"larrsim" => "\u{2973}", b"rarrsim" => "\u{2974}", b"rarrap" => "\u{2975}", b"ltlarr" => "\u{2976}", b"gtrarr" => "\u{2978}", b"subrarr" => "\u{2979}", b"suplarr" => "\u{297B}", b"lfisht" => "\u{297C}", b"rfisht" => "\u{297D}", b"ufisht" => "\u{297E}", b"dfisht" => "\u{297F}", b"lopar" => "\u{2985}", b"ropar" => "\u{2986}", b"lbrke" => "\u{298B}", b"rbrke" => "\u{298C}", b"lbrkslu" => "\u{298D}", b"rbrksld" => "\u{298E}", b"lbrksld" => "\u{298F}", b"rbrkslu" => "\u{2990}", b"langd" => "\u{2991}", b"rangd" => "\u{2992}", b"lparlt" => "\u{2993}", b"rpargt" => "\u{2994}", b"gtlPar" => "\u{2995}", b"ltrPar" => "\u{2996}", b"vzigzag" => "\u{299A}", b"vangrt" => "\u{299C}", b"angrtvbd" => "\u{299D}", b"ange" => "\u{29A4}", b"range" => "\u{29A5}", b"dwangle" => "\u{29A6}", b"uwangle" => "\u{29A7}", b"angmsdaa" => "\u{29A8}", b"angmsdab" => "\u{29A9}", b"angmsdac" => "\u{29AA}", b"angmsdad" => "\u{29AB}", b"angmsdae" => "\u{29AC}", b"angmsdaf" => "\u{29AD}", b"angmsdag" => "\u{29AE}", b"angmsdah" => "\u{29AF}", b"bemptyv" => "\u{29B0}", b"demptyv" => "\u{29B1}", b"cemptyv" => "\u{29B2}", b"raemptyv" => "\u{29B3}", b"laemptyv" => 
"\u{29B4}", b"ohbar" => "\u{29B5}", b"omid" => "\u{29B6}", b"opar" => "\u{29B7}", b"operp" => "\u{29B9}", b"olcross" => "\u{29BB}", b"odsold" => "\u{29BC}", b"olcir" => "\u{29BE}", b"ofcir" => "\u{29BF}", b"olt" => "\u{29C0}", b"ogt" => "\u{29C1}", b"cirscir" => "\u{29C2}", b"cirE" => "\u{29C3}", b"solb" => "\u{29C4}", b"bsolb" => "\u{29C5}", b"boxbox" => "\u{29C9}", b"trisb" => "\u{29CD}", b"rtriltri" => "\u{29CE}", b"LeftTriangleBar" => "\u{29CF}", b"RightTriangleBar" => "\u{29D0}", b"race" => "\u{29DA}", b"iinfin" => "\u{29DC}", b"infintie" => "\u{29DD}", b"nvinfin" => "\u{29DE}", b"eparsl" => "\u{29E3}", b"smeparsl" => "\u{29E4}", b"eqvparsl" => "\u{29E5}", b"lozf" | b"blacklozenge" => "\u{29EB}", b"RuleDelayed" => "\u{29F4}", b"dsol" => "\u{29F6}", b"xodot" | b"bigodot" => "\u{2A00}", b"xoplus" | b"bigoplus" => "\u{2A01}", b"xotime" | b"bigotimes" => "\u{2A02}", b"xuplus" | b"biguplus" => "\u{2A04}", b"xsqcup" | b"bigsqcup" => "\u{2A06}", b"qint" | b"iiiint" => "\u{2A0C}", b"fpartint" => "\u{2A0D}", b"cirfnint" => "\u{2A10}", b"awint" => "\u{2A11}", b"rppolint" => "\u{2A12}", b"scpolint" => "\u{2A13}", b"npolint" => "\u{2A14}", b"pointint" => "\u{2A15}", b"quatint" => "\u{2A16}", b"intlarhk" => "\u{2A17}", b"pluscir" => "\u{2A22}", b"plusacir" => "\u{2A23}", b"simplus" => "\u{2A24}", b"plusdu" => "\u{2A25}", b"plussim" => "\u{2A26}", b"plustwo" => "\u{2A27}", b"mcomma" => "\u{2A29}", b"minusdu" => "\u{2A2A}", b"loplus" => "\u{2A2D}", b"roplus" => "\u{2A2E}", b"Cross" => "\u{2A2F}", b"timesd" => "\u{2A30}", b"timesbar" => "\u{2A31}", b"smashp" => "\u{2A33}", b"lotimes" => "\u{2A34}", b"rotimes" => "\u{2A35}", b"otimesas" => "\u{2A36}", b"Otimes" => "\u{2A37}", b"odiv" => "\u{2A38}", b"triplus" => "\u{2A39}", b"triminus" => "\u{2A3A}", b"tritime" => "\u{2A3B}", b"iprod" | b"intprod" => "\u{2A3C}", b"amalg" => "\u{2A3F}", b"capdot" => "\u{2A40}", b"ncup" => "\u{2A42}", b"ncap" => "\u{2A43}", b"capand" => "\u{2A44}", b"cupor" => "\u{2A45}", b"cupcap" => 
"\u{2A46}", b"capcup" => "\u{2A47}", b"cupbrcap" => "\u{2A48}", b"capbrcup" => "\u{2A49}", b"cupcup" => "\u{2A4A}", b"capcap" => "\u{2A4B}", b"ccups" => "\u{2A4C}", b"ccaps" => "\u{2A4D}", b"ccupssm" => "\u{2A50}", b"And" => "\u{2A53}", b"Or" => "\u{2A54}", b"andand" => "\u{2A55}", b"oror" => "\u{2A56}", b"orslope" => "\u{2A57}", b"andslope" => "\u{2A58}", b"andv" => "\u{2A5A}", b"orv" => "\u{2A5B}", b"andd" => "\u{2A5C}", b"ord" => "\u{2A5D}", b"wedbar" => "\u{2A5F}", b"sdote" => "\u{2A66}", b"simdot" => "\u{2A6A}", b"congdot" => "\u{2A6D}", b"easter" => "\u{2A6E}", b"apacir" => "\u{2A6F}", b"apE" => "\u{2A70}", b"eplus" => "\u{2A71}", b"pluse" => "\u{2A72}", b"Esim" => "\u{2A73}", b"Colone" => "\u{2A74}", b"Equal" => "\u{2A75}", b"eDDot" | b"ddotseq" => "\u{2A77}", b"equivDD" => "\u{2A78}", b"ltcir" => "\u{2A79}", b"gtcir" => "\u{2A7A}", b"ltquest" => "\u{2A7B}", b"gtquest" => "\u{2A7C}", b"les" | b"LessSlantEqual" | b"leqslant" => "\u{2A7D}", b"ges" | b"GreaterSlantEqual" | b"geqslant" => "\u{2A7E}", b"lesdot" => "\u{2A7F}", b"gesdot" => "\u{2A80}", b"lesdoto" => "\u{2A81}", b"gesdoto" => "\u{2A82}", b"lesdotor" => "\u{2A83}", b"gesdotol" => "\u{2A84}", b"lap" | b"lessapprox" => "\u{2A85}", b"gap" | b"gtrapprox" => "\u{2A86}", b"lne" | b"lneq" => "\u{2A87}", b"gne" | b"gneq" => "\u{2A88}", b"lnap" | b"lnapprox" => "\u{2A89}", b"gnap" | b"gnapprox" => "\u{2A8A}", b"lEg" | b"lesseqqgtr" => "\u{2A8B}", b"gEl" | b"gtreqqless" => "\u{2A8C}", b"lsime" => "\u{2A8D}", b"gsime" => "\u{2A8E}", b"lsimg" => "\u{2A8F}", b"gsiml" => "\u{2A90}", b"lgE" => "\u{2A91}", b"glE" => "\u{2A92}", b"lesges" => "\u{2A93}", b"gesles" => "\u{2A94}", b"els" | b"eqslantless" => "\u{2A95}", b"egs" | b"eqslantgtr" => "\u{2A96}", b"elsdot" => "\u{2A97}", b"egsdot" => "\u{2A98}", b"el" => "\u{2A99}", b"eg" => "\u{2A9A}", b"siml" => "\u{2A9D}", b"simg" => "\u{2A9E}", b"simlE" => "\u{2A9F}", b"simgE" => "\u{2AA0}", b"LessLess" => "\u{2AA1}", b"GreaterGreater" => "\u{2AA2}", b"glj" => "\u{2AA4}", 
b"gla" => "\u{2AA5}", b"ltcc" => "\u{2AA6}", b"gtcc" => "\u{2AA7}", b"lescc" => "\u{2AA8}", b"gescc" => "\u{2AA9}", b"smt" => "\u{2AAA}", b"lat" => "\u{2AAB}", b"smte" => "\u{2AAC}", b"late" => "\u{2AAD}", b"bumpE" => "\u{2AAE}", b"pre" | b"preceq" | b"PrecedesEqual" => "\u{2AAF}", b"sce" | b"succeq" | b"SucceedsEqual" => "\u{2AB0}", b"prE" => "\u{2AB3}", b"scE" => "\u{2AB4}", b"prnE" | b"precneqq" => "\u{2AB5}", b"scnE" | b"succneqq" => "\u{2AB6}", b"prap" | b"precapprox" => "\u{2AB7}", b"scap" | b"succapprox" => "\u{2AB8}", b"prnap" | b"precnapprox" => "\u{2AB9}", b"scnap" | b"succnapprox" => "\u{2ABA}", b"Pr" => "\u{2ABB}", b"Sc" => "\u{2ABC}", b"subdot" => "\u{2ABD}", b"supdot" => "\u{2ABE}", b"subplus" => "\u{2ABF}", b"supplus" => "\u{2AC0}", b"submult" => "\u{2AC1}", b"supmult" => "\u{2AC2}", b"subedot" => "\u{2AC3}", b"supedot" => "\u{2AC4}", b"subE" | b"subseteqq" => "\u{2AC5}", b"supE" | b"supseteqq" => "\u{2AC6}", b"subsim" => "\u{2AC7}", b"supsim" => "\u{2AC8}", b"subnE" | b"subsetneqq" => "\u{2ACB}", b"supnE" | b"supsetneqq" => "\u{2ACC}", b"csub" => "\u{2ACF}", b"csup" => "\u{2AD0}", b"csube" => "\u{2AD1}", b"csupe" => "\u{2AD2}", b"subsup" => "\u{2AD3}", b"supsub" => "\u{2AD4}", b"subsub" => "\u{2AD5}", b"supsup" => "\u{2AD6}", b"suphsub" => "\u{2AD7}", b"supdsub" => "\u{2AD8}", b"forkv" => "\u{2AD9}", b"topfork" => "\u{2ADA}", b"mlcp" => "\u{2ADB}", b"Dashv" | b"DoubleLeftTee" => "\u{2AE4}", b"Vdashl" => "\u{2AE6}", b"Barv" => "\u{2AE7}", b"vBar" => "\u{2AE8}", b"vBarv" => "\u{2AE9}", b"Vbar" => "\u{2AEB}", b"Not" => "\u{2AEC}", b"bNot" => "\u{2AED}", b"rnmid" => "\u{2AEE}", b"cirmid" => "\u{2AEF}", b"midcir" => "\u{2AF0}", b"topcir" => "\u{2AF1}", b"nhpar" => "\u{2AF2}", b"parsim" => "\u{2AF3}", b"parsl" => "\u{2AFD}", b"fflig" => "\u{FB00}", b"filig" => "\u{FB01}", b"fllig" => "\u{FB02}", b"ffilig" => "\u{FB03}", b"ffllig" => "\u{FB04}", b"Ascr" => "\u{1D49}", b"Cscr" => "\u{1D49}", b"Dscr" => "\u{1D49}", b"Gscr" => "\u{1D4A}", b"Jscr" => 
"\u{1D4A}", b"Kscr" => "\u{1D4A}", b"Nscr" => "\u{1D4A}", b"Oscr" => "\u{1D4A}", b"Pscr" => "\u{1D4A}", b"Qscr" => "\u{1D4A}", b"Sscr" => "\u{1D4A}", b"Tscr" => "\u{1D4A}", b"Uscr" => "\u{1D4B}", b"Vscr" => "\u{1D4B}", b"Wscr" => "\u{1D4B}", b"Xscr" => "\u{1D4B}", b"Yscr" => "\u{1D4B}", b"Zscr" => "\u{1D4B}", b"ascr" => "\u{1D4B}", b"bscr" => "\u{1D4B}", b"cscr" => "\u{1D4B}", b"dscr" => "\u{1D4B}", b"fscr" => "\u{1D4B}", b"hscr" => "\u{1D4B}", b"iscr" => "\u{1D4B}", b"jscr" => "\u{1D4B}", b"kscr" => "\u{1D4C}", b"lscr" => "\u{1D4C}", b"mscr" => "\u{1D4C}", b"nscr" => "\u{1D4C}", b"pscr" => "\u{1D4C}", b"qscr" => "\u{1D4C}", b"rscr" => "\u{1D4C}", b"sscr" => "\u{1D4C}", b"tscr" => "\u{1D4C}", b"uscr" => "\u{1D4C}", b"vscr" => "\u{1D4C}", b"wscr" => "\u{1D4C}", b"xscr" => "\u{1D4C}", b"yscr" => "\u{1D4C}", b"zscr" => "\u{1D4C}", b"Afr" => "\u{1D50}", b"Bfr" => "\u{1D50}", b"Dfr" => "\u{1D50}", b"Efr" => "\u{1D50}", b"Ffr" => "\u{1D50}", b"Gfr" => "\u{1D50}", b"Jfr" => "\u{1D50}", b"Kfr" => "\u{1D50}", b"Lfr" => "\u{1D50}", b"Mfr" => "\u{1D51}", b"Nfr" => "\u{1D51}", b"Ofr" => "\u{1D51}", b"Pfr" => "\u{1D51}", b"Qfr" => "\u{1D51}", b"Sfr" => "\u{1D51}", b"Tfr" => "\u{1D51}", b"Ufr" => "\u{1D51}", b"Vfr" => "\u{1D51}", b"Wfr" => "\u{1D51}", b"Xfr" => "\u{1D51}", b"Yfr" => "\u{1D51}", b"afr" => "\u{1D51}", b"bfr" => "\u{1D51}", b"cfr" => "\u{1D52}", b"dfr" => "\u{1D52}", b"efr" => "\u{1D52}", b"ffr" => "\u{1D52}", b"gfr" => "\u{1D52}", b"hfr" => "\u{1D52}", b"ifr" => "\u{1D52}", b"jfr" => "\u{1D52}", b"kfr" => "\u{1D52}", b"lfr" => "\u{1D52}", b"mfr" => "\u{1D52}", b"nfr" => "\u{1D52}", b"ofr" => "\u{1D52}", b"pfr" => "\u{1D52}", b"qfr" => "\u{1D52}", b"rfr" => "\u{1D52}", b"sfr" => "\u{1D53}", b"tfr" => "\u{1D53}", b"ufr" => "\u{1D53}", b"vfr" => "\u{1D53}", b"wfr" => "\u{1D53}", b"xfr" => "\u{1D53}", b"yfr" => "\u{1D53}", b"zfr" => "\u{1D53}", b"Aopf" => "\u{1D53}", b"Bopf" => "\u{1D53}", b"Dopf" => "\u{1D53}", b"Eopf" => "\u{1D53}", b"Fopf" => "\u{1D53}", b"Gopf" => 
"\u{1D53}", b"Iopf" => "\u{1D54}", b"Jopf" => "\u{1D54}", b"Kopf" => "\u{1D54}", b"Lopf" => "\u{1D54}", b"Mopf" => "\u{1D54}", b"Oopf" => "\u{1D54}", b"Sopf" => "\u{1D54}", b"Topf" => "\u{1D54}", b"Uopf" => "\u{1D54}", b"Vopf" => "\u{1D54}", b"Wopf" => "\u{1D54}", b"Xopf" => "\u{1D54}", b"Yopf" => "\u{1D55}", b"aopf" => "\u{1D55}", b"bopf" => "\u{1D55}", b"copf" => "\u{1D55}", b"dopf" => "\u{1D55}", b"eopf" => "\u{1D55}", b"fopf" => "\u{1D55}", b"gopf" => "\u{1D55}", b"hopf" => "\u{1D55}", b"iopf" => "\u{1D55}", b"jopf" => "\u{1D55}", b"kopf" => "\u{1D55}", b"lopf" => "\u{1D55}", b"mopf" => "\u{1D55}", b"nopf" => "\u{1D55}", b"oopf" => "\u{1D56}", b"popf" => "\u{1D56}", b"qopf" => "\u{1D56}", b"ropf" => "\u{1D56}", b"sopf" => "\u{1D56}", b"topf" => "\u{1D56}", b"uopf" => "\u{1D56}", b"vopf" => "\u{1D56}", b"wopf" => "\u{1D56}", b"xopf" => "\u{1D56}", b"yopf" => "\u{1D56}", b"zopf" => "\u{1D56}", _ => return None, }; Some(s) }

/// Converts the textual code of a numeric character reference into a `char`.
///
/// `num` is parsed as hexadecimal when it starts with `x` (the prefix itself
/// is not part of the number) and as decimal otherwise.
///
/// # Errors
///
/// - [`ParseCharRefError::UnexpectedSign`] if the number starts with `+` or `-`
/// - [`ParseCharRefError::InvalidNumber`] if the digits cannot be parsed as `u32`
/// - [`ParseCharRefError::IllegalCharacter`] if the code is `0`
/// - [`ParseCharRefError::InvalidCodepoint`] if the code is not a valid `char`
fn parse_number(num: &str) -> Result<char, ParseCharRefError> {
    let code = if let Some(hex) = num.strip_prefix('x') {
        from_str_radix(hex, 16)?
    } else {
        from_str_radix(num, 10)?
    };
    // The NUL character is rejected explicitly, `char::from_u32` would accept it
    if code == 0 {
        return Err(ParseCharRefError::IllegalCharacter(code));
    }
    std::char::from_u32(code).ok_or(ParseCharRefError::InvalidCodepoint(code))
}

/// Parses `src` as an unsigned number in the given `radix`, rejecting any
/// explicit sign.
#[inline]
fn from_str_radix(src: &str, radix: u32) -> Result<u32, ParseCharRefError> {
    match src.as_bytes().first().copied() {
        // We should not allow sign numbers, but u32::from_str_radix will accept `+`.
        // We also handle `-` to be consistent in returned errors
        Some(b'+') | Some(b'-') => Err(ParseCharRefError::UnexpectedSign),
        _ => u32::from_str_radix(src, radix).map_err(ParseCharRefError::InvalidNumber),
    }
}
quick-xml-0.36.1/src/events/attributes.rs000064400000000000000000002367520072674642500165230ustar 00000000000000
//! Xml Attributes module
//!
//!
Provides an iterator over attributes key/value pairs use crate::encoding::Decoder; use crate::errors::Result as XmlResult; use crate::escape::{escape, resolve_predefined_entity, unescape_with}; use crate::name::QName; use crate::utils::{is_whitespace, write_byte_string, write_cow_string, Bytes}; use std::fmt::{self, Debug, Display, Formatter}; use std::iter::FusedIterator; use std::{borrow::Cow, ops::Range}; /// A struct representing a key/value XML attribute. /// /// Field `value` stores raw bytes, possibly containing escape-sequences. Most users will likely /// want to access the value using one of the [`unescape_value`] and [`decode_and_unescape_value`] /// functions. /// /// [`unescape_value`]: Self::unescape_value /// [`decode_and_unescape_value`]: Self::decode_and_unescape_value #[derive(Clone, Eq, PartialEq)] pub struct Attribute<'a> { /// The key to uniquely define the attribute. /// /// If [`Attributes::with_checks`] is turned off, the key might not be unique. pub key: QName<'a>, /// The raw value of the attribute. pub value: Cow<'a, [u8]>, } impl<'a> Attribute<'a> { /// Decodes using UTF-8 then unescapes the value. /// /// This is normally the value you are interested in. Escape sequences such as `>` are /// replaced with their unescaped equivalents such as `>`. /// /// This will allocate if the value contains any escape sequences. /// /// See also [`unescape_value_with()`](Self::unescape_value_with) /// /// This method is available only if [`encoding`] feature is **not** enabled. /// /// [`encoding`]: ../../index.html#encoding #[cfg(any(doc, not(feature = "encoding")))] pub fn unescape_value(&self) -> XmlResult> { self.unescape_value_with(resolve_predefined_entity) } /// Decodes using UTF-8 then unescapes the value, using custom entities. /// /// This is normally the value you are interested in. Escape sequences such as `>` are /// replaced with their unescaped equivalents such as `>`. 
/// A fallback resolver for additional custom entities can be provided via /// `resolve_entity`. /// /// This will allocate if the value contains any escape sequences. /// /// See also [`unescape_value()`](Self::unescape_value) /// /// This method is available only if [`encoding`] feature is **not** enabled. /// /// [`encoding`]: ../../index.html#encoding #[cfg(any(doc, not(feature = "encoding")))] #[inline] pub fn unescape_value_with<'entity>( &self, resolve_entity: impl FnMut(&str) -> Option<&'entity str>, ) -> XmlResult> { self.decode_and_unescape_value_with(Decoder::utf8(), resolve_entity) } /// Decodes then unescapes the value. /// /// This will allocate if the value contains any escape sequences or in /// non-UTF-8 encoding. pub fn decode_and_unescape_value(&self, decoder: Decoder) -> XmlResult> { self.decode_and_unescape_value_with(decoder, resolve_predefined_entity) } /// Decodes then unescapes the value with custom entities. /// /// This will allocate if the value contains any escape sequences or in /// non-UTF-8 encoding. pub fn decode_and_unescape_value_with<'entity>( &self, decoder: Decoder, resolve_entity: impl FnMut(&str) -> Option<&'entity str>, ) -> XmlResult> { let decoded = decoder.decode_cow(&self.value)?; match unescape_with(&decoded, resolve_entity)? { // Because result is borrowed, no replacements was done and we can use original string Cow::Borrowed(_) => Ok(decoded), Cow::Owned(s) => Ok(s.into()), } } } impl<'a> Debug for Attribute<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "Attribute {{ key: ")?; write_byte_string(f, self.key.as_ref())?; write!(f, ", value: ")?; write_cow_string(f, &self.value)?; write!(f, " }}") } } impl<'a> From<(&'a [u8], &'a [u8])> for Attribute<'a> { /// Creates new attribute from raw bytes. /// Does not apply any transformation to both key and value. 
/// /// # Examples /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::attributes::Attribute; /// /// let features = Attribute::from(("features".as_bytes(), "Bells & whistles".as_bytes())); /// assert_eq!(features.value, "Bells & whistles".as_bytes()); /// ``` fn from(val: (&'a [u8], &'a [u8])) -> Attribute<'a> { Attribute { key: QName(val.0), value: Cow::from(val.1), } } } impl<'a> From<(&'a str, &'a str)> for Attribute<'a> { /// Creates new attribute from text representation. /// Key is stored as-is, but the value will be escaped. /// /// # Examples /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::attributes::Attribute; /// /// let features = Attribute::from(("features", "Bells & whistles")); /// assert_eq!(features.value, "Bells & whistles".as_bytes()); /// ``` fn from(val: (&'a str, &'a str)) -> Attribute<'a> { Attribute { key: QName(val.0.as_bytes()), value: match escape(val.1) { Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()), Cow::Owned(s) => Cow::Owned(s.into_bytes()), }, } } } impl<'a> From> for Attribute<'a> { #[inline] fn from(attr: Attr<&'a [u8]>) -> Self { Self { key: attr.key(), value: Cow::Borrowed(attr.value()), } } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Iterator over XML attributes. /// /// Yields `Result`. An `Err` will be yielded if an attribute is malformed or duplicated. /// The duplicate check can be turned off by calling [`with_checks(false)`]. /// /// [`with_checks(false)`]: Self::with_checks #[derive(Clone, Debug)] pub struct Attributes<'a> { /// Slice of `BytesStart` corresponding to attributes bytes: &'a [u8], /// Iterator state, independent from the actual source of bytes state: IterState, } impl<'a> Attributes<'a> { /// Internal constructor, used by `BytesStart`. 
Supplies data in reader's encoding #[inline] pub(crate) const fn wrap(buf: &'a [u8], pos: usize, html: bool) -> Self { Self { bytes: buf, state: IterState::new(pos, html), } } /// Creates a new attribute iterator from a buffer. pub const fn new(buf: &'a str, pos: usize) -> Self { Self::wrap(buf.as_bytes(), pos, false) } /// Creates a new attribute iterator from a buffer, allowing HTML attribute syntax. pub const fn html(buf: &'a str, pos: usize) -> Self { Self::wrap(buf.as_bytes(), pos, true) } /// Changes whether attributes should be checked for uniqueness. /// /// The XML specification requires attribute keys in the same element to be unique. This check /// can be disabled to improve performance slightly. /// /// (`true` by default) pub fn with_checks(&mut self, val: bool) -> &mut Attributes<'a> { self.state.check_duplicates = val; self } } impl<'a> Iterator for Attributes<'a> { type Item = Result, AttrError>; #[inline] fn next(&mut self) -> Option { match self.state.next(self.bytes) { None => None, Some(Ok(a)) => Some(Ok(a.map(|range| &self.bytes[range]).into())), Some(Err(e)) => Some(Err(e)), } } } impl<'a> FusedIterator for Attributes<'a> {} //////////////////////////////////////////////////////////////////////////////////////////////////// /// Errors that can be raised during parsing attributes. /// /// Recovery position in examples shows the position from which parsing of the /// next attribute will be attempted. #[derive(Clone, Debug, PartialEq, Eq)] pub enum AttrError { /// Attribute key was not followed by `=`, position relative to the start of /// the owning tag is provided. /// /// Example of input that raises this error: /// /// ```xml /// /// /// ``` /// /// This error can be raised only when the iterator is in XML mode. ExpectedEq(usize), /// Attribute value was not found after `=`, position relative to the start /// of the owning tag is provided. 
/// /// Example of input that raises this error: /// /// ```xml /// /// /// ``` /// /// This error can be returned only for the last attribute in the list, /// because otherwise any content after `=` will be threated as a value. /// The XML /// /// ```xml /// /// /// /// ``` /// /// will be treated as `Attribute { key = b"key", value = b"another-key" }` /// and or [`Attribute`] is returned, or [`AttrError::UnquotedValue`] is raised, /// depending on the parsing mode. ExpectedValue(usize), /// Attribute value is not quoted, position relative to the start of the /// owning tag is provided. /// /// Example of input that raises this error: /// /// ```xml /// /// /// /// ``` /// /// This error can be raised only when the iterator is in XML mode. UnquotedValue(usize), /// Attribute value was not finished with a matching quote, position relative /// to the start of owning tag and a quote is provided. That position is always /// a last character in the tag content. /// /// Example of input that raises this error: /// /// ```xml /// /// /// /// /// ``` /// /// This error is returned only when [`Attributes::with_checks()`] is set /// to `true` (that is default behavior). 
Duplicated(usize, usize), } impl Display for AttrError { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self { Self::ExpectedEq(pos) => write!( f, r#"position {}: attribute key must be directly followed by `=` or space"#, pos ), Self::ExpectedValue(pos) => write!( f, r#"position {}: `=` must be followed by an attribute value"#, pos ), Self::UnquotedValue(pos) => write!( f, r#"position {}: attribute value must be enclosed in `"` or `'`"#, pos ), Self::ExpectedQuote(pos, quote) => write!( f, r#"position {}: missing closing quote `{}` in attribute value"#, pos, *quote as char ), Self::Duplicated(pos1, pos2) => write!( f, r#"position {}: duplicated attribute, previous declaration at position {}"#, pos1, pos2 ), } } } impl std::error::Error for AttrError {} //////////////////////////////////////////////////////////////////////////////////////////////////// /// A struct representing a key/value XML or HTML [attribute]. /// /// [attribute]: https://www.w3.org/TR/xml11/#NT-Attribute #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Attr { /// Attribute with value enclosed in double quotes (`"`). Attribute key and /// value provided. This is a canonical XML-style attribute. DoubleQ(T, T), /// Attribute with value enclosed in single quotes (`'`). Attribute key and /// value provided. This is an XML-style attribute. SingleQ(T, T), /// Attribute with value not enclosed in quotes. Attribute key and value /// provided. This is HTML-style attribute, it can be returned in HTML-mode /// parsing only. In an XML mode [`AttrError::UnquotedValue`] will be raised /// instead. /// /// Attribute value can be invalid according to the [HTML specification], /// in particular, it can contain `"`, `'`, `=`, `<`, and ` /// characters. The absence of the `>` character is nevertheless guaranteed, /// since the parser extracts [events] based on them even before the start /// of parsing attributes. 
/// /// [HTML specification]: https://html.spec.whatwg.org/#unquoted /// [events]: crate::events::Event::Start Unquoted(T, T), /// Attribute without value. Attribute key provided. This is HTML-style attribute, /// it can be returned in HTML-mode parsing only. In XML mode /// [`AttrError::ExpectedEq`] will be raised instead. Empty(T), } impl Attr { /// Maps an `Attr` to `Attr` by applying a function to a contained key and value. #[inline] pub fn map(self, mut f: F) -> Attr where F: FnMut(T) -> U, { match self { Attr::DoubleQ(key, value) => Attr::DoubleQ(f(key), f(value)), Attr::SingleQ(key, value) => Attr::SingleQ(f(key), f(value)), Attr::Empty(key) => Attr::Empty(f(key)), Attr::Unquoted(key, value) => Attr::Unquoted(f(key), f(value)), } } } impl<'a> Attr<&'a [u8]> { /// Returns the key value #[inline] pub const fn key(&self) -> QName<'a> { QName(match self { Attr::DoubleQ(key, _) => key, Attr::SingleQ(key, _) => key, Attr::Empty(key) => key, Attr::Unquoted(key, _) => key, }) } /// Returns the attribute value. For [`Self::Empty`] variant an empty slice /// is returned according to the [HTML specification]. 
/// /// [HTML specification]: https://www.w3.org/TR/2012/WD-html-markup-20120329/syntax.html#syntax-attr-empty #[inline] pub const fn value(&self) -> &'a [u8] { match self { Attr::DoubleQ(_, value) => value, Attr::SingleQ(_, value) => value, Attr::Empty(_) => &[], Attr::Unquoted(_, value) => value, } } } impl> Debug for Attr { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self { Attr::DoubleQ(key, value) => f .debug_tuple("Attr::DoubleQ") .field(&Bytes(key.as_ref())) .field(&Bytes(value.as_ref())) .finish(), Attr::SingleQ(key, value) => f .debug_tuple("Attr::SingleQ") .field(&Bytes(key.as_ref())) .field(&Bytes(value.as_ref())) .finish(), Attr::Empty(key) => f .debug_tuple("Attr::Empty") // Comment to prevent formatting and keep style consistent .field(&Bytes(key.as_ref())) .finish(), Attr::Unquoted(key, value) => f .debug_tuple("Attr::Unquoted") .field(&Bytes(key.as_ref())) .field(&Bytes(value.as_ref())) .finish(), } } } /// Unpacks attribute key and value into tuple of this two elements. /// `None` value element is returned only for [`Attr::Empty`] variant. impl From> for (T, Option) { #[inline] fn from(attr: Attr) -> Self { match attr { Attr::DoubleQ(key, value) => (key, Some(value)), Attr::SingleQ(key, value) => (key, Some(value)), Attr::Empty(key) => (key, None), Attr::Unquoted(key, value) => (key, Some(value)), } } } //////////////////////////////////////////////////////////////////////////////////////////////////// type AttrResult = Result>, AttrError>; #[derive(Clone, Copy, Debug)] enum State { /// Iteration finished, iterator will return `None` to all [`IterState::next`] /// requests. Done, /// The last attribute returned was deserialized successfully. Contains an /// offset from which next attribute should be searched. Next(usize), /// The last attribute returns [`AttrError::UnquotedValue`], offset pointed /// to the beginning of the value. 
Recover should skip a value SkipValue(usize), /// The last attribute returns [`AttrError::Duplicated`], offset pointed to /// the equal (`=`) sign. Recover should skip it and a value SkipEqValue(usize), } /// External iterator over spans of attribute key and value #[derive(Clone, Debug)] pub(crate) struct IterState { /// Iteration state that determines what actions should be done before the /// actual parsing of the next attribute state: State, /// If `true`, enables ability to parse unquoted values and key-only (empty) /// attributes html: bool, /// If `true`, checks for duplicate names check_duplicates: bool, /// If `check_duplicates` is set, contains the ranges of already parsed attribute /// names. We store a ranges instead of slices to able to report a previous /// attribute position keys: Vec>, } impl IterState { pub const fn new(offset: usize, html: bool) -> Self { Self { state: State::Next(offset), html, check_duplicates: true, keys: Vec::new(), } } /// Recover from an error that could have been made on a previous step. /// Returns an offset from which parsing should continue. /// If there no input left, returns `None`. 
fn recover(&self, slice: &[u8]) -> Option { match self.state { State::Done => None, State::Next(offset) => Some(offset), State::SkipValue(offset) => self.skip_value(slice, offset), State::SkipEqValue(offset) => self.skip_eq_value(slice, offset), } } /// Skip all characters up to first space symbol or end-of-input #[inline] #[allow(clippy::manual_map)] fn skip_value(&self, slice: &[u8], offset: usize) -> Option { let mut iter = (offset..).zip(slice[offset..].iter()); match iter.find(|(_, &b)| is_whitespace(b)) { // Input: ` key = value ` // | ^ // offset e Some((e, _)) => Some(e), // Input: ` key = value` // | ^ // offset e = len() None => None, } } /// Skip all characters up to first space symbol or end-of-input #[inline] fn skip_eq_value(&self, slice: &[u8], offset: usize) -> Option { let mut iter = (offset..).zip(slice[offset..].iter()); // Skip all up to the quote and get the quote type let quote = match iter.find(|(_, &b)| !is_whitespace(b)) { // Input: ` key = "` // | ^ // offset Some((_, b'"')) => b'"', // Input: ` key = '` // | ^ // offset Some((_, b'\'')) => b'\'', // Input: ` key = x` // | ^ // offset Some((offset, _)) => return self.skip_value(slice, offset), // Input: ` key = ` // | ^ // offset None => return None, }; match iter.find(|(_, &b)| b == quote) { // Input: ` key = " "` // ^ Some((e, b'"')) => Some(e), // Input: ` key = ' '` // ^ Some((e, _)) => Some(e), // Input: ` key = " ` // Input: ` key = ' ` // ^ // Closing quote not found None => None, } } #[inline] fn check_for_duplicates( &mut self, slice: &[u8], key: Range, ) -> Result, AttrError> { if self.check_duplicates { if let Some(prev) = self .keys .iter() .find(|r| slice[(*r).clone()] == slice[key.clone()]) { return Err(AttrError::Duplicated(key.start, prev.start)); } self.keys.push(key.clone()); } Ok(key) } /// # Parameters /// /// - `slice`: content of the tag, used for checking for duplicates /// - `key`: Range of key in slice, if iterator in HTML mode /// - `offset`: Position of error if 
iterator in XML mode #[inline] fn key_only(&mut self, slice: &[u8], key: Range, offset: usize) -> Option { Some(if self.html { self.check_for_duplicates(slice, key).map(Attr::Empty) } else { Err(AttrError::ExpectedEq(offset)) }) } #[inline] fn double_q(&mut self, key: Range, value: Range) -> Option { self.state = State::Next(value.end + 1); // +1 for `"` Some(Ok(Attr::DoubleQ(key, value))) } #[inline] fn single_q(&mut self, key: Range, value: Range) -> Option { self.state = State::Next(value.end + 1); // +1 for `'` Some(Ok(Attr::SingleQ(key, value))) } pub fn next(&mut self, slice: &[u8]) -> Option { let mut iter = match self.recover(slice) { Some(offset) => (offset..).zip(slice[offset..].iter()), None => return None, }; // Index where next key started let start_key = match iter.find(|(_, &b)| !is_whitespace(b)) { // Input: ` key` // ^ Some((s, _)) => s, // Input: ` ` // ^ None => { // Because we reach end-of-input, stop iteration on next call self.state = State::Done; return None; } }; // Span of a key let (key, offset) = match iter.find(|(_, &b)| b == b'=' || is_whitespace(b)) { // Input: ` key=` // | ^ // s e Some((e, b'=')) => (start_key..e, e), // Input: ` key ` // ^ Some((e, _)) => match iter.find(|(_, &b)| !is_whitespace(b)) { // Input: ` key =` // | | ^ // start_key e Some((offset, b'=')) => (start_key..e, offset), // Input: ` key x` // | | ^ // start_key e // If HTML-like attributes is allowed, this is the result, otherwise error Some((offset, _)) => { // In any case, recovering is not required self.state = State::Next(offset); return self.key_only(slice, start_key..e, offset); } // Input: ` key ` // | | ^ // start_key e // If HTML-like attributes is allowed, this is the result, otherwise error None => { // Because we reach end-of-input, stop iteration on next call self.state = State::Done; return self.key_only(slice, start_key..e, slice.len()); } }, // Input: ` key` // | ^ // s e = len() // If HTML-like attributes is allowed, this is the result, otherwise 
error None => { // Because we reach end-of-input, stop iteration on next call self.state = State::Done; let e = slice.len(); return self.key_only(slice, start_key..e, e); } }; let key = match self.check_for_duplicates(slice, key) { Err(e) => { self.state = State::SkipEqValue(offset); return Some(Err(e)); } Ok(key) => key, }; //////////////////////////////////////////////////////////////////////// // Gets the position of quote and quote type let (start_value, quote) = match iter.find(|(_, &b)| !is_whitespace(b)) { // Input: ` key = "` // ^ Some((s, b'"')) => (s + 1, b'"'), // Input: ` key = '` // ^ Some((s, b'\'')) => (s + 1, b'\''), // Input: ` key = x` // ^ // If HTML-like attributes is allowed, this is the start of the value Some((s, _)) if self.html => { // We do not check validity of attribute value characters as required // according to https://html.spec.whatwg.org/#unquoted. It can be done // during validation phase let end = match iter.find(|(_, &b)| is_whitespace(b)) { // Input: ` key = value ` // | ^ // s e Some((e, _)) => e, // Input: ` key = value` // | ^ // s e = len() None => slice.len(), }; self.state = State::Next(end); return Some(Ok(Attr::Unquoted(key, s..end))); } // Input: ` key = x` // ^ Some((s, _)) => { self.state = State::SkipValue(s); return Some(Err(AttrError::UnquotedValue(s))); } // Input: ` key = ` // ^ None => { // Because we reach end-of-input, stop iteration on next call self.state = State::Done; return Some(Err(AttrError::ExpectedValue(slice.len()))); } }; match iter.find(|(_, &b)| b == quote) { // Input: ` key = " "` // ^ Some((e, b'"')) => self.double_q(key, start_value..e), // Input: ` key = ' '` // ^ Some((e, _)) => self.single_q(key, start_value..e), // Input: ` key = " ` // Input: ` key = ' ` // ^ // Closing quote not found None => { // Because we reach end-of-input, stop iteration on next call self.state = State::Done; Some(Err(AttrError::ExpectedQuote(slice.len(), quote))) } } } } 
//////////////////////////////////////////////////////////////////////////////////////////////////// /// Checks, how parsing of XML-style attributes works. Each attribute should /// have a value, enclosed in single or double quotes. #[cfg(test)] mod xml { use super::*; use pretty_assertions::assert_eq; /// Checked attribute is the single attribute mod single { use super::*; use pretty_assertions::assert_eq; /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { let mut iter = Attributes::new(r#"tag key='value'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value enclosed in double quotes #[test] fn double_quoted() { let mut iter = Attributes::new(r#"tag key="value""#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value, not enclosed in quotes #[test] fn unquoted() { let mut iter = Attributes::new(r#"tag key=value"#, 3); // 0 ^ = 8 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(8)))); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Only attribute key is present #[test] fn key_only() { let mut iter = Attributes::new(r#"tag key"#, 3); // 0 ^ = 7 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(7)))); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Key is started with an invalid symbol (a single quote in this test). 
/// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_start_invalid() { let mut iter = Attributes::new(r#"tag 'key'='value'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"'key'"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Key contains an invalid symbol (an ampersand in this test). /// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_contains_invalid() { let mut iter = Attributes::new(r#"tag key&jey='value'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key&jey"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute value is missing after `=` #[test] fn missed_value() { let mut iter = Attributes::new(r#"tag key="#, 3); // 0 ^ = 8 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(8)))); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } } /// Checked attribute is the first attribute in the list of many attributes mod first { use super::*; use pretty_assertions::assert_eq; /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { let mut iter = Attributes::new(r#"tag key='value' regular='attribute'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"regular"), value: Cow::Borrowed(b"attribute"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value enclosed in double quotes #[test] fn double_quoted() { let mut iter = Attributes::new(r#"tag key="value" regular='attribute'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"regular"), 
value: Cow::Borrowed(b"attribute"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value, not enclosed in quotes #[test] fn unquoted() { let mut iter = Attributes::new(r#"tag key=value regular='attribute'"#, 3); // 0 ^ = 8 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(8)))); // check error recovery assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"regular"), value: Cow::Borrowed(b"attribute"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Only attribute key is present #[test] fn key_only() { let mut iter = Attributes::new(r#"tag key regular='attribute'"#, 3); // 0 ^ = 8 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(8)))); // check error recovery assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"regular"), value: Cow::Borrowed(b"attribute"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Key is started with an invalid symbol (a single quote in this test). /// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_start_invalid() { let mut iter = Attributes::new(r#"tag 'key'='value' regular='attribute'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"'key'"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"regular"), value: Cow::Borrowed(b"attribute"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Key contains an invalid symbol (an ampersand in this test). 
/// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_contains_invalid() { let mut iter = Attributes::new(r#"tag key&jey='value' regular='attribute'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key&jey"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"regular"), value: Cow::Borrowed(b"attribute"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute value is missing after `=`. #[test] fn missed_value() { let mut iter = Attributes::new(r#"tag key= regular='attribute'"#, 3); // 0 ^ = 9 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9)))); // Because we do not check validity of keys and values during parsing, // "error='recovery'" is considered, as unquoted attribute value and // skipped during recovery and iteration finished assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); //////////////////////////////////////////////////////////////////// let mut iter = Attributes::new(r#"tag key= regular= 'attribute'"#, 3); // 0 ^ = 9 ^ = 29 // In that case "regular=" considered as unquoted value assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9)))); // In that case "'attribute'" considered as a key, because we do not check // validity of key names assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(29)))); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); //////////////////////////////////////////////////////////////////// let mut iter = Attributes::new(r#"tag key= regular ='attribute'"#, 3); // 0 ^ = 9 ^ = 29 // In that case "regular" considered as unquoted value assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9)))); // In that case "='attribute'" considered as a key, because we do not check // validity of key names assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(29)))); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); 
//////////////////////////////////////////////////////////////////// let mut iter = Attributes::new(r#"tag key= regular = 'attribute'"#, 3); // 0 ^ = 9 ^ = 19 ^ = 30 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9)))); // In that case second "=" considered as a key, because we do not check // validity of key names assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(19)))); // In that case "'attribute'" considered as a key, because we do not check // validity of key names assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(30)))); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } } /// Copy of single, but with additional spaces in markup mod sparsed { use super::*; use pretty_assertions::assert_eq; /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { let mut iter = Attributes::new(r#"tag key = 'value' "#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value enclosed in double quotes #[test] fn double_quoted() { let mut iter = Attributes::new(r#"tag key = "value" "#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value, not enclosed in quotes #[test] fn unquoted() { let mut iter = Attributes::new(r#"tag key = value "#, 3); // 0 ^ = 10 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(10)))); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Only attribute key is present #[test] fn key_only() { let mut iter = Attributes::new(r#"tag key "#, 3); // 0 ^ = 8 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(8)))); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Key is started with an invalid symbol (a single quote in this test). 
/// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_start_invalid() { let mut iter = Attributes::new(r#"tag 'key' = 'value' "#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"'key'"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Key contains an invalid symbol (an ampersand in this test). /// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_contains_invalid() { let mut iter = Attributes::new(r#"tag key&jey = 'value' "#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key&jey"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute value is missing after `=` #[test] fn missed_value() { let mut iter = Attributes::new(r#"tag key = "#, 3); // 0 ^ = 10 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(10)))); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } } /// Checks that duplicated attributes correctly reported and recovering is /// possible after that mod duplicated { use super::*; mod with_check { use super::*; use pretty_assertions::assert_eq; /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { let mut iter = Attributes::new(r#"tag key='value' key='dup' another=''"#, 3); // 0 ^ = 4 ^ = 16 assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4)))); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value enclosed in double quotes #[test] fn double_quoted() { let mut iter = Attributes::new(r#"tag key='value' key="dup" another=''"#, 3); // 0 ^ = 4 ^ = 16 assert_eq!( 
iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4)))); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value, not enclosed in quotes #[test] fn unquoted() { let mut iter = Attributes::new(r#"tag key='value' key=dup another=''"#, 3); // 0 ^ = 4 ^ = 16 assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4)))); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Only attribute key is present #[test] fn key_only() { let mut iter = Attributes::new(r#"tag key='value' key another=''"#, 3); // 0 ^ = 20 assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(20)))); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } } /// Check for duplicated names is disabled mod without_check { use super::*; use pretty_assertions::assert_eq; /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { let mut iter = Attributes::new(r#"tag key='value' key='dup' another=''"#, 3); iter.with_checks(false); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"dup"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a 
value enclosed in double quotes #[test] fn double_quoted() { let mut iter = Attributes::new(r#"tag key='value' key="dup" another=''"#, 3); iter.with_checks(false); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"dup"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value, not enclosed in quotes #[test] fn unquoted() { let mut iter = Attributes::new(r#"tag key='value' key=dup another=''"#, 3); // 0 ^ = 20 iter.with_checks(false); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(20)))); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Only attribute key is present #[test] fn key_only() { let mut iter = Attributes::new(r#"tag key='value' key another=''"#, 3); // 0 ^ = 20 iter.with_checks(false); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(20)))); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } } } #[test] fn mixed_quote() { let mut iter = Attributes::new(r#"tag a='a' b = "b" c='cc"cc' d="dd'dd""#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"a"), value: Cow::Borrowed(b"a"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"b"), value: Cow::Borrowed(b"b"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"c"), value: Cow::Borrowed(br#"cc"cc"#), })) ); assert_eq!( iter.next(), 
Some(Ok(Attribute { key: QName(b"d"), value: Cow::Borrowed(b"dd'dd"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } } /// Checks, how parsing of HTML-style attributes works. Each attribute can be /// in three forms: /// - XML-like: have a value, enclosed in single or double quotes /// - have a value, do not enclosed in quotes /// - without value, key only #[cfg(test)] mod html { use super::*; use pretty_assertions::assert_eq; /// Checked attribute is the single attribute mod single { use super::*; use pretty_assertions::assert_eq; /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { let mut iter = Attributes::html(r#"tag key='value'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value enclosed in double quotes #[test] fn double_quoted() { let mut iter = Attributes::html(r#"tag key="value""#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value, not enclosed in quotes #[test] fn unquoted() { let mut iter = Attributes::html(r#"tag key=value"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Only attribute key is present #[test] fn key_only() { let mut iter = Attributes::html(r#"tag key"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(&[]), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Key is started with an invalid symbol (a single quote in this test). 
/// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_start_invalid() { let mut iter = Attributes::html(r#"tag 'key'='value'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"'key'"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Key contains an invalid symbol (an ampersand in this test). /// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_contains_invalid() { let mut iter = Attributes::html(r#"tag key&jey='value'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key&jey"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute value is missing after `=` #[test] fn missed_value() { let mut iter = Attributes::html(r#"tag key="#, 3); // 0 ^ = 8 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(8)))); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } } /// Checked attribute is the first attribute in the list of many attributes mod first { use super::*; use pretty_assertions::assert_eq; /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { let mut iter = Attributes::html(r#"tag key='value' regular='attribute'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"regular"), value: Cow::Borrowed(b"attribute"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value enclosed in double quotes #[test] fn double_quoted() { let mut iter = Attributes::html(r#"tag key="value" regular='attribute'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"regular"), 
value: Cow::Borrowed(b"attribute"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value, not enclosed in quotes #[test] fn unquoted() { let mut iter = Attributes::html(r#"tag key=value regular='attribute'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"regular"), value: Cow::Borrowed(b"attribute"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Only attribute key is present #[test] fn key_only() { let mut iter = Attributes::html(r#"tag key regular='attribute'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(&[]), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"regular"), value: Cow::Borrowed(b"attribute"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Key is started with an invalid symbol (a single quote in this test). /// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_start_invalid() { let mut iter = Attributes::html(r#"tag 'key'='value' regular='attribute'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"'key'"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"regular"), value: Cow::Borrowed(b"attribute"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Key contains an invalid symbol (an ampersand in this test). 
/// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_contains_invalid() { let mut iter = Attributes::html(r#"tag key&jey='value' regular='attribute'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key&jey"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"regular"), value: Cow::Borrowed(b"attribute"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute value is missing after `=` #[test] fn missed_value() { let mut iter = Attributes::html(r#"tag key= regular='attribute'"#, 3); // Because we do not check validity of keys and values during parsing, // "regular='attribute'" is considered as unquoted attribute value assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"regular='attribute'"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); //////////////////////////////////////////////////////////////////// let mut iter = Attributes::html(r#"tag key= regular= 'attribute'"#, 3); // Because we do not check validity of keys and values during parsing, // "regular=" is considered as unquoted attribute value assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"regular="), })) ); // Because we do not check validity of keys and values during parsing, // "'attribute'" is considered as key-only attribute assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"'attribute'"), value: Cow::Borrowed(&[]), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); //////////////////////////////////////////////////////////////////// let mut iter = Attributes::html(r#"tag key= regular ='attribute'"#, 3); // Because we do not check validity of keys and values during parsing, // "regular" is considered as unquoted attribute value assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: 
Cow::Borrowed(b"regular"), })) ); // Because we do not check validity of keys and values during parsing, // "='attribute'" is considered as key-only attribute assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"='attribute'"), value: Cow::Borrowed(&[]), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); //////////////////////////////////////////////////////////////////// let mut iter = Attributes::html(r#"tag key= regular = 'attribute'"#, 3); // 0 ^ = 9 ^ = 19 ^ = 30 // Because we do not check validity of keys and values during parsing, // "regular" is considered as unquoted attribute value assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"regular"), })) ); // Because we do not check validity of keys and values during parsing, // "=" is considered as key-only attribute assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"="), value: Cow::Borrowed(&[]), })) ); // Because we do not check validity of keys and values during parsing, // "'attribute'" is considered as key-only attribute assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"'attribute'"), value: Cow::Borrowed(&[]), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } } /// Copy of single, but with additional spaces in markup mod sparsed { use super::*; use pretty_assertions::assert_eq; /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { let mut iter = Attributes::html(r#"tag key = 'value' "#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value enclosed in double quotes #[test] fn double_quoted() { let mut iter = Attributes::html(r#"tag key = "value" "#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a 
value, not enclosed in quotes #[test] fn unquoted() { let mut iter = Attributes::html(r#"tag key = value "#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Only attribute key is present #[test] fn key_only() { let mut iter = Attributes::html(r#"tag key "#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(&[]), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Key is started with an invalid symbol (a single quote in this test). /// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_start_invalid() { let mut iter = Attributes::html(r#"tag 'key' = 'value' "#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"'key'"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Key contains an invalid symbol (an ampersand in this test). 
/// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_contains_invalid() { let mut iter = Attributes::html(r#"tag key&jey = 'value' "#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key&jey"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute value is missing after `=` #[test] fn missed_value() { let mut iter = Attributes::html(r#"tag key = "#, 3); // 0 ^ = 10 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(10)))); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } } /// Checks that duplicated attributes correctly reported and recovering is /// possible after that mod duplicated { use super::*; mod with_check { use super::*; use pretty_assertions::assert_eq; /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { let mut iter = Attributes::html(r#"tag key='value' key='dup' another=''"#, 3); // 0 ^ = 4 ^ = 16 assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4)))); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value enclosed in double quotes #[test] fn double_quoted() { let mut iter = Attributes::html(r#"tag key='value' key="dup" another=''"#, 3); // 0 ^ = 4 ^ = 16 assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4)))); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value, not enclosed in quotes #[test] fn unquoted() { let mut iter = Attributes::html(r#"tag 
key='value' key=dup another=''"#, 3); // 0 ^ = 4 ^ = 16 assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4)))); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Only attribute key is present #[test] fn key_only() { let mut iter = Attributes::html(r#"tag key='value' key another=''"#, 3); // 0 ^ = 4 ^ = 16 assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4)))); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } } /// Check for duplicated names is disabled mod without_check { use super::*; use pretty_assertions::assert_eq; /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { let mut iter = Attributes::html(r#"tag key='value' key='dup' another=''"#, 3); iter.with_checks(false); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"dup"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value enclosed in double quotes #[test] fn double_quoted() { let mut iter = Attributes::html(r#"tag key='value' key="dup" another=''"#, 3); iter.with_checks(false); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"dup"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: 
QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value, not enclosed in quotes #[test] fn unquoted() { let mut iter = Attributes::html(r#"tag key='value' key=dup another=''"#, 3); iter.with_checks(false); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"dup"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Only attribute key is present #[test] fn key_only() { let mut iter = Attributes::html(r#"tag key='value' key another=''"#, 3); iter.with_checks(false); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(&[]), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } } } #[test] fn mixed_quote() { let mut iter = Attributes::html(r#"tag a='a' b = "b" c='cc"cc' d="dd'dd""#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"a"), value: Cow::Borrowed(b"a"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"b"), value: Cow::Borrowed(b"b"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"c"), value: Cow::Borrowed(br#"cc"cc"#), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"d"), value: Cow::Borrowed(b"dd'dd"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } } quick-xml-0.36.1/src/events/mod.rs000064400000000000000000001415170072674642500151060ustar 00000000000000//! Defines zero-copy XML events used throughout this library. //! //! 
//! An XML event often represents part of an XML element.
//! They occur both during reading and writing and are
//! usually used with the stream-oriented API.
//!
//! For example, the XML element
//! ```xml
//! <name attr="value">Inner text</name>
//! ```
//! consists of the three events `Start`, `Text` and `End`.
//! They can also represent other parts in an XML document like the
//! XML declaration. Each Event usually contains further information,
//! like the tag name, the attribute or the inner text.
//!
//! See [`Event`] for a list of all possible events.
//!
//! # Reading
//! When reading an XML stream, the events are emitted by [`Reader::read_event`]
//! and [`Reader::read_event_into`]. You must listen
//! for the different types of events you are interested in.
//!
//! See [`Reader`] for further information.
//!
//! # Writing
//! When writing the XML document, you must create the XML element
//! by constructing the events it consists of and pass them to the writer
//! sequentially.
//!
//! See [`Writer`] for further information.
//!
//! [`Reader::read_event`]: crate::reader::Reader::read_event
//! [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
//! [`Reader`]: crate::reader::Reader
//! [`Writer`]: crate::writer::Writer
//! [`Event`]: crate::events::Event

pub mod attributes;

#[cfg(feature = "encoding")]
use encoding_rs::Encoding;
use std::borrow::Cow;
use std::fmt::{self, Debug, Formatter};
use std::mem::replace;
use std::ops::Deref;
use std::str::from_utf8;

use crate::encoding::Decoder;
use crate::errors::{Error, IllFormedError, Result};
use crate::escape::{
    escape, minimal_escape, partial_escape, resolve_predefined_entity, unescape_with,
};
use crate::name::{LocalName, QName};
#[cfg(feature = "serialize")]
use crate::utils::CowRef;
use crate::utils::{name_len, trim_xml_end, trim_xml_start, write_cow_string};
use attributes::{Attribute, Attributes};

/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`.
/// /// The name can be accessed using the [`name`] or [`local_name`] methods. /// An iterator over the attributes is returned by the [`attributes`] method. /// /// This event implements `Deref`. The `deref()` implementation /// returns the content of this event between `<` and `>` or `/>`: /// /// ``` /// # use quick_xml::events::{BytesStart, Event}; /// # use quick_xml::reader::Reader; /// # use pretty_assertions::assert_eq; /// // Remember, that \ at the end of string literal strips /// // all space characters to the first non-space character /// let mut reader = Reader::from_str("\ /// \ /// " /// ); /// let content = "element a1 = 'val1' a2=\"val2\" "; /// let event = BytesStart::from_content(content, 7); /// /// assert_eq!(reader.read_event().unwrap(), Event::Empty(event.borrow())); /// assert_eq!(reader.read_event().unwrap(), Event::Start(event.borrow())); /// // deref coercion of &BytesStart to &[u8] /// assert_eq!(&event as &[u8], content.as_bytes()); /// // AsRef<[u8]> for &T + deref coercion /// assert_eq!(event.as_ref(), content.as_bytes()); /// ``` /// /// [`name`]: Self::name /// [`local_name`]: Self::local_name /// [`attributes`]: Self::attributes #[derive(Clone, Eq, PartialEq)] pub struct BytesStart<'a> { /// content of the element, before any utf8 conversion pub(crate) buf: Cow<'a, [u8]>, /// end of the element name, the name starts at that the start of `buf` pub(crate) name_len: usize, } impl<'a> BytesStart<'a> { /// Internal constructor, used by `Reader`. Supplies data in reader's encoding #[inline] pub(crate) const fn wrap(content: &'a [u8], name_len: usize) -> Self { BytesStart { buf: Cow::Borrowed(content), name_len, } } /// Creates a new `BytesStart` from the given name. /// /// # Warning /// /// `name` must be a valid name. #[inline] pub fn new>>(name: C) -> Self { let buf = str_cow_to_bytes(name); BytesStart { name_len: buf.len(), buf, } } /// Creates a new `BytesStart` from the given content (name + attributes). 
/// /// # Warning /// /// `&content[..name_len]` must be a valid name, and the remainder of `content` /// must be correctly-formed attributes. Neither are checked, it is possible /// to generate invalid XML if `content` or `name_len` are incorrect. #[inline] pub fn from_content>>(content: C, name_len: usize) -> Self { BytesStart { buf: str_cow_to_bytes(content), name_len, } } /// Converts the event into an owned event. pub fn into_owned(self) -> BytesStart<'static> { BytesStart { buf: Cow::Owned(self.buf.into_owned()), name_len: self.name_len, } } /// Converts the event into an owned event without taking ownership of Event pub fn to_owned(&self) -> BytesStart<'static> { BytesStart { buf: Cow::Owned(self.buf.clone().into_owned()), name_len: self.name_len, } } /// Converts the event into a borrowed event. Most useful when paired with [`to_end`]. /// /// # Example /// /// ``` /// use quick_xml::events::{BytesStart, Event}; /// # use quick_xml::writer::Writer; /// # use quick_xml::Error; /// /// struct SomeStruct<'a> { /// attrs: BytesStart<'a>, /// // ... /// } /// # impl<'a> SomeStruct<'a> { /// # fn example(&self) -> Result<(), Error> { /// # let mut writer = Writer::new(Vec::new()); /// /// writer.write_event(Event::Start(self.attrs.borrow()))?; /// // ... /// writer.write_event(Event::End(self.attrs.to_end()))?; /// # Ok(()) /// # }} /// ``` /// /// [`to_end`]: Self::to_end pub fn borrow(&self) -> BytesStart { BytesStart { buf: Cow::Borrowed(&self.buf), name_len: self.name_len, } } /// Creates new paired close tag #[inline] pub fn to_end(&self) -> BytesEnd { BytesEnd::from(self.name()) } /// Gets the undecoded raw tag name, as present in the input stream. #[inline] pub fn name(&self) -> QName { QName(&self.buf[..self.name_len]) } /// Gets the undecoded raw local tag name (excluding namespace) as present /// in the input stream. /// /// All content up to and including the first `:` character is removed from the tag name. 
#[inline] pub fn local_name(&self) -> LocalName { self.name().into() } /// Edit the name of the BytesStart in-place /// /// # Warning /// /// `name` must be a valid name. pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> { let bytes = self.buf.to_mut(); bytes.splice(..self.name_len, name.iter().cloned()); self.name_len = name.len(); self } /// Gets the undecoded raw tag name, as present in the input stream, which /// is borrowed either to the input, or to the event. /// /// # Lifetimes /// /// - `'a`: Lifetime of the input data from which this event is borrow /// - `'e`: Lifetime of the concrete event instance // TODO: We should made this is a part of public API, but with safe wrapped for a name #[cfg(feature = "serialize")] pub(crate) fn raw_name<'e>(&'e self) -> CowRef<'a, 'e, [u8]> { match self.buf { Cow::Borrowed(b) => CowRef::Input(&b[..self.name_len]), Cow::Owned(ref o) => CowRef::Slice(&o[..self.name_len]), } } } /// Attribute-related methods impl<'a> BytesStart<'a> { /// Consumes `self` and yield a new `BytesStart` with additional attributes from an iterator. /// /// The yielded items must be convertible to [`Attribute`] using `Into`. pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self where I: IntoIterator, I::Item: Into>, { self.extend_attributes(attributes); self } /// Add additional attributes to this tag using an iterator. /// /// The yielded items must be convertible to [`Attribute`] using `Into`. pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a> where I: IntoIterator, I::Item: Into>, { for attr in attributes { self.push_attribute(attr); } self } /// Adds an attribute to this element. 
pub fn push_attribute<'b, A>(&mut self, attr: A) where A: Into>, { self.buf.to_mut().push(b' '); self.push_attr(attr.into()); } /// Remove all attributes from the ByteStart pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> { self.buf.to_mut().truncate(self.name_len); self } /// Returns an iterator over the attributes of this tag. pub fn attributes(&self) -> Attributes { Attributes::wrap(&self.buf, self.name_len, false) } /// Returns an iterator over the HTML-like attributes of this tag (no mandatory quotes or `=`). pub fn html_attributes(&self) -> Attributes { Attributes::wrap(&self.buf, self.name_len, true) } /// Gets the undecoded raw string with the attributes of this tag as a `&[u8]`, /// including the whitespace after the tag name if there is any. #[inline] pub fn attributes_raw(&self) -> &[u8] { &self.buf[self.name_len..] } /// Try to get an attribute pub fn try_get_attribute + Sized>( &'a self, attr_name: N, ) -> Result>> { for a in self.attributes().with_checks(false) { let a = a?; if a.key.as_ref() == attr_name.as_ref() { return Ok(Some(a)); } } Ok(None) } /// Adds an attribute to this element. 
pub(crate) fn push_attr<'b>(&mut self, attr: Attribute<'b>) { let bytes = self.buf.to_mut(); bytes.extend_from_slice(attr.key.as_ref()); bytes.extend_from_slice(b"=\""); // FIXME: need to escape attribute content bytes.extend_from_slice(attr.value.as_ref()); bytes.push(b'"'); } /// Adds new line in existing element pub(crate) fn push_newline(&mut self) { self.buf.to_mut().push(b'\n'); } /// Adds indentation bytes in existing element pub(crate) fn push_indent(&mut self, indent: &[u8]) { self.buf.to_mut().extend_from_slice(indent); } } impl<'a> Debug for BytesStart<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "BytesStart {{ buf: ")?; write_cow_string(f, &self.buf)?; write!(f, ", name_len: {} }}", self.name_len) } } impl<'a> Deref for BytesStart<'a> { type Target = [u8]; fn deref(&self) -> &[u8] { &self.buf } } impl<'a> From> for BytesStart<'a> { #[inline] fn from(name: QName<'a>) -> Self { let name = name.into_inner(); Self::wrap(name, name.len()) } } #[cfg(feature = "arbitrary")] impl<'a> arbitrary::Arbitrary<'a> for BytesStart<'a> { fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result { let s = <&str>::arbitrary(u)?; if s.is_empty() || !s.chars().all(char::is_alphanumeric) { return Err(arbitrary::Error::IncorrectFormat); } let mut result = Self::new(s); result.extend_attributes(Vec::<(&str, &str)>::arbitrary(u)?.into_iter()); Ok(result) } fn size_hint(depth: usize) -> (usize, Option) { return <&str as arbitrary::Arbitrary>::size_hint(depth); } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Closing tag data (`Event::End`): ``. /// /// The name can be accessed using the [`name`] or [`local_name`] methods. /// /// This event implements `Deref`. The `deref()` implementation /// returns the content of this event between ``. 
///
/// Note, that inner text will not contain `>` character inside:
///
/// ```
/// # use quick_xml::events::{BytesEnd, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str(r#"<element></element a1 = 'val1' a2="val2" >"#);
/// // Note, that this entire string considered as a .name()
/// let content = "element a1 = 'val1' a2=\"val2\" ";
/// let event = BytesEnd::new(content);
///
/// reader.config_mut().trim_markup_names_in_closing_tags = false;
/// reader.config_mut().check_end_names = false;
/// reader.read_event().unwrap(); // Skip `<element>`
///
/// assert_eq!(reader.read_event().unwrap(), Event::End(event.borrow()));
/// assert_eq!(event.name().as_ref(), content.as_bytes());
/// // deref coercion of &BytesEnd to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
///
/// [`name`]: Self::name
/// [`local_name`]: Self::local_name
#[derive(Clone, Eq, PartialEq)]
pub struct BytesEnd<'a> {
    name: Cow<'a, [u8]>,
}

impl<'a> BytesEnd<'a> {
    /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
    #[inline]
    pub(crate) const fn wrap(name: Cow<'a, [u8]>) -> Self {
        BytesEnd { name }
    }

    /// Creates a new `BytesEnd` from the given name, which may be either
    /// borrowed or owned.
    ///
    /// # Warning
    ///
    /// `name` must be a valid name.
    #[inline]
    pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
        Self::wrap(str_cow_to_bytes(name))
    }

    /// Converts the event into an owned event.
    pub fn into_owned(self) -> BytesEnd<'static> {
        BytesEnd {
            name: Cow::Owned(self.name.into_owned()),
        }
    }

    /// Converts the event into a borrowed event.
    #[inline]
    pub fn borrow(&self) -> BytesEnd<'_> {
        BytesEnd {
            name: Cow::Borrowed(&self.name),
        }
    }

    /// Gets the undecoded raw tag name, as present in the input stream.
    #[inline]
    pub fn name(&self) -> QName<'_> {
        QName(&self.name)
    }

    /// Gets the undecoded raw local tag name (excluding namespace) as present
    /// in the input stream.
    ///
    /// All content up to and including the first `:` character is removed from the tag name.
    #[inline]
    pub fn local_name(&self) -> LocalName<'_> {
        self.name().into()
    }
}

impl<'a> Debug for BytesEnd<'a> {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        write!(f, "BytesEnd {{ name: ")?;
        write_cow_string(f, &self.name)?;
        write!(f, " }}")
    }
}

impl<'a> Deref for BytesEnd<'a> {
    type Target = [u8];

    fn deref(&self) -> &[u8] {
        &self.name
    }
}

impl<'a> From<QName<'a>> for BytesEnd<'a> {
    #[inline]
    fn from(name: QName<'a>) -> Self {
        Self::wrap(name.into_inner().into())
    }
}

#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesEnd<'a> {
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(<&str>::arbitrary(u)?))
    }

    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Data from various events (most notably, `Event::Text`) that stored in XML
/// in escaped form. Internally data is stored in escaped form.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event. In case of comment this is everything
/// between `<!--` and `-->` and the text of comment will not contain `-->` inside.
/// In case of DTD this is everything between `<!DOCTYPE` + spaces and closing `>`
/// (i.e.
in case of DTD the first character is never space): /// /// ``` /// # use quick_xml::events::{BytesText, Event}; /// # use quick_xml::reader::Reader; /// # use pretty_assertions::assert_eq; /// // Remember, that \ at the end of string literal strips /// // all space characters to the first non-space character /// let mut reader = Reader::from_str("\ /// \ /// comment or text \ /// " /// ); /// let content = "comment or text "; /// let event = BytesText::new(content); /// /// assert_eq!(reader.read_event().unwrap(), Event::DocType(event.borrow())); /// assert_eq!(reader.read_event().unwrap(), Event::Text(event.borrow())); /// assert_eq!(reader.read_event().unwrap(), Event::Comment(event.borrow())); /// // deref coercion of &BytesText to &[u8] /// assert_eq!(&event as &[u8], content.as_bytes()); /// // AsRef<[u8]> for &T + deref coercion /// assert_eq!(event.as_ref(), content.as_bytes()); /// ``` #[derive(Clone, Eq, PartialEq)] pub struct BytesText<'a> { /// Escaped then encoded content of the event. Content is encoded in the XML /// document encoding when event comes from the reader and should be in the /// document encoding when event passed to the writer content: Cow<'a, [u8]>, /// Encoding in which the `content` is stored inside the event decoder: Decoder, } impl<'a> BytesText<'a> { /// Creates a new `BytesText` from an escaped byte sequence in the specified encoding. #[inline] pub(crate) fn wrap>>(content: C, decoder: Decoder) -> Self { Self { content: content.into(), decoder, } } /// Creates a new `BytesText` from an escaped string. #[inline] pub fn from_escaped>>(content: C) -> Self { Self::wrap(str_cow_to_bytes(content), Decoder::utf8()) } /// Creates a new `BytesText` from a string. The string is expected not to /// be escaped. #[inline] pub fn new(content: &'a str) -> Self { Self::from_escaped(escape(content)) } /// Ensures that all data is owned to extend the object's lifetime if /// necessary. 
#[inline] pub fn into_owned(self) -> BytesText<'static> { BytesText { content: self.content.into_owned().into(), decoder: self.decoder, } } /// Extracts the inner `Cow` from the `BytesText` event container. #[inline] pub fn into_inner(self) -> Cow<'a, [u8]> { self.content } /// Converts the event into a borrowed event. #[inline] pub fn borrow(&self) -> BytesText { BytesText { content: Cow::Borrowed(&self.content), decoder: self.decoder, } } /// Decodes then unescapes the content of the event. /// /// This will allocate if the value contains any escape sequences or in /// non-UTF-8 encoding. pub fn unescape(&self) -> Result> { self.unescape_with(resolve_predefined_entity) } /// Decodes then unescapes the content of the event with custom entities. /// /// This will allocate if the value contains any escape sequences or in /// non-UTF-8 encoding. pub fn unescape_with<'entity>( &self, resolve_entity: impl FnMut(&str) -> Option<&'entity str>, ) -> Result> { let decoded = self.decoder.decode_cow(&self.content)?; match unescape_with(&decoded, resolve_entity)? { // Because result is borrowed, no replacements was done and we can use original string Cow::Borrowed(_) => Ok(decoded), Cow::Owned(s) => Ok(s.into()), } } /// Removes leading XML whitespace bytes from text content. /// /// Returns `true` if content is empty after that pub fn inplace_trim_start(&mut self) -> bool { self.content = trim_cow( replace(&mut self.content, Cow::Borrowed(b"")), trim_xml_start, ); self.content.is_empty() } /// Removes trailing XML whitespace bytes from text content. 
/// /// Returns `true` if content is empty after that pub fn inplace_trim_end(&mut self) -> bool { self.content = trim_cow(replace(&mut self.content, Cow::Borrowed(b"")), trim_xml_end); self.content.is_empty() } } impl<'a> Debug for BytesText<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "BytesText {{ content: ")?; write_cow_string(f, &self.content)?; write!(f, " }}") } } impl<'a> Deref for BytesText<'a> { type Target = [u8]; fn deref(&self) -> &[u8] { &self.content } } #[cfg(feature = "arbitrary")] impl<'a> arbitrary::Arbitrary<'a> for BytesText<'a> { fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result { let s = <&str>::arbitrary(u)?; if !s.chars().all(char::is_alphanumeric) { return Err(arbitrary::Error::IncorrectFormat); } Ok(Self::new(s)) } fn size_hint(depth: usize) -> (usize, Option) { return <&str as arbitrary::Arbitrary>::size_hint(depth); } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// CDATA content contains unescaped data from the reader. If you want to write them as a text, /// [convert](Self::escape) it to [`BytesText`]. /// /// This event implements `Deref`. The `deref()` implementation /// returns the content of this event between ``. 
///
/// Note, that inner text will not contain `]]>` sequence inside:
///
/// ```
/// # use quick_xml::events::{BytesCData, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str("<![CDATA[ CDATA section ]]>");
/// let content = " CDATA section ";
/// let event = BytesCData::new(content);
///
/// assert_eq!(reader.read_event().unwrap(), Event::CData(event.borrow()));
/// // deref coercion of &BytesCData to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct BytesCData<'a> {
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event
    decoder: Decoder,
}

impl<'a> BytesCData<'a> {
    /// Creates a new `BytesCData` from a byte sequence in the specified encoding.
    #[inline]
    pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
        Self {
            content: content.into(),
            decoder,
        }
    }

    /// Creates a new `BytesCData` from a string.
    ///
    /// # Warning
    ///
    /// `content` must not contain the `]]>` sequence.
    #[inline]
    pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
        Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
    }

    /// Ensures that all data is owned to extend the object's lifetime if
    /// necessary.
    #[inline]
    pub fn into_owned(self) -> BytesCData<'static> {
        BytesCData {
            content: self.content.into_owned().into(),
            decoder: self.decoder,
        }
    }

    /// Extracts the inner `Cow` from the `BytesCData` event container.
    #[inline]
    pub fn into_inner(self) -> Cow<'a, [u8]> {
        self.content
    }

    /// Converts the event into a borrowed event.
    #[inline]
    pub fn borrow(&self) -> BytesCData<'_> {
        BytesCData {
            content: Cow::Borrowed(&self.content),
            decoder: self.decoder,
        }
    }

    /// Converts this CDATA content to an escaped version, that can be written
    /// as an usual text in XML.
    ///
    /// This function performs following replacements:
    ///
    /// | Character | Replacement
    /// |-----------|------------
    /// | `<`       | `&lt;`
    /// | `>`       | `&gt;`
    /// | `&`       | `&amp;`
    /// | `'`       | `&apos;`
    /// | `"`       | `&quot;`
    pub fn escape(self) -> Result<BytesText<'a>> {
        let decoded = self.decode()?;
        Ok(BytesText::wrap(
            match escape(&decoded) {
                // Because result is borrowed, no replacements was done and we can use original content
                // NOTE(review): the original bytes are reused, but the result is tagged
                // as UTF-8; looks wrong if `self.decoder` is not UTF-8 -- confirm
                Cow::Borrowed(_) => self.content,
                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
            },
            Decoder::utf8(),
        ))
    }

    /// Converts this CDATA content to an escaped version, that can be written
    /// as an usual text in XML.
    ///
    /// In XML text content, it is allowed (though not recommended) to leave
    /// the quote special characters `"` and `'` unescaped.
    ///
    /// This function performs following replacements:
    ///
    /// | Character | Replacement
    /// |-----------|------------
    /// | `<`       | `&lt;`
    /// | `>`       | `&gt;`
    /// | `&`       | `&amp;`
    pub fn partial_escape(self) -> Result<BytesText<'a>> {
        let decoded = self.decode()?;
        Ok(BytesText::wrap(
            match partial_escape(&decoded) {
                // Because result is borrowed, no replacements was done and we can use original content
                // NOTE(review): same encoding concern as in `escape` -- confirm
                Cow::Borrowed(_) => self.content,
                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
            },
            Decoder::utf8(),
        ))
    }

    /// Converts this CDATA content to an escaped version, that can be written
    /// as an usual text in XML. This method escapes only those characters that
    /// must be escaped according to the [specification].
    ///
    /// This function performs following replacements:
    ///
    /// | Character | Replacement
    /// |-----------|------------
    /// | `<`       | `&lt;`
    /// | `&`       | `&amp;`
    ///
    /// [specification]: https://www.w3.org/TR/xml11/#syntax
    pub fn minimal_escape(self) -> Result<BytesText<'a>> {
        let decoded = self.decode()?;
        Ok(BytesText::wrap(
            match minimal_escape(&decoded) {
                // Because result is borrowed, no replacements was done and we can use original content
                // NOTE(review): same encoding concern as in `escape` -- confirm
                Cow::Borrowed(_) => self.content,
                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
            },
            Decoder::utf8(),
        ))
    }

    /// Gets content of this text buffer in the specified encoding
    pub(crate) fn decode(&self) -> Result<Cow<'a, str>> {
        self.decoder.decode_cow(&self.content)
    }
}

impl<'a> Debug for BytesCData<'a> {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        write!(f, "BytesCData {{ content: ")?;
        write_cow_string(f, &self.content)?;
        write!(f, " }}")
    }
}

impl<'a> Deref for BytesCData<'a> {
    type Target = [u8];

    fn deref(&self) -> &[u8] {
        &self.content
    }
}

#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(<&str>::arbitrary(u)?))
    }

    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// [Processing instructions][PI] (PIs) allow documents to contain instructions for applications.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `<?` and `?>`.
///
/// Note, that inner text will not contain `?>` sequence inside:
///
/// ```
/// # use quick_xml::events::{BytesPI, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str("<?processing instruction >:-<~ ?>");
/// let content = "processing instruction >:-<~ ";
/// let event = BytesPI::new(content);
///
/// assert_eq!(reader.read_event().unwrap(), Event::PI(event.borrow()));
/// // deref coercion of &BytesPI to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
///
/// [PI]: https://www.w3.org/TR/xml11/#sec-pi
#[derive(Clone, Eq, PartialEq)]
pub struct BytesPI<'a> {
    content: BytesStart<'a>,
}

impl<'a> BytesPI<'a> {
    /// Creates a new `BytesPI` from a byte sequence in the specified encoding.
    #[inline]
    pub(crate) const fn wrap(content: &'a [u8], target_len: usize) -> Self {
        Self {
            content: BytesStart::wrap(content, target_len),
        }
    }

    /// Creates a new `BytesPI` from a string.
    ///
    /// The length of the target is calculated from the content by the `name_len` helper.
    ///
    /// # Warning
    ///
    /// `content` must not contain the `?>` sequence.
    #[inline]
    pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
        let buf = str_cow_to_bytes(content);
        let name_len = name_len(&buf);
        Self {
            content: BytesStart { buf, name_len },
        }
    }

    /// Ensures that all data is owned to extend the object's lifetime if
    /// necessary.
    #[inline]
    pub fn into_owned(self) -> BytesPI<'static> {
        BytesPI {
            content: self.content.into_owned(),
        }
    }

    /// Extracts the inner `Cow` from the `BytesPI` event container.
    #[inline]
    pub fn into_inner(self) -> Cow<'a, [u8]> {
        self.content.buf
    }

    /// Converts the event into a borrowed event.
    #[inline]
    pub fn borrow(&self) -> BytesPI<'_> {
        BytesPI {
            content: self.content.borrow(),
        }
    }

    /// A target used to identify the application to which the instruction is directed.
    ///
    /// # Example
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use quick_xml::events::BytesPI;
    ///
    /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
    /// assert_eq!(instruction.target(), b"xml-stylesheet");
    /// ```
    #[inline]
    pub fn target(&self) -> &[u8] {
        self.content.name().0
    }

    /// Content of the processing instruction. Contains everything between target
    /// name and the end of the instruction. A direct consequence is that the first
    /// character is always a space character.
    ///
    /// # Example
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use quick_xml::events::BytesPI;
    ///
    /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
    /// assert_eq!(instruction.content(), br#" href="style.css""#);
    /// ```
    #[inline]
    pub fn content(&self) -> &[u8] {
        self.content.attributes_raw()
    }

    /// A view of the processing instructions' content as a list of key-value pairs.
    ///
    /// Key-value pairs are used in some processing instructions, for example in
    /// `<?xml-stylesheet?>`.
    ///
    /// Returned iterator does not validate attribute values as may required by
    /// target's rules. For example, it doesn't check that substring `?>` is not
    /// present in the attribute value. That shouldn't be the problem when event
    /// is produced by the reader, because reader detects end of processing instruction
    /// by the first `?>` sequence, as required by the specification, and therefore
    /// this sequence cannot appear inside it.
    ///
    /// # Example
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use std::borrow::Cow;
    /// use quick_xml::events::attributes::Attribute;
    /// use quick_xml::events::BytesPI;
    /// use quick_xml::name::QName;
    ///
    /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
    /// for attr in instruction.attributes() {
    ///     assert_eq!(attr, Ok(Attribute {
    ///         key: QName(b"href"),
    ///         value: Cow::Borrowed(b"style.css"),
    ///     }));
    /// }
    /// ```
    #[inline]
    pub fn attributes(&self) -> Attributes<'_> {
        self.content.attributes()
    }
}

impl<'a> Debug for BytesPI<'a> {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        write!(f, "BytesPI {{ content: ")?;
        write_cow_string(f, &self.content.buf)?;
        write!(f, " }}")
    }
}

impl<'a> Deref for BytesPI<'a> {
    type Target = [u8];

    fn deref(&self) -> &[u8] {
        &self.content
    }
}

#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesPI<'a> {
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(<&str>::arbitrary(u)?))
    }

    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// An XML declaration (`Event::Decl`).
///
/// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd)
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `<?` and `?>`.
/// /// Note, that inner text will not contain `?>` sequence inside: /// /// ``` /// # use quick_xml::events::{BytesDecl, BytesStart, Event}; /// # use quick_xml::reader::Reader; /// # use pretty_assertions::assert_eq; /// let mut reader = Reader::from_str(""); /// let content = "xml version = '1.0' "; /// let event = BytesDecl::from_start(BytesStart::from_content(content, 3)); /// /// assert_eq!(reader.read_event().unwrap(), Event::Decl(event.borrow())); /// // deref coercion of &BytesDecl to &[u8] /// assert_eq!(&event as &[u8], content.as_bytes()); /// // AsRef<[u8]> for &T + deref coercion /// assert_eq!(event.as_ref(), content.as_bytes()); /// ``` #[derive(Clone, Debug, Eq, PartialEq)] pub struct BytesDecl<'a> { content: BytesStart<'a>, } impl<'a> BytesDecl<'a> { /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`), /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`) /// attribute. /// /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values. /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since /// the double quote character is not allowed in any of the attribute values. 
pub fn new( version: &str, encoding: Option<&str>, standalone: Option<&str>, ) -> BytesDecl<'static> { // Compute length of the buffer based on supplied attributes // ' encoding=""' => 12 let encoding_attr_len = if let Some(xs) = encoding { 12 + xs.len() } else { 0 }; // ' standalone=""' => 14 let standalone_attr_len = if let Some(xs) = standalone { 14 + xs.len() } else { 0 }; // 'xml version=""' => 14 let mut buf = String::with_capacity(14 + encoding_attr_len + standalone_attr_len); buf.push_str("xml version=\""); buf.push_str(version); if let Some(encoding_val) = encoding { buf.push_str("\" encoding=\""); buf.push_str(encoding_val); } if let Some(standalone_val) = standalone { buf.push_str("\" standalone=\""); buf.push_str(standalone_val); } buf.push('"'); BytesDecl { content: BytesStart::from_content(buf, 3), } } /// Creates a `BytesDecl` from a `BytesStart` pub const fn from_start(start: BytesStart<'a>) -> Self { Self { content: start } } /// Gets xml version, excluding quotes (`'` or `"`). /// /// According to the [grammar], the version *must* be the first thing in the declaration. /// This method tries to extract the first thing in the declaration and return it. /// In case of multiple attributes value of the first one is returned. /// /// If version is missed in the declaration, or the first thing is not a version, /// [`IllFormedError::MissingDeclVersion`] will be returned. 
///
/// # Examples
///
/// ```
/// use quick_xml::errors::{Error, IllFormedError};
/// use quick_xml::events::{BytesDecl, BytesStart};
///
/// // <?xml version='1.1'?>
/// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
/// assert_eq!(decl.version().unwrap(), b"1.1".as_ref());
///
/// // <?xml version='1.0' version='1.1'?>
/// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.0' version='1.1'", 0));
/// assert_eq!(decl.version().unwrap(), b"1.0".as_ref());
///
/// // <?xml encoding='utf-8'?>
/// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
/// match decl.version() {
///     Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
///     _ => assert!(false),
/// }
///
/// // <?xml encoding='utf-8' version='1.1'?>
/// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0));
/// match decl.version() {
///     Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
///     _ => assert!(false),
/// }
///
/// // <?xml?>
/// let decl = BytesDecl::from_start(BytesStart::from_content("", 0));
/// match decl.version() {
///     Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))) => {},
///     _ => assert!(false),
/// }
/// ```
///
/// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
pub fn version(&self) -> Result<Cow<[u8]>> {
    // The version *must* be the first thing in the declaration.
    match self.content.attributes().with_checks(false).next() {
        Some(Ok(a)) if a.key.as_ref() == b"version" => Ok(a.value),
        // first attribute was not "version"
        Some(Ok(a)) => {
            let found = from_utf8(a.key.as_ref())?.to_string();
            Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(
                found,
            ))))
        }
        // error parsing attributes
        Some(Err(e)) => Err(e.into()),
        // no attributes
        None => Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))),
    }
}

/// Gets xml encoding, excluding quotes (`'` or `"`).
///
/// Although according to the [grammar] encoding must appear before `"standalone"`
/// and after `"version"`, this method does not check that. The first occurrence
/// of the attribute will be returned even if there are several. Also, the method does
/// not restrict the symbols that can form the encoding, so the returned encoding
/// name may not correspond to the grammar.
///
/// # Examples
///
/// ```
/// use std::borrow::Cow;
/// use quick_xml::Error;
/// use quick_xml::events::{BytesDecl, BytesStart};
///
/// // <?xml version='1.1'?>
/// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
/// assert!(decl.encoding().is_none());
///
/// // <?xml encoding='utf-8'?>
/// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
/// match decl.encoding() {
///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"utf-8"),
///     _ => assert!(false),
/// }
///
/// // <?xml encoding='something_WRONG' encoding='utf-8'?>
/// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='something_WRONG' encoding='utf-8'", 0));
/// match decl.encoding() {
///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"something_WRONG"),
///     _ => assert!(false),
/// }
/// ```
///
/// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
pub fn encoding(&self) -> Option<Result<Cow<[u8]>>> {
    self.content
        .try_get_attribute("encoding")
        .map(|a| a.map(|a| a.value))
        .transpose()
}

/// Gets xml standalone, excluding quotes (`'` or `"`).
///
/// Although according to the [grammar] standalone flag must appear after `"version"`
/// and `"encoding"`, this method does not check that. The first occurrence of the
/// attribute will be returned even if there are several. Also, the method does not
/// restrict the symbols that can form the value, so the returned flag name may not
/// correspond to the grammar.
///
/// # Examples
///
/// ```
/// use std::borrow::Cow;
/// use quick_xml::Error;
/// use quick_xml::events::{BytesDecl, BytesStart};
///
/// // <?xml version='1.1'?>
/// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
/// assert!(decl.standalone().is_none());
///
/// // <?xml standalone='yes'?>
/// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='yes'", 0));
/// match decl.standalone() {
///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"yes"),
///     _ => assert!(false),
/// }
///
/// // <?xml standalone='something_WRONG' encoding='utf-8'?>
/// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='something_WRONG' encoding='utf-8'", 0));
/// match decl.standalone() {
///     Some(Ok(Cow::Borrowed(flag))) => assert_eq!(flag, b"something_WRONG"),
///     _ => assert!(false),
/// }
/// ```
///
/// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
pub fn standalone(&self) -> Option<Result<Cow<[u8]>>> {
    self.content
        .try_get_attribute("standalone")
        .map(|a| a.map(|a| a.value))
        .transpose()
}

/// Gets the actual encoding using [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get)
/// algorithm.
///
/// If encoding is not known, or `encoding` key was not found, returns `None`.
/// In case of duplicated `encoding` key, encoding, corresponding to the first
/// one, is returned.
#[cfg(feature = "encoding")]
pub fn encoder(&self) -> Option<&'static Encoding> {
    self.encoding()
        .and_then(|e| e.ok())
        .and_then(|e| Encoding::for_label(&e))
}

/// Converts the event into an owned event.
pub fn into_owned(self) -> BytesDecl<'static> {
    BytesDecl {
        content: self.content.into_owned(),
    }
}

/// Converts the event into a borrowed event.
#[inline]
pub fn borrow(&self) -> BytesDecl {
    BytesDecl {
        content: self.content.borrow(),
    }
}
}

impl<'a> Deref for BytesDecl<'a> {
    type Target = [u8];

    fn deref(&self) -> &[u8] {
        &self.content
    }
}

#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesDecl<'a> {
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(
            <&str>::arbitrary(u)?,
            Option::<&str>::arbitrary(u)?,
            Option::<&str>::arbitrary(u)?,
        ))
    }

    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        // Only the hint of the first `&str` argument is reported.
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Event emitted by [`Reader::read_event_into`].
///
/// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum Event<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Empty element tag (with attributes) `<tag attr="value" />`.
    Empty(BytesStart<'a>),
    /// Escaped character data between tags.
    Text(BytesText<'a>),
    /// Unescaped character data stored in `<![CDATA[...]]>`.
    CData(BytesCData<'a>),
    /// Comment `<!-- ... -->`.
    Comment(BytesText<'a>),
    /// XML declaration `<?xml ...?>`.
    Decl(BytesDecl<'a>),
    /// Processing instruction `<?...?>`.
    PI(BytesPI<'a>),
    /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
    DocType(BytesText<'a>),
    /// End of XML document.
    Eof,
}

impl<'a> Event<'a> {
    /// Converts the event to an owned version, untied to the lifetime of
    /// buffer used when reading but incurring a new, separate allocation.
pub fn into_owned(self) -> Event<'static> {
    match self {
        Event::Start(e) => Event::Start(e.into_owned()),
        Event::End(e) => Event::End(e.into_owned()),
        Event::Empty(e) => Event::Empty(e.into_owned()),
        Event::Text(e) => Event::Text(e.into_owned()),
        Event::Comment(e) => Event::Comment(e.into_owned()),
        Event::CData(e) => Event::CData(e.into_owned()),
        Event::Decl(e) => Event::Decl(e.into_owned()),
        Event::PI(e) => Event::PI(e.into_owned()),
        Event::DocType(e) => Event::DocType(e.into_owned()),
        Event::Eof => Event::Eof,
    }
}

/// Converts the event into a borrowed event.
#[inline]
pub fn borrow(&self) -> Event {
    match self {
        Event::Start(e) => Event::Start(e.borrow()),
        Event::End(e) => Event::End(e.borrow()),
        Event::Empty(e) => Event::Empty(e.borrow()),
        Event::Text(e) => Event::Text(e.borrow()),
        Event::Comment(e) => Event::Comment(e.borrow()),
        Event::CData(e) => Event::CData(e.borrow()),
        Event::Decl(e) => Event::Decl(e.borrow()),
        Event::PI(e) => Event::PI(e.borrow()),
        Event::DocType(e) => Event::DocType(e.borrow()),
        Event::Eof => Event::Eof,
    }
}
}

impl<'a> Deref for Event<'a> {
    type Target = [u8];

    /// Exposes the raw bytes held by the event; `Eof` yields an empty slice.
    fn deref(&self) -> &[u8] {
        match *self {
            Event::Start(ref e) | Event::Empty(ref e) => e,
            Event::End(ref e) => e,
            Event::Text(ref e) => e,
            Event::Decl(ref e) => e,
            Event::PI(ref e) => e,
            Event::CData(ref e) => e,
            Event::Comment(ref e) => e,
            Event::DocType(ref e) => e,
            Event::Eof => &[],
        }
    }
}

impl<'a> AsRef<Event<'a>> for Event<'a> {
    fn as_ref(&self) -> &Event<'a> {
        self
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Converts string-like content into a byte `Cow`, keeping borrowed data
/// borrowed and reusing the allocation of owned data.
#[inline]
fn str_cow_to_bytes<'a, C: Into<Cow<'a, str>>>(content: C) -> Cow<'a, [u8]> {
    match content.into() {
        Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
        Cow::Owned(s) => Cow::Owned(s.into_bytes()),
    }
}

/// Applies `trim` to the bytes of `value`, preserving the `Cow` variant.
///
/// Borrowed data is simply re-sliced. Owned data is re-allocated only when
/// `trim` actually shortened it.
fn trim_cow<'a, F>(value: Cow<'a, [u8]>, trim: F) -> Cow<'a, [u8]>
where
    F: FnOnce(&[u8]) -> &[u8],
{
    match value {
        Cow::Borrowed(bytes) => Cow::Borrowed(trim(bytes)),
        Cow::Owned(mut bytes) => {
            let trimmed = trim(&bytes);
            if trimmed.len() != bytes.len() {
                bytes = trimmed.to_vec();
            }
            Cow::Owned(bytes)
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    #[test]
    fn bytestart_create() {
        let b = BytesStart::new("test");
        assert_eq!(b.len(), 4);
        assert_eq!(b.name(), QName(b"test"));
    }

    #[test]
    fn bytestart_set_name() {
        let mut b = BytesStart::new("test");
        assert_eq!(b.len(), 4);
        assert_eq!(b.name(), QName(b"test"));
        assert_eq!(b.attributes_raw(), b"");
        b.push_attribute(("x", "a"));
        assert_eq!(b.len(), 10);
        assert_eq!(b.attributes_raw(), b" x=\"a\"");
        b.set_name(b"g");
        assert_eq!(b.len(), 7);
        assert_eq!(b.name(), QName(b"g"));
    }

    #[test]
    fn bytestart_clear_attributes() {
        let mut b = BytesStart::new("test");
        b.push_attribute(("x", "y\"z"));
        b.push_attribute(("x", "y\"z"));
        b.clear_attributes();
        assert!(b.attributes().next().is_none());
        assert_eq!(b.len(), 4);
        assert_eq!(b.name(), QName(b"test"));
    }
}
quick-xml-0.36.1/src/lib.rs000064400000000000000000000053220072674642500135620ustar 00000000000000
//! High performance XML reader/writer.
//!
//! # Description
//!
//! quick-xml contains two modes of operation:
//!
//! A streaming API based on the [StAX] model. This is suited for larger XML documents which
//! cannot completely read into memory at once.
//!
//! The user has to explicitly _ask_ for the next XML event, similar to a database cursor.
//! This is achieved by the following two structs:
//!
//! - [`Reader`]: A low level XML pull-reader where buffer allocation/clearing is left to user.
//! - [`Writer`]: A XML writer. Can be nested with readers if you want to transform XMLs.
//!
//! Especially for nested XML elements, the user must keep track _where_ (how deep)
//! in the XML document the current event is located.
//!
//! quick-xml contains optional support of asynchronous reading and writing using [tokio].
//! To get it enable the [`async-tokio`](#async-tokio) feature.
//!
//! Furthermore, quick-xml also contains optional [Serde] support to directly
//!
serialize and deserialize from structs, without having to deal with the XML events. //! To get it enable the [`serialize`](#serialize) feature. Read more about mapping Rust types //! to XML in the documentation of [`de`] module. Also check [`serde_helpers`] //! module. //! //! # Examples //! //! - For a reading example see [`Reader`] //! - For a writing example see [`Writer`] //! //! # Features //! //! `quick-xml` supports the following features: //! //! [StAX]: https://en.wikipedia.org/wiki/StAX //! [tokio]: https://tokio.rs/ //! [Serde]: https://serde.rs/ //! [`de`]: ./de/index.html #![cfg_attr( feature = "document-features", cfg_attr(doc, doc = ::document_features::document_features!( feature_label = "{feature}" )) )] #![forbid(unsafe_code)] #![deny(missing_docs)] #![recursion_limit = "1024"] // Enable feature requirements in the docs from 1.57 // See https://stackoverflow.com/questions/61417452 // docs.rs defines `docsrs` when building documentation #![cfg_attr(docsrs, feature(doc_auto_cfg))] #[cfg(feature = "serialize")] pub mod de; pub mod encoding; pub mod errors; pub mod escape; pub mod events; pub mod name; pub mod parser; pub mod reader; #[cfg(feature = "serialize")] pub mod se; #[cfg(feature = "serde-types")] pub mod serde_helpers; /// Not an official API, public for integration tests #[doc(hidden)] pub mod utils; pub mod writer; // reexports pub use crate::encoding::Decoder; #[cfg(feature = "serialize")] pub use crate::errors::serialize::DeError; pub use crate::errors::{Error, Result}; pub use crate::reader::{NsReader, Reader}; pub use crate::writer::{ElementWriter, Writer}; quick-xml-0.36.1/src/name.rs000064400000000000000000001333670072674642500137470ustar 00000000000000//! Module for handling names according to the W3C [Namespaces in XML 1.1 (Second Edition)][spec] //! specification //! //! 
[spec]: https://www.w3.org/TR/xml-names11 use crate::errors::{Error, Result}; use crate::events::attributes::Attribute; use crate::events::BytesStart; use crate::utils::write_byte_string; use memchr::memchr; use std::fmt::{self, Debug, Formatter}; /// A [qualified name] of an element or an attribute, including an optional /// namespace [prefix](Prefix) and a [local name](LocalName). /// /// [qualified name]: https://www.w3.org/TR/xml-names11/#dt-qualname #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde-types", derive(serde::Deserialize, serde::Serialize))] pub struct QName<'a>(pub &'a [u8]); impl<'a> QName<'a> { /// Converts this name to an internal slice representation. #[inline(always)] pub const fn into_inner(self) -> &'a [u8] { self.0 } /// Returns local part of this qualified name. /// /// All content up to and including the first `:` character is removed from /// the tag name. /// /// # Examples /// /// ``` /// # use quick_xml::name::QName; /// let simple = QName(b"simple-name"); /// assert_eq!(simple.local_name().as_ref(), b"simple-name"); /// /// let qname = QName(b"namespace:simple-name"); /// assert_eq!(qname.local_name().as_ref(), b"simple-name"); /// ``` pub fn local_name(&self) -> LocalName<'a> { LocalName(self.index().map_or(self.0, |i| &self.0[i + 1..])) } /// Returns namespace part of this qualified name or `None` if namespace part /// is not defined (symbol `':'` not found). /// /// # Examples /// /// ``` /// # use std::convert::AsRef; /// # use quick_xml::name::QName; /// let simple = QName(b"simple-name"); /// assert_eq!(simple.prefix(), None); /// /// let qname = QName(b"prefix:simple-name"); /// assert_eq!(qname.prefix().as_ref().map(|n| n.as_ref()), Some(b"prefix".as_ref())); /// ``` pub fn prefix(&self) -> Option> { self.index().map(|i| Prefix(&self.0[..i])) } /// The same as `(qname.local_name(), qname.prefix())`, but does only one /// lookup for a `':'` symbol. 
pub fn decompose(&self) -> (LocalName<'a>, Option>) { match self.index() { None => (LocalName(self.0), None), Some(i) => (LocalName(&self.0[i + 1..]), Some(Prefix(&self.0[..i]))), } } /// If that `QName` represents `"xmlns"` series of names, returns `Some`, /// otherwise `None` is returned. /// /// # Examples /// /// ``` /// # use quick_xml::name::{QName, PrefixDeclaration}; /// let qname = QName(b"xmlns"); /// assert_eq!(qname.as_namespace_binding(), Some(PrefixDeclaration::Default)); /// /// let qname = QName(b"xmlns:prefix"); /// assert_eq!(qname.as_namespace_binding(), Some(PrefixDeclaration::Named(b"prefix"))); /// /// // Be aware that this method does not check the validity of the prefix - it can be empty! /// let qname = QName(b"xmlns:"); /// assert_eq!(qname.as_namespace_binding(), Some(PrefixDeclaration::Named(b""))); /// /// let qname = QName(b"other-name"); /// assert_eq!(qname.as_namespace_binding(), None); /// /// // https://www.w3.org/TR/xml-names11/#xmlReserved /// let qname = QName(b"xmlns-reserved-name"); /// assert_eq!(qname.as_namespace_binding(), None); /// ``` pub fn as_namespace_binding(&self) -> Option> { if self.0.starts_with(b"xmlns") { return match self.0.get(5) { None => Some(PrefixDeclaration::Default), Some(&b':') => Some(PrefixDeclaration::Named(&self.0[6..])), _ => None, }; } None } /// Returns the index in the name where prefix ended #[inline(always)] fn index(&self) -> Option { memchr(b':', self.0) } } impl<'a> Debug for QName<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "QName(")?; write_byte_string(f, self.0)?; write!(f, ")") } } impl<'a> AsRef<[u8]> for QName<'a> { #[inline] fn as_ref(&self) -> &[u8] { self.0 } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A [local (unqualified) name] of an element or an attribute, i.e. a name /// without [prefix](Prefix). 
///
/// [local (unqualified) name]: https://www.w3.org/TR/xml-names11/#dt-localname
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde-types", derive(serde::Deserialize, serde::Serialize))]
pub struct LocalName<'a>(&'a [u8]);

impl<'a> LocalName<'a> {
    /// Converts this name to an internal slice representation.
    #[inline(always)]
    pub const fn into_inner(self) -> &'a [u8] {
        self.0
    }
}

impl<'a> Debug for LocalName<'a> {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        write!(f, "LocalName(")?;
        write_byte_string(f, self.0)?;
        write!(f, ")")
    }
}

impl<'a> AsRef<[u8]> for LocalName<'a> {
    #[inline]
    fn as_ref(&self) -> &[u8] {
        self.0
    }
}

impl<'a> From<QName<'a>> for LocalName<'a> {
    /// Creates `LocalName` from a [`QName`]
    ///
    /// # Examples
    ///
    /// ```
    /// # use quick_xml::name::{LocalName, QName};
    ///
    /// let local: LocalName = QName(b"unprefixed").into();
    /// assert_eq!(local.as_ref(), b"unprefixed");
    ///
    /// let local: LocalName = QName(b"some:prefix").into();
    /// assert_eq!(local.as_ref(), b"prefix");
    /// ```
    #[inline]
    fn from(name: QName<'a>) -> Self {
        Self(name.index().map_or(name.0, |i| &name.0[i + 1..]))
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// A [namespace prefix] part of the [qualified name](QName) of an element tag
/// or an attribute: a `prefix` in `<prefix:local-element-name>` or
/// `prefix:local-attribute-name="attribute value"`.
/// /// [namespace prefix]: https://www.w3.org/TR/xml-names11/#dt-prefix #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde-types", derive(serde::Deserialize, serde::Serialize))] pub struct Prefix<'a>(&'a [u8]); impl<'a> Prefix<'a> { /// Extracts internal slice #[inline(always)] pub const fn into_inner(self) -> &'a [u8] { self.0 } } impl<'a> Debug for Prefix<'a> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!(f, "Prefix(")?; write_byte_string(f, self.0)?; write!(f, ")") } } impl<'a> AsRef<[u8]> for Prefix<'a> { #[inline] fn as_ref(&self) -> &[u8] { self.0 } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A namespace prefix declaration, `xmlns` or `xmlns:`, as defined in /// [XML Schema specification](https://www.w3.org/TR/xml-names11/#ns-decl) #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum PrefixDeclaration<'a> { /// XML attribute binds a default namespace. Corresponds to `xmlns` in `xmlns="..."` Default, /// XML attribute binds a specified prefix to a namespace. Corresponds to a /// `prefix` in `xmlns:prefix="..."`, which is stored as payload of this variant. Named(&'a [u8]), } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A [namespace name] that is declared in a `xmlns[:prefix]="namespace name"`. /// /// [namespace name]: https://www.w3.org/TR/xml-names11/#dt-NSName #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde-types", derive(serde::Deserialize, serde::Serialize))] pub struct Namespace<'a>(pub &'a [u8]); impl<'a> Namespace<'a> { /// Converts this namespace to an internal slice representation. /// /// This is [non-normalized] attribute value, i.e. any entity references is /// not expanded and space characters are not removed. 
This means, that /// different byte slices, returned from this method, can represent the same /// namespace and would be treated by parser as identical. /// /// For example, if the entity **eacute** has been defined to be **é**, /// the empty tags below all contain namespace declarations binding the /// prefix `p` to the same [IRI reference], `http://example.org/rosé`. /// /// ```xml /// /// /// /// /// /// ``` /// /// This is because XML entity references are expanded during attribute value /// normalization. /// /// [non-normalized]: https://www.w3.org/TR/xml11/#AVNormalize /// [IRI reference]: https://datatracker.ietf.org/doc/html/rfc3987 #[inline(always)] pub const fn into_inner(self) -> &'a [u8] { self.0 } //TODO: implement value normalization and use it when comparing namespaces } impl<'a> Debug for Namespace<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "Namespace(")?; write_byte_string(f, self.0)?; write!(f, ")") } } impl<'a> AsRef<[u8]> for Namespace<'a> { #[inline] fn as_ref(&self) -> &[u8] { self.0 } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Result of [prefix] resolution which creates by [`NsReader::resolve_attribute`], /// [`NsReader::resolve_element`], [`NsReader::read_resolved_event`] and /// [`NsReader::read_resolved_event_into`] methods. 
///
/// [prefix]: Prefix
/// [`NsReader::resolve_attribute`]: crate::reader::NsReader::resolve_attribute
/// [`NsReader::resolve_element`]: crate::reader::NsReader::resolve_element
/// [`NsReader::read_resolved_event`]: crate::reader::NsReader::read_resolved_event
/// [`NsReader::read_resolved_event_into`]: crate::reader::NsReader::read_resolved_event_into
#[derive(Clone, PartialEq, Eq, Hash)]
pub enum ResolveResult<'ns> {
    /// Qualified name does not contain prefix, and resolver does not define
    /// default namespace, so name is not bound to any namespace
    Unbound,
    /// [`Prefix`] resolved to the specified namespace
    Bound(Namespace<'ns>),
    /// Specified prefix was not found in scope
    Unknown(Vec<u8>),
}

impl<'ns> Debug for ResolveResult<'ns> {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        match self {
            Self::Unbound => write!(f, "Unbound"),
            Self::Bound(ns) => write!(f, "Bound({:?})", ns),
            Self::Unknown(p) => {
                write!(f, "Unknown(")?;
                write_byte_string(f, p)?;
                write!(f, ")")
            }
        }
    }
}

impl<'ns> TryFrom<ResolveResult<'ns>> for Option<Namespace<'ns>> {
    type Error = Error;

    /// Try to convert this result to an optional namespace and returns
    /// [`Error::UnknownPrefix`] if this result represents unknown prefix
    fn try_from(result: ResolveResult<'ns>) -> Result<Self> {
        use ResolveResult::*;

        match result {
            Unbound => Ok(None),
            Bound(ns) => Ok(Some(ns)),
            Unknown(p) => Err(Error::UnknownPrefix(p)),
        }
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// An entry that contains index into the buffer with namespace bindings.
///
/// Defines a mapping from *[namespace prefix]* to *[namespace name]*.
/// If prefix is empty, defines a *default namespace* binding that applies to
/// unprefixed element names (unprefixed attribute names do not bind to any
/// namespace and their processing is dependent on the element in which they are
/// defined).
/// /// [namespace prefix]: https://www.w3.org/TR/xml-names11/#dt-prefix /// [namespace name]: https://www.w3.org/TR/xml-names11/#dt-NSName #[derive(Debug, Clone)] struct NamespaceEntry { /// Index of the namespace in the buffer start: usize, /// Length of the prefix /// * if greater than zero, then binds this namespace to the slice /// `[start..start + prefix_len]` in the buffer. /// * else defines the current default namespace. prefix_len: usize, /// The length of a namespace name (the URI) of this namespace declaration. /// Name started just after prefix and extend for `value_len` bytes. /// /// The XML standard [specifies] that an empty namespace value 'removes' a namespace declaration /// for the extent of its scope. For prefix declarations that's not very interesting, but it is /// vital for default namespace declarations. With `xmlns=""` you can revert back to the default /// behaviour of leaving unqualified element names unqualified. /// /// [specifies]: https://www.w3.org/TR/xml-names11/#scoping value_len: usize, /// Level of nesting at which this namespace was declared. The declaring element is included, /// i.e., a declaration on the document root has `level = 1`. /// This is used to pop the namespace when the element gets closed. level: i32, } impl NamespaceEntry { /// Get the namespace prefix, bound to this namespace declaration, or `None`, /// if this declaration is for default namespace (`xmlns="..."`). 
#[inline] fn prefix<'b>(&self, ns_buffer: &'b [u8]) -> Option> { if self.prefix_len == 0 { None } else { Some(Prefix(&ns_buffer[self.start..self.start + self.prefix_len])) } } /// Gets the namespace name (the URI) slice out of namespace buffer /// /// Returns `None` if namespace for this prefix was explicitly removed from /// scope, using `xmlns[:prefix]=""` #[inline] fn namespace<'ns>(&self, buffer: &'ns [u8]) -> ResolveResult<'ns> { if self.value_len == 0 { ResolveResult::Unbound } else { let start = self.start + self.prefix_len; ResolveResult::Bound(Namespace(&buffer[start..start + self.value_len])) } } } /// A namespace management buffer. /// /// Holds all internal logic to push/pop namespaces with their levels. #[derive(Debug, Clone)] pub(crate) struct NamespaceResolver { /// Buffer that contains names of namespace prefixes (the part between `xmlns:` /// and an `=`) and namespace values. buffer: Vec, /// A stack of namespace bindings to prefixes that currently in scope bindings: Vec, /// The number of open tags at the moment. We need to keep track of this to know which namespace /// declarations to remove when we encounter an `End` event. nesting_level: i32, } /// That constant define the one of [reserved namespaces] for the xml standard. /// /// The prefix `xml` is by definition bound to the namespace name /// `http://www.w3.org/XML/1998/namespace`. It may, but need not, be declared, and must not be /// undeclared or bound to any other namespace name. Other prefixes must not be bound to this /// namespace name, and it must not be declared as the default namespace. /// /// [reserved namespaces]: https://www.w3.org/TR/xml-names11/#xmlReserved const RESERVED_NAMESPACE_XML: (Prefix, Namespace) = ( Prefix(b"xml"), Namespace(b"http://www.w3.org/XML/1998/namespace"), ); /// That constant define the one of [reserved namespaces] for the xml standard. 
/// /// The prefix `xmlns` is used only to declare namespace bindings and is by definition bound /// to the namespace name `http://www.w3.org/2000/xmlns/`. It must not be declared or /// undeclared. Other prefixes must not be bound to this namespace name, and it must not be /// declared as the default namespace. Element names must not have the prefix `xmlns`. /// /// [reserved namespaces]: https://www.w3.org/TR/xml-names11/#xmlReserved const RESERVED_NAMESPACE_XMLNS: (Prefix, Namespace) = ( Prefix(b"xmlns"), Namespace(b"http://www.w3.org/2000/xmlns/"), ); impl Default for NamespaceResolver { fn default() -> Self { let mut buffer = Vec::new(); let mut bindings = Vec::new(); for ent in &[RESERVED_NAMESPACE_XML, RESERVED_NAMESPACE_XMLNS] { let prefix = ent.0.into_inner(); let uri = ent.1.into_inner(); bindings.push(NamespaceEntry { start: buffer.len(), prefix_len: prefix.len(), value_len: uri.len(), level: 0, }); buffer.extend(prefix); buffer.extend(uri); } Self { buffer, bindings, nesting_level: 0, } } } impl NamespaceResolver { /// Begins a new scope and add to it all [namespace bindings] that found in /// the specified start element. /// /// [namespace binding]: https://www.w3.org/TR/xml-names11/#dt-NSDecl pub fn push(&mut self, start: &BytesStart) -> Result<()> { self.nesting_level += 1; let level = self.nesting_level; // adds new namespaces for attributes starting with 'xmlns:' and for the 'xmlns' // (default namespace) attribute. 
for a in start.attributes().with_checks(false) { if let Ok(Attribute { key: k, value: v }) = a { match k.as_namespace_binding() { Some(PrefixDeclaration::Default) => { let start = self.buffer.len(); self.buffer.extend_from_slice(&v); self.bindings.push(NamespaceEntry { start, prefix_len: 0, value_len: v.len(), level, }); } Some(PrefixDeclaration::Named(b"xml")) => { if Namespace(&v) != RESERVED_NAMESPACE_XML.1 { // error, `xml` prefix explicitly set to different value return Err(Error::InvalidPrefixBind { prefix: b"xml".to_vec(), namespace: v.to_vec(), }); } // don't add another NamespaceEntry for the `xml` namespace prefix } Some(PrefixDeclaration::Named(b"xmlns")) => { // error, `xmlns` prefix explicitly set return Err(Error::InvalidPrefixBind { prefix: b"xmlns".to_vec(), namespace: v.to_vec(), }); } Some(PrefixDeclaration::Named(prefix)) => { let ns = Namespace(&v); if ns == RESERVED_NAMESPACE_XML.1 || ns == RESERVED_NAMESPACE_XMLNS.1 { // error, non-`xml` prefix set to xml uri // error, non-`xmlns` prefix set to xmlns uri return Err(Error::InvalidPrefixBind { prefix: prefix.to_vec(), namespace: v.to_vec(), }); } let start = self.buffer.len(); self.buffer.extend_from_slice(prefix); self.buffer.extend_from_slice(&v); self.bindings.push(NamespaceEntry { start, prefix_len: prefix.len(), value_len: v.len(), level, }); } None => {} } } else { break; } } Ok(()) } /// Ends a top-most scope by popping all [namespace binding], that was added by /// last call to [`Self::push()`]. 
/// /// [namespace binding]: https://www.w3.org/TR/xml-names11/#dt-NSDecl pub fn pop(&mut self) { self.nesting_level -= 1; let current_level = self.nesting_level; // from the back (most deeply nested scope), look for the first scope that is still valid match self.bindings.iter().rposition(|n| n.level <= current_level) { // none of the namespaces are valid, remove all of them None => { self.buffer.clear(); self.bindings.clear(); } // drop all namespaces past the last valid namespace Some(last_valid_pos) => { if let Some(len) = self.bindings.get(last_valid_pos + 1).map(|n| n.start) { self.buffer.truncate(len); self.bindings.truncate(last_valid_pos + 1); } } } } /// Resolves a potentially qualified **element name** or **attribute name** /// into (namespace name, local name). /// /// *Qualified* names have the form `prefix:local-name` where the `prefix` is /// defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`. /// The namespace prefix can be defined on the same element as the element or /// attribute in question. /// /// *Unqualified* attribute names do *not* inherit the current *default namespace*. /// /// # Lifetimes /// /// - `'n`: lifetime of an attribute or an element name #[inline] pub fn resolve<'n>( &self, name: QName<'n>, use_default: bool, ) -> (ResolveResult, LocalName<'n>) { let (local_name, prefix) = name.decompose(); (self.resolve_prefix(prefix, use_default), local_name) } /// Finds a [namespace name] for a given qualified **element name**, borrow /// it from the internal buffer. 
/// /// Returns `None`, if: /// - name is unqualified /// - prefix not found in the current scope /// - prefix was [unbound] using `xmlns:prefix=""` /// /// [namespace name]: https://www.w3.org/TR/xml-names11/#dt-NSName /// [unbound]: https://www.w3.org/TR/xml-names11/#scoping #[inline] pub fn find(&self, element_name: QName) -> ResolveResult { self.resolve_prefix(element_name.prefix(), true) } fn resolve_prefix(&self, prefix: Option, use_default: bool) -> ResolveResult { self.bindings .iter() // Find the last defined binding that corresponds to the given prefix .rev() .find_map(|n| match (n.prefix(&self.buffer), prefix) { // This is default namespace definition and name has no explicit prefix (None, None) if use_default => Some(n.namespace(&self.buffer)), (None, None) => Some(ResolveResult::Unbound), // One part has prefix but other is not -> skip (None, Some(_)) => None, (Some(_), None) => None, // Prefixes does not match -> skip (Some(definition), Some(usage)) if definition != usage => None, // Prefixes the same, entry defines binding reset (corresponds to `xmlns:p=""`) _ if n.value_len == 0 => Some(Self::maybe_unknown(prefix)), // Prefixes the same, returns corresponding namespace _ => Some(n.namespace(&self.buffer)), }) .unwrap_or_else(|| Self::maybe_unknown(prefix)) } #[inline] fn maybe_unknown(prefix: Option) -> ResolveResult<'static> { match prefix { Some(p) => ResolveResult::Unknown(p.into_inner().to_vec()), None => ResolveResult::Unbound, } } #[inline] pub const fn iter(&self) -> PrefixIter { PrefixIter { resolver: self, // We initialize the cursor to 2 to skip the two default namespaces xml: and xmlns: bindings_cursor: 2, } } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Iterator on the current declared prefixes. /// /// See [`NsReader::prefixes`](crate::NsReader::prefixes) for documentation. 
#[derive(Debug, Clone)]
pub struct PrefixIter<'a> {
    resolver: &'a NamespaceResolver,
    bindings_cursor: usize,
}

impl<'a> Iterator for PrefixIter<'a> {
    type Item = (PrefixDeclaration<'a>, Namespace<'a>);

    fn next(&mut self) -> Option<(PrefixDeclaration<'a>, Namespace<'a>)> {
        while let Some(namespace_entry) = self.resolver.bindings.get(self.bindings_cursor) {
            self.bindings_cursor += 1; // We increment for next read

            // We check if the key has not been overridden by having a look
            // at the namespaces declared after in the array
            let prefix = namespace_entry.prefix(&self.resolver.buffer);
            if self.resolver.bindings[self.bindings_cursor..]
                .iter()
                .any(|ne| prefix == ne.prefix(&self.resolver.buffer))
            {
                continue; // Overridden
            }
            let namespace = if let ResolveResult::Bound(namespace) =
                namespace_entry.namespace(&self.resolver.buffer)
            {
                namespace
            } else {
                continue; // We don't return unbound namespaces
            };
            let prefix = if let Some(Prefix(prefix)) = prefix {
                PrefixDeclaration::Named(prefix)
            } else {
                PrefixDeclaration::Default
            };
            return Some((prefix, namespace));
        }
        None // We have exhausted the array
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        // Real count could be less if some namespaces was overridden.
        // `saturating_sub` guards against the cursor (which starts at 2) being
        // past the end when the resolver holds fewer bindings, e.g. after an
        // unbalanced `pop()` cleared them.
        let remaining = self
            .resolver
            .bindings
            .len()
            .saturating_sub(self.bindings_cursor);
        (0, Some(remaining))
    }
}

#[cfg(test)]
mod namespaces {
    use super::*;
    use pretty_assertions::assert_eq;
    use ResolveResult::*;

    /// Unprefixed attribute names (resolved with `false` flag) never have a namespace
    /// according to the Namespaces in XML 1.1 specification:
    ///
    /// > A default namespace declaration applies to all unprefixed element names
    /// > within its scope. Default namespace declarations do not apply directly
    /// > to attribute names; the interpretation of unprefixed attributes is
    /// > determined by the element on which they appear.
mod unprefixed { use super::*; use pretty_assertions::assert_eq; /// Basic tests that checks that basic resolver functionality is working #[test] fn basic() { let name = QName(b"simple"); let ns = Namespace(b"default"); let mut resolver = NamespaceResolver::default(); let s = resolver.buffer.len(); resolver .push(&BytesStart::from_content(" xmlns='default'", 0)) .unwrap(); assert_eq!(&resolver.buffer[s..], b"default"); // Check that tags without namespaces does not change result resolver.push(&BytesStart::from_content("", 0)).unwrap(); assert_eq!(&resolver.buffer[s..], b"default"); resolver.pop(); assert_eq!(&resolver.buffer[s..], b"default"); assert_eq!( resolver.resolve(name, true), (Bound(ns), LocalName(b"simple")) ); assert_eq!( resolver.resolve(name, false), (Unbound, LocalName(b"simple")) ); assert_eq!(resolver.find(name), Bound(ns)); } /// Test adding a second level of namespaces, which replaces the previous binding #[test] fn override_namespace() { let name = QName(b"simple"); let old_ns = Namespace(b"old"); let new_ns = Namespace(b"new"); let mut resolver = NamespaceResolver::default(); let s = resolver.buffer.len(); resolver .push(&BytesStart::from_content(" xmlns='old'", 0)) .unwrap(); resolver .push(&BytesStart::from_content(" xmlns='new'", 0)) .unwrap(); assert_eq!(&resolver.buffer[s..], b"oldnew"); assert_eq!( resolver.resolve(name, true), (Bound(new_ns), LocalName(b"simple")) ); assert_eq!( resolver.resolve(name, false), (Unbound, LocalName(b"simple")) ); assert_eq!(resolver.find(name), Bound(new_ns)); resolver.pop(); assert_eq!(&resolver.buffer[s..], b"old"); assert_eq!( resolver.resolve(name, true), (Bound(old_ns), LocalName(b"simple")) ); assert_eq!( resolver.resolve(name, false), (Unbound, LocalName(b"simple")) ); assert_eq!(resolver.find(name), Bound(old_ns)); } /// Test adding a second level of namespaces, which reset the previous binding /// to not bound state by specifying an empty namespace name. 
/// /// See #[test] fn reset() { let name = QName(b"simple"); let old_ns = Namespace(b"old"); let mut resolver = NamespaceResolver::default(); let s = resolver.buffer.len(); resolver .push(&BytesStart::from_content(" xmlns='old'", 0)) .unwrap(); resolver .push(&BytesStart::from_content(" xmlns=''", 0)) .unwrap(); assert_eq!(&resolver.buffer[s..], b"old"); assert_eq!( resolver.resolve(name, true), (Unbound, LocalName(b"simple")) ); assert_eq!( resolver.resolve(name, false), (Unbound, LocalName(b"simple")) ); assert_eq!(resolver.find(name), Unbound); resolver.pop(); assert_eq!(&resolver.buffer[s..], b"old"); assert_eq!( resolver.resolve(name, true), (Bound(old_ns), LocalName(b"simple")) ); assert_eq!( resolver.resolve(name, false), (Unbound, LocalName(b"simple")) ); assert_eq!(resolver.find(name), Bound(old_ns)); } } mod declared_prefix { use super::*; use pretty_assertions::assert_eq; /// Basic tests that checks that basic resolver functionality is working #[test] fn basic() { let name = QName(b"p:with-declared-prefix"); let ns = Namespace(b"default"); let mut resolver = NamespaceResolver::default(); let s = resolver.buffer.len(); resolver .push(&BytesStart::from_content(" xmlns:p='default'", 0)) .unwrap(); assert_eq!(&resolver.buffer[s..], b"pdefault"); // Check that tags without namespaces does not change result resolver.push(&BytesStart::from_content("", 0)).unwrap(); assert_eq!(&resolver.buffer[s..], b"pdefault"); resolver.pop(); assert_eq!(&resolver.buffer[s..], b"pdefault"); assert_eq!( resolver.resolve(name, true), (Bound(ns), LocalName(b"with-declared-prefix")) ); assert_eq!( resolver.resolve(name, false), (Bound(ns), LocalName(b"with-declared-prefix")) ); assert_eq!(resolver.find(name), Bound(ns)); } /// Test adding a second level of namespaces, which replaces the previous binding #[test] fn override_namespace() { let name = QName(b"p:with-declared-prefix"); let old_ns = Namespace(b"old"); let new_ns = Namespace(b"new"); let mut resolver = 
NamespaceResolver::default(); let s = resolver.buffer.len(); resolver .push(&BytesStart::from_content(" xmlns:p='old'", 0)) .unwrap(); resolver .push(&BytesStart::from_content(" xmlns:p='new'", 0)) .unwrap(); assert_eq!(&resolver.buffer[s..], b"poldpnew"); assert_eq!( resolver.resolve(name, true), (Bound(new_ns), LocalName(b"with-declared-prefix")) ); assert_eq!( resolver.resolve(name, false), (Bound(new_ns), LocalName(b"with-declared-prefix")) ); assert_eq!(resolver.find(name), Bound(new_ns)); resolver.pop(); assert_eq!(&resolver.buffer[s..], b"pold"); assert_eq!( resolver.resolve(name, true), (Bound(old_ns), LocalName(b"with-declared-prefix")) ); assert_eq!( resolver.resolve(name, false), (Bound(old_ns), LocalName(b"with-declared-prefix")) ); assert_eq!(resolver.find(name), Bound(old_ns)); } /// Test adding a second level of namespaces, which reset the previous binding /// to not bound state by specifying an empty namespace name. /// /// See #[test] fn reset() { let name = QName(b"p:with-declared-prefix"); let old_ns = Namespace(b"old"); let mut resolver = NamespaceResolver::default(); let s = resolver.buffer.len(); resolver .push(&BytesStart::from_content(" xmlns:p='old'", 0)) .unwrap(); resolver .push(&BytesStart::from_content(" xmlns:p=''", 0)) .unwrap(); assert_eq!(&resolver.buffer[s..], b"poldp"); assert_eq!( resolver.resolve(name, true), (Unknown(b"p".to_vec()), LocalName(b"with-declared-prefix")) ); assert_eq!( resolver.resolve(name, false), (Unknown(b"p".to_vec()), LocalName(b"with-declared-prefix")) ); assert_eq!(resolver.find(name), Unknown(b"p".to_vec())); resolver.pop(); assert_eq!(&resolver.buffer[s..], b"pold"); assert_eq!( resolver.resolve(name, true), (Bound(old_ns), LocalName(b"with-declared-prefix")) ); assert_eq!( resolver.resolve(name, false), (Bound(old_ns), LocalName(b"with-declared-prefix")) ); assert_eq!(resolver.find(name), Bound(old_ns)); } } /// Tests for `xml` and `xmlns` built-in prefixes. 
/// /// See mod builtin_prefixes { use super::*; mod xml { use super::*; use pretty_assertions::assert_eq; /// `xml` prefix are always defined, it is not required to define it explicitly. #[test] fn undeclared() { let name = QName(b"xml:random"); let namespace = RESERVED_NAMESPACE_XML.1; let resolver = NamespaceResolver::default(); assert_eq!( resolver.resolve(name, true), (Bound(namespace), LocalName(b"random")) ); assert_eq!( resolver.resolve(name, false), (Bound(namespace), LocalName(b"random")) ); assert_eq!(resolver.find(name), Bound(namespace)); } /// `xml` prefix can be declared but it must be bound to the value /// `http://www.w3.org/XML/1998/namespace` #[test] fn rebound_to_correct_ns() { let mut resolver = NamespaceResolver::default(); let s = resolver.buffer.len(); resolver.push( &BytesStart::from_content( " xmlns:xml='http://www.w3.org/XML/1998/namespace'", 0, ), ).expect("`xml` prefix should be possible to bound to `http://www.w3.org/XML/1998/namespace`"); assert_eq!(&resolver.buffer[s..], b""); } /// `xml` prefix cannot be re-declared to another namespace #[test] fn rebound_to_incorrect_ns() { let mut resolver = NamespaceResolver::default(); let s = resolver.buffer.len(); match resolver.push(&BytesStart::from_content( " xmlns:xml='not_correct_namespace'", 0, )) { Err(Error::InvalidPrefixBind { prefix, namespace }) => { assert_eq!(prefix, b"xml"); assert_eq!(namespace, b"not_correct_namespace"); } x => panic!( "Expected `Err(InvalidPrefixBind {{ .. }})`, but got `{:?}`", x ), } assert_eq!(&resolver.buffer[s..], b""); } /// `xml` prefix cannot be unbound #[test] fn unbound() { let mut resolver = NamespaceResolver::default(); let s = resolver.buffer.len(); match resolver.push(&BytesStart::from_content(" xmlns:xml=''", 0)) { Err(Error::InvalidPrefixBind { prefix, namespace }) => { assert_eq!(prefix, b"xml"); assert_eq!(namespace, b""); } x => panic!( "Expected `Err(InvalidPrefixBind {{ .. 
}})`, but got `{:?}`", x ), } assert_eq!(&resolver.buffer[s..], b""); } /// Other prefix cannot be bound to `xml` namespace #[test] fn other_prefix_bound_to_xml_namespace() { let mut resolver = NamespaceResolver::default(); let s = resolver.buffer.len(); match resolver.push(&BytesStart::from_content( " xmlns:not_xml='http://www.w3.org/XML/1998/namespace'", 0, )) { Err(Error::InvalidPrefixBind { prefix, namespace }) => { assert_eq!(prefix, b"not_xml"); assert_eq!(namespace, b"http://www.w3.org/XML/1998/namespace"); } x => panic!( "Expected `Err(InvalidPrefixBind {{ .. }})`, but got `{:?}`", x ), } assert_eq!(&resolver.buffer[s..], b""); } } mod xmlns { use super::*; use pretty_assertions::assert_eq; /// `xmlns` prefix are always defined, it is forbidden to define it explicitly #[test] fn undeclared() { let name = QName(b"xmlns:random"); let namespace = RESERVED_NAMESPACE_XMLNS.1; let resolver = NamespaceResolver::default(); assert_eq!( resolver.resolve(name, true), (Bound(namespace), LocalName(b"random")) ); assert_eq!( resolver.resolve(name, false), (Bound(namespace), LocalName(b"random")) ); assert_eq!(resolver.find(name), Bound(namespace)); } /// `xmlns` prefix cannot be re-declared event to its own namespace #[test] fn rebound_to_correct_ns() { let mut resolver = NamespaceResolver::default(); let s = resolver.buffer.len(); match resolver.push(&BytesStart::from_content( " xmlns:xmlns='http://www.w3.org/2000/xmlns/'", 0, )) { Err(Error::InvalidPrefixBind { prefix, namespace }) => { assert_eq!(prefix, b"xmlns"); assert_eq!(namespace, b"http://www.w3.org/2000/xmlns/"); } x => panic!( "Expected `Err(InvalidPrefixBind {{ .. 
}})`, but got `{:?}`", x ), } assert_eq!(&resolver.buffer[s..], b""); } /// `xmlns` prefix cannot be re-declared #[test] fn rebound_to_incorrect_ns() { let mut resolver = NamespaceResolver::default(); let s = resolver.buffer.len(); match resolver.push(&BytesStart::from_content( " xmlns:xmlns='not_correct_namespace'", 0, )) { Err(Error::InvalidPrefixBind { prefix, namespace }) => { assert_eq!(prefix, b"xmlns"); assert_eq!(namespace, b"not_correct_namespace"); } x => panic!( "Expected `Err(InvalidPrefixBind {{ .. }})`, but got `{:?}`", x ), } assert_eq!(&resolver.buffer[s..], b""); } /// `xmlns` prefix cannot be unbound #[test] fn unbound() { let mut resolver = NamespaceResolver::default(); let s = resolver.buffer.len(); match resolver.push(&BytesStart::from_content(" xmlns:xmlns=''", 0)) { Err(Error::InvalidPrefixBind { prefix, namespace }) => { assert_eq!(prefix, b"xmlns"); assert_eq!(namespace, b""); } x => panic!( "Expected `Err(InvalidPrefixBind {{ .. }})`, but got `{:?}`", x ), } assert_eq!(&resolver.buffer[s..], b""); } /// Other prefix cannot be bound to `xmlns` namespace #[test] fn other_prefix_bound_to_xmlns_namespace() { let mut resolver = NamespaceResolver::default(); let s = resolver.buffer.len(); match resolver.push(&BytesStart::from_content( " xmlns:not_xmlns='http://www.w3.org/2000/xmlns/'", 0, )) { Err(Error::InvalidPrefixBind { prefix, namespace }) => { assert_eq!(prefix, b"not_xmlns"); assert_eq!(namespace, b"http://www.w3.org/2000/xmlns/"); } x => panic!( "Expected `Err(InvalidPrefixBind {{ .. 
}})`, but got `{:?}`", x ), } assert_eq!(&resolver.buffer[s..], b""); } } } #[test] fn undeclared_prefix() { let name = QName(b"unknown:prefix"); let resolver = NamespaceResolver::default(); assert_eq!( resolver.buffer, b"xmlhttp://www.w3.org/XML/1998/namespacexmlnshttp://www.w3.org/2000/xmlns/" ); assert_eq!( resolver.resolve(name, true), (Unknown(b"unknown".to_vec()), LocalName(b"prefix")) ); assert_eq!( resolver.resolve(name, false), (Unknown(b"unknown".to_vec()), LocalName(b"prefix")) ); assert_eq!(resolver.find(name), Unknown(b"unknown".to_vec())); } /// Checks how the QName is decomposed to a prefix and a local name #[test] fn prefix_and_local_name() { let name = QName(b"foo:bus"); assert_eq!(name.prefix(), Some(Prefix(b"foo"))); assert_eq!(name.local_name(), LocalName(b"bus")); assert_eq!(name.decompose(), (LocalName(b"bus"), Some(Prefix(b"foo")))); let name = QName(b"foo:"); assert_eq!(name.prefix(), Some(Prefix(b"foo"))); assert_eq!(name.local_name(), LocalName(b"")); assert_eq!(name.decompose(), (LocalName(b""), Some(Prefix(b"foo")))); let name = QName(b":foo"); assert_eq!(name.prefix(), Some(Prefix(b""))); assert_eq!(name.local_name(), LocalName(b"foo")); assert_eq!(name.decompose(), (LocalName(b"foo"), Some(Prefix(b"")))); let name = QName(b"foo:bus:baz"); assert_eq!(name.prefix(), Some(Prefix(b"foo"))); assert_eq!(name.local_name(), LocalName(b"bus:baz")); assert_eq!( name.decompose(), (LocalName(b"bus:baz"), Some(Prefix(b"foo"))) ); } } quick-xml-0.36.1/src/parser/element.rs000064400000000000000000000106650072674642500157470ustar 00000000000000//! Contains a parser for an XML element. use crate::errors::SyntaxError; use crate::parser::Parser; /// A parser that search a `>` symbol in the slice outside of quoted regions. /// /// The parser considers two quoted regions: a double-quoted (`"..."`) and /// a single-quoted (`'...'`) region. Matches found inside those regions are not /// considered as results. 
Each region starts and ends by its quote symbol, /// which cannot be escaped (but can be encoded as XML character entity or named /// entity. Anyway, that encoding does not contain literal quotes). /// /// To use a parser create an instance of parser and [`feed`] data into it. /// After successful search the parser will return [`Some`] with position of /// found symbol. If search is unsuccessful, a [`None`] will be returned. You /// typically would expect positive result of search, so that you should feed /// new data until you get it. /// /// NOTE: after successful match the parser does not returned to the initial /// state and should not be used anymore. Create a new parser if you want to perform /// new search. /// /// # Example /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::parser::{ElementParser, Parser}; /// /// let mut parser = ElementParser::default(); /// /// // Parse `and the text follow...` /// // splitted into three chunks /// assert_eq!(parser.feed(b"and the text follow..."), Some(8)); /// // ^ ^ /// // 0 8 /// ``` /// /// [`feed`]: Self::feed() #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum ElementParser { /// The initial state (inside element, but outside of attribute value). Outside, /// Inside a single-quoted region (`'...'`). SingleQ, /// Inside a double-quoted region (`"..."`). DoubleQ, } impl Parser for ElementParser { /// Returns number of consumed bytes or `None` if `>` was not found in `bytes`. 
#[inline] fn feed(&mut self, bytes: &[u8]) -> Option { for i in memchr::memchr3_iter(b'>', b'\'', b'"', bytes) { *self = match (*self, bytes[i]) { // only allowed to match `>` while we are in state `Outside` (Self::Outside, b'>') => return Some(i), (Self::Outside, b'\'') => Self::SingleQ, (Self::Outside, b'\"') => Self::DoubleQ, // the only end_byte that gets us out if the same character (Self::SingleQ, b'\'') | (Self::DoubleQ, b'"') => Self::Outside, // all other bytes: no state change _ => continue, }; } None } #[inline] fn eof_error() -> SyntaxError { SyntaxError::UnclosedTag } } impl Default for ElementParser { #[inline] fn default() -> Self { Self::Outside } } #[test] fn parse() { use pretty_assertions::assert_eq; use ElementParser::*; /// Returns `Ok(pos)` with the position in the buffer where element is ended. /// /// Returns `Err(internal_state)` if parsing does not done yet. fn parse_element(bytes: &[u8], mut parser: ElementParser) -> Result { match parser.feed(bytes) { Some(i) => Ok(i), None => Err(parser), } } assert_eq!(parse_element(b"", Outside), Err(Outside)); assert_eq!(parse_element(b"", SingleQ), Err(SingleQ)); assert_eq!(parse_element(b"", DoubleQ), Err(DoubleQ)); assert_eq!(parse_element(b"'", Outside), Err(SingleQ)); assert_eq!(parse_element(b"'", SingleQ), Err(Outside)); assert_eq!(parse_element(b"'", DoubleQ), Err(DoubleQ)); assert_eq!(parse_element(b"\"", Outside), Err(DoubleQ)); assert_eq!(parse_element(b"\"", SingleQ), Err(SingleQ)); assert_eq!(parse_element(b"\"", DoubleQ), Err(Outside)); assert_eq!(parse_element(b">", Outside), Ok(0)); assert_eq!(parse_element(b">", SingleQ), Err(SingleQ)); assert_eq!(parse_element(b">", DoubleQ), Err(DoubleQ)); assert_eq!(parse_element(b"''>", Outside), Ok(2)); assert_eq!(parse_element(b"''>", SingleQ), Err(SingleQ)); assert_eq!(parse_element(b"''>", DoubleQ), Err(DoubleQ)); } quick-xml-0.36.1/src/parser/mod.rs000064400000000000000000000020150072674642500150630ustar 00000000000000//! 
Contains low-level parsers of different XML pieces. use crate::errors::SyntaxError; mod element; mod pi; pub use element::ElementParser; pub use pi::PiParser; /// Used to decouple reading of data from data source and parsing XML structure from it. /// This is a state preserved between getting chunks of bytes from the reader. /// /// This trait is implemented for every parser that processes piece of XML grammar. pub trait Parser { /// Process new data and try to determine end of the parsed thing. /// /// Returns position of the end of thing in `bytes` in case of successful search /// and `None` otherwise. /// /// # Parameters /// - `bytes`: a slice to find the end of a thing. /// Should contain text in ASCII-compatible encoding fn feed(&mut self, bytes: &[u8]) -> Option; /// Returns parse error produced by this parser in case of reaching end of /// input without finding the end of a parsed thing. fn eof_error() -> SyntaxError; } quick-xml-0.36.1/src/parser/pi.rs000064400000000000000000000101210072674642500147110ustar 00000000000000//! Contains a parser for an XML processing instruction. use crate::errors::SyntaxError; use crate::parser::Parser; /// A parser that search a `?>` sequence in the slice. /// /// To use a parser create an instance of parser and [`feed`] data into it. /// After successful search the parser will return [`Some`] with position where /// processing instruction is ended (the position after `?>`). If search was /// unsuccessful, a [`None`] will be returned. You typically would expect positive /// result of search, so that you should feed new data until you get it. /// /// NOTE: after successful match the parser does not returned to the initial /// state and should not be used anymore. Create a new parser if you want to perform /// new search. /// /// # Example /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::parser::{Parser, PiParser}; /// /// let mut parser = PiParser::default(); /// /// // Parse ` and ?' 
inside?>and the text follow...` /// // splitted into three chunks /// assert_eq!(parser.feed(b" and ?"), None); /// // ...get another chunk of data /// assert_eq!(parser.feed(b"' inside?>and the text follow..."), Some(9)); /// // ^ ^ /// // 0 9 /// ``` /// /// [`feed`]: Self::feed() #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] pub struct PiParser( /// A flag that indicates was the `bytes` in the previous attempt to find the /// end ended with `?`. pub bool, ); impl Parser for PiParser { /// Determines the end position of a processing instruction in the provided slice. /// Processing instruction ends on the first occurrence of `?>` which cannot be /// escaped. /// /// Returns position after the `?>` or `None` if such sequence was not found. /// /// [Section 2.6]: Parameter entity references MUST NOT be recognized within /// processing instructions, so parser do not search for them. /// /// # Parameters /// - `bytes`: a slice to find the end of a processing instruction. /// Should contain text in ASCII-compatible encoding /// /// [Section 2.6]: https://www.w3.org/TR/xml11/#sec-pi #[inline] fn feed(&mut self, bytes: &[u8]) -> Option { for i in memchr::memchr_iter(b'>', bytes) { match i { 0 if self.0 => return Some(0), // If the previous byte is `?`, then we found `?>` i if i > 0 && bytes[i - 1] == b'?' => return Some(i), _ => {} } } self.0 = bytes.last().copied() == Some(b'?'); None } #[inline] fn eof_error() -> SyntaxError { SyntaxError::UnclosedPIOrXmlDecl } } #[test] fn pi() { use pretty_assertions::assert_eq; /// Returns `Ok(pos)` with the position in the buffer where processing /// instruction is ended. /// /// Returns `Err(internal_state)` if parsing is not done yet. fn parse_pi(bytes: &[u8], had_question_mark: bool) -> Result { let mut parser = PiParser(had_question_mark); match parser.feed(bytes) { Some(i) => Ok(i), None => Err(parser.0), } } // Comments shows which character was seen the last before calling `feed`. 
// `x` means any character, pipe denotes start of the buffer that passed to `feed` assert_eq!(parse_pi(b"", false), Err(false)); // x| assert_eq!(parse_pi(b"", true), Err(false)); // ?| assert_eq!(parse_pi(b"?", false), Err(true)); // x|? assert_eq!(parse_pi(b"?", true), Err(true)); // ?|? assert_eq!(parse_pi(b">", false), Err(false)); // x|> assert_eq!(parse_pi(b">", true), Ok(0)); // ?|> assert_eq!(parse_pi(b"?>", false), Ok(1)); // x|?> assert_eq!(parse_pi(b"?>", true), Ok(1)); // ?|?> assert_eq!(parse_pi(b">?>", false), Ok(2)); // x|>?> assert_eq!(parse_pi(b">?>", true), Ok(0)); // ?|>?> } quick-xml-0.36.1/src/reader/async_tokio.rs000064400000000000000000000377300072674642500166100ustar 00000000000000//! This is an implementation of [`Reader`] for reading from a [`AsyncBufRead`] //! as underlying byte stream. This reader fully implements async/await so reading //! can use non-blocking I/O. use std::pin::Pin; use std::task::{Context, Poll}; use tokio::io::{self, AsyncBufRead, AsyncBufReadExt, AsyncRead, ReadBuf}; use crate::errors::{Error, Result, SyntaxError}; use crate::events::Event; use crate::name::{QName, ResolveResult}; use crate::parser::{ElementParser, Parser, PiParser}; use crate::reader::buffered_reader::impl_buffered_source; use crate::reader::{BangType, BinaryStream, NsReader, ParseState, ReadTextResult, Reader, Span}; use crate::utils::is_whitespace; /// A struct for read XML asynchronously from an [`AsyncBufRead`]. /// /// Having own struct allows us to implement anything without risk of name conflicts /// and does not suffer from the impossibility of having `async` in traits. 
struct TokioAdapter<'a, R>(&'a mut R); impl<'a, R: AsyncBufRead + Unpin> TokioAdapter<'a, R> { impl_buffered_source!('b, 0, async, await); } //////////////////////////////////////////////////////////////////////////////////////////////////// impl<'r, R> AsyncRead for BinaryStream<'r, R> where R: AsyncRead + Unpin, { fn poll_read( self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>, ) -> Poll> { let start = buf.remaining(); let this = self.get_mut(); let poll = Pin::new(&mut *this.inner).poll_read(cx, buf); // If something was read, update offset if let Poll::Ready(Ok(_)) = poll { let amt = start - buf.remaining(); *this.offset += amt as u64; } poll } } impl<'r, R> AsyncBufRead for BinaryStream<'r, R> where R: AsyncBufRead + Unpin, { #[inline] fn poll_fill_buf(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { Pin::new(&mut *self.get_mut().inner).poll_fill_buf(cx) } #[inline] fn consume(self: Pin<&mut Self>, amt: usize) { let this = self.get_mut(); this.inner.consume(amt); *this.offset += amt as u64; } } //////////////////////////////////////////////////////////////////////////////////////////////////// impl Reader { /// An asynchronous version of [`read_event_into()`]. Reads the next event into /// given buffer. /// /// This is the main entry point for reading XML `Event`s when using an async reader. /// /// See the documentation of [`read_event_into()`] for more information. /// /// # Examples /// /// ``` /// # tokio_test::block_on(async { /// # use pretty_assertions::assert_eq; /// use quick_xml::events::Event; /// use quick_xml::reader::Reader; /// /// // This explicitly uses `from_reader("...".as_bytes())` to use a buffered /// // reader instead of relying on the zero-copy optimizations for reading /// // from byte slices, which is provides the sync interface anyway. 
/// let mut reader = Reader::from_reader(r#" /// /// Test /// Test 2 /// /// "#.as_bytes()); /// reader.config_mut().trim_text(true); /// /// let mut count = 0; /// let mut buf = Vec::new(); /// let mut txt = Vec::new(); /// loop { /// match reader.read_event_into_async(&mut buf).await { /// Ok(Event::Start(_)) => count += 1, /// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()), /// Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e), /// Ok(Event::Eof) => break, /// _ => (), /// } /// buf.clear(); /// } /// assert_eq!(count, 3); /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); /// # }) // tokio_test::block_on /// ``` /// /// [`read_event_into()`]: Reader::read_event_into pub async fn read_event_into_async<'b>( &mut self, mut buf: &'b mut Vec, ) -> Result> { read_event_impl!( self, buf, TokioAdapter(&mut self.reader), read_until_close_async, await ) } /// An asynchronous version of [`read_to_end_into()`]. /// Reads asynchronously until end element is found using provided buffer as /// intermediate storage for events content. This function is supposed to be /// called after you already read a [`Start`] event. /// /// See the documentation of [`read_to_end_into()`] for more information. /// /// # Examples /// /// This example shows, how you can skip XML content after you read the /// start event. /// /// ``` /// # tokio_test::block_on(async { /// # use pretty_assertions::assert_eq; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::reader::Reader; /// /// let mut reader = Reader::from_reader(r#" /// /// /// /// /// /// /// /// /// "#.as_bytes()); /// reader.config_mut().trim_text(true); /// let mut buf = Vec::new(); /// /// let start = BytesStart::new("outer"); /// let end = start.to_end().into_owned(); /// /// // First, we read a start event... 
/// assert_eq!(reader.read_event_into_async(&mut buf).await.unwrap(), Event::Start(start)); /// /// // ...then, we could skip all events to the corresponding end event. /// // This call will correctly handle nested elements. /// // Note, however, that this method does not handle namespaces. /// reader.read_to_end_into_async(end.name(), &mut buf).await.unwrap(); /// /// // At the end we should get an Eof event, because we ate the whole XML /// assert_eq!(reader.read_event_into_async(&mut buf).await.unwrap(), Event::Eof); /// # }) // tokio_test::block_on /// ``` /// /// [`read_to_end_into()`]: Self::read_to_end_into /// [`Start`]: Event::Start pub async fn read_to_end_into_async<'n>( &mut self, // We should name that lifetime due to https://github.com/rust-lang/rust/issues/63033` end: QName<'n>, buf: &mut Vec, ) -> Result { Ok(read_to_end!(self, end, buf, read_event_into_async, { buf.clear(); }, await)) } /// Private function to read until `>` is found. This function expects that /// it was called just after encounter a `<` symbol. async fn read_until_close_async<'b>(&mut self, buf: &'b mut Vec) -> Result> { read_until_close!(self, buf, TokioAdapter(&mut self.reader), await) } } //////////////////////////////////////////////////////////////////////////////////////////////////// impl NsReader { /// An asynchronous version of [`read_event_into()`]. Reads the next event into /// given buffer. /// /// This method manages namespaces but doesn't resolve them automatically. /// You should call [`resolve_element()`] if you want to get a namespace. /// /// You also can use [`read_resolved_event_into_async()`] instead if you want /// to resolve namespace as soon as you get an event. 
/// /// # Examples /// /// ``` /// # tokio_test::block_on(async { /// # use pretty_assertions::assert_eq; /// use quick_xml::events::Event; /// use quick_xml::name::{Namespace, ResolveResult::*}; /// use quick_xml::reader::NsReader; /// /// let mut reader = NsReader::from_reader(r#" /// /// Test /// Test 2 /// /// "#.as_bytes()); /// reader.config_mut().trim_text(true); /// /// let mut count = 0; /// let mut buf = Vec::new(); /// let mut txt = Vec::new(); /// loop { /// match reader.read_event_into_async(&mut buf).await.unwrap() { /// Event::Start(e) => { /// count += 1; /// let (ns, local) = reader.resolve_element(e.name()); /// match local.as_ref() { /// b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))), /// b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))), /// _ => unreachable!(), /// } /// } /// Event::Text(e) => { /// txt.push(e.unescape().unwrap().into_owned()) /// } /// Event::Eof => break, /// _ => (), /// } /// buf.clear(); /// } /// assert_eq!(count, 3); /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); /// # }) // tokio_test::block_on /// ``` /// /// [`read_event_into()`]: NsReader::read_event_into /// [`resolve_element()`]: Self::resolve_element /// [`read_resolved_event_into_async()`]: Self::read_resolved_event_into_async pub async fn read_event_into_async<'b>(&mut self, buf: &'b mut Vec) -> Result> { self.pop(); let event = self.reader.read_event_into_async(buf).await; self.process_event(event) } /// An asynchronous version of [`read_to_end_into()`]. /// Reads asynchronously until end element is found using provided buffer as /// intermediate storage for events content. This function is supposed to be /// called after you already read a [`Start`] event. /// /// See the documentation of [`read_to_end_into()`] for more information. /// /// # Examples /// /// This example shows, how you can skip XML content after you read the /// start event. 
/// /// ``` /// # tokio_test::block_on(async { /// # use pretty_assertions::assert_eq; /// use quick_xml::name::{Namespace, ResolveResult}; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::reader::NsReader; /// /// let mut reader = NsReader::from_reader(r#" /// /// /// /// /// /// /// /// /// /// /// /// /// "#.as_bytes()); /// reader.config_mut().trim_text(true); /// let mut buf = Vec::new(); /// /// let ns = Namespace(b"namespace 1"); /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5); /// let end = start.to_end().into_owned(); /// /// // First, we read a start event... /// assert_eq!( /// reader.read_resolved_event_into_async(&mut buf).await.unwrap(), /// (ResolveResult::Bound(ns), Event::Start(start)) /// ); /// /// // ...then, we could skip all events to the corresponding end event. /// // This call will correctly handle nested elements. /// // Note, however, that this method does not handle namespaces. /// reader.read_to_end_into_async(end.name(), &mut buf).await.unwrap(); /// /// // At the end we should get an Eof event, because we ate the whole XML /// assert_eq!( /// reader.read_resolved_event_into_async(&mut buf).await.unwrap(), /// (ResolveResult::Unbound, Event::Eof) /// ); /// # }) // tokio_test::block_on /// ``` /// /// [`read_to_end_into()`]: Self::read_to_end_into /// [`Start`]: Event::Start pub async fn read_to_end_into_async<'n>( &mut self, // We should name that lifetime due to https://github.com/rust-lang/rust/issues/63033` end: QName<'n>, buf: &mut Vec, ) -> Result { // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should // match literally the start name. See `Config::check_end_names` documentation self.reader.read_to_end_into_async(end, buf).await } /// An asynchronous version of [`read_resolved_event_into()`]. Reads the next /// event into given buffer asynchronously and resolves its namespace (if applicable). 
///
    /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events.
    /// For all other events the concept of namespace is not defined, so
    /// a [`ResolveResult::Unbound`] is returned.
    ///
    /// If you are not interested in namespaces, you can use [`read_event_into_async()`]
    /// which will not automatically resolve namespaces for you.
    ///
    /// # Examples
    ///
    /// ```
    /// # tokio_test::block_on(async {
    /// # use pretty_assertions::assert_eq;
    /// use quick_xml::events::Event;
    /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
    /// use quick_xml::reader::NsReader;
    ///
    /// let mut reader = NsReader::from_reader(r#"
    ///     <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
    ///        <y:tag2><!--Test comment-->Test</y:tag2>
    ///        <y:tag2>Test 2</y:tag2>
    ///     </x:tag1>
    /// "#.as_bytes());
    /// reader.config_mut().trim_text(true);
    ///
    /// let mut count = 0;
    /// let mut buf = Vec::new();
    /// let mut txt = Vec::new();
    /// loop {
    ///     match reader.read_resolved_event_into_async(&mut buf).await.unwrap() {
    ///         (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => {
    ///             count += 1;
    ///             assert_eq!(e.local_name(), QName(b"tag1").into());
    ///         }
    ///         (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => {
    ///             count += 1;
    ///             assert_eq!(e.local_name(), QName(b"tag2").into());
    ///         }
    ///         (_, Event::Start(_)) => unreachable!(),
    ///
    ///         (_, Event::Text(e)) => {
    ///             txt.push(e.unescape().unwrap().into_owned())
    ///         }
    ///         (_, Event::Eof) => break,
    ///         _ => (),
    ///     }
    ///     buf.clear();
    /// }
    /// assert_eq!(count, 3);
    /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
    /// # }) // tokio_test::block_on
    /// ```
    ///
    /// [`read_resolved_event_into()`]: NsReader::read_resolved_event_into
    /// [`Start`]: Event::Start
    /// [`Empty`]: Event::Empty
    /// [`End`]: Event::End
    /// [`read_event_into_async()`]: Self::read_event_into_async
    pub async fn read_resolved_event_into_async<'ns, 'b>(
        // Name 'ns lifetime, because otherwise we get an error
        // "implicit elided lifetime not allowed here" on ResolveResult
        &'ns mut self,
        buf: &'b mut Vec<u8>,
    ) -> Result<(ResolveResult<'ns>, Event<'b>)> {
        let event = self.read_event_into_async(buf).await;
        self.resolve_event(event)
    }
}

#[cfg(test)]
mod test {
    use super::TokioAdapter;
    use crate::reader::test::check;

    check!(
        #[tokio::test]
        read_event_into_async,
        read_until_close_async,
        TokioAdapter,
        &mut Vec::new(),
        async,
        await
    );

    #[test]
    fn test_future_is_send() {
        // This test should just compile, no actual runtime checks are performed here.
        use super::*;
        use tokio::io::BufReader;
        fn check_send<T: Send>(_: T) {}

        let input = vec![];
        let mut reading_buf = vec![];
        let mut reader = Reader::from_reader(BufReader::new(input.as_slice()));

        check_send(reader.read_event_into_async(&mut reading_buf));
    }
}
quick-xml-0.36.1/src/reader/buffered_reader.rs000064400000000000000000000377740072674642500174020ustar 00000000000000
//! This is an implementation of [`Reader`] for reading from a [`BufRead`] as
//! underlying byte stream.

use std::fs::File;
use std::io::{self, BufRead, BufReader};
use std::path::Path;

use crate::errors::{Error, Result};
use crate::events::Event;
use crate::name::QName;
use crate::parser::Parser;
use crate::reader::{BangType, ReadTextResult, Reader, Span, XmlSource};
use crate::utils::is_whitespace;

// Generates the bodies of the buffered-source methods. Invoked without
// arguments it produces plain synchronous methods that call `fill_buf()` /
// `consume()` directly on `self`. When a lifetime, a field/accessor token,
// and the `async` / `await` identifiers are supplied, the same bodies are
// emitted as `async fn`s that go through `self.$reader` and `.await` every
// `fill_buf()` call.
//
// Every method retries on `io::ErrorKind::Interrupted` and adds the number of
// consumed bytes to the `position` counter passed by the caller.
macro_rules! impl_buffered_source {
    ($($lf:lifetime, $reader:tt, $async:ident, $await:ident)?) => {
        // Skips a UTF-8 byte order mark if the input starts with one.
        #[cfg(not(feature = "encoding"))]
        #[inline]
        $($async)? fn remove_utf8_bom(&mut self) -> io::Result<()> {
            use crate::encoding::UTF8_BOM;

            loop {
                break match self $(.$reader)? .fill_buf() $(.$await)? {
                    Ok(n) => {
                        if n.starts_with(UTF8_BOM) {
                            self $(.$reader)? .consume(UTF8_BOM.len());
                        }
                        Ok(())
                    },
                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                    Err(e) => Err(e),
                };
            }
        }

        // Asks `crate::encoding::detect_encoding` to inspect the first buffered
        // bytes; on success consumes the detected BOM and returns the encoding.
        #[cfg(feature = "encoding")]
        #[inline]
        $($async)? fn detect_encoding(&mut self) -> io::Result<Option<&'static encoding_rs::Encoding>> {
            loop {
                break match self $(.$reader)? .fill_buf() $(.$await)? {
                    Ok(n) => if let Some((enc, bom_len)) = crate::encoding::detect_encoding(n) {
                        self $(.$reader)? .consume(bom_len);
                        Ok(Some(enc))
                    } else {
                        Ok(None)
                    },
                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                    Err(e) => Err(e),
                };
            }
        }

        // Copies bytes into `buf` until a `<` is found (the `<` itself is
        // consumed but not stored) or the input is exhausted.
        #[inline]
        $($async)? fn read_text $(<$lf>)? (
            &mut self,
            buf: &'b mut Vec<u8>,
            position: &mut u64,
        ) -> ReadTextResult<'b, &'b mut Vec<u8>> {
            let mut read = 0;
            let start = buf.len();
            loop {
                let available = match self $(.$reader)? .fill_buf() $(.$await)? {
                    Ok(n) if n.is_empty() => break,
                    Ok(n) => n,
                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                    Err(e) => {
                        *position += read;
                        return ReadTextResult::Err(e);
                    }
                };

                match memchr::memchr(b'<', available) {
                    // Special handling is needed only on the first iteration.
                    // On next iterations we already read something and should emit Text event
                    Some(0) if read == 0 => {
                        self $(.$reader)? .consume(1);
                        *position += 1;
                        return ReadTextResult::Markup(buf);
                    }
                    Some(i) => {
                        buf.extend_from_slice(&available[..i]);

                        let used = i + 1;
                        self $(.$reader)? .consume(used);
                        read += used as u64;

                        *position += read;
                        return ReadTextResult::UpToMarkup(&buf[start..]);
                    }
                    None => {
                        buf.extend_from_slice(available);

                        let used = available.len();
                        self $(.$reader)? .consume(used);
                        read += used as u64;
                    }
                }
            }

            *position += read;
            ReadTextResult::UpToEof(&buf[start..])
        }

        // Feeds buffered chunks to `parser` until it reports the index of the
        // terminating `>`; returns the bytes before it. Reaching the end of
        // input first yields `P::eof_error()`.
        #[inline]
        $($async)? fn read_with<$($lf,)? P: Parser>(
            &mut self,
            mut parser: P,
            buf: &'b mut Vec<u8>,
            position: &mut u64,
        ) -> Result<&'b [u8]> {
            let mut read = 0;
            let start = buf.len();
            loop {
                let available = match self $(.$reader)? .fill_buf() $(.$await)? {
                    Ok(n) if n.is_empty() => break,
                    Ok(n) => n,
                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                    Err(e) => {
                        *position += read;
                        return Err(Error::Io(e.into()));
                    }
                };

                if let Some(i) = parser.feed(available) {
                    buf.extend_from_slice(&available[..i]);

                    // +1 for `>` which we do not include
                    self $(.$reader)? .consume(i + 1);
                    read += i as u64 + 1;

                    *position += read;
                    return Ok(&buf[start..]);
                }

                // The `>` symbol not yet found, continue reading
                buf.extend_from_slice(available);

                let used = available.len();
                self $(.$reader)? .consume(used);
                read += used as u64;
            }

            *position += read;
            Err(Error::Syntax(P::eof_error()))
        }

        // Reads a `<!...>` construct. The kind is decided by the byte that
        // follows the `!`; `BangType::parse` then locates the end of it.
        #[inline]
        $($async)? fn read_bang_element $(<$lf>)? (
            &mut self,
            buf: &'b mut Vec<u8>,
            position: &mut u64,
        ) -> Result<(BangType, &'b [u8])> {
            // Peeked one bang ('!') before being called, so it's guaranteed to
            // start with it.
            let start = buf.len();
            let mut read = 1;
            buf.push(b'!');
            self $(.$reader)? .consume(1);

            let bang_type = BangType::new(self.peek_one() $(.$await)? ?)?;

            loop {
                match self $(.$reader)? .fill_buf() $(.$await)? {
                    // Note: Do not update position, so the error points to
                    // somewhere sane rather than at the EOF
                    Ok(n) if n.is_empty() => break,
                    Ok(available) => {
                        // We only parse from start because we don't want to consider
                        // whatever is in the buffer before the bang element
                        if let Some((consumed, used)) = bang_type.parse(&buf[start..], available) {
                            buf.extend_from_slice(consumed);

                            self $(.$reader)? .consume(used);
                            read += used as u64;

                            *position += read;
                            return Ok((bang_type, &buf[start..]));
                        } else {
                            buf.extend_from_slice(available);

                            let used = available.len();
                            self $(.$reader)? .consume(used);
                            read += used as u64;
                        }
                    }
                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                    Err(e) => {
                        *position += read;
                        return Err(Error::Io(e.into()));
                    }
                }
            }

            *position += read;
            Err(bang_type.to_err())
        }

        // Consumes leading bytes for which `is_whitespace` is true, refilling
        // the buffer until a non-whitespace byte or the end of input is seen.
        #[inline]
        $($async)? fn skip_whitespace(&mut self, position: &mut u64) -> io::Result<()> {
            loop {
                break match self $(.$reader)? .fill_buf() $(.$await)? {
                    Ok(n) => {
                        let count = n.iter().position(|b| !is_whitespace(*b)).unwrap_or(n.len());
                        if count > 0 {
                            self $(.$reader)? .consume(count);
                            *position += count as u64;
                            continue;
                        } else {
                            Ok(())
                        }
                    }
                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                    Err(e) => Err(e),
                };
            }
        }

        // Returns the next buffered byte without consuming it, or `None` when
        // the buffer comes back empty.
        #[inline]
        $($async)? fn peek_one(&mut self) -> io::Result<Option<u8>> {
            loop {
                break match self $(.$reader)? .fill_buf() $(.$await)? {
                    Ok(n) => Ok(n.first().cloned()),
                    Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                    Err(e) => Err(e),
                };
            }
        }
    };
}

// Make it public for use in async implementations.
// New rustc reports
// > warning: the item `impl_buffered_source` is imported redundantly
// so make it public only when async feature is enabled
#[cfg(feature = "async-tokio")]
pub(super) use impl_buffered_source;

/// Implementation of `XmlSource` for any `BufRead` reader using a user-given
/// `Vec<u8>` as buffer that will be borrowed by events.
impl<'b, R: BufRead> XmlSource<'b, &'b mut Vec<u8>> for R {
    impl_buffered_source!();
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// This is an implementation for reading from a [`BufRead`] as underlying byte stream.
impl<R: BufRead> Reader<R> {
    /// Reads the next `Event`.
    ///
    /// This is the main entry point for reading XML `Event`s.
    ///
    /// `Event`s borrow `buf` and can be converted to own their data if needed (uses `Cow`
    /// internally).
    ///
    /// Having the possibility to control the internal buffers gives you some additional benefits
    /// such as:
    ///
    /// - Reduce the number of allocations by reusing the same buffer. For constrained systems,
    ///   you can call `buf.clear()` once you are done with processing the event (typically at the
    ///   end of your loop).
    /// - Reserve the buffer length if you know the file size (using `Vec::with_capacity`).
///
    /// # Examples
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use quick_xml::events::Event;
    /// use quick_xml::reader::Reader;
    ///
    /// let xml = r#"<tag1 att1 = "test">
    ///                 <tag2><!--Test comment-->Test</tag2>
    ///                 <tag2>Test 2</tag2>
    ///              </tag1>"#;
    /// let mut reader = Reader::from_str(xml);
    /// reader.config_mut().trim_text(true);
    /// let mut count = 0;
    /// let mut buf = Vec::new();
    /// let mut txt = Vec::new();
    /// loop {
    ///     match reader.read_event_into(&mut buf) {
    ///         Ok(Event::Start(_)) => count += 1,
    ///         Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()),
    ///         Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
    ///         Ok(Event::Eof) => break,
    ///         _ => (),
    ///     }
    ///     buf.clear();
    /// }
    /// assert_eq!(count, 3);
    /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
    /// ```
    #[inline]
    pub fn read_event_into<'b>(&mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>> {
        // Thin public wrapper: all of the work happens in `read_event_impl`.
        self.read_event_impl(buf)
    }

    /// Reads until end element is found using provided buffer as intermediate
    /// storage for events content. This function is supposed to be called after
    /// you already read a [`Start`] event.
    ///
    /// Returns a span that cover content between `>` of an opening tag and `<` of
    /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and
    /// this method was called after reading expanded [`Start`] event.
    ///
    /// Manages nested cases where parent and child elements have the _literally_
    /// same name.
    ///
    /// If a corresponding [`End`] event is not found, an error of type [`Error::IllFormed`]
    /// will be returned. In particularly, that error will be returned if you call
    /// this method without consuming the corresponding [`Start`] event first.
    ///
    /// If your reader created from a string slice or byte array slice, it is
    /// better to use [`read_to_end()`] method, because it will not copy bytes
    /// into intermediate buffer.
    ///
    /// The provided `buf` buffer will be filled only by one event content at time.
    /// Before reading of each event the buffer will be cleared.
If you know an /// appropriate size of each event, you can preallocate the buffer to reduce /// number of reallocations. /// /// The `end` parameter should contain name of the end element _in the reader /// encoding_. It is good practice to always get that parameter using /// [`BytesStart::to_end()`] method. /// /// The correctness of the skipped events does not checked, if you disabled /// the [`check_end_names`] option. /// /// # Namespaces /// /// While the `Reader` does not support namespace resolution, namespaces /// does not change the algorithm for comparing names. Although the names /// `a:name` and `b:name` where both prefixes `a` and `b` resolves to the /// same namespace, are semantically equivalent, `` cannot close /// ``, because according to [the specification] /// /// > The end of every element that begins with a **start-tag** MUST be marked /// > by an **end-tag** containing a name that echoes the element's type as /// > given in the **start-tag** /// /// # Examples /// /// This example shows, how you can skip XML content after you read the /// start event. /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::reader::Reader; /// /// let mut reader = Reader::from_str(r#" /// /// /// /// /// /// /// /// /// "#); /// reader.config_mut().trim_text(true); /// let mut buf = Vec::new(); /// /// let start = BytesStart::new("outer"); /// let end = start.to_end().into_owned(); /// /// // First, we read a start event... /// assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Start(start)); /// /// // ...then, we could skip all events to the corresponding end event. /// // This call will correctly handle nested elements. /// // Note, however, that this method does not handle namespaces. 
/// reader.read_to_end_into(end.name(), &mut buf).unwrap(); /// /// // At the end we should get an Eof event, because we ate the whole XML /// assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof); /// ``` /// /// [`Start`]: Event::Start /// [`End`]: Event::End /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end /// [`read_to_end()`]: Self::read_to_end /// [`expand_empty_elements`]: crate::reader::Config::expand_empty_elements /// [`check_end_names`]: crate::reader::Config::check_end_names /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec) -> Result { Ok(read_to_end!(self, end, buf, read_event_impl, { buf.clear(); })) } } impl Reader> { /// Creates an XML reader from a file path. pub fn from_file>(path: P) -> Result { let file = File::open(path)?; let reader = BufReader::new(file); Ok(Self::from_reader(reader)) } } #[cfg(test)] mod test { use crate::reader::test::check; use crate::reader::XmlSource; /// Default buffer constructor just pass the byte array from the test fn identity(input: T) -> T { input } check!( #[test] read_event_impl, read_until_close, identity, &mut Vec::new() ); } quick-xml-0.36.1/src/reader/mod.rs000064400000000000000000002363160072674642500150460ustar 00000000000000//! Contains high-level interface for a pull-based XML parser. #[cfg(feature = "encoding")] use encoding_rs::Encoding; use std::io; use std::ops::Range; use crate::encoding::Decoder; use crate::errors::{Error, Result, SyntaxError}; use crate::events::Event; use crate::parser::{ElementParser, Parser, PiParser}; use crate::reader::state::ReaderState; /// A struct that holds a parser configuration. /// /// Current parser configuration can be retrieved by calling [`Reader::config()`] /// and changed by changing properties of the object returned by a call to /// [`Reader::config_mut()`]. 
/// /// [`Reader::config()`]: crate::reader::Reader::config /// [`Reader::config_mut()`]: crate::reader::Reader::config_mut #[derive(Debug, Clone, PartialEq, Eq)] #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] #[cfg_attr(feature = "serde-types", derive(serde::Deserialize, serde::Serialize))] #[non_exhaustive] pub struct Config { /// Whether unmatched closing tag names should be allowed. Unless enabled, /// in case of a dangling end tag, the [`Error::IllFormed(UnmatchedEndTag)`] /// is returned from read methods. /// /// When set to `true`, it won't check if a closing tag has a corresponding /// opening tag at all. For example, `` will be permitted. /// /// Note that the emitted [`End`] event will not be modified if this is enabled, /// ie. it will contain the data of the unmatched end tag. /// /// Note, that setting this to `true` will lead to additional allocates that /// needed to store tag name for an [`End`] event. /// /// Default: `false` /// /// [`Error::IllFormed(UnmatchedEndTag)`]: crate::errors::IllFormedError::UnmatchedEndTag /// [`End`]: crate::events::Event::End pub allow_unmatched_ends: bool, /// Whether comments should be validated. If enabled, in case of invalid comment /// [`Error::IllFormed(DoubleHyphenInComment)`] is returned from read methods. /// /// When set to `true`, every [`Comment`] event will be checked for not /// containing `--`, which [is not allowed] in XML comments. Most of the time /// we don't want comments at all so we don't really care about comment /// correctness, thus the default value is `false` to improve performance. /// /// Default: `false` /// /// [`Error::IllFormed(DoubleHyphenInComment)`]: crate::errors::IllFormedError::DoubleHyphenInComment /// [`Comment`]: crate::events::Event::Comment /// [is not allowed]: https://www.w3.org/TR/xml11/#sec-comments pub check_comments: bool, /// Whether mismatched closing tag names should be detected. 
If enabled, in /// case of mismatch the [`Error::IllFormed(MismatchedEndTag)`] is returned from /// read methods. /// /// Note, that start and end tags [should match literally][spec], they cannot /// have different prefixes even if both prefixes resolve to the same namespace. /// The XML /// /// ```xml /// /// /// ``` /// /// is not valid, even though semantically the start tag is the same as the /// end tag. The reason is that namespaces are an extension of the original /// XML specification (without namespaces) and it should be backward-compatible. /// /// When set to `false`, it won't check if a closing tag matches the corresponding /// opening tag. For example, `` will be permitted. /// /// If the XML is known to be sane (already processed, etc.) this saves extra time. /// /// Note that the emitted [`End`] event will not be modified if this is disabled, /// ie. it will contain the data of the mismatched end tag. /// /// Note, that setting this to `true` will lead to additional allocates that /// needed to store tag name for an [`End`] event. However if [`expand_empty_elements`] /// is also set, only one additional allocation will be performed that support /// both these options. /// /// Default: `true` /// /// [`Error::IllFormed(MismatchedEndTag)`]: crate::errors::IllFormedError::MismatchedEndTag /// [spec]: https://www.w3.org/TR/xml11/#dt-etag /// [`End`]: crate::events::Event::End /// [`expand_empty_elements`]: Self::expand_empty_elements pub check_end_names: bool, /// Whether empty elements should be split into an `Open` and a `Close` event. /// /// When set to `true`, all [`Empty`] events produced by a self-closing tag /// like `` are expanded into a [`Start`] event followed by an [`End`] /// event. When set to `false` (the default), those tags are represented by /// an [`Empty`] event instead. /// /// Note, that setting this to `true` will lead to additional allocates that /// needed to store tag name for an [`End`] event. 
However if [`check_end_names`] /// is also set, only one additional allocation will be performed that support /// both these options. /// /// Default: `false` /// /// [`Empty`]: crate::events::Event::Empty /// [`Start`]: crate::events::Event::Start /// [`End`]: crate::events::Event::End /// [`check_end_names`]: Self::check_end_names pub expand_empty_elements: bool, /// Whether trailing whitespace after the markup name are trimmed in closing /// tags ``. /// /// If `true` the emitted [`End`] event is stripped of trailing whitespace /// after the markup name. /// /// Note that if set to `false` and [`check_end_names`] is `true` the comparison /// of markup names is going to fail erroneously if a closing tag contains /// trailing whitespace. /// /// Default: `true` /// /// [`End`]: crate::events::Event::End /// [`check_end_names`]: Self::check_end_names pub trim_markup_names_in_closing_tags: bool, /// Whether whitespace before character data should be removed. /// /// When set to `true`, leading whitespace is trimmed in [`Text`] events. /// If after that the event is empty it will not be pushed. /// /// Default: `false` /// ///
/// /// WARNING: With this option every text events will be trimmed which is /// incorrect behavior when text events delimited by comments, processing /// instructions or CDATA sections. To correctly trim data manually apply /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`] /// only to necessary events. ///
/// /// [`Text`]: crate::events::Event::Text /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end pub trim_text_start: bool, /// Whether whitespace after character data should be removed. /// /// When set to `true`, trailing whitespace is trimmed in [`Text`] events. /// If after that the event is empty it will not be pushed. /// /// Default: `false` /// ///
/// /// WARNING: With this option every text events will be trimmed which is /// incorrect behavior when text events delimited by comments, processing /// instructions or CDATA sections. To correctly trim data manually apply /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`] /// only to necessary events. ///
/// /// [`Text`]: crate::events::Event::Text /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end pub trim_text_end: bool, } impl Config { /// Set both [`trim_text_start`] and [`trim_text_end`] to the same value. /// ///
/// /// WARNING: With this option every text events will be trimmed which is /// incorrect behavior when text events delimited by comments, processing /// instructions or CDATA sections. To correctly trim data manually apply /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`] /// only to necessary events. ///
/// /// [`trim_text_start`]: Self::trim_text_start /// [`trim_text_end`]: Self::trim_text_end /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end #[inline] pub fn trim_text(&mut self, trim: bool) { self.trim_text_start = trim; self.trim_text_end = trim; } /// Turn on or off all checks for well-formedness. Currently it is that settings: /// - [`check_comments`](Self::check_comments) /// - [`check_end_names`](Self::check_end_names) #[inline] pub fn enable_all_checks(&mut self, enable: bool) { self.check_comments = enable; self.check_end_names = enable; } } impl Default for Config { fn default() -> Self { Self { allow_unmatched_ends: false, check_comments: false, check_end_names: true, expand_empty_elements: false, trim_markup_names_in_closing_tags: true, trim_text_start: false, trim_text_end: false, } } } //////////////////////////////////////////////////////////////////////////////////////////////////// macro_rules! read_event_impl { ( $self:ident, $buf:ident, $reader:expr, $read_until_close:ident $(, $await:ident)? ) => {{ let event = loop { break match $self.state.state { ParseState::Init => { // Go to InsideMarkup state // If encoding set explicitly, we not need to detect it. For example, // explicit UTF-8 set automatically if Reader was created using `from_str`. // But we still need to remove BOM for consistency with no encoding // feature enabled path #[cfg(feature = "encoding")] if let Some(encoding) = $reader.detect_encoding() $(.$await)? ? { if $self.state.encoding.can_be_refined() { $self.state.encoding = crate::reader::EncodingRef::BomDetected(encoding); } } // Removes UTF-8 BOM if it is present #[cfg(not(feature = "encoding"))] $reader.remove_utf8_bom() $(.$await)? 
?; $self.state.state = ParseState::InsideText; continue; }, ParseState::InsideText => { // Go to InsideMarkup or Done state if $self.state.config.trim_text_start { $reader.skip_whitespace(&mut $self.state.offset) $(.$await)? ?; } match $reader.read_text($buf, &mut $self.state.offset) $(.$await)? { ReadTextResult::Markup(buf) => { $self.state.state = ParseState::InsideMarkup; // Pass `buf` to the next next iteration of parsing loop $buf = buf; continue; } ReadTextResult::UpToMarkup(bytes) => { $self.state.state = ParseState::InsideMarkup; // FIXME: Can produce an empty event if: // - event contains only spaces // - trim_text_start = false // - trim_text_end = true Ok(Event::Text($self.state.emit_text(bytes))) } ReadTextResult::UpToEof(bytes) => { $self.state.state = ParseState::Done; // Trim bytes from end if required let event = $self.state.emit_text(bytes); if event.is_empty() { Ok(Event::Eof) } else { Ok(Event::Text(event)) } } ReadTextResult::Err(e) => Err(Error::Io(e.into())), } }, // Go to InsideText state in next two arms ParseState::InsideMarkup => $self.$read_until_close($buf) $(.$await)?, ParseState::InsideEmpty => Ok(Event::End($self.state.close_expanded_empty())), ParseState::Done => Ok(Event::Eof), }; }; match event { // #513: In case of ill-formed errors we already consume the wrong data // and change the state. We can continue parsing if we wish Err(Error::IllFormed(_)) => {} Err(_) | Ok(Event::Eof) => $self.state.state = ParseState::Done, _ => {} } event }}; } /// Read bytes up to the `>` and skip it. This method is expected to be called /// after seeing the `<` symbol and skipping it. Inspects the next (current) /// symbol and returns an appropriate [`Event`]: /// /// |Symbol |Event /// |-------|------------------------------------- /// |`!` |[`Comment`], [`CData`] or [`DocType`] /// |`/` |[`End`] /// |`?` |[`PI`] /// |_other_|[`Start`] or [`Empty`] /// /// Moves parser to the `InsideText` state. 
/// /// [`Comment`]: Event::Comment /// [`CData`]: Event::CData /// [`DocType`]: Event::DocType /// [`End`]: Event::End /// [`PI`]: Event::PI /// [`Start`]: Event::Start /// [`Empty`]: Event::Empty macro_rules! read_until_close { ( $self:ident, $buf:ident, $reader:expr $(, $await:ident)? ) => {{ $self.state.state = ParseState::InsideText; let start = $self.state.offset; match $reader.peek_one() $(.$await)? { // ` match $reader .read_bang_element($buf, &mut $self.state.offset) $(.$await)? { Ok((bang_type, bytes)) => $self.state.emit_bang(bang_type, bytes), Err(e) => { // We want to report error at `<`, but offset was increased, // so return it back (-1 for `<`) $self.state.last_error_offset = start - 1; Err(e) } }, // `` we will parse `
` as end tag // `
` which probably no one existing parser // does. This is malformed XML, however it is tolerated by some parsers // (e.g. the one used by Adobe Flash) and such documents do exist in the wild. Ok(Some(b'/')) => match $reader .read_with(ElementParser::Outside, $buf, &mut $self.state.offset) $(.$await)? { Ok(bytes) => $self.state.emit_end(bytes), Err(e) => { // We want to report error at `<`, but offset was increased, // so return it back (-1 for `<`) $self.state.last_error_offset = start - 1; Err(e) } }, // ` match $reader .read_with(PiParser(false), $buf, &mut $self.state.offset) $(.$await)? { Ok(bytes) => $self.state.emit_question_mark(bytes), Err(e) => { // We want to report error at `<`, but offset was increased, // so return it back (-1 for `<`) $self.state.last_error_offset = start - 1; Err(e) } }, // `<...` - opening or self-closed tag Ok(Some(_)) => match $reader .read_with(ElementParser::Outside, $buf, &mut $self.state.offset) $(.$await)? { Ok(bytes) => Ok($self.state.emit_start(bytes)), Err(e) => { // We want to report error at `<`, but offset was increased, // so return it back (-1 for `<`) $self.state.last_error_offset = start - 1; Err(e) } }, // `<` - syntax error, tag not closed Ok(None) => { // We want to report error at `<`, but offset was increased, // so return it back (-1 for `<`) $self.state.last_error_offset = start - 1; Err(Error::Syntax(SyntaxError::UnclosedTag)) } Err(e) => Err(Error::Io(e.into())), } }}; } /// Generalization of `read_to_end` method for buffered and borrowed readers macro_rules! read_to_end { ( // $self: &mut Reader $self:expr, $end:expr, $buf:expr, $read_event:ident, // Code block that performs clearing of internal buffer after read of each event $clear:block $(, $await:ident)? ) => {{ // Because we take position after the event before the End event, // it is important that this position indicates beginning of the End event. 
// If between last event and the End event would be only spaces, then we // take position before the spaces, but spaces would be skipped without // generating event if `trim_text_start` is set to `true`. To prevent that // we temporary disable start text trimming. // // We also cannot take position after getting End event, because if // `trim_markup_names_in_closing_tags` is set to `true` (which is the default), // we do not known the real size of the End event that it is occupies in // the source and cannot correct the position after the End event. // So, we in any case should tweak parser configuration. let config = $self.config_mut(); let trim = config.trim_text_start; config.trim_text_start = false; let start = $self.buffer_position(); let mut depth = 0; loop { $clear let end = $self.buffer_position(); match $self.$read_event($buf) $(.$await)? { Err(e) => { $self.config_mut().trim_text_start = trim; return Err(e); } Ok(Event::Start(e)) if e.name() == $end => depth += 1, Ok(Event::End(e)) if e.name() == $end => { if depth == 0 { $self.config_mut().trim_text_start = trim; break start..end; } depth -= 1; } Ok(Event::Eof) => { $self.config_mut().trim_text_start = trim; return Err(Error::missed_end($end, $self.decoder())); } _ => (), } } }}; } #[cfg(feature = "async-tokio")] mod async_tokio; mod buffered_reader; mod ns_reader; mod slice_reader; mod state; pub use ns_reader::NsReader; /// Range of input in bytes, that corresponds to some piece of XML pub type Span = Range; //////////////////////////////////////////////////////////////////////////////////////////////////// /// Possible reader states. 
The state transition diagram (`true` and `false` shows /// value of [`Config::expand_empty_elements`] option): /// /// ```mermaid /// flowchart LR /// subgraph _ /// direction LR /// /// Init -- "(no event)"\n --> InsideMarkup /// InsideMarkup -- Decl, DocType, PI\nComment, CData\nStart, Empty, End --> InsideText /// InsideText -- "#lt;false#gt;\n(no event)"\nText --> InsideMarkup /// end /// InsideText -- "#lt;true#gt;"\nStart --> InsideEmpty /// InsideEmpty -- End --> InsideText /// _ -. Eof .-> Done /// ``` #[derive(Clone, Debug)] enum ParseState { /// Initial state in which reader stay after creation. Transition from that /// state could produce a `Text`, `Decl`, `Comment` or `Start` event. The next /// state is always `InsideMarkup`. The reader will never return to this state. The /// event emitted during transition to `InsideMarkup` is a `StartEvent` if the /// first symbol not `<`, otherwise no event are emitted. Init, /// State after seeing the `<` symbol. Depending on the next symbol all other /// events could be generated. /// /// After generating one event the reader moves to the `InsideText` state. InsideMarkup, /// State in which reader searches the `<` symbol of a markup. All bytes before /// that symbol will be returned in the [`Event::Text`] event. After that /// the reader moves to the `InsideMarkup` state. InsideText, /// This state is used only if option [`expand_empty_elements`] is set to `true`. /// Reader enters to this state when it is in a `InsideText` state and emits an /// [`Event::Start`] event. The next event emitted will be an [`Event::End`], /// after which reader returned to the `InsideText` state. /// /// [`expand_empty_elements`]: Config::expand_empty_elements InsideEmpty, /// Reader enters this state when `Eof` event generated or an error occurred. /// This is the last state, the reader stay in it forever. Done, } /// A reference to an encoding together with information about how it was retrieved. 
///
/// The state transition diagram:
///
/// ```mermaid
/// flowchart LR
///   Implicit    -- from_str       --> Explicit
///   Implicit    -- BOM            --> BomDetected
///   Implicit    -- "encoding=..." --> XmlDetected
///   BomDetected -- "encoding=..." --> XmlDetected
/// ```
#[cfg(feature = "encoding")]
#[derive(Clone, Copy, Debug)]
enum EncodingRef {
    /// Encoding was implicitly assumed to have a specified value. It can be refined
    /// using BOM or by the XML declaration event (`<?xml encoding=... ?>`)
    Implicit(&'static Encoding),
    /// Encoding was explicitly set to the desired value. It cannot be changed
    /// neither by BOM, nor by parsing the XML declaration (`<?xml encoding=... ?>`)
    Explicit(&'static Encoding),
    /// Encoding was detected from a byte order mark (BOM) or by the first bytes
    /// of the content. It can be refined by the XML declaration event (`<?xml encoding=... ?>`)
    BomDetected(&'static Encoding),
    /// Encoding was detected using the XML declaration event (`<?xml encoding=... ?>`).
    /// It can no longer change
    XmlDetected(&'static Encoding),
}

#[cfg(feature = "encoding")]
impl EncodingRef {
    /// Returns the wrapped encoding regardless of how it was obtained.
    #[inline]
    const fn encoding(&self) -> &'static Encoding {
        match self {
            Self::Implicit(e)
            | Self::Explicit(e)
            | Self::BomDetected(e)
            | Self::XmlDetected(e) => e,
        }
    }

    /// Returns `true` for the variants that a later, more specific source of
    /// information (BOM or XML declaration) is still allowed to replace.
    #[inline]
    const fn can_be_refined(&self) -> bool {
        match self {
            Self::Implicit(_) | Self::BomDetected(_) => true,
            Self::Explicit(_) | Self::XmlDetected(_) => false,
        }
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// A direct stream to the underlying [`Reader`]s reader which updates
/// [`Reader::buffer_position()`] when read from it.
#[derive(Debug)]
#[must_use = "streams do nothing unless read or polled"]
pub struct BinaryStream<'r, R> {
    inner: &'r mut R,
    offset: &'r mut u64,
}

impl<'r, R> BinaryStream<'r, R> {
    /// Returns current position in bytes in the original source.
    #[inline]
    pub const fn offset(&self) -> u64 {
        *self.offset
    }

    /// Gets a reference to the underlying reader.
    #[inline]
    pub const fn get_ref(&self) -> &R {
        self.inner
    }

    /// Gets a mutable reference to the underlying reader.
    ///
    /// Avoid reading from this reader because this will not update the reader's position
    /// and will lead to incorrect positions of errors. Read from this stream instead.
    #[inline]
    pub fn get_mut(&mut self) -> &mut R {
        self.inner
    }
}

impl<'r, R> io::Read for BinaryStream<'r, R>
where
    R: io::Read,
{
    /// Reads from the wrapped reader and advances the shared offset by the
    /// number of bytes actually read.
    #[inline]
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let amt = self.inner.read(buf)?;
        *self.offset += amt as u64;
        Ok(amt)
    }
}

impl<'r, R> io::BufRead for BinaryStream<'r, R>
where
    R: io::BufRead,
{
    /// Peeking at the buffer does not consume anything, so the offset is untouched.
    #[inline]
    fn fill_buf(&mut self) -> io::Result<&[u8]> {
        self.inner.fill_buf()
    }

    /// Consumes `amt` bytes from the wrapped reader and advances the shared offset.
    #[inline]
    fn consume(&mut self, amt: usize) {
        self.inner.consume(amt);
        *self.offset += amt as u64;
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// A low level encoding-agnostic XML event reader.
///
/// Consumes bytes and streams XML [`Event`]s.
///
/// This reader does not manage namespace declarations and is not able to resolve
/// prefixes. If you want these features, use the [`NsReader`].
///
/// # Examples
///
/// ```
/// use quick_xml::events::Event;
/// use quick_xml::reader::Reader;
///
/// let xml = r#"<tag1 att1 = "test">
///                 <tag2>Test</tag2>
///                 <tag2>Test 2</tag2>
///              </tag1>"#;
/// let mut reader = Reader::from_str(xml);
/// reader.config_mut().trim_text(true);
///
/// let mut count = 0;
/// let mut txt = Vec::new();
/// let mut buf = Vec::new();
///
/// // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s)
/// loop {
///     // NOTE: this is the generic case when we don't know about the input BufRead.
/// // when the input is a &str or a &[u8], we don't actually need to use another /// // buffer, we could directly call `reader.read_event()` /// match reader.read_event_into(&mut buf) { /// Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e), /// // exits the loop when reaching end of file /// Ok(Event::Eof) => break, /// /// Ok(Event::Start(e)) => { /// match e.name().as_ref() { /// b"tag1" => println!("attributes values: {:?}", /// e.attributes().map(|a| a.unwrap().value) /// .collect::>()), /// b"tag2" => count += 1, /// _ => (), /// } /// } /// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()), /// /// // There are several other `Event`s we do not consider here /// _ => (), /// } /// // if we don't keep a borrow elsewhere, we can clear the buffer to keep memory usage low /// buf.clear(); /// } /// ``` /// /// [`NsReader`]: crate::reader::NsReader #[derive(Clone)] pub struct Reader { /// Source of data for parse reader: R, /// Configuration and current parse state state: ReaderState, } /// Builder methods impl Reader { /// Creates a `Reader` that reads from a given reader. 
pub fn from_reader(reader: R) -> Self { Self { reader, state: ReaderState::default(), } } /// Returns reference to the parser configuration pub const fn config(&self) -> &Config { &self.state.config } /// Returns mutable reference to the parser configuration pub fn config_mut(&mut self) -> &mut Config { &mut self.state.config } } /// Getters impl Reader { /// Consumes `Reader` returning the underlying reader /// /// Can be used to compute line and column of a parsing error position /// /// # Examples /// /// ``` /// # use pretty_assertions::assert_eq; /// use std::{str, io::Cursor}; /// use quick_xml::events::Event; /// use quick_xml::reader::Reader; /// /// let xml = r#" /// Test /// Test 2 /// "#; /// let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes())); /// let mut buf = Vec::new(); /// /// fn into_line_and_column(reader: Reader>) -> (usize, usize) { /// // We known that size cannot exceed usize::MAX because we created parser from single &[u8] /// let end_pos = reader.buffer_position() as usize; /// let mut cursor = reader.into_inner(); /// let s = String::from_utf8(cursor.into_inner()[0..end_pos].to_owned()) /// .expect("can't make a string"); /// let mut line = 1; /// let mut column = 0; /// for c in s.chars() { /// if c == '\n' { /// line += 1; /// column = 0; /// } else { /// column += 1; /// } /// } /// (line, column) /// } /// /// loop { /// match reader.read_event_into(&mut buf) { /// Ok(Event::Start(ref e)) => match e.name().as_ref() { /// b"tag1" | b"tag2" => (), /// tag => { /// assert_eq!(b"tag3", tag); /// assert_eq!((3, 22), into_line_and_column(reader)); /// break; /// } /// }, /// Ok(Event::Eof) => unreachable!(), /// _ => (), /// } /// buf.clear(); /// } /// ``` pub fn into_inner(self) -> R { self.reader } /// Gets a reference to the underlying reader. pub const fn get_ref(&self) -> &R { &self.reader } /// Gets a mutable reference to the underlying reader. 
/// /// Avoid read from this reader because this will not update reader's position /// and will lead to incorrect positions of errors. If you want to read, use /// [`stream()`] instead. /// /// [`stream()`]: Self::stream pub fn get_mut(&mut self) -> &mut R { &mut self.reader } /// Gets the current byte position in the input data. pub const fn buffer_position(&self) -> u64 { // when internal state is InsideMarkup, we have actually read until '<', // which we don't want to show if let ParseState::InsideMarkup = self.state.state { self.state.offset - 1 } else { self.state.offset } } /// Gets the last error byte position in the input data. If there is no errors /// yet, returns `0`. /// /// Unlike `buffer_position` it will point to the place where it is rational /// to report error to the end user. For example, all [`SyntaxError`]s are /// reported when the parser sees EOF inside of some kind of markup. The /// `buffer_position()` will point to the last byte of input which is not /// very useful. `error_position()` will point to the start of corresponding /// markup element (i. e. to the `<` character). /// /// This position is always `<= buffer_position()`. pub const fn error_position(&self) -> u64 { self.state.last_error_offset } /// Get the decoder, used to decode bytes, read by this reader, to the strings. /// /// If [`encoding`] feature is enabled, the used encoding may change after /// parsing the XML declaration, otherwise encoding is fixed to UTF-8. /// /// If [`encoding`] feature is enabled and no encoding is specified in declaration, /// defaults to UTF-8. /// /// [`encoding`]: ../index.html#encoding #[inline] pub const fn decoder(&self) -> Decoder { self.state.decoder() } /// Get the direct access to the underlying reader, but tracks the amount of /// read data and update [`Reader::buffer_position()`] accordingly. 
/// /// Note, that this method gives you access to the internal reader and read /// data will not be returned in any subsequent events read by `read_event` /// family of methods. /// /// # Example /// /// This example demonstrates how to read stream raw bytes from an XML document. /// This could be used to implement streaming read of text, or to read raw binary /// bytes embedded in an XML document. (Documents with embedded raw bytes are not /// valid XML, but XML-derived file formats exist where such documents are valid). /// /// ``` /// # use pretty_assertions::assert_eq; /// use std::io::{BufRead, Read}; /// use quick_xml::events::{BytesEnd, BytesStart, Event}; /// use quick_xml::reader::Reader; /// /// let mut reader = Reader::from_str("binary << data&>"); /// // ^ ^ ^ ^ /// // 0 5 21 27 /// /// assert_eq!( /// (reader.read_event().unwrap(), reader.buffer_position()), /// // 5 - end of the `` /// (Event::Start(BytesStart::new("tag")), 5) /// ); /// /// // Reading directly from underlying reader will not update position /// // let mut inner = reader.get_mut(); /// /// // Reading from the stream() advances position /// let mut inner = reader.stream(); /// /// // Read binary data. 
We must know its size /// let mut binary = [0u8; 16]; /// inner.read_exact(&mut binary).unwrap(); /// assert_eq!(&binary, b"binary << data&>"); /// // 21 - end of the `binary << data&>` /// assert_eq!(inner.offset(), 21); /// assert_eq!(reader.buffer_position(), 21); /// /// assert_eq!( /// (reader.read_event().unwrap(), reader.buffer_position()), /// // 27 - end of the `` /// (Event::End(BytesEnd::new("tag")), 27) /// ); /// /// assert_eq!(reader.read_event().unwrap(), Event::Eof); /// ``` #[inline] pub fn stream(&mut self) -> BinaryStream { BinaryStream { inner: &mut self.reader, offset: &mut self.state.offset, } } } /// Private sync reading methods impl Reader { /// Read text into the given buffer, and return an event that borrows from /// either that buffer or from the input itself, based on the type of the /// reader. fn read_event_impl<'i, B>(&mut self, mut buf: B) -> Result> where R: XmlSource<'i, B>, { read_event_impl!(self, buf, self.reader, read_until_close) } /// Private function to read until `>` is found. This function expects that /// it was called just after encounter a `<` symbol. fn read_until_close<'i, B>(&mut self, buf: B) -> Result> where R: XmlSource<'i, B>, { read_until_close!(self, buf, self.reader) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Result of an attempt to read XML textual data from the reader. enum ReadTextResult<'r, B> { /// Start of markup (`<` character) was found in the first byte. /// Contains buffer that should be returned back to the next iteration cycle /// to satisfy borrow checker requirements. Markup(B), /// Contains text block up to start of markup (`<` character). UpToMarkup(&'r [u8]), /// Contains text block up to EOF, start of markup (`<` character) was not found. UpToEof(&'r [u8]), /// IO error occurred. Err(io::Error), } /// Represents an input for a reader that can return borrowed data. 
/// /// There are two implementors of this trait: generic one that read data from /// `Self`, copies some part of it into a provided buffer of type `B` and then /// returns data that borrow from that buffer. /// /// The other implementor is for `&[u8]` and instead of copying data returns /// borrowed data from `Self` instead. This implementation allows zero-copy /// deserialization. /// /// # Parameters /// - `'r`: lifetime of a buffer from which events will borrow /// - `B`: a type of a buffer that can be used to store data read from `Self` and /// from which events can borrow trait XmlSource<'r, B> { /// Removes UTF-8 BOM if it is present #[cfg(not(feature = "encoding"))] fn remove_utf8_bom(&mut self) -> io::Result<()>; /// Determines encoding from the start of input and removes BOM if it is present #[cfg(feature = "encoding")] fn detect_encoding(&mut self) -> io::Result>; /// Read input until start of markup (the `<`) is found or end of input is reached. /// /// # Parameters /// - `buf`: Buffer that could be filled from an input (`Self`) and /// from which [events] could borrow their data /// - `position`: Will be increased by amount of bytes consumed /// /// [events]: crate::events::Event fn read_text(&mut self, buf: B, position: &mut u64) -> ReadTextResult<'r, B>; /// Read input until processing instruction is finished. /// /// This method expect that start sequence of a parser already was read. /// /// Returns a slice of data read up to the end of the thing being parsed. /// The end of thing and the returned content is determined by the used parser. /// /// If input (`Self`) is exhausted and no bytes was read, or if the specified /// parser could not find the ending sequence of the thing, returns `SyntaxError`. 
/// /// # Parameters /// - `buf`: Buffer that could be filled from an input (`Self`) and /// from which [events] could borrow their data /// - `position`: Will be increased by amount of bytes consumed /// /// A `P` type parameter is used to preserve state between calls to the underlying /// reader which provides bytes fed into the parser. /// /// [events]: crate::events::Event fn read_with

Usual XML rules does not apply inside it ///

For example, elements not needed to be "closed" /// /// "#); /// reader.config_mut().trim_text(true); /// /// let start = BytesStart::new("html"); /// let end = start.to_end().into_owned(); /// /// // First, we read a start event... /// assert_eq!(reader.read_event().unwrap(), Event::Start(start)); /// // ...and disable checking of end names because we expect HTML further... /// reader.config_mut().check_end_names = false; /// /// // ...then, we could read text content until close tag. /// // This call will correctly handle nested elements. /// let text = reader.read_text(end.name()).unwrap(); /// assert_eq!(text, Cow::Borrowed(r#" /// This is a HTML text ///

Usual XML rules does not apply inside it ///

For example, elements not needed to be "closed" /// "#)); /// /// // Now we can enable checks again /// reader.config_mut().check_end_names = true; /// /// // At the end we should get an Eof event, because we ate the whole XML /// assert_eq!(reader.read_event().unwrap(), Event::Eof); /// ``` /// /// [`Start`]: Event::Start /// [`decoder()`]: Reader::decoder() #[inline] pub fn read_text(&mut self, end: QName) -> Result> { self.reader.read_text(end) } } impl Deref for NsReader { type Target = Reader; #[inline] fn deref(&self) -> &Self::Target { &self.reader } } quick-xml-0.36.1/src/reader/slice_reader.rs000064400000000000000000000317650072674642500167110ustar 00000000000000//! This is an implementation of [`Reader`] for reading from a `&[u8]` as //! underlying byte stream. This implementation supports not using an //! intermediate buffer as the byte slice itself can be used to borrow from. use std::borrow::Cow; use std::io; #[cfg(feature = "encoding")] use crate::reader::EncodingRef; #[cfg(feature = "encoding")] use encoding_rs::{Encoding, UTF_8}; use crate::errors::{Error, Result}; use crate::events::Event; use crate::name::QName; use crate::parser::Parser; use crate::reader::{BangType, ReadTextResult, Reader, Span, XmlSource}; use crate::utils::is_whitespace; /// This is an implementation for reading from a `&[u8]` as underlying byte stream. /// This implementation supports not using an intermediate buffer as the byte slice /// itself can be used to borrow from. impl<'a> Reader<&'a [u8]> { /// Creates an XML reader from a string slice. #[allow(clippy::should_implement_trait)] pub fn from_str(s: &'a str) -> Self { // Rust strings are guaranteed to be UTF-8, so lock the encoding #[cfg(feature = "encoding")] { let mut reader = Self::from_reader(s.as_bytes()); reader.state.encoding = EncodingRef::Explicit(UTF_8); reader } #[cfg(not(feature = "encoding"))] Self::from_reader(s.as_bytes()) } /// Read an event that borrows from the input rather than a buffer. 
/// /// There is no asynchronous `read_event_async()` version of this function, /// because it is not necessary -- the contents are already in memory and no IO /// is needed, therefore there is no potential for blocking. /// /// # Examples /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::Event; /// use quick_xml::reader::Reader; /// /// let mut reader = Reader::from_str(r#" /// /// Test /// Test 2 /// /// "#); /// reader.config_mut().trim_text(true); /// /// let mut count = 0; /// let mut txt = Vec::new(); /// loop { /// match reader.read_event().unwrap() { /// Event::Start(e) => count += 1, /// Event::Text(e) => txt.push(e.unescape().unwrap().into_owned()), /// Event::Eof => break, /// _ => (), /// } /// } /// assert_eq!(count, 3); /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); /// ``` #[inline] pub fn read_event(&mut self) -> Result> { self.read_event_impl(()) } /// Reads until end element is found. This function is supposed to be called /// after you already read a [`Start`] event. /// /// Returns a span that cover content between `>` of an opening tag and `<` of /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and /// this method was called after reading expanded [`Start`] event. /// /// Manages nested cases where parent and child elements have the _literally_ /// same name. /// /// If a corresponding [`End`] event is not found, an error of type [`Error::IllFormed`] /// will be returned. In particularly, that error will be returned if you call /// this method without consuming the corresponding [`Start`] event first. /// /// The `end` parameter should contain name of the end element _in the reader /// encoding_. It is good practice to always get that parameter using /// [`BytesStart::to_end()`] method. /// /// The correctness of the skipped events does not checked, if you disabled /// the [`check_end_names`] option. 
/// /// There is no asynchronous `read_to_end_async()` version of this function, /// because it is not necessary -- the contents are already in memory and no IO /// is needed, therefore there is no potential for blocking. /// /// # Namespaces /// /// While the `Reader` does not support namespace resolution, namespaces /// does not change the algorithm for comparing names. Although the names /// `a:name` and `b:name` where both prefixes `a` and `b` resolves to the /// same namespace, are semantically equivalent, `` cannot close /// ``, because according to [the specification] /// /// > The end of every element that begins with a **start-tag** MUST be marked /// > by an **end-tag** containing a name that echoes the element's type as /// > given in the **start-tag** /// /// # Examples /// /// This example shows, how you can skip XML content after you read the /// start event. /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::reader::Reader; /// /// let mut reader = Reader::from_str(r#" /// /// /// /// /// /// /// /// /// "#); /// reader.config_mut().trim_text(true); /// /// let start = BytesStart::new("outer"); /// let end = start.to_end().into_owned(); /// /// // First, we read a start event... /// assert_eq!(reader.read_event().unwrap(), Event::Start(start)); /// /// // ...then, we could skip all events to the corresponding end event. /// // This call will correctly handle nested elements. /// // Note, however, that this method does not handle namespaces. 
/// reader.read_to_end(end.name()).unwrap(); /// /// // At the end we should get an Eof event, because we ate the whole XML /// assert_eq!(reader.read_event().unwrap(), Event::Eof); /// ``` /// /// [`Start`]: Event::Start /// [`End`]: Event::End /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end /// [`expand_empty_elements`]: crate::reader::Config::expand_empty_elements /// [`check_end_names`]: crate::reader::Config::check_end_names /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag pub fn read_to_end(&mut self, end: QName) -> Result { Ok(read_to_end!(self, end, (), read_event_impl, {})) } /// Reads content between start and end tags, including any markup. This /// function is supposed to be called after you already read a [`Start`] event. /// /// Manages nested cases where parent and child elements have the _literally_ /// same name. /// /// This method does not unescape read data, instead it returns content /// "as is" of the XML document. This is because it has no idea what text /// it reads, and if, for example, it contains CDATA section, attempt to /// unescape it content will spoil data. /// /// Any text will be decoded using the XML current [`decoder()`]. /// /// Actually, this method perform the following code: /// /// ```ignore /// let span = reader.read_to_end(end)?; /// let text = reader.decoder().decode(&reader.inner_slice[span]); /// ``` /// /// # Examples /// /// This example shows, how you can read a HTML content from your XML document. /// /// ``` /// # use pretty_assertions::assert_eq; /// # use std::borrow::Cow; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::reader::Reader; /// /// let mut reader = Reader::from_str(" /// /// This is a HTML text ///

Usual XML rules does not apply inside it ///

For example, elements not needed to be "closed" /// /// "); /// reader.config_mut().trim_text(true); /// /// let start = BytesStart::new("html"); /// let end = start.to_end().into_owned(); /// /// // First, we read a start event... /// assert_eq!(reader.read_event().unwrap(), Event::Start(start)); /// // ...and disable checking of end names because we expect HTML further... /// reader.config_mut().check_end_names = false; /// /// // ...then, we could read text content until close tag. /// // This call will correctly handle nested elements. /// let text = reader.read_text(end.name()).unwrap(); /// assert_eq!(text, Cow::Borrowed(r#" /// This is a HTML text ///

Usual XML rules does not apply inside it ///

For example, elements not needed to be "closed" /// "#)); /// assert!(matches!(text, Cow::Borrowed(_))); /// /// // Now we can enable checks again /// reader.config_mut().check_end_names = true; /// /// // At the end we should get an Eof event, because we ate the whole XML /// assert_eq!(reader.read_event().unwrap(), Event::Eof); /// ``` /// /// [`Start`]: Event::Start /// [`decoder()`]: Self::decoder() pub fn read_text(&mut self, end: QName) -> Result> { // self.reader will be changed, so store original reference let buffer = self.reader; let span = self.read_to_end(end)?; let len = span.end - span.start; // SAFETY: `span` can only contain indexes up to usize::MAX because it // was created from offsets from a single &[u8] slice self.decoder().decode(&buffer[0..len as usize]) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Implementation of `XmlSource` for `&[u8]` reader using a `Self` as buffer /// that will be borrowed by events. This implementation provides a zero-copy deserialization impl<'a> XmlSource<'a, ()> for &'a [u8] { #[cfg(not(feature = "encoding"))] #[inline] fn remove_utf8_bom(&mut self) -> io::Result<()> { if self.starts_with(crate::encoding::UTF8_BOM) { *self = &self[crate::encoding::UTF8_BOM.len()..]; } Ok(()) } #[cfg(feature = "encoding")] #[inline] fn detect_encoding(&mut self) -> io::Result> { if let Some((enc, bom_len)) = crate::encoding::detect_encoding(self) { *self = &self[bom_len..]; return Ok(Some(enc)); } Ok(None) } #[inline] fn read_text(&mut self, _buf: (), position: &mut u64) -> ReadTextResult<'a, ()> { match memchr::memchr(b'<', self) { Some(0) => { *position += 1; *self = &self[1..]; ReadTextResult::Markup(()) } Some(i) => { *position += i as u64 + 1; let bytes = &self[..i]; *self = &self[i + 1..]; ReadTextResult::UpToMarkup(bytes) } None => { *position += self.len() as u64; let bytes = &self[..]; *self = &[]; ReadTextResult::UpToEof(bytes) } } } #[inline] fn read_with

(&mut self, mut parser: P, _buf: (), position: &mut u64) -> Result<&'a [u8]> where P: Parser, { if let Some(i) = parser.feed(self) { // +1 for `>` which we do not include *position += i as u64 + 1; let bytes = &self[..i]; *self = &self[i + 1..]; return Ok(bytes); } *position += self.len() as u64; Err(Error::Syntax(P::eof_error())) } #[inline] fn read_bang_element(&mut self, _buf: (), position: &mut u64) -> Result<(BangType, &'a [u8])> { // Peeked one bang ('!') before being called, so it's guaranteed to // start with it. debug_assert_eq!(self[0], b'!'); let bang_type = BangType::new(self[1..].first().copied())?; if let Some((bytes, i)) = bang_type.parse(&[], self) { *position += i as u64; *self = &self[i..]; return Ok((bang_type, bytes)); } *position += self.len() as u64; Err(bang_type.to_err()) } #[inline] fn skip_whitespace(&mut self, position: &mut u64) -> io::Result<()> { let whitespaces = self .iter() .position(|b| !is_whitespace(*b)) .unwrap_or(self.len()); *position += whitespaces as u64; *self = &self[whitespaces..]; Ok(()) } #[inline] fn peek_one(&mut self) -> io::Result> { Ok(self.first().copied()) } } #[cfg(test)] mod test { use crate::reader::test::check; use crate::reader::XmlSource; /// Default buffer constructor just pass the byte array from the test fn identity(input: T) -> T { input } check!( #[test] read_event_impl, read_until_close, identity, () ); } quick-xml-0.36.1/src/reader/state.rs000064400000000000000000000347110072674642500154020ustar 00000000000000#[cfg(feature = "encoding")] use encoding_rs::UTF_8; use crate::encoding::Decoder; use crate::errors::{Error, IllFormedError, Result, SyntaxError}; use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event}; #[cfg(feature = "encoding")] use crate::reader::EncodingRef; use crate::reader::{BangType, Config, ParseState}; use crate::utils::{is_whitespace, name_len}; /// A struct that holds a current reader state and a parser configuration. 
/// It is independent on a way of reading data: the reader feed data into it and /// get back produced [`Event`]s. #[derive(Clone, Debug)] pub(super) struct ReaderState { /// Number of bytes read from the source of data since the reader was created pub offset: u64, /// A snapshot of an `offset` of the last error returned. It can be less than /// `offset`, because some errors conveniently report at earlier position, /// and changing `offset` is not possible, because `Error::IllFormed` errors /// are recoverable. pub last_error_offset: u64, /// Defines how to process next byte pub state: ParseState, /// User-defined settings that affect parsing pub config: Config, /// All currently Started elements which didn't have a matching /// End element yet. /// /// For an XML /// /// ```xml /// | /// ``` /// when cursor at the `|` position buffer contains: /// /// ```text /// rootinner /// ^ ^ /// ``` /// /// The `^` symbols shows which positions stored in the [`Self::opened_starts`] /// (0 and 4 in that case). opened_buffer: Vec, /// Opened name start indexes into [`Self::opened_buffer`]. See documentation /// for that field for details opened_starts: Vec, #[cfg(feature = "encoding")] /// Reference to the encoding used to read an XML pub encoding: EncodingRef, } impl ReaderState { /// Trims end whitespaces from `bytes`, if required, and returns a text event. /// /// # Parameters /// - `bytes`: data from the start of stream to the first `<` or from `>` to `<` pub fn emit_text<'b>(&mut self, bytes: &'b [u8]) -> BytesText<'b> { let mut content = bytes; if self.config.trim_text_end { // Skip the ending '<' let len = bytes .iter() .rposition(|&b| !is_whitespace(b)) .map_or(0, |p| p + 1); content = &bytes[..len]; } BytesText::wrap(content, self.decoder()) } /// Returns `Comment`, `CData` or `DocType` event. 
/// /// `buf` contains data between `<` and `>`: /// - CDATA: `![CDATA[...]]` /// - Comment: `!--...--` /// - Doctype (uppercase): `!D...` /// - Doctype (lowercase): `!d...` pub fn emit_bang<'b>(&mut self, bang_type: BangType, buf: &'b [u8]) -> Result> { debug_assert_eq!( buf.first(), Some(&b'!'), "CDATA, comment or DOCTYPE should start from '!'" ); let uncased_starts_with = |string: &[u8], prefix: &[u8]| { string.len() >= prefix.len() && string[..prefix.len()].eq_ignore_ascii_case(prefix) }; let len = buf.len(); match bang_type { BangType::Comment if buf.starts_with(b"!--") => { debug_assert!(buf.ends_with(b"--")); if self.config.check_comments { // search if '--' not in comments let mut haystack = &buf[3..len - 2]; let mut off = 0; while let Some(p) = memchr::memchr(b'-', haystack) { off += p + 1; // if next byte after `-` is also `-`, return an error if buf[3 + off] == b'-' { // Explanation of the magic: // // - `self.offset`` just after `>`, // - `buf` contains `!-- con--tent --` // - `p` is counted from byte after `: // ~~~~~~~~~~~~~~~~ : - buf // : =========== : - zone of search (possible values of `p`) // : |---p : - p is counted from | (| is 0) // : : : ^ - self.offset // ^ : : - self.offset - len // ^ : - self.offset - len + 2 // ^ - self.offset - len + 2 + p self.last_error_offset = self.offset - len as u64 + 2 + p as u64; return Err(Error::IllFormed(IllFormedError::DoubleHyphenInComment)); } // Continue search after single `-` (+1 to skip it) haystack = &haystack[p + 1..]; } } Ok(Event::Comment(BytesText::wrap( // Cut of `!--` and `--` from start and end &buf[3..len - 2], self.decoder(), ))) } // XML requires uppercase only: // https://www.w3.org/TR/xml11/#sec-cdata-sect // Even HTML5 required uppercase only: // https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state BangType::CData if buf.starts_with(b"![CDATA[") => { debug_assert!(buf.ends_with(b"]]")); Ok(Event::CData(BytesCData::wrap( // Cut of `![CDATA[` and `]]` from start 
and end &buf[8..len - 2], self.decoder(), ))) } // XML requires uppercase only, but we will check that on validation stage: // https://www.w3.org/TR/xml11/#sec-prolog-dtd // HTML5 allows mixed case for doctype declarations: // https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state BangType::DocType if uncased_starts_with(buf, b"!DOCTYPE") => { match buf[8..].iter().position(|&b| !is_whitespace(b)) { Some(start) => Ok(Event::DocType(BytesText::wrap( // Cut of `!DOCTYPE` and any number of spaces from start &buf[8 + start..], self.decoder(), ))), None => { // Because we here, we at least read `` and offset after `>`. // We want report error at place where name is expected - this is just // before `>` self.last_error_offset = self.offset - 1; return Err(Error::IllFormed(IllFormedError::MissingDoctypeName)); } } } _ => { // // ^^^^^ - `buf` does not contain `<` and `>`, but `self.offset` is after `>`. // ^------- We report error at that position, so we need to subtract 2 and buf len self.last_error_offset = self.offset - len as u64 - 2; Err(bang_type.to_err()) } } } /// Wraps content of `buf` into the [`Event::End`] event. Does the check that /// end name matches the last opened start name if `self.config.check_end_names` is set. /// /// `buf` contains data between `<` and `>`, for example `/tag`. pub fn emit_end<'b>(&mut self, buf: &'b [u8]) -> Result> { debug_assert_eq!( buf.first(), Some(&b'/'), "closing tag should start from '/'" ); // Strip the `/` character. `content` contains data between `` let content = &buf[1..]; // XML standard permits whitespaces after the markup name in closing tags. // Let's strip them from the buffer before comparing tag names. 
let name = if self.config.trim_markup_names_in_closing_tags { if let Some(pos_end_name) = content.iter().rposition(|&b| !is_whitespace(b)) { &content[..pos_end_name + 1] } else { content } } else { content }; let decoder = self.decoder(); // Get the index in self.opened_buffer of the name of the last opened tag match self.opened_starts.pop() { Some(start) => { if self.config.check_end_names { let expected = &self.opened_buffer[start..]; if name != expected { let expected = decoder.decode(expected).unwrap_or_default().into_owned(); // #513: In order to allow error recovery we should drop content of the buffer self.opened_buffer.truncate(start); // Report error at start of the end tag at `<` character // -2 for `<` and `>` self.last_error_offset = self.offset - buf.len() as u64 - 2; return Err(Error::IllFormed(IllFormedError::MismatchedEndTag { expected, found: decoder.decode(name).unwrap_or_default().into_owned(), })); } } self.opened_buffer.truncate(start); } None => { if !self.config.allow_unmatched_ends { // Report error at start of the end tag at `<` character // -2 for `<` and `>` self.last_error_offset = self.offset - buf.len() as u64 - 2; return Err(Error::IllFormed(IllFormedError::UnmatchedEndTag( decoder.decode(name).unwrap_or_default().into_owned(), ))); } } } Ok(Event::End(BytesEnd::wrap(name.into()))) } /// `buf` contains data between `<` and `>` and the first byte is `?`. /// `self.offset` already after the `>` /// /// Returns `Decl` or `PI` event pub fn emit_question_mark<'b>(&mut self, buf: &'b [u8]) -> Result> { debug_assert!(buf.len() > 0); debug_assert_eq!(buf[0], b'?'); let len = buf.len(); // We accept at least // ~~ - len = 2 if len > 1 && buf[len - 1] == b'?' 
{ // Cut of `?` and `?` from start and end let content = &buf[1..len - 1]; let len = content.len(); if content.starts_with(b"xml") && (len == 3 || is_whitespace(content[3])) { let event = BytesDecl::from_start(BytesStart::wrap(content, 3)); // Try getting encoding from the declaration event #[cfg(feature = "encoding")] if self.encoding.can_be_refined() { if let Some(encoding) = event.encoder() { self.encoding = EncodingRef::XmlDetected(encoding); } } Ok(Event::Decl(event)) } else { Ok(Event::PI(BytesPI::wrap(content, name_len(content)))) } } else { // `) self.last_error_offset = self.offset - len as u64 - 2; Err(Error::Syntax(SyntaxError::UnclosedPIOrXmlDecl)) } } /// Converts content of a tag to a `Start` or an `Empty` event /// /// # Parameters /// - `content`: Content of a tag between `<` and `>` pub fn emit_start<'b>(&mut self, content: &'b [u8]) -> Event<'b> { if let Some(content) = content.strip_suffix(b"/") { // This is self-closed tag `` let event = BytesStart::wrap(content, name_len(content)); if self.config.expand_empty_elements { self.state = ParseState::InsideEmpty; self.opened_starts.push(self.opened_buffer.len()); self.opened_buffer.extend(event.name().as_ref()); Event::Start(event) } else { Event::Empty(event) } } else { let event = BytesStart::wrap(content, name_len(content)); // #514: Always store names event when .check_end_names == false, // because checks can be temporary disabled and when they would be // enabled, we should have that information self.opened_starts.push(self.opened_buffer.len()); self.opened_buffer.extend(event.name().as_ref()); Event::Start(event) } } #[inline] pub fn close_expanded_empty(&mut self) -> BytesEnd<'static> { self.state = ParseState::InsideText; let name = self .opened_buffer .split_off(self.opened_starts.pop().unwrap()); BytesEnd::wrap(name.into()) } /// Get the decoder, used to decode bytes, read by this reader, to the strings. 
/// /// If [`encoding`] feature is enabled, the used encoding may change after /// parsing the XML declaration, otherwise encoding is fixed to UTF-8. /// /// If [`encoding`] feature is enabled and no encoding is specified in declaration, /// defaults to UTF-8. /// /// [`encoding`]: ../../index.html#encoding pub const fn decoder(&self) -> Decoder { Decoder { #[cfg(feature = "encoding")] encoding: self.encoding.encoding(), } } } impl Default for ReaderState { fn default() -> Self { Self { offset: 0, last_error_offset: 0, state: ParseState::Init, config: Config::default(), opened_buffer: Vec::new(), opened_starts: Vec::new(), #[cfg(feature = "encoding")] encoding: EncodingRef::Implicit(UTF_8), } } } quick-xml-0.36.1/src/se/content.rs000064400000000000000000001634260072674642500151070ustar 00000000000000//! Contains serializer for content of an XML element use crate::de::TEXT_KEY; use crate::errors::serialize::DeError; use crate::se::element::{ElementSerializer, Struct, Tuple}; use crate::se::simple_type::{QuoteTarget, SimpleTypeSerializer}; use crate::se::{Indent, QuoteLevel, XmlName}; use serde::ser::{ Impossible, Serialize, SerializeSeq, SerializeTuple, SerializeTupleStruct, Serializer, }; use serde::serde_if_integer128; use std::fmt::Write; macro_rules! write_primitive { ($method:ident ( $ty:ty )) => { #[inline] fn $method(self, value: $ty) -> Result { self.into_simple_type_serializer().$method(value)?; Ok(()) } }; } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A serializer used to serialize content of an element. It does not write /// surrounding tags. Unlike the [`ElementSerializer`], this serializer serializes /// enums using variant names as tag names, i. e. 
as `...` /// /// This serializer does the following: /// - numbers converted to a decimal representation and serialized as naked strings; /// - booleans serialized ether as `"true"` or `"false"`; /// - strings and characters are serialized as naked strings; /// - `None` does not write anything; /// - `Some` and newtypes are serialized as an inner type using the same serializer; /// - units (`()`) and unit structs does not write anything; /// - sequences, tuples and tuple structs are serialized without delimiters. /// `[1, 2, 3]` would be serialized as `123` (if not using indent); /// - structs and maps are not supported ([`DeError::Unsupported`] is returned); /// - enums: /// - unit variants are serialized as self-closed ``; /// - newtype variants are serialized as inner value wrapped in `...`; /// - tuple variants are serialized as sequences where each element is wrapped /// in `...`; /// - struct variants are serialized as a sequence of fields wrapped in /// `...`. Each field is serialized recursively using /// either [`ElementSerializer`], `ContentSerializer` (`$value` fields), or /// [`SimpleTypeSerializer`] (`$text` fields). In particular, the empty struct /// is serialized as ``; /// /// Usage of empty tags depends on the [`Self::expand_empty_elements`] setting. /// /// The difference between this serializer and [`SimpleTypeSerializer`] is in how /// sequences and maps are serialized. Unlike `SimpleTypeSerializer` it supports /// any types in sequences and serializes them as list of elements, but that has /// drawbacks. Sequence of primitives would be serialized without delimiters and /// it will be impossible to distinguish between them. Even worse, when serializing /// with indent, sequence of strings become one big string with additional content /// and it would be impossible to distinguish between content of the original /// strings and inserted indent characters. 
pub struct ContentSerializer<'w, 'i, W: Write> { pub writer: &'w mut W, /// Defines which XML characters need to be escaped in text content pub level: QuoteLevel, /// Current indentation level. Note, that `Indent::None` means that there is /// no indentation at all, but `write_indent == false` means only, that indent /// writing is disabled in this instantiation of `ContentSerializer`, but /// child serializers should have access to the actual state of indentation. pub(super) indent: Indent<'i>, /// If `true`, then current indent will be written before writing the content, /// but only if content is not empty. pub write_indent: bool, // If `true`, then empty elements will be serialized as `` // instead of ``. pub expand_empty_elements: bool, //TODO: add settings to disallow consequent serialization of primitives } impl<'w, 'i, W: Write> ContentSerializer<'w, 'i, W> { /// Turns this serializer into serializer of a text content #[inline] pub fn into_simple_type_serializer(self) -> SimpleTypeSerializer<'i, &'w mut W> { //TODO: Customization point: choose between CDATA and Text representation SimpleTypeSerializer { writer: self.writer, target: QuoteTarget::Text, level: self.level, indent: if self.write_indent { self.indent } else { Indent::None }, } } /// Creates new serializer that shares state with this serializer and /// writes to the same underlying writer #[inline] pub fn new_seq_element_serializer(&mut self) -> ContentSerializer { ContentSerializer { writer: self.writer, level: self.level, indent: self.indent.borrow(), write_indent: self.write_indent, expand_empty_elements: self.expand_empty_elements, } } /// Writes `name` as self-closed tag #[inline] pub(super) fn write_empty(mut self, name: XmlName) -> Result<(), DeError> { self.write_indent()?; if self.expand_empty_elements { self.writer.write_char('<')?; self.writer.write_str(name.0)?; self.writer.write_str(">')?; } else { self.writer.write_str("<")?; self.writer.write_str(name.0)?; 
self.writer.write_str("/>")?; } Ok(()) } /// Writes simple type content between `name` tags pub(super) fn write_wrapped(mut self, name: XmlName, serialize: S) -> Result<(), DeError> where S: for<'a> FnOnce(SimpleTypeSerializer<'i, &'a mut W>) -> Result<&'a mut W, DeError>, { self.write_indent()?; self.writer.write_char('<')?; self.writer.write_str(name.0)?; self.writer.write_char('>')?; let writer = serialize(self.into_simple_type_serializer())?; writer.write_str("')?; Ok(()) } pub(super) fn write_indent(&mut self) -> Result<(), DeError> { if self.write_indent { self.indent.write_indent(&mut self.writer)?; self.write_indent = false; } Ok(()) } } impl<'w, 'i, W: Write> Serializer for ContentSerializer<'w, 'i, W> { type Ok = (); type Error = DeError; type SerializeSeq = Self; type SerializeTuple = Self; type SerializeTupleStruct = Self; type SerializeTupleVariant = Tuple<'w, 'i, W>; type SerializeMap = Impossible; type SerializeStruct = Impossible; type SerializeStructVariant = Struct<'w, 'i, W>; write_primitive!(serialize_bool(bool)); write_primitive!(serialize_i8(i8)); write_primitive!(serialize_i16(i16)); write_primitive!(serialize_i32(i32)); write_primitive!(serialize_i64(i64)); write_primitive!(serialize_u8(u8)); write_primitive!(serialize_u16(u16)); write_primitive!(serialize_u32(u32)); write_primitive!(serialize_u64(u64)); serde_if_integer128! 
{ write_primitive!(serialize_i128(i128)); write_primitive!(serialize_u128(u128)); } write_primitive!(serialize_f32(f32)); write_primitive!(serialize_f64(f64)); write_primitive!(serialize_char(char)); write_primitive!(serialize_bytes(&[u8])); #[inline] fn serialize_str(self, value: &str) -> Result { if !value.is_empty() { self.into_simple_type_serializer().serialize_str(value)?; } Ok(()) } /// Does not write anything #[inline] fn serialize_none(self) -> Result { Ok(()) } fn serialize_some(self, value: &T) -> Result { value.serialize(self) } /// Does not write anything #[inline] fn serialize_unit(self) -> Result { Ok(()) } /// Does not write anything #[inline] fn serialize_unit_struct(self, _name: &'static str) -> Result { Ok(()) } /// If `variant` is a special `$text` variant, then do nothing, otherwise /// checks `variant` for XML name validity and writes ``. fn serialize_unit_variant( self, _name: &'static str, _variant_index: u32, variant: &'static str, ) -> Result { if variant == TEXT_KEY { Ok(()) } else { let name = XmlName::try_from(variant)?; self.write_empty(name) } } fn serialize_newtype_struct( self, _name: &'static str, value: &T, ) -> Result { value.serialize(self) } /// If `variant` is a special `$text` variant, then writes `value` as a `xs:simpleType`, /// otherwise checks `variant` for XML name validity and writes `value` as a new /// `` element. 
fn serialize_newtype_variant( self, _name: &'static str, _variant_index: u32, variant: &'static str, value: &T, ) -> Result { if variant == TEXT_KEY { value.serialize(self.into_simple_type_serializer())?; Ok(()) } else { value.serialize(ElementSerializer { key: XmlName::try_from(variant)?, ser: self, }) } } #[inline] fn serialize_seq(self, _len: Option) -> Result { Ok(self) } #[inline] fn serialize_tuple(self, len: usize) -> Result { self.serialize_seq(Some(len)) } #[inline] fn serialize_tuple_struct( self, _name: &'static str, len: usize, ) -> Result { self.serialize_tuple(len) } /// Serializes variant as a tuple with name `variant`, producing /// /// ```xml /// /// /// /// /// ``` #[inline] fn serialize_tuple_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, len: usize, ) -> Result { if variant == TEXT_KEY { self.into_simple_type_serializer() .serialize_tuple_struct(name, len) .map(Tuple::Text) } else { let ser = ElementSerializer { key: XmlName::try_from(variant)?, ser: self, }; ser.serialize_tuple_struct(name, len).map(Tuple::Element) } } fn serialize_map(self, _len: Option) -> Result { Err(DeError::Unsupported( "serialization of map types is not supported in `$value` field".into(), )) } #[inline] fn serialize_struct( self, name: &'static str, _len: usize, ) -> Result { Err(DeError::Unsupported( format!("serialization of struct `{name}` is not supported in `$value` field").into(), )) } /// Serializes variant as an element with name `variant`, producing /// /// ```xml /// /// /// /// ``` /// /// If struct has no fields which is represented by nested elements or a text, /// it may be serialized as self-closed element ``. 
#[inline] fn serialize_struct_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, len: usize, ) -> Result { if variant == TEXT_KEY { Err(DeError::Unsupported( format!("cannot serialize `$text` struct variant of `{}` enum", name).into(), )) } else { let ser = ElementSerializer { key: XmlName::try_from(variant)?, ser: self, }; ser.serialize_struct(name, len) } } } impl<'w, 'i, W: Write> SerializeSeq for ContentSerializer<'w, 'i, W> { type Ok = (); type Error = DeError; fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { value.serialize(self.new_seq_element_serializer())?; // Write indent for next element self.write_indent = true; Ok(()) } #[inline] fn end(self) -> Result { Ok(()) } } impl<'w, 'i, W: Write> SerializeTuple for ContentSerializer<'w, 'i, W> { type Ok = (); type Error = DeError; #[inline] fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { SerializeSeq::serialize_element(self, value) } #[inline] fn end(self) -> Result { SerializeSeq::end(self) } } impl<'w, 'i, W: Write> SerializeTupleStruct for ContentSerializer<'w, 'i, W> { type Ok = (); type Error = DeError; #[inline] fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { SerializeSeq::serialize_element(self, value) } #[inline] fn end(self) -> Result { SerializeSeq::end(self) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Make tests public to reuse types in `elements::tests` module #[cfg(test)] pub(super) mod tests { use super::*; use crate::utils::Bytes; use serde::Serialize; use std::collections::BTreeMap; #[derive(Debug, Serialize, PartialEq)] pub struct Unit; #[derive(Debug, Serialize, PartialEq)] #[serde(rename = "<\"&'>")] pub struct UnitEscaped; #[derive(Debug, Serialize, PartialEq)] pub struct Newtype(pub usize); #[derive(Debug, Serialize, PartialEq)] pub struct 
Tuple(pub &'static str, pub usize); #[derive(Debug, Serialize, PartialEq)] pub struct Struct { pub key: &'static str, pub val: (usize, usize), } /// Struct with a special `$text` field #[derive(Debug, Serialize, PartialEq)] pub struct Text { pub before: &'static str, #[serde(rename = "$text")] pub content: T, pub after: &'static str, } /// Struct with a special `$value` field #[derive(Debug, Serialize, PartialEq)] pub struct Value { pub before: &'static str, #[serde(rename = "$value")] pub content: T, pub after: &'static str, } /// Attributes identified by starting with `@` character #[derive(Debug, Serialize, PartialEq)] pub struct Attributes { #[serde(rename = "@key")] pub key: &'static str, #[serde(rename = "@val")] pub val: (usize, usize), } #[derive(Debug, Serialize, PartialEq)] pub struct AttributesBefore { #[serde(rename = "@key")] pub key: &'static str, pub val: usize, } #[derive(Debug, Serialize, PartialEq)] pub struct AttributesAfter { pub key: &'static str, #[serde(rename = "@val")] pub val: usize, } #[derive(Debug, Serialize, PartialEq)] pub enum Enum { Unit, /// Variant name becomes a tag name, but the name of variant is invalid /// XML name. 
Serialization of this element should be forbidden #[serde(rename = "<\"&'>")] UnitEscaped, Newtype(usize), Tuple(&'static str, usize), Struct { key: &'static str, /// Should be serialized as elements val: (usize, usize), }, Attributes { #[serde(rename = "@key")] key: &'static str, #[serde(rename = "@val")] val: (usize, usize), }, AttributesBefore { #[serde(rename = "@key")] key: &'static str, val: usize, }, AttributesAfter { key: &'static str, #[serde(rename = "@val")] val: usize, }, } #[derive(Debug, Serialize, PartialEq)] pub enum SpecialEnum { /// Struct variant with a special `$text` field Text { before: &'static str, #[serde(rename = "$text")] content: T, after: &'static str, }, /// Struct variant with a special `$value` field Value { before: &'static str, #[serde(rename = "$value")] content: T, after: &'static str, }, } mod without_indent { use super::Struct; use super::*; use pretty_assertions::assert_eq; /// Checks that given `$data` successfully serialized as `$expected` macro_rules! serialize_as { ($name:ident: $data:expr => $expected:expr) => { #[test] fn $name() { let mut buffer = String::new(); let ser = ContentSerializer { writer: &mut buffer, level: QuoteLevel::Full, indent: Indent::None, write_indent: false, expand_empty_elements: false, }; $data.serialize(ser).unwrap(); assert_eq!(buffer, $expected); } }; } /// Checks that attempt to serialize given `$data` results to a /// serialization error `$kind` with `$reason` macro_rules! 
err { ($name:ident: $data:expr => $kind:ident($reason:literal)) => { #[test] fn $name() { let mut buffer = String::new(); let ser = ContentSerializer { writer: &mut buffer, level: QuoteLevel::Full, indent: Indent::None, write_indent: false, expand_empty_elements: false, }; match $data.serialize(ser).unwrap_err() { DeError::$kind(e) => assert_eq!(e, $reason), e => panic!( "Expected `Err({}({}))`, but got `{:?}`", stringify!($kind), $reason, e ), } // We could write something before fail // assert_eq!(buffer, ""); } }; } // Primitives is serialized in the same way as for SimpleTypeSerializer serialize_as!(false_: false => "false"); serialize_as!(true_: true => "true"); serialize_as!(i8_: -42i8 => "-42"); serialize_as!(i16_: -4200i16 => "-4200"); serialize_as!(i32_: -42000000i32 => "-42000000"); serialize_as!(i64_: -42000000000000i64 => "-42000000000000"); serialize_as!(isize_: -42000000000000isize => "-42000000000000"); serialize_as!(u8_: 42u8 => "42"); serialize_as!(u16_: 4200u16 => "4200"); serialize_as!(u32_: 42000000u32 => "42000000"); serialize_as!(u64_: 42000000000000u64 => "42000000000000"); serialize_as!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! 
{ serialize_as!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); serialize_as!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } serialize_as!(f32_: 4.2f32 => "4.2"); serialize_as!(f64_: 4.2f64 => "4.2"); serialize_as!(char_non_escaped: 'h' => "h"); serialize_as!(char_lt: '<' => "<"); serialize_as!(char_gt: '>' => ">"); serialize_as!(char_amp: '&' => "&"); serialize_as!(char_apos: '\'' => "'"); serialize_as!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content serialize_as!(char_space: ' ' => " "); serialize_as!(str_non_escaped: "non-escaped string" => "non-escaped string"); serialize_as!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Bytes(b"<\"escaped & bytes'>") => Unsupported("`serialize_bytes` not supported yet")); serialize_as!(option_none: Option::::None => ""); serialize_as!(option_some: Some("non-escaped string") => "non-escaped string"); serialize_as!(option_some_empty_str: Some("") => ""); serialize_as!(unit: () => ""); serialize_as!(unit_struct: Unit => ""); serialize_as!(unit_struct_escaped: UnitEscaped => ""); // Unlike SimpleTypeSerializer, enumeration values serialized as tags serialize_as!(enum_unit: Enum::Unit => ""); err!(enum_unit_escaped: Enum::UnitEscaped => Unsupported("character `<` is not allowed at the start of an XML name `<\"&'>`")); // Newtypes recursively applies ContentSerializer serialize_as!(newtype: Newtype(42) => "42"); serialize_as!(enum_newtype: Enum::Newtype(42) => "42"); // Note that sequences of primitives serialized without delimiters! 
serialize_as!(seq: vec![1, 2, 3] => "123"); serialize_as!(seq_empty: Vec::::new() => ""); serialize_as!(tuple: ("<\"&'>", "with\t\r\n spaces", 3usize) => "<"&'>\ with\t\r\n spaces\ 3"); serialize_as!(tuple_struct: Tuple("first", 42) => "first\ 42"); serialize_as!(enum_tuple: Enum::Tuple("first", 42) => "first\ 42"); // Structured types cannot be serialized without surrounding tag, which // only `enum` can provide err!(map: BTreeMap::from([("_1", 2), ("_3", 4)]) => Unsupported("serialization of map types is not supported in `$value` field")); err!(struct_: Struct { key: "answer", val: (42, 42) } => Unsupported("serialization of struct `Struct` is not supported in `$value` field")); serialize_as!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => "\ answer\ 42\ 42\ "); /// Special field name `$text` should be serialized as a text content mod text_field { use super::*; use pretty_assertions::assert_eq; err!(map: BTreeMap::from([("$text", 2), ("_3", 4)]) => Unsupported("serialization of map types is not supported in `$value` field")); err!(struct_: Text { before: "answer", content: (42, 42), after: "answer", } => Unsupported("serialization of struct `Text` is not supported in `$value` field")); serialize_as!(enum_struct: SpecialEnum::Text { before: "answer", content: (42, 42), after: "answer", } => "\ answer\ 42 42\ answer\ "); } /// `$text` field inside a struct variant of an enum mod enum_with_text_field { use super::*; use pretty_assertions::assert_eq; macro_rules! 
text { ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: SpecialEnum::Text { before: "answer", content: $data, after: "answer", } => concat!( "answer", $expected, "answer", )); }; } text!(false_: false => "false"); text!(true_: true => "true"); text!(i8_: -42i8 => "-42"); text!(i16_: -4200i16 => "-4200"); text!(i32_: -42000000i32 => "-42000000"); text!(i64_: -42000000000000i64 => "-42000000000000"); text!(isize_: -42000000000000isize => "-42000000000000"); text!(u8_: 42u8 => "42"); text!(u16_: 4200u16 => "4200"); text!(u32_: 42000000u32 => "42000000"); text!(u64_: 42000000000000u64 => "42000000000000"); text!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! { text!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); text!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } text!(f32_: 4.2f32 => "4.2"); text!(f64_: 4.2f64 => "4.2"); text!(char_non_escaped: 'h' => "h"); text!(char_lt: '<' => "<"); text!(char_gt: '>' => ">"); text!(char_amp: '&' => "&"); text!(char_apos: '\'' => "'"); text!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content text!(char_space: ' ' => " "); text!(str_non_escaped: "non-escaped string" => "non-escaped string"); text!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: SpecialEnum::Text { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); text!(option_none: Option::<&str>::None => ""); text!(option_some: Some("non-escaped string") => "non-escaped string"); text!(option_some_empty_str: Some("") => ""); text!(unit: () => ""); text!(unit_struct: Unit => ""); text!(unit_struct_escaped: UnitEscaped => ""); text!(enum_unit: Enum::Unit => "Unit"); text!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); text!(newtype: Newtype(42) => "42"); // We have no 
space where name of a variant can be stored err!(enum_newtype: SpecialEnum::Text { before: "answer", content: Enum::Newtype(42), after: "answer", } => Unsupported("cannot serialize enum newtype variant `Enum::Newtype` as text content value")); // Sequences are serialized separated by spaces, all spaces inside are escaped text!(seq: vec![1, 2, 3] => "1 2 3"); text!(seq_empty: Vec::::new() => ""); text!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'> \ with spaces \ 3"); text!(tuple_struct: Tuple("first", 42) => "first 42"); // We have no space where name of a variant can be stored err!(enum_tuple: SpecialEnum::Text { before: "answer", content: Enum::Tuple("first", 42), after: "answer", } => Unsupported("cannot serialize enum tuple variant `Enum::Tuple` as text content value")); // Complex types cannot be serialized in `$text` field err!(map: SpecialEnum::Text { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("cannot serialize map as text content value")); err!(struct_: SpecialEnum::Text { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("cannot serialize struct `Struct` as text content value")); err!(enum_struct: SpecialEnum::Text { before: "answer", content: Enum::Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("cannot serialize enum struct variant `Enum::Struct` as text content value")); } /// `$value` field inside a struct variant of an enum mod enum_with_value_field { use super::*; use pretty_assertions::assert_eq; macro_rules! 
value { ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: SpecialEnum::Value { before: "answer", content: $data, after: "answer", } => concat!( "answer", $expected, "answer", )); }; } value!(false_: false => "false"); value!(true_: true => "true"); value!(i8_: -42i8 => "-42"); value!(i16_: -4200i16 => "-4200"); value!(i32_: -42000000i32 => "-42000000"); value!(i64_: -42000000000000i64 => "-42000000000000"); value!(isize_: -42000000000000isize => "-42000000000000"); value!(u8_: 42u8 => "42"); value!(u16_: 4200u16 => "4200"); value!(u32_: 42000000u32 => "42000000"); value!(u64_: 42000000000000u64 => "42000000000000"); value!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! { value!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); value!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } value!(f32_: 4.2f32 => "4.2"); value!(f64_: 4.2f64 => "4.2"); value!(char_non_escaped: 'h' => "h"); value!(char_lt: '<' => "<"); value!(char_gt: '>' => ">"); value!(char_amp: '&' => "&"); value!(char_apos: '\'' => "'"); value!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content value!(char_space: ' ' => " "); value!(str_non_escaped: "non-escaped string" => "non-escaped string"); value!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: SpecialEnum::Value { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); value!(option_none: Option::<&str>::None => ""); value!(option_some: Some("non-escaped string") => "non-escaped string"); value!(option_some_empty_str: Some("") => ""); value!(unit: () => ""); value!(unit_struct: Unit => ""); value!(unit_struct_escaped: UnitEscaped => ""); value!(enum_unit: Enum::Unit => ""); err!(enum_unit_escaped: SpecialEnum::Value { before: "answer", content: 
Enum::UnitEscaped, after: "answer", } => Unsupported("character `<` is not allowed at the start of an XML name `<\"&'>`")); value!(newtype: Newtype(42) => "42"); value!(enum_newtype: Enum::Newtype(42) => "42"); // Note that sequences of primitives serialized without delimiters! value!(seq: vec![1, 2, 3] => "123"); value!(seq_empty: Vec::::new() => ""); value!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'>\ with\t\n\r spaces\ 3"); value!(tuple_struct: Tuple("first", 42) => "first42"); value!(enum_tuple: Enum::Tuple("first", 42) => "first\ 42"); // We cannot wrap map or struct in any container and should not // flatten it, so it is impossible to serialize maps and structs err!(map: SpecialEnum::Value { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("serialization of map types is not supported in `$value` field")); err!(struct_: SpecialEnum::Value { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("serialization of struct `Struct` is not supported in `$value` field")); value!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => "\ answer\ 42\ 42\ "); } mod attributes { use super::*; use pretty_assertions::assert_eq; err!(map_attr: BTreeMap::from([("@key1", 1), ("@key2", 2)]) => Unsupported("serialization of map types is not supported in `$value` field")); err!(map_mixed: BTreeMap::from([("@key1", 1), ("key2", 2)]) => Unsupported("serialization of map types is not supported in `$value` field")); err!(struct_: Attributes { key: "answer", val: (42, 42) } => Unsupported("serialization of struct `Attributes` is not supported in `$value` field")); err!(struct_before: AttributesBefore { key: "answer", val: 42 } => Unsupported("serialization of struct `AttributesBefore` is not supported in `$value` field")); err!(struct_after: AttributesAfter { key: "answer", val: 42 } => Unsupported("serialization of struct `AttributesAfter` is not supported in 
`$value` field")); serialize_as!(enum_: Enum::Attributes { key: "answer", val: (42, 42) } => r#""#); serialize_as!(enum_before: Enum::AttributesBefore { key: "answer", val: 42 } => r#"42"#); serialize_as!(enum_after: Enum::AttributesAfter { key: "answer", val: 42 } => r#"answer"#); } } mod with_indent { use super::Struct; use super::*; use crate::writer::Indentation; use pretty_assertions::assert_eq; /// Checks that given `$data` successfully serialized as `$expected` macro_rules! serialize_as { ($name:ident: $data:expr => $expected:expr) => { #[test] fn $name() { let mut buffer = String::new(); let ser = ContentSerializer { writer: &mut buffer, level: QuoteLevel::Full, indent: Indent::Owned(Indentation::new(b' ', 2)), write_indent: false, expand_empty_elements: false, }; $data.serialize(ser).unwrap(); assert_eq!(buffer, $expected); } }; } /// Checks that attempt to serialize given `$data` results to a /// serialization error `$kind` with `$reason` macro_rules! err { ($name:ident: $data:expr => $kind:ident($reason:literal)) => { #[test] fn $name() { let mut buffer = String::new(); let ser = ContentSerializer { writer: &mut buffer, level: QuoteLevel::Full, indent: Indent::Owned(Indentation::new(b' ', 2)), write_indent: false, expand_empty_elements: false, }; match $data.serialize(ser).unwrap_err() { DeError::$kind(e) => assert_eq!(e, $reason), e => panic!( "Expected `Err({}({}))`, but got `{:?}`", stringify!($kind), $reason, e ), } // We can write something before fail // assert_eq!(buffer, ""); } }; } serialize_as!(false_: false => "false"); serialize_as!(true_: true => "true"); serialize_as!(i8_: -42i8 => "-42"); serialize_as!(i16_: -4200i16 => "-4200"); serialize_as!(i32_: -42000000i32 => "-42000000"); serialize_as!(i64_: -42000000000000i64 => "-42000000000000"); serialize_as!(isize_: -42000000000000isize => "-42000000000000"); serialize_as!(u8_: 42u8 => "42"); serialize_as!(u16_: 4200u16 => "4200"); serialize_as!(u32_: 42000000u32 => "42000000"); 
serialize_as!(u64_: 42000000000000u64 => "42000000000000"); serialize_as!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! { serialize_as!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); serialize_as!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } serialize_as!(f32_: 4.2f32 => "4.2"); serialize_as!(f64_: 4.2f64 => "4.2"); serialize_as!(char_non_escaped: 'h' => "h"); serialize_as!(char_lt: '<' => "<"); serialize_as!(char_gt: '>' => ">"); serialize_as!(char_amp: '&' => "&"); serialize_as!(char_apos: '\'' => "'"); serialize_as!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content serialize_as!(char_space: ' ' => " "); serialize_as!(str_non_escaped: "non-escaped string" => "non-escaped string"); serialize_as!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Bytes(b"<\"escaped & bytes'>") => Unsupported("`serialize_bytes` not supported yet")); serialize_as!(option_none: Option::::None => ""); serialize_as!(option_some: Some(Enum::Unit) => ""); serialize_as!(unit: () => ""); serialize_as!(unit_struct: Unit => ""); serialize_as!(unit_struct_escaped: UnitEscaped => ""); // Unlike SimpleTypeSerializer, enumeration values serialized as tags serialize_as!(enum_unit: Enum::Unit => ""); err!(enum_unit_escaped: Enum::UnitEscaped => Unsupported("character `<` is not allowed at the start of an XML name `<\"&'>`")); // Newtypes recursively applies ContentSerializer serialize_as!(newtype: Newtype(42) => "42"); serialize_as!(enum_newtype: Enum::Newtype(42) => "42"); // Note that sequences of primitives serialized without delimiters other that indent! 
serialize_as!(seq: vec![1, 2, 3] => "1\n\ 2\n\ 3"); serialize_as!(seq_empty: Vec::::new() => ""); serialize_as!(tuple: ("<\"&'>", "with\t\r\n spaces", 3usize) => "<"&'>\n\ with\t\r\n spaces\n\ 3"); serialize_as!(tuple_struct: Tuple("first", 42) => "first\n\ 42"); serialize_as!(enum_tuple: Enum::Tuple("first", 42) => "first\n\ 42"); // Structured types cannot be serialized without surrounding tag, which // only `enum` can provide err!(map: BTreeMap::from([("_1", 2), ("_3", 4)]) => Unsupported("serialization of map types is not supported in `$value` field")); err!(struct_: Struct { key: "answer", val: (42, 42) } => Unsupported("serialization of struct `Struct` is not supported in `$value` field")); serialize_as!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => "\n \ answer\n \ 42\n \ 42\n\ "); /// Special field name `$text` should be serialized as text content mod text_field { use super::*; use pretty_assertions::assert_eq; err!(map: BTreeMap::from([("$text", 2), ("_3", 4)]) => Unsupported("serialization of map types is not supported in `$value` field")); err!(struct_: Text { before: "answer", content: (42, 42), after: "answer", } => Unsupported("serialization of struct `Text` is not supported in `$value` field")); serialize_as!(enum_struct: SpecialEnum::Text { before: "answer", content: (42, 42), after: "answer", } => "\n \ answer\n \ 42 42\n \ answer\n\ "); } /// `$text` field inside a struct variant of an enum mod enum_with_text_field { use super::*; use pretty_assertions::assert_eq; macro_rules! 
text { ($name:ident: $data:expr) => { serialize_as!($name: SpecialEnum::Text { before: "answer", content: $data, after: "answer", } => "\n \ answer\n \ answer\n\ "); }; ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: SpecialEnum::Text { before: "answer", content: $data, after: "answer", } => concat!( "\n answer\n ", $expected, "\n answer\n", )); }; } text!(false_: false => "false"); text!(true_: true => "true"); text!(i8_: -42i8 => "-42"); text!(i16_: -4200i16 => "-4200"); text!(i32_: -42000000i32 => "-42000000"); text!(i64_: -42000000000000i64 => "-42000000000000"); text!(isize_: -42000000000000isize => "-42000000000000"); text!(u8_: 42u8 => "42"); text!(u16_: 4200u16 => "4200"); text!(u32_: 42000000u32 => "42000000"); text!(u64_: 42000000000000u64 => "42000000000000"); text!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! { text!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); text!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } text!(f32_: 4.2f32 => "4.2"); text!(f64_: 4.2f64 => "4.2"); text!(char_non_escaped: 'h' => "h"); text!(char_lt: '<' => "<"); text!(char_gt: '>' => ">"); text!(char_amp: '&' => "&"); text!(char_apos: '\'' => "'"); text!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content text!(char_space: ' ' => " "); text!(str_non_escaped: "non-escaped string" => "non-escaped string"); text!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: SpecialEnum::Text { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); text!(option_none: Option::<&str>::None); text!(option_some: Some("non-escaped string") => "non-escaped string"); text!(option_some_empty_str: Some("")); text!(unit: ()); text!(unit_struct: Unit); text!(unit_struct_escaped: UnitEscaped); 
text!(enum_unit: Enum::Unit => "Unit"); text!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); text!(newtype: Newtype(42) => "42"); // We have no space where name of a variant can be stored err!(enum_newtype: SpecialEnum::Text { before: "answer", content: Enum::Newtype(42), after: "answer", } => Unsupported("cannot serialize enum newtype variant `Enum::Newtype` as text content value")); // Sequences are serialized separated by spaces, all spaces inside are escaped text!(seq: vec![1, 2, 3] => "1 2 3"); text!(seq_empty: Vec::::new()); text!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'> \ with spaces \ 3"); text!(tuple_struct: Tuple("first", 42) => "first 42"); // We have no space where name of a variant can be stored err!(enum_tuple: SpecialEnum::Text { before: "answer", content: Enum::Tuple("first", 42), after: "answer", } => Unsupported("cannot serialize enum tuple variant `Enum::Tuple` as text content value")); // Complex types cannot be serialized in `$text` field err!(map: SpecialEnum::Text { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("cannot serialize map as text content value")); err!(struct_: SpecialEnum::Text { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("cannot serialize struct `Struct` as text content value")); err!(enum_struct: SpecialEnum::Text { before: "answer", content: Enum::Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("cannot serialize enum struct variant `Enum::Struct` as text content value")); } /// `$value` field inside a struct variant of an enum mod enum_with_value_field { use super::*; use pretty_assertions::assert_eq; macro_rules! 
value { ($name:ident: $data:expr) => { serialize_as!($name: SpecialEnum::Value { before: "answer", content: $data, after: "answer", } => "\n \ answer\n \ answer\n\ "); }; ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: SpecialEnum::Value { before: "answer", content: $data, after: "answer", } => concat!( "\n answer\n ", $expected, "\n answer\n", )); }; } value!(false_: false => "false"); value!(true_: true => "true"); value!(i8_: -42i8 => "-42"); value!(i16_: -4200i16 => "-4200"); value!(i32_: -42000000i32 => "-42000000"); value!(i64_: -42000000000000i64 => "-42000000000000"); value!(isize_: -42000000000000isize => "-42000000000000"); value!(u8_: 42u8 => "42"); value!(u16_: 4200u16 => "4200"); value!(u32_: 42000000u32 => "42000000"); value!(u64_: 42000000000000u64 => "42000000000000"); value!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! { value!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); value!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } value!(f32_: 4.2f32 => "4.2"); value!(f64_: 4.2f64 => "4.2"); value!(char_non_escaped: 'h' => "h"); value!(char_lt: '<' => "<"); value!(char_gt: '>' => ">"); value!(char_amp: '&' => "&"); value!(char_apos: '\'' => "'"); value!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content value!(char_space: ' ' => " "); value!(str_non_escaped: "non-escaped string" => "non-escaped string"); value!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: SpecialEnum::Value { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); value!(option_none: Option::<&str>::None); value!(option_some: Some("non-escaped string") => "non-escaped string"); value!(option_some_empty_str: Some("")); value!(unit: ()); value!(unit_struct: Unit); 
value!(unit_struct_escaped: UnitEscaped); value!(enum_unit: Enum::Unit => ""); err!(enum_unit_escaped: SpecialEnum::Value { before: "answer", content: Enum::UnitEscaped, after: "answer", } => Unsupported("character `<` is not allowed at the start of an XML name `<\"&'>`")); value!(newtype: Newtype(42) => "42"); value!(enum_newtype: Enum::Newtype(42) => "42"); // Note that sequences of primitives serialized without delimiters! value!(seq: vec![1, 2, 3] => "1\n 2\n 3"); value!(seq_empty: Vec::::new()); value!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'>\n \ with\t\n\r spaces\n \ 3"); value!(tuple_struct: Tuple("first", 42) => "first\n 42"); value!(enum_tuple: Enum::Tuple("first", 42) => "first\n \ 42"); // We cannot wrap map or struct in any container and should not // flatten it, so it is impossible to serialize maps and structs err!(map: SpecialEnum::Value { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("serialization of map types is not supported in `$value` field")); err!(struct_: SpecialEnum::Value { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("serialization of struct `Struct` is not supported in `$value` field")); value!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => "\n \ answer\n \ 42\n \ 42\n \ "); } mod attributes { use super::*; use pretty_assertions::assert_eq; err!(map_attr: BTreeMap::from([("@key1", 1), ("@key2", 2)]) => Unsupported("serialization of map types is not supported in `$value` field")); err!(map_mixed: BTreeMap::from([("@key1", 1), ("key2", 2)]) => Unsupported("serialization of map types is not supported in `$value` field")); err!(struct_: Attributes { key: "answer", val: (42, 42) } => Unsupported("serialization of struct `Attributes` is not supported in `$value` field")); err!(struct_before: AttributesBefore { key: "answer", val: 42 } => Unsupported("serialization of struct `AttributesBefore` is not 
supported in `$value` field")); err!(struct_after: AttributesAfter { key: "answer", val: 42 } => Unsupported("serialization of struct `AttributesAfter` is not supported in `$value` field")); serialize_as!(enum_: Enum::Attributes { key: "answer", val: (42, 42) } => r#""#); serialize_as!(enum_before: Enum::AttributesBefore { key: "answer", val: 42 } => "\n \ 42\n\ "); serialize_as!(enum_after: Enum::AttributesAfter { key: "answer", val: 42 } => "\n \ answer\n\ "); } } } quick-xml-0.36.1/src/se/element.rs000064400000000000000000002554130072674642500150640ustar 00000000000000//! Contains serializer for an XML element use crate::de::{TEXT_KEY, VALUE_KEY}; use crate::errors::serialize::DeError; use crate::se::content::ContentSerializer; use crate::se::key::QNameSerializer; use crate::se::simple_type::{QuoteTarget, SimpleSeq, SimpleTypeSerializer}; use crate::se::text::TextSerializer; use crate::se::{Indent, XmlName}; use serde::ser::{ Impossible, Serialize, SerializeMap, SerializeSeq, SerializeStruct, SerializeStructVariant, SerializeTuple, SerializeTupleStruct, SerializeTupleVariant, Serializer, }; use serde::serde_if_integer128; use std::fmt::Write; /// Writes simple type content between [`ElementSerializer::key`] tags. macro_rules! write_primitive { ($method:ident ( $ty:ty )) => { fn $method(self, value: $ty) -> Result { self.ser.write_wrapped(self.key, |ser| ser.$method(value)) } }; } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A serializer used to serialize element with specified name. Unlike the [`ContentSerializer`], /// this serializer never uses variant names of enum variants, and because of that /// it is unable to serialize any enum values, except unit variants. /// /// This serializer is used for an ordinary fields in structs, which are not special /// fields named `$text` ([`TEXT_KEY`]) or `$value` ([`VALUE_KEY`]). 
`$text` field
/// should be serialized using [`SimpleTypeSerializer`] and `$value` field should be
/// serialized using [`ContentSerializer`].
///
/// This serializer does the following:
/// - numbers are converted to a decimal representation and serialized as `<key>value</key>`;
/// - booleans are serialized either as `<key>true</key>` or `<key>false</key>`;
/// - strings and characters are serialized as `<key>value</key>`. In particular,
///   an empty string is serialized as `<key/>`;
/// - `None` is serialized as `<key/>`;
/// - `Some` and newtypes are serialized as an inner type using the same serializer;
/// - units (`()`) and unit structs are serialized as `<key/>`;
/// - sequences, tuples and tuple structs are serialized as repeated `<key>` tag.
///   In particular, an empty sequence is serialized to nothing;
/// - structs are serialized as a sequence of fields wrapped in a `<key>` tag. Each
///   field is serialized recursively using either `ElementSerializer`, [`ContentSerializer`]
///   (`$value` fields), or [`SimpleTypeSerializer`] (`$text` fields).
///   In particular, the empty struct is serialized as `<key/>`;
/// - maps are serialized as a sequence of entries wrapped in a `<key>` tag. If a key is
///   serialized to a special name, the same rules as for struct fields are applied.
///   In particular, the empty map is serialized as `<key/>`;
/// - enums:
///   - unit variants are serialized as `<key>variant</key>`;
///   - other variants are not supported ([`DeError::Unsupported`] is returned);
///
/// Usage of empty tags depends on the [`ContentSerializer::expand_empty_elements`] setting.
pub struct ElementSerializer<'w, 'k, W: Write> { /// The inner serializer that contains the settings and mostly do the actual work pub ser: ContentSerializer<'w, 'k, W>, /// Tag name used to wrap serialized types except enum variants which uses the variant name pub(super) key: XmlName<'k>, } impl<'w, 'k, W: Write> Serializer for ElementSerializer<'w, 'k, W> { type Ok = (); type Error = DeError; type SerializeSeq = Self; type SerializeTuple = Self; type SerializeTupleStruct = Self; type SerializeTupleVariant = Impossible; type SerializeMap = Map<'w, 'k, W>; type SerializeStruct = Struct<'w, 'k, W>; type SerializeStructVariant = Struct<'w, 'k, W>; write_primitive!(serialize_bool(bool)); write_primitive!(serialize_i8(i8)); write_primitive!(serialize_i16(i16)); write_primitive!(serialize_i32(i32)); write_primitive!(serialize_i64(i64)); write_primitive!(serialize_u8(u8)); write_primitive!(serialize_u16(u16)); write_primitive!(serialize_u32(u32)); write_primitive!(serialize_u64(u64)); serde_if_integer128! { write_primitive!(serialize_i128(i128)); write_primitive!(serialize_u128(u128)); } write_primitive!(serialize_f32(f32)); write_primitive!(serialize_f64(f64)); write_primitive!(serialize_char(char)); write_primitive!(serialize_bytes(&[u8])); fn serialize_str(self, value: &str) -> Result { if value.is_empty() { self.ser.write_empty(self.key) } else { self.ser .write_wrapped(self.key, |ser| ser.serialize_str(value)) } } /// By serde contract we should serialize key of [`None`] values. If someone /// wants to skip the field entirely, he should use /// `#[serde(skip_serializing_if = "Option::is_none")]`. 
/// /// In XML when we serialize field, we write field name as: /// - element name, or /// - attribute name /// /// and field value as /// - content of the element, or /// - attribute value /// /// So serialization of `None` works the same as [serialization of `()`](#method.serialize_unit) fn serialize_none(self) -> Result { self.serialize_unit() } fn serialize_some(self, value: &T) -> Result { value.serialize(self) } fn serialize_unit(self) -> Result { self.ser.write_empty(self.key) } fn serialize_unit_struct(self, _name: &'static str) -> Result { self.ser.write_empty(self.key) } /// Writes a tag with name [`Self::key`] and content of unit variant inside. /// If variant is a special `$text` value, then empty tag `` is written. /// Otherwise a `variant` is written. fn serialize_unit_variant( self, name: &'static str, variant_index: u32, variant: &'static str, ) -> Result { if variant == TEXT_KEY { self.ser.write_empty(self.key) } else { self.ser.write_wrapped(self.key, |ser| { ser.serialize_unit_variant(name, variant_index, variant) }) } } fn serialize_newtype_struct( self, _name: &'static str, value: &T, ) -> Result { value.serialize(self) } /// Always returns [`DeError::Unsupported`]. Newtype variants can be serialized /// only in `$value` fields, which is serialized using [`ContentSerializer`]. #[inline] fn serialize_newtype_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _value: &T, ) -> Result { Err(DeError::Unsupported( format!( "cannot serialize enum newtype variant `{}::{}`", name, variant ) .into(), )) } #[inline] fn serialize_seq(self, _len: Option) -> Result { Ok(self) } #[inline] fn serialize_tuple(self, len: usize) -> Result { self.serialize_seq(Some(len)) } #[inline] fn serialize_tuple_struct( self, _name: &'static str, len: usize, ) -> Result { self.serialize_tuple(len) } /// Always returns [`DeError::Unsupported`]. 
Tuple variants can be serialized /// only in `$value` fields, which is serialized using [`ContentSerializer`]. #[inline] fn serialize_tuple_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _len: usize, ) -> Result { Err(DeError::Unsupported( format!( "cannot serialize enum tuple variant `{}::{}`", name, variant ) .into(), )) } fn serialize_map(self, _len: Option) -> Result { Ok(Map { ser: self.serialize_struct("", 0)?, key: None, }) } #[inline] fn serialize_struct( mut self, _name: &'static str, _len: usize, ) -> Result { self.ser.write_indent()?; self.ser.indent.increase(); self.ser.writer.write_char('<')?; self.ser.writer.write_str(self.key.0)?; Ok(Struct { ser: self, children: String::new(), }) } /// Always returns [`DeError::Unsupported`]. Struct variants can be serialized /// only in `$value` fields, which is serialized using [`ContentSerializer`]. #[inline] fn serialize_struct_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _len: usize, ) -> Result { Err(DeError::Unsupported( format!( "cannot serialize enum struct variant `{}::{}`", name, variant ) .into(), )) } } impl<'w, 'k, W: Write> SerializeSeq for ElementSerializer<'w, 'k, W> { type Ok = (); type Error = DeError; fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { value.serialize(ElementSerializer { ser: self.ser.new_seq_element_serializer(), key: self.key, })?; // Write indent for the next element self.ser.write_indent = true; Ok(()) } #[inline] fn end(self) -> Result { Ok(()) } } impl<'w, 'k, W: Write> SerializeTuple for ElementSerializer<'w, 'k, W> { type Ok = (); type Error = DeError; #[inline] fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { SerializeSeq::serialize_element(self, value) } #[inline] fn end(self) -> Result { SerializeSeq::end(self) } } impl<'w, 'k, W: Write> SerializeTupleStruct for ElementSerializer<'w, 'k, W> { type Ok = 
(); type Error = DeError; #[inline] fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { SerializeSeq::serialize_element(self, value) } #[inline] fn end(self) -> Result { SerializeSeq::end(self) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A serializer for tuple variants. Tuples can be serialized in two modes: /// - wrapping each tuple field into a tag /// - without wrapping, fields are delimited by a space pub enum Tuple<'w, 'k, W: Write> { /// Serialize each tuple field as an element Element(ElementSerializer<'w, 'k, W>), /// Serialize tuple as an `xs:list`: space-delimited content of fields Text(SimpleSeq<'k, &'w mut W>), } impl<'w, 'k, W: Write> SerializeTupleVariant for Tuple<'w, 'k, W> { type Ok = (); type Error = DeError; #[inline] fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { match self { Self::Element(ser) => SerializeTuple::serialize_element(ser, value), Self::Text(ser) => SerializeTuple::serialize_element(ser, value), } } #[inline] fn end(self) -> Result { match self { Self::Element(ser) => SerializeTuple::end(ser), Self::Text(ser) => SerializeTuple::end(ser).map(|_| ()), } } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A serializer for struct variants, which serializes the struct contents inside /// of wrapping tags (`<${tag}>...`). /// /// Serialization of each field depends on it representation: /// - attributes written directly to the higher serializer /// - elements buffered into internal buffer and at the end written into higher /// serializer pub struct Struct<'w, 'k, W: Write> { ser: ElementSerializer<'w, 'k, W>, /// Buffer to store serialized elements // TODO: Customization point: allow direct writing of elements, but all // attributes should be listed first. Fail, if attribute encountered after // element. 
Use feature to configure children: String, } impl<'w, 'k, W: Write> Struct<'w, 'k, W> { #[inline] fn write_field(&mut self, key: &str, value: &T) -> Result<(), DeError> where T: ?Sized + Serialize, { //TODO: Customization point: allow user to determine if field is attribute or not if let Some(key) = key.strip_prefix('@') { let key = XmlName::try_from(key)?; self.write_attribute(key, value) } else { self.write_element(key, value) } } /// Writes `value` as an attribute #[inline] fn write_attribute(&mut self, key: XmlName, value: &T) -> Result<(), DeError> where T: ?Sized + Serialize, { //TODO: Customization point: each attribute on new line self.ser.ser.writer.write_char(' ')?; self.ser.ser.writer.write_str(key.0)?; self.ser.ser.writer.write_char('=')?; //TODO: Customization point: preferred quote style self.ser.ser.writer.write_char('"')?; value.serialize(SimpleTypeSerializer { writer: &mut self.ser.ser.writer, target: QuoteTarget::DoubleQAttr, level: self.ser.ser.level, indent: Indent::None, })?; self.ser.ser.writer.write_char('"')?; Ok(()) } /// Writes `value` either as a text content, or as an element. /// /// If `key` has a magic value [`TEXT_KEY`], then `value` serialized as a /// [simple type]. /// /// If `key` has a magic value [`VALUE_KEY`], then `value` serialized as a /// [content] without wrapping in tags, otherwise it is wrapped in /// `<${key}>...`. 
/// /// [simple type]: SimpleTypeSerializer /// [content]: ContentSerializer fn write_element(&mut self, key: &str, value: &T) -> Result<(), DeError> where T: ?Sized + Serialize, { let ser = ContentSerializer { writer: &mut self.children, level: self.ser.ser.level, indent: self.ser.ser.indent.borrow(), write_indent: true, expand_empty_elements: self.ser.ser.expand_empty_elements, }; if key == TEXT_KEY { value.serialize(TextSerializer(ser.into_simple_type_serializer()))?; } else if key == VALUE_KEY { value.serialize(ser)?; } else { value.serialize(ElementSerializer { key: XmlName::try_from(key)?, ser, })?; } Ok(()) } } impl<'w, 'k, W: Write> SerializeStruct for Struct<'w, 'k, W> { type Ok = (); type Error = DeError; fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { self.write_field(key, value) } fn end(mut self) -> Result { self.ser.ser.indent.decrease(); if self.children.is_empty() { if self.ser.ser.expand_empty_elements { self.ser.ser.writer.write_str(">')?; } else { self.ser.ser.writer.write_str("/>")?; } } else { self.ser.ser.writer.write_char('>')?; self.ser.ser.writer.write_str(&self.children)?; self.ser.ser.indent.write_indent(&mut self.ser.ser.writer)?; self.ser.ser.writer.write_str("')?; } Ok(()) } } impl<'w, 'k, W: Write> SerializeStructVariant for Struct<'w, 'k, W> { type Ok = (); type Error = DeError; #[inline] fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { SerializeStruct::serialize_field(self, key, value) } #[inline] fn end(self) -> Result { SerializeStruct::end(self) } } //////////////////////////////////////////////////////////////////////////////////////////////////// pub struct Map<'w, 'k, W: Write> { ser: Struct<'w, 'k, W>, /// Key, serialized by `QNameSerializer` if consumer uses `serialize_key` + /// `serialize_value` calls instead of `serialize_entry` key: Option, } impl<'w, 'k, W: Write> Map<'w, 'k, W> { fn 
make_key(&mut self, key: &T) -> Result where T: ?Sized + Serialize, { key.serialize(QNameSerializer { writer: String::new(), }) } } impl<'w, 'k, W: Write> SerializeMap for Map<'w, 'k, W> { type Ok = (); type Error = DeError; fn serialize_key(&mut self, key: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { if let Some(_) = self.key.take() { return Err(DeError::Custom( "calling `serialize_key` twice without `serialize_value`".to_string(), )); } self.key = Some(self.make_key(key)?); Ok(()) } fn serialize_value(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { if let Some(key) = self.key.take() { return self.ser.write_field(&key, value); } Err(DeError::Custom( "calling `serialize_value` without call of `serialize_key`".to_string(), )) } fn serialize_entry(&mut self, key: &K, value: &V) -> Result<(), Self::Error> where K: ?Sized + Serialize, V: ?Sized + Serialize, { let key = self.make_key(key)?; self.ser.write_field(&key, value) } fn end(mut self) -> Result { if let Some(key) = self.key.take() { return Err(DeError::Custom(format!( "calling `end` without call of `serialize_value` for key `{key}`" ))); } SerializeStruct::end(self.ser) } } //////////////////////////////////////////////////////////////////////////////////////////////////// #[cfg(test)] mod tests { use super::*; use crate::se::content::tests::*; use crate::se::{Indent, QuoteLevel}; use crate::utils::Bytes; use serde::Serialize; use std::collections::BTreeMap; #[derive(Debug, Serialize, PartialEq)] struct OptionalElements { a: Option<&'static str>, #[serde(skip_serializing_if = "Option::is_none")] b: Option<&'static str>, } #[derive(Debug, Serialize, PartialEq)] struct OptionalAttributes { #[serde(rename = "@a")] a: Option<&'static str>, #[serde(rename = "@b")] #[serde(skip_serializing_if = "Option::is_none")] b: Option<&'static str>, } mod without_indent { use super::*; use crate::se::content::tests::Struct; use pretty_assertions::assert_eq; /// Checks that given 
`$data` successfully serialized as `$expected` macro_rules! serialize_as { ($name:ident: $data:expr => $expected:expr) => { #[test] fn $name() { let mut buffer = String::new(); let ser = ElementSerializer { ser: ContentSerializer { writer: &mut buffer, level: QuoteLevel::Full, indent: Indent::None, write_indent: false, expand_empty_elements: false, }, key: XmlName("root"), }; $data.serialize(ser).unwrap(); assert_eq!(buffer, $expected); } }; } /// Checks that attempt to serialize given `$data` results to a /// serialization error `$kind` with `$reason` macro_rules! err { ($name:ident: $data:expr => $kind:ident($reason:literal)) => { #[test] fn $name() { let mut buffer = String::new(); let ser = ElementSerializer { ser: ContentSerializer { writer: &mut buffer, level: QuoteLevel::Full, indent: Indent::None, write_indent: false, expand_empty_elements: false, }, key: XmlName("root"), }; match $data.serialize(ser).unwrap_err() { DeError::$kind(e) => assert_eq!(e, $reason), e => panic!( "Expected `Err({}({}))`, but got `{:?}`", stringify!($kind), $reason, e ), } // We can write something before fail // assert_eq!(buffer, ""); } }; } serialize_as!(false_: false => "false"); serialize_as!(true_: true => "true"); serialize_as!(i8_: -42i8 => "-42"); serialize_as!(i16_: -4200i16 => "-4200"); serialize_as!(i32_: -42000000i32 => "-42000000"); serialize_as!(i64_: -42000000000000i64 => "-42000000000000"); serialize_as!(isize_: -42000000000000isize => "-42000000000000"); serialize_as!(u8_: 42u8 => "42"); serialize_as!(u16_: 4200u16 => "4200"); serialize_as!(u32_: 42000000u32 => "42000000"); serialize_as!(u64_: 42000000000000u64 => "42000000000000"); serialize_as!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! 
{ serialize_as!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); serialize_as!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } serialize_as!(f32_: 4.2f32 => "4.2"); serialize_as!(f64_: 4.2f64 => "4.2"); serialize_as!(char_non_escaped: 'h' => "h"); serialize_as!(char_lt: '<' => "<"); serialize_as!(char_gt: '>' => ">"); serialize_as!(char_amp: '&' => "&"); serialize_as!(char_apos: '\'' => "'"); serialize_as!(char_quot: '"' => """); serialize_as!(str_non_escaped: "non-escaped string" => "non-escaped string"); serialize_as!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Bytes(b"<\"escaped & bytes'>") => Unsupported("`serialize_bytes` not supported yet")); serialize_as!(option_none: Option::<&str>::None => ""); serialize_as!(option_some: Some("non-escaped string") => "non-escaped string"); serialize_as!(option_some_empty_str: Some("") => ""); serialize_as!(unit: () => ""); serialize_as!(unit_struct: Unit => ""); serialize_as!(unit_struct_escaped: UnitEscaped => ""); serialize_as!(enum_unit: Enum::Unit => "Unit"); serialize_as!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); serialize_as!(newtype: Newtype(42) => "42"); err!(enum_newtype: Enum::Newtype(42) => Unsupported("cannot serialize enum newtype variant `Enum::Newtype`")); serialize_as!(seq: vec![1, 2, 3] => "1\ 2\ 3"); serialize_as!(seq_empty: Vec::::new() => ""); serialize_as!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'>\ with\t\n\r spaces\ 3"); serialize_as!(tuple_struct: Tuple("first", 42) => "first\ 42"); err!(enum_tuple: Enum::Tuple("first", 42) => Unsupported("cannot serialize enum tuple variant `Enum::Tuple`")); serialize_as!(map: BTreeMap::from([("_1", 2), ("_3", 4)]) => "\ <_1>2\ <_3>4\ "); serialize_as!(struct_: Struct { key: "answer", val: (42, 42) } => "\ answer\ 42\ 42\ "); err!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => Unsupported("cannot serialize enum struct variant 
`Enum::Struct`")); /// Special field name `$text` should be serialized as text content. /// Sequences serialized as an `xs:list` content mod text_field { use super::*; /// `$text` key in a map mod map { use super::*; use pretty_assertions::assert_eq; macro_rules! text { ($name:ident: $data:expr) => { serialize_as!($name: BTreeMap::from([("$text", $data)]) => ""); }; ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: BTreeMap::from([("$text", $data)]) => concat!("", $expected,"")); }; } text!(false_: false => "false"); text!(true_: true => "true"); text!(i8_: -42i8 => "-42"); text!(i16_: -4200i16 => "-4200"); text!(i32_: -42000000i32 => "-42000000"); text!(i64_: -42000000000000i64 => "-42000000000000"); text!(isize_: -42000000000000isize => "-42000000000000"); text!(u8_: 42u8 => "42"); text!(u16_: 4200u16 => "4200"); text!(u32_: 42000000u32 => "42000000"); text!(u64_: 42000000000000u64 => "42000000000000"); text!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! 
{ text!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); text!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } text!(f32_: 4.2f32 => "4.2"); text!(f64_: 4.2f64 => "4.2"); text!(char_non_escaped: 'h' => "h"); text!(char_lt: '<' => "<"); text!(char_gt: '>' => ">"); text!(char_amp: '&' => "&"); text!(char_apos: '\'' => "'"); text!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content text!(char_space: ' ' => " "); text!(str_non_escaped: "non-escaped string" => "non-escaped string"); text!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Text { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); text!(option_none: Option::<&str>::None); text!(option_some: Some("non-escaped string") => "non-escaped string"); text!(option_some_empty_str: Some("")); text!(unit: ()); text!(unit_struct: Unit); text!(unit_struct_escaped: UnitEscaped); text!(enum_unit: Enum::Unit => "Unit"); text!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); text!(newtype: Newtype(42) => "42"); // We have no space where name of a variant can be stored err!(enum_newtype: Text { before: "answer", content: Enum::Newtype(42), after: "answer", } => Unsupported("cannot serialize enum newtype variant `Enum::Newtype` as text content value")); // Sequences are serialized separated by spaces, all spaces inside are escaped text!(seq: vec![1, 2, 3] => "1 2 3"); text!(seq_empty: Vec::::new()); text!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'> \ with spaces \ 3"); text!(tuple_struct: Tuple("first", 42) => "first 42"); // We have no space where name of a variant can be stored err!(enum_tuple: Text { before: "answer", content: Enum::Tuple("first", 42), after: "answer", } => Unsupported("cannot serialize enum tuple variant `Enum::Tuple` as text content 
value")); // Complex types cannot be serialized in `$text` field err!(map: Text { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("cannot serialize map as text content value")); err!(struct_: Text { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("cannot serialize struct `Struct` as text content value")); err!(enum_struct: Text { before: "answer", content: Enum::Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("cannot serialize enum struct variant `Enum::Struct` as text content value")); } /// `$text` field inside a struct mod struct_ { use super::*; use pretty_assertions::assert_eq; macro_rules! text { ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: Text { before: "answer", content: $data, after: "answer", } => concat!( "answer", $expected, "answer", )); }; } text!(false_: false => "false"); text!(true_: true => "true"); text!(i8_: -42i8 => "-42"); text!(i16_: -4200i16 => "-4200"); text!(i32_: -42000000i32 => "-42000000"); text!(i64_: -42000000000000i64 => "-42000000000000"); text!(isize_: -42000000000000isize => "-42000000000000"); text!(u8_: 42u8 => "42"); text!(u16_: 4200u16 => "4200"); text!(u32_: 42000000u32 => "42000000"); text!(u64_: 42000000000000u64 => "42000000000000"); text!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! 
{ text!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); text!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } text!(f32_: 4.2f32 => "4.2"); text!(f64_: 4.2f64 => "4.2"); text!(char_non_escaped: 'h' => "h"); text!(char_lt: '<' => "<"); text!(char_gt: '>' => ">"); text!(char_amp: '&' => "&"); text!(char_apos: '\'' => "'"); text!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content text!(char_space: ' ' => " "); text!(str_non_escaped: "non-escaped string" => "non-escaped string"); text!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Text { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); text!(option_none: Option::<&str>::None => ""); text!(option_some: Some("non-escaped string") => "non-escaped string"); text!(option_some_empty_str: Some("") => ""); text!(unit: () => ""); text!(unit_struct: Unit => ""); text!(unit_struct_escaped: UnitEscaped => ""); text!(enum_unit: Enum::Unit => "Unit"); text!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); text!(newtype: Newtype(42) => "42"); // We have no space where name of a variant can be stored err!(enum_newtype: Text { before: "answer", content: Enum::Newtype(42), after: "answer", } => Unsupported("cannot serialize enum newtype variant `Enum::Newtype` as text content value")); // Sequences are serialized separated by spaces, all spaces inside are escaped text!(seq: vec![1, 2, 3] => "1 2 3"); text!(seq_empty: Vec::::new() => ""); text!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'> \ with spaces \ 3"); text!(tuple_struct: Tuple("first", 42) => "first 42"); // We have no space where name of a variant can be stored err!(enum_tuple: Text { before: "answer", content: Enum::Tuple("first", 42), after: "answer", } => Unsupported("cannot serialize enum tuple 
variant `Enum::Tuple` as text content value")); // Complex types cannot be serialized in `$text` field err!(map: Text { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("cannot serialize map as text content value")); err!(struct_: Text { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("cannot serialize struct `Struct` as text content value")); err!(enum_struct: Text { before: "answer", content: Enum::Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("cannot serialize enum struct variant `Enum::Struct` as text content value")); } } /// Special field name `$value` should be serialized using name, provided /// by the type of value instead of a key. Sequences serialized as a list /// of tags with that name (each element can have their own name) mod value_field { use super::*; /// `$value` key in a map mod map { use super::*; use pretty_assertions::assert_eq; macro_rules! value { ($name:ident: $data:expr) => { serialize_as!($name: BTreeMap::from([("$value", $data)]) => ""); }; ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: BTreeMap::from([("$value", $data)]) => concat!("", $expected,"")); }; } value!(false_: false => "false"); value!(true_: true => "true"); value!(i8_: -42i8 => "-42"); value!(i16_: -4200i16 => "-4200"); value!(i32_: -42000000i32 => "-42000000"); value!(i64_: -42000000000000i64 => "-42000000000000"); value!(isize_: -42000000000000isize => "-42000000000000"); value!(u8_: 42u8 => "42"); value!(u16_: 4200u16 => "4200"); value!(u32_: 42000000u32 => "42000000"); value!(u64_: 42000000000000u64 => "42000000000000"); value!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! 
{ value!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); value!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } value!(f32_: 4.2f32 => "4.2"); value!(f64_: 4.2f64 => "4.2"); value!(char_non_escaped: 'h' => "h"); value!(char_lt: '<' => "<"); value!(char_gt: '>' => ">"); value!(char_amp: '&' => "&"); value!(char_apos: '\'' => "'"); value!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content value!(char_space: ' ' => " "); value!(str_non_escaped: "non-escaped string" => "non-escaped string"); value!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: BTreeMap::from([("$value", Bytes(b"<\"escaped & bytes'>"))]) => Unsupported("`serialize_bytes` not supported yet")); value!(option_none: Option::<&str>::None); value!(option_some: Some("non-escaped string") => "non-escaped string"); value!(option_some_empty_str: Some("")); value!(unit: ()); value!(unit_struct: Unit); value!(unit_struct_escaped: UnitEscaped); value!(enum_unit: Enum::Unit => ""); err!(enum_unit_escaped: BTreeMap::from([("$value", Enum::UnitEscaped)]) => Unsupported("character `<` is not allowed at the start of an XML name `<\"&'>`")); value!(newtype: Newtype(42) => "42"); value!(enum_newtype: Enum::Newtype(42) => "42"); // Note that sequences of primitives serialized without delimiters! 
value!(seq: vec![1, 2, 3] => "123"); value!(seq_empty: Vec::::new()); value!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'>\ with\t\n\r spaces\ 3"); value!(tuple_struct: Tuple("first", 42) => "first42"); value!(enum_tuple: Enum::Tuple("first", 42) => "first\ 42"); // We cannot wrap map or struct in any container and should not // flatten it, so it is impossible to serialize maps and structs err!(map: BTreeMap::from([("$value", BTreeMap::from([("_1", 2), ("_3", 4)]))]) => Unsupported("serialization of map types is not supported in `$value` field")); err!(struct_: BTreeMap::from([("$value", Struct { key: "answer", val: (42, 42) })]) => Unsupported("serialization of struct `Struct` is not supported in `$value` field")); value!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => "\ answer\ 42\ 42\ "); } /// `$value` field inside a struct mod struct_ { use super::*; use pretty_assertions::assert_eq; macro_rules! value { ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: Value { before: "answer", content: $data, after: "answer", } => concat!( "answer", $expected, "answer", )); }; } value!(false_: false => "false"); value!(true_: true => "true"); value!(i8_: -42i8 => "-42"); value!(i16_: -4200i16 => "-4200"); value!(i32_: -42000000i32 => "-42000000"); value!(i64_: -42000000000000i64 => "-42000000000000"); value!(isize_: -42000000000000isize => "-42000000000000"); value!(u8_: 42u8 => "42"); value!(u16_: 4200u16 => "4200"); value!(u32_: 42000000u32 => "42000000"); value!(u64_: 42000000000000u64 => "42000000000000"); value!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! 
{ value!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); value!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } value!(f32_: 4.2f32 => "4.2"); value!(f64_: 4.2f64 => "4.2"); value!(char_non_escaped: 'h' => "h"); value!(char_lt: '<' => "<"); value!(char_gt: '>' => ">"); value!(char_amp: '&' => "&"); value!(char_apos: '\'' => "'"); value!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content value!(char_space: ' ' => " "); value!(str_non_escaped: "non-escaped string" => "non-escaped string"); value!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Value { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); value!(option_none: Option::<&str>::None => ""); value!(option_some: Some("non-escaped string") => "non-escaped string"); value!(option_some_empty_str: Some("") => ""); value!(unit: () => ""); value!(unit_struct: Unit => ""); value!(unit_struct_escaped: UnitEscaped => ""); value!(enum_unit: Enum::Unit => ""); err!(enum_unit_escaped: Value { before: "answer", content: Enum::UnitEscaped, after: "answer", } => Unsupported("character `<` is not allowed at the start of an XML name `<\"&'>`")); value!(newtype: Newtype(42) => "42"); value!(enum_newtype: Enum::Newtype(42) => "42"); // Note that sequences of primitives serialized without delimiters! 
value!(seq: vec![1, 2, 3] => "123"); value!(seq_empty: Vec::::new() => ""); value!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'>\ with\t\n\r spaces\ 3"); value!(tuple_struct: Tuple("first", 42) => "first42"); value!(enum_tuple: Enum::Tuple("first", 42) => "first\ 42"); // We cannot wrap map or struct in any container and should not // flatten it, so it is impossible to serialize maps and structs err!(map: Value { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("serialization of map types is not supported in `$value` field")); err!(struct_: Value { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("serialization of struct `Struct` is not supported in `$value` field")); value!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => "\ answer\ 42\ 42\ "); } } mod attributes { use super::*; use pretty_assertions::assert_eq; serialize_as!(map_attr: BTreeMap::from([("@key1", 1), ("@key2", 2)]) => r#""#); serialize_as!(map_mixed: BTreeMap::from([("@key1", 1), ("key2", 2)]) => r#"2"#); serialize_as!(struct_: Attributes { key: "answer", val: (42, 42) } => r#""#); serialize_as!(struct_before: AttributesBefore { key: "answer", val: 42 } => r#"42"#); serialize_as!(struct_after: AttributesAfter { key: "answer", val: 42 } => r#"answer"#); err!(enum_: Enum::Attributes { key: "answer", val: (42, 42) } => Unsupported("cannot serialize enum struct variant `Enum::Attributes`")); /// Test for https://github.com/tafia/quick-xml/issues/252 mod optional { use super::*; use pretty_assertions::assert_eq; serialize_as!(none: OptionalAttributes { a: None, b: None } => r#""#); serialize_as!(some_empty_str: OptionalAttributes { a: Some(""), b: Some(""), } => r#""#); serialize_as!(some_non_empty: OptionalAttributes { a: Some("1"), b: Some("2"), } => r#""#); } } /// Test for https://github.com/tafia/quick-xml/issues/252 mod optional { use super::*; use 
pretty_assertions::assert_eq; serialize_as!(none: OptionalElements { a: None, b: None } => "\ \ "); serialize_as!(some_empty_str: OptionalElements { a: Some(""), b: Some(""), } => "\ \ \ "); serialize_as!(some_non_empty: OptionalElements { a: Some("1"), b: Some("2"), } => "\ 1\ 2\ "); } } mod with_indent { use super::*; use crate::se::content::tests::Struct; use crate::writer::Indentation; use pretty_assertions::assert_eq; /// Checks that given `$data` successfully serialized as `$expected` macro_rules! serialize_as { ($name:ident: $data:expr => $expected:expr) => { #[test] fn $name() { let mut buffer = String::new(); let ser = ElementSerializer { ser: ContentSerializer { writer: &mut buffer, level: QuoteLevel::Full, indent: Indent::Owned(Indentation::new(b' ', 2)), write_indent: false, expand_empty_elements: false, }, key: XmlName("root"), }; $data.serialize(ser).unwrap(); assert_eq!(buffer, $expected); } }; } /// Checks that attempt to serialize given `$data` results to a /// serialization error `$kind` with `$reason` macro_rules! 
err { ($name:ident: $data:expr => $kind:ident($reason:literal)) => { #[test] fn $name() { let mut buffer = String::new(); let ser = ElementSerializer { ser: ContentSerializer { writer: &mut buffer, level: QuoteLevel::Full, indent: Indent::Owned(Indentation::new(b' ', 2)), write_indent: false, expand_empty_elements: false, }, key: XmlName("root"), }; match $data.serialize(ser).unwrap_err() { DeError::$kind(e) => assert_eq!(e, $reason), e => panic!( "Expected `Err({}({}))`, but got `{:?}`", stringify!($kind), $reason, e ), } // We can write something before fail // assert_eq!(buffer, ""); } }; } serialize_as!(false_: false => "false"); serialize_as!(true_: true => "true"); serialize_as!(i8_: -42i8 => "-42"); serialize_as!(i16_: -4200i16 => "-4200"); serialize_as!(i32_: -42000000i32 => "-42000000"); serialize_as!(i64_: -42000000000000i64 => "-42000000000000"); serialize_as!(isize_: -42000000000000isize => "-42000000000000"); serialize_as!(u8_: 42u8 => "42"); serialize_as!(u16_: 4200u16 => "4200"); serialize_as!(u32_: 42000000u32 => "42000000"); serialize_as!(u64_: 42000000000000u64 => "42000000000000"); serialize_as!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! 
{ serialize_as!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); serialize_as!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } serialize_as!(f32_: 4.2f32 => "4.2"); serialize_as!(f64_: 4.2f64 => "4.2"); serialize_as!(char_non_escaped: 'h' => "h"); serialize_as!(char_lt: '<' => "<"); serialize_as!(char_gt: '>' => ">"); serialize_as!(char_amp: '&' => "&"); serialize_as!(char_apos: '\'' => "'"); serialize_as!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content serialize_as!(char_space: ' ' => " "); serialize_as!(str_non_escaped: "non-escaped string" => "non-escaped string"); serialize_as!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Bytes(b"<\"escaped & bytes'>") => Unsupported("`serialize_bytes` not supported yet")); serialize_as!(option_none: Option::<&str>::None => ""); serialize_as!(option_some: Some("non-escaped string") => "non-escaped string"); serialize_as!(option_some_empty: Some("") => ""); serialize_as!(unit: () => ""); serialize_as!(unit_struct: Unit => ""); serialize_as!(unit_struct_escaped: UnitEscaped => ""); serialize_as!(enum_unit: Enum::Unit => "Unit"); serialize_as!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); serialize_as!(newtype: Newtype(42) => "42"); err!(enum_newtype: Enum::Newtype(42) => Unsupported("cannot serialize enum newtype variant `Enum::Newtype`")); serialize_as!(seq: vec![1, 2, 3] => "1\n\ 2\n\ 3"); serialize_as!(seq_empty: Vec::::new() => ""); serialize_as!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'>\n\ with\t\n\r spaces\n\ 3"); serialize_as!(tuple_struct: Tuple("first", 42) => "first\n\ 42"); err!(enum_tuple: Enum::Tuple("first", 42) => Unsupported("cannot serialize enum tuple variant `Enum::Tuple`")); serialize_as!(map: BTreeMap::from([("_1", 2), ("_3", 4)]) => "\n \ <_1>2\n \ <_3>4\n\ "); serialize_as!(struct_: Struct { key: 
"answer", val: (42, 42) } => "\n \ answer\n \ 42\n \ 42\n\ "); err!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => Unsupported("cannot serialize enum struct variant `Enum::Struct`")); /// Special field name `$text` should be serialized as text content. /// Sequences serialized as an `xs:list` content mod text_field { use super::*; /// `$text` key in a map mod map { use super::*; use pretty_assertions::assert_eq; macro_rules! text { ($name:ident: $data:expr) => { serialize_as!($name: BTreeMap::from([("$text", $data)]) => ""); }; ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: BTreeMap::from([("$text", $data)]) => concat!("\n ", $expected,"\n")); }; } text!(false_: false => "false"); text!(true_: true => "true"); text!(i8_: -42i8 => "-42"); text!(i16_: -4200i16 => "-4200"); text!(i32_: -42000000i32 => "-42000000"); text!(i64_: -42000000000000i64 => "-42000000000000"); text!(isize_: -42000000000000isize => "-42000000000000"); text!(u8_: 42u8 => "42"); text!(u16_: 4200u16 => "4200"); text!(u32_: 42000000u32 => "42000000"); text!(u64_: 42000000000000u64 => "42000000000000"); text!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! 
{ text!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); text!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } text!(f32_: 4.2f32 => "4.2"); text!(f64_: 4.2f64 => "4.2"); text!(char_non_escaped: 'h' => "h"); text!(char_lt: '<' => "<"); text!(char_gt: '>' => ">"); text!(char_amp: '&' => "&"); text!(char_apos: '\'' => "'"); text!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content text!(char_space: ' ' => " "); text!(str_non_escaped: "non-escaped string" => "non-escaped string"); text!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Text { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); text!(option_none: Option::<&str>::None); text!(option_some: Some("non-escaped string") => "non-escaped string"); text!(option_some_empty_str: Some("")); text!(unit: ()); text!(unit_struct: Unit); text!(unit_struct_escaped: UnitEscaped); text!(enum_unit: Enum::Unit => "Unit"); text!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); text!(newtype: Newtype(42) => "42"); // We have no space where name of a variant can be stored err!(enum_newtype: Text { before: "answer", content: Enum::Newtype(42), after: "answer", } => Unsupported("cannot serialize enum newtype variant `Enum::Newtype` as text content value")); // Sequences are serialized separated by spaces, all spaces inside are escaped text!(seq: vec![1, 2, 3] => "1 2 3"); text!(seq_empty: Vec::::new()); text!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'> \ with spaces \ 3"); text!(tuple_struct: Tuple("first", 42) => "first 42"); // We have no space where name of a variant can be stored err!(enum_tuple: Text { before: "answer", content: Enum::Tuple("first", 42), after: "answer", } => Unsupported("cannot serialize enum tuple variant `Enum::Tuple` as text content 
value")); // Complex types cannot be serialized in `$text` field err!(map: Text { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("cannot serialize map as text content value")); err!(struct_: Text { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("cannot serialize struct `Struct` as text content value")); err!(enum_struct: Text { before: "answer", content: Enum::Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("cannot serialize enum struct variant `Enum::Struct` as text content value")); } /// `$text` field inside a struct mod struct_ { use super::*; use pretty_assertions::assert_eq; macro_rules! text { ($name:ident: $data:expr) => { serialize_as!($name: Text { before: "answer", content: $data, after: "answer", } => "\n \ answer\n \ answer\n\ "); }; ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: Text { before: "answer", content: $data, after: "answer", } => concat!( "\n answer\n ", $expected, "\n answer\n", )); }; } text!(false_: false => "false"); text!(true_: true => "true"); text!(i8_: -42i8 => "-42"); text!(i16_: -4200i16 => "-4200"); text!(i32_: -42000000i32 => "-42000000"); text!(i64_: -42000000000000i64 => "-42000000000000"); text!(isize_: -42000000000000isize => "-42000000000000"); text!(u8_: 42u8 => "42"); text!(u16_: 4200u16 => "4200"); text!(u32_: 42000000u32 => "42000000"); text!(u64_: 42000000000000u64 => "42000000000000"); text!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! 
{ text!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); text!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } text!(f32_: 4.2f32 => "4.2"); text!(f64_: 4.2f64 => "4.2"); text!(char_non_escaped: 'h' => "h"); text!(char_lt: '<' => "<"); text!(char_gt: '>' => ">"); text!(char_amp: '&' => "&"); text!(char_apos: '\'' => "'"); text!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content text!(char_space: ' ' => " "); text!(str_non_escaped: "non-escaped string" => "non-escaped string"); text!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Text { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); text!(option_none: Option::<&str>::None); text!(option_some: Some("non-escaped string") => "non-escaped string"); text!(option_some_empty_str: Some("")); text!(unit: ()); text!(unit_struct: Unit); text!(unit_struct_escaped: UnitEscaped); text!(enum_unit: Enum::Unit => "Unit"); text!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); text!(newtype: Newtype(42) => "42"); // We have no space where name of a variant can be stored err!(enum_newtype: Text { before: "answer", content: Enum::Newtype(42), after: "answer", } => Unsupported("cannot serialize enum newtype variant `Enum::Newtype` as text content value")); // Sequences are serialized separated by spaces, all spaces inside are escaped text!(seq: vec![1, 2, 3] => "1 2 3"); text!(seq_empty: Vec::::new()); text!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'> \ with spaces \ 3"); text!(tuple_struct: Tuple("first", 42) => "first 42"); // We have no space where name of a variant can be stored err!(enum_tuple: Text { before: "answer", content: Enum::Tuple("first", 42), after: "answer", } => Unsupported("cannot serialize enum tuple variant `Enum::Tuple` as text content 
value")); // Complex types cannot be serialized in `$text` field err!(map: Text { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("cannot serialize map as text content value")); err!(struct_: Text { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("cannot serialize struct `Struct` as text content value")); err!(enum_struct: Text { before: "answer", content: Enum::Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("cannot serialize enum struct variant `Enum::Struct` as text content value")); } } /// Special field name `$value` should be serialized using name, provided /// by the type of value instead of a key. Sequences serialized as a list /// of tags with that name (each element can have their own name) mod value_field { use super::*; /// `$value` key in a map mod map { use super::*; use pretty_assertions::assert_eq; macro_rules! value { ($name:ident: $data:expr) => { serialize_as!($name: BTreeMap::from([("$value", $data)]) => ""); }; ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: BTreeMap::from([("$value", $data)]) => concat!("\n ", $expected,"\n")); }; } value!(false_: false => "false"); value!(true_: true => "true"); value!(i8_: -42i8 => "-42"); value!(i16_: -4200i16 => "-4200"); value!(i32_: -42000000i32 => "-42000000"); value!(i64_: -42000000000000i64 => "-42000000000000"); value!(isize_: -42000000000000isize => "-42000000000000"); value!(u8_: 42u8 => "42"); value!(u16_: 4200u16 => "4200"); value!(u32_: 42000000u32 => "42000000"); value!(u64_: 42000000000000u64 => "42000000000000"); value!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! 
{ value!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); value!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } value!(f32_: 4.2f32 => "4.2"); value!(f64_: 4.2f64 => "4.2"); value!(char_non_escaped: 'h' => "h"); value!(char_lt: '<' => "<"); value!(char_gt: '>' => ">"); value!(char_amp: '&' => "&"); value!(char_apos: '\'' => "'"); value!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content value!(char_space: ' ' => " "); value!(str_non_escaped: "non-escaped string" => "non-escaped string"); value!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: BTreeMap::from([("$value", Bytes(b"<\"escaped & bytes'>"))]) => Unsupported("`serialize_bytes` not supported yet")); value!(option_none: Option::<&str>::None); value!(option_some: Some("non-escaped string") => "non-escaped string"); value!(option_some_empty_str: Some("")); value!(unit: ()); value!(unit_struct: Unit); value!(unit_struct_escaped: UnitEscaped); value!(enum_unit: Enum::Unit => ""); err!(enum_unit_escaped: BTreeMap::from([("$value", Enum::UnitEscaped)]) => Unsupported("character `<` is not allowed at the start of an XML name `<\"&'>`")); value!(newtype: Newtype(42) => "42"); value!(enum_newtype: Enum::Newtype(42) => "42"); value!(seq: vec![1, 2, 3] => "1\n 2\n 3"); value!(seq_empty: Vec::::new()); value!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'>\n \ with\t\n\r spaces\n \ 3"); value!(tuple_struct: Tuple("first", 42) => "first\n 42"); value!(enum_tuple: Enum::Tuple("first", 42) => "first\n \ 42"); // We cannot wrap map or struct in any container and should not // flatten it, so it is impossible to serialize maps and structs err!(map: BTreeMap::from([("$value", BTreeMap::from([("_1", 2), ("_3", 4)]))]) => Unsupported("serialization of map types is not supported in `$value` field")); err!(struct_: BTreeMap::from([("$value", 
Struct { key: "answer", val: (42, 42) })]) => Unsupported("serialization of struct `Struct` is not supported in `$value` field")); value!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => "\n \ answer\n \ 42\n \ 42\n \ "); } /// `$value` field inside a struct mod struct_ { use super::*; use pretty_assertions::assert_eq; macro_rules! value { ($name:ident: $data:expr) => { serialize_as!($name: Value { before: "answer", content: $data, after: "answer", } => "\n \ answer\n \ answer\n\ "); }; ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: Value { before: "answer", content: $data, after: "answer", } => concat!( "\n answer\n ", $expected, "\n answer\n", )); }; } value!(false_: false => "false"); value!(true_: true => "true"); value!(i8_: -42i8 => "-42"); value!(i16_: -4200i16 => "-4200"); value!(i32_: -42000000i32 => "-42000000"); value!(i64_: -42000000000000i64 => "-42000000000000"); value!(isize_: -42000000000000isize => "-42000000000000"); value!(u8_: 42u8 => "42"); value!(u16_: 4200u16 => "4200"); value!(u32_: 42000000u32 => "42000000"); value!(u64_: 42000000000000u64 => "42000000000000"); value!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! 
{ value!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); value!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } value!(f32_: 4.2f32 => "4.2"); value!(f64_: 4.2f64 => "4.2"); value!(char_non_escaped: 'h' => "h"); value!(char_lt: '<' => "<"); value!(char_gt: '>' => ">"); value!(char_amp: '&' => "&"); value!(char_apos: '\'' => "'"); value!(char_quot: '"' => """); //TODO: add a setting to escape leading/trailing spaces, in order to // pretty-print does not change the content value!(char_space: ' ' => " "); value!(str_non_escaped: "non-escaped string" => "non-escaped string"); value!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Value { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); value!(option_none: Option::<&str>::None); value!(option_some: Some("non-escaped string") => "non-escaped string"); value!(option_some_empty_str: Some("")); value!(unit: ()); value!(unit_struct: Unit); value!(unit_struct_escaped: UnitEscaped); value!(enum_unit: Enum::Unit => ""); err!(enum_unit_escaped: Value { before: "answer", content: Enum::UnitEscaped, after: "answer", } => Unsupported("character `<` is not allowed at the start of an XML name `<\"&'>`")); value!(newtype: Newtype(42) => "42"); value!(enum_newtype: Enum::Newtype(42) => "42"); // Note that sequences of primitives serialized without delimiters! 
value!(seq: vec![1, 2, 3] => "1\n 2\n 3"); value!(seq_empty: Vec::::new()); value!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'>\n \ with\t\n\r spaces\n \ 3"); value!(tuple_struct: Tuple("first", 42) => "first\n 42"); value!(enum_tuple: Enum::Tuple("first", 42) => "first\n \ 42"); // We cannot wrap map or struct in any container and should not // flatten it, so it is impossible to serialize maps and structs err!(map: Value { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("serialization of map types is not supported in `$value` field")); err!(struct_: Value { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("serialization of struct `Struct` is not supported in `$value` field")); value!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => "\n \ answer\n \ 42\n \ 42\n \ "); } } mod attributes { use super::*; use pretty_assertions::assert_eq; serialize_as!(map_attr: BTreeMap::from([("@key1", 1), ("@key2", 2)]) => r#""#); serialize_as!(map_mixed: BTreeMap::from([("@key1", 1), ("key2", 2)]) => "\n \ 2\n\ "); serialize_as!(struct_: Attributes { key: "answer", val: (42, 42) } => r#""#); serialize_as!(struct_before: AttributesBefore { key: "answer", val: 42 } => "\n \ 42\n\ "); serialize_as!(struct_after: AttributesAfter { key: "answer", val: 42 } => "\n \ answer\n\ "); err!(enum_: Enum::Attributes { key: "answer", val: (42, 42) } => Unsupported("cannot serialize enum struct variant `Enum::Attributes`")); /// Test for https://github.com/tafia/quick-xml/issues/252 mod optional { use super::*; use pretty_assertions::assert_eq; serialize_as!(none: OptionalAttributes { a: None, b: None } => r#""#); serialize_as!(some_empty_str: OptionalAttributes { a: Some(""), b: Some("") } => r#""#); serialize_as!(some_non_empty: OptionalAttributes { a: Some("a"), b: Some("b") } => r#""#); } } /// Test for https://github.com/tafia/quick-xml/issues/252 mod optional { 
use super::*; use pretty_assertions::assert_eq; serialize_as!(none: OptionalElements { a: None, b: None } => "\n \ \n\ "); serialize_as!(some_empty_str: OptionalElements { a: Some(""), b: Some("") } => "\n \ \n \ \n\ "); serialize_as!(some_non_empty: OptionalElements { a: Some("a"), b: Some("b") } => "\n \ a\n \ b\n\ "); } } mod expand_empty_elements { use super::*; use pretty_assertions::assert_eq; /// Checks that given `$data` successfully serialized as `$expected` macro_rules! serialize_as { ($name:ident: $data:expr => $expected:expr) => { #[test] fn $name() { let mut buffer = String::new(); let ser = ElementSerializer { ser: ContentSerializer { writer: &mut buffer, level: QuoteLevel::Full, indent: Indent::None, write_indent: false, expand_empty_elements: true, }, key: XmlName("root"), }; $data.serialize(ser).unwrap(); assert_eq!(buffer, $expected); } }; } serialize_as!(option_some_empty: Some("") => ""); serialize_as!(option_some_empty_str: Some("") => ""); serialize_as!(unit: () => ""); serialize_as!(unit_struct: Unit => ""); serialize_as!(unit_struct_escaped: UnitEscaped => ""); serialize_as!(enum_unit: Enum::Unit => "Unit"); serialize_as!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); } } quick-xml-0.36.1/src/se/key.rs000064400000000000000000000250700072674642500142150ustar 00000000000000use crate::errors::serialize::DeError; use serde::ser::{Impossible, Serialize, Serializer}; use serde::serde_if_integer128; use std::fmt::Write; /// A serializer, that ensures, that only plain types can be serialized, /// so result can be used as an XML tag or attribute name. /// /// This serializer does not check that name does not contain characters that /// [not allowed] in XML names, because in some cases it should pass names /// that would be filtered on higher level. 
/// /// [not allowed]: https://www.w3.org/TR/xml11/#sec-common-syn pub struct QNameSerializer { /// Writer to which this serializer writes content pub writer: W, } impl QNameSerializer { #[inline] fn write_str(&mut self, value: &str) -> Result<(), DeError> { Ok(self.writer.write_str(value)?) } } impl Serializer for QNameSerializer { type Ok = W; type Error = DeError; type SerializeSeq = Impossible; type SerializeTuple = Impossible; type SerializeTupleStruct = Impossible; type SerializeTupleVariant = Impossible; type SerializeMap = Impossible; type SerializeStruct = Impossible; type SerializeStructVariant = Impossible; write_primitive!(); fn serialize_str(mut self, value: &str) -> Result { self.write_str(value)?; Ok(self.writer) } /// Because unit type can be represented only by empty string which is not /// a valid XML name, serialization of unit returns `Err(Unsupported)` fn serialize_unit(self) -> Result { Err(DeError::Unsupported( "cannot serialize unit type `()` as an XML tag name".into(), )) } /// Because unit struct can be represented only by empty string which is not /// a valid XML name, serialization of unit struct returns `Err(Unsupported)` fn serialize_unit_struct(self, name: &'static str) -> Result { Err(DeError::Unsupported( format!("cannot serialize unit struct `{}` as an XML tag name", name).into(), )) } /// We cannot store both a variant discriminant and a variant value, /// so serialization of enum newtype variant returns `Err(Unsupported)` fn serialize_newtype_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _value: &T, ) -> Result { Err(DeError::Unsupported( format!( "cannot serialize enum newtype variant `{}::{}` as an XML tag name", name, variant ) .into(), )) } fn serialize_seq(self, _len: Option) -> Result { Err(DeError::Unsupported( "cannot serialize sequence as an XML tag name".into(), )) } fn serialize_tuple(self, _len: usize) -> Result { Err(DeError::Unsupported( "cannot serialize tuple as an XML tag 
name".into(), )) } fn serialize_tuple_struct( self, name: &'static str, _len: usize, ) -> Result { Err(DeError::Unsupported( format!( "cannot serialize tuple struct `{}` as an XML tag name", name ) .into(), )) } fn serialize_tuple_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _len: usize, ) -> Result { Err(DeError::Unsupported( format!( "cannot serialize enum tuple variant `{}::{}` as an XML tag name", name, variant ) .into(), )) } fn serialize_map(self, _len: Option) -> Result { Err(DeError::Unsupported( "cannot serialize map as an XML tag name".into(), )) } fn serialize_struct( self, name: &'static str, _len: usize, ) -> Result { Err(DeError::Unsupported( format!("cannot serialize struct `{}` as an XML tag name", name).into(), )) } fn serialize_struct_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _len: usize, ) -> Result { Err(DeError::Unsupported( format!( "cannot serialize enum struct variant `{}::{}` as an XML tag name", name, variant ) .into(), )) } } #[cfg(test)] mod tests { use super::*; use crate::utils::Bytes; use pretty_assertions::assert_eq; use serde::Serialize; use std::collections::BTreeMap; #[derive(Debug, Serialize, PartialEq)] struct Unit; #[derive(Debug, Serialize, PartialEq)] struct Newtype(bool); #[derive(Debug, Serialize, PartialEq)] struct Tuple(&'static str, usize); #[derive(Debug, Serialize, PartialEq)] struct Struct { key: &'static str, val: usize, } #[derive(Debug, Serialize, PartialEq)] enum Enum { Unit, #[serde(rename = "<\"&'>")] UnitEscaped, Newtype(bool), Tuple(&'static str, usize), Struct { key: &'static str, val: usize, }, } /// Checks that given `$data` successfully serialized as `$expected` macro_rules! 
serialize_as { ($name:ident: $data:expr => $expected:literal) => { #[test] fn $name() { let ser = QNameSerializer { writer: String::new(), }; let buffer = $data.serialize(ser).unwrap(); assert_eq!(buffer, $expected); } }; } /// Checks that attempt to serialize given `$data` results to a /// serialization error `$kind` with `$reason` macro_rules! err { ($name:ident: $data:expr => $kind:ident($reason:literal)) => { #[test] fn $name() { let mut buffer = String::new(); let ser = QNameSerializer { writer: &mut buffer, }; match $data.serialize(ser).unwrap_err() { DeError::$kind(e) => assert_eq!(e, $reason), e => panic!( "Expected `Err({}({}))`, but got `{:?}`", stringify!($kind), $reason, e ), } assert_eq!(buffer, ""); } }; } serialize_as!(false_: false => "false"); serialize_as!(true_: true => "true"); serialize_as!(i8_: -42i8 => "-42"); serialize_as!(i16_: -4200i16 => "-4200"); serialize_as!(i32_: -42000000i32 => "-42000000"); serialize_as!(i64_: -42000000000000i64 => "-42000000000000"); serialize_as!(isize_: -42000000000000isize => "-42000000000000"); serialize_as!(u8_: 42u8 => "42"); serialize_as!(u16_: 4200u16 => "4200"); serialize_as!(u32_: 42000000u32 => "42000000"); serialize_as!(u64_: 42000000000000u64 => "42000000000000"); serialize_as!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! 
{ serialize_as!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); serialize_as!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } serialize_as!(f32_: 4.2f32 => "4.2"); serialize_as!(f64_: 4.2f64 => "4.2"); serialize_as!(char_non_escaped: 'h' => "h"); serialize_as!(char_lt: '<' => "<"); serialize_as!(char_gt: '>' => ">"); serialize_as!(char_amp: '&' => "&"); serialize_as!(char_apos: '\'' => "'"); serialize_as!(char_quot: '"' => "\""); serialize_as!(str_valid_name: "valid-name" => "valid-name"); serialize_as!(str_space: "string with spaces" => "string with spaces"); serialize_as!(str_lt: "string<" => "string<"); serialize_as!(str_gt: "string>" => "string>"); serialize_as!(str_amp: "string&" => "string&"); serialize_as!(str_apos: "string'" => "string'"); serialize_as!(str_quot: "string\"" => "string\""); err!(bytes: Bytes(b"<\"escaped & bytes'>") => Unsupported("`serialize_bytes` not supported yet")); serialize_as!(option_none: Option::<&str>::None => ""); serialize_as!(option_some: Some("non-escaped-string") => "non-escaped-string"); err!(unit: () => Unsupported("cannot serialize unit type `()` as an XML tag name")); err!(unit_struct: Unit => Unsupported("cannot serialize unit struct `Unit` as an XML tag name")); serialize_as!(enum_unit: Enum::Unit => "Unit"); serialize_as!(enum_unit_escaped: Enum::UnitEscaped => "<\"&'>"); serialize_as!(newtype: Newtype(true) => "true"); err!(enum_newtype: Enum::Newtype(false) => Unsupported("cannot serialize enum newtype variant `Enum::Newtype` as an XML tag name")); err!(seq: vec![1, 2, 3] => Unsupported("cannot serialize sequence as an XML tag name")); err!(tuple: ("<\"&'>", "with\t\r\n spaces", 3usize) => Unsupported("cannot serialize tuple as an XML tag name")); err!(tuple_struct: Tuple("first", 42) => Unsupported("cannot serialize tuple struct `Tuple` as an XML tag name")); err!(enum_tuple: Enum::Tuple("first", 42) => Unsupported("cannot serialize enum tuple 
variant `Enum::Tuple` as an XML tag name")); err!(map: BTreeMap::from([("_1", 2), ("_3", 4)]) => Unsupported("cannot serialize map as an XML tag name")); err!(struct_: Struct { key: "answer", val: 42 } => Unsupported("cannot serialize struct `Struct` as an XML tag name")); err!(enum_struct: Enum::Struct { key: "answer", val: 42 } => Unsupported("cannot serialize enum struct variant `Enum::Struct` as an XML tag name")); } quick-xml-0.36.1/src/se/mod.rs000064400000000000000000000667660072674642500142250ustar 00000000000000//! Module to handle custom serde `Serializer` /// Implements writing primitives to the underlying writer. /// Implementor must provide `write_str(self, &str) -> Result<(), DeError>` method macro_rules! write_primitive { ($method:ident ( $ty:ty )) => { fn $method(mut self, value: $ty) -> Result { self.write_str(&value.to_string())?; Ok(self.writer) } }; () => { fn serialize_bool(mut self, value: bool) -> Result { self.write_str(if value { "true" } else { "false" })?; Ok(self.writer) } write_primitive!(serialize_i8(i8)); write_primitive!(serialize_i16(i16)); write_primitive!(serialize_i32(i32)); write_primitive!(serialize_i64(i64)); write_primitive!(serialize_u8(u8)); write_primitive!(serialize_u16(u16)); write_primitive!(serialize_u32(u32)); write_primitive!(serialize_u64(u64)); serde_if_integer128! 
{ write_primitive!(serialize_i128(i128)); write_primitive!(serialize_u128(u128)); } write_primitive!(serialize_f32(f32)); write_primitive!(serialize_f64(f64)); fn serialize_char(self, value: char) -> Result { self.serialize_str(&value.to_string()) } fn serialize_bytes(self, _value: &[u8]) -> Result { //TODO: customization point - allow user to decide how to encode bytes Err(DeError::Unsupported( "`serialize_bytes` not supported yet".into(), )) } fn serialize_none(self) -> Result { Ok(self.writer) } fn serialize_some(self, value: &T) -> Result { value.serialize(self) } fn serialize_unit_variant( self, _name: &'static str, _variant_index: u32, variant: &'static str, ) -> Result { self.serialize_str(variant) } fn serialize_newtype_struct( self, _name: &'static str, value: &T, ) -> Result { value.serialize(self) } }; } //////////////////////////////////////////////////////////////////////////////////////////////////// mod content; mod element; pub(crate) mod key; pub(crate) mod simple_type; mod text; use self::content::ContentSerializer; use self::element::{ElementSerializer, Map, Struct, Tuple}; use crate::de::TEXT_KEY; use crate::errors::serialize::DeError; use crate::writer::Indentation; use serde::ser::{self, Serialize}; use serde::serde_if_integer128; use std::fmt::Write; use std::str::from_utf8; /// Serialize struct into a `Write`r. 
/// /// # Examples /// /// ``` /// # use quick_xml::se::to_writer; /// # use serde::Serialize; /// # use pretty_assertions::assert_eq; /// #[derive(Serialize)] /// struct Root<'a> { /// #[serde(rename = "@attribute")] /// attribute: &'a str, /// element: &'a str, /// #[serde(rename = "$text")] /// text: &'a str, /// } /// /// let data = Root { /// attribute: "attribute content", /// element: "element content", /// text: "text content", /// }; /// /// let mut buffer = String::new(); /// to_writer(&mut buffer, &data).unwrap(); /// assert_eq!( /// buffer, /// // The root tag name is automatically deduced from the struct name /// // This will not work for other types or struct with #[serde(flatten)] fields /// "\ /// element content\ /// text content\ /// " /// ); /// ``` pub fn to_writer(mut writer: W, value: &T) -> Result<(), DeError> where W: Write, T: ?Sized + Serialize, { value.serialize(Serializer::new(&mut writer)) } /// Serialize struct into a `String`. /// /// # Examples /// /// ``` /// # use quick_xml::se::to_string; /// # use serde::Serialize; /// # use pretty_assertions::assert_eq; /// #[derive(Serialize)] /// struct Root<'a> { /// #[serde(rename = "@attribute")] /// attribute: &'a str, /// element: &'a str, /// #[serde(rename = "$text")] /// text: &'a str, /// } /// /// let data = Root { /// attribute: "attribute content", /// element: "element content", /// text: "text content", /// }; /// /// assert_eq!( /// to_string(&data).unwrap(), /// // The root tag name is automatically deduced from the struct name /// // This will not work for other types or struct with #[serde(flatten)] fields /// "\ /// element content\ /// text content\ /// " /// ); /// ``` pub fn to_string(value: &T) -> Result where T: ?Sized + Serialize, { let mut buffer = String::new(); to_writer(&mut buffer, value)?; Ok(buffer) } /// Serialize struct into a `Write`r using specified root tag name. /// `root_tag` should be valid [XML name], otherwise error is returned. 
/// /// # Examples /// /// ``` /// # use quick_xml::se::to_writer_with_root; /// # use serde::Serialize; /// # use pretty_assertions::assert_eq; /// #[derive(Serialize)] /// struct Root<'a> { /// #[serde(rename = "@attribute")] /// attribute: &'a str, /// element: &'a str, /// #[serde(rename = "$text")] /// text: &'a str, /// } /// /// let data = Root { /// attribute: "attribute content", /// element: "element content", /// text: "text content", /// }; /// /// let mut buffer = String::new(); /// to_writer_with_root(&mut buffer, "top-level", &data).unwrap(); /// assert_eq!( /// buffer, /// "\ /// element content\ /// text content\ /// " /// ); /// ``` /// /// [XML name]: https://www.w3.org/TR/xml11/#NT-Name pub fn to_writer_with_root(mut writer: W, root_tag: &str, value: &T) -> Result<(), DeError> where W: Write, T: ?Sized + Serialize, { value.serialize(Serializer::with_root(&mut writer, Some(root_tag))?) } /// Serialize struct into a `String` using specified root tag name. /// `root_tag` should be valid [XML name], otherwise error is returned. 
/// /// # Examples /// /// ``` /// # use quick_xml::se::to_string_with_root; /// # use serde::Serialize; /// # use pretty_assertions::assert_eq; /// #[derive(Serialize)] /// struct Root<'a> { /// #[serde(rename = "@attribute")] /// attribute: &'a str, /// element: &'a str, /// #[serde(rename = "$text")] /// text: &'a str, /// } /// /// let data = Root { /// attribute: "attribute content", /// element: "element content", /// text: "text content", /// }; /// /// assert_eq!( /// to_string_with_root("top-level", &data).unwrap(), /// "\ /// element content\ /// text content\ /// " /// ); /// ``` /// /// [XML name]: https://www.w3.org/TR/xml11/#NT-Name pub fn to_string_with_root(root_tag: &str, value: &T) -> Result where T: ?Sized + Serialize, { let mut buffer = String::new(); to_writer_with_root(&mut buffer, root_tag, value)?; Ok(buffer) } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Defines which characters would be escaped in [`Text`] events and attribute /// values. /// /// [`Text`]: crate::events::Event::Text #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum QuoteLevel { /// Performs escaping, escape all characters that could have special meaning /// in the XML. This mode is compatible with SGML specification. /// /// Characters that will be replaced: /// /// Original | Replacement /// ---------|------------ /// `<` | `&lt;` /// `>` | `&gt;` /// `&` | `&amp;` /// `"` | `&quot;` /// `'` | `&apos;` Full, /// Performs escaping that is compatible with SGML specification. /// /// This level adds escaping of `>` to the `Minimal` level, which is [required] /// for compatibility with SGML. /// /// Characters that will be replaced: /// /// Original | Replacement /// ---------|------------ /// `<` | `&lt;` /// `>` | `&gt;` /// `&` | `&amp;` /// /// [required]: https://www.w3.org/TR/xml11/#syntax Partial, /// Performs the minimal possible escaping, escape only strictly necessary /// characters.
/// /// Characters that will be replaced: /// /// Original | Replacement /// ---------|------------ /// `<` | `&lt;` /// `&` | `&amp;` Minimal, } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Implements serialization method by forwarding it to the serializer created by /// the helper method [`Serializer::ser`]. macro_rules! forward { ($name:ident($ty:ty)) => { fn $name(self, value: $ty) -> Result { self.ser(&concat!("`", stringify!($ty), "`"))?.$name(value) } }; } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Almost all characters can form a name. Citation from the XML 1.1 specification, <https://www.w3.org/TR/xml11/#sec-xml11>: /// /// > The overall philosophy of names has changed since XML 1.0. Whereas XML 1.0 /// > provided a rigid definition of names, wherein everything that was not permitted /// > was forbidden, XML 1.1 names are designed so that everything that is not /// > forbidden (for a specific reason) is permitted. Since Unicode will continue /// > to grow past version 4.0, further changes to XML can be avoided by allowing /// > almost any character, including those not yet assigned, in names. /// /// See <https://www.w3.org/TR/xml11/#NT-NameStartChar> const fn is_xml11_name_start_char(ch: char) -> bool { match ch { ':' | 'A'..='Z' | '_' | 'a'..='z' | '\u{00C0}'..='\u{00D6}' | '\u{00D8}'..='\u{00F6}' | '\u{00F8}'..='\u{02FF}' | '\u{0370}'..='\u{037D}' | '\u{037F}'..='\u{1FFF}' | '\u{200C}'..='\u{200D}' | '\u{2070}'..='\u{218F}' | '\u{2C00}'..='\u{2FEF}' | '\u{3001}'..='\u{D7FF}' | '\u{F900}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}' | '\u{10000}'..='\u{EFFFF}' => true, _ => false, } } /// See <https://www.w3.org/TR/xml11/#NT-NameChar> const fn is_xml11_name_char(ch: char) -> bool { match ch { '-' | '.'
| '0'..='9' | '\u{00B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}' => { true } _ => is_xml11_name_start_char(ch), } } /// Helper struct to self-defense from errors #[derive(Clone, Copy, Debug, PartialEq)] pub(self) struct XmlName<'n>(&'n str); impl<'n> XmlName<'n> { /// Checks correctness of the XML name according to [XML 1.1 specification] /// /// [XML 1.1 specification]: https://www.w3.org/TR/xml11/#NT-Name pub fn try_from(name: &'n str) -> Result, DeError> { //TODO: Customization point: allow user to decide if he want to reject or encode the name match name.chars().next() { Some(ch) if !is_xml11_name_start_char(ch) => Err(DeError::Unsupported( format!("character `{ch}` is not allowed at the start of an XML name `{name}`") .into(), )), _ => match name.matches(|ch| !is_xml11_name_char(ch)).next() { Some(s) => Err(DeError::Unsupported( format!("character `{s}` is not allowed in an XML name `{name}`").into(), )), None => Ok(XmlName(name)), }, } } } //////////////////////////////////////////////////////////////////////////////////////////////////// pub(crate) enum Indent<'i> { /// No indent should be written before the element None, /// The specified indent should be written. The type owns the buffer with indent Owned(Indentation), /// The specified indent should be written. 
The type borrows buffer with indent /// from its owner Borrow(&'i mut Indentation), } impl<'i> Indent<'i> { pub fn borrow(&mut self) -> Indent { match self { Self::None => Indent::None, Self::Owned(ref mut i) => Indent::Borrow(i), Self::Borrow(i) => Indent::Borrow(i), } } pub fn increase(&mut self) { match self { Self::None => {} Self::Owned(i) => i.grow(), Self::Borrow(i) => i.grow(), } } pub fn decrease(&mut self) { match self { Self::None => {} Self::Owned(i) => i.shrink(), Self::Borrow(i) => i.shrink(), } } pub fn write_indent(&mut self, mut writer: W) -> Result<(), DeError> { match self { Self::None => {} Self::Owned(i) => { writer.write_char('\n')?; writer.write_str(from_utf8(i.current())?)?; } Self::Borrow(i) => { writer.write_char('\n')?; writer.write_str(from_utf8(i.current())?)?; } } Ok(()) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A Serializer pub struct Serializer<'w, 'r, W: Write> { ser: ContentSerializer<'w, 'r, W>, /// Name of the root tag. If not specified, deduced from the structure name root_tag: Option>, } impl<'w, 'r, W: Write> Serializer<'w, 'r, W> { /// Creates a new `Serializer` that uses struct name as a root tag name. /// /// Note, that an attempt to serialize a value that does not have its own name /// (a primitive, a string, bytes, `()`, a sequence, an unnamed tuple or a map) /// will end up with an error. Structs of any kind (including unit, newtype and /// tuple structs) use their name as the root tag name, enum variants use the /// variant name. Use `with_root` to create a serializer with an explicitly /// defined root element name pub fn new(writer: &'w mut W) -> Self { Self { ser: ContentSerializer { writer, level: QuoteLevel::Partial, indent: Indent::None, write_indent: false, expand_empty_elements: false, }, root_tag: None, } } /// Creates a new `Serializer` that uses specified root tag name. `name` should /// be valid [XML name], otherwise error is returned.
/// /// # Examples /// /// When serializing a primitive type, only its representation will be written: /// /// ``` /// # use pretty_assertions::assert_eq; /// # use serde::Serialize; /// # use quick_xml::se::Serializer; /// /// let mut buffer = String::new(); /// let ser = Serializer::with_root(&mut buffer, Some("root")).unwrap(); /// /// "node".serialize(ser).unwrap(); /// assert_eq!(buffer, "node"); /// ``` /// /// When serializing a struct, newtype struct, unit struct or tuple `root_tag` /// is used as tag name of root(s) element(s): /// /// ``` /// # use pretty_assertions::assert_eq; /// # use serde::Serialize; /// # use quick_xml::se::Serializer; /// /// #[derive(Debug, PartialEq, Serialize)] /// struct Struct { /// question: String, /// answer: u32, /// } /// /// let mut buffer = String::new(); /// let ser = Serializer::with_root(&mut buffer, Some("root")).unwrap(); /// /// let data = Struct { /// question: "The Ultimate Question of Life, the Universe, and Everything".into(), /// answer: 42, /// }; /// /// data.serialize(ser).unwrap(); /// assert_eq!( /// buffer, /// "\ /// The Ultimate Question of Life, the Universe, and Everything\ /// 42\ /// " /// ); /// ``` /// /// [XML name]: https://www.w3.org/TR/xml11/#NT-Name pub fn with_root(writer: &'w mut W, root_tag: Option<&'r str>) -> Result { Ok(Self { ser: ContentSerializer { writer, level: QuoteLevel::Partial, indent: Indent::None, write_indent: false, expand_empty_elements: false, }, root_tag: root_tag.map(|tag| XmlName::try_from(tag)).transpose()?, }) } /// Enable or disable expansion of empty elements. Defaults to `false`. 
/// /// # Examples /// /// ``` /// # use pretty_assertions::assert_eq; /// # use serde::Serialize; /// # use quick_xml::se::Serializer; /// /// #[derive(Debug, PartialEq, Serialize)] /// struct Struct { /// question: Option, /// } /// /// let mut buffer = String::new(); /// let mut ser = Serializer::new(&mut buffer); /// ser.expand_empty_elements(true); /// /// let data = Struct { /// question: None, /// }; /// /// data.serialize(ser).unwrap(); /// assert_eq!( /// buffer, /// "" /// ); /// ``` pub fn expand_empty_elements(&mut self, expand: bool) -> &mut Self { self.ser.expand_empty_elements = expand; self } /// Configure indent for a serializer. /// /// `indent_char` is narrowed with `as u8`, so it should be an ASCII character /// (for example, a space or a tab) pub fn indent(&mut self, indent_char: char, indent_size: usize) -> &mut Self { self.ser.indent = Indent::Owned(Indentation::new(indent_char as u8, indent_size)); self } /// Set the level of quoting used when writing texts /// /// Default: [`QuoteLevel::Partial`] pub fn set_quote_level(&mut self, level: QuoteLevel) -> &mut Self { self.ser.level = level; self } /// Set the indent object for a serializer pub(crate) fn set_indent(&mut self, indent: Indent<'r>) -> &mut Self { self.ser.indent = indent; self } /// Creates actual serializer or returns an error if root tag is not defined. /// In that case `err` contains the name of type that cannot be serialized. fn ser(self, err: &str) -> Result, DeError> { if let Some(key) = self.root_tag { Ok(ElementSerializer { ser: self.ser, key }) } else { Err(DeError::Unsupported( format!("cannot serialize {} without defined root tag", err).into(), )) } } /// Creates actual serializer using root tag or a specified `key` if root tag /// is not defined. Returns an error if root tag is not defined and a `key` /// does not conform to [XML rules](XmlName::try_from) for names.
fn ser_name(self, key: &'static str) -> Result, DeError> { Ok(ElementSerializer { ser: self.ser, key: match self.root_tag { Some(key) => key, None => XmlName::try_from(key)?, }, }) } } impl<'w, 'r, W: Write> ser::Serializer for Serializer<'w, 'r, W> { type Ok = (); type Error = DeError; type SerializeSeq = ElementSerializer<'w, 'r, W>; type SerializeTuple = ElementSerializer<'w, 'r, W>; type SerializeTupleStruct = ElementSerializer<'w, 'r, W>; type SerializeTupleVariant = Tuple<'w, 'r, W>; type SerializeMap = Map<'w, 'r, W>; type SerializeStruct = Struct<'w, 'r, W>; type SerializeStructVariant = Struct<'w, 'r, W>; forward!(serialize_bool(bool)); forward!(serialize_i8(i8)); forward!(serialize_i16(i16)); forward!(serialize_i32(i32)); forward!(serialize_i64(i64)); forward!(serialize_u8(u8)); forward!(serialize_u16(u16)); forward!(serialize_u32(u32)); forward!(serialize_u64(u64)); serde_if_integer128! { forward!(serialize_i128(i128)); forward!(serialize_u128(u128)); } forward!(serialize_f32(f32)); forward!(serialize_f64(f64)); forward!(serialize_char(char)); forward!(serialize_str(&str)); forward!(serialize_bytes(&[u8])); fn serialize_none(self) -> Result { Ok(()) } fn serialize_some(self, value: &T) -> Result { value.serialize(self) } fn serialize_unit(self) -> Result { self.ser("`()`")?.serialize_unit() } fn serialize_unit_struct(self, name: &'static str) -> Result { self.ser_name(name)?.serialize_unit_struct(name) } fn serialize_unit_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, ) -> Result { if variant == TEXT_KEY { // We should write some text but we don't known what text to write Err(DeError::Unsupported( format!( "cannot serialize enum unit variant `{}::$text` as text content value", name ) .into(), )) } else { let name = XmlName::try_from(variant)?; self.ser.write_empty(name) } } fn serialize_newtype_struct( self, name: &'static str, value: &T, ) -> Result { self.ser_name(name)?.serialize_newtype_struct(name, value) } fn 
serialize_newtype_variant( self, _name: &'static str, _variant_index: u32, variant: &'static str, value: &T, ) -> Result { if variant == TEXT_KEY { value.serialize(self.ser.into_simple_type_serializer())?; Ok(()) } else { let ser = ElementSerializer { ser: self.ser, key: XmlName::try_from(variant)?, }; value.serialize(ser) } } fn serialize_seq(self, len: Option) -> Result { self.ser("sequence")?.serialize_seq(len) } fn serialize_tuple(self, len: usize) -> Result { self.ser("unnamed tuple")?.serialize_tuple(len) } fn serialize_tuple_struct( self, name: &'static str, len: usize, ) -> Result { self.ser_name(name)?.serialize_tuple_struct(name, len) } fn serialize_tuple_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, len: usize, ) -> Result { if variant == TEXT_KEY { self.ser .into_simple_type_serializer() .serialize_tuple_struct(name, len) .map(Tuple::Text) } else { let ser = ElementSerializer { ser: self.ser, key: XmlName::try_from(variant)?, }; ser.serialize_tuple_struct(name, len).map(Tuple::Element) } } fn serialize_map(self, len: Option) -> Result { self.ser("map")?.serialize_map(len) } fn serialize_struct( self, name: &'static str, len: usize, ) -> Result { self.ser_name(name)?.serialize_struct(name, len) } fn serialize_struct_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, len: usize, ) -> Result { if variant == TEXT_KEY { Err(DeError::Unsupported( format!( "cannot serialize enum struct variant `{}::$text` as text content value", name ) .into(), )) } else { let ser = ElementSerializer { ser: self.ser, key: XmlName::try_from(variant)?, }; ser.serialize_struct(name, len) } } } #[cfg(test)] mod quote_level { use super::*; use pretty_assertions::assert_eq; use serde::Serialize; #[derive(Debug, PartialEq, Serialize)] struct Element(&'static str); #[derive(Debug, PartialEq, Serialize)] struct Example { #[serde(rename = "@attribute")] attribute: &'static str, element: Element, } #[test] fn default_() { let 
example = Example { attribute: "special chars: &, <, >, \", '", element: Element("special chars: &, <, >, \", '"), }; let mut buffer = String::new(); let ser = Serializer::new(&mut buffer); example.serialize(ser).unwrap(); assert_eq!( buffer, "\ special chars: &, <, >, \", '\ " ); } #[test] fn minimal() { let example = Example { attribute: "special chars: &, <, >, \", '", element: Element("special chars: &, <, >, \", '"), }; let mut buffer = String::new(); let mut ser = Serializer::new(&mut buffer); ser.set_quote_level(QuoteLevel::Minimal); example.serialize(ser).unwrap(); assert_eq!( buffer, ", ", '\">\ special chars: &, <, >, \", '\ " ); } #[test] fn partial() { let example = Example { attribute: "special chars: &, <, >, \", '", element: Element("special chars: &, <, >, \", '"), }; let mut buffer = String::new(); let mut ser = Serializer::new(&mut buffer); ser.set_quote_level(QuoteLevel::Partial); example.serialize(ser).unwrap(); assert_eq!( buffer, "\ special chars: &, <, >, \", '\ " ); } #[test] fn full() { let example = Example { attribute: "special chars: &, <, >, \", '", element: Element("special chars: &, <, >, \", '"), }; let mut buffer = String::new(); let mut ser = Serializer::new(&mut buffer); ser.set_quote_level(QuoteLevel::Full); example.serialize(ser).unwrap(); assert_eq!( buffer, "\ special chars: &, <, >, ", '\ " ); } } quick-xml-0.36.1/src/se/simple_type.rs000064400000000000000000001273200072674642500157600ustar 00000000000000//! Contains Serde `Serializer` for XML [simple types] [as defined] in the XML Schema. //! //! [simple types]: https://www.w3schools.com/xml/el_simpletype.asp //! 
[as defined]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition use crate::errors::serialize::DeError; use crate::escape::_escape; use crate::se::{Indent, QuoteLevel}; use serde::ser::{ Impossible, Serialize, SerializeSeq, SerializeTuple, SerializeTupleStruct, SerializeTupleVariant, Serializer, }; use serde::serde_if_integer128; use std::borrow::Cow; use std::fmt::Write; #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum QuoteTarget { /// Escape data for a text content. No additional escape symbols Text, /// Escape data for a double-quoted attribute. `"` always escaped DoubleQAttr, /// Escape data for a single-quoted attribute. `'` always escaped SingleQAttr, } /// Escapes atomic value that could be part of a `xs:list`. All whitespace characters /// additionally escaped fn escape_item(value: &str, target: QuoteTarget, level: QuoteLevel) -> Cow { use QuoteLevel::*; use QuoteTarget::*; match (target, level) { (_, Full) => _escape(value, |ch| match ch { // Spaces used as delimiters of list items, cannot be used in the item b' ' | b'\r' | b'\n' | b'\t' => true, // Required characters to escape b'&' | b'<' | b'>' | b'\'' | b'\"' => true, _ => false, }), //---------------------------------------------------------------------- (Text, Partial) => _escape(value, |ch| match ch { // Spaces used as delimiters of list items, cannot be used in the item b' ' | b'\r' | b'\n' | b'\t' => true, // Required characters to escape b'&' | b'<' | b'>' => true, _ => false, }), (Text, Minimal) => _escape(value, |ch| match ch { // Spaces used as delimiters of list items, cannot be used in the item b' ' | b'\r' | b'\n' | b'\t' => true, // Required characters to escape b'&' | b'<' => true, _ => false, }), //---------------------------------------------------------------------- (DoubleQAttr, Partial) => _escape(value, |ch| match ch { // Spaces used as delimiters of list items, cannot be used in the item b' ' | b'\r' | b'\n' | b'\t' => true, // Required characters to escape b'&' | 
b'<' | b'>' => true, // Double quoted attribute should escape quote b'"' => true, _ => false, }), (DoubleQAttr, Minimal) => _escape(value, |ch| match ch { // Spaces used as delimiters of list items, cannot be used in the item b' ' | b'\r' | b'\n' | b'\t' => true, // Required characters to escape b'&' | b'<' => true, // Double quoted attribute should escape quote b'"' => true, _ => false, }), //---------------------------------------------------------------------- (SingleQAttr, Partial) => _escape(value, |ch| match ch { // Spaces used as delimiters of list items b' ' | b'\r' | b'\n' | b'\t' => true, // Required characters to escape b'&' | b'<' | b'>' => true, // Single quoted attribute should escape quote b'\'' => true, _ => false, }), (SingleQAttr, Minimal) => _escape(value, |ch| match ch { // Spaces used as delimiters of list items b' ' | b'\r' | b'\n' | b'\t' => true, // Required characters to escape b'&' | b'<' => true, // Single quoted attribute should escape quote b'\'' => true, _ => false, }), } } /// Escapes XSD simple type value fn escape_list(value: &str, target: QuoteTarget, level: QuoteLevel) -> Cow { use QuoteLevel::*; use QuoteTarget::*; match (target, level) { (_, Full) => _escape(value, |ch| match ch { // Required characters to escape b'&' | b'<' | b'>' | b'\'' | b'\"' => true, _ => false, }), //---------------------------------------------------------------------- (Text, Partial) => _escape(value, |ch| match ch { // Required characters to escape b'&' | b'<' | b'>' => true, _ => false, }), (Text, Minimal) => _escape(value, |ch| match ch { // Required characters to escape b'&' | b'<' => true, _ => false, }), //---------------------------------------------------------------------- (DoubleQAttr, Partial) => _escape(value, |ch| match ch { // Required characters to escape b'&' | b'<' | b'>' => true, // Double quoted attribute should escape quote b'"' => true, _ => false, }), (DoubleQAttr, Minimal) => _escape(value, |ch| match ch { // Required characters 
to escape b'&' | b'<' => true, // Double quoted attribute should escape quote b'"' => true, _ => false, }), //---------------------------------------------------------------------- (SingleQAttr, Partial) => _escape(value, |ch| match ch { // Required characters to escape b'&' | b'<' | b'>' => true, // Single quoted attribute should escape quote b'\'' => true, _ => false, }), (SingleQAttr, Minimal) => _escape(value, |ch| match ch { // Required characters to escape b'&' | b'<' => true, // Single quoted attribute should escape quote b'\'' => true, _ => false, }), } } //////////////////////////////////////////////////////////////////////////////////////////////////// macro_rules! write_atomic { ($method:ident ( $ty:ty )) => { fn $method(mut self, value: $ty) -> Result { self.write_str(&value.to_string())?; Ok(true) } }; } /// A serializer that handles ordinary [simple type definition][item] with /// `{variety} = atomic`, or an ordinary [simple type] definition with /// `{variety} = union` whose basic members are all atomic. /// /// This serializer can serialize only primitive types: /// - numbers /// - booleans /// - strings /// - units /// - options /// - unit variants of enums /// /// Identifiers represented as strings and serialized accordingly. /// /// Serialization of all other types returns [`Unsupported`][DeError::Unsupported] error. /// /// This serializer returns `true` if something was written and `false` otherwise. /// /// [item]: https://www.w3.org/TR/xmlschema11-1/#std-item_type_definition /// [simple type]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition pub struct AtomicSerializer<'i, W: Write> { pub writer: W, pub target: QuoteTarget, /// Defines which XML characters need to be escaped pub level: QuoteLevel, /// When `Some`, the indent that should be written before the content /// if content is not an empty string. 
/// When `None` an `xs:list` delimiter (a space) should be written pub(crate) indent: Option>, } impl<'i, W: Write> AtomicSerializer<'i, W> { fn write_str(&mut self, value: &str) -> Result<(), DeError> { if let Some(indent) = self.indent.as_mut() { indent.write_indent(&mut self.writer)?; } else { // TODO: Customization point -- possible non-XML compatible extension to specify delimiter char self.writer.write_char(' ')?; } Ok(self.writer.write_str(value)?) } } impl<'i, W: Write> Serializer for AtomicSerializer<'i, W> { type Ok = bool; type Error = DeError; type SerializeSeq = Impossible; type SerializeTuple = Impossible; type SerializeTupleStruct = Impossible; type SerializeTupleVariant = Impossible; type SerializeMap = Impossible; type SerializeStruct = Impossible; type SerializeStructVariant = Impossible; fn serialize_bool(mut self, value: bool) -> Result { self.write_str(if value { "true" } else { "false" })?; Ok(true) } write_atomic!(serialize_i8(i8)); write_atomic!(serialize_i16(i16)); write_atomic!(serialize_i32(i32)); write_atomic!(serialize_i64(i64)); write_atomic!(serialize_u8(u8)); write_atomic!(serialize_u16(u16)); write_atomic!(serialize_u32(u32)); write_atomic!(serialize_u64(u64)); serde_if_integer128! 
{ write_atomic!(serialize_i128(i128)); write_atomic!(serialize_u128(u128)); } write_atomic!(serialize_f32(f32)); write_atomic!(serialize_f64(f64)); fn serialize_char(self, value: char) -> Result { self.serialize_str(&value.to_string()) } fn serialize_str(mut self, value: &str) -> Result { if !value.is_empty() { self.write_str(&escape_item(value, self.target, self.level))?; } Ok(!value.is_empty()) } fn serialize_bytes(self, _value: &[u8]) -> Result { //TODO: Customization point - allow user to decide how to encode bytes Err(DeError::Unsupported( "`serialize_bytes` not supported yet".into(), )) } fn serialize_none(self) -> Result { Ok(false) } fn serialize_some(self, value: &T) -> Result { value.serialize(self) } /// We cannot store anything, so the absence of a unit and presence of it /// does not differ, so serialization of unit returns `Err(Unsupported)` fn serialize_unit(self) -> Result { Err(DeError::Unsupported( "cannot serialize unit type `()` as an `xs:list` item".into(), )) } /// We cannot store anything, so the absence of a unit and presence of it /// does not differ, so serialization of unit returns `Err(Unsupported)` fn serialize_unit_struct(self, name: &'static str) -> Result { Err(DeError::Unsupported( format!( "cannot serialize unit struct `{}` as an `xs:list` item", name ) .into(), )) } fn serialize_unit_variant( self, _name: &'static str, _variant_index: u32, variant: &'static str, ) -> Result { self.serialize_str(variant) } fn serialize_newtype_struct( self, _name: &'static str, value: &T, ) -> Result { value.serialize(self) } /// We cannot store both a variant discriminant and a variant value, /// so serialization of enum newtype variant returns `Err(Unsupported)` fn serialize_newtype_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _value: &T, ) -> Result { Err(DeError::Unsupported( format!( "cannot serialize enum newtype variant `{}::{}` as an `xs:list` item", name, variant ) .into(), )) } fn serialize_seq(self, 
_len: Option) -> Result { Err(DeError::Unsupported( "cannot serialize sequence as an `xs:list` item".into(), )) } fn serialize_tuple(self, _len: usize) -> Result { Err(DeError::Unsupported( "cannot serialize tuple as an `xs:list` item".into(), )) } fn serialize_tuple_struct( self, name: &'static str, _len: usize, ) -> Result { Err(DeError::Unsupported( format!( "cannot serialize tuple struct `{}` as an `xs:list` item", name ) .into(), )) } fn serialize_tuple_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _len: usize, ) -> Result { Err(DeError::Unsupported( format!( "cannot serialize enum tuple variant `{}::{}` as an `xs:list` item", name, variant ) .into(), )) } fn serialize_map(self, _len: Option) -> Result { Err(DeError::Unsupported( "cannot serialize map as an `xs:list` item".into(), )) } fn serialize_struct( self, name: &'static str, _len: usize, ) -> Result { Err(DeError::Unsupported( format!("cannot serialize struct `{}` as an `xs:list` item", name).into(), )) } fn serialize_struct_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _len: usize, ) -> Result { Err(DeError::Unsupported( format!( "cannot serialize enum struct variant `{}::{}` as an `xs:list` item", name, variant ) .into(), )) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A serializer for a values representing XSD [simple types], which used in: /// - attribute values (`<... ...="value" ...>`) /// - text content (`<...>text`) /// - CDATA content (`<...>`) /// /// [simple types]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition pub struct SimpleTypeSerializer<'i, W: Write> { /// Writer to which this serializer writes content pub writer: W, /// Target for which element is serializing. Affects additional characters to escape. 
pub target: QuoteTarget, /// Defines which XML characters need to be escaped pub level: QuoteLevel, /// Indent that should be written before the content if content is not an empty string pub(crate) indent: Indent<'i>, } impl<'i, W: Write> SimpleTypeSerializer<'i, W> { fn write_str(&mut self, value: &str) -> Result<(), DeError> { self.indent.write_indent(&mut self.writer)?; Ok(self.writer.write_str(value)?) } } impl<'i, W: Write> Serializer for SimpleTypeSerializer<'i, W> { type Ok = W; type Error = DeError; type SerializeSeq = SimpleSeq<'i, W>; type SerializeTuple = SimpleSeq<'i, W>; type SerializeTupleStruct = SimpleSeq<'i, W>; type SerializeTupleVariant = Impossible; type SerializeMap = Impossible; type SerializeStruct = Impossible; type SerializeStructVariant = Impossible; write_primitive!(); fn serialize_str(mut self, value: &str) -> Result { if !value.is_empty() { self.write_str(&escape_list(value, self.target, self.level))?; } Ok(self.writer) } /// Does not write anything fn serialize_unit(self) -> Result { Ok(self.writer) } /// Does not write anything fn serialize_unit_struct(self, _name: &'static str) -> Result { Ok(self.writer) } /// We cannot store both a variant discriminant and a variant value, /// so serialization of enum newtype variant returns `Err(Unsupported)` fn serialize_newtype_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _value: &T, ) -> Result { Err(DeError::Unsupported( format!("cannot serialize enum newtype variant `{}::{}` as an attribute or text content value", name, variant).into(), )) } #[inline] fn serialize_seq(self, _len: Option) -> Result { Ok(SimpleSeq { writer: self.writer, target: self.target, level: self.level, indent: self.indent, is_empty: true, }) } #[inline] fn serialize_tuple(self, _len: usize) -> Result { self.serialize_seq(None) } #[inline] fn serialize_tuple_struct( self, _name: &'static str, _len: usize, ) -> Result { self.serialize_seq(None) } fn serialize_tuple_variant( self, name: 
&'static str, _variant_index: u32, variant: &'static str, _len: usize, ) -> Result { Err(DeError::Unsupported( format!("cannot serialize enum tuple variant `{}::{}` as an attribute or text content value", name, variant).into(), )) } fn serialize_map(self, _len: Option) -> Result { Err(DeError::Unsupported( "cannot serialize map as an attribute or text content value".into(), )) } fn serialize_struct( self, name: &'static str, _len: usize, ) -> Result { Err(DeError::Unsupported( format!( "cannot serialize struct `{}` as an attribute or text content value", name ) .into(), )) } fn serialize_struct_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _len: usize, ) -> Result { Err(DeError::Unsupported( format!("cannot serialize enum struct variant `{}::{}` as an attribute or text content value", name, variant).into(), )) } } /// Serializer for a sequence of atomic values delimited by space pub struct SimpleSeq<'i, W: Write> { writer: W, target: QuoteTarget, level: QuoteLevel, /// Indent that should be written before the content if content is not an empty string indent: Indent<'i>, /// If `true`, nothing was written yet to the `writer` is_empty: bool, } impl<'i, W: Write> SerializeSeq for SimpleSeq<'i, W> { type Ok = W; type Error = DeError; fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { // Write indent for the first element and delimiter for others let indent = if self.is_empty { Some(self.indent.borrow()) } else { None }; if value.serialize(AtomicSerializer { writer: &mut self.writer, target: self.target, level: self.level, indent, })? 
{ self.is_empty = false; } Ok(()) } #[inline] fn end(self) -> Result { Ok(self.writer) } } impl<'i, W: Write> SerializeTuple for SimpleSeq<'i, W> { type Ok = W; type Error = DeError; #[inline] fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { SerializeSeq::serialize_element(self, value) } #[inline] fn end(self) -> Result { SerializeSeq::end(self) } } impl<'i, W: Write> SerializeTupleStruct for SimpleSeq<'i, W> { type Ok = W; type Error = DeError; #[inline] fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { SerializeSeq::serialize_element(self, value) } #[inline] fn end(self) -> Result { SerializeSeq::end(self) } } impl<'i, W: Write> SerializeTupleVariant for SimpleSeq<'i, W> { type Ok = W; type Error = DeError; #[inline] fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { SerializeSeq::serialize_element(self, value) } #[inline] fn end(self) -> Result { SerializeSeq::end(self) } } //////////////////////////////////////////////////////////////////////////////////////////////////// #[cfg(test)] mod tests { use super::*; use crate::utils::Bytes; use serde::Serialize; use std::collections::BTreeMap; #[derive(Debug, Serialize, PartialEq)] struct Unit; #[derive(Debug, Serialize, PartialEq)] struct Newtype(usize); #[derive(Debug, Serialize, PartialEq)] struct Tuple(&'static str, usize); #[derive(Debug, Serialize, PartialEq)] struct Struct { key: &'static str, val: usize, } #[derive(Debug, Serialize, PartialEq)] enum Enum { Unit, #[serde(rename = "<\"&'>")] UnitEscaped, Newtype(usize), Tuple(&'static str, usize), Struct { key: &'static str, val: usize, }, } mod escape_item { use super::*; mod full { use super::*; use pretty_assertions::assert_eq; #[test] fn text() { assert_eq!( escape_item("text<\"'&> \t\n\rtext", QuoteTarget::Text, QuoteLevel::Full), "text<"'&> text" ); } #[test] fn double_quote_attr() { assert_eq!( escape_item( 
"text<\"'&> \t\n\rtext", QuoteTarget::DoubleQAttr, QuoteLevel::Full ), "text<"'&> text" ); } #[test] fn single_quote_attr() { assert_eq!( escape_item( "text<\"'&> \t\n\rtext", QuoteTarget::SingleQAttr, QuoteLevel::Full ), "text<"'&> text" ); } } mod partial { use super::*; use pretty_assertions::assert_eq; #[test] fn text() { assert_eq!( escape_item( "text<\"'&> \t\n\rtext", QuoteTarget::Text, QuoteLevel::Partial ), "text<\"'&> text" ); } #[test] fn double_quote_attr() { assert_eq!( escape_item( "text<\"'&> \t\n\rtext", QuoteTarget::DoubleQAttr, QuoteLevel::Partial ), "text<"'&> text" ); } #[test] fn single_quote_attr() { assert_eq!( escape_item( "text<\"'&> \t\n\rtext", QuoteTarget::SingleQAttr, QuoteLevel::Partial ), "text<\"'&> text" ); } } mod minimal { use super::*; use pretty_assertions::assert_eq; #[test] fn text() { assert_eq!( escape_item( "text<\"'&> \t\n\rtext", QuoteTarget::Text, QuoteLevel::Minimal ), "text<\"'&> text" ); } #[test] fn double_quote_attr() { assert_eq!( escape_item( "text<\"'&> \t\n\rtext", QuoteTarget::DoubleQAttr, QuoteLevel::Minimal ), "text<"'&> text" ); } #[test] fn single_quote_attr() { assert_eq!( escape_item( "text<\"'&> \t\n\rtext", QuoteTarget::SingleQAttr, QuoteLevel::Minimal ), "text<\"'&> text" ); } } } mod escape_list { use super::*; mod full { use super::*; use pretty_assertions::assert_eq; #[test] fn text() { assert_eq!( escape_list("text<\"'&> \t\n\rtext", QuoteTarget::Text, QuoteLevel::Full), "text<"'&> \t\n\rtext" ); } #[test] fn double_quote_attr() { assert_eq!( escape_list( "text<\"'&> \t\n\rtext", QuoteTarget::DoubleQAttr, QuoteLevel::Full ), "text<"'&> \t\n\rtext" ); } #[test] fn single_quote_attr() { assert_eq!( escape_list( "text<\"'&> \t\n\rtext", QuoteTarget::SingleQAttr, QuoteLevel::Full ), "text<"'&> \t\n\rtext" ); } } mod partial { use super::*; use pretty_assertions::assert_eq; #[test] fn text() { assert_eq!( escape_list( "text<\"'&> \t\n\rtext", QuoteTarget::Text, QuoteLevel::Partial ), "text<\"'&> 
\t\n\rtext" ); } #[test] fn double_quote_attr() { assert_eq!( escape_list( "text<\"'&> \t\n\rtext", QuoteTarget::DoubleQAttr, QuoteLevel::Partial ), "text<"'&> \t\n\rtext" ); } #[test] fn single_quote_attr() { assert_eq!( escape_list( "text<\"'&> \t\n\rtext", QuoteTarget::SingleQAttr, QuoteLevel::Partial ), "text<\"'&> \t\n\rtext" ); } } mod minimal { use super::*; use pretty_assertions::assert_eq; #[test] fn text() { assert_eq!( escape_list( "text<\"'&> \t\n\rtext", QuoteTarget::Text, QuoteLevel::Minimal ), "text<\"'&> \t\n\rtext" ); } #[test] fn double_quote_attr() { assert_eq!( escape_list( "text<\"'&> \t\n\rtext", QuoteTarget::DoubleQAttr, QuoteLevel::Minimal ), "text<"'&> \t\n\rtext" ); } #[test] fn single_quote_attr() { assert_eq!( escape_list( "text<\"'&> \t\n\rtext", QuoteTarget::SingleQAttr, QuoteLevel::Minimal ), "text<\"'&> \t\n\rtext" ); } } } /// Tests for serialize atomic and union values, as defined in XSD specification mod atomic { use super::*; use pretty_assertions::assert_eq; /// Checks that given `$data` successfully serialized as `$expected` macro_rules! serialize_as { ($name:ident: $data:expr => $expected:literal) => { #[test] fn $name() { let mut buffer = String::new(); let ser = AtomicSerializer { writer: &mut buffer, target: QuoteTarget::Text, level: QuoteLevel::Full, indent: Some(Indent::None), }; let has_written = $data.serialize(ser).unwrap(); assert_eq!(buffer, $expected); assert_eq!(has_written, !buffer.is_empty()); } }; } /// Checks that attempt to serialize given `$data` results to a /// serialization error `$kind` with `$reason` macro_rules! 
err { ($name:ident: $data:expr => $kind:ident($reason:literal)) => { #[test] fn $name() { let mut buffer = String::new(); let ser = AtomicSerializer { writer: &mut buffer, target: QuoteTarget::Text, level: QuoteLevel::Full, indent: Some(Indent::None), }; match $data.serialize(ser).unwrap_err() { DeError::$kind(e) => assert_eq!(e, $reason), e => panic!( "Expected `Err({}({}))`, but got `{:?}`", stringify!($kind), $reason, e ), } assert_eq!(buffer, ""); } }; } serialize_as!(false_: false => "false"); serialize_as!(true_: true => "true"); serialize_as!(i8_: -42i8 => "-42"); serialize_as!(i16_: -4200i16 => "-4200"); serialize_as!(i32_: -42000000i32 => "-42000000"); serialize_as!(i64_: -42000000000000i64 => "-42000000000000"); serialize_as!(isize_: -42000000000000isize => "-42000000000000"); serialize_as!(u8_: 42u8 => "42"); serialize_as!(u16_: 4200u16 => "4200"); serialize_as!(u32_: 42000000u32 => "42000000"); serialize_as!(u64_: 42000000000000u64 => "42000000000000"); serialize_as!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! 
{ serialize_as!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); serialize_as!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } serialize_as!(f32_: 4.2f32 => "4.2"); serialize_as!(f64_: 4.2f64 => "4.2"); serialize_as!(char_non_escaped: 'h' => "h"); serialize_as!(char_lt: '<' => "<"); serialize_as!(char_gt: '>' => ">"); serialize_as!(char_amp: '&' => "&"); serialize_as!(char_apos: '\'' => "'"); serialize_as!(char_quot: '"' => """); serialize_as!(str_non_escaped: "non-escaped-string" => "non-escaped-string"); serialize_as!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Bytes(b"<\"escaped & bytes'>") => Unsupported("`serialize_bytes` not supported yet")); serialize_as!(option_none: Option::<&str>::None => ""); serialize_as!(option_some: Some("non-escaped-string") => "non-escaped-string"); err!(unit: () => Unsupported("cannot serialize unit type `()` as an `xs:list` item")); err!(unit_struct: Unit => Unsupported("cannot serialize unit struct `Unit` as an `xs:list` item")); serialize_as!(enum_unit: Enum::Unit => "Unit"); serialize_as!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); serialize_as!(newtype: Newtype(42) => "42"); err!(enum_newtype: Enum::Newtype(42) => Unsupported("cannot serialize enum newtype variant `Enum::Newtype` as an `xs:list` item")); err!(seq: vec![1, 2, 3] => Unsupported("cannot serialize sequence as an `xs:list` item")); err!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => Unsupported("cannot serialize tuple as an `xs:list` item")); err!(tuple_struct: Tuple("first", 42) => Unsupported("cannot serialize tuple struct `Tuple` as an `xs:list` item")); err!(enum_tuple: Enum::Tuple("first", 42) => Unsupported("cannot serialize enum tuple variant `Enum::Tuple` as an `xs:list` item")); err!(map: BTreeMap::from([(1, 2), (3, 4)]) => Unsupported("cannot serialize map as an `xs:list` item")); err!(struct_: Struct { key: "answer", val: 42 } => 
Unsupported("cannot serialize struct `Struct` as an `xs:list` item")); err!(enum_struct: Enum::Struct { key: "answer", val: 42 } => Unsupported("cannot serialize enum struct variant `Enum::Struct` as an `xs:list` item")); } mod simple_type { use super::*; use pretty_assertions::assert_eq; /// Checks that given `$data` successfully serialized as `$expected` macro_rules! serialize_as { ($name:ident: $data:expr => $expected:literal) => { #[test] fn $name() { let ser = SimpleTypeSerializer { writer: String::new(), target: QuoteTarget::Text, level: QuoteLevel::Full, indent: Indent::None, }; let buffer = $data.serialize(ser).unwrap(); assert_eq!(buffer, $expected); } }; } /// Checks that attempt to serialize given `$data` results to a /// serialization error `$kind` with `$reason` macro_rules! err { ($name:ident: $data:expr => $kind:ident($reason:literal)) => { #[test] fn $name() { let mut buffer = String::new(); let ser = SimpleTypeSerializer { writer: &mut buffer, target: QuoteTarget::Text, level: QuoteLevel::Full, indent: Indent::None, }; match $data.serialize(ser).unwrap_err() { DeError::$kind(e) => assert_eq!(e, $reason), e => panic!( "Expected `Err({}({}))`, but got `{:?}`", stringify!($kind), $reason, e ), } assert_eq!(buffer, ""); } }; } serialize_as!(false_: false => "false"); serialize_as!(true_: true => "true"); serialize_as!(i8_: -42i8 => "-42"); serialize_as!(i16_: -4200i16 => "-4200"); serialize_as!(i32_: -42000000i32 => "-42000000"); serialize_as!(i64_: -42000000000000i64 => "-42000000000000"); serialize_as!(isize_: -42000000000000isize => "-42000000000000"); serialize_as!(u8_: 42u8 => "42"); serialize_as!(u16_: 4200u16 => "4200"); serialize_as!(u32_: 42000000u32 => "42000000"); serialize_as!(u64_: 42000000000000u64 => "42000000000000"); serialize_as!(usize_: 42000000000000usize => "42000000000000"); serde_if_integer128! 
{ serialize_as!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); serialize_as!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } serialize_as!(f32_: 4.2f32 => "4.2"); serialize_as!(f64_: 4.2f64 => "4.2"); serialize_as!(char_non_escaped: 'h' => "h"); serialize_as!(char_lt: '<' => "<"); serialize_as!(char_gt: '>' => ">"); serialize_as!(char_amp: '&' => "&"); serialize_as!(char_apos: '\'' => "'"); serialize_as!(char_quot: '"' => """); serialize_as!(str_non_escaped: "non-escaped string" => "non-escaped string"); serialize_as!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Bytes(b"<\"escaped & bytes'>") => Unsupported("`serialize_bytes` not supported yet")); serialize_as!(option_none: Option::<&str>::None => ""); serialize_as!(option_some: Some("non-escaped string") => "non-escaped string"); serialize_as!(unit: () => ""); serialize_as!(unit_struct: Unit => ""); serialize_as!(enum_unit: Enum::Unit => "Unit"); serialize_as!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); serialize_as!(newtype: Newtype(42) => "42"); err!(enum_newtype: Enum::Newtype(42) => Unsupported("cannot serialize enum newtype variant `Enum::Newtype` as an attribute or text content value")); serialize_as!(seq: vec![1, 2, 3] => "1 2 3"); serialize_as!(seq_empty: Vec::::new() => ""); serialize_as!(seq_with_1_empty_str: vec![""] => ""); serialize_as!(seq_with_2_empty_strs: vec!["", ""] => ""); serialize_as!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'> with spaces 3"); serialize_as!(tuple_struct: Tuple("first", 42) => "first 42"); err!(enum_tuple: Enum::Tuple("first", 42) => Unsupported("cannot serialize enum tuple variant `Enum::Tuple` as an attribute or text content value")); err!(map: BTreeMap::from([(1, 2), (3, 4)]) => Unsupported("cannot serialize map as an attribute or text content value")); err!(struct_: Struct { key: "answer", val: 42 } => Unsupported("cannot serialize struct `Struct` as an 
attribute or text content value")); err!(enum_struct: Enum::Struct { key: "answer", val: 42 } => Unsupported("cannot serialize enum struct variant `Enum::Struct` as an attribute or text content value")); } mod simple_seq { use super::*; use crate::writer::Indentation; use pretty_assertions::assert_eq; #[test] fn empty_seq() { let mut buffer = String::new(); let mut indent = Indentation::new(b'*', 2); indent.grow(); let ser = SimpleSeq { writer: &mut buffer, target: QuoteTarget::Text, level: QuoteLevel::Full, indent: Indent::Owned(indent), is_empty: true, }; SerializeSeq::end(ser).unwrap(); assert_eq!(buffer, ""); } #[test] fn all_items_empty() { let mut buffer = String::new(); let mut indent = Indentation::new(b'*', 2); indent.grow(); let mut ser = SimpleSeq { writer: &mut buffer, target: QuoteTarget::Text, level: QuoteLevel::Full, indent: Indent::Owned(indent), is_empty: true, }; SerializeSeq::serialize_element(&mut ser, "").unwrap(); SerializeSeq::serialize_element(&mut ser, "").unwrap(); SerializeSeq::serialize_element(&mut ser, "").unwrap(); SerializeSeq::end(ser).unwrap(); assert_eq!(buffer, ""); } #[test] fn some_items_empty1() { let mut buffer = String::new(); let mut indent = Indentation::new(b'*', 2); indent.grow(); let mut ser = SimpleSeq { writer: &mut buffer, target: QuoteTarget::Text, level: QuoteLevel::Full, indent: Indent::Owned(indent), is_empty: true, }; SerializeSeq::serialize_element(&mut ser, "").unwrap(); SerializeSeq::serialize_element(&mut ser, &1).unwrap(); SerializeSeq::serialize_element(&mut ser, "").unwrap(); SerializeSeq::end(ser).unwrap(); assert_eq!(buffer, "\n**1"); } #[test] fn some_items_empty2() { let mut buffer = String::new(); let mut indent = Indentation::new(b'*', 2); indent.grow(); let mut ser = SimpleSeq { writer: &mut buffer, target: QuoteTarget::Text, level: QuoteLevel::Full, indent: Indent::Owned(indent), is_empty: true, }; SerializeSeq::serialize_element(&mut ser, &1).unwrap(); SerializeSeq::serialize_element(&mut ser, 
"").unwrap(); SerializeSeq::serialize_element(&mut ser, &2).unwrap(); SerializeSeq::end(ser).unwrap(); assert_eq!(buffer, "\n**1 2"); } #[test] fn items() { let mut buffer = String::new(); let mut indent = Indentation::new(b'*', 2); indent.grow(); let mut ser = SimpleSeq { writer: &mut buffer, target: QuoteTarget::Text, level: QuoteLevel::Full, indent: Indent::Owned(indent), is_empty: true, }; SerializeSeq::serialize_element(&mut ser, &1).unwrap(); SerializeSeq::serialize_element(&mut ser, &2).unwrap(); SerializeSeq::serialize_element(&mut ser, &3).unwrap(); SerializeSeq::end(ser).unwrap(); assert_eq!(buffer, "\n**1 2 3"); } } } quick-xml-0.36.1/src/se/text.rs000064400000000000000000000133360072674642500144130ustar 00000000000000//! Contains serializer for a special `&text` field use crate::de::TEXT_KEY; use crate::errors::serialize::DeError; use crate::se::simple_type::{SimpleSeq, SimpleTypeSerializer}; use serde::ser::{Impossible, Serialize, Serializer}; use serde::serde_if_integer128; use std::fmt::Write; macro_rules! write_primitive { ($method:ident ( $ty:ty )) => { #[inline] fn $method(self, value: $ty) -> Result { self.0.$method(value) } }; } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A serializer used to serialize a `$text` field of a struct or map. /// /// This serializer a very similar to [`SimpleTypeSerializer`], but different /// from it in how it processes unit enum variants. Unlike [`SimpleTypeSerializer`] /// this serializer does not write anything for the unit variant. 
pub struct TextSerializer<'i, W: Write>(pub SimpleTypeSerializer<'i, W>); impl<'i, W: Write> Serializer for TextSerializer<'i, W> { type Ok = W; type Error = DeError; type SerializeSeq = SimpleSeq<'i, W>; type SerializeTuple = SimpleSeq<'i, W>; type SerializeTupleStruct = SimpleSeq<'i, W>; type SerializeTupleVariant = SimpleSeq<'i, W>; type SerializeMap = Impossible; type SerializeStruct = Impossible; type SerializeStructVariant = Impossible; write_primitive!(serialize_bool(bool)); write_primitive!(serialize_i8(i8)); write_primitive!(serialize_i16(i16)); write_primitive!(serialize_i32(i32)); write_primitive!(serialize_i64(i64)); write_primitive!(serialize_u8(u8)); write_primitive!(serialize_u16(u16)); write_primitive!(serialize_u32(u32)); write_primitive!(serialize_u64(u64)); serde_if_integer128! { write_primitive!(serialize_i128(i128)); write_primitive!(serialize_u128(u128)); } write_primitive!(serialize_f32(f32)); write_primitive!(serialize_f64(f64)); write_primitive!(serialize_char(char)); write_primitive!(serialize_str(&str)); write_primitive!(serialize_bytes(&[u8])); #[inline] fn serialize_none(self) -> Result { self.0.serialize_none() } fn serialize_some(self, value: &T) -> Result { value.serialize(self) } #[inline] fn serialize_unit(self) -> Result { self.0.serialize_unit() } #[inline] fn serialize_unit_struct(self, name: &'static str) -> Result { self.0.serialize_unit_struct(name) } #[inline] fn serialize_unit_variant( self, name: &'static str, variant_index: u32, variant: &'static str, ) -> Result { if variant == TEXT_KEY { Ok(self.0.writer) } else { self.0.serialize_unit_variant(name, variant_index, variant) } } fn serialize_newtype_struct( self, _name: &'static str, value: &T, ) -> Result { value.serialize(self) } #[inline] fn serialize_newtype_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _value: &T, ) -> Result { Err(DeError::Unsupported( format!( "cannot serialize enum newtype variant `{}::{}` as text content value", 
name, variant ) .into(), )) } #[inline] fn serialize_seq(self, len: Option) -> Result { self.0.serialize_seq(len) } #[inline] fn serialize_tuple(self, len: usize) -> Result { self.0.serialize_tuple(len) } #[inline] fn serialize_tuple_struct( self, name: &'static str, len: usize, ) -> Result { self.0.serialize_tuple_struct(name, len) } #[inline] fn serialize_tuple_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _len: usize, ) -> Result { Err(DeError::Unsupported( format!( "cannot serialize enum tuple variant `{}::{}` as text content value", name, variant ) .into(), )) } #[inline] fn serialize_map(self, _len: Option) -> Result { Err(DeError::Unsupported( "cannot serialize map as text content value".into(), )) } #[inline] fn serialize_struct( self, name: &'static str, _len: usize, ) -> Result { Err(DeError::Unsupported( format!("cannot serialize struct `{}` as text content value", name).into(), )) } #[inline] fn serialize_struct_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _len: usize, ) -> Result { Err(DeError::Unsupported( format!( "cannot serialize enum struct variant `{}::{}` as text content value", name, variant ) .into(), )) } } quick-xml-0.36.1/src/serde_helpers.rs000064400000000000000000000343260072674642500156460ustar 00000000000000//! Provides helper functions to glue an XML with a serde content model. use serde::{Deserialize, Deserializer, Serialize, Serializer}; #[macro_export] #[doc(hidden)] macro_rules! deserialize_variant { // Produce struct enum variant ( $de:expr, $enum:tt, $variant:ident { $( $(#[$meta:meta])* $field:ident : $typ:ty ),* $(,)? } ) => ({ let var = { // Create anonymous type #[derive(serde::Deserialize)] struct $variant { $( $(#[$meta])* $field: $typ, )* } <$variant>::deserialize($de)? 
}; // Due to https://github.com/rust-lang/rust/issues/86935 we cannot use // <$enum> :: $variant use $enum :: *; $variant { $($field: var.$field,)* } }); // Produce newtype enum variant ( $de:expr, $enum:tt, $variant:ident($typ:ty) ) => ({ let var = <$typ>::deserialize($de)?; <$enum> :: $variant(var) }); // Produce unit enum variant ( $de:expr, $enum:tt, $variant:ident ) => ({ serde::de::IgnoredAny::deserialize($de)?; <$enum> :: $variant }); } /// Helper macro that generates different match expressions depending on the presence /// of default variant #[macro_export] #[doc(hidden)] macro_rules! deserialize_match { // Only default variant ( $tag:ident, $de:ident, $enum:ty, (_ => $($default_variant:tt)+ ) $(,)? ) => ( Ok($crate::deserialize_variant!( $de, $enum, $($default_variant)+ )) ); // With default variant ( $tag:ident, $de:ident, $enum:ty, $( ($variant_tag:literal => $($variant:tt)+ ) ),* , (_ => $($default_variant:tt)+ ) $(,)? ) => ( match $tag.as_ref() { $( $variant_tag => Ok($crate::deserialize_variant!( $de, $enum, $($variant)+ )), )* _ => Ok($crate::deserialize_variant!( $de, $enum, $($default_variant)+ )), } ); // Without default variant ( $tag:ident, $de:ident, $enum:ty, $( ($variant_tag:literal => $($variant:tt)+ ) ),* $(,)? ) => ( match $tag.as_ref() { $( $variant_tag => Ok($crate::deserialize_variant!( $de, $enum, $($variant)+ )), )* _ => Err(A::Error::unknown_field(&$tag, &[$($variant_tag),+])), } ); } /// A helper to implement [`Deserialize`] for [internally tagged] enums which /// does not use [`Deserializer::deserialize_any`] that produces wrong results /// with XML because of [serde#1183]. /// /// In contrast to deriving [`Deserialize`] this macro assumes that a tag will be /// the first element or attribute in the XML. 
/// /// # Example /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::de::from_str; /// use quick_xml::impl_deserialize_for_internally_tagged_enum; /// use serde::Deserialize; /// /// #[derive(Deserialize, Debug, PartialEq)] /// struct Root { /// one: InternallyTaggedEnum, /// two: InternallyTaggedEnum, /// three: InternallyTaggedEnum, /// } /// /// #[derive(Debug, PartialEq)] /// // #[serde(tag = "@tag")] /// enum InternallyTaggedEnum { /// Unit, /// Newtype(Newtype), /// Struct { /// // #[serde(rename = "@attribute")] /// attribute: u32, /// element: f32, /// }, /// } /// /// #[derive(Deserialize, Debug, PartialEq)] /// struct Newtype { /// #[serde(rename = "@attribute")] /// attribute: u64, /// } /// /// // The macro needs the type of the enum, the tag name, /// // and information about all the variants /// impl_deserialize_for_internally_tagged_enum!{ /// InternallyTaggedEnum, "@tag", /// ("Unit" => Unit), /// ("Newtype" => Newtype(Newtype)), /// ("Struct" => Struct { /// #[serde(rename = "@attribute")] /// attribute: u32, /// element: f32, /// }), /// } /// /// assert_eq!( /// from_str::(r#" /// /// /// /// /// 4.2 /// /// /// "#).unwrap(), /// Root { /// one: InternallyTaggedEnum::Unit, /// two: InternallyTaggedEnum::Newtype(Newtype { attribute: 42 }), /// three: InternallyTaggedEnum::Struct { /// attribute: 42, /// element: 4.2, /// }, /// }, /// ); /// ``` /// /// You don't necessarily have to provide all the enumeration variants and can use /// `_` to put every undefined tag into an enumeration variant. 
/// This default variant (`_ => ...`) must be the last one to appear in the macro, /// like `_ => Other` in the example below: /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::de::from_str; /// use quick_xml::impl_deserialize_for_internally_tagged_enum; /// use serde::Deserialize; /// /// #[derive(Deserialize, Debug, PartialEq)] /// struct Root { /// one: InternallyTaggedEnum, /// two: InternallyTaggedEnum, /// three: InternallyTaggedEnum, /// } /// /// #[derive(Debug, PartialEq)] /// enum InternallyTaggedEnum { /// NewType(Newtype), /// Other, /// } /// /// #[derive(Deserialize, Debug, PartialEq)] /// struct Newtype { /// #[serde(rename = "@attribute")] /// attribute: u64, /// } /// /// // The macro needs the type of the enum, the tag name, /// // and information about all the variants /// impl_deserialize_for_internally_tagged_enum!{ /// InternallyTaggedEnum, "@tag", /// ("NewType" => NewType(Newtype)), /// (_ => Other), /// } /// /// assert_eq!( /// from_str::(r#" /// /// /// /// /// /// /// /// "#).unwrap(), /// Root { /// one: InternallyTaggedEnum::NewType(Newtype { attribute: 42 }), /// two: InternallyTaggedEnum::Other, /// three: InternallyTaggedEnum::Other, /// }, /// ); /// ``` /// /// [internally tagged]: https://serde.rs/enum-representations.html#internally-tagged /// [serde#1183]: https://github.com/serde-rs/serde/issues/1183 #[macro_export(local_inner_macros)] macro_rules! 
impl_deserialize_for_internally_tagged_enum { ( $enum:ty, $tag:literal, $($cases:tt)* ) => { impl<'de> serde::de::Deserialize<'de> for $enum { fn deserialize(deserializer: D) -> Result where D: serde::de::Deserializer<'de>, { use serde::de::{Error, MapAccess, Visitor}; // The Visitor struct is normally used for state, but none is needed struct TheVisitor; // The main logic of the deserializing happens in the Visitor trait impl<'de> Visitor<'de> for TheVisitor { // The type that is being deserialized type Value = $enum; // Try to give a better error message when this is used wrong fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { f.write_str("expecting map with tag in ")?; f.write_str($tag) } // The xml data is provided as an opaque map, // that map is parsed into the type fn visit_map(self, mut map: A) -> Result where A: MapAccess<'de>, { // Here the assumption is made that only one attribute // exists and it's the discriminator (enum "tag"). let entry: Option<(String, String)> = map.next_entry()?; // If there are more attributes those would need // to be parsed as well. let tag = match entry { // Return an error if the no attributes are found, // and indicate that the @tag attribute is missing. 
None => Err(A::Error::missing_field($tag)), // Check if the attribute is the tag Some((attribute, value)) => { if attribute == $tag { // return the value of the tag Ok(value) } else { // The attribute is not @tag, return an error // indicating that there is an unexpected attribute Err(A::Error::unknown_field(&attribute, &[$tag])) } } }?; let de = serde::de::value::MapAccessDeserializer::new(map); $crate::deserialize_match!( tag, de, $enum, $($cases)* ) } } // Tell the deserializer to deserialize the data as a map, // using the TheVisitor as the decoder deserializer.deserialize_map(TheVisitor) } } } } /// Provides helper functions to serialization and deserialization of types /// (usually enums) as a text content of an element and intended to use with /// [`#[serde(with = "...")]`][with], [`#[serde(deserialize_with = "...")]`][de-with] /// and [`#[serde(serialize_with = "...")]`][se-with]. /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::de::from_str; /// use quick_xml::se::to_string; /// use serde::{Serialize, Deserialize}; /// /// #[derive(Serialize, Deserialize, PartialEq, Debug)] /// enum SomeEnum { /// // Default implementation serializes enum as an `` element /// EnumValue, /// # /* /// ... 
/// # */
/// }
///
/// #[derive(Serialize, Deserialize, PartialEq, Debug)]
/// #[serde(rename = "some-container")]
/// struct SomeContainer {
///     #[serde(with = "quick_xml::serde_helpers::text_content")]
///     field: SomeEnum,
/// }
///
/// let container = SomeContainer {
///     field: SomeEnum::EnumValue,
/// };
/// let xml = "\
///     <some-container>\
///         <field>EnumValue</field>\
///     </some-container>";
///
/// assert_eq!(to_string(&container).unwrap(), xml);
/// assert_eq!(from_str::<SomeContainer>(xml).unwrap(), container);
/// ```
///
/// Using this module is equivalent to replacing `field`'s type with this:
///
/// ```
/// # use serde::{Deserialize, Serialize};
/// # type SomeEnum = ();
/// #[derive(Serialize, Deserialize)]
/// struct Field {
///     // Use a special name `$text` to map field to the text content
///     #[serde(rename = "$text")]
///     content: SomeEnum,
/// }
///
/// #[derive(Serialize, Deserialize)]
/// #[serde(rename = "some-container")]
/// struct SomeContainer {
///     field: Field,
/// }
/// ```
/// Read about the meaning of a special [`$text`] field.
///
/// In versions of quick-xml before 0.31.0 this module used to represent enum
/// unit variants as `<field>EnumUnitVariant</field>` instead of `<EnumUnitVariant/>`.
/// Since version 0.31.0 this is default representation of enums in normal fields,
/// and `<EnumUnitVariant/>` requires `$value` field:
///
/// ```
/// # use pretty_assertions::assert_eq;
/// use quick_xml::de::from_str;
/// use quick_xml::se::to_string;
/// use serde::{Serialize, Deserialize};
///
/// #[derive(Serialize, Deserialize, PartialEq, Debug)]
/// enum SomeEnum {
///     // Default implementation serializes enum as an `<EnumValue/>` element
///     EnumValue,
/// # /*
///     ...
/// # */ /// } /// /// #[derive(Serialize, Deserialize, PartialEq, Debug)] /// #[serde(rename = "some-container")] /// struct SomeContainer { /// #[serde(rename = "$value")] /// field: SomeEnum, /// } /// /// let container = SomeContainer { /// field: SomeEnum::EnumValue, /// }; /// let xml = "\ /// \ /// \ /// "; /// /// assert_eq!(to_string(&container).unwrap(), xml); /// assert_eq!(from_str::(xml).unwrap(), container); /// ``` /// /// [with]: https://serde.rs/field-attrs.html#with /// [de-with]: https://serde.rs/field-attrs.html#deserialize_with /// [se-with]: https://serde.rs/field-attrs.html#serialize_with /// [`$text`]: ../../de/index.html#text pub mod text_content { use super::*; /// Serializes `value` as an XSD [simple type]. Intended to use with /// `#[serde(serialize_with = "...")]`. See example at [`text_content`] /// module level. /// /// [simple type]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition pub fn serialize(value: &T, serializer: S) -> Result where S: Serializer, T: Serialize, { #[derive(Serialize)] struct Field<'a, T> { #[serde(rename = "$text")] value: &'a T, } Field { value }.serialize(serializer) } /// Deserializes XSD's [simple type]. Intended to use with /// `#[serde(deserialize_with = "...")]`. See example at [`text_content`] /// module level. 
/// /// [simple type]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition pub fn deserialize<'de, D, T>(deserializer: D) -> Result where D: Deserializer<'de>, T: Deserialize<'de>, { #[derive(Deserialize)] struct Field { #[serde(rename = "$text")] value: T, } Ok(Field::deserialize(deserializer)?.value) } } quick-xml-0.36.1/src/utils.rs000064400000000000000000000275620072674642500141660ustar 00000000000000use std::borrow::{Borrow, Cow}; use std::fmt::{self, Debug, Formatter}; use std::io; use std::ops::Deref; #[cfg(feature = "async-tokio")] use std::{ pin::Pin, task::{Context, Poll}, }; #[cfg(feature = "serialize")] use serde::de::{Deserialize, Deserializer, Error, Visitor}; #[cfg(feature = "serialize")] use serde::ser::{Serialize, Serializer}; #[allow(clippy::ptr_arg)] pub fn write_cow_string(f: &mut Formatter, cow_string: &Cow<[u8]>) -> fmt::Result { match cow_string { Cow::Owned(s) => { write!(f, "Owned(")?; write_byte_string(f, s)?; } Cow::Borrowed(s) => { write!(f, "Borrowed(")?; write_byte_string(f, s)?; } } write!(f, ")") } pub fn write_byte_string(f: &mut Formatter, byte_string: &[u8]) -> fmt::Result { write!(f, "\"")?; for b in byte_string { match *b { 32..=33 | 35..=126 => write!(f, "{}", *b as char)?, 34 => write!(f, "\\\"")?, _ => write!(f, "{:#02X}", b)?, } } write!(f, "\"")?; Ok(()) } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A version of [`Cow`] that can borrow from two different buffers, one of them /// is a deserializer input. 
///
/// # Lifetimes
///
/// - `'i`: lifetime of the data that the deserializer borrows from the parsed input
/// - `'s`: lifetime of the data that is owned by a deserializer
pub enum CowRef<'i, 's, B>
where
    B: ToOwned + ?Sized,
{
    /// An input borrowed from the parsed data
    Input(&'i B),
    /// An input borrowed from the buffer owned by another deserializer
    Slice(&'s B),
    /// An input taken from an external deserializer, owned by that deserializer
    Owned(<B as ToOwned>::Owned),
}

/// Gives uniform read access to the underlying data regardless of which
/// buffer (or owned value) it lives in.
impl<'i, 's, B> Deref for CowRef<'i, 's, B>
where
    B: ToOwned + ?Sized,
    B::Owned: Borrow<B>,
{
    type Target = B;

    fn deref(&self) -> &B {
        match self {
            Self::Input(borrowed) => borrowed,
            Self::Slice(borrowed) => borrowed,
            Self::Owned(owned) => owned.borrow(),
        }
    }
}

/// Formats exactly like the wrapped value: the variant itself is not shown.
impl<'i, 's, B> Debug for CowRef<'i, 's, B>
where
    B: ToOwned + ?Sized + Debug,
    B::Owned: Debug,
{
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        match self {
            Self::Input(borrowed) => Debug::fmt(borrowed, f),
            Self::Slice(borrowed) => Debug::fmt(borrowed, f),
            Self::Owned(owned) => Debug::fmt(owned, f),
        }
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Wrapper around `Vec<u8>` that has a human-readable debug representation:
/// printable ASCII symbols output as is, all other output in HEX notation.
///
/// Also, when [`serialize`] feature is on, this type is deserialized using
/// [`deserialize_byte_buf`](serde::Deserializer::deserialize_byte_buf) instead
/// of vector's generic [`deserialize_seq`](serde::Deserializer::deserialize_seq)
///
/// [`serialize`]: ../index.html#serialize
#[derive(PartialEq, Eq)]
pub struct ByteBuf(pub Vec<u8>);

impl Debug for ByteBuf {
    /// Prints the bytes as a quoted string via `write_byte_string`.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        write_byte_string(f, &self.0)
    }
}

#[cfg(feature = "serialize")]
impl<'de> Deserialize<'de> for ByteBuf {
    fn deserialize<D>(d: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        // Stateless visitor that accepts both borrowed and owned byte data
        struct ValueVisitor;

        impl<'de> Visitor<'de> for ValueVisitor {
            type Value = ByteBuf;

            fn expecting(&self, f: &mut Formatter) -> fmt::Result {
                f.write_str("byte data")
            }

            // Borrowed data must be copied because `ByteBuf` owns its bytes
            fn visit_bytes<E: Error>(self, v: &[u8]) -> Result<Self::Value, E> {
                Ok(ByteBuf(v.to_vec()))
            }

            // Owned data is taken over without copying
            fn visit_byte_buf<E: Error>(self, v: Vec<u8>) -> Result<Self::Value, E> {
                Ok(ByteBuf(v))
            }
        }

        d.deserialize_byte_buf(ValueVisitor)
    }
}

#[cfg(feature = "serialize")]
impl Serialize for ByteBuf {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        serializer.serialize_bytes(&self.0)
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Wrapper around `&[u8]` that has a human-readable debug representation:
/// printable ASCII symbols output as is, all other output in HEX notation.
/// /// Also, when [`serialize`] feature is on, this type deserialized using /// [`deserialize_bytes`](serde::Deserializer::deserialize_bytes) instead /// of vector's generic [`deserialize_seq`](serde::Deserializer::deserialize_seq) /// /// [`serialize`]: ../index.html#serialize #[derive(PartialEq, Eq)] pub struct Bytes<'de>(pub &'de [u8]); impl<'de> Debug for Bytes<'de> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write_byte_string(f, self.0) } } #[cfg(feature = "serialize")] impl<'de> Deserialize<'de> for Bytes<'de> { fn deserialize(d: D) -> Result where D: Deserializer<'de>, { struct ValueVisitor; impl<'de> Visitor<'de> for ValueVisitor { type Value = Bytes<'de>; fn expecting(&self, f: &mut Formatter) -> fmt::Result { f.write_str("borrowed bytes") } fn visit_borrowed_bytes(self, v: &'de [u8]) -> Result { Ok(Bytes(v)) } } d.deserialize_bytes(ValueVisitor) } } #[cfg(feature = "serialize")] impl<'de> Serialize for Bytes<'de> { fn serialize(&self, serializer: S) -> Result where S: Serializer, { serializer.serialize_bytes(self.0) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A simple producer of infinite stream of bytes, useful in tests. /// /// Will repeat `chunk` field indefinitely. pub struct Fountain<'a> { /// That piece of data repeated infinitely... 
pub chunk: &'a [u8], /// Part of `chunk` that was consumed by BufRead impl pub consumed: usize, /// The overall count of read bytes pub overall_read: u64, } impl<'a> io::Read for Fountain<'a> { fn read(&mut self, buf: &mut [u8]) -> io::Result { let available = &self.chunk[self.consumed..]; let len = buf.len().min(available.len()); let (portion, _) = available.split_at(len); buf.copy_from_slice(portion); Ok(len) } } impl<'a> io::BufRead for Fountain<'a> { #[inline] fn fill_buf(&mut self) -> io::Result<&[u8]> { Ok(&self.chunk[self.consumed..]) } fn consume(&mut self, amt: usize) { self.consumed += amt; if self.consumed == self.chunk.len() { self.consumed = 0; } self.overall_read += amt as u64; } } #[cfg(feature = "async-tokio")] impl<'a> tokio::io::AsyncRead for Fountain<'a> { fn poll_read( self: Pin<&mut Self>, _cx: &mut Context<'_>, buf: &mut tokio::io::ReadBuf<'_>, ) -> Poll> { let available = &self.chunk[self.consumed..]; let len = buf.remaining().min(available.len()); let (portion, _) = available.split_at(len); buf.put_slice(portion); Poll::Ready(Ok(())) } } #[cfg(feature = "async-tokio")] impl<'a> tokio::io::AsyncBufRead for Fountain<'a> { #[inline] fn poll_fill_buf(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll> { Poll::Ready(io::BufRead::fill_buf(self.get_mut())) } #[inline] fn consume(self: Pin<&mut Self>, amt: usize) { io::BufRead::consume(self.get_mut(), amt); } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A function to check whether the byte is a whitespace (blank, new line, carriage return or tab). #[inline] pub const fn is_whitespace(b: u8) -> bool { matches!(b, b' ' | b'\r' | b'\n' | b'\t') } /// Calculates name from an element-like content. Name is the first word in `content`, /// where word boundaries is XML whitespace characters. /// /// 'Whitespace' refers to the definition used by [`is_whitespace`]. 
#[inline] pub const fn name_len(mut bytes: &[u8]) -> usize { // Note: A pattern matching based approach (instead of indexing) allows // making the function const. let mut len = 0; while let [first, rest @ ..] = bytes { if is_whitespace(*first) { break; } len += 1; bytes = rest; } len } /// Returns a byte slice with leading XML whitespace bytes removed. /// /// 'Whitespace' refers to the definition used by [`is_whitespace`]. #[inline] pub const fn trim_xml_start(mut bytes: &[u8]) -> &[u8] { // Note: A pattern matching based approach (instead of indexing) allows // making the function const. while let [first, rest @ ..] = bytes { if is_whitespace(*first) { bytes = rest; } else { break; } } bytes } /// Returns a byte slice with trailing XML whitespace bytes removed. /// /// 'Whitespace' refers to the definition used by [`is_whitespace`]. #[inline] pub const fn trim_xml_end(mut bytes: &[u8]) -> &[u8] { // Note: A pattern matching based approach (instead of indexing) allows // making the function const. 
while let [rest @ .., last] = bytes { if is_whitespace(*last) { bytes = rest; } else { break; } } bytes } //////////////////////////////////////////////////////////////////////////////////////////////////// #[cfg(test)] mod tests { use super::*; use pretty_assertions::assert_eq; #[test] fn write_byte_string0() { let bytes = ByteBuf(vec![10, 32, 32, 32, 32, 32, 32, 32, 32]); assert_eq!(format!("{:?}", bytes), "\"0xA \""); } #[test] fn write_byte_string1() { let bytes = ByteBuf(vec![ 104, 116, 116, 112, 58, 47, 47, 119, 119, 119, 46, 119, 51, 46, 111, 114, 103, 47, 50, 48, 48, 50, 47, 48, 55, 47, 111, 119, 108, 35, ]); assert_eq!( format!("{:?}", bytes), r##""http://www.w3.org/2002/07/owl#""## ); } #[test] fn write_byte_string3() { let bytes = ByteBuf(vec![ 67, 108, 97, 115, 115, 32, 73, 82, 73, 61, 34, 35, 66, 34, ]); assert_eq!(format!("{:?}", bytes), r##""Class IRI=\"#B\"""##); } #[test] fn name_len() { assert_eq!(super::name_len(b""), 0); assert_eq!(super::name_len(b" abc"), 0); assert_eq!(super::name_len(b" \t\r\n"), 0); assert_eq!(super::name_len(b"abc"), 3); assert_eq!(super::name_len(b"abc "), 3); assert_eq!(super::name_len(b"a bc"), 1); assert_eq!(super::name_len(b"ab\tc"), 2); assert_eq!(super::name_len(b"ab\rc"), 2); assert_eq!(super::name_len(b"ab\nc"), 2); } #[test] fn trim_xml_start() { assert_eq!(Bytes(super::trim_xml_start(b"")), Bytes(b"")); assert_eq!(Bytes(super::trim_xml_start(b"abc")), Bytes(b"abc")); assert_eq!( Bytes(super::trim_xml_start(b"\r\n\t ab \t\r\nc \t\r\n")), Bytes(b"ab \t\r\nc \t\r\n") ); } #[test] fn trim_xml_end() { assert_eq!(Bytes(super::trim_xml_end(b"")), Bytes(b"")); assert_eq!(Bytes(super::trim_xml_end(b"abc")), Bytes(b"abc")); assert_eq!( Bytes(super::trim_xml_end(b"\r\n\t ab \t\r\nc \t\r\n")), Bytes(b"\r\n\t ab \t\r\nc") ); } } quick-xml-0.36.1/src/writer/async_tokio.rs000064400000000000000000000464240072674642500166620ustar 00000000000000use std::future::Future; use std::result::Result as StdResult; use 
tokio::io::{AsyncWrite, AsyncWriteExt}; use crate::errors::{Error, Result}; use crate::events::{BytesCData, BytesPI, BytesText, Event}; use crate::{ElementWriter, Writer}; impl Writer { /// Writes the given event to the underlying writer. Async version of [`Writer::write_event`]. pub async fn write_event_async<'a, E: Into>>(&mut self, event: E) -> Result<()> { let mut next_should_line_break = true; let result = match event.into() { Event::Start(e) => { let result = self.write_wrapped_async(b"<", &e, b">").await; if let Some(i) = self.indent.as_mut() { i.grow(); } result } Event::End(e) => { if let Some(i) = self.indent.as_mut() { i.shrink(); } self.write_wrapped_async(b"").await } Event::Empty(e) => self.write_wrapped_async(b"<", &e, b"/>").await, Event::Text(e) => { next_should_line_break = false; self.write_async(&e).await } Event::Comment(e) => self.write_wrapped_async(b"").await, Event::CData(e) => { next_should_line_break = false; self.write_async(b"").await } Event::Decl(e) => self.write_wrapped_async(b"").await, Event::PI(e) => self.write_wrapped_async(b"").await, Event::DocType(e) => self.write_wrapped_async(b"").await, Event::Eof => Ok(()), }; if let Some(i) = self.indent.as_mut() { i.should_line_break = next_should_line_break; } result } /// Manually write a newline and indentation at the proper level. Async version of /// [`Writer::write_indent`]. /// /// This method will do nothing if `Writer` was not constructed with [`Writer::new_with_indent`]. 
pub async fn write_indent_async(&mut self) -> Result<()> { if let Some(ref i) = self.indent { self.writer.write_all(b"\n").await?; self.writer.write_all(i.current()).await?; } Ok(()) } #[inline] async fn write_async(&mut self, value: &[u8]) -> Result<()> { self.writer.write_all(value).await.map_err(Into::into) } #[inline] async fn write_wrapped_async( &mut self, before: &[u8], value: &[u8], after: &[u8], ) -> Result<()> { if let Some(ref i) = self.indent { if i.should_line_break { self.writer.write_all(b"\n").await?; self.writer.write_all(i.current()).await?; } } self.write_async(before).await?; self.write_async(value).await?; self.write_async(after).await?; Ok(()) } } impl<'a, W: AsyncWrite + Unpin> ElementWriter<'a, W> { /// Write some text inside the current element. /// /// # Example /// /// ``` /// # use quick_xml::writer::Writer; /// # use quick_xml::events::BytesText; /// # use tokio::io::AsyncWriteExt; /// # #[tokio::main(flavor = "current_thread")] async fn main() { /// let mut buffer = Vec::new(); /// let mut tokio_buffer = tokio::io::BufWriter::new(&mut buffer); /// let mut writer = Writer::new_with_indent(&mut tokio_buffer, b' ', 4); /// /// writer /// .create_element("paired") /// .with_attribute(("attr1", "value1")) /// .with_attribute(("attr2", "value2")) /// .write_text_content_async(BytesText::new("text")) /// .await /// .expect("cannot write content"); /// /// tokio_buffer.flush().await.expect("flush failed"); /// /// assert_eq!( /// std::str::from_utf8(&buffer).unwrap(), /// r#"text"# /// ); /// # } pub async fn write_text_content_async(self, text: BytesText<'_>) -> Result<&'a mut Writer> { self.writer .write_event_async(Event::Start(self.start_tag.borrow())) .await?; self.writer.write_event_async(Event::Text(text)).await?; self.writer .write_event_async(Event::End(self.start_tag.to_end())) .await?; Ok(self.writer) } /// Write a CData event `` inside the current element. 
/// /// # Example /// /// ``` /// # use quick_xml::writer::Writer; /// # use quick_xml::events::BytesCData; /// # use tokio::io::AsyncWriteExt; /// # #[tokio::main(flavor = "current_thread")] async fn main() { /// let mut buffer = Vec::new(); /// let mut tokio_buffer = tokio::io::BufWriter::new(&mut buffer); /// let mut writer = Writer::new_with_indent(&mut tokio_buffer, b' ', 4); /// /// writer /// .create_element("paired") /// .with_attribute(("attr1", "value1")) /// .with_attribute(("attr2", "value2")) /// .write_cdata_content_async(BytesCData::new("text & content")) /// .await /// .expect("cannot write content"); /// /// tokio_buffer.flush().await.expect("flush failed"); /// /// assert_eq!( /// std::str::from_utf8(&buffer).unwrap(), /// r#""# /// ); /// # } pub async fn write_cdata_content_async( self, text: BytesCData<'_>, ) -> Result<&'a mut Writer> { self.writer .write_event_async(Event::Start(self.start_tag.borrow())) .await?; self.writer.write_event_async(Event::CData(text)).await?; self.writer .write_event_async(Event::End(self.start_tag.to_end())) .await?; Ok(self.writer) } /// Write a processing instruction `` inside the current element. 
/// /// # Example /// /// ``` /// # use quick_xml::writer::Writer; /// # use quick_xml::events::BytesPI; /// # use tokio::io::AsyncWriteExt; /// # #[tokio::main(flavor = "current_thread")] async fn main() { /// let mut buffer = Vec::new(); /// let mut tokio_buffer = tokio::io::BufWriter::new(&mut buffer); /// let mut writer = Writer::new_with_indent(&mut tokio_buffer, b' ', 4); /// /// writer /// .create_element("paired") /// .with_attribute(("attr1", "value1")) /// .with_attribute(("attr2", "value2")) /// .write_pi_content_async(BytesPI::new(r#"xml-stylesheet href="style.css""#)) /// .await /// .expect("cannot write content"); /// /// tokio_buffer.flush().await.expect("flush failed"); /// /// assert_eq!( /// std::str::from_utf8(&buffer).unwrap(), /// r#" /// /// "# /// ); /// # } pub async fn write_pi_content_async(self, text: BytesPI<'_>) -> Result<&'a mut Writer> { self.writer .write_event_async(Event::Start(self.start_tag.borrow())) .await?; self.writer.write_event_async(Event::PI(text)).await?; self.writer .write_event_async(Event::End(self.start_tag.to_end())) .await?; Ok(self.writer) } /// Write an empty (self-closing) tag. 
/// /// # Example /// /// ``` /// # use quick_xml::writer::Writer; /// # use quick_xml::events::BytesText; /// # use tokio::io::AsyncWriteExt; /// # #[tokio::main(flavor = "current_thread")] async fn main() { /// let mut buffer = Vec::new(); /// let mut tokio_buffer = tokio::io::BufWriter::new(&mut buffer); /// let mut writer = Writer::new_with_indent(&mut tokio_buffer, b' ', 4); /// /// writer /// .create_element("empty") /// .with_attribute(("attr1", "value1")) /// .with_attribute(("attr2", "value2")) /// .write_empty_async() /// .await /// .expect("cannot write content"); /// /// tokio_buffer.flush().await.expect("flush failed"); /// /// assert_eq!( /// std::str::from_utf8(&buffer).unwrap(), /// r#""# /// ); /// # } pub async fn write_empty_async(self) -> Result<&'a mut Writer> { self.writer .write_event_async(Event::Empty(self.start_tag)) .await?; Ok(self.writer) } /// Create a new scope for writing XML inside the current element. /// /// # Example /// /// ``` /// # use quick_xml::writer::Writer; /// # use quick_xml::events::BytesText; /// # use tokio::io::AsyncWriteExt; /// use quick_xml::Error; /// /// # #[tokio::main(flavor = "current_thread")] async fn main() { /// let mut buffer = Vec::new(); /// let mut tokio_buffer = tokio::io::BufWriter::new(&mut buffer); /// let mut writer = Writer::new_with_indent(&mut tokio_buffer, b' ', 4); /// /// writer /// .create_element("outer") /// .with_attributes([("attr1", "value1"), ("attr2", "value2")]) /// // We need to provide error type, because it is not named somewhere explicitly /// .write_inner_content_async::<_, _, Error>(|writer| async move { /// let fruits = ["apple", "orange", "banana"]; /// for (quant, item) in fruits.iter().enumerate() { /// writer /// .create_element("fruit") /// .with_attributes([("quantity", quant.to_string().as_str())]) /// .write_text_content_async(BytesText::new(item)) /// .await?; /// } /// writer /// .create_element("inner") /// .write_inner_content_async(|writer| async move { /// 
writer.create_element("empty").write_empty_async().await /// }) /// .await?; /// /// Ok(writer) /// }) /// .await /// .expect("cannot write content"); /// /// tokio_buffer.flush().await.expect("flush failed"); /// assert_eq!( /// std::str::from_utf8(&buffer).unwrap(), /// r#" /// apple /// orange /// banana /// /// /// /// "# /// ); /// # } pub async fn write_inner_content_async( mut self, closure: F, ) -> StdResult<&'a mut Writer, E> where F: FnOnce(&'a mut Writer) -> Fut, Fut: Future, E>>, E: From, { self.writer .write_event_async(Event::Start(self.start_tag.borrow())) .await?; self.writer = closure(self.writer).await?; self.writer .write_event_async(Event::End(self.start_tag.to_end())) .await?; Ok(self.writer) } } #[cfg(test)] mod tests { use super::*; use crate::events::*; use pretty_assertions::assert_eq; macro_rules! test { ($name: ident, $event: expr, $expected: expr) => { #[tokio::test] async fn $name() { let mut buffer = Vec::new(); let mut writer = Writer::new(&mut buffer); writer .write_event_async($event) .await .expect("write event failed"); assert_eq!(std::str::from_utf8(&buffer).unwrap(), $expected,); } }; } test!( xml_header, Event::Decl(BytesDecl::new("1.0", Some("UTF-8"), Some("no"))), r#""# ); test!(empty_tag, Event::Empty(BytesStart::new("tag")), r#""#); test!( comment, Event::Comment(BytesText::new("this is a comment")), r#""# ); test!( cdata, Event::CData(BytesCData::new("this is a cdata")), r#""# ); test!( pi, Event::PI(BytesPI::new("this is a processing instruction")), r#""# ); test!( doctype, Event::DocType(BytesText::new("this is a doctype")), r#""# ); #[tokio::test] async fn full_tag() { let mut buffer = Vec::new(); let mut writer = Writer::new(&mut buffer); let start = Event::Start(BytesStart::new("tag")); let text = Event::Text(BytesText::new("inner text")); let end = Event::End(BytesEnd::new("tag")); for i in [start, text, end] { writer.write_event_async(i).await.expect("write tag failed"); } assert_eq!( 
std::str::from_utf8(&buffer).unwrap(), r#"inner text"# ); } } #[cfg(test)] mod indentation_async { use super::*; use crate::events::*; use pretty_assertions::assert_eq; #[tokio::test] async fn self_closed() { let mut buffer = Vec::new(); let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); let tag = BytesStart::new("self-closed") .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); writer .write_event_async(Event::Empty(tag)) .await .expect("write tag failed"); assert_eq!( std::str::from_utf8(&buffer).unwrap(), r#""# ); } #[tokio::test] async fn empty_paired() { let mut buffer = Vec::new(); let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); let start = BytesStart::new("paired") .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); let end = start.to_end(); writer .write_event_async(Event::Start(start.clone())) .await .expect("write start tag failed"); writer .write_event_async(Event::End(end)) .await .expect("write end tag failed"); assert_eq!( std::str::from_utf8(&buffer).unwrap(), r#" "# ); } #[tokio::test] async fn paired_with_inner() { let mut buffer = Vec::new(); let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); let start = BytesStart::new("paired") .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); let end = start.to_end(); let inner = BytesStart::new("inner"); writer .write_event_async(Event::Start(start.clone())) .await .expect("write start tag failed"); writer .write_event_async(Event::Empty(inner)) .await .expect("write inner tag failed"); writer .write_event_async(Event::End(end)) .await .expect("write end tag failed"); assert_eq!( std::str::from_utf8(&buffer).unwrap(), r#" "# ); } #[tokio::test] async fn paired_with_text() { let mut buffer = Vec::new(); let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); let start = BytesStart::new("paired") .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); let end = 
start.to_end(); let text = BytesText::new("text"); writer .write_event_async(Event::Start(start.clone())) .await .expect("write start tag failed"); writer .write_event_async(Event::Text(text)) .await .expect("write text failed"); writer .write_event_async(Event::End(end)) .await .expect("write end tag failed"); assert_eq!( std::str::from_utf8(&buffer).unwrap(), r#"text"# ); } #[tokio::test] async fn mixed_content() { let mut buffer = Vec::new(); let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); let start = BytesStart::new("paired") .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); let end = start.to_end(); let text = BytesText::new("text"); let inner = BytesStart::new("inner"); writer .write_event_async(Event::Start(start.clone())) .await .expect("write start tag failed"); writer .write_event_async(Event::Text(text)) .await .expect("write text failed"); writer .write_event_async(Event::Empty(inner)) .await .expect("write inner tag failed"); writer .write_event_async(Event::End(end)) .await .expect("write end tag failed"); assert_eq!( std::str::from_utf8(&buffer).unwrap(), r#"text "# ); } #[tokio::test] async fn nested() { let mut buffer = Vec::new(); let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); let start = BytesStart::new("paired") .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); let end = start.to_end(); let inner = BytesStart::new("inner"); writer .write_event_async(Event::Start(start.clone())) .await .expect("write start 1 tag failed"); writer .write_event_async(Event::Start(start.clone())) .await .expect("write start 2 tag failed"); writer .write_event_async(Event::Empty(inner)) .await .expect("write inner tag failed"); writer .write_event_async(Event::End(end.clone())) .await .expect("write end tag 2 failed"); writer .write_event_async(Event::End(end)) .await .expect("write end tag 1 failed"); assert_eq!( std::str::from_utf8(&buffer).unwrap(), r#" "# ); } } 
quick-xml-0.36.1/src/writer.rs000064400000000000000000000562350072674642500143410ustar 00000000000000//! Contains high-level interface for an events-based XML emitter. use std::borrow::Cow; use std::io::Write; use std::result::Result as StdResult; use crate::encoding::UTF8_BOM; use crate::errors::{Error, Result}; use crate::events::{attributes::Attribute, BytesCData, BytesPI, BytesStart, BytesText, Event}; #[cfg(feature = "async-tokio")] mod async_tokio; /// XML writer. Writes XML [`Event`]s to a [`std::io::Write`] or [`tokio::io::AsyncWrite`] implementor. #[cfg(feature = "serialize")] use {crate::de::DeError, serde::Serialize}; /// XML writer. Writes XML [`Event`]s to a [`std::io::Write`] implementor. /// /// # Examples /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::{Event, BytesEnd, BytesStart}; /// use quick_xml::reader::Reader; /// use quick_xml::writer::Writer; /// use std::io::Cursor; /// /// let xml = r#"text"#; /// let mut reader = Reader::from_str(xml); /// let mut writer = Writer::new(Cursor::new(Vec::new())); /// loop { /// match reader.read_event() { /// Ok(Event::Start(e)) if e.name().as_ref() == b"this_tag" => { /// /// // crates a new element ... 
alternatively we could reuse `e` by calling /// // `e.into_owned()` /// let mut elem = BytesStart::new("my_elem"); /// /// // collect existing attributes /// elem.extend_attributes(e.attributes().map(|attr| attr.unwrap())); /// /// // copy existing attributes, adds a new my-key="some value" attribute /// elem.push_attribute(("my-key", "some value")); /// /// // writes the event to the writer /// assert!(writer.write_event(Event::Start(elem)).is_ok()); /// }, /// Ok(Event::End(e)) if e.name().as_ref() == b"this_tag" => { /// assert!(writer.write_event(Event::End(BytesEnd::new("my_elem"))).is_ok()); /// }, /// Ok(Event::Eof) => break, /// // we can either move or borrow the event to write, depending on your use-case /// Ok(e) => assert!(writer.write_event(e.borrow()).is_ok()), /// Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e), /// } /// } /// /// let result = writer.into_inner().into_inner(); /// let expected = r#"text"#; /// assert_eq!(result, expected.as_bytes()); /// ``` #[derive(Clone)] pub struct Writer { /// underlying writer writer: W, indent: Option, } impl Writer { /// Creates a `Writer` from a generic writer. pub const fn new(inner: W) -> Writer { Writer { writer: inner, indent: None, } } /// Creates a `Writer` with configured indents from a generic writer. pub fn new_with_indent(inner: W, indent_char: u8, indent_size: usize) -> Writer { Writer { writer: inner, indent: Some(Indentation::new(indent_char, indent_size)), } } /// Consumes this `Writer`, returning the underlying writer. pub fn into_inner(self) -> W { self.writer } /// Get a mutable reference to the underlying writer. pub fn get_mut(&mut self) -> &mut W { &mut self.writer } /// Get a reference to the underlying writer. pub const fn get_ref(&self) -> &W { &self.writer } /// Provides a simple, high-level API for writing XML elements. /// /// Returns an [`ElementWriter`] that simplifies setting attributes and writing /// content inside the element. 
/// /// # Example /// /// ``` /// # use quick_xml::Result; /// # fn main() -> Result<()> { /// use quick_xml::events::{BytesStart, BytesText, Event}; /// use quick_xml::writer::Writer; /// use quick_xml::Error; /// use std::io::Cursor; /// /// let mut writer = Writer::new(Cursor::new(Vec::new())); /// /// // writes /// writer.create_element("tag") /// .with_attribute(("attr1", "value1")) // chain `with_attribute()` calls to add many attributes /// .write_empty()?; /// /// // writes with some text inside /// writer.create_element("tag") /// .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()) // or add attributes from an iterator /// .write_text_content(BytesText::new("with some text inside"))?; /// /// // writes appleorange /// writer.create_element("tag") /// // We need to provide error type, because it is not named somewhere explicitly /// .write_inner_content::<_, Error>(|writer| { /// let fruits = ["apple", "orange"]; /// for (quant, item) in fruits.iter().enumerate() { /// writer /// .create_element("fruit") /// .with_attribute(("quantity", quant.to_string().as_str())) /// .write_text_content(BytesText::new(item))?; /// } /// Ok(()) /// })?; /// # Ok(()) /// # } /// ``` #[must_use] pub fn create_element<'a, N>(&'a mut self, name: N) -> ElementWriter where N: Into>, { ElementWriter { writer: self, start_tag: BytesStart::new(name), state: AttributeIndent::NoneAttributesWritten, spaces: Vec::new(), } } } impl Writer { /// Write a [Byte-Order-Mark] character to the document. 
/// /// # Example /// /// ```rust /// # use quick_xml::Result; /// # fn main() -> Result<()> { /// use quick_xml::events::{BytesStart, BytesText, Event}; /// use quick_xml::writer::Writer; /// use quick_xml::Error; /// use std::io::Cursor; /// /// let mut buffer = Vec::new(); /// let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); /// /// writer.write_bom()?; /// writer /// .create_element("empty") /// .with_attribute(("attr1", "value1")) /// .write_empty() /// .expect("failure"); /// /// assert_eq!( /// std::str::from_utf8(&buffer).unwrap(), /// "\u{FEFF}" /// ); /// # Ok(()) /// # } /// ``` /// [Byte-Order-Mark]: https://unicode.org/faq/utf_bom.html#BOM pub fn write_bom(&mut self) -> Result<()> { self.write(UTF8_BOM) } /// Writes the given event to the underlying writer. pub fn write_event<'a, E: Into>>(&mut self, event: E) -> Result<()> { let mut next_should_line_break = true; let result = match event.into() { Event::Start(e) => { let result = self.write_wrapped(b"<", &e, b">"); if let Some(i) = self.indent.as_mut() { i.grow(); } result } Event::End(e) => { if let Some(i) = self.indent.as_mut() { i.shrink(); } self.write_wrapped(b"") } Event::Empty(e) => self.write_wrapped(b"<", &e, b"/>"), Event::Text(e) => { next_should_line_break = false; self.write(&e) } Event::Comment(e) => self.write_wrapped(b""), Event::CData(e) => { next_should_line_break = false; self.write(b"") } Event::Decl(e) => self.write_wrapped(b""), Event::PI(e) => self.write_wrapped(b""), Event::DocType(e) => self.write_wrapped(b""), Event::Eof => Ok(()), }; if let Some(i) = self.indent.as_mut() { i.should_line_break = next_should_line_break; } result } /// Writes bytes #[inline] pub(crate) fn write(&mut self, value: &[u8]) -> Result<()> { self.writer.write_all(value).map_err(Into::into) } #[inline] fn write_wrapped(&mut self, before: &[u8], value: &[u8], after: &[u8]) -> Result<()> { if let Some(ref i) = self.indent { if i.should_line_break { self.writer.write_all(b"\n")?; 
self.writer.write_all(i.current())?; } } self.write(before)?; self.write(value)?; self.write(after)?; Ok(()) } /// Manually write a newline and indentation at the proper level. /// /// This can be used when the heuristic to line break and indent after any /// [`Event`] apart from [`Text`] fails such as when a [`Start`] occurs directly /// after [`Text`]. /// /// This method will do nothing if `Writer` was not constructed with [`new_with_indent`]. /// /// [`Text`]: Event::Text /// [`Start`]: Event::Start /// [`new_with_indent`]: Self::new_with_indent pub fn write_indent(&mut self) -> Result<()> { if let Some(ref i) = self.indent { self.writer.write_all(b"\n")?; self.writer.write_all(i.current())?; } Ok(()) } /// Write an arbitrary serializable type /// /// Note: If you are attempting to write XML in a non-UTF-8 encoding, this may not /// be safe to use. Rust basic types assume UTF-8 encodings. /// /// ```rust /// # use pretty_assertions::assert_eq; /// # use serde::Serialize; /// # use quick_xml::events::{BytesStart, Event}; /// # use quick_xml::writer::Writer; /// # use quick_xml::DeError; /// # fn main() -> Result<(), DeError> { /// #[derive(Debug, PartialEq, Serialize)] /// struct MyData { /// question: String, /// answer: u32, /// } /// /// let data = MyData { /// question: "The Ultimate Question of Life, the Universe, and Everything".into(), /// answer: 42, /// }; /// /// let mut buffer = Vec::new(); /// let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); /// /// let start = BytesStart::new("root"); /// let end = start.to_end(); /// /// writer.write_event(Event::Start(start.clone()))?; /// writer.write_serializable("my_data", &data)?; /// writer.write_event(Event::End(end))?; /// /// assert_eq!( /// std::str::from_utf8(&buffer)?, /// r#" /// /// The Ultimate Question of Life, the Universe, and Everything /// 42 /// /// "# /// ); /// # Ok(()) /// # } /// ``` #[cfg(feature = "serialize")] pub fn write_serializable( &mut self, tag_name: &str, content: 
&T, ) -> std::result::Result<(), DeError> { use crate::se::{Indent, Serializer}; self.write_indent()?; let mut fmt = ToFmtWrite(&mut self.writer); let mut serializer = Serializer::with_root(&mut fmt, Some(tag_name))?; if let Some(indent) = &mut self.indent { serializer.set_indent(Indent::Borrow(indent)); } content.serialize(serializer)?; Ok(()) } } /// Track indent inside elements state /// /// ```mermaid /// stateDiagram-v2 /// [*] --> NoneAttributesWritten /// NoneAttributesWritten --> Spaces : .with_attribute() /// NoneAttributesWritten --> WriteConfigured : .new_line() /// /// Spaces --> Spaces : .with_attribute() /// Spaces --> WriteSpaces : .new_line() /// /// WriteSpaces --> Spaces : .with_attribute() /// WriteSpaces --> WriteSpaces : .new_line() /// /// Configured --> Configured : .with_attribute() /// Configured --> WriteConfigured : .new_line() /// /// WriteConfigured --> Configured : .with_attribute() /// WriteConfigured --> WriteConfigured : .new_line() /// ``` #[derive(Debug)] enum AttributeIndent { /// Initial state. `ElementWriter` was just created and no attributes written yet NoneAttributesWritten, /// Write specified count of spaces to indent before writing attribute in `with_attribute()` WriteSpaces(usize), /// Keep space indent that should be used if `new_line()` would be called Spaces(usize), /// Write specified count of indent characters before writing attribute in `with_attribute()` WriteConfigured(usize), /// Keep indent that should be used if `new_line()` would be called Configured(usize), } /// A struct to write an element. Contains methods to add attributes and inner /// elements to the element pub struct ElementWriter<'a, W> { writer: &'a mut Writer, start_tag: BytesStart<'a>, state: AttributeIndent, /// Contains spaces used to write space indents of attributes spaces: Vec, } impl<'a, W> ElementWriter<'a, W> { /// Adds an attribute to this element. 
pub fn with_attribute<'b, I>(mut self, attr: I) -> Self where I: Into>, { self.write_attr(attr.into()); self } /// Add additional attributes to this element using an iterator. /// /// The yielded items must be convertible to [`Attribute`] using `Into`. pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self where I: IntoIterator, I::Item: Into>, { let mut iter = attributes.into_iter(); if let Some(attr) = iter.next() { self.write_attr(attr.into()); self.start_tag.extend_attributes(iter); } self } /// Push a new line inside an element between attributes. Note, that this /// method does nothing if [`Writer`] was created without indentation support. /// /// # Examples /// /// The following code /// /// ``` /// # use quick_xml::writer::Writer; /// let mut buffer = Vec::new(); /// let mut writer = Writer::new_with_indent(&mut buffer, b' ', 2); /// writer /// .create_element("element") /// //.new_line() (1) /// .with_attribute(("first", "1")) /// .with_attribute(("second", "2")) /// .new_line() /// .with_attributes([ /// ("third", "3"), /// ("fourth", "4"), /// ]) /// //.new_line() (2) /// .write_empty(); /// ``` /// will produce the following XMLs: /// ```xml /// /// /// /// /// /// /// /// /// ``` pub fn new_line(mut self) -> Self { if let Some(i) = self.writer.indent.as_mut() { match self.state { // .new_line() called just after .create_element(). // Use element indent to additionally indent attributes AttributeIndent::NoneAttributesWritten => { self.state = AttributeIndent::WriteConfigured(i.indent_size) } AttributeIndent::WriteSpaces(_) => {} // .new_line() called when .with_attribute() was called at least once. // The spaces should be used to indent // Plan saved indent AttributeIndent::Spaces(indent) => { self.state = AttributeIndent::WriteSpaces(indent) } AttributeIndent::WriteConfigured(_) => {} // .new_line() called when .with_attribute() was called at least once. 
// The configured indent characters should be used to indent // Plan saved indent AttributeIndent::Configured(indent) => { self.state = AttributeIndent::WriteConfigured(indent) } } self.start_tag.push_newline(); }; self } /// Writes attribute and maintain indentation state fn write_attr<'b>(&mut self, attr: Attribute<'b>) { if let Some(i) = self.writer.indent.as_mut() { // Save the indent that we should use next time when .new_line() be called self.state = match self.state { // Neither .new_line() or .with_attribute() yet called // If newline inside attributes will be requested, we should indent them // by the length of tag name and +1 for `<` and +1 for one space AttributeIndent::NoneAttributesWritten => { self.start_tag.push_attribute(attr); AttributeIndent::Spaces(self.start_tag.name().as_ref().len() + 2) } // Indent was requested by previous call to .new_line(), write it // New line was already written AttributeIndent::WriteSpaces(indent) => { if self.spaces.len() < indent { self.spaces.resize(indent, b' '); } self.start_tag.push_indent(&self.spaces[..indent]); self.start_tag.push_attr(attr.into()); AttributeIndent::Spaces(indent) } // .new_line() was not called, but .with_attribute() was. // use the previously calculated indent AttributeIndent::Spaces(indent) => { self.start_tag.push_attribute(attr); AttributeIndent::Spaces(indent) } // Indent was requested by previous call to .new_line(), write it // New line was already written AttributeIndent::WriteConfigured(indent) => { self.start_tag.push_indent(i.additional(indent)); self.start_tag.push_attr(attr.into()); AttributeIndent::Configured(indent) } // .new_line() was not called, but .with_attribute() was. // use the previously calculated indent AttributeIndent::Configured(indent) => { self.start_tag.push_attribute(attr); AttributeIndent::Configured(indent) } }; } else { self.start_tag.push_attribute(attr); } } } impl<'a, W: Write> ElementWriter<'a, W> { /// Write some text inside the current element. 
/// Indentation settings and state, plus a cache of the bytes used to write indents.
#[derive(Clone)]
pub(crate) struct Indentation {
    /// Whether a line break followed by the current indent should be written
    /// before the next event.
    ///
    /// todo: this is an awkward fit as it has no impact on indentation logic, but it is
    /// only applicable when an indentation exists. Potentially refactor later
    should_line_break: bool,
    /// The character code to be used for indentations (e.g. ` ` or `\t`)
    indent_char: u8,
    /// How many instances of the indent character ought to be used for each level of indentation
    indent_size: usize,
    /// Used as a cache for the bytes used for indentation
    indents: Vec<u8>,
    /// The current amount of indentation, in bytes.
    ///
    /// Invariant: never exceeds `indents.len()`
    current_indent_len: usize,
}

impl Indentation {
    /// Creates indentation state at level zero that uses `indent_size` copies
    /// of `indent_char` for each level.
    pub fn new(indent_char: u8, indent_size: usize) -> Self {
        Self {
            should_line_break: false,
            indent_char,
            indent_size,
            // Initial size of the cache; `ensure` extends it on demand
            indents: vec![indent_char; 128],
            current_indent_len: 0, // invariant - must not exceed indents.len()
        }
    }

    /// Increase indentation by one level
    pub fn grow(&mut self) {
        self.current_indent_len += self.indent_size;
        // Keep the invariant: the cache must cover the new indent length
        self.ensure(self.current_indent_len);
    }

    /// Decrease indentation by one level. Do nothing, if level already zero
    pub fn shrink(&mut self) {
        self.current_indent_len = self.current_indent_len.saturating_sub(self.indent_size);
    }

    /// Returns indent string for current level
    pub fn current(&self) -> &[u8] {
        &self.indents[..self.current_indent_len]
    }

    /// Returns indent with current indent plus additional indent
    ///
    /// `additional_indent` is a count of indent characters, not of levels.
    /// The current level is not changed.
    pub fn additional(&mut self, additional_indent: usize) -> &[u8] {
        let new_len = self.current_indent_len + additional_indent;
        self.ensure(new_len);
        &self.indents[..new_len]
    }

    /// Extends the cache so that it holds at least `new_len` indent characters
    fn ensure(&mut self, new_len: usize) {
        if self.indents.len() < new_len {
            self.indents.resize(new_len, self.indent_char);
        }
    }
}
rpm_filelists.xml long, mostly medium-length text elements, not much escaping rpm_other.xml long, mix of attributes and text, lots of escaping (both entity and char literal), long attributes rpm_primary.xml long, mix of attributes and text, not much escaping, mix of attribute lengths, some namespaces rpm_primary2.xml long, mix of attributes and text, not much escaping, mix of attribute lengths, some namespaces sample_1.xml short, mix of attributes and text, lots of escapes sample_ns.xml short, lots of namespaces, no escapes sample_rss.xml long, few attributes, mix of attribute lengths, escapes in text content test_writer_indent_cdata.xml test_writer_indent.xml medium length, lots of namespaces, no escaping test_writer.xml utf16be.xml utf16le.xml

(&mut self, parser: P, buf: B, position: &mut u64) -> Result<&'r [u8]> where P: Parser; /// Read input until comment or CDATA is finished. /// /// This method expect that `<` already was read. /// /// Returns a slice of data read up to end of comment or CDATA (`>`), /// which does not include into result. /// /// If input (`Self`) is exhausted and nothing was read, returns `None`. /// /// # Parameters /// - `buf`: Buffer that could be filled from an input (`Self`) and /// from which [events] could borrow their data /// - `position`: Will be increased by amount of bytes consumed /// /// [events]: crate::events::Event fn read_bang_element(&mut self, buf: B, position: &mut u64) -> Result<(BangType, &'r [u8])>; /// Consume and discard all the whitespace until the next non-whitespace /// character or EOF. /// /// # Parameters /// - `position`: Will be increased by amount of bytes consumed fn skip_whitespace(&mut self, position: &mut u64) -> io::Result<()>; /// Return one character without consuming it, so that future `read_*` calls /// will still include it. On EOF, return `None`. 
fn peek_one(&mut self) -> io::Result>; } /// Possible elements started with ` CData, /// Comment, /// DocType, } impl BangType { #[inline(always)] const fn new(byte: Option) -> Result { Ok(match byte { Some(b'[') => Self::CData, Some(b'-') => Self::Comment, Some(b'D') | Some(b'd') => Self::DocType, _ => return Err(Error::Syntax(SyntaxError::InvalidBangMarkup)), }) } /// If element is finished, returns its content up to `>` symbol and /// an index of this symbol, otherwise returns `None` /// /// # Parameters /// - `buf`: buffer with data consumed on previous iterations /// - `chunk`: data read on current iteration and not yet consumed from reader #[inline(always)] fn parse<'b>(&self, buf: &[u8], chunk: &'b [u8]) -> Option<(&'b [u8], usize)> { match self { Self::Comment => { for i in memchr::memchr_iter(b'>', chunk) { // Need to read at least 6 symbols (`!---->`) for properly finished comment // - XML comment // 012345 - i if buf.len() + i > 4 { if chunk[..i].ends_with(b"--") { // We cannot strip last `--` from the buffer because we need it in case of // check_comments enabled option. 
XML standard requires that comment // will not end with `--->` sequence because this is a special case of // `--` in the comment (https://www.w3.org/TR/xml11/#sec-comments) return Some((&chunk[..i], i + 1)); // +1 for `>` } // End sequence `-|->` was splitted at | // buf --/ \-- chunk if i == 1 && buf.ends_with(b"-") && chunk[0] == b'-' { return Some((&chunk[..i], i + 1)); // +1 for `>` } // End sequence `--|>` was splitted at | // buf --/ \-- chunk if i == 0 && buf.ends_with(b"--") { return Some((&[], i + 1)); // +1 for `>` } } } } Self::CData => { for i in memchr::memchr_iter(b'>', chunk) { if chunk[..i].ends_with(b"]]") { return Some((&chunk[..i], i + 1)); // +1 for `>` } // End sequence `]|]>` was splitted at | // buf --/ \-- chunk if i == 1 && buf.ends_with(b"]") && chunk[0] == b']' { return Some((&chunk[..i], i + 1)); // +1 for `>` } // End sequence `]]|>` was splitted at | // buf --/ \-- chunk if i == 0 && buf.ends_with(b"]]") { return Some((&[], i + 1)); // +1 for `>` } } } Self::DocType => { for i in memchr::memchr_iter(b'>', chunk) { let content = &chunk[..i]; let balance = memchr::memchr2_iter(b'<', b'>', content) .map(|p| if content[p] == b'<' { 1i32 } else { -1 }) .sum::(); if balance == 0 { return Some((content, i + 1)); // +1 for `>` } } } } None } #[inline] const fn to_err(&self) -> Error { match self { Self::CData => Error::Syntax(SyntaxError::UnclosedCData), Self::Comment => Error::Syntax(SyntaxError::UnclosedComment), Self::DocType => Error::Syntax(SyntaxError::UnclosedDoctype), } } } //////////////////////////////////////////////////////////////////////////////////////////////////// #[cfg(test)] mod test { /// Checks the internal implementation of the various reader methods macro_rules! check { ( #[$test:meta] $read_event:ident, $read_until_close:ident, // constructor of the XML source on which internal functions will be called $source:path, // constructor of the buffer to which read data will stored $buf:expr $(, $async:ident, $await:ident)? 
) => { mod read_bang_element { use super::*; use crate::errors::{Error, SyntaxError}; use crate::reader::BangType; use crate::utils::Bytes; /// Checks that reading CDATA content works correctly mod cdata { use super::*; use pretty_assertions::assert_eq; /// Checks that if input begins like CDATA element, but CDATA start sequence /// is not finished, parsing ends with an error #[$test] #[ignore = "start CDATA sequence fully checked outside of `read_bang_element`"] $($async)? fn not_properly_start() { let buf = $buf; let mut position = 1; let mut input = b"![]]>other content".as_ref(); // ^= 1 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedCData), x => panic!( "Expected `Err(Syntax(_))`, but got `{:?}`", x ), } assert_eq!(position, 1); } /// Checks that if CDATA startup sequence was matched, but an end sequence /// is not found, parsing ends with an error #[$test] $($async)? fn not_closed() { let buf = $buf; let mut position = 1; let mut input = b"![CDATA[other content".as_ref(); // ^= 1 ^= 22 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedCData), x => panic!( "Expected `Err(Syntax(_))`, but got `{:?}`", x ), } assert_eq!(position, 22); } /// Checks that CDATA element without content inside parsed successfully #[$test] $($async)? fn empty() { let buf = $buf; let mut position = 1; let mut input = b"![CDATA[]]>other content".as_ref(); // ^= 1 ^= 12 let (ty, bytes) = $source(&mut input) .read_bang_element(buf, &mut position) $(.$await)? .unwrap(); assert_eq!( (ty, Bytes(bytes)), (BangType::CData, Bytes(b"![CDATA[]]")) ); assert_eq!(position, 12); } /// Checks that CDATA element with content parsed successfully. /// Additionally checks that sequences inside CDATA that may look like /// a CDATA end sequence do not interrupt CDATA parsing #[$test] $($async)? 
fn with_content() { let buf = $buf; let mut position = 1; let mut input = b"![CDATA[cdata]] ]>content]]>other content]]>".as_ref(); // ^= 1 ^= 29 let (ty, bytes) = $source(&mut input) .read_bang_element(buf, &mut position) $(.$await)? .unwrap(); assert_eq!( (ty, Bytes(bytes)), (BangType::CData, Bytes(b"![CDATA[cdata]] ]>content]]")) ); assert_eq!(position, 29); } } /// Checks that reading XML comments works correctly. According to the [specification], /// comment data can contain any sequence except `--`: /// /// ```peg /// comment = '<--' (!'--' char)* '-->'; /// char = [#x1-#x2C] /// / [#x2E-#xD7FF] /// / [#xE000-#xFFFD] /// / [#x10000-#x10FFFF] /// ``` /// /// The presence of this limitation, however, is simply a poorly designed specification /// (maybe for purpose of building of LL(1) XML parser) and quick-xml does not check for /// presence of these sequences by default. This tests allow such content. /// /// [specification]: https://www.w3.org/TR/xml11/#dt-comment mod comment { use super::*; use pretty_assertions::assert_eq; #[$test] #[ignore = "start comment sequence fully checked outside of `read_bang_element`"] $($async)? fn not_properly_start() { let buf = $buf; let mut position = 1; let mut input = b"!- -->other content".as_ref(); // ^= 1 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment), x => panic!( "Expected `Err(Syntax(_))`, but got `{:?}`", x ), } assert_eq!(position, 1); } #[$test] $($async)? fn not_properly_end() { let buf = $buf; let mut position = 1; let mut input = b"!->other content".as_ref(); // ^= 1 ^= 17 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment), x => panic!( "Expected `Err(Syntax(_))`, but got `{:?}`", x ), } assert_eq!(position, 17); } #[$test] $($async)? 
fn not_closed1() { let buf = $buf; let mut position = 1; let mut input = b"!--other content".as_ref(); // ^= 1 ^= 17 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment), x => panic!( "Expected `Err(Syntax(_))`, but got `{:?}`", x ), } assert_eq!(position, 17); } #[$test] $($async)? fn not_closed2() { let buf = $buf; let mut position = 1; let mut input = b"!-->other content".as_ref(); // ^= 1 ^= 18 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment), x => panic!( "Expected `Err(Syntax(_))`, but got `{:?}`", x ), } assert_eq!(position, 18); } #[$test] $($async)? fn not_closed3() { let buf = $buf; let mut position = 1; let mut input = b"!--->other content".as_ref(); // ^= 1 ^= 19 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment), x => panic!( "Expected `Err(Syntax(_))`, but got `{:?}`", x ), } assert_eq!(position, 19); } #[$test] $($async)? fn empty() { let buf = $buf; let mut position = 1; let mut input = b"!---->other content".as_ref(); // ^= 1 ^= 7 let (ty, bytes) = $source(&mut input) .read_bang_element(buf, &mut position) $(.$await)? .unwrap(); assert_eq!( (ty, Bytes(bytes)), (BangType::Comment, Bytes(b"!----")) ); assert_eq!(position, 7); } #[$test] $($async)? fn with_content() { let buf = $buf; let mut position = 1; let mut input = b"!--->comment<--->other content".as_ref(); // ^= 1 ^= 18 let (ty, bytes) = $source(&mut input) .read_bang_element(buf, &mut position) $(.$await)? 
.unwrap(); assert_eq!( (ty, Bytes(bytes)), (BangType::Comment, Bytes(b"!--->comment<---")) ); assert_eq!(position, 18); } } /// Checks that reading DOCTYPE definition works correctly mod doctype { use super::*; mod uppercase { use super::*; use pretty_assertions::assert_eq; #[$test] $($async)? fn not_properly_start() { let buf = $buf; let mut position = 1; let mut input = b"!D other content".as_ref(); // ^= 1 ^= 17 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype), x => panic!( "Expected `Err(Syntax(_))`, but got `{:?}`", x ), } assert_eq!(position, 17); } #[$test] $($async)? fn without_space() { let buf = $buf; let mut position = 1; let mut input = b"!DOCTYPEother content".as_ref(); // ^= 1 ^= 22 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype), x => panic!( "Expected `Err(Syntax(_))`, but got `{:?}`", x ), } assert_eq!(position, 22); } #[$test] $($async)? fn empty() { let buf = $buf; let mut position = 1; let mut input = b"!DOCTYPE>other content".as_ref(); // ^= 1 ^= 10 let (ty, bytes) = $source(&mut input) .read_bang_element(buf, &mut position) $(.$await)? .unwrap(); assert_eq!( (ty, Bytes(bytes)), (BangType::DocType, Bytes(b"!DOCTYPE")) ); assert_eq!(position, 10); } #[$test] $($async)? fn not_closed() { let buf = $buf; let mut position = 1; let mut input = b"!DOCTYPE other content".as_ref(); // ^= 1 ^23 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype), x => panic!( "Expected `Err(Syntax(_))`, but got `{:?}`", x ), } assert_eq!(position, 23); } } mod lowercase { use super::*; use pretty_assertions::assert_eq; #[$test] $($async)? 
fn not_properly_start() { let buf = $buf; let mut position = 1; let mut input = b"!d other content".as_ref(); // ^= 1 ^= 17 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype), x => panic!( "Expected `Err(Syntax(_))`, but got `{:?}`", x ), } assert_eq!(position, 17); } #[$test] $($async)? fn without_space() { let buf = $buf; let mut position = 1; let mut input = b"!doctypeother content".as_ref(); // ^= 1 ^= 22 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype), x => panic!( "Expected `Err(Syntax(_))`, but got `{:?}`", x ), } assert_eq!(position, 22); } #[$test] $($async)? fn empty() { let buf = $buf; let mut position = 1; let mut input = b"!doctype>other content".as_ref(); // ^= 1 ^= 10 let (ty, bytes) = $source(&mut input) .read_bang_element(buf, &mut position) $(.$await)? .unwrap(); assert_eq!( (ty, Bytes(bytes)), (BangType::DocType, Bytes(b"!doctype")) ); assert_eq!(position, 10); } #[$test] $($async)? fn not_closed() { let buf = $buf; let mut position = 1; let mut input = b"!doctype other content".as_ref(); // ^= 1 ^= 23 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype), x => panic!( "Expected `Err(Syntax(_))`, but got `{:?}`", x ), } assert_eq!(position, 23); } } } } mod read_element { use super::*; use crate::errors::{Error, SyntaxError}; use crate::parser::ElementParser; use crate::utils::Bytes; use pretty_assertions::assert_eq; /// Checks that nothing was read from empty buffer #[$test] $($async)? fn empty() { let buf = $buf; let mut position = 1; let mut input = b"".as_ref(); // ^= 1 match $source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? 
{ Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedTag), x => panic!( "Expected `Err(Syntax(_))`, but got `{:?}`", x ), } assert_eq!(position, 1); } mod open { use super::*; use pretty_assertions::assert_eq; #[$test] $($async)? fn empty_tag() { let buf = $buf; let mut position = 1; let mut input = b">".as_ref(); // ^= 2 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(b"") ); assert_eq!(position, 2); } #[$test] $($async)? fn normal() { let buf = $buf; let mut position = 1; let mut input = b"tag>".as_ref(); // ^= 5 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(b"tag") ); assert_eq!(position, 5); } #[$test] $($async)? fn empty_ns_empty_tag() { let buf = $buf; let mut position = 1; let mut input = b":>".as_ref(); // ^= 3 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(b":") ); assert_eq!(position, 3); } #[$test] $($async)? fn empty_ns() { let buf = $buf; let mut position = 1; let mut input = b":tag>".as_ref(); // ^= 6 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(b":tag") ); assert_eq!(position, 6); } #[$test] $($async)? fn with_attributes() { let buf = $buf; let mut position = 1; let mut input = br#"tag attr-1=">" attr2 = '>' 3attr>"#.as_ref(); // ^= 39 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(br#"tag attr-1=">" attr2 = '>' 3attr"#) ); assert_eq!(position, 39); } } mod self_closed { use super::*; use pretty_assertions::assert_eq; #[$test] $($async)? fn empty_tag() { let buf = $buf; let mut position = 1; let mut input = b"/>".as_ref(); // ^= 3 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? 
.unwrap()), Bytes(b"/") ); assert_eq!(position, 3); } #[$test] $($async)? fn normal() { let buf = $buf; let mut position = 1; let mut input = b"tag/>".as_ref(); // ^= 6 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(b"tag/") ); assert_eq!(position, 6); } #[$test] $($async)? fn empty_ns_empty_tag() { let buf = $buf; let mut position = 1; let mut input = b":/>".as_ref(); // ^= 4 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(b":/") ); assert_eq!(position, 4); } #[$test] $($async)? fn empty_ns() { let buf = $buf; let mut position = 1; let mut input = b":tag/>".as_ref(); // ^= 7 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(b":tag/") ); assert_eq!(position, 7); } #[$test] $($async)? fn with_attributes() { let buf = $buf; let mut position = 1; let mut input = br#"tag attr-1="/>" attr2 = '/>' 3attr/>"#.as_ref(); // ^= 42 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(br#"tag attr-1="/>" attr2 = '/>' 3attr/"#) ); assert_eq!(position, 42); } } mod close { use super::*; use pretty_assertions::assert_eq; #[$test] $($async)? fn empty_tag() { let buf = $buf; let mut position = 1; let mut input = b"/ >".as_ref(); // ^= 4 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(b"/ ") ); assert_eq!(position, 4); } #[$test] $($async)? fn normal() { let buf = $buf; let mut position = 1; let mut input = b"/tag>".as_ref(); // ^= 6 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(b"/tag") ); assert_eq!(position, 6); } #[$test] $($async)? 
fn empty_ns_empty_tag() { let buf = $buf; let mut position = 1; let mut input = b"/:>".as_ref(); // ^= 4 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(b"/:") ); assert_eq!(position, 4); } #[$test] $($async)? fn empty_ns() { let buf = $buf; let mut position = 1; let mut input = b"/:tag>".as_ref(); // ^= 7 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(b"/:tag") ); assert_eq!(position, 7); } #[$test] $($async)? fn with_attributes() { let buf = $buf; let mut position = 1; let mut input = br#"/tag attr-1=">" attr2 = '>' 3attr>"#.as_ref(); // ^= 40 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(br#"/tag attr-1=">" attr2 = '>' 3attr"#) ); assert_eq!(position, 40); } } } /// Ensures, that no empty `Text` events are generated mod $read_event { use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event}; use crate::reader::Reader; use pretty_assertions::assert_eq; /// When `encoding` feature is enabled, encoding should be detected /// from BOM (UTF-8) and BOM should be stripped. /// /// When `encoding` feature is disabled, UTF-8 is assumed and BOM /// character should be stripped for consistency #[$test] $($async)? fn bom_from_reader() { let mut reader = Reader::from_reader("\u{feff}\u{feff}".as_bytes()); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Text(BytesText::from_escaped("\u{feff}")) ); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Eof ); } /// When parsing from &str, encoding is fixed (UTF-8), so /// - when `encoding` feature is disabled, the behavior the /// same as in `bom_from_reader` text /// - when `encoding` feature is enabled, the behavior should /// stay consistent, so the first BOM character is stripped #[$test] $($async)? 
fn bom_from_str() { let mut reader = Reader::from_str("\u{feff}\u{feff}"); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Text(BytesText::from_escaped("\u{feff}")) ); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Eof ); } #[$test] $($async)? fn declaration() { let mut reader = Reader::from_str(""); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3))) ); } #[$test] $($async)? fn doctype() { let mut reader = Reader::from_str(""); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::DocType(BytesText::from_escaped("x")) ); } #[$test] $($async)? fn processing_instruction() { let mut reader = Reader::from_str("\" ?>"); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::PI(BytesPI::new("xml-stylesheet '? >\" ")) ); } /// Lone closing tags are not allowed, so testing it together with start tag #[$test] $($async)? fn start_and_end() { let mut reader = Reader::from_str(""); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Start(BytesStart::new("tag")) ); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::End(BytesEnd::new("tag")) ); } #[$test] $($async)? fn empty() { let mut reader = Reader::from_str(""); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Empty(BytesStart::new("tag")) ); } #[$test] $($async)? fn text() { let mut reader = Reader::from_str("text"); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Text(BytesText::from_escaped("text")) ); } #[$test] $($async)? fn cdata() { let mut reader = Reader::from_str(""); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::CData(BytesCData::new("")) ); } #[$test] $($async)? fn comment() { let mut reader = Reader::from_str(""); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Comment(BytesText::from_escaped("")) ); } #[$test] $($async)? 
fn eof() { let mut reader = Reader::from_str(""); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Eof ); } } }; } // Export macros for the child modules: // - buffered_reader // - slice_reader pub(super) use check; } quick-xml-0.36.1/src/reader/ns_reader.rs000064400000000000000000001102410072674642500162150ustar 00000000000000//! A reader that manages namespace declarations found in the input and able //! to resolve [qualified names] to [expanded names]. //! //! [qualified names]: https://www.w3.org/TR/xml-names11/#dt-qualname //! [expanded names]: https://www.w3.org/TR/xml-names11/#dt-expname use std::borrow::Cow; use std::fs::File; use std::io::{BufRead, BufReader}; use std::ops::Deref; use std::path::Path; use crate::errors::Result; use crate::events::Event; use crate::name::{LocalName, NamespaceResolver, PrefixIter, QName, ResolveResult}; use crate::reader::{Config, Reader, Span, XmlSource}; /// A low level encoding-agnostic XML event reader that performs namespace resolution. /// /// Consumes a [`BufRead`] and streams XML `Event`s. pub struct NsReader { /// An XML reader pub(super) reader: Reader, /// A buffer to manage namespaces ns_resolver: NamespaceResolver, /// We cannot pop data from the namespace stack until returned `Empty` or `End` /// event will be processed by the user, so we only mark that we should that /// in the next [`Self::read_event_impl()`] call. pending_pop: bool, } /// Builder methods impl NsReader { /// Creates a `NsReader` that reads from a reader. #[inline] pub fn from_reader(reader: R) -> Self { Self::new(Reader::from_reader(reader)) } /// Returns reference to the parser configuration #[inline] pub const fn config(&self) -> &Config { self.reader.config() } /// Returns mutable reference to the parser configuration #[inline] pub fn config_mut(&mut self) -> &mut Config { self.reader.config_mut() } /// Returns all the prefixes currently declared except the default `xml` and `xmlns` namespaces. 
/// /// # Examples /// /// This example shows what results the returned iterator would return after /// reading each event of a simple XML. /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::name::{Namespace, PrefixDeclaration}; /// use quick_xml::NsReader; /// /// let src = " /// /// /// /// /// /// /// "; /// let mut reader = NsReader::from_str(src); /// reader.config_mut().trim_text(true); /// // No prefixes at the beginning /// assert_eq!(reader.prefixes().collect::>(), vec![]); /// /// reader.read_resolved_event()?; // /// // No prefixes declared on root /// assert_eq!(reader.prefixes().collect::>(), vec![]); /// /// reader.read_resolved_event()?; // /// // Two prefixes declared on "a" /// assert_eq!(reader.prefixes().collect::>(), vec![ /// (PrefixDeclaration::Default, Namespace(b"a1")), /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2")) /// ]); /// /// reader.read_resolved_event()?; // /// // The default prefix got overridden and new "b" prefix /// assert_eq!(reader.prefixes().collect::>(), vec![ /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2")), /// (PrefixDeclaration::Default, Namespace(b"b1")), /// (PrefixDeclaration::Named(b"b"), Namespace(b"b2")) /// ]); /// /// reader.read_resolved_event()?; // /// // Still the same /// assert_eq!(reader.prefixes().collect::>(), vec![ /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2")), /// (PrefixDeclaration::Default, Namespace(b"b1")), /// (PrefixDeclaration::Named(b"b"), Namespace(b"b2")) /// ]); /// /// reader.read_resolved_event()?; // /// // Still the same /// assert_eq!(reader.prefixes().collect::>(), vec![ /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2")), /// (PrefixDeclaration::Default, Namespace(b"b1")), /// (PrefixDeclaration::Named(b"b"), Namespace(b"b2")) /// ]); /// /// reader.read_resolved_event()?; // /// // got closed so back to the prefixes declared on /// assert_eq!(reader.prefixes().collect::>(), vec![ /// (PrefixDeclaration::Default, Namespace(b"a1")), /// 
(PrefixDeclaration::Named(b"a"), Namespace(b"a2")) /// ]); /// /// reader.read_resolved_event()?; // /// // Still the same /// assert_eq!(reader.prefixes().collect::>(), vec![ /// (PrefixDeclaration::Default, Namespace(b"a1")), /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2")) /// ]); /// /// reader.read_resolved_event()?; // /// // got closed /// assert_eq!(reader.prefixes().collect::>(), vec![]); /// # quick_xml::Result::Ok(()) /// ``` #[inline] pub const fn prefixes(&self) -> PrefixIter { self.ns_resolver.iter() } } /// Private methods impl NsReader { #[inline] fn new(reader: Reader) -> Self { Self { reader, ns_resolver: NamespaceResolver::default(), pending_pop: false, } } fn read_event_impl<'i, B>(&mut self, buf: B) -> Result> where R: XmlSource<'i, B>, { self.pop(); let event = self.reader.read_event_impl(buf); self.process_event(event) } pub(super) fn pop(&mut self) { if self.pending_pop { self.ns_resolver.pop(); self.pending_pop = false; } } pub(super) fn process_event<'i>(&mut self, event: Result>) -> Result> { match event { Ok(Event::Start(e)) => { self.ns_resolver.push(&e)?; Ok(Event::Start(e)) } Ok(Event::Empty(e)) => { self.ns_resolver.push(&e)?; // notify next `read_event_impl()` invocation that it needs to pop this // namespace scope self.pending_pop = true; Ok(Event::Empty(e)) } Ok(Event::End(e)) => { // notify next `read_event_impl()` invocation that it needs to pop this // namespace scope self.pending_pop = true; Ok(Event::End(e)) } e => e, } } pub(super) fn resolve_event<'i>( &mut self, event: Result>, ) -> Result<(ResolveResult, Event<'i>)> { match event { Ok(Event::Start(e)) => Ok((self.ns_resolver.find(e.name()), Event::Start(e))), Ok(Event::Empty(e)) => Ok((self.ns_resolver.find(e.name()), Event::Empty(e))), Ok(Event::End(e)) => Ok((self.ns_resolver.find(e.name()), Event::End(e))), Ok(e) => Ok((ResolveResult::Unbound, e)), Err(e) => Err(e), } } } /// Getters impl NsReader { /// Consumes `NsReader` returning the underlying reader /// /// 
See the [`Reader::into_inner`] for examples #[inline] pub fn into_inner(self) -> R { self.reader.into_inner() } /// Gets a mutable reference to the underlying reader. pub fn get_mut(&mut self) -> &mut R { self.reader.get_mut() } /// Resolves a potentially qualified **element name** or **attribute name** /// into _(namespace name, local name)_. /// /// _Qualified_ names have the form `prefix:local-name` where the `prefix` /// is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`. /// The namespace prefix can be defined on the same element as the name in question. /// /// The method returns following results depending on the `name` shape, /// `attribute` flag and the presence of the default namespace: /// /// |attribute|`xmlns="..."`|QName |ResolveResult |LocalName /// |---------|-------------|-------------------|-----------------------|------------ /// |`true` |Not defined |`local-name` |[`Unbound`] |`local-name` /// |`true` |Defined |`local-name` |[`Unbound`] |`local-name` /// |`true` |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name` /// |`false` |Not defined |`local-name` |[`Unbound`] |`local-name` /// |`false` |Defined |`local-name` |[`Bound`] (default) |`local-name` /// |`false` |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name` /// /// If you want to clearly indicate that name that you resolve is an element /// or an attribute name, you could use [`resolve_attribute()`] or [`resolve_element()`] /// methods. /// /// # Lifetimes /// /// - `'n`: lifetime of a name. Returned local name will be bound to the same /// lifetime as the name in question. 
/// - returned namespace name will be bound to the reader itself
    ///
    /// [`Bound`]: ResolveResult::Bound
    /// [`Unbound`]: ResolveResult::Unbound
    /// [`Unknown`]: ResolveResult::Unknown
    /// [`resolve_attribute()`]: Self::resolve_attribute()
    /// [`resolve_element()`]: Self::resolve_element()
    #[inline]
    pub fn resolve<'n>(&self, name: QName<'n>, attribute: bool) -> (ResolveResult, LocalName<'n>) {
        // The resolver's flag has the opposite sense of `attribute`: it is
        // `true` for element names (see `resolve_element`) and `false` for
        // attribute names (see `resolve_attribute`).
        let is_element_name = !attribute;
        self.ns_resolver.resolve(name, is_element_name)
    }

    /// Resolves a potentially qualified **element name** into _(namespace name, local name)_.
    ///
    /// _Qualified_ element names have the form `prefix:local-name` where the
    /// `prefix` is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
    /// The namespace prefix can be defined on the same element as the element
    /// in question.
    ///
    /// _Unqualified_ elements inherit the current _default namespace_.
    ///
    /// The method returns the following results depending on the `name` shape and
    /// the presence of the default namespace:
    ///
    /// |`xmlns="..."`|QName              |ResolveResult          |LocalName
    /// |-------------|-------------------|-----------------------|------------
    /// |Not defined  |`local-name`       |[`Unbound`]            |`local-name`
    /// |Defined      |`local-name`       |[`Bound`] (default)    |`local-name`
    /// |_any_        |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
    ///
    /// # Lifetimes
    ///
    /// - `'n`: lifetime of an element name. Returned local name will be bound
    ///   to the same lifetime as the name in question.
    /// - returned namespace name will be bound to the reader itself
    ///
    /// # Examples
    ///
    /// This example shows how you can resolve a qualified name into a namespace.
    /// Note that in code like this you do not need to do that manually,
    /// because the namespace resolution result is returned by the [`read_resolved_event()`].
///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use quick_xml::events::Event;
    /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
    /// use quick_xml::reader::NsReader;
    ///
    /// let mut reader = NsReader::from_str("<tag xmlns='root namespace'/>");
    ///
    /// match reader.read_event().unwrap() {
    ///     Event::Empty(e) => assert_eq!(
    ///         reader.resolve_element(e.name()),
    ///         (Bound(Namespace(b"root namespace")), QName(b"tag").into())
    ///     ),
    ///     _ => unreachable!(),
    /// }
    /// ```
    ///
    /// [`Bound`]: ResolveResult::Bound
    /// [`Unbound`]: ResolveResult::Unbound
    /// [`Unknown`]: ResolveResult::Unknown
    /// [`read_resolved_event()`]: Self::read_resolved_event
    #[inline]
    pub fn resolve_element<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) {
        // `true` asks the resolver to treat `name` as an element name
        // (`resolve()` passes `!attribute` in the same position).
        self.ns_resolver.resolve(name, true)
    }

    /// Resolves a potentially qualified **attribute name** into _(namespace name, local name)_.
    ///
    /// _Qualified_ attribute names have the form `prefix:local-name` where the
    /// `prefix` is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
    /// The namespace prefix can be defined on the same element as the attribute
    /// in question.
    ///
    /// _Unqualified_ attribute names do *not* inherit the current _default namespace_.
    ///
    /// The method returns the following results depending on the `name` shape and
    /// the presence of the default namespace:
    ///
    /// |`xmlns="..."`|QName              |ResolveResult          |LocalName
    /// |-------------|-------------------|-----------------------|------------
    /// |Not defined  |`local-name`       |[`Unbound`]            |`local-name`
    /// |Defined      |`local-name`       |[`Unbound`]            |`local-name`
    /// |_any_        |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
    ///
    /// # Lifetimes
    ///
    /// - `'n`: lifetime of an attribute name. Returned local name will be bound
    ///   to the same lifetime as the name in question.
/// - returned namespace name will be bound to the reader itself /// /// # Examples /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::Event; /// use quick_xml::name::{Namespace, QName, ResolveResult::*}; /// use quick_xml::reader::NsReader; /// /// let mut reader = NsReader::from_str(" /// /// "); /// reader.config_mut().trim_text(true); /// /// match reader.read_event().unwrap() { /// Event::Empty(e) => { /// let mut iter = e.attributes(); /// /// // Unlike elements, attributes without explicit namespace /// // not bound to any namespace /// let one = iter.next().unwrap().unwrap(); /// assert_eq!( /// reader.resolve_attribute(one.key), /// (Unbound, QName(b"one").into()) /// ); /// /// let two = iter.next().unwrap().unwrap(); /// assert_eq!( /// reader.resolve_attribute(two.key), /// (Bound(Namespace(b"other namespace")), QName(b"two").into()) /// ); /// } /// _ => unreachable!(), /// } /// ``` /// /// [`Bound`]: ResolveResult::Bound /// [`Unbound`]: ResolveResult::Unbound /// [`Unknown`]: ResolveResult::Unknown #[inline] pub fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) { self.ns_resolver.resolve(name, false) } } impl NsReader { /// Reads the next event into given buffer. /// /// This method manages namespaces but doesn't resolve them automatically. /// You should call [`resolve_element()`] if you want to get a namespace. /// /// You also can use [`read_resolved_event_into()`] instead if you want to resolve /// namespace as soon as you get an event. 
///
    /// # Examples
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use quick_xml::events::Event;
    /// use quick_xml::name::{Namespace, ResolveResult::*};
    /// use quick_xml::reader::NsReader;
    ///
    /// let mut reader = NsReader::from_str(r#"
    ///     <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
    ///        <y:tag2><!--Test comment-->Test</y:tag2>
    ///        <y:tag2>Test 2</y:tag2>
    ///     </x:tag1>
    /// "#);
    /// reader.config_mut().trim_text(true);
    ///
    /// let mut count = 0;
    /// let mut buf = Vec::new();
    /// let mut txt = Vec::new();
    /// loop {
    ///     match reader.read_event_into(&mut buf).unwrap() {
    ///         Event::Start(e) => {
    ///             count += 1;
    ///             let (ns, local) = reader.resolve_element(e.name());
    ///             match local.as_ref() {
    ///                 b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))),
    ///                 b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))),
    ///                 _ => unreachable!(),
    ///             }
    ///         }
    ///         Event::Text(e) => {
    ///             txt.push(e.unescape().unwrap().into_owned())
    ///         }
    ///         Event::Eof => break,
    ///         _ => (),
    ///     }
    ///     buf.clear();
    /// }
    /// assert_eq!(count, 3);
    /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
    /// ```
    ///
    /// [`resolve_element()`]: Self::resolve_element
    /// [`read_resolved_event_into()`]: Self::read_resolved_event_into
    #[inline]
    pub fn read_event_into<'b>(&mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>> {
        self.read_event_impl(buf)
    }

    /// Reads the next event into given buffer and resolves its namespace (if applicable).
    ///
    /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events.
    /// For all other events the concept of namespace is not defined, so
    /// a [`ResolveResult::Unbound`] is returned.
    ///
    /// If you are not interested in namespaces, you can use [`read_event_into()`]
    /// which will not automatically resolve namespaces for you.
///
    /// # Examples
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use quick_xml::events::Event;
    /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
    /// use quick_xml::reader::NsReader;
    ///
    /// let mut reader = NsReader::from_str(r#"
    ///     <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
    ///        <y:tag2><!--Test comment-->Test</y:tag2>
    ///        <y:tag2>Test 2</y:tag2>
    ///     </x:tag1>
    /// "#);
    /// reader.config_mut().trim_text(true);
    ///
    /// let mut count = 0;
    /// let mut buf = Vec::new();
    /// let mut txt = Vec::new();
    /// loop {
    ///     match reader.read_resolved_event_into(&mut buf).unwrap() {
    ///         (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => {
    ///             count += 1;
    ///             assert_eq!(e.local_name(), QName(b"tag1").into());
    ///         }
    ///         (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => {
    ///             count += 1;
    ///             assert_eq!(e.local_name(), QName(b"tag2").into());
    ///         }
    ///         (_, Event::Start(_)) => unreachable!(),
    ///
    ///         (_, Event::Text(e)) => {
    ///             txt.push(e.unescape().unwrap().into_owned())
    ///         }
    ///         (_, Event::Eof) => break,
    ///         _ => (),
    ///     }
    ///     buf.clear();
    /// }
    /// assert_eq!(count, 3);
    /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
    /// ```
    ///
    /// [`Start`]: Event::Start
    /// [`Empty`]: Event::Empty
    /// [`End`]: Event::End
    /// [`read_event_into()`]: Self::read_event_into
    #[inline]
    pub fn read_resolved_event_into<'b>(
        &mut self,
        buf: &'b mut Vec<u8>,
    ) -> Result<(ResolveResult, Event<'b>)> {
        let event = self.read_event_impl(buf);
        self.resolve_event(event)
    }

    /// Reads until end element is found using provided buffer as intermediate
    /// storage for events content. This function is supposed to be called after
    /// you already read a [`Start`] event.
    ///
    /// Returns a span that covers content between `>` of an opening tag and `<` of
    /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and
    /// this method was called after reading expanded [`Start`] event.
    ///
    /// Manages nested cases where parent and child elements have the _literally_
    /// same name.
///
    /// If a corresponding [`End`] event is not found, an error of type [`IllFormed`]
    /// will be returned. In particular, that error will be returned if you call
    /// this method without consuming the corresponding [`Start`] event first.
    ///
    /// If your reader is created from a string slice or byte array slice, it is
    /// better to use [`read_to_end()`] method, because it will not copy bytes
    /// into intermediate buffer.
    ///
    /// The provided `buf` buffer will be filled only by one event content at time.
    /// Before reading of each event the buffer will be cleared. If you know an
    /// appropriate size of each event, you can preallocate the buffer to reduce
    /// number of reallocations.
    ///
    /// The `end` parameter should contain name of the end element _in the reader
    /// encoding_. It is good practice to always get that parameter using
    /// [`BytesStart::to_end()`] method.
    ///
    /// # Namespaces
    ///
    /// While the `NsReader` does namespace resolution, namespaces do not
    /// change the algorithm for comparing names. Although the names `a:name`
    /// and `b:name` where both prefixes `a` and `b` resolve to the same namespace,
    /// are semantically equivalent, `</b:name>` cannot close `<a:name>`, because
    /// according to [the specification]
    ///
    /// > The end of every element that begins with a **start-tag** MUST be marked
    /// > by an **end-tag** containing a name that echoes the element's type as
    /// > given in the **start-tag**
    ///
    /// # Examples
    ///
    /// This example shows, how you can skip XML content after you read the
    /// start event.
/// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::name::{Namespace, ResolveResult}; /// use quick_xml::reader::NsReader; /// /// let mut reader = NsReader::from_str(r#" /// /// /// /// /// /// /// /// /// /// /// /// /// "#); /// reader.config_mut().trim_text(true); /// let mut buf = Vec::new(); /// /// let ns = Namespace(b"namespace 1"); /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5); /// let end = start.to_end().into_owned(); /// /// // First, we read a start event... /// assert_eq!( /// reader.read_resolved_event_into(&mut buf).unwrap(), /// (ResolveResult::Bound(ns), Event::Start(start)) /// ); /// /// // ...then, we could skip all events to the corresponding end event. /// // This call will correctly handle nested elements. /// // Note, however, that this method does not handle namespaces. /// reader.read_to_end_into(end.name(), &mut buf).unwrap(); /// /// // At the end we should get an Eof event, because we ate the whole XML /// assert_eq!( /// reader.read_resolved_event_into(&mut buf).unwrap(), /// (ResolveResult::Unbound, Event::Eof) /// ); /// ``` /// /// [`Start`]: Event::Start /// [`End`]: Event::End /// [`IllFormed`]: crate::errors::Error::IllFormed /// [`read_to_end()`]: Self::read_to_end /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end /// [`expand_empty_elements`]: Config::expand_empty_elements /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag #[inline] pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec) -> Result { // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should // match literally the start name. See `Config::check_end_names` documentation self.reader.read_to_end_into(end, buf) } } impl NsReader> { /// Creates an XML reader from a file path. 
pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
        Ok(Self::new(Reader::from_file(path)?))
    }
}

impl<'i> NsReader<&'i [u8]> {
    /// Creates an XML reader from a string slice.
    #[inline]
    #[allow(clippy::should_implement_trait)]
    pub fn from_str(s: &'i str) -> Self {
        Self::new(Reader::from_str(s))
    }

    /// Reads the next event, borrowing its content from the input buffer.
    ///
    /// This method manages namespaces but doesn't resolve them automatically.
    /// You should call [`resolve_element()`] if you want to get a namespace.
    ///
    /// You also can use [`read_resolved_event()`] instead if you want to resolve namespace
    /// as soon as you get an event.
    ///
    /// There is no asynchronous `read_event_async()` version of this function,
    /// because it is not necessary -- the contents are already in memory and no IO
    /// is needed, therefore there is no potential for blocking.
    ///
    /// # Examples
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use quick_xml::events::Event;
    /// use quick_xml::name::{Namespace, ResolveResult::*};
    /// use quick_xml::reader::NsReader;
    ///
    /// let mut reader = NsReader::from_str(r#"
    ///     <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
    ///        <y:tag2><!--Test comment-->Test</y:tag2>
    ///        <y:tag2>Test 2</y:tag2>
    ///     </x:tag1>
    /// "#);
    /// reader.config_mut().trim_text(true);
    ///
    /// let mut count = 0;
    /// let mut txt = Vec::new();
    /// loop {
    ///     match reader.read_event().unwrap() {
    ///         Event::Start(e) => {
    ///             count += 1;
    ///             let (ns, local) = reader.resolve_element(e.name());
    ///             match local.as_ref() {
    ///                 b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))),
    ///                 b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))),
    ///                 _ => unreachable!(),
    ///             }
    ///         }
    ///         Event::Text(e) => {
    ///             txt.push(e.unescape().unwrap().into_owned())
    ///         }
    ///         Event::Eof => break,
    ///         _ => (),
    ///     }
    /// }
    /// assert_eq!(count, 3);
    /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
    /// ```
    ///
    /// [`resolve_element()`]: Self::resolve_element
    /// [`read_resolved_event()`]: Self::read_resolved_event
    #[inline]
    pub fn read_event(&mut self) -> Result<Event<'i>> {
        // The slice source needs no intermediate buffer, hence the unit argument.
        self.read_event_impl(())
    }

    /// Reads the next event, borrowing its content from the input buffer, and resolves
    /// its namespace (if applicable).
    ///
    /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events.
    /// For all other events the concept of namespace is not defined, so
    /// a [`ResolveResult::Unbound`] is returned.
    ///
    /// If you are not interested in namespaces, you can use [`read_event()`]
    /// which will not automatically resolve namespaces for you.
    ///
    /// There is no asynchronous `read_resolved_event_async()` version of this function,
    /// because it is not necessary -- the contents are already in memory and no IO
    /// is needed, therefore there is no potential for blocking.
    ///
    /// # Examples
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use quick_xml::events::Event;
    /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
    /// use quick_xml::reader::NsReader;
    ///
    /// let mut reader = NsReader::from_str(r#"
    ///     <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
    ///        <y:tag2><!--Test comment-->Test</y:tag2>
    ///        <y:tag2>Test 2</y:tag2>
    ///     </x:tag1>
    /// "#);
    /// reader.config_mut().trim_text(true);
    ///
    /// let mut count = 0;
    /// let mut txt = Vec::new();
    /// loop {
    ///     match reader.read_resolved_event().unwrap() {
    ///         (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => {
    ///             count += 1;
    ///             assert_eq!(e.local_name(), QName(b"tag1").into());
    ///         }
    ///         (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => {
    ///             count += 1;
    ///             assert_eq!(e.local_name(), QName(b"tag2").into());
    ///         }
    ///         (_, Event::Start(_)) => unreachable!(),
    ///
    ///         (_, Event::Text(e)) => {
    ///             txt.push(e.unescape().unwrap().into_owned())
    ///         }
    ///         (_, Event::Eof) => break,
    ///         _ => (),
    ///     }
    /// }
    /// assert_eq!(count, 3);
    /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
    /// ```
    ///
    /// [`Start`]: Event::Start
    /// [`Empty`]: Event::Empty
    /// [`End`]: Event::End
    /// [`read_event()`]: Self::read_event
    #[inline]
    pub fn read_resolved_event(&mut self) -> Result<(ResolveResult, Event<'i>)> {
        let event = self.read_event_impl(());
        self.resolve_event(event)
    }

    /// Reads until end element is found. This function is supposed to be called
    /// after you already read a [`Start`] event.
    ///
    /// Returns a span that covers content between `>` of an opening tag and `<` of
    /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and
    /// this method was called after reading expanded [`Start`] event.
    ///
    /// Manages nested cases where parent and child elements have the _literally_
    /// same name.
    ///
    /// If a corresponding [`End`] event is not found, an error of type [`IllFormed`]
    /// will be returned. In particular, that error will be returned if you call
    /// this method without consuming the corresponding [`Start`] event first.
    ///
    /// The `end` parameter should contain name of the end element _in the reader
    /// encoding_. It is good practice to always get that parameter using
    /// [`BytesStart::to_end()`] method.
    ///
    /// There is no asynchronous `read_to_end_async()` version of this function,
    /// because it is not necessary -- the contents are already in memory and no IO
    /// is needed, therefore there is no potential for blocking.
    ///
    /// # Namespaces
    ///
    /// While the `NsReader` does namespace resolution, namespaces do not
    /// change the algorithm for comparing names. Although the names `a:name`
    /// and `b:name` where both prefixes `a` and `b` resolve to the same namespace,
    /// are semantically equivalent, `</b:name>` cannot close `<a:name>`, because
    /// according to [the specification]
    ///
    /// > The end of every element that begins with a **start-tag** MUST be marked
    /// > by an **end-tag** containing a name that echoes the element's type as
    /// > given in the **start-tag**
    ///
    /// # Examples
    ///
    /// This example shows, how you can skip XML content after you read the
    /// start event.
///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use quick_xml::events::{BytesStart, Event};
    /// use quick_xml::name::{Namespace, ResolveResult};
    /// use quick_xml::reader::NsReader;
    ///
    /// let mut reader = NsReader::from_str(r#"
    ///     <outer xmlns="namespace 1">
    ///         <inner xmlns="namespace 2">
    ///             <outer></outer>
    ///         </inner>
    ///         <inner>
    ///             <inner></inner>
    ///             <inner/>
    ///             <outer></outer>
    ///             <p:outer xmlns:p="ns"></p:outer>
    ///             <outer/>
    ///         </inner>
    ///     </outer>
    /// "#);
    /// reader.config_mut().trim_text(true);
    ///
    /// let ns = Namespace(b"namespace 1");
    /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5);
    /// let end   = start.to_end().into_owned();
    ///
    /// // First, we read a start event...
    /// assert_eq!(
    ///     reader.read_resolved_event().unwrap(),
    ///     (ResolveResult::Bound(ns), Event::Start(start))
    /// );
    ///
    /// // ...then, we could skip all events to the corresponding end event.
    /// // This call will correctly handle nested <outer> elements.
    /// // Note, however, that this method does not handle namespaces.
    /// reader.read_to_end(end.name()).unwrap();
    ///
    /// // At the end we should get an Eof event, because we ate the whole XML
    /// assert_eq!(
    ///     reader.read_resolved_event().unwrap(),
    ///     (ResolveResult::Unbound, Event::Eof)
    /// );
    /// ```
    ///
    /// [`Start`]: Event::Start
    /// [`End`]: Event::End
    /// [`IllFormed`]: crate::errors::Error::IllFormed
    /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end
    /// [`expand_empty_elements`]: Config::expand_empty_elements
    /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag
    #[inline]
    pub fn read_to_end(&mut self, end: QName) -> Result<Span> {
        // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
        // match literally the start name. See `Config::check_end_names` documentation
        self.reader.read_to_end(end)
    }

    /// Reads content between start and end tags, including any markup. This
    /// function is supposed to be called after you already read a [`Start`] event.
    ///
    /// Manages nested cases where parent and child elements have the _literally_
    /// same name.
/// /// This method does not unescape read data, instead it returns content /// "as is" of the XML document. This is because it has no idea what text /// it reads, and if, for example, it contains CDATA section, attempt to /// unescape it content will spoil data. /// /// Any text will be decoded using the XML current [`decoder()`]. /// /// Actually, this method perform the following code: /// /// ```ignore /// let span = reader.read_to_end(end)?; /// let text = reader.decoder().decode(&reader.inner_slice[span]); /// ``` /// /// # Examples /// /// This example shows, how you can read a HTML content from your XML document. /// /// ``` /// # use pretty_assertions::assert_eq; /// # use std::borrow::Cow; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::reader::NsReader; /// /// let mut reader = NsReader::from_str(r#" /// /// This is a HTML text ///