zerovec-0.11.1/.cargo_vcs_info.json0000644000000001530000000000100125700ustar { "git": { "sha1": "f4290a877dfcb0f87cad6de4abdd65f0cbb33c9c" }, "path_in_vcs": "utils/zerovec" }zerovec-0.11.1/Cargo.lock0000644000000555060000000000100105570ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 3 [[package]] name = "aho-corasick" version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", ] [[package]] name = "anes" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstyle" version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" [[package]] name = "autocfg" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "bincode" version = "1.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" dependencies = [ "serde", ] [[package]] name = "bumpalo" version = "3.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" [[package]] name = "byteorder" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "cast" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cfg-if" version = "1.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "ciborium" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" dependencies = [ "ciborium-io", "ciborium-ll", "serde", ] [[package]] name = "ciborium-io" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" [[package]] name = "ciborium-ll" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" dependencies = [ "ciborium-io", "half", ] [[package]] name = "clap" version = "4.5.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "027bb0d98429ae334a8698531da7077bdf906419543a35a55c2cb1b66437d767" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" version = "4.5.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5589e0cba072e0f3d23791efac0fd8627b49c829c196a492e88168e6a669d863" dependencies = [ "anstyle", "clap_lex", ] [[package]] name = "clap_lex" version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" [[package]] name = "cobs" version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15" [[package]] name = "criterion" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" dependencies = [ "anes", "cast", "ciborium", "clap", "criterion-plot", "is-terminal", "itertools", "num-traits", "once_cell", "oorandom", "plotters", 
"rayon", "regex", "serde", "serde_derive", "serde_json", "tinytemplate", "walkdir", ] [[package]] name = "criterion-plot" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" dependencies = [ "cast", "itertools", ] [[package]] name = "crossbeam-deque" version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" dependencies = [ "crossbeam-epoch", "crossbeam-utils", ] [[package]] name = "crossbeam-epoch" version = "0.9.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" dependencies = [ "crossbeam-utils", ] [[package]] name = "crossbeam-utils" version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "crunchy" version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" [[package]] name = "databake" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff6ee9e2d2afb173bcdeee45934c89ec341ab26f91c9933774fc15c2b58f83ef" dependencies = [ "databake-derive", "proc-macro2", "quote", ] [[package]] name = "databake-derive" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6834770958c7b84223607e49758ec0dde273c4df915e734aad50f62968a4c134" dependencies = [ "proc-macro2", "quote", "syn", "synstructure", ] [[package]] name = "either" version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7914353092ddf589ad78f25c5c1c21b7f80b0ff8621e7c814c3485b5306da9d" [[package]] name = "embedded-io" version = "0.4.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" [[package]] name = "embedded-io" version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" [[package]] name = "getrandom" version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", "js-sys", "libc", "wasi", "wasm-bindgen", ] [[package]] name = "half" version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" dependencies = [ "cfg-if", "crunchy", ] [[package]] name = "hermit-abi" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" [[package]] name = "iai" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71a816c97c42258aa5834d07590b718b4c9a598944cd39a52dc25b351185d678" [[package]] name = "is-terminal" version = "0.4.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e19b23d53f35ce9f56aebc7d1bb4e6ac1e9c0db7ac85c8d1760c04379edced37" dependencies = [ "hermit-abi", "libc", "windows-sys", ] [[package]] name = "itertools" version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" dependencies = [ "either", ] [[package]] name = "itoa" version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" [[package]] name = "js-sys" version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" dependencies = [ "once_cell", "wasm-bindgen", ] [[package]] name = "libc" version = "0.2.170" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "875b3680cb2f8f71bdcf9a30f38d48282f5d3c95cbf9b3fa57269bb5d5c06828" [[package]] name = "libm" version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" [[package]] name = "log" version = "0.4.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "30bde2b3dc3671ae49d8e2e9f044c7c005836e7a023ee57cffa25ab82764bb9e" [[package]] name = "memchr" version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "num-traits" version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", "libm", ] [[package]] name = "once_cell" version = "1.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" [[package]] name = "oorandom" version = "11.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" [[package]] name = "paste" version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "plotters" version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" dependencies = [ "num-traits", "plotters-backend", "plotters-svg", "wasm-bindgen", "web-sys", ] [[package]] name = "plotters-backend" 
version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" [[package]] name = "plotters-svg" version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" dependencies = [ "plotters-backend", ] [[package]] name = "postcard" version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "170a2601f67cc9dba8edd8c4870b15f71a6a2dc196daec8c83f72b59dff628a8" dependencies = [ "cobs", "embedded-io 0.4.0", "embedded-io 0.6.1", "serde", ] [[package]] name = "ppv-lite86" version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" dependencies = [ "zerocopy", ] [[package]] name = "proc-macro2" version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" dependencies = [ "unicode-ident", ] [[package]] name = "quote" version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" dependencies = [ "proc-macro2", ] [[package]] name = "rand" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", "rand_chacha", "rand_core", ] [[package]] name = "rand_chacha" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", "rand_core", ] [[package]] name = "rand_core" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ "getrandom", ] [[package]] name = "rand_distr" version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" dependencies = [ "num-traits", "rand", ] [[package]] name = "rand_pcg" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "59cad018caf63deb318e5a4586d99a24424a364f40f1e5778c29aca23f4fc73e" dependencies = [ "rand_core", ] [[package]] name = "rayon" version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" dependencies = [ "either", "rayon-core", ] [[package]] name = "rayon-core" version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" dependencies = [ "crossbeam-deque", "crossbeam-utils", ] [[package]] name = "regex" version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", "regex-automata", "regex-syntax", ] [[package]] name = "regex-automata" version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", "regex-syntax", ] [[package]] name = "regex-syntax" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "rmp" version = "0.8.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "228ed7c16fa39782c3b3468e974aec2795e9089153cd08ee2e9aefb3613334c4" dependencies = [ "byteorder", "num-traits", "paste", 
] [[package]] name = "rmp-serde" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "52e599a477cf9840e92f2cde9a7189e67b42c57532749bf90aea6ec10facd4db" dependencies = [ "byteorder", "rmp", "serde", ] [[package]] name = "rustversion" version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4" [[package]] name = "ryu" version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd" [[package]] name = "same-file" version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" dependencies = [ "winapi-util", ] [[package]] name = "serde" version = "1.0.218" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8dfc9d19bdbf6d17e22319da49161d5d0108e4188e8b680aef6299eed22df60" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" version = "1.0.218" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f09503e191f4e797cb8aac08e9a4a4695c5edf6a2e70e376d961ddd5c969f82b" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "serde_json" version = "1.0.139" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44f86c3acccc9c65b153fe1b85a3be07fe5515274ec9f0653b4a0875731c72a6" dependencies = [ "itoa", "memchr", "ryu", "serde", ] [[package]] name = "stable_deref_trait" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] name = "syn" version = "2.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1" dependencies = [ "proc-macro2", "quote", 
"unicode-ident", ] [[package]] name = "synstructure" version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "tinytemplate" version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" dependencies = [ "serde", "serde_json", ] [[package]] name = "twox-hash" version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e7b17f197b3050ba473acf9181f7b1d3b66d1cf7356c6cc57886662276e65908" [[package]] name = "unicode-ident" version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "00e2473a93778eb0bad35909dff6a10d28e63f792f16ed15e404fca9d5eeedbe" [[package]] name = "walkdir" version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" dependencies = [ "same-file", "winapi-util", ] [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" dependencies = [ "cfg-if", "once_cell", "rustversion", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" dependencies = [ "bumpalo", "log", "proc-macro2", "quote", "syn", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" version = "0.2.100" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" dependencies = [ "quote", "wasm-bindgen-macro-support", ] [[package]] name = "wasm-bindgen-macro-support" version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", "syn", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" dependencies = [ "unicode-ident", ] [[package]] name = "web-sys" version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" dependencies = [ "js-sys", "wasm-bindgen", ] [[package]] name = "winapi-util" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ "windows-sys", ] [[package]] name = "windows-sys" version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ "windows-targets", ] [[package]] name = "windows-targets" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", "windows_i686_gnu", "windows_i686_gnullvm", "windows_i686_msvc", "windows_x86_64_gnu", "windows_x86_64_gnullvm", "windows_x86_64_msvc", ] [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "yoke" version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" dependencies = [ "stable_deref_trait", "yoke-derive", "zerofrom", ] [[package]] name = "yoke-derive" version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" dependencies = [ "proc-macro2", "quote", "syn", "synstructure", 
] [[package]] name = "zerocopy" version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ "byteorder", "zerocopy-derive", ] [[package]] name = "zerocopy-derive" version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "zerofrom" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" dependencies = [ "zerofrom-derive", ] [[package]] name = "zerofrom-derive" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" dependencies = [ "proc-macro2", "quote", "syn", "synstructure", ] [[package]] name = "zerovec" version = "0.11.1" dependencies = [ "bincode", "criterion", "databake", "getrandom", "iai", "postcard", "rand", "rand_distr", "rand_pcg", "rmp-serde", "serde", "serde_json", "twox-hash", "yoke", "zerofrom", "zerovec-derive", ] [[package]] name = "zerovec-derive" version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" dependencies = [ "proc-macro2", "quote", "syn", ] zerovec-0.11.1/Cargo.toml0000644000000066760000000000100106060ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). 
# See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.81" name = "zerovec" version = "0.11.1" authors = ["The ICU4X Project Developers"] build = false include = [ "data/**/*", "src/**/*", "examples/**/*", "benches/**/*", "tests/**/*", "Cargo.toml", "LICENSE", "README.md", ] autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "Zero-copy vector backed by a byte array" readme = "README.md" keywords = [ "zerocopy", "serialization", "zero-copy", "serde", ] categories = [ "rust-patterns", "memory-management", "caching", "no-std", "data-structures", ] license = "Unicode-3.0" repository = "https://github.com/unicode-org/icu4x" [package.metadata.cargo-all-features] max_combination_size = 3 [package.metadata.docs.rs] all-features = true [package.metadata.workspaces] independent = true [features] alloc = [] databake = ["dep:databake"] derive = ["dep:zerovec-derive"] hashmap = [ "dep:twox-hash", "alloc", ] serde = [ "dep:serde", "alloc", ] std = [] yoke = ["dep:yoke"] [lib] name = "zerovec" path = "src/lib.rs" bench = false [[example]] name = "zv_serde" path = "examples/zv_serde.rs" required-features = ["serde"] [[bench]] name = "vzv" path = "benches/vzv.rs" harness = false [[bench]] name = "zeromap" path = "benches/zeromap.rs" harness = false required-features = [ "serde", "hashmap", "derive", ] [[bench]] name = "zerovec" path = "benches/zerovec.rs" harness = false [[bench]] name = "zerovec_iai" path = "benches/zerovec_iai.rs" harness = false [[bench]] name = "zerovec_serde" path = "benches/zerovec_serde.rs" harness = false required-features = ["serde"] [dependencies.databake] version = "0.2.0" features = ["derive"] optional = true default-features = false [dependencies.serde] version = "1.0.110" features = [ "alloc", "derive", ] optional = true default-features = false [dependencies.twox-hash] version = "2.0.0" features = ["xxhash64"] optional = true default-features = false 
[dependencies.yoke] version = "0.8.0" optional = true default-features = false [dependencies.zerofrom] version = "0.1.3" default-features = false [dependencies.zerovec-derive] version = "0.11.1" optional = true default-features = false [dev-dependencies.bincode] version = "1.3.1" [dev-dependencies.getrandom] version = "0.2" features = ["js"] [dev-dependencies.iai] version = "0.1.1" [dev-dependencies.postcard] version = "1.0.3" features = ["use-std"] default-features = false [dev-dependencies.rand] version = "0.8" [dev-dependencies.rand_distr] version = "0.4" [dev-dependencies.rand_pcg] version = "0.3" [dev-dependencies.rmp-serde] version = "1.2.0" [dev-dependencies.serde] version = "1.0.110" features = ["derive"] default-features = false [dev-dependencies.serde_json] version = "1.0.45" [dev-dependencies.yoke] version = "0.8.0" features = ["derive"] default-features = false [target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies.criterion] version = "0.5.0" zerovec-0.11.1/Cargo.toml.orig000064400000000000000000000050361046102023000142540ustar 00000000000000# This file is part of ICU4X. For terms of use, please see the file # called LICENSE at the top level of the ICU4X source tree # (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
[package] name = "zerovec" description = "Zero-copy vector backed by a byte array" version = "0.11.1" categories = ["rust-patterns", "memory-management", "caching", "no-std", "data-structures"] keywords = ["zerocopy", "serialization", "zero-copy", "serde"] authors.workspace = true edition.workspace = true include.workspace = true license.workspace = true repository.workspace = true rust-version.workspace = true [package.metadata.workspaces] independent = true [package.metadata.docs.rs] all-features = true [dependencies] zerofrom = { workspace = true } zerovec-derive = { workspace = true, optional = true} databake = { workspace = true, features = ["derive"], optional = true } serde = { workspace = true, features = ["alloc", "derive"], optional = true } yoke = { workspace = true, optional = true } twox-hash = { workspace = true, optional = true } [dev-dependencies] bincode = { workspace = true } getrandom = { workspace = true, features = ["js"] } iai = { workspace = true } icu_benchmark_macros = { path = "../../tools/benchmark/macros" } postcard = { workspace = true, features = ["use-std"] } rand = { workspace = true } rand_distr = { workspace = true } rand_pcg = { workspace = true } rmp-serde = { workspace = true } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } potential_utf = { path = "../../utils/potential_utf", features = ["zerovec"] } yoke = { workspace = true, features = ["derive"] } zerofrom = { path = "../../utils/zerofrom", features = ["derive"] } [target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies] criterion = { workspace = true } [features] derive = ["dep:zerovec-derive"] hashmap = ["dep:twox-hash", "alloc"] yoke = ["dep:yoke"] serde = ["dep:serde", "alloc"] databake = ["dep:databake"] alloc = [] # No longer does anything std = [] [package.metadata.cargo-all-features] # We have tons of features here, limit the amount of tests we run max_combination_size = 3 [lib] bench = false # This option is required for 
Benchmark CI [[bench]] name = "zerovec" harness = false [[bench]] name = "zerovec_serde" harness = false required-features = ["serde"] [[bench]] name = "vzv" harness = false [[bench]] name = "zerovec_iai" harness = false [[bench]] name = "zeromap" harness = false required-features = ["serde", "hashmap", "derive"] [[example]] name = "zv_serde" required-features = ["serde"] zerovec-0.11.1/LICENSE000064400000000000000000000042231046102023000123670ustar 00000000000000UNICODE LICENSE V3 COPYRIGHT AND PERMISSION NOTICE Copyright © 2020-2024 Unicode, Inc. NOTICE TO USER: Carefully read the following legal agreement. BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE. Permission is hereby granted, free of charge, to any person obtaining a copy of data files and any associated documentation (the "Data Files") or software and any associated documentation (the "Software") to deal in the Data Files or Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, and/or sell copies of the Data Files or Software, and to permit persons to whom the Data Files or Software are furnished to do so, provided that either (a) this copyright and permission notice appear with all copies of the Data Files or Software, or (b) this copyright and permission notice appear in associated Documentation. THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA FILES OR SOFTWARE. Except as contained in this notice, the name of a copyright holder shall not be used in advertising or otherwise to promote the sale, use or other dealings in these Data Files or Software without prior written authorization of the copyright holder. SPDX-License-Identifier: Unicode-3.0 — Portions of ICU4X may have been adapted from ICU4C and/or ICU4J. ICU 1.8.1 to ICU 57.1 © 1995-2016 International Business Machines Corporation and others. zerovec-0.11.1/README.md000064400000000000000000000203671046102023000126500ustar 00000000000000# zerovec [![crates.io](https://img.shields.io/crates/v/zerovec)](https://crates.io/crates/zerovec) Zero-copy vector abstractions for arbitrary types, backed by byte slices. `zerovec` enables a far wider range of types — beyond just `&[u8]` and `&str` — to participate in zero-copy deserialization from byte slices. It is `serde` compatible and comes equipped with proc macros. Clients upgrading to `zerovec` benefit from zero heap allocations when deserializing read-only data. This crate has four main types: - [`ZeroVec<'a, T>`] (and [`ZeroSlice`](ZeroSlice)) for fixed-width types like `u32` - [`VarZeroVec<'a, T>`] (and [`VarZeroSlice`](VarZeroSlice)) for variable-width types like `str` - [`ZeroMap<'a, K, V>`] to map from `K` to `V` - [`ZeroMap2d<'a, K0, K1, V>`] to map from the pair `(K0, K1)` to `V` The first two are intended as close-to-drop-in replacements for `Vec` in Serde structs. The third and fourth are intended as a replacement for `HashMap` or [`LiteMap`](https://docs.rs/litemap).
When used with Serde derives, **be sure to apply `#[serde(borrow)]` to these types**, same as one would for [`Cow<'a, T>`]. [`ZeroVec<'a, T>`], [`VarZeroVec<'a, T>`], [`ZeroMap<'a, K, V>`], and [`ZeroMap2d<'a, K0, K1, V>`] all behave like [`Cow<'a, T>`] in that they abstract over either borrowed or owned data. When performing deserialization from human-readable formats (like `json` and `xml`), typically these types will allocate and fully own their data, whereas if deserializing from binary formats like `bincode` and `postcard`, these types will borrow data directly from the buffer being deserialized from, avoiding allocations and only performing validity checks. As such, this crate can be pretty fast (see [below](#performance) for more information) on deserialization. See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for details on how this crate works under the hood. ## Cargo features This crate has several optional Cargo features: - `serde`: Allows serializing and deserializing `zerovec`'s abstractions via [`serde`](https://docs.rs/serde) - `yoke`: Enables implementations of `Yokeable` from the [`yoke`](https://docs.rs/yoke/) crate, which is also useful in situations involving a lot of zero-copy deserialization. - `derive`: Makes it easier to use custom types in these collections by providing the `#[make_ule]` and `#[make_varule]` proc macros, which generate appropriate [`ULE`](https://docs.rs/zerovec/latest/zerovec/ule/trait.ULE.html) and [`VarULE`](https://docs.rs/zerovec/latest/zerovec/ule/trait.VarULE.html)-conformant types for a given "normal" type. - `std`: Enables `std::error::Error` implementations for error types. This crate is by default `no_std` with a dependency on `alloc`.
[`ZeroVec<'a, T>`]: ZeroVec [`VarZeroVec<'a, T>`]: VarZeroVec [`ZeroMap<'a, K, V>`]: ZeroMap [`ZeroMap2d<'a, K0, K1, V>`]: ZeroMap2d [`Cow<'a, T>`]: alloc::borrow::Cow ## Examples Serialize and deserialize a struct with ZeroVec and VarZeroVec with Bincode: ```rust use zerovec::{VarZeroVec, ZeroVec}; // This example requires the "serde" feature #[derive(serde::Serialize, serde::Deserialize)] pub struct DataStruct<'data> { #[serde(borrow)] nums: ZeroVec<'data, u32>, #[serde(borrow)] chars: ZeroVec<'data, char>, #[serde(borrow)] strs: VarZeroVec<'data, str>, } let data = DataStruct { nums: ZeroVec::from_slice_or_alloc(&[211, 281, 421, 461]), chars: ZeroVec::alloc_from_slice(&['ö', '冇', 'म']), strs: VarZeroVec::from(&["hello", "world"]), }; let bincode_bytes = bincode::serialize(&data).expect("Serialization should be successful"); assert_eq!(bincode_bytes.len(), 63); let deserialized: DataStruct = bincode::deserialize(&bincode_bytes) .expect("Deserialization should be successful"); assert_eq!(deserialized.nums.first(), Some(211)); assert_eq!(deserialized.chars.get(1), Some('冇')); assert_eq!(deserialized.strs.get(1), Some("world")); // The deserialization will not have allocated anything assert!(!deserialized.nums.is_owned()); ``` Use custom types inside of ZeroVec: ```rust use zerovec::{ZeroVec, VarZeroVec, ZeroMap}; use std::borrow::Cow; use zerovec::ule::encode_varule_to_box; // custom fixed-size ULE type for ZeroVec #[zerovec::make_ule(DateULE)] #[derive(Copy, Clone, PartialEq, Eq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)] struct Date { y: u64, m: u8, d: u8 } // custom variable sized VarULE type for VarZeroVec #[zerovec::make_varule(PersonULE)] #[zerovec::derive(Serialize, Deserialize)] // add Serde impls to PersonULE #[derive(Clone, PartialEq, Eq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)] struct Person<'a> { birthday: Date, favorite_character: char, #[serde(borrow)] name: Cow<'a, str>, } #[derive(serde::Serialize, serde::Deserialize)] 
struct Data<'a> { #[serde(borrow)] important_dates: ZeroVec<'a, Date>, // note: VarZeroVec always must reference the ULE type directly #[serde(borrow)] important_people: VarZeroVec<'a, PersonULE>, #[serde(borrow)] birthdays_to_people: ZeroMap<'a, Date, PersonULE> } let person1 = Person { birthday: Date { y: 1990, m: 9, d: 7}, favorite_character: 'π', name: Cow::from("Kate") }; let person2 = Person { birthday: Date { y: 1960, m: 5, d: 25}, favorite_character: '冇', name: Cow::from("Jesse") }; let important_dates = ZeroVec::alloc_from_slice(&[Date { y: 1943, m: 3, d: 20}, Date { y: 1976, m: 8, d: 2}, Date { y: 1998, m: 2, d: 15}]); let important_people = VarZeroVec::from(&[&person1, &person2]); let mut birthdays_to_people: ZeroMap<Date, PersonULE> = ZeroMap::new(); // `.insert_var_v()` is slightly more convenient over `.insert()` for custom ULE types birthdays_to_people.insert_var_v(&person1.birthday, &person1); birthdays_to_people.insert_var_v(&person2.birthday, &person2); let data = Data { important_dates, important_people, birthdays_to_people }; let bincode_bytes = bincode::serialize(&data) .expect("Serialization should be successful"); assert_eq!(bincode_bytes.len(), 160); let deserialized: Data = bincode::deserialize(&bincode_bytes) .expect("Deserialization should be successful"); assert_eq!(deserialized.important_dates.get(0).unwrap().y, 1943); assert_eq!(&deserialized.important_people.get(1).unwrap().name, "Jesse"); assert_eq!(&deserialized.important_people.get(0).unwrap().name, "Kate"); assert_eq!(&deserialized.birthdays_to_people.get(&person1.birthday).unwrap().name, "Kate"); // This example requires the "serde" and "derive" features ``` ## Performance `zerovec` is designed for fast deserialization from byte buffers with zero memory allocations while minimizing performance regressions for common vector operations.
Benchmark results on x86_64: | Operation | `Vec` | `zerovec` | |---|---|---| | Deserialize vec of 100 `u32` | 233.18 ns | 14.120 ns | | Compute sum of vec of 100 `u32` (read every element) | 8.7472 ns | 10.775 ns | | Binary search vec of 1000 `u32` 50 times | 442.80 ns | 472.51 ns | | Deserialize vec of 100 strings | 7.3740 μs\* | 1.4495 μs | | Count chars in vec of 100 strings (read every element) | 747.50 ns | 955.28 ns | | Binary search vec of 500 strings 10 times | 466.09 ns | 790.33 ns | \* *This result is reported for `Vec<String>`. However, Serde also supports deserializing to the partially-zero-copy `Vec<&str>`; this gives 1.8420 μs, much faster than `Vec<String>` but a bit slower than `zerovec`.* | Operation | `HashMap` | `LiteMap` | `ZeroMap` | |---|---|---|---| | Deserialize a small map | 2.72 μs | 1.28 μs | 480 ns | | Deserialize a large map | 50.5 ms | 18.3 ms | 3.74 ms | | Look up from a small deserialized map | 49 ns | 42 ns | 54 ns | | Look up from a large deserialized map | 51 ns | 155 ns | 213 ns | Small = 16 elements, large = 131,072 elements. Maps contain `<String, String>`. The benches used to generate the above table can be found in the `benches` directory in the project repository. `zeromap` benches are named by convention, e.g. `zeromap/deserialize/small`, `zeromap/lookup/large`. The type is appended for baseline comparisons, e.g. `zeromap/lookup/small/hashmap`. ## More Information For more information on development, authorship, contributing etc. please visit [`ICU4X home page`](https://github.com/unicode-org/icu4x). zerovec-0.11.1/benches/vzv.rs000064400000000000000000000156241046102023000141730ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
use criterion::{black_box, criterion_group, criterion_main, Criterion}; use rand::SeedableRng; use rand_distr::{Alphanumeric, Distribution, Uniform}; use rand_pcg::Lcg64Xsh32; use std::ops::RangeInclusive; use zerovec::VarZeroVec; /// Generates an array of random alphanumeric strings. /// /// - length = range of lengths for the strings (chosen uniformly at random) /// - count = number of strings to generate /// - seed = seed for the PRNG /// /// Returns a tuple including the vector and a u64 that can be used to seed the next PRNG. fn random_alphanums(lengths: RangeInclusive, count: usize, seed: u64) -> (Vec, u64) { // Lcg64Xsh32 is a small, fast PRNG for reproducible benchmarks. let mut rng1 = Lcg64Xsh32::seed_from_u64(seed); let mut rng2 = Lcg64Xsh32::seed_from_u64(rand::Rng::gen(&mut rng1)); let alpha_dist = Alphanumeric; let len_dist = Uniform::from(lengths); let string_vec = len_dist .sample_iter(&mut rng1) .take(count) .map(|len| { (&alpha_dist) .sample_iter(&mut rng2) .take(len) .map(char::from) .collect::() }) .collect(); (string_vec, rand::Rng::gen(&mut rng1)) } fn overview_bench(c: &mut Criterion) { // Same as vzv/char_count/vzv but with different inputs let seed = 42; let (string_vec, _) = random_alphanums(2..=10, 100, seed); let bytes: Vec = VarZeroVec::::from(&string_vec).into_bytes(); let vzv = VarZeroVec::::parse_bytes(black_box(bytes.as_slice())).unwrap(); c.bench_function("vzv/overview", |b| { b.iter(|| { black_box(&vzv) .iter() .fold(0, |sum, string| sum + string.chars().count()) }); }); { char_count_benches(c); binary_search_benches(c); vzv_precompute_bench(c); } #[cfg(feature = "serde")] { serde_benches(c); } } fn char_count_benches(c: &mut Criterion) { let seed = 2021; let (string_vec, _) = random_alphanums(2..=20, 100, seed); let bytes: Vec = VarZeroVec::::from(&string_vec).into_bytes(); let vzv = VarZeroVec::::parse_bytes(black_box(bytes.as_slice())).unwrap(); // *** Count chars in vec of 100 strings *** 
c.bench_function("vzv/char_count/slice", |b| { b.iter(|| { black_box(&string_vec) .iter() .fold(0, |sum, string| sum + string.chars().count()) }); }); // *** Count chars in vec of 100 strings *** c.bench_function("vzv/char_count/vzv", |b| { b.iter(|| { black_box(&vzv) .iter() .fold(0, |sum, string| sum + string.chars().count()) }); }); } fn binary_search_benches(c: &mut Criterion) { let seed = 2021; let (string_vec, seed) = random_alphanums(2..=20, 500, seed); let (needles, _) = random_alphanums(2..=20, 10, seed); let bytes: Vec = VarZeroVec::::from(&string_vec).into_bytes(); let vzv = VarZeroVec::::parse_bytes(black_box(bytes.as_slice())).unwrap(); let single_needle = "lmnop".to_owned(); // *** Binary search vec of 500 strings 10 times *** c.bench_function("vzv/binary_search/slice", |b| { b.iter(|| { black_box(&needles) .iter() .map(|needle| black_box(&string_vec).binary_search(needle)) .filter(|r| r.is_ok()) .count() }); }); // *** Binary search vec of 500 strings 10 times *** c.bench_function("vzv/binary_search/vzv", |b| { b.iter(|| { black_box(&needles) .iter() .map(|needle| black_box(&vzv).binary_search(needle)) .filter(|r| r.is_ok()) .count() }); }); c.bench_function("vzv/binary_search/single/slice", |b| { b.iter(|| black_box(&string_vec).binary_search(black_box(&single_needle))); }); c.bench_function("vzv/binary_search/single/vzv", |b| { b.iter(|| black_box(&vzv).binary_search(black_box(&single_needle))); }); } #[cfg(feature = "serde")] fn serde_benches(c: &mut Criterion) { let seed = 2021; let (string_vec, _) = random_alphanums(2..=20, 100, seed); let bincode_vec = bincode::serialize(&string_vec).unwrap(); let vzv: VarZeroVec = VarZeroVec::from(&*string_vec); let bincode_vzv = bincode::serialize(&vzv).unwrap(); // *** Deserialize vec of 100 strings *** c.bench_function("vzv/deserialize/string/vec_owned", |b| { b.iter(|| bincode::deserialize::>(black_box(&bincode_vec))); }); // *** Deserialize vec of 100 strings *** 
c.bench_function("vzv/deserialize/string/vec_borrowed", |b| { b.iter(|| bincode::deserialize::>(black_box(&bincode_vec))); }); // *** Deserialize vec of 100 strings *** c.bench_function("vzv/deserialize/string/vzv", |b| { b.iter(|| bincode::deserialize::>(black_box(&bincode_vzv))); }); } // Testing differences between operating on slices with precomputed/non-precomputed indexing info fn vzv_precompute_bench(c: &mut Criterion) { let seed = 2021; let (string_vec, seed) = random_alphanums(2..=20, 500, seed); let (needles, _) = random_alphanums(2..=20, 10, seed); let bytes: Vec = VarZeroVec::::from(&string_vec).into_bytes(); let vzv = VarZeroVec::::parse_bytes(black_box(bytes.as_slice())).unwrap(); let borrowed = vzv.as_components(); let slice = vzv.as_slice(); let single_needle = "lmnop"; c.bench_function("vzv_precompute/get/precomputed", |b| { b.iter(|| black_box(&borrowed).get(100)); }); c.bench_function("vzv_precompute/get/slice", |b| { b.iter(|| black_box(&slice).get(100)); }); c.bench_function("vzv_precompute/search/precomputed", |b| { b.iter(|| black_box(&borrowed).binary_search(single_needle)); }); c.bench_function("vzv_precompute/search/slice", |b| { b.iter(|| black_box(&slice).binary_search(single_needle)); }); c.bench_function("vzv_precompute/search_multi/precomputed", |b| { b.iter(|| { black_box(&needles) .iter() .map(|needle| black_box(&borrowed).binary_search(needle)) .filter(|r| r.is_ok()) .count() }); }); c.bench_function("vzv_precompute/search_multi/slice", |b| { b.iter(|| { black_box(&needles) .iter() .map(|needle| black_box(&slice).binary_search(needle)) .filter(|r| r.is_ok()) .count() }); }); } criterion_group!(benches, overview_bench,); criterion_main!(benches); zerovec-0.11.1/benches/zeromap.rs000064400000000000000000000344771046102023000150320ustar 00000000000000// This file is part of ICU4X. 
For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use std::collections::HashMap; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use zerovec::maps::ZeroMapKV; use zerovec::vecs::{Index32, VarZeroSlice, VarZeroVec}; use zerovec::{ZeroHashMap, ZeroMap}; const DATA: [(&str, &str); 16] = [ ("ar", "Arabic"), ("bn", "Bangla"), ("ccp", "Chakma"), ("chr", "Cherokee"), ("el", "Greek"), ("en", "English"), ("eo", "Esperanto"), ("es", "Spanish"), ("fr", "French"), ("iu", "Inuktitut"), ("ja", "Japanese"), ("ru", "Russian"), ("sr", "Serbian"), ("th", "Thai"), ("tr", "Turkish"), ("zh", "Chinese"), ]; const POSTCARD: [u8; 274] = [ 98, 16, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 7, 0, 0, 0, 10, 0, 0, 0, 12, 0, 0, 0, 14, 0, 0, 0, 16, 0, 0, 0, 18, 0, 0, 0, 20, 0, 0, 0, 22, 0, 0, 0, 24, 0, 0, 0, 26, 0, 0, 0, 28, 0, 0, 0, 30, 0, 0, 0, 32, 0, 0, 0, 97, 114, 98, 110, 99, 99, 112, 99, 104, 114, 101, 108, 101, 110, 101, 111, 101, 115, 102, 114, 105, 117, 106, 97, 114, 117, 115, 114, 116, 104, 116, 114, 122, 104, 173, 1, 16, 0, 0, 0, 6, 0, 0, 0, 12, 0, 0, 0, 18, 0, 0, 0, 26, 0, 0, 0, 31, 0, 0, 0, 38, 0, 0, 0, 47, 0, 0, 0, 54, 0, 0, 0, 60, 0, 0, 0, 69, 0, 0, 0, 77, 0, 0, 0, 84, 0, 0, 0, 91, 0, 0, 0, 95, 0, 0, 0, 102, 0, 0, 0, 65, 114, 97, 98, 105, 99, 66, 97, 110, 103, 108, 97, 67, 104, 97, 107, 109, 97, 67, 104, 101, 114, 111, 107, 101, 101, 71, 114, 101, 101, 107, 69, 110, 103, 108, 105, 115, 104, 69, 115, 112, 101, 114, 97, 110, 116, 111, 83, 112, 97, 110, 105, 115, 104, 70, 114, 101, 110, 99, 104, 73, 110, 117, 107, 116, 105, 116, 117, 116, 74, 97, 112, 97, 110, 101, 115, 101, 82, 117, 115, 115, 105, 97, 110, 83, 101, 114, 98, 105, 97, 110, 84, 104, 97, 105, 84, 117, 114, 107, 105, 115, 104, 67, 104, 105, 110, 101, 115, 101, ]; const POSTCARD_HASHMAP: [u8; 176] = [ 16, 2, 114, 117, 7, 82, 117, 115, 115, 105, 97, 110, 3, 99, 99, 112, 6, 67, 104, 97, 
107, 109, 97, 3, 99, 104, 114, 8, 67, 104, 101, 114, 111, 107, 101, 101, 2, 116, 114, 7, 84, 117, 114, 107, 105, 115, 104, 2, 116, 104, 4, 84, 104, 97, 105, 2, 106, 97, 8, 74, 97, 112, 97, 110, 101, 115, 101, 2, 101, 115, 7, 83, 112, 97, 110, 105, 115, 104, 2, 101, 111, 9, 69, 115, 112, 101, 114, 97, 110, 116, 111, 2, 122, 104, 7, 67, 104, 105, 110, 101, 115, 101, 2, 115, 114, 7, 83, 101, 114, 98, 105, 97, 110, 2, 101, 110, 7, 69, 110, 103, 108, 105, 115, 104, 2, 105, 117, 9, 73, 110, 117, 107, 116, 105, 116, 117, 116, 2, 102, 114, 6, 70, 114, 101, 110, 99, 104, 2, 98, 110, 6, 66, 97, 110, 103, 108, 97, 2, 101, 108, 5, 71, 114, 101, 101, 107, 2, 97, 114, 6, 65, 114, 97, 98, 105, 99, ]; const POSTCARD_ZEROHASHMAP: [u8; 404] = [ 128, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 98, 16, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 8, 0, 0, 0, 10, 0, 0, 0, 13, 0, 0, 0, 15, 0, 0, 0, 17, 0, 0, 0, 19, 0, 0, 0, 21, 0, 0, 0, 24, 0, 0, 0, 26, 0, 0, 0, 28, 0, 0, 0, 30, 0, 0, 0, 32, 0, 0, 0, 115, 114, 101, 111, 116, 114, 97, 114, 105, 117, 99, 99, 112, 102, 114, 101, 115, 106, 97, 122, 104, 99, 104, 114, 98, 110, 101, 110, 101, 108, 114, 117, 116, 104, 173, 1, 16, 0, 0, 0, 7, 0, 0, 0, 16, 0, 0, 0, 23, 0, 0, 0, 29, 0, 0, 0, 38, 0, 0, 0, 44, 0, 0, 0, 50, 0, 0, 0, 57, 0, 0, 0, 65, 0, 0, 0, 72, 0, 0, 0, 80, 0, 0, 0, 86, 0, 0, 0, 93, 0, 0, 0, 98, 0, 0, 0, 105, 0, 0, 0, 83, 101, 114, 98, 105, 97, 110, 69, 115, 112, 101, 114, 97, 110, 116, 111, 84, 117, 114, 107, 105, 115, 104, 65, 114, 97, 98, 105, 99, 73, 110, 117, 107, 116, 105, 116, 117, 116, 67, 104, 97, 107, 109, 97, 70, 114, 101, 110, 99, 104, 83, 112, 97, 110, 105, 115, 104, 74, 97, 112, 
97, 110, 101, 115, 101, 67, 104, 105, 110, 101, 115, 101, 67, 104, 101, 114, 111, 107, 101, 101, 66, 97, 110, 103, 108, 97, 69, 110, 103, 108, 105, 115, 104, 71, 114, 101, 101, 107, 82, 117, 115, 115, 105, 97, 110, 84, 104, 97, 105, ]; /// Run this function to print new data to the console. /// Requires the optional `serde` Cargo feature. #[allow(dead_code)] fn generate_zeromap() { let map = build_zeromap(false); let buf = postcard::to_stdvec(&map).unwrap(); println!("{buf:?}"); } /// Run this function to print new data to the console. /// Requires the optional `serde` Cargo feature. #[allow(dead_code)] fn generate_hashmap() { let map = build_hashmap(false); let buf = postcard::to_stdvec(&map).unwrap(); println!("{buf:?}"); } /// Run this function to print new data to the console. /// Requires the optional `serde` Cargo feature. #[allow(dead_code)] fn generate_zerohashmap() { let map = build_zerohashmap(false); let buf = postcard::to_stdvec(&map).unwrap(); println!("{buf:?}"); } fn overview_bench(c: &mut Criterion) { bench_zeromap(c); bench_hashmap(c); bench_zerohashmap(c); } fn bench_zeromap(c: &mut Criterion) { // Uncomment the following line to re-generate the const data. 
// generate_hashmap(); bench_deserialize(c); bench_deserialize_large(c); bench_lookup(c); bench_lookup_large(c); } fn build_zeromap(large: bool) -> ZeroMap<'static, Index32Str, Index32Str> { // TODO(#2826): This should use ZeroMap::from_iter, however that currently takes // *minutes*, whereas this code runs in milliseconds let mut keys = Vec::new(); let mut values = Vec::new(); let mut data = DATA.to_vec(); data.sort(); for &(key, value) in data.iter() { if large { for n in 0..8192 { keys.push(format!("{key}{n:04}")); values.push(indexify(value)); } } else { keys.push(key.to_owned()); values.push(indexify(value)); } } let keys = keys.iter().map(|s| indexify(s)).collect::>(); // keys are sorted by construction unsafe { ZeroMap::from_parts_unchecked(VarZeroVec::from(&keys), VarZeroVec::from(&values)) } } fn bench_deserialize(c: &mut Criterion) { c.bench_function("zeromap/deserialize/small", |b| { b.iter(|| { let map: ZeroMap = postcard::from_bytes(black_box(&POSTCARD)).unwrap(); assert_eq!(map.get(indexify("iu")).map(|x| &x.0), Some("Inuktitut")); }) }); } fn bench_deserialize_large(c: &mut Criterion) { let buf = large_zeromap_postcard_bytes(); c.bench_function("zeromap/deserialize/large", |b| { b.iter(|| { let map: ZeroMap = postcard::from_bytes(black_box(&buf)).unwrap(); assert_eq!(map.get(indexify("iu3333")).map(|x| &x.0), Some("Inuktitut")); }) }); } fn bench_lookup(c: &mut Criterion) { let map: ZeroMap = postcard::from_bytes(black_box(&POSTCARD)).unwrap(); c.bench_function("zeromap/lookup/small", |b| { b.iter(|| { assert_eq!( map.get(black_box(indexify("iu"))).map(|x| &x.0), Some("Inuktitut") ); assert_eq!(map.get(black_box(indexify("zz"))).map(|x| &x.0), None); }); }); } fn bench_lookup_large(c: &mut Criterion) { let buf = large_zeromap_postcard_bytes(); let map: ZeroMap = postcard::from_bytes(&buf).unwrap(); c.bench_function("zeromap/lookup/large", |b| { b.iter(|| { assert_eq!( map.get(black_box(indexify("iu3333"))).map(|x| &x.0), Some("Inuktitut") ); 
assert_eq!(map.get(black_box(indexify("zz"))).map(|x| &x.0), None); }); }); } fn large_zeromap_postcard_bytes() -> Vec { postcard::to_stdvec(&build_zeromap(true)).unwrap() } fn bench_hashmap(c: &mut Criterion) { // Uncomment the following line to re-generate the const data. // generate_hashmap(); bench_deserialize_hashmap(c); bench_deserialize_large_hashmap(c); bench_lookup_hashmap(c); bench_lookup_large_hashmap(c); } fn build_hashmap(large: bool) -> HashMap { let mut map: HashMap = HashMap::new(); for &(key, value) in DATA.iter() { if large { for n in 0..8192 { map.insert(format!("{key}{n}"), value.to_owned()); } } else { map.insert(key.to_owned(), value.to_owned()); } } map } fn bench_deserialize_hashmap(c: &mut Criterion) { c.bench_function("zeromap/deserialize/small/hashmap", |b| { b.iter(|| { let map: HashMap = postcard::from_bytes(black_box(&POSTCARD_HASHMAP)).unwrap(); assert_eq!(map.get("iu"), Some(&"Inuktitut".to_owned())); }) }); } fn bench_deserialize_large_hashmap(c: &mut Criterion) { let buf = large_hashmap_postcard_bytes(); c.bench_function("zeromap/deserialize/large/hashmap", |b| { b.iter(|| { let map: HashMap = postcard::from_bytes(black_box(&buf)).unwrap(); assert_eq!(map.get("iu3333"), Some(&"Inuktitut".to_owned())); }) }); } fn bench_lookup_hashmap(c: &mut Criterion) { let map: HashMap = postcard::from_bytes(black_box(&POSTCARD_HASHMAP)).unwrap(); c.bench_function("zeromap/lookup/small/hashmap", |b| { b.iter(|| { assert_eq!(map.get(black_box("iu")), Some(&"Inuktitut".to_owned())); assert_eq!(map.get(black_box("zz")), None); }); }); } fn bench_lookup_large_hashmap(c: &mut Criterion) { let buf = large_hashmap_postcard_bytes(); let map: HashMap = postcard::from_bytes(&buf).unwrap(); c.bench_function("zeromap/lookup/large/hashmap", |b| { b.iter(|| { assert_eq!(map.get(black_box("iu3333")), Some(&"Inuktitut".to_owned())); assert_eq!(map.get(black_box("zz")), None); }); }); } fn large_hashmap_postcard_bytes() -> Vec { 
postcard::to_stdvec(&build_hashmap(true)).unwrap() } fn bench_zerohashmap(c: &mut Criterion) { // Uncomment the following line to re-generate the const data. // generate_zerohashmap(); bench_deserialize_zerohashmap(c); bench_deserialize_large_zerohashmap(c); bench_zerohashmap_lookup(c); bench_zerohashmap_lookup_large(c); } fn build_zerohashmap(large: bool) -> ZeroHashMap<'static, Index32Str, Index32Str> { let mut kv = Vec::new(); for (key, value) in DATA.iter() { if large { for n in 0..512 { kv.push((format!("{key}{n}"), indexify(value))); } } else { kv.push((key.to_string(), indexify(value))); } } ZeroHashMap::from_iter(kv.iter().map(|kv| (indexify(&kv.0), kv.1))) } fn bench_deserialize_zerohashmap(c: &mut Criterion) { c.bench_function("zerohashmap/deserialize/small", |b| { b.iter(|| { let map: ZeroHashMap = postcard::from_bytes(black_box(&POSTCARD_ZEROHASHMAP)).unwrap(); assert_eq!(map.get(indexify("iu")).map(|x| &x.0), Some("Inuktitut")); }) }); } fn bench_deserialize_large_zerohashmap(c: &mut Criterion) { let buf = large_zerohashmap_postcard_bytes(); c.bench_function("zerohashmap/deserialize/large", |b| { b.iter(|| { let map: ZeroHashMap = postcard::from_bytes(black_box(&buf)).unwrap(); assert_eq!(map.get(indexify("iu333")).map(|x| &x.0), Some("Inuktitut")); }) }); } fn bench_zerohashmap_lookup(c: &mut Criterion) { let zero_hashmap: ZeroHashMap = postcard::from_bytes(black_box(&POSTCARD_ZEROHASHMAP)).unwrap(); c.bench_function("zerohashmap/lookup/small", |b| { b.iter(|| { assert_eq!( zero_hashmap.get(black_box(indexify("iu"))).map(|x| &x.0), Some("Inuktitut") ); assert_eq!( zero_hashmap.get(black_box(indexify("zz"))).map(|x| &x.0), None ); }); }); } fn bench_zerohashmap_lookup_large(c: &mut Criterion) { let buf = large_zerohashmap_postcard_bytes(); let zero_hashmap: ZeroHashMap = postcard::from_bytes(&buf).unwrap(); c.bench_function("zerohashmap/lookup/large", |b| { b.iter(|| { assert_eq!( zero_hashmap.get(black_box(indexify("iu333"))).map(|x| &x.0), 
Some("Inuktitut") ); assert_eq!( zero_hashmap.get(black_box(indexify("zz"))).map(|x| &x.0), None ); }); }); } fn large_zerohashmap_postcard_bytes() -> Vec { postcard::to_stdvec(&build_zerohashmap(true)).unwrap() } criterion_group!(benches, overview_bench); criterion_main!(benches); /// This type lets us use a u32-index-format VarZeroVec with the ZeroMap. /// /// Eventually we will have a FormatSelector type that lets us do `ZeroMap, V>` /// (https://github.com/unicode-org/icu4x/issues/2312) /// /// , isn't actually important; it's just more convenient to use make_varule to get the /// full suite of traits instead of `#[derive(VarULE)]`. (With `#[derive(VarULE)]` we would have to manually /// define a Serialize implementation, and that would be gnarly) /// https://github.com/unicode-org/icu4x/issues/2310 tracks being able to do this with derive(ULE) #[zerovec::make_varule(Index32Str)] #[zerovec::skip_derive(ZeroMapKV)] #[derive(Eq, PartialEq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)] #[zerovec::derive(Serialize, Deserialize, Hash)] pub(crate) struct Index32StrBorrowed<'a>(#[serde(borrow)] pub &'a str); impl<'a> ZeroMapKV<'a> for Index32Str { type Container = VarZeroVec<'a, Index32Str, Index32>; type Slice = VarZeroSlice; type GetType = Index32Str; type OwnedType = Box; } #[inline] fn indexify(s: &str) -> &Index32Str { unsafe { &*(s as *const str as *const Index32Str) } } zerovec-0.11.1/benches/zerovec.rs000064400000000000000000000122631046102023000150170ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
use criterion::{black_box, criterion_group, criterion_main, Criterion}; use rand::SeedableRng; use rand_distr::{Distribution, LogNormal}; use rand_pcg::Lcg64Xsh32; use std::fmt; #[path = "../src/samples.rs"] mod samples; use samples::*; use zerovec::ule::*; use zerovec::ZeroVec; #[repr(align(8))] #[derive(Default)] struct AlignedBuffer(Vec); /// Generate a large list of u32s for stress testing. #[allow(dead_code)] fn get_needles_and_haystack() -> (Vec, Vec) { // Lcg64Xsh32 is a small, fast PRNG for reproducible benchmarks. // LogNormal(10, 1) generates numbers with mean 36315 and mode 8103, a distribution that, in // spirit, correlates with Unicode properties (many low values and a long tail of high values) let mut rng = Lcg64Xsh32::seed_from_u64(2021); let dist = LogNormal::new(10.0, 1.0).unwrap(); let haystack = { let mut unsorted: Vec = (&dist) .sample_iter(&mut rng) .take(1000) .map(|f| f as u32) .collect(); unsorted.sort_unstable(); unsorted }; let needles: Vec = (&dist) .sample_iter(&mut rng) .take(100) .map(|f| f as u32) .collect(); (needles, haystack) } #[allow(dead_code, clippy::ptr_arg)] fn vec_to_unaligned_uvec<'a, T>(vec: &Vec, buffer: &'a mut AlignedBuffer) -> ZeroVec<'a, T> where T: EqULE + Copy + PartialEq + fmt::Debug, { // Pad with zero to ensure it is not aligned buffer.0.push(0); buffer .0 .extend(ZeroVec::from_slice_or_alloc(vec.as_slice()).as_bytes()); ZeroVec::::parse_bytes(&buffer.0[1..]).unwrap() } fn overview_bench(c: &mut Criterion) { c.bench_function("zerovec/overview", |b| { b.iter(|| { ZeroVec::::parse_bytes(black_box(TEST_BUFFER_LE)) .unwrap() .iter() .sum::() }); }); { sum_benches(c); binary_search_benches(c); } } fn sum_benches(c: &mut Criterion) { let normal_slice = &TEST_SLICE[0..19]; let aligned_ule_slice = ::ULE::parse_bytes_to_slice(&TEST_BUFFER_LE[0..76]).unwrap(); let unalign_ule_slice = ::ULE::parse_bytes_to_slice(&TEST_BUFFER_LE[1..77]).unwrap(); assert_eq!(normal_slice.len(), aligned_ule_slice.len()); 
assert_eq!(normal_slice.len(), unalign_ule_slice.len()); c.bench_function("zerovec/sum/sample/slice", |b| { b.iter(|| { black_box(normal_slice) .iter() .copied() .fold(0u32, |sum, val| sum.wrapping_add(val)) }) }); c.bench_function("zerovec/sum/sample/zerovec_aligned", |b| { b.iter(|| { ZeroVec::::new_borrowed(black_box(aligned_ule_slice)) .iter() .fold(0u32, |sum, val| sum.wrapping_add(val)) }); }); c.bench_function("zerovec/sum/sample/zerovec_unaligned", |b| { b.iter(|| { ZeroVec::::new_borrowed(black_box(unalign_ule_slice)) .iter() .fold(0u32, |sum, val| sum.wrapping_add(val)) }); }); } fn binary_search_benches(c: &mut Criterion) { c.bench_function("zerovec/binary_search/sample/slice", |b| { b.iter(|| black_box(&TEST_SLICE).binary_search(&0x0c0d0c)); }); c.bench_function("zerovec/binary_search/sample/zerovec", |b| { let zerovec = ZeroVec::::parse_bytes(black_box(TEST_BUFFER_LE)).unwrap(); b.iter(|| zerovec.binary_search(&0x0c0d0c)); }); let (needles_100, haystack) = get_needles_and_haystack(); // Only search for 50 needles to put all figures in nanoseconds let needles_50 = &needles_100[0..50]; // *** Binary search vec of 1000 `u32` 50 times *** c.bench_function("zerovec/binary_search/log_normal/slice", |b| { b.iter(|| { black_box(&needles_50) .iter() .map(|needle| black_box(&haystack).binary_search(needle)) .filter(|r| r.is_ok()) .count() }); }); let mut buffer = AlignedBuffer::default(); let zerovec = vec_to_unaligned_uvec(black_box(&haystack), &mut buffer); assert_eq!(zerovec, haystack.as_slice()); // *** Binary search vec of 1000 `u32` 50 times *** c.bench_function("zerovec/binary_search/log_normal/zerovec", |b| { b.iter(|| { black_box(&needles_50) .iter() .map(|needle| black_box(&zerovec).binary_search(needle)) .filter(|r| r.is_ok()) .count() }); }); let single_needle = 36315; c.bench_function("zerovec/binary_search/log_normal/single/slice", |b| { b.iter(|| black_box(&haystack).binary_search(&single_needle)); }); 
c.bench_function("zerovec/binary_search/log_normal/single/zerovec", |b| { b.iter(|| black_box(&zerovec).binary_search(&single_needle)); }); } criterion_group!(benches, overview_bench,); criterion_main!(benches); zerovec-0.11.1/benches/zerovec_iai.rs000064400000000000000000000033601046102023000156370ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use iai::black_box; #[path = "../src/samples.rs"] mod samples; use samples::*; use zerovec::VarZeroSlice; use zerovec::ZeroVec; fn sum_slice() -> u32 { black_box(TEST_SLICE).iter().sum::() } fn sum_zerovec() -> u32 { ZeroVec::::parse_bytes(black_box(TEST_BUFFER_LE)) .unwrap() .iter() .sum::() } fn binarysearch_slice() -> Result { black_box(TEST_SLICE).binary_search(&0x0c0d0c) } fn binarysearch_zerovec() -> Result { ZeroVec::::parse_bytes(black_box(TEST_BUFFER_LE)) .unwrap() .binary_search(&0x0c0d0c) } fn varzeroslice_parse_get() -> Option<&'static str> { let slice: &'static VarZeroSlice = VarZeroSlice::parse_bytes(black_box(TEST_VARZEROSLICE_BYTES)).unwrap(); slice.get(black_box(1)) } fn varzeroslice_get() -> Option<&'static str> { // Safety: The bytes are valid. let slice: &'static VarZeroSlice = unsafe { VarZeroSlice::from_bytes_unchecked(black_box(TEST_VARZEROSLICE_BYTES)) }; slice.get(black_box(1)) } fn varzeroslice_get_unchecked() -> &'static str { // Safety: The bytes are valid. let slice: &'static VarZeroSlice = unsafe { VarZeroSlice::from_bytes_unchecked(black_box(TEST_VARZEROSLICE_BYTES)) }; // Safety: The VarZeroVec has length 4. 
unsafe { slice.get_unchecked(black_box(1)) } }

iai::main!(
    sum_slice,
    sum_zerovec,
    binarysearch_slice,
    binarysearch_zerovec,
    varzeroslice_parse_get,
    varzeroslice_get,
    varzeroslice_get_unchecked,
);
zerovec-0.11.1/benches/zerovec_serde.rs000064400000000000000000000114531046102023000162010ustar 00000000000000
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use criterion::{black_box, criterion_group, criterion_main, Criterion};
use rand::SeedableRng;
use rand_distr::{Distribution, LogNormal};
use rand_pcg::Lcg64Xsh32;

#[path = "../src/samples.rs"]
mod samples;
use samples::*;

use zerovec::ZeroVec;

/// Generate a large list of u32s for stress testing.
///
/// The PRNG is seeded with a fixed value, so the same `count` always yields the same numbers.
#[allow(dead_code)]
fn random_numbers(count: usize) -> Vec<u32> {
    // Lcg64Xsh32 is a small, fast PRNG for reproducible benchmarks.
    // LogNormal(10, 1) generates numbers with mean 36315 and mode 8103, a distribution that, in
    // spirit, correlates with Unicode properties (many low values and a long tail of high values)
    let mut rng = Lcg64Xsh32::seed_from_u64(2021);
    let dist = LogNormal::new(10.0, 1.0).unwrap();
    (&dist)
        .sample_iter(&mut rng)
        .take(count)
        .map(|f| f as u32)
        .collect()
}

/// Registers the headline benchmark and then all of the detailed benchmark groups.
fn overview_bench(c: &mut Criterion) {
    c.bench_function("zerovec_serde/overview", |b| {
        // Same as "zerovec_serde/deserialize_sum/u32/zerovec"
        let buffer =
            bincode::serialize(&ZeroVec::<u32>::parse_bytes(black_box(TEST_BUFFER_LE)).unwrap())
                .unwrap();
        b.iter(|| {
            bincode::deserialize::<ZeroVec<u32>>(&buffer)
                .unwrap()
                .iter()
                .sum::<u32>()
        });
    });

    {
        u32_benches(c);
        char_benches(c);
        stress_benches(c);
    }
}

/// Serialization and deserialize-then-sum benchmarks over the `u32` sample data,
/// comparing `Vec<u32>` against `ZeroVec<u32>`.
fn u32_benches(c: &mut Criterion) {
    c.bench_function("zerovec_serde/serialize/u32/slice", |b| {
        b.iter(|| bincode::serialize(&Vec::from(black_box(TEST_SLICE))));
    });

    c.bench_function("zerovec_serde/deserialize_sum/u32/slice", |b| {
        // The buffer is built once, outside of the measured closure.
        let buffer = bincode::serialize(&Vec::from(black_box(TEST_SLICE))).unwrap();
        b.iter(|| {
            bincode::deserialize::<Vec<u32>>(&buffer)
                .unwrap()
                .iter()
                .sum::<u32>()
        });
    });

    c.bench_function("zerovec_serde/serialize/u32/zerovec", |b| {
        b.iter(|| bincode::serialize(&ZeroVec::from_slice_or_alloc(black_box(TEST_SLICE))));
    });

    c.bench_function("zerovec_serde/deserialize_sum/u32/zerovec", |b| {
        // The buffer is built once, outside of the measured closure.
        let buffer =
            bincode::serialize(&ZeroVec::<u32>::parse_bytes(black_box(TEST_BUFFER_LE)).unwrap())
                .unwrap();
        b.iter(|| {
            bincode::deserialize::<ZeroVec<u32>>(&buffer)
                .unwrap()
                .iter()
                .sum::<u32>()
        });
    });
}

/// Serialization and deserialization benchmarks over a small fixed set of `char`s,
/// comparing `Vec<char>` against `ZeroVec<char>`.
fn char_benches(c: &mut Criterion) {
    const ORIGINAL_CHARS: &[char] = &[
        'ⶢ', '⺇', 'Ⱜ', '◁', '◩', '⌂', '⼅', '⏻', '⢜', '◊', 'ⲫ', '⏷', '◢', '⟉', '℞',
    ];

    let char_zero_vec = &ZeroVec::alloc_from_slice(ORIGINAL_CHARS);

    c.bench_function("zerovec_serde/serialize/char/slice", |b| {
        b.iter(|| bincode::serialize(black_box(&Vec::from(ORIGINAL_CHARS))));
    });

    c.bench_function("zerovec_serde/deserialize/char/slice", |b| {
        let buffer = bincode::serialize(black_box(&Vec::from(ORIGINAL_CHARS))).unwrap();
        b.iter(|| bincode::deserialize::<Vec<char>>(&buffer));
    });

    c.bench_function("zerovec_serde/serialize/char/zerovec", |b| {
        b.iter(|| bincode::serialize(black_box(char_zero_vec)));
    });

    c.bench_function("zerovec_serde/deserialize/char/zerovec", |b| {
        let buffer = bincode::serialize(black_box(char_zero_vec)).unwrap();
        b.iter(|| bincode::deserialize::<ZeroVec<char>>(&buffer));
    });
}

/// Deserialization and summation benchmarks over 100 pseudo-random `u32`s,
/// comparing `Vec<u32>` against `ZeroVec<u32>`.
fn stress_benches(c: &mut Criterion) {
    let number_vec = random_numbers(100);
    let bincode_vec = bincode::serialize(&number_vec).unwrap();
    let zerovec_aligned = ZeroVec::from_slice_or_alloc(number_vec.as_slice());
    let bincode_zerovec = bincode::serialize(&zerovec_aligned).unwrap();

    // *** Deserialize vec of 100 `u32` ***
    c.bench_function("zerovec_serde/deserialize/stress/vec", |b| {
        b.iter(|| bincode::deserialize::<Vec<u32>>(&bincode_vec));
    });

    // *** Deserialize zerovec of 100 `u32` ***
    c.bench_function("zerovec_serde/deserialize/stress/zerovec", |b| {
        b.iter(|| bincode::deserialize::<ZeroVec<u32>>(&bincode_zerovec));
    });

    // *** Compute sum of vec of 100 `u32` ***
    c.bench_function("zerovec_serde/sum/stress/vec", |b| {
        b.iter(|| black_box(&number_vec).iter().sum::<u32>());
    });

    // *** Compute sum of zerovec of 100 `u32` ***
    let zerovec = ZeroVec::<u32>::parse_bytes(zerovec_aligned.as_bytes()).unwrap();
    c.bench_function("zerovec_serde/sum/stress/zerovec", |b| {
        b.iter(|| black_box(&zerovec).iter().sum::<u32>());
    });
}

criterion_group!(benches, overview_bench,);
criterion_main!(benches);
zerovec-0.11.1/examples/zv_serde.rs000064400000000000000000000027741046102023000154000ustar 00000000000000
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

// This example demonstrates zero-copy, zero-allocation deserialization of a u16 vector
// stored in a Postcard buffer.

#![no_main] // https://github.com/unicode-org/icu4x/issues/395
icu_benchmark_macros::instrument!();

use zerovec::ZeroVec;

/// Example data struct whose only field borrows from the deserializer input.
#[derive(serde::Serialize, serde::Deserialize)]
struct DataStruct<'s> {
    #[serde(borrow)]
    pub nums: ZeroVec<'s, u16>,
}

/// The numbers encoded in `POSTCARD_BYTES`.
const U16_SLICE: [u16; 16] = [
    196, 989, 414, 731, 660, 217, 716, 353, 218, 730, 245, 846, 122, 294, 922, 488,
];

/// Postcard encoding of a `DataStruct` holding `U16_SLICE`:
/// a one-byte length prefix (0x20 = 32) followed by 16 little-endian `u16`s.
const POSTCARD_BYTES: [u8; 33] = [
    0x20, 0xc4, 0x0, 0xdd, 0x3, 0x9e, 0x1, 0xdb, 0x2, 0x94, 0x2, 0xd9, 0x0, 0xcc, 0x2, 0x61, 0x1,
    0xda, 0x0, 0xda, 0x2, 0xf5, 0x0, 0x4e, 0x3, 0x7a, 0x0, 0x26, 0x1, 0x9a, 0x3, 0xe8, 0x1,
];

/// Prints the Postcard and raw `ZeroVec` encodings of `U16_SLICE`; used to regenerate
/// `POSTCARD_BYTES`.
#[allow(dead_code)]
fn serialize() {
    let data = DataStruct {
        nums: ZeroVec::from_slice_or_alloc(&U16_SLICE),
    };
    let postcard_bytes = postcard::to_stdvec(&data).expect("Serialization should be successful");
    println!("Postcard bytes: {postcard_bytes:#x?}");
    println!("ZeroVec bytes: {:#x?}", data.nums.as_bytes());
}

fn main() {
    // Un-comment the following line to generate postcard data:
    // serialize();

    let data: DataStruct = postcard::from_bytes(&POSTCARD_BYTES).expect("Valid bytes");
    let result = data.nums.iter().sum::<u16>();
    assert_eq!(8141, result);
}
zerovec-0.11.1/src/cow.rs000064400000000000000000000351211046102023000133100ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::ule::{EncodeAsVarULE, UleError, VarULE}; #[cfg(feature = "alloc")] use alloc::boxed::Box; use core::fmt; use core::marker::PhantomData; #[cfg(feature = "alloc")] use core::mem::ManuallyDrop; use core::ops::Deref; use core::ptr::NonNull; use zerofrom::ZeroFrom; /// Copy-on-write type that efficiently represents [`VarULE`] types as their bitstream representation. /// /// The primary use case for [`VarULE`] types is the ability to store complex variable-length datastructures /// inside variable-length collections like [`crate::VarZeroVec`]. /// /// Underlying this ability is the fact that [`VarULE`] types can be efficiently represented as a flat /// bytestream. /// /// In zero-copy cases, sometimes one wishes to unconditionally use this bytestream representation, for example /// to save stack size. A struct with five `Cow<'a, str>`s is not as stack-efficient as a single `Cow` containing /// the bytestream representation of, say, `Tuple5VarULE`. /// /// This type helps in this case: It is logically a `Cow<'a, V>`, with some optimizations, that is guaranteed /// to serialize as a byte stream in machine-readable scenarios. /// /// During human-readable serialization, it will fall back to the serde impls on `V`, which ought to have /// a human-readable variant. pub struct VarZeroCow<'a, V: ?Sized> { /// Safety invariant: Contained slice must be a valid V /// It may or may not have a lifetime valid for 'a, it must be valid for as long as this type is around. 
raw: RawVarZeroCow, marker1: PhantomData<&'a V>, #[cfg(feature = "alloc")] marker2: PhantomData>, } /// VarZeroCow without the `V` to simulate a dropck eyepatch /// (i.e., prove to rustc that the dtor is not able to observe V or 'a) /// /// This is effectively `Cow<'a, [u8]>`, with the lifetime managed externally struct RawVarZeroCow { /// Pointer to data /// /// # Safety Invariants /// /// 1. This slice must always be valid as a byte slice /// 2. If `owned` is true, this slice can be freed. /// 3. VarZeroCow, the only user of this type, will impose an additional invariant that the buffer is a valid V buf: NonNull<[u8]>, /// The buffer is `Box<[u8]>` if true #[cfg(feature = "alloc")] owned: bool, // Safety: We do not need any PhantomDatas here, since the Drop impl does not observe borrowed data // if there is any. } #[cfg(feature = "alloc")] impl Drop for RawVarZeroCow { fn drop(&mut self) { // Note: this drop impl NEVER observes borrowed data (which may have already been cleaned up by the time the impl is called) if self.owned { unsafe { // Safety: (Invariant 2 on buf) // since owned is true, this is a valid Box<[u8]> and can be cleaned up let _ = Box::<[u8]>::from_raw(self.buf.as_ptr()); } } } } // This is mostly just a `Cow<[u8]>`, safe to implement Send and Sync on unsafe impl Send for RawVarZeroCow {} unsafe impl Sync for RawVarZeroCow {} impl Clone for RawVarZeroCow { fn clone(&self) -> Self { #[cfg(feature = "alloc")] if self.is_owned() { // This clones the box let b: Box<[u8]> = self.as_bytes().into(); let b = ManuallyDrop::new(b); let buf: NonNull<[u8]> = (&**b).into(); return Self { // Invariants upheld: // 1 & 3: The bytes came from `self` so they're a valid value and byte slice // 2: This is owned (we cloned it), so we set owned to true. 
buf, owned: true, }; } // Unfortunately we can't just use `new_borrowed(self.deref())` since the lifetime is shorter Self { // Invariants upheld: // 1 & 3: The bytes came from `self` so they're a valid value and byte slice // 2: This is borrowed (we're sharing a borrow), so we set owned to false. buf: self.buf, #[cfg(feature = "alloc")] owned: false, } } } impl<'a, V: ?Sized> Clone for VarZeroCow<'a, V> { fn clone(&self) -> Self { let raw = self.raw.clone(); // Invariant upheld: raw came from a valid VarZeroCow, so it // is a valid V unsafe { Self::from_raw(raw) } } } impl<'a, V: VarULE + ?Sized> VarZeroCow<'a, V> { /// Construct from a slice. Errors if the slice doesn't represent a valid `V` pub fn parse_bytes(bytes: &'a [u8]) -> Result { let val = V::parse_bytes(bytes)?; Ok(Self::new_borrowed(val)) } /// Construct from an owned slice. Errors if the slice doesn't represent a valid `V` #[cfg(feature = "alloc")] pub fn parse_owned_bytes(bytes: Box<[u8]>) -> Result { V::validate_bytes(&bytes)?; let bytes = ManuallyDrop::new(bytes); let buf: NonNull<[u8]> = (&**bytes).into(); let raw = RawVarZeroCow { // Invariants upheld: // 1 & 3: The bytes came from `val` so they're a valid value and byte slice // 2: This is owned, so we set owned to true. buf, owned: true, }; Ok(Self { raw, marker1: PhantomData, #[cfg(feature = "alloc")] marker2: PhantomData, }) } /// Construct from a slice that is known to represent a valid `V` /// /// # Safety /// /// `bytes` must be a valid `V`, i.e. it must successfully pass through /// `V::parse_bytes()` or `V::validate_bytes()`. pub const unsafe fn from_bytes_unchecked(bytes: &'a [u8]) -> Self { unsafe { // Safety: bytes is an &T which is always non-null let buf: NonNull<[u8]> = NonNull::new_unchecked(bytes as *const [u8] as *mut [u8]); let raw = RawVarZeroCow { // Invariants upheld: // 1 & 3: Passed upstream to caller // 2: This is borrowed, so we set owned to false. 
buf, #[cfg(feature = "alloc")] owned: false, }; // Invariant passed upstream to caller Self::from_raw(raw) } } /// Construct this from an [`EncodeAsVarULE`] version of the contained type /// /// Will always construct an owned version #[cfg(feature = "alloc")] pub fn from_encodeable>(encodeable: &E) -> Self { let b = crate::ule::encode_varule_to_box(encodeable); Self::new_owned(b) } /// Construct a new borrowed version of this pub fn new_borrowed(val: &'a V) -> Self { unsafe { // Safety: val is a valid V, by type Self::from_bytes_unchecked(val.as_bytes()) } } /// Construct a new borrowed version of this #[cfg(feature = "alloc")] pub fn new_owned(val: Box) -> Self { let val = ManuallyDrop::new(val); let buf: NonNull<[u8]> = val.as_bytes().into(); let raw = RawVarZeroCow { // Invariants upheld: // 1 & 3: The bytes came from `val` so they're a valid value and byte slice // 2: This is owned, so we set owned to true. buf, #[cfg(feature = "alloc")] owned: true, }; // The bytes came from `val`, so it's a valid value unsafe { Self::from_raw(raw) } } } impl<'a, V: ?Sized> VarZeroCow<'a, V> { /// Whether or not this is owned pub fn is_owned(&self) -> bool { self.raw.is_owned() } /// Get the byte representation of this type /// /// Is also always a valid `V` and can be passed to /// `V::from_bytes_unchecked()` pub fn as_bytes(&self) -> &[u8] { // The valid V invariant comes from Invariant 2 self.raw.as_bytes() } /// Invariant: `raw` must wrap a valid V, either owned or borrowed for 'a const unsafe fn from_raw(raw: RawVarZeroCow) -> Self { Self { // Invariant passed up to caller raw, marker1: PhantomData, #[cfg(feature = "alloc")] marker2: PhantomData, } } } impl RawVarZeroCow { /// Whether or not this is owned #[inline] pub fn is_owned(&self) -> bool { #[cfg(feature = "alloc")] return self.owned; #[cfg(not(feature = "alloc"))] return false; } /// Get the byte representation of this type #[inline] pub fn as_bytes(&self) -> &[u8] { // Safety: Invariant 1 on self.buf unsafe { 
self.buf.as_ref() } } } impl<'a, V: VarULE + ?Sized> Deref for VarZeroCow<'a, V> { type Target = V; fn deref(&self) -> &V { // Safety: From invariant 2 on self.buf unsafe { V::from_bytes_unchecked(self.as_bytes()) } } } impl<'a, V: VarULE + ?Sized> From<&'a V> for VarZeroCow<'a, V> { fn from(other: &'a V) -> Self { Self::new_borrowed(other) } } #[cfg(feature = "alloc")] impl<'a, V: VarULE + ?Sized> From> for VarZeroCow<'a, V> { fn from(other: Box) -> Self { Self::new_owned(other) } } impl<'a, V: VarULE + ?Sized + fmt::Debug> fmt::Debug for VarZeroCow<'a, V> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { self.deref().fmt(f) } } // We need manual impls since `#[derive()]` is disallowed on packed types impl<'a, V: VarULE + ?Sized + PartialEq> PartialEq for VarZeroCow<'a, V> { fn eq(&self, other: &Self) -> bool { self.deref().eq(other.deref()) } } impl<'a, V: VarULE + ?Sized + Eq> Eq for VarZeroCow<'a, V> {} impl<'a, V: VarULE + ?Sized + PartialOrd> PartialOrd for VarZeroCow<'a, V> { fn partial_cmp(&self, other: &Self) -> Option { self.deref().partial_cmp(other.deref()) } } impl<'a, V: VarULE + ?Sized + Ord> Ord for VarZeroCow<'a, V> { fn cmp(&self, other: &Self) -> core::cmp::Ordering { self.deref().cmp(other.deref()) } } // # Safety // // encode_var_ule_len: Produces the length of the contained bytes, which are known to be a valid V by invariant // // encode_var_ule_write: Writes the contained bytes, which are known to be a valid V by invariant unsafe impl<'a, V: VarULE + ?Sized> EncodeAsVarULE for VarZeroCow<'a, V> { fn encode_var_ule_as_slices(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { // unnecessary if the other two are implemented unreachable!() } #[inline] fn encode_var_ule_len(&self) -> usize { self.as_bytes().len() } #[inline] fn encode_var_ule_write(&self, dst: &mut [u8]) { dst.copy_from_slice(self.as_bytes()) } } #[cfg(feature = "serde")] impl<'a, V: VarULE + ?Sized + serde::Serialize> serde::Serialize for VarZeroCow<'a, V> { fn 
serialize(&self, serializer: S) -> Result where S: serde::Serializer, { if serializer.is_human_readable() { ::serialize(self.deref(), serializer) } else { serializer.serialize_bytes(self.as_bytes()) } } } #[cfg(feature = "serde")] impl<'a, 'de: 'a, V: VarULE + ?Sized> serde::Deserialize<'de> for VarZeroCow<'a, V> where Box: serde::Deserialize<'de>, { fn deserialize(deserializer: Des) -> Result where Des: serde::Deserializer<'de>, { if deserializer.is_human_readable() { let b = Box::::deserialize(deserializer)?; Ok(Self::new_owned(b)) } else { let bytes = <&[u8]>::deserialize(deserializer)?; Self::parse_bytes(bytes).map_err(serde::de::Error::custom) } } } #[cfg(feature = "databake")] impl<'a, V: VarULE + ?Sized> databake::Bake for VarZeroCow<'a, V> { fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream { env.insert("zerovec"); let bytes = self.as_bytes().bake(env); databake::quote! { // Safety: Known to come from a valid V since self.as_bytes() is always a valid V unsafe { zerovec::VarZeroCow::from_bytes_unchecked(#bytes) } } } } #[cfg(feature = "databake")] impl<'a, V: VarULE + ?Sized> databake::BakeSize for VarZeroCow<'a, V> { fn borrows_size(&self) -> usize { self.as_bytes().len() } } impl<'a, V: VarULE + ?Sized> ZeroFrom<'a, V> for VarZeroCow<'a, V> { #[inline] fn zero_from(other: &'a V) -> Self { Self::new_borrowed(other) } } impl<'a, 'b, V: VarULE + ?Sized> ZeroFrom<'a, VarZeroCow<'b, V>> for VarZeroCow<'a, V> { #[inline] fn zero_from(other: &'a VarZeroCow<'b, V>) -> Self { Self::new_borrowed(other) } } #[cfg(test)] mod tests { use super::VarZeroCow; use crate::ule::tuplevar::Tuple3VarULE; use crate::vecs::VarZeroSlice; #[test] fn test_cow_roundtrip() { type Messy = Tuple3VarULE>; let vec = vec!["one", "two", "three"]; let messy: VarZeroCow = VarZeroCow::from_encodeable(&("hello", &b"g\xFF\xFFdbye"[..], vec)); assert_eq!(messy.a(), "hello"); assert_eq!(messy.b(), b"g\xFF\xFFdbye"); assert_eq!(&messy.c()[1], "two"); #[cfg(feature = "serde")] { let 
bincode = bincode::serialize(&messy).unwrap(); let deserialized: VarZeroCow = bincode::deserialize(&bincode).unwrap(); assert_eq!( messy, deserialized, "Single element roundtrips with bincode" ); assert!(!deserialized.is_owned()); let json = serde_json::to_string(&messy).unwrap(); let deserialized: VarZeroCow = serde_json::from_str(&json).unwrap(); assert_eq!(messy, deserialized, "Single element roundtrips with serde"); } } struct TwoCows<'a> { cow1: VarZeroCow<'a, str>, cow2: VarZeroCow<'a, str>, } #[test] fn test_eyepatch_works() { // This code should compile let mut two = TwoCows { cow1: VarZeroCow::new_borrowed("hello"), cow2: VarZeroCow::new_owned("world".into()), }; let three = VarZeroCow::new_borrowed(&*two.cow2); two.cow1 = three; // Without the eyepatch, dropck will be worried that the dtor of two.cow1 can observe the // data it borrowed from two.cow2, which may have already been deleted // This test will fail if you add an empty `impl<'a, V: ?Sized> Drop for VarZeroCow<'a, V>` } } zerovec-0.11.1/src/hashmap/algorithms.rs000064400000000000000000000142331046102023000163130ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use alloc::vec; use alloc::vec::Vec; use core::hash::{Hash, Hasher}; use twox_hash::XxHash64; // Const seed to be used with [`XxHash64::with_seed`]. const SEED: u64 = 0xaabbccdd; /// Split the 64bit `hash` into (g, f0, f1). /// /// g denotes the highest 16bits of the hash modulo `m`, and is referred to as first level hash. /// (f0, f1) denotes the middle, and lower 24bits of the hash respectively. /// (f0, f1) are used to distribute the keys with same g, into distinct slots. /// /// # Arguments /// /// * `hash` - The hash to split. /// * `m` - The modulo used to split the hash. 
pub const fn split_hash64(hash: u64, m: usize) -> (usize, u32, u32) { ( ((hash >> 48) as usize % m), ((hash >> 24) as u32 & 0xffffff), ((hash & 0xffffff) as u32), ) } /// Compute hash using [`XxHash64`]. pub fn compute_hash(key: &K) -> u64 { let mut hasher = XxHash64::with_seed(SEED); key.hash(&mut hasher); hasher.finish() } /// Calculate the index using (f0, f1), (d0, d1) in modulo m. /// Returns [`None`] if d is (0, 0) or modulo is 0 /// else returns the index computed using (f0 + f1 * d0 + d1) mod m. pub fn compute_index(f: (u32, u32), d: (u32, u32), m: u32) -> Option { if d == (0, 0) || m == 0 { None } else { Some((f.1.wrapping_mul(d.0).wrapping_add(f.0).wrapping_add(d.1) % m) as usize) } } /// Compute displacements for the given `key_hashes`, which split the keys into distinct slots by a /// two-level hashing schema. /// /// Returns a tuple of where the first item is the displacement array and the second item is the /// reverse mapping used to permute keys, values into their slots. /// /// 1. Split the hashes into (g, f0, f1). /// 2. Bucket and sort the split hash on g in descending order. /// 3. In decreasing order of bucket size, try until a (d0, d1) is found that splits the keys /// in the bucket into distinct slots. /// 4. Mark the slots for current bucket as occupied and store the reverse mapping. /// 5. Repeat untill all the keys have been assigned distinct slots. /// /// # Arguments /// /// * `key_hashes` - [`ExactSizeIterator`] over the hashed key values #[allow(clippy::indexing_slicing, clippy::unwrap_used)] pub fn compute_displacements( key_hashes: impl ExactSizeIterator, ) -> (Vec<(u32, u32)>, Vec) { let len = key_hashes.len(); // A vector to track the size of buckets for sorting. 
let mut bucket_sizes = vec![0; len]; // A flattened representation of items in the buckets after applying first level hash function let mut bucket_flatten = Vec::with_capacity(len); // Compute initial displacement and bucket sizes key_hashes.into_iter().enumerate().for_each(|(i, kh)| { let h = split_hash64(kh, len); bucket_sizes[h.0] += 1; bucket_flatten.push((h, i)) }); // Sort by decreasing order of bucket_sizes. bucket_flatten.sort_by(|&(ha, _), &(hb, _)| { // ha.0, hb.0 are always within bounds of `bucket_sizes` (bucket_sizes[hb.0], hb).cmp(&(bucket_sizes[ha.0], ha)) }); // Generation count while iterating buckets. // Each trial of ((d0, d1), bucket chain) is a new generation. // We use this to track which all slots are assigned for the current bucket chain. let mut generation = 0; // Whether a slot has been occupied by previous buckets with a different first level hash (different // bucket chain). let mut occupied = vec![false; len]; // Track generation count for the slots. // A slot is empty if either it is unoccupied by the previous bucket chains and the // assignment is not equal to generation. let mut assignments = vec![0; len]; // Vec to store the displacements (saves us a recomputation of hash while assigning slots). let mut current_displacements = Vec::with_capacity(16); // (d0, d1) which splits the bucket into different slots let mut displacements = vec![(0, 0); len]; // Vec to store mapping to the original order of keys. // This is a permutation which will be applied to keys, values at the end. 
let mut reverse_mapping = vec![0; len]; let mut start = 0; while start < len { // Bucket span with the same first level hash // start is always within bounds of `bucket_flatten` let g = bucket_flatten[start].0 .0; // g is always within bounds of `bucket_sizes` let end = start + bucket_sizes[g]; // start, end - 1 are always within bounds of `bucket_sizes` let buckets = &bucket_flatten[start..end]; 'd0: for d0 in 0..len as u32 { 'd1: for d1 in 0..len as u32 { if (d0, d1) == (0, 0) { continue; } current_displacements.clear(); generation += 1; for ((_, f0, f1), _) in buckets { let displacement_idx = compute_index((*f0, *f1), (d0, d1), len as u32).unwrap(); // displacement_idx is always within bounds if occupied[displacement_idx] || assignments[displacement_idx] == generation { continue 'd1; } assignments[displacement_idx] = generation; current_displacements.push(displacement_idx); } // Successfully found a (d0, d1), store it as index g. // g < displacements.len() due to modulo operation displacements[g] = (d0, d1); for (i, displacement_idx) in current_displacements.iter().enumerate() { // `current_displacements` has same size as `buckets` let (_, idx) = &buckets[i]; // displacement_idx is always within bounds occupied[*displacement_idx] = true; reverse_mapping[*displacement_idx] = *idx; } break 'd0; } } start = end; } (displacements, reverse_mapping) } zerovec-0.11.1/src/hashmap/mod.rs000064400000000000000000000161061046102023000147220ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::map::{MutableZeroVecLike, ZeroMapKV, ZeroVecLike}; use crate::ZeroVec; use alloc::vec::Vec; use core::borrow::Borrow; use core::hash::Hash; pub mod algorithms; use algorithms::*; #[cfg(feature = "serde")] mod serde; /// A perfect zerohashmap optimized for lookups over immutable keys. 
/// /// # Examples /// ``` /// use zerovec::ZeroHashMap; /// /// let hashmap = /// ZeroHashMap::::from_iter([(0, "a"), (1, "b"), (2, "c")]); /// assert_eq!(hashmap.get(&0), Some("a")); /// assert_eq!(hashmap.get(&2), Some("c")); /// assert_eq!(hashmap.get(&4), None); /// ``` #[derive(Debug)] pub struct ZeroHashMap<'a, K, V> where K: ZeroMapKV<'a> + ?Sized, V: ZeroMapKV<'a> + ?Sized, { /// Array of (d0, d1) which splits the keys with same first level hash into distinct /// slots. /// The ith index of the array splits the keys with first level hash i. /// If no key with first level hash is found in the original keys, (0, 0) is used as an empty /// placeholder. displacements: ZeroVec<'a, (u32, u32)>, keys: K::Container, values: V::Container, } impl<'a, K, V> ZeroHashMap<'a, K, V> where K: ZeroMapKV<'a> + ?Sized, V: ZeroMapKV<'a> + ?Sized, { /// The number of elements in the [`ZeroHashMap`]. pub fn len(&self) -> usize { self.values.zvl_len() } /// Whether the [`ZeroHashMap`] is empty. pub fn is_empty(&self) -> bool { self.len() == 0 } } impl<'a, K, V> ZeroHashMap<'a, K, V> where K: ZeroMapKV<'a> + ?Sized + Hash + Eq, V: ZeroMapKV<'a> + ?Sized, { /// Given a `key` return the index for the key or [`None`] if the key is absent. fn index(&self, key: &A) -> Option where A: Borrow + ?Sized, { let hash = compute_hash(key.borrow()); let (g, f0, f1) = split_hash64(hash, self.len()); #[allow(clippy::unwrap_used)] // g is in-range let (d0, d1) = self.displacements.get(g).unwrap(); let index = compute_index((f0, f1), (d0, d1), self.displacements.len() as u32)?; #[allow(clippy::unwrap_used)] // index is in 0..self.keys.len() let found = self.keys.zvl_get(index).unwrap(); if K::Container::zvl_get_as_t(found, |found| found == key.borrow()) { Some(index) } else { None } } /// Get the value corresponding to `key`. /// If absent [`None`] is returned. 
/// /// # Example /// ``` /// use zerovec::ZeroHashMap; /// /// let hashmap = ZeroHashMap::::from_iter([("a", "A"), ("z", "Z")]); /// /// assert_eq!(hashmap.get("a"), Some("A")); /// assert_eq!(hashmap.get("z"), Some("Z")); /// assert_eq!(hashmap.get("0"), None); /// ``` pub fn get<'b, A>(&'b self, key: &A) -> Option<&'b V::GetType> where A: Borrow + ?Sized + 'b, { self.index(key).and_then(|i| self.values.zvl_get(i)) } /// Returns whether `key` is contained in this hashmap /// /// # Example /// ```rust /// use zerovec::ZeroHashMap; /// /// let hashmap = ZeroHashMap::::from_iter([("a", "A"), ("z", "Z")]); /// /// assert!(hashmap.contains_key("a")); /// assert!(!hashmap.contains_key("p")); /// ``` pub fn contains_key(&self, key: &K) -> bool { self.index(key).is_some() } } impl<'a, K, V> ZeroHashMap<'a, K, V> where K: ZeroMapKV<'a> + ?Sized, V: ZeroMapKV<'a> + ?Sized, { // Produce an iterator over (key, value) pairs. pub fn iter<'b>( &'b self, ) -> impl ExactSizeIterator< Item = ( &'b >::GetType, &'b >::GetType, ), > { (0..self.len()).map(|index| { ( #[allow(clippy::unwrap_used)] // index is in range self.keys.zvl_get(index).unwrap(), #[allow(clippy::unwrap_used)] // index is in range self.values.zvl_get(index).unwrap(), ) }) } // Produce an iterator over keys. pub fn iter_keys<'b>( &'b self, ) -> impl ExactSizeIterator>::GetType> { #[allow(clippy::unwrap_used)] // index is in range (0..self.len()).map(|index| self.keys.zvl_get(index).unwrap()) } // Produce an iterator over values. pub fn iter_values<'b>( &'b self, ) -> impl ExactSizeIterator>::GetType> { #[allow(clippy::unwrap_used)] // index is in range (0..self.len()).map(|index| self.values.zvl_get(index).unwrap()) } } impl<'a, K, V, A, B> FromIterator<(A, B)> for ZeroHashMap<'a, K, V> where K: ZeroMapKV<'a> + ?Sized + Hash + Eq, V: ZeroMapKV<'a> + ?Sized, B: Borrow, A: Borrow, { /// Build a [`ZeroHashMap`] from an iterator returning (K, V) tuples. 
/// /// # Example /// ``` /// use zerovec::ZeroHashMap; /// /// let hashmap = ZeroHashMap::::from_iter([ /// (1, "a"), /// (2, "b"), /// (3, "c"), /// (4, "d"), /// ]); /// assert_eq!(hashmap.get(&1), Some("a")); /// assert_eq!(hashmap.get(&2), Some("b")); /// assert_eq!(hashmap.get(&3), Some("c")); /// assert_eq!(hashmap.get(&4), Some("d")); /// ``` fn from_iter>(iter: T) -> Self { let iter = iter.into_iter(); let size_hint = match iter.size_hint() { (_, Some(upper)) => upper, (lower, None) => lower, }; let mut key_hashes = Vec::with_capacity(size_hint); let mut keys = K::Container::zvl_with_capacity(size_hint); let mut values = V::Container::zvl_with_capacity(size_hint); for (k, v) in iter { keys.zvl_push(k.borrow()); key_hashes.push(compute_hash(k.borrow())); values.zvl_push(v.borrow()); } let (displacements, mut reverse_mapping) = compute_displacements(key_hashes.into_iter()); keys.zvl_permute(&mut reverse_mapping.clone()); values.zvl_permute(&mut reverse_mapping); Self { displacements: ZeroVec::alloc_from_slice(&displacements), values, keys, } } } #[cfg(test)] mod tests { use super::*; use crate::ule::AsULE; use rand::{distributions::Standard, Rng, SeedableRng}; use rand_pcg::Lcg64Xsh32; #[test] fn test_zhms_u64k_u64v() { const N: usize = 65530; let seed = u64::from_le_bytes(*b"testseed"); let rng = Lcg64Xsh32::seed_from_u64(seed); let kv: Vec<(u64, u64)> = rng.sample_iter(&Standard).take(N).collect(); let hashmap: ZeroHashMap = ZeroHashMap::from_iter(kv.iter().map(|e| (&e.0, &e.1))); for (k, v) in kv { assert_eq!( hashmap.get(&k).copied().map(::from_unaligned), Some(v), ); } } } zerovec-0.11.1/src/hashmap/serde.rs000064400000000000000000000112721046102023000152440ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
use super::ZeroHashMap; use crate::{ map::{ZeroMapKV, ZeroVecLike}, ZeroVec, }; use serde::{de, Deserialize, Serialize}; impl<'a, K, V> Serialize for ZeroHashMap<'a, K, V> where K: ZeroMapKV<'a> + Serialize + ?Sized, V: ZeroMapKV<'a> + Serialize + ?Sized, K::Container: Serialize, V::Container: Serialize, { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { (&self.displacements, &self.keys, &self.values).serialize(serializer) } } impl<'de, 'a, K, V> Deserialize<'de> for ZeroHashMap<'a, K, V> where K: ZeroMapKV<'a> + ?Sized, V: ZeroMapKV<'a> + ?Sized, K::Container: Deserialize<'de>, V::Container: Deserialize<'de>, 'de: 'a, { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de>, { let (displacements, keys, values): (ZeroVec<(u32, u32)>, K::Container, V::Container) = Deserialize::deserialize(deserializer)?; if keys.zvl_len() != values.zvl_len() { return Err(de::Error::custom( "Mismatched key and value sizes in ZeroHashMap", )); } if displacements.zvl_len() != keys.zvl_len() { return Err(de::Error::custom( "Mismatched displacements and key, value sizes in ZeroHashMap", )); } Ok(Self { displacements, keys, values, }) } } #[cfg(test)] mod test { use crate::{VarZeroVec, ZeroHashMap, ZeroVec}; use serde::{Deserialize, Serialize}; const JSON_STR: &str = "[[[0,0],[0,1],[0,1]],[1,2,0],[\"b\",\"c\",\"a\"]]"; const BINCODE_BYTES: &[u8] = &[ 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 2, 0, 98, 99, 97, ]; #[derive(Serialize, Deserialize)] struct DeriveTestZeroHashMap<'data> { #[serde(borrow)] _data: ZeroHashMap<'data, str, [u8]>, } fn make_zerohashmap() -> ZeroHashMap<'static, u32, str> { ZeroHashMap::from_iter([(0, "a"), (1, "b"), (2, "c")]) } fn build_invalid_hashmap_str( displacements: Vec<(u32, u32)>, keys: Vec, values: Vec<&str>, ) -> String { let invalid_hm: ZeroHashMap = 
ZeroHashMap { displacements: ZeroVec::alloc_from_slice(&displacements), keys: ZeroVec::alloc_from_slice(&keys), values: VarZeroVec::::from(&values), }; serde_json::to_string(&invalid_hm).expect("serialize") } #[test] fn test_invalid_deser_zhm() { // Invalid hashmap |keys| != |values| let mut invalid_hm_str = build_invalid_hashmap_str(vec![(0, 1), (0, 0)], vec![1, 2], vec!["a", "b", "c"]); assert_eq!( serde_json::from_str::>(&invalid_hm_str) .unwrap_err() .to_string(), "Mismatched key and value sizes in ZeroHashMap" ); // Invalid hashmap |displacements| != |keys| == |values| // |displacements| = 2, |keys| = 3, |values| = 3 invalid_hm_str = build_invalid_hashmap_str(vec![(0, 1), (0, 0)], vec![2, 1, 0], vec!["a", "b", "c"]); assert_eq!( serde_json::from_str::>(&invalid_hm_str) .unwrap_err() .to_string(), "Mismatched displacements and key, value sizes in ZeroHashMap" ); } #[test] fn test_serde_valid_deser_zhm() { let hm = make_zerohashmap(); let json_str = serde_json::to_string(&hm).expect("serialize"); assert_eq!(json_str, JSON_STR); let deserialized_hm: ZeroHashMap = serde_json::from_str(JSON_STR).expect("deserialize"); assert_eq!( hm.iter().collect::>(), deserialized_hm.iter().collect::>() ); } #[test] fn test_bincode_zhm() { let hm = make_zerohashmap(); let bincode_bytes = bincode::serialize(&hm).expect("serialize"); assert_eq!(bincode_bytes, BINCODE_BYTES); let deserialized_hm: ZeroHashMap = bincode::deserialize(BINCODE_BYTES).expect("deserialize"); assert_eq!( hm.iter().collect::>(), deserialized_hm.iter().collect::>() ); } } zerovec-0.11.1/src/lib.rs000064400000000000000000000535031046102023000132720ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). //! Zero-copy vector abstractions for arbitrary types, backed by byte slices. //! //! 
`zerovec` enables a far wider range of types — beyond just `&[u8]` and `&str` — to participate in //! zero-copy deserialization from byte slices. It is `serde` compatible and comes equipped with //! proc macros. //! //! Clients upgrading to `zerovec` benefit from zero heap allocations when deserializing //! read-only data. //! //! This crate has four main types: //! //! - [`ZeroVec<'a, T>`] (and [`ZeroSlice`](ZeroSlice)) for fixed-width types like `u32` //! - [`VarZeroVec<'a, T>`] (and [`VarZeroSlice`](VarZeroSlice)) for variable-width types like `str` //! - [`ZeroMap<'a, K, V>`] to map from `K` to `V` //! - [`ZeroMap2d<'a, K0, K1, V>`] to map from the pair `(K0, K1)` to `V` //! //! The first two are intended as close-to-drop-in replacements for `Vec` in Serde structs. The third and fourth are //! intended as a replacement for `HashMap` or [`LiteMap`](https://docs.rs/litemap). When used with Serde derives, **be sure to apply //! `#[serde(borrow)]` to these types**, same as one would for [`Cow<'a, T>`]. //! //! [`ZeroVec<'a, T>`], [`VarZeroVec<'a, T>`], [`ZeroMap<'a, K, V>`], and [`ZeroMap2d<'a, K0, K1, V>`] all behave like //! [`Cow<'a, T>`] in that they abstract over either borrowed or owned data. When performing deserialization //! from human-readable formats (like `json` and `xml`), typically these types will allocate and fully own their data, whereas if deserializing //! from binary formats like `bincode` and `postcard`, these types will borrow data directly from the buffer being deserialized from, //! avoiding allocations and only performing validity checks. As such, this crate can be pretty fast (see [below](#performance) for more information) //! on deserialization. //! //! See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for details on how this crate //! works under the hood. //! //! # Cargo features //! //! This crate has several optional Cargo features: //!
- `serde`: Allows serializing and deserializing `zerovec`'s abstractions via [`serde`](https://docs.rs/serde) //! - `yoke`: Enables implementations of `Yokeable` from the [`yoke`](https://docs.rs/yoke/) crate, which is also useful //! in situations involving a lot of zero-copy deserialization. //! - `derive`: Makes it easier to use custom types in these collections by providing the [`#[make_ule]`](crate::make_ule) and //! [`#[make_varule]`](crate::make_varule) proc macros, which generate appropriate [`ULE`](crate::ule::ULE) and //! [`VarULE`](crate::ule::VarULE)-conformant types for a given "normal" type. //! - `std`: Enables `std::error::Error` implementations for error types. This crate is by default `no_std` with a dependency on `alloc`. //! //! [`ZeroVec<'a, T>`]: ZeroVec //! [`VarZeroVec<'a, T>`]: VarZeroVec //! [`ZeroMap<'a, K, V>`]: ZeroMap //! [`ZeroMap2d<'a, K0, K1, V>`]: ZeroMap2d //! [`Cow<'a, T>`]: alloc::borrow::Cow //! //! # Examples //! //! Serialize and deserialize a struct with ZeroVec and VarZeroVec with Bincode: //! //! ``` //! # #[cfg(feature = "serde")] { //! use zerovec::{VarZeroVec, ZeroVec}; //! //! // This example requires the "serde" feature //! #[derive(serde::Serialize, serde::Deserialize)] //! pub struct DataStruct<'data> { //! #[serde(borrow)] //! nums: ZeroVec<'data, u32>, //! #[serde(borrow)] //! chars: ZeroVec<'data, char>, //! #[serde(borrow)] //! strs: VarZeroVec<'data, str>, //! } //! //! let data = DataStruct { //! nums: ZeroVec::from_slice_or_alloc(&[211, 281, 421, 461]), //! chars: ZeroVec::alloc_from_slice(&['ö', '冇', 'म']), //! strs: VarZeroVec::from(&["hello", "world"]), //! }; //! let bincode_bytes = //! bincode::serialize(&data).expect("Serialization should be successful"); //! assert_eq!(bincode_bytes.len(), 63); //! //! let deserialized: DataStruct = bincode::deserialize(&bincode_bytes) //! .expect("Deserialization should be successful"); //! assert_eq!(deserialized.nums.first(), Some(211)); //!
assert_eq!(deserialized.chars.get(1), Some('冇')); //! assert_eq!(deserialized.strs.get(1), Some("world")); //! // The deserialization will not have allocated anything //! assert!(!deserialized.nums.is_owned()); //! # } // feature = "serde" //! ``` //! //! Use custom types inside of ZeroVec: //! //! ```rust //! # #[cfg(all(feature = "serde", feature = "derive"))] { //! use zerovec::{ZeroVec, VarZeroVec, ZeroMap}; //! use std::borrow::Cow; //! use zerovec::ule::encode_varule_to_box; //! //! // custom fixed-size ULE type for ZeroVec //! #[zerovec::make_ule(DateULE)] //! #[derive(Copy, Clone, PartialEq, Eq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)] //! struct Date { //! y: u64, //! m: u8, //! d: u8 //! } //! //! // custom variable sized VarULE type for VarZeroVec //! #[zerovec::make_varule(PersonULE)] //! #[zerovec::derive(Serialize, Deserialize)] // add Serde impls to PersonULE //! #[derive(Clone, PartialEq, Eq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)] //! struct Person<'a> { //! birthday: Date, //! favorite_character: char, //! #[serde(borrow)] //! name: Cow<'a, str>, //! } //! //! #[derive(serde::Serialize, serde::Deserialize)] //! struct Data<'a> { //! #[serde(borrow)] //! important_dates: ZeroVec<'a, Date>, //! // note: VarZeroVec always must reference the ULE type directly //! #[serde(borrow)] //! important_people: VarZeroVec<'a, PersonULE>, //! #[serde(borrow)] //! birthdays_to_people: ZeroMap<'a, Date, PersonULE> //! } //! //! //! let person1 = Person { //! birthday: Date { y: 1990, m: 9, d: 7}, //! favorite_character: 'π', //! name: Cow::from("Kate") //! }; //! let person2 = Person { //! birthday: Date { y: 1960, m: 5, d: 25}, //! favorite_character: '冇', //! name: Cow::from("Jesse") //! }; //! //! let important_dates = ZeroVec::alloc_from_slice(&[Date { y: 1943, m: 3, d: 20}, Date { y: 1976, m: 8, d: 2}, Date { y: 1998, m: 2, d: 15}]); //! let important_people = VarZeroVec::from(&[&person1, &person2]); //! 
let mut birthdays_to_people: ZeroMap<Date, PersonULE> = ZeroMap::new(); //! // `.insert_var_v()` is slightly more convenient than `.insert()` for custom ULE types //! birthdays_to_people.insert_var_v(&person1.birthday, &person1); //! birthdays_to_people.insert_var_v(&person2.birthday, &person2); //! //! let data = Data { important_dates, important_people, birthdays_to_people }; //! //! let bincode_bytes = bincode::serialize(&data) //! .expect("Serialization should be successful"); //! assert_eq!(bincode_bytes.len(), 160); //! //! let deserialized: Data = bincode::deserialize(&bincode_bytes) //! .expect("Deserialization should be successful"); //! //! assert_eq!(deserialized.important_dates.get(0).unwrap().y, 1943); //! assert_eq!(&deserialized.important_people.get(1).unwrap().name, "Jesse"); //! assert_eq!(&deserialized.important_people.get(0).unwrap().name, "Kate"); //! assert_eq!(&deserialized.birthdays_to_people.get(&person1.birthday).unwrap().name, "Kate"); //! //! } // feature = serde and derive //! ``` //! //! # Performance //! //! `zerovec` is designed for fast deserialization from byte buffers with zero memory allocations //! while minimizing performance regressions for common vector operations. //! //! Benchmark results on x86_64: //! //! | Operation | `Vec<T>` | `zerovec` | //! |---|---|---| //! | Deserialize vec of 100 `u32` | 233.18 ns | 14.120 ns | //! | Compute sum of vec of 100 `u32` (read every element) | 8.7472 ns | 10.775 ns | //! | Binary search vec of 1000 `u32` 50 times | 442.80 ns | 472.51 ns | //! | Deserialize vec of 100 strings | 7.3740 μs\* | 1.4495 μs | //! | Count chars in vec of 100 strings (read every element) | 747.50 ns | 955.28 ns | //! | Binary search vec of 500 strings 10 times | 466.09 ns | 790.33 ns | //! //! \* *This result is reported for `Vec<String>`. However, Serde also supports deserializing to the partially-zero-copy `Vec<&str>`; this gives 1.8420 μs, much faster than `Vec<String>` but a bit slower than `zerovec`.* //! //!
| Operation | `HashMap` | `LiteMap` | `ZeroMap` | //! |---|---|---|---| //! | Deserialize a small map | 2.72 μs | 1.28 μs | 480 ns | //! | Deserialize a large map | 50.5 ms | 18.3 ms | 3.74 ms | //! | Look up from a small deserialized map | 49 ns | 42 ns | 54 ns | //! | Look up from a large deserialized map | 51 ns | 155 ns | 213 ns | //! //! Small = 16 elements, large = 131,072 elements. Maps contain `<u32, String>`. //! //! The benches used to generate the above table can be found in the `benches` directory in the project repository. //! `zeromap` benches are named by convention, e.g. `zeromap/deserialize/small`, `zeromap/lookup/large`. The type //! is appended for baseline comparisons, e.g. `zeromap/lookup/small/hashmap`. // https://github.com/unicode-org/icu4x/blob/main/documents/process/boilerplate.md#library-annotations #![cfg_attr(not(any(test, doc)), no_std)] #![cfg_attr( not(test), deny( clippy::indexing_slicing, clippy::unwrap_used, clippy::expect_used, clippy::panic, clippy::exhaustive_structs, clippy::exhaustive_enums, clippy::trivially_copy_pass_by_ref, missing_debug_implementations, ) )] // this crate does a lot of nuanced lifetime manipulation, being explicit // is better here.
#![allow(clippy::needless_lifetimes)] #[cfg(feature = "alloc")] extern crate alloc; mod cow; #[cfg(feature = "hashmap")] pub mod hashmap; #[cfg(feature = "alloc")] mod map; #[cfg(feature = "alloc")] mod map2d; #[cfg(test)] pub mod samples; mod varzerovec; mod zerovec; // This must be after `mod zerovec` for some impls on `ZeroSlice` // to show up in the right spot in the docs pub mod ule; #[cfg(feature = "yoke")] mod yoke_impls; mod zerofrom_impls; pub use crate::cow::VarZeroCow; #[cfg(feature = "hashmap")] pub use crate::hashmap::ZeroHashMap; #[cfg(feature = "alloc")] pub use crate::map::map::ZeroMap; #[cfg(feature = "alloc")] pub use crate::map2d::map::ZeroMap2d; pub use crate::varzerovec::{slice::VarZeroSlice, vec::VarZeroVec}; pub use crate::zerovec::{ZeroSlice, ZeroVec}; #[doc(hidden)] // macro use pub mod __zerovec_internal_reexport { pub use zerofrom::ZeroFrom; #[cfg(feature = "alloc")] pub use alloc::borrow; #[cfg(feature = "alloc")] pub use alloc::boxed; #[cfg(feature = "serde")] pub use serde; } #[cfg(feature = "alloc")] pub mod maps { //! This module contains additional utility types and traits for working with //! [`ZeroMap`] and [`ZeroMap2d`]. See their docs for more details on the general purpose //! of these types. //! //! [`ZeroMapBorrowed`] and [`ZeroMap2dBorrowed`] are versions of [`ZeroMap`] and [`ZeroMap2d`] //! that can be used when you wish to guarantee that the map data is always borrowed, leading to //! relaxed lifetime constraints. //! //! The [`ZeroMapKV`] trait is required to be implemented on any type that needs to be used //! within a map type. [`ZeroVecLike`] and [`MutableZeroVecLike`] are traits used in the //! internal workings of the map types, and should typically not be used or implemented by //! users of this crate. 
#[doc(no_inline)] pub use crate::map::ZeroMap; pub use crate::map::ZeroMapBorrowed; #[doc(no_inline)] pub use crate::map2d::ZeroMap2d; pub use crate::map2d::ZeroMap2dBorrowed; pub use crate::map::{MutableZeroVecLike, ZeroMapKV, ZeroVecLike}; pub use crate::map2d::ZeroMap2dCursor; } pub mod vecs { //! This module contains additional utility types for working with //! [`ZeroVec`] and [`VarZeroVec`]. See their docs for more details on the general purpose //! of these types. //! //! [`ZeroSlice`] and [`VarZeroSlice`] provide slice-like versions of the vector types //! for use behind references and in custom ULE types. //! //! [`VarZeroVecOwned`] is a special owned/mutable version of [`VarZeroVec`], allowing //! direct manipulation of the backing buffer. #[doc(no_inline)] pub use crate::zerovec::{ZeroSlice, ZeroVec}; pub use crate::zerovec::ZeroSliceIter; #[doc(no_inline)] pub use crate::varzerovec::{VarZeroSlice, VarZeroVec}; #[cfg(feature = "alloc")] pub use crate::varzerovec::VarZeroVecOwned; pub use crate::varzerovec::{Index16, Index32, Index8, VarZeroSliceIter, VarZeroVecFormat}; pub type VarZeroVec16<'a, T> = VarZeroVec<'a, T, Index16>; pub type VarZeroVec32<'a, T> = VarZeroVec<'a, T, Index32>; pub type VarZeroSlice16 = VarZeroSlice; pub type VarZeroSlice32 = VarZeroSlice; } // Proc macro reexports // // These exist so that our docs can use intra-doc links. // Due to quirks of how rustdoc does documentation on reexports, these must be in this module and not reexported from // a submodule /// Generate a corresponding [`ULE`] type and the relevant [`AsULE`] implementations for this type /// /// This can be attached to structs containing only [`AsULE`] types, or C-like enums that have `#[repr(u8)]` /// and all explicit discriminants. /// /// The type must be [`Copy`], [`PartialEq`], and [`Eq`]. 
/// /// `#[make_ule]` will automatically derive the following traits on the [`ULE`] type: /// /// - [`Ord`] and [`PartialOrd`] /// - [`ZeroMapKV`] /// /// To disable one of the automatic derives, use `#[zerovec::skip_derive(...)]` like so: `#[zerovec::skip_derive(ZeroMapKV)]`. /// `Ord` and `PartialOrd` are implemented as a unit and can only be disabled as a group with `#[zerovec::skip_derive(Ord)]`. /// /// The following traits are available to derive, but not automatic: /// /// - [`Debug`] /// /// To enable one of these additional derives, use `#[zerovec::derive(...)]` like so: `#[zerovec::derive(Debug)]`. /// /// In most cases these derives will defer to the impl of the same trait on the current type, so such impls must exist. /// /// For enums, this attribute will generate a crate-public `fn new_from_u8(value: u8) -> Option<Self>` /// method on the main type that allows one to construct the value from a u8. If this method is desired /// to be more public, it should be wrapped. /// /// [`ULE`]: ule::ULE /// [`AsULE`]: ule::AsULE /// [`ZeroMapKV`]: maps::ZeroMapKV /// /// # Example /// /// ```rust /// use zerovec::ZeroVec; /// /// #[zerovec::make_ule(DateULE)] /// #[derive( /// Copy, /// Clone, /// PartialEq, /// Eq, /// Ord, /// PartialOrd, /// serde::Serialize, /// serde::Deserialize, /// )] /// struct Date { /// y: u64, /// m: u8, /// d: u8, /// } /// /// #[derive(serde::Serialize, serde::Deserialize)] /// struct Dates<'a> { /// #[serde(borrow)] /// dates: ZeroVec<'a, Date>, /// } /// /// let dates = Dates { /// dates: ZeroVec::alloc_from_slice(&[ /// Date { /// y: 1985, /// m: 9, /// d: 3, /// }, /// Date { /// y: 1970, /// m: 2, /// d: 20, /// }, /// Date { /// y: 1990, /// m: 6, /// d: 13, /// }, /// ]), /// }; /// /// let bincode_bytes = /// bincode::serialize(&dates).expect("Serialization should be successful"); /// /// // Will deserialize without allocations /// let deserialized: Dates = bincode::deserialize(&bincode_bytes) /// .expect("Deserialization should
be successful"); /// /// assert_eq!(deserialized.dates.get(1).unwrap().y, 1970); /// assert_eq!(deserialized.dates.get(2).unwrap().d, 13); /// ``` #[cfg(feature = "derive")] pub use zerovec_derive::make_ule; /// Generate a corresponding [`VarULE`] type and the relevant [`EncodeAsVarULE`]/[`zerofrom::ZeroFrom`] /// implementations for this type /// /// This can be attached to structs containing only [`AsULE`] types with the last fields being /// [`Cow<'a, str>`](alloc::borrow::Cow), [`ZeroSlice`], or [`VarZeroSlice`]. If there is more than one such field, it will be represented /// using [`MultiFieldsULE`](crate::ule::MultiFieldsULE) and getters will be generated. Other VarULE fields will be detected if they are /// tagged with `#[zerovec::varule(NameOfVarULETy)]`. /// /// The type must be [`PartialEq`] and [`Eq`]. /// /// [`EncodeAsVarULE`] and [`zerofrom::ZeroFrom`] are useful for avoiding the need to deal with /// the [`VarULE`] type directly. In particular, it is recommended to use [`zerofrom::ZeroFrom`] /// to convert the [`VarULE`] type back to this type in a cheap, zero-copy way (see the example below /// for more details). /// /// `#[make_varule]` will automatically derive the following traits on the [`VarULE`] type: /// /// - [`Ord`] and [`PartialOrd`] /// - [`ZeroMapKV`] /// - [`alloc::borrow::ToOwned`] /// /// To disable one of the automatic derives, use `#[zerovec::skip_derive(...)]` like so: `#[zerovec::skip_derive(ZeroMapKV)]`. /// `Ord` and `PartialOrd` are implemented as a unit and can only be disabled as a group with `#[zerovec::skip_derive(Ord)]`. /// /// The following traits are available to derive, but not automatic: /// /// - [`Debug`] /// - [`Serialize`](serde::Serialize) /// - [`Deserialize`](serde::Deserialize) /// /// To enable one of these additional derives, use `#[zerovec::derive(...)]` like so: `#[zerovec::derive(Debug)]`. 
/// /// In most cases these derives will defer to the impl of the same trait on the current type, so such impls must exist. /// /// This implementation will also by default autogenerate [`Ord`] and [`PartialOrd`] on the [`VarULE`] type based on /// the implementation on `Self`. You can opt out of this with `#[zerovec::skip_derive(Ord)]` /// /// Note that this implementation will autogenerate [`EncodeAsVarULE`] impls for _both_ `Self` and `&Self` /// for convenience. This allows for a little more flexibility encoding slices. /// /// In case there are multiple [`VarULE`] (i.e., variable-sized) fields, this macro will produce private fields that /// appropriately pack the data together, with the packing format by default being [`crate::vecs::Index16`], but can be /// overridden with `#[zerovec::format(zerovec::vecs::Index8)]`. /// /// [`EncodeAsVarULE`]: ule::EncodeAsVarULE /// [`VarULE`]: ule::VarULE /// [`ULE`]: ule::ULE /// [`AsULE`]: ule::AsULE /// [`ZeroMapKV`]: maps::ZeroMapKV /// /// # Example /// /// ```rust /// use std::borrow::Cow; /// use zerofrom::ZeroFrom; /// use zerovec::ule::encode_varule_to_box; /// use zerovec::{VarZeroVec, ZeroMap, ZeroVec}; /// /// // custom fixed-size ULE type for ZeroVec /// #[zerovec::make_ule(DateULE)] /// #[derive(Copy, Clone, PartialEq, Eq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)] /// struct Date { /// y: u64, /// m: u8, /// d: u8, /// } /// /// // custom variable sized VarULE type for VarZeroVec /// #[zerovec::make_varule(PersonULE)] /// #[zerovec::derive(Serialize, Deserialize)] /// #[derive(Clone, PartialEq, Eq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)] /// struct Person<'a> { /// birthday: Date, /// favorite_character: char, /// #[serde(borrow)] /// name: Cow<'a, str>, /// } /// /// #[derive(serde::Serialize, serde::Deserialize)] /// struct Data<'a> { /// // note: VarZeroVec always must reference the ULE type directly /// #[serde(borrow)] /// important_people: VarZeroVec<'a, PersonULE>, /// } 
/// /// let person1 = Person { /// birthday: Date { /// y: 1990, /// m: 9, /// d: 7, /// }, /// favorite_character: 'π', /// name: Cow::from("Kate"), /// }; /// let person2 = Person { /// birthday: Date { /// y: 1960, /// m: 5, /// d: 25, /// }, /// favorite_character: '冇', /// name: Cow::from("Jesse"), /// }; /// /// let important_people = VarZeroVec::from(&[person1, person2]); /// let data = Data { important_people }; /// /// let bincode_bytes = bincode::serialize(&data).expect("Serialization should be successful"); /// /// // Will deserialize without allocations /// let deserialized: Data = /// bincode::deserialize(&bincode_bytes).expect("Deserialization should be successful"); /// /// assert_eq!(&deserialized.important_people.get(1).unwrap().name, "Jesse"); /// assert_eq!(&deserialized.important_people.get(0).unwrap().name, "Kate"); /// /// // Since VarZeroVec produces PersonULE types, it's convenient to use ZeroFrom /// // to recoup Person values in a zero-copy way /// let person_converted: Person = /// ZeroFrom::zero_from(deserialized.important_people.get(1).unwrap()); /// assert_eq!(person_converted.name, "Jesse"); /// assert_eq!(person_converted.birthday.y, 1960); /// ``` #[cfg(feature = "derive")] pub use zerovec_derive::make_varule; #[cfg(test)] // Expected sizes are based on a 64-bit architecture #[cfg(target_pointer_width = "64")] mod tests { use super::*; use core::mem::size_of; /// Checks that the size of the type is one of the given sizes. /// The size might differ across Rust versions or channels. macro_rules! 
check_size_of { ($sizes:pat, $type:path) => { assert!( matches!(size_of::<$type>(), $sizes), concat!(stringify!($type), " is of size {}"), size_of::<$type>() ); }; } #[test] fn check_sizes() { check_size_of!(24, ZeroVec); check_size_of!(24, ZeroVec); check_size_of!(32 | 24, VarZeroVec<[u8]>); check_size_of!(32 | 24, VarZeroVec); check_size_of!(48, ZeroMap); check_size_of!(56 | 48, ZeroMap); check_size_of!(56 | 48, ZeroMap); check_size_of!(64 | 48, ZeroMap); check_size_of!(120 | 96, ZeroMap2d); check_size_of!(24, Option>); check_size_of!(32 | 24, Option>); check_size_of!(64 | 56 | 48, Option>); check_size_of!(120 | 104 | 96, Option>); } } zerovec-0.11.1/src/map/borrowed.rs000064400000000000000000000235721046102023000151270ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::ule::AsULE; use crate::ZeroSlice; use core::cmp::Ordering; use core::fmt; use super::kv::ZeroMapKV; use super::vecs::ZeroVecLike; /// A borrowed-only version of [`ZeroMap`](super::ZeroMap) /// /// This is useful for fully-zero-copy deserialization from non-human-readable /// serialization formats. It also has the advantage that it can return references that live for /// the lifetime of the backing buffer as opposed to that of the [`ZeroMapBorrowed`] instance. /// /// # Examples /// /// ``` /// use zerovec::maps::ZeroMapBorrowed; /// /// // Example byte buffer representing the map { 1: "one" } /// let BINCODE_BYTES: &[u8; 25] = &[ /// 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 1, 0, 111, /// 110, 101, /// ]; /// /// // Deserializing to ZeroMap requires no heap allocations. 
/// let zero_map: ZeroMapBorrowed = /// bincode::deserialize(BINCODE_BYTES) /// .expect("Should deserialize successfully"); /// assert_eq!(zero_map.get(&1), Some("one")); /// ``` /// /// This can be obtained from a [`ZeroMap`](super::ZeroMap) via [`ZeroMap::as_borrowed`](super::ZeroMap::as_borrowed) pub struct ZeroMapBorrowed<'a, K, V> where K: ZeroMapKV<'a>, V: ZeroMapKV<'a>, K: ?Sized, V: ?Sized, { pub(crate) keys: &'a >::Slice, pub(crate) values: &'a >::Slice, } impl<'a, K, V> Copy for ZeroMapBorrowed<'a, K, V> where K: ZeroMapKV<'a>, V: ZeroMapKV<'a>, K: ?Sized, V: ?Sized, { } impl<'a, K, V> Clone for ZeroMapBorrowed<'a, K, V> where K: ZeroMapKV<'a>, V: ZeroMapKV<'a>, K: ?Sized, V: ?Sized, { fn clone(&self) -> Self { *self } } impl<'a, K, V> Default for ZeroMapBorrowed<'a, K, V> where K: ZeroMapKV<'a>, V: ZeroMapKV<'a>, K::Slice: 'static, V::Slice: 'static, K: ?Sized, V: ?Sized, { fn default() -> Self { Self::new() } } impl<'a, K, V> ZeroMapBorrowed<'a, K, V> where K: ZeroMapKV<'a>, V: ZeroMapKV<'a>, K::Slice: 'static, V::Slice: 'static, K: ?Sized, V: ?Sized, { /// Creates a new, empty `ZeroMapBorrowed`. /// /// Note: Since [`ZeroMapBorrowed`] is not mutable, the return value will be a stub unless /// converted into a [`ZeroMap`](super::ZeroMap). 
/// /// # Examples /// /// ``` /// use zerovec::maps::ZeroMapBorrowed; /// /// let zm: ZeroMapBorrowed = ZeroMapBorrowed::new(); /// assert!(zm.is_empty()); /// ``` pub fn new() -> Self { Self { keys: K::Container::zvl_new_borrowed(), values: V::Container::zvl_new_borrowed(), } } } impl<'a, K, V> ZeroMapBorrowed<'a, K, V> where K: ZeroMapKV<'a>, V: ZeroMapKV<'a>, K: ?Sized, V: ?Sized, { #[doc(hidden)] // databake internal pub const unsafe fn from_parts_unchecked( keys: &'a >::Slice, values: &'a >::Slice, ) -> Self { Self { keys, values } } /// The number of elements in the [`ZeroMapBorrowed`] pub fn len(self) -> usize { self.values.zvl_len() } /// Whether the [`ZeroMapBorrowed`] is empty pub fn is_empty(self) -> bool { self.values.zvl_len() == 0 } } impl<'a, K, V> ZeroMapBorrowed<'a, K, V> where K: ZeroMapKV<'a> + Ord, V: ZeroMapKV<'a>, K: ?Sized, V: ?Sized, { /// Get the value associated with `key`, if it exists. /// /// This is able to return values that live longer than the map itself /// since they borrow directly from the backing buffer. This is the /// primary advantage of using [`ZeroMapBorrowed`](super::ZeroMapBorrowed) over [`ZeroMap`](super::ZeroMap). /// /// ```rust /// use zerovec::ZeroMap; /// /// let mut map = ZeroMap::new(); /// map.insert(&1, "one"); /// map.insert(&2, "two"); /// let borrowed = map.as_borrowed(); /// assert_eq!(borrowed.get(&1), Some("one")); /// assert_eq!(borrowed.get(&3), None); /// ``` pub fn get(self, key: &K) -> Option<&'a V::GetType> { let index = self.keys.zvl_binary_search(key).ok()?; self.values.zvl_get(index) } /// Binary search the map with `predicate` to find a key, returning the value. /// /// This is able to return values that live longer than the map itself /// since they borrow directly from the backing buffer. This is the /// primary advantage of using [`ZeroMapBorrowed`](super::ZeroMapBorrowed) over [`ZeroMap`](super::ZeroMap). 
/// /// ```rust /// use zerovec::ZeroMap; /// /// let mut map = ZeroMap::new(); /// map.insert(&1, "one"); /// map.insert(&2, "two"); /// let borrowed = map.as_borrowed(); /// assert_eq!(borrowed.get_by(|probe| probe.cmp(&1)), Some("one")); /// assert_eq!(borrowed.get_by(|probe| probe.cmp(&3)), None); /// ``` pub fn get_by(self, predicate: impl FnMut(&K) -> Ordering) -> Option<&'a V::GetType> { let index = self.keys.zvl_binary_search_by(predicate).ok()?; self.values.zvl_get(index) } /// Returns whether `key` is contained in this map /// /// ```rust /// use zerovec::ZeroMap; /// /// let mut map = ZeroMap::new(); /// map.insert(&1, "one"); /// map.insert(&2, "two"); /// let borrowed = map.as_borrowed(); /// assert!(borrowed.contains_key(&1)); /// assert!(!borrowed.contains_key(&3)); /// ``` pub fn contains_key(self, key: &K) -> bool { self.keys.zvl_binary_search(key).is_ok() } } impl<'a, K, V> ZeroMapBorrowed<'a, K, V> where K: ZeroMapKV<'a> + ?Sized, V: ZeroMapKV<'a> + ?Sized, { /// Produce an ordered iterator over key-value pairs pub fn iter( self, ) -> impl Iterator< Item = ( &'a >::GetType, &'a >::GetType, ), > { self.iter_keys().zip(self.iter_values()) } /// Produce an ordered iterator over keys pub fn iter_keys(self) -> impl Iterator>::GetType> { #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len() (0..self.keys.zvl_len()).map(move |idx| self.keys.zvl_get(idx).unwrap()) } /// Produce an iterator over values, ordered by keys pub fn iter_values(self) -> impl Iterator>::GetType> { #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len() == values.zvl_len() (0..self.values.zvl_len()).map(move |idx| self.values.zvl_get(idx).unwrap()) } } impl<'a, K, V> ZeroMapBorrowed<'a, K, V> where K: ZeroMapKV<'a> + Ord + ?Sized, V: ZeroMapKV<'a, Slice = ZeroSlice> + AsULE + Copy + 'static, { /// For cases when `V` is fixed-size, obtain a direct copy of `V` instead of `V::ULE` pub fn get_copied(self, key: &K) -> Option { let index = self.keys.zvl_binary_search(key).ok()?; 
self.values.get(index) } /// For cases when `V` is fixed-size, obtain a direct copy of `V` instead of `V::ULE` pub fn get_copied_by(self, predicate: impl FnMut(&K) -> Ordering) -> Option { let index = self.keys.zvl_binary_search_by(predicate).ok()?; self.values.get(index) } /// Similar to [`Self::iter()`] except it returns a direct copy of the values instead of references /// to `V::ULE`, in cases when `V` is fixed-size pub fn iter_copied_values( self, ) -> impl Iterator>::GetType, V)> { (0..self.keys.zvl_len()).map(move |idx| { ( #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len() self.keys.zvl_get(idx).unwrap(), #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len() = values.zvl_len() self.values.get(idx).unwrap(), ) }) } } impl<'a, K, V> ZeroMapBorrowed<'a, K, V> where K: ZeroMapKV<'a, Slice = ZeroSlice> + AsULE + Copy + Ord + 'static, V: ZeroMapKV<'a, Slice = ZeroSlice> + AsULE + Copy + 'static, { /// Similar to [`Self::iter()`] except it returns a direct copy of the keys values instead of references /// to `K::ULE` and `V::ULE`, in cases when `K` and `V` are fixed-size #[allow(clippy::needless_lifetimes)] // Lifetime is necessary in impl Trait pub fn iter_copied(self) -> impl Iterator + 'a { let len = self.keys.zvl_len(); (0..len).map(move |idx| { ( #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len() ZeroSlice::get(self.keys, idx).unwrap(), #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len() = values.zvl_len() ZeroSlice::get(self.values, idx).unwrap(), ) }) } } // We can't use the default PartialEq because ZeroMap is invariant // so otherwise rustc will not automatically allow you to compare ZeroMaps // with different lifetimes impl<'a, 'b, K, V> PartialEq> for ZeroMapBorrowed<'a, K, V> where K: for<'c> ZeroMapKV<'c> + ?Sized, V: for<'c> ZeroMapKV<'c> + ?Sized, >::Slice: PartialEq<>::Slice>, >::Slice: PartialEq<>::Slice>, { fn eq(&self, other: &ZeroMapBorrowed<'b, K, V>) -> bool { self.keys.eq(other.keys) && self.values.eq(other.values) } 
} impl<'a, K, V> fmt::Debug for ZeroMapBorrowed<'a, K, V> where K: ZeroMapKV<'a> + ?Sized, V: ZeroMapKV<'a> + ?Sized, K::Slice: fmt::Debug, V::Slice: fmt::Debug, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { f.debug_struct("ZeroMapBorrowed") .field("keys", &self.keys) .field("values", &self.values) .finish() } } zerovec-0.11.1/src/map/databake.rs000064400000000000000000000057771046102023000150470ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::{maps::ZeroMapBorrowed, maps::ZeroMapKV, ZeroMap}; use databake::*; impl<'a, K, V> Bake for ZeroMap<'a, K, V> where K: ZeroMapKV<'a> + ?Sized, V: ZeroMapKV<'a> + ?Sized, K::Container: Bake, V::Container: Bake, { fn bake(&self, env: &CrateEnv) -> TokenStream { env.insert("zerovec"); let keys = self.keys.bake(env); let values = self.values.bake(env); quote! { unsafe { #[allow(unused_unsafe)] zerovec::ZeroMap::from_parts_unchecked(#keys, #values) } } } } impl<'a, K, V> BakeSize for ZeroMap<'a, K, V> where K: ZeroMapKV<'a> + ?Sized, V: ZeroMapKV<'a> + ?Sized, K::Container: BakeSize, V::Container: BakeSize, { fn borrows_size(&self) -> usize { self.keys.borrows_size() + self.values.borrows_size() } } impl<'a, K, V> Bake for ZeroMapBorrowed<'a, K, V> where K: ZeroMapKV<'a> + ?Sized, V: ZeroMapKV<'a> + ?Sized, &'a K::Slice: Bake, &'a V::Slice: Bake, { fn bake(&self, env: &CrateEnv) -> TokenStream { env.insert("zerovec"); let keys = self.keys.bake(env); let values = self.values.bake(env); quote! 
{ unsafe { #[allow(unused_unsafe)] zerovec::maps::ZeroMapBorrowed::from_parts_unchecked(#keys, #values) } } } } impl<'a, K, V> BakeSize for ZeroMapBorrowed<'a, K, V> where K: ZeroMapKV<'a> + ?Sized, V: ZeroMapKV<'a> + ?Sized, &'a K::Slice: BakeSize, &'a V::Slice: BakeSize, { fn borrows_size(&self) -> usize { self.keys.borrows_size() + self.values.borrows_size() } } #[test] fn test_baked_map() { test_bake!( ZeroMap, const, unsafe { #[allow(unused_unsafe)] crate::ZeroMap::from_parts_unchecked( unsafe { crate::vecs::VarZeroVec16::from_bytes_unchecked( b"\x02\0\0\0\0\0\0\0\x02\0\0\0adbc" ) }, unsafe { crate::vecs::VarZeroVec16::from_bytes_unchecked( b"\x02\0\0\0\0\0\0\0\x04\0\0\0ERA1ERA0" ) }, ) }, zerovec ); } #[test] fn test_baked_borrowed_map() { test_bake!( ZeroMapBorrowed, const, unsafe { #[allow(unused_unsafe)] crate::maps::ZeroMapBorrowed::from_parts_unchecked( unsafe { crate::vecs::VarZeroSlice16::from_bytes_unchecked( b"\x02\0\0\0\0\0\0\0\x02\0\0\0adbc" ) }, unsafe { crate::vecs::VarZeroSlice16::from_bytes_unchecked( b"\x02\0\0\0\0\0\0\0\x04\0\0\0ERA1ERA0" ) }, ) }, zerovec ); } zerovec-0.11.1/src/map/kv.rs000064400000000000000000000075131046102023000137210ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use super::vecs::{MutableZeroVecLike, ZeroVecLike}; use crate::ule::*; use crate::vecs::{VarZeroSlice, VarZeroVec}; use crate::zerovec::{ZeroSlice, ZeroVec}; use alloc::boxed::Box; /// Trait marking types which are allowed to be keys or values in [`ZeroMap`](super::ZeroMap). /// /// Users should not be calling methods of this trait directly, however if you are /// implementing your own [`AsULE`] or [`VarULE`] type you may wish to implement /// this trait. 
// this lifetime should be a GAT on Container once that is possible #[allow(clippy::upper_case_acronyms)] // KV is not an acronym pub trait ZeroMapKV<'a> { /// The container that can be used with this type: [`ZeroVec`] or [`VarZeroVec`]. type Container: MutableZeroVecLike< 'a, Self, SliceVariant = Self::Slice, GetType = Self::GetType, OwnedType = Self::OwnedType, > + Sized; type Slice: ZeroVecLike + ?Sized; /// The type produced by `Container::get()` /// /// This type will be predetermined by the choice of `Self::Container`: /// For sized types this must be `T::ULE`, and for unsized types this must be `T` type GetType: ?Sized + 'static; /// The type produced by `Container::replace()` and `Container::remove()`, /// also used during deserialization. If `Self` is human readable serialized, /// deserializing to `Self::OwnedType` should produce the same value once /// passed through `Self::owned_as_self()` /// /// This type will be predetermined by the choice of `Self::Container`: /// For sized types this must be `T` and for unsized types this must be `Box` type OwnedType: 'static; } macro_rules! 
impl_sized_kv { ($ty:path) => { impl<'a> ZeroMapKV<'a> for $ty { type Container = ZeroVec<'a, $ty>; type Slice = ZeroSlice<$ty>; type GetType = <$ty as AsULE>::ULE; type OwnedType = $ty; } }; } impl_sized_kv!(u8); impl_sized_kv!(u16); impl_sized_kv!(u32); impl_sized_kv!(u64); impl_sized_kv!(u128); impl_sized_kv!(i8); impl_sized_kv!(i16); impl_sized_kv!(i32); impl_sized_kv!(i64); impl_sized_kv!(i128); impl_sized_kv!(char); impl_sized_kv!(f32); impl_sized_kv!(f64); impl_sized_kv!(core::num::NonZeroU8); impl_sized_kv!(core::num::NonZeroI8); impl<'a, T> ZeroMapKV<'a> for Option where Option: AsULE + 'static, { type Container = ZeroVec<'a, Option>; type Slice = ZeroSlice>; type GetType = as AsULE>::ULE; type OwnedType = Option; } impl<'a, T> ZeroMapKV<'a> for OptionVarULE where T: VarULE + ?Sized, { type Container = VarZeroVec<'a, OptionVarULE>; type Slice = VarZeroSlice>; type GetType = OptionVarULE; type OwnedType = Box>; } impl<'a> ZeroMapKV<'a> for str { type Container = VarZeroVec<'a, str>; type Slice = VarZeroSlice; type GetType = str; type OwnedType = Box; } impl<'a, T> ZeroMapKV<'a> for [T] where T: ULE + AsULE, { type Container = VarZeroVec<'a, [T]>; type Slice = VarZeroSlice<[T]>; type GetType = [T]; type OwnedType = Box<[T]>; } impl<'a, T, const N: usize> ZeroMapKV<'a> for [T; N] where T: AsULE + 'static, { type Container = ZeroVec<'a, [T; N]>; type Slice = ZeroSlice<[T; N]>; type GetType = [T::ULE; N]; type OwnedType = [T; N]; } impl<'a, T> ZeroMapKV<'a> for ZeroSlice where T: AsULE + 'static, { type Container = VarZeroVec<'a, ZeroSlice>; type Slice = VarZeroSlice>; type GetType = ZeroSlice; type OwnedType = Box>; } zerovec-0.11.1/src/map/map.rs000064400000000000000000000504271046102023000140600ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
use super::*; use crate::ule::{AsULE, EncodeAsVarULE, UleError, VarULE}; use crate::{VarZeroVec, ZeroSlice, ZeroVec}; use alloc::borrow::Borrow; use alloc::boxed::Box; use core::cmp::Ordering; use core::fmt; use core::iter::FromIterator; /// A zero-copy map datastructure, built on sorted binary-searchable [`ZeroVec`] /// and [`VarZeroVec`]. /// /// This type, like [`ZeroVec`] and [`VarZeroVec`], is able to zero-copy /// deserialize from appropriately formatted byte buffers. It is internally copy-on-write, so it can be mutated /// afterwards as necessary. /// /// Internally, a `ZeroMap` is a zero-copy vector for keys paired with a zero-copy vector for /// values, sorted by the keys. Therefore, all types used in `ZeroMap` need to work with either /// [`ZeroVec`] or [`VarZeroVec`]. /// /// This does mean that for fixed-size data, one must use the regular type (`u32`, `u8`, `char`, etc), /// whereas for variable-size data, `ZeroMap` will use the dynamically sized version (`str` not `String`, /// `ZeroSlice` not `ZeroVec`, `FooULE` not `Foo` for custom types) /// /// # Examples /// /// ``` /// use zerovec::ZeroMap; /// /// #[derive(serde::Serialize, serde::Deserialize)] /// struct Data<'a> { /// #[serde(borrow)] /// map: ZeroMap<'a, u32, str>, /// } /// /// let mut map = ZeroMap::new(); /// map.insert(&1, "one"); /// map.insert(&2, "two"); /// map.insert(&4, "four"); /// /// let data = Data { map }; /// /// let bincode_bytes = /// bincode::serialize(&data).expect("Serialization should be successful"); /// /// // Will deserialize without any allocations /// let deserialized: Data = bincode::deserialize(&bincode_bytes) /// .expect("Deserialization should be successful"); /// /// assert_eq!(data.map.get(&1), Some("one")); /// assert_eq!(data.map.get(&2), Some("two")); /// ``` /// /// [`VarZeroVec`]: crate::VarZeroVec // ZeroMap has only one invariant: keys.len() == values.len() // It is also expected that the keys are sorted, but this is not an invariant. 
See #1433 pub struct ZeroMap<'a, K, V> where K: ZeroMapKV<'a> + ?Sized, V: ZeroMapKV<'a> + ?Sized, { pub(crate) keys: K::Container, pub(crate) values: V::Container, } impl<'a, K, V> Default for ZeroMap<'a, K, V> where K: ZeroMapKV<'a> + ?Sized, V: ZeroMapKV<'a> + ?Sized, { fn default() -> Self { Self::new() } } impl<'a, K, V> ZeroMap<'a, K, V> where K: ZeroMapKV<'a> + ?Sized, V: ZeroMapKV<'a> + ?Sized, { /// Creates a new, empty `ZeroMap`. /// /// # Examples /// /// ``` /// use zerovec::ZeroMap; /// /// let zm: ZeroMap = ZeroMap::new(); /// assert!(zm.is_empty()); /// ``` pub fn new() -> Self { Self { keys: K::Container::zvl_with_capacity(0), values: V::Container::zvl_with_capacity(0), } } #[doc(hidden)] // databake internal pub const unsafe fn from_parts_unchecked(keys: K::Container, values: V::Container) -> Self { Self { keys, values } } /// Construct a new [`ZeroMap`] with a given capacity pub fn with_capacity(capacity: usize) -> Self { Self { keys: K::Container::zvl_with_capacity(capacity), values: V::Container::zvl_with_capacity(capacity), } } /// Obtain a borrowed version of this map pub fn as_borrowed(&'a self) -> ZeroMapBorrowed<'a, K, V> { ZeroMapBorrowed { keys: self.keys.zvl_as_borrowed(), values: self.values.zvl_as_borrowed(), } } /// The number of elements in the [`ZeroMap`] pub fn len(&self) -> usize { self.values.zvl_len() } /// Whether the [`ZeroMap`] is empty pub fn is_empty(&self) -> bool { self.values.zvl_len() == 0 } /// Remove all elements from the [`ZeroMap`] pub fn clear(&mut self) { self.keys.zvl_clear(); self.values.zvl_clear(); } /// Reserve capacity for `additional` more elements to be inserted into /// the [`ZeroMap`] to avoid frequent reallocations. /// /// See [`Vec::reserve()`](alloc::vec::Vec::reserve) for more information. 
pub fn reserve(&mut self, additional: usize) { self.keys.zvl_reserve(additional); self.values.zvl_reserve(additional); } } impl<'a, K, V> ZeroMap<'a, K, V> where K: ZeroMapKV<'a> + ?Sized + Ord, V: ZeroMapKV<'a> + ?Sized, { /// Get the value associated with `key`, if it exists. /// /// For fixed-size ([`AsULE`]) `V` types, this _will_ return /// their corresponding [`AsULE::ULE`] type. If you wish to work with the `V` /// type directly, [`Self::get_copied()`] exists for convenience. /// /// ```rust /// use zerovec::ZeroMap; /// /// let mut map = ZeroMap::new(); /// map.insert(&1, "one"); /// map.insert(&2, "two"); /// assert_eq!(map.get(&1), Some("one")); /// assert_eq!(map.get(&3), None); /// ``` pub fn get(&self, key: &K) -> Option<&V::GetType> { let index = self.keys.zvl_binary_search(key).ok()?; self.values.zvl_get(index) } /// Binary search the map with `predicate` to find a key, returning the value. /// /// ```rust /// use zerovec::ZeroMap; /// /// let mut map = ZeroMap::new(); /// map.insert(&1, "one"); /// map.insert(&2, "two"); /// assert_eq!(map.get_by(|probe| probe.cmp(&1)), Some("one")); /// assert_eq!(map.get_by(|probe| probe.cmp(&3)), None); /// ``` pub fn get_by(&self, predicate: impl FnMut(&K) -> Ordering) -> Option<&V::GetType> { let index = self.keys.zvl_binary_search_by(predicate).ok()?; self.values.zvl_get(index) } /// Returns whether `key` is contained in this map /// /// ```rust /// use zerovec::ZeroMap; /// /// let mut map = ZeroMap::new(); /// map.insert(&1, "one"); /// map.insert(&2, "two"); /// assert!(map.contains_key(&1)); /// assert!(!map.contains_key(&3)); /// ``` pub fn contains_key(&self, key: &K) -> bool { self.keys.zvl_binary_search(key).is_ok() } /// Insert `value` with `key`, returning the existing value if it exists. 
/// /// ```rust /// use zerovec::ZeroMap; /// /// let mut map = ZeroMap::new(); /// map.insert(&1, "one"); /// map.insert(&2, "two"); /// assert_eq!(map.get(&1), Some("one")); /// assert_eq!(map.get(&3), None); /// ``` pub fn insert(&mut self, key: &K, value: &V) -> Option { match self.keys.zvl_binary_search(key) { Ok(index) => Some(self.values.zvl_replace(index, value)), Err(index) => { self.keys.zvl_insert(index, key); self.values.zvl_insert(index, value); None } } } /// Remove the value at `key`, returning it if it exists. /// /// ```rust /// use zerovec::ZeroMap; /// /// let mut map = ZeroMap::new(); /// map.insert(&1, "one"); /// map.insert(&2, "two"); /// assert_eq!(map.remove(&1), Some("one".to_owned().into_boxed_str())); /// assert_eq!(map.get(&1), None); /// ``` pub fn remove(&mut self, key: &K) -> Option { let idx = self.keys.zvl_binary_search(key).ok()?; self.keys.zvl_remove(idx); Some(self.values.zvl_remove(idx)) } /// Appends `value` with `key` to the end of the underlying vector, returning /// `key` and `value` _if it failed_. Useful for extending with an existing /// sorted list. 
/// ```rust /// use zerovec::ZeroMap; /// /// let mut map = ZeroMap::new(); /// assert!(map.try_append(&1, "uno").is_none()); /// assert!(map.try_append(&3, "tres").is_none()); /// /// let unsuccessful = map.try_append(&3, "tres-updated"); /// assert!(unsuccessful.is_some(), "append duplicate of last key"); /// /// let unsuccessful = map.try_append(&2, "dos"); /// assert!(unsuccessful.is_some(), "append out of order"); /// /// assert_eq!(map.get(&1), Some("uno")); /// /// // contains the original value for the key: 3 /// assert_eq!(map.get(&3), Some("tres")); /// /// // not appended since it wasn't in order /// assert_eq!(map.get(&2), None); /// ``` #[must_use] pub fn try_append<'b>(&mut self, key: &'b K, value: &'b V) -> Option<(&'b K, &'b V)> { if self.keys.zvl_len() != 0 { if let Some(last) = self.keys.zvl_get(self.keys.zvl_len() - 1) { if K::Container::t_cmp_get(key, last) != Ordering::Greater { return Some((key, value)); } } } self.keys.zvl_push(key); self.values.zvl_push(value); None } } impl<'a, K, V> ZeroMap<'a, K, V> where K: ZeroMapKV<'a> + ?Sized, V: ZeroMapKV<'a> + ?Sized, { /// Produce an ordered iterator over key-value pairs pub fn iter<'b>( &'b self, ) -> impl ExactSizeIterator< Item = ( &'b >::GetType, &'b >::GetType, ), > { (0..self.keys.zvl_len()).map(move |idx| { ( #[allow(clippy::unwrap_used)] // idx is in-range self.keys.zvl_get(idx).unwrap(), #[allow(clippy::unwrap_used)] // idx is in-range self.values.zvl_get(idx).unwrap(), ) }) } /// Produce an ordered iterator over keys pub fn iter_keys<'b>( &'b self, ) -> impl ExactSizeIterator>::GetType> { #[allow(clippy::unwrap_used)] // idx is in-range (0..self.keys.zvl_len()).map(move |idx| self.keys.zvl_get(idx).unwrap()) } /// Produce an iterator over values, ordered by keys pub fn iter_values<'b>( &'b self, ) -> impl ExactSizeIterator>::GetType> { #[allow(clippy::unwrap_used)] // idx is in-range (0..self.values.zvl_len()).map(move |idx| self.values.zvl_get(idx).unwrap()) } } impl<'a, K, V> 
ZeroMap<'a, K, V> where K: AsULE + ZeroMapKV<'a, Container = ZeroVec<'a, K>>, V: ZeroMapKV<'a> + ?Sized, { /// Cast a `ZeroMap` to `ZeroMap` where `K` and `P` are [`AsULE`] types /// with the same representation. /// /// # Unchecked Invariants /// /// If `K` and `P` have different ordering semantics, unexpected behavior may occur. pub fn cast_zv_k_unchecked

(self) -> ZeroMap<'a, P, V> where P: AsULE + ZeroMapKV<'a, Container = ZeroVec<'a, P>>, { ZeroMap { keys: self.keys.cast(), values: self.values, } } /// Convert a `ZeroMap` to `ZeroMap` where `K` and `P` are [`AsULE`] types /// with the same size. /// /// # Unchecked Invariants /// /// If `K` and `P` have different ordering semantics, unexpected behavior may occur. /// /// # Panics /// /// Panics if `K::ULE` and `P::ULE` are not the same size. pub fn try_convert_zv_k_unchecked

(self) -> Result, UleError> where P: AsULE + ZeroMapKV<'a, Container = ZeroVec<'a, P>>, { Ok(ZeroMap { keys: self.keys.try_into_converted()?, values: self.values, }) } } impl<'a, K, V> ZeroMap<'a, K, V> where K: ZeroMapKV<'a> + ?Sized, V: AsULE + ZeroMapKV<'a, Container = ZeroVec<'a, V>>, { /// Cast a `ZeroMap` to `ZeroMap` where `V` and `P` are [`AsULE`] types /// with the same representation. /// /// # Unchecked Invariants /// /// If `V` and `P` have different ordering semantics, unexpected behavior may occur. pub fn cast_zv_v_unchecked

(self) -> ZeroMap<'a, K, P> where P: AsULE + ZeroMapKV<'a, Container = ZeroVec<'a, P>>, { ZeroMap { keys: self.keys, values: self.values.cast(), } } /// Convert a `ZeroMap` to `ZeroMap` where `V` and `P` are [`AsULE`] types /// with the same size. /// /// # Unchecked Invariants /// /// If `V` and `P` have different ordering semantics, unexpected behavior may occur. /// /// # Panics /// /// Panics if `V::ULE` and `P::ULE` are not the same size. pub fn try_convert_zv_v_unchecked

(self) -> Result, UleError> where P: AsULE + ZeroMapKV<'a, Container = ZeroVec<'a, P>>, { Ok(ZeroMap { keys: self.keys, values: self.values.try_into_converted()?, }) } } impl<'a, K, V> ZeroMap<'a, K, V> where K: ZeroMapKV<'a> + ?Sized + Ord, V: ZeroMapKV<'a, Container = VarZeroVec<'a, V>> + ?Sized, V: VarULE, { /// Same as `insert()`, but allows using [EncodeAsVarULE](crate::ule::EncodeAsVarULE) /// types with the value to avoid an extra allocation when dealing with custom ULE types. /// /// ```rust /// use std::borrow::Cow; /// use zerovec::ZeroMap; /// /// #[zerovec::make_varule(PersonULE)] /// #[derive(Clone, Eq, PartialEq, Ord, PartialOrd)] /// struct Person<'a> { /// age: u8, /// name: Cow<'a, str>, /// } /// /// let mut map: ZeroMap = ZeroMap::new(); /// map.insert_var_v( /// &1, /// &Person { /// age: 20, /// name: "Joseph".into(), /// }, /// ); /// map.insert_var_v( /// &1, /// &Person { /// age: 35, /// name: "Carla".into(), /// }, /// ); /// assert_eq!(&map.get(&1).unwrap().name, "Carla"); /// assert!(map.get(&3).is_none()); /// ``` pub fn insert_var_v>(&mut self, key: &K, value: &VE) -> Option> { match self.keys.zvl_binary_search(key) { Ok(index) => { #[allow(clippy::unwrap_used)] // binary search let ret = self.values.get(index).unwrap().to_boxed(); self.values.make_mut().replace(index, value); Some(ret) } Err(index) => { self.keys.zvl_insert(index, key); self.values.make_mut().insert(index, value); None } } } // insert_var_k, insert_var_kv are not possible since one cannot perform the binary search with EncodeAsVarULE // though we might be able to do it in the future if we add a trait for cross-Ord requirements } impl<'a, K, V> ZeroMap<'a, K, V> where K: ZeroMapKV<'a> + ?Sized + Ord, V: Copy + ZeroMapKV<'a>, { /// For cases when `V` is fixed-size, obtain a direct copy of `V` instead of `V::ULE`. 
/// /// # Examples /// /// ```rust /// use zerovec::ZeroMap; /// /// let mut map = ZeroMap::new(); /// map.insert(&1, &'a'); /// map.insert(&2, &'b'); /// assert_eq!(map.get_copied(&1), Some('a')); /// assert_eq!(map.get_copied(&3), None); #[inline] pub fn get_copied(&self, key: &K) -> Option { let index = self.keys.zvl_binary_search(key).ok()?; self.get_copied_at(index) } /// Binary search the map with `predicate` to find a key, returning the value. /// /// For cases when `V` is fixed-size, use this method to obtain a direct copy of `V` /// instead of `V::ULE`. /// /// # Examples /// /// ```rust /// use zerovec::ZeroMap; /// /// let mut map = ZeroMap::new(); /// map.insert(&1, &'a'); /// map.insert(&2, &'b'); /// assert_eq!(map.get_copied_by(|probe| probe.cmp(&1)), Some('a')); /// assert_eq!(map.get_copied_by(|probe| probe.cmp(&3)), None); /// ``` #[inline] pub fn get_copied_by(&self, predicate: impl FnMut(&K) -> Ordering) -> Option { let index = self.keys.zvl_binary_search_by(predicate).ok()?; self.get_copied_at(index) } fn get_copied_at(&self, index: usize) -> Option { let ule = self.values.zvl_get(index)?; let mut result = Option::::None; V::Container::zvl_get_as_t(ule, |v| result.replace(*v)); #[allow(clippy::unwrap_used)] // `zvl_get_as_t` guarantees that the callback is invoked Some(result.unwrap()) } } impl<'a, K, V> ZeroMap<'a, K, V> where K: ZeroMapKV<'a> + ?Sized, V: AsULE + ZeroMapKV<'a, Container = ZeroVec<'a, V>>, { /// Similar to [`Self::iter()`] except it returns a direct copy of the values instead of references /// to `V::ULE`, in cases when `V` is fixed-size pub fn iter_copied_values<'b>( &'b self, ) -> impl Iterator>::GetType, V)> { (0..self.keys.zvl_len()).map(move |idx| { ( #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len() self.keys.zvl_get(idx).unwrap(), #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len() = values.zvl_len() ZeroSlice::get(&*self.values, idx).unwrap(), ) }) } } impl<'a, K, V> ZeroMap<'a, K, V> where K: AsULE + 
ZeroMapKV<'a, Container = ZeroVec<'a, K>>, V: AsULE + ZeroMapKV<'a, Container = ZeroVec<'a, V>>, { /// Similar to [`Self::iter()`] except it returns a direct copy of the keys values instead of references /// to `K::ULE` and `V::ULE`, in cases when `K` and `V` are fixed-size #[allow(clippy::needless_lifetimes)] // Lifetime is necessary in impl Trait pub fn iter_copied<'b>(&'b self) -> impl Iterator + 'b { let keys = &self.keys; let values = &self.values; (0..keys.len()).map(move |idx| { ( #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len() ZeroSlice::get(&**keys, idx).unwrap(), #[allow(clippy::unwrap_used)] // idx in 0..keys.zvl_len() = values.zvl_len() ZeroSlice::get(&**values, idx).unwrap(), ) }) } } impl<'a, K, V> From> for ZeroMap<'a, K, V> where K: ZeroMapKV<'a>, V: ZeroMapKV<'a>, K: ?Sized, V: ?Sized, { fn from(other: ZeroMapBorrowed<'a, K, V>) -> Self { Self { keys: K::Container::zvl_from_borrowed(other.keys), values: V::Container::zvl_from_borrowed(other.values), } } } // We can't use the default PartialEq because ZeroMap is invariant // so otherwise rustc will not automatically allow you to compare ZeroMaps // with different lifetimes impl<'a, 'b, K, V> PartialEq> for ZeroMap<'a, K, V> where K: for<'c> ZeroMapKV<'c> + ?Sized, V: for<'c> ZeroMapKV<'c> + ?Sized, >::Container: PartialEq<>::Container>, >::Container: PartialEq<>::Container>, { fn eq(&self, other: &ZeroMap<'b, K, V>) -> bool { self.keys.eq(&other.keys) && self.values.eq(&other.values) } } impl<'a, K, V> fmt::Debug for ZeroMap<'a, K, V> where K: ZeroMapKV<'a> + ?Sized, V: ZeroMapKV<'a> + ?Sized, >::Container: fmt::Debug, >::Container: fmt::Debug, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { f.debug_struct("ZeroMap") .field("keys", &self.keys) .field("values", &self.values) .finish() } } impl<'a, K, V> Clone for ZeroMap<'a, K, V> where K: ZeroMapKV<'a> + ?Sized, V: ZeroMapKV<'a> + ?Sized, >::Container: Clone, >::Container: Clone, { fn clone(&self) -> Self { Self { 
keys: self.keys.clone(), values: self.values.clone(), } } } impl<'a, A, B, K, V> FromIterator<(A, B)> for ZeroMap<'a, K, V> where A: Borrow, B: Borrow, K: ZeroMapKV<'a> + ?Sized + Ord, V: ZeroMapKV<'a> + ?Sized, { fn from_iter(iter: T) -> Self where T: IntoIterator, { let iter = iter.into_iter(); let mut map = match iter.size_hint() { (_, Some(upper)) => Self::with_capacity(upper), (lower, None) => Self::with_capacity(lower), }; for (key, value) in iter { if let Some((key, value)) = map.try_append(key.borrow(), value.borrow()) { map.insert(key, value); } } map } } zerovec-0.11.1/src/map/mod.rs000064400000000000000000000011631046102023000140530ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). //! See [`ZeroMap`](crate::ZeroMap) for details. mod borrowed; mod kv; #[allow(clippy::module_inception)] // module is purely internal pub(crate) mod map; mod vecs; #[cfg(feature = "databake")] mod databake; #[cfg(feature = "serde")] mod serde; #[cfg(feature = "serde")] mod serde_helpers; pub use crate::ZeroMap; pub use borrowed::ZeroMapBorrowed; pub use kv::ZeroMapKV; pub use vecs::{MutableZeroVecLike, ZeroVecLike}; zerovec-0.11.1/src/map/serde.rs000064400000000000000000000247061046102023000144060ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
use super::{MutableZeroVecLike, ZeroMap, ZeroMapBorrowed, ZeroMapKV, ZeroVecLike}; use core::fmt; use core::marker::PhantomData; use serde::de::{self, Deserialize, Deserializer, MapAccess, SeqAccess, Visitor}; #[cfg(feature = "serde")] use serde::ser::{Serialize, SerializeMap, SerializeSeq, Serializer}; /// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate #[cfg(feature = "serde")] impl<'a, K, V> Serialize for ZeroMap<'a, K, V> where K: ZeroMapKV<'a> + Serialize + ?Sized + Ord, V: ZeroMapKV<'a> + Serialize + ?Sized, K::Container: Serialize, V::Container: Serialize, { fn serialize(&self, serializer: S) -> Result where S: Serializer, { if serializer.is_human_readable() { // Many human-readable formats don't support values other // than numbers and strings as map keys. For them, we can serialize // as a vec of tuples instead if let Some(k) = self.iter_keys().next() { if !K::Container::zvl_get_as_t(k, super::serde_helpers::is_num_or_string) { let mut seq = serializer.serialize_seq(Some(self.len()))?; for (k, v) in self.iter() { K::Container::zvl_get_as_t(k, |k| { V::Container::zvl_get_as_t(v, |v| seq.serialize_element(&(k, v))) })?; } return seq.end(); } } let mut map = serializer.serialize_map(Some(self.len()))?; for (k, v) in self.iter() { K::Container::zvl_get_as_t(k, |k| map.serialize_key(k))?; V::Container::zvl_get_as_t(v, |v| map.serialize_value(v))?; } map.end() } else { (&self.keys, &self.values).serialize(serializer) } } } /// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate #[cfg(feature = "serde")] impl<'a, K, V> Serialize for ZeroMapBorrowed<'a, K, V> where K: ZeroMapKV<'a> + Serialize + ?Sized + Ord, V: ZeroMapKV<'a> + Serialize + ?Sized, K::Container: Serialize, V::Container: Serialize, { fn serialize(&self, serializer: S) -> Result where S: Serializer, { ZeroMap::::from(*self).serialize(serializer) } } /// Modified example from https://serde.rs/deserialize-map.html struct 
ZeroMapMapVisitor<'a, K, V> where K: ZeroMapKV<'a> + ?Sized + Ord, V: ZeroMapKV<'a> + ?Sized, { #[allow(clippy::type_complexity)] // it's a marker type, complexity doesn't matter marker: PhantomData (&'a K::OwnedType, &'a V::OwnedType)>, } impl<'a, K, V> ZeroMapMapVisitor<'a, K, V> where K: ZeroMapKV<'a> + ?Sized + Ord, V: ZeroMapKV<'a> + ?Sized, { fn new() -> Self { ZeroMapMapVisitor { marker: PhantomData, } } } impl<'a, 'de, K, V> Visitor<'de> for ZeroMapMapVisitor<'a, K, V> where K: ZeroMapKV<'a> + Ord + ?Sized, V: ZeroMapKV<'a> + ?Sized, K::OwnedType: Deserialize<'de>, V::OwnedType: Deserialize<'de>, { type Value = ZeroMap<'a, K, V>; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("a map produced by ZeroMap") } fn visit_seq(self, mut access: S) -> Result where S: SeqAccess<'de>, { let mut map = ZeroMap::with_capacity(access.size_hint().unwrap_or(0)); // While there are entries remaining in the input, add them // into our map. while let Some((key, value)) = access.next_element::<(K::OwnedType, V::OwnedType)>()? { // Try to append it at the end, hoping for a sorted map. // If not sorted, return an error // a serialized map that came from another ZeroMap if map .try_append( K::Container::owned_as_t(&key), V::Container::owned_as_t(&value), ) .is_some() { return Err(de::Error::custom( "ZeroMap's keys must be sorted while deserializing", )); } } Ok(map) } fn visit_map(self, mut access: M) -> Result where M: MapAccess<'de>, { let mut map = ZeroMap::with_capacity(access.size_hint().unwrap_or(0)); // While there are entries remaining in the input, add them // into our map. while let Some((key, value)) = access.next_entry::()? { // Try to append it at the end, hoping for a sorted map. 
// If not sorted, return an error // a serialized map that came from another ZeroMap if map .try_append( K::Container::owned_as_t(&key), V::Container::owned_as_t(&value), ) .is_some() { return Err(de::Error::custom( "ZeroMap's keys must be sorted while deserializing", )); } } Ok(map) } } /// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate impl<'de, 'a, K, V> Deserialize<'de> for ZeroMap<'a, K, V> where K: ZeroMapKV<'a> + Ord + ?Sized, V: ZeroMapKV<'a> + ?Sized, K::Container: Deserialize<'de>, V::Container: Deserialize<'de>, K::OwnedType: Deserialize<'de>, V::OwnedType: Deserialize<'de>, 'de: 'a, { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { if deserializer.is_human_readable() { deserializer.deserialize_any(ZeroMapMapVisitor::<'a, K, V>::new()) } else { let (keys, values): (K::Container, V::Container) = Deserialize::deserialize(deserializer)?; if keys.zvl_len() != values.zvl_len() { return Err(de::Error::custom( "Mismatched key and value sizes in ZeroMap", )); } // #1433: If keys are out of order, treat it as GIGO. 
debug_assert!(keys.zvl_is_ascending()); Ok(Self { keys, values }) } } } // /// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate impl<'de, 'a, K, V> Deserialize<'de> for ZeroMapBorrowed<'a, K, V> where K: ZeroMapKV<'a> + Ord + ?Sized, V: ZeroMapKV<'a> + ?Sized, K::Container: Deserialize<'de>, V::Container: Deserialize<'de>, K::OwnedType: Deserialize<'de>, V::OwnedType: Deserialize<'de>, 'de: 'a, { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { if deserializer.is_human_readable() { Err(de::Error::custom( "ZeroMapBorrowed cannot be deserialized from human-readable formats", )) } else { let deserialized: ZeroMap<'a, K, V> = ZeroMap::deserialize(deserializer)?; let keys = if let Some(keys) = deserialized.keys.zvl_as_borrowed_inner() { keys } else { return Err(de::Error::custom( "ZeroMapBorrowed can only deserialize in zero-copy ways", )); }; let values = if let Some(values) = deserialized.values.zvl_as_borrowed_inner() { values } else { return Err(de::Error::custom( "ZeroMapBorrowed can only deserialize in zero-copy ways", )); }; Ok(Self { keys, values }) } } } #[cfg(test)] #[allow(non_camel_case_types)] mod test { use crate::{map::ZeroMapBorrowed, ZeroMap}; #[derive(serde::Serialize, serde::Deserialize)] struct DeriveTest_ZeroMap<'data> { #[serde(borrow)] _data: ZeroMap<'data, str, [u8]>, } #[derive(serde::Serialize, serde::Deserialize)] struct DeriveTest_ZeroMapBorrowed<'data> { #[serde(borrow)] _data: ZeroMapBorrowed<'data, str, [u8]>, } const JSON_STR: &str = "{\"1\":\"uno\",\"2\":\"dos\",\"3\":\"tres\"}"; const BINCODE_BYTES: &[u8] = &[ 12, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 6, 0, 117, 110, 111, 100, 111, 115, 116, 114, 101, 115, ]; fn make_map() -> ZeroMap<'static, u32, str> { let mut map = ZeroMap::new(); map.insert(&1, "uno"); map.insert(&2, "dos"); map.insert(&3, "tres"); map } #[test] fn test_serde_json() { let map = make_map(); let json_str 
= serde_json::to_string(&map).expect("serialize"); assert_eq!(JSON_STR, json_str); let new_map: ZeroMap = serde_json::from_str(&json_str).expect("deserialize"); assert_eq!( new_map.iter().collect::>(), map.iter().collect::>() ); } #[test] fn test_serde_json_complex_key() { let mut map = ZeroMap::new(); map.insert(&(1, 1), "uno"); map.insert(&(2, 2), "dos"); map.insert(&(3, 3), "tres"); let json_str = serde_json::to_string(&map).expect("serialize"); assert_eq!( json_str, "[[[1,1],\"uno\"],[[2,2],\"dos\"],[[3,3],\"tres\"]]" ); let new_map: ZeroMap<(u32, u32), str> = serde_json::from_str(&json_str).expect("deserialize"); assert_eq!( new_map.iter().collect::>(), map.iter().collect::>() ); } #[test] fn test_bincode() { let map = make_map(); let bincode_bytes = bincode::serialize(&map).expect("serialize"); assert_eq!(BINCODE_BYTES, bincode_bytes); let new_map: ZeroMap = bincode::deserialize(&bincode_bytes).expect("deserialize"); assert_eq!( new_map.iter().collect::>(), map.iter().collect::>() ); let new_map: ZeroMapBorrowed = bincode::deserialize(&bincode_bytes).expect("deserialize"); assert_eq!( new_map.iter().collect::>(), map.iter().collect::>() ); } } zerovec-0.11.1/src/map/serde_helpers.rs000064400000000000000000000136541046102023000161300ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). // @@@@@@@@@@@@@@@@ // THIS FILE IS SHARED BETWEEN LITEMAP AND ZEROVEC. 
PLEASE KEEP IT IN SYNC FOR ALL EDITS // @@@@@@@@@@@@@@@@ use serde::ser::{Impossible, Serialize, Serializer}; pub fn is_num_or_string(k: &T) -> bool { // Serializer that errors in the same cases as serde_json::ser::MapKeySerializer struct MapKeySerializerDryRun; impl Serializer for MapKeySerializerDryRun { type Ok = (); // Singleton error type that implements serde::ser::Error type Error = core::fmt::Error; type SerializeSeq = Impossible<(), Self::Error>; type SerializeTuple = Impossible<(), Self::Error>; type SerializeTupleStruct = Impossible<(), Self::Error>; type SerializeTupleVariant = Impossible<(), Self::Error>; type SerializeMap = Impossible<(), Self::Error>; type SerializeStruct = Impossible<(), Self::Error>; type SerializeStructVariant = Impossible<(), Self::Error>; fn serialize_str(self, _value: &str) -> Result { Ok(()) } fn serialize_unit_variant( self, _name: &'static str, _variant_index: u32, _variant: &'static str, ) -> Result { Ok(()) } fn serialize_newtype_struct( self, _name: &'static str, value: &T, ) -> Result { // Recurse value.serialize(self) } fn serialize_bool(self, _value: bool) -> Result { Err(core::fmt::Error) } fn serialize_i8(self, _value: i8) -> Result { Ok(()) } fn serialize_i16(self, _value: i16) -> Result { Ok(()) } fn serialize_i32(self, _value: i32) -> Result { Ok(()) } fn serialize_i64(self, _value: i64) -> Result { Ok(()) } serde::serde_if_integer128! { fn serialize_i128(self, _value: i128) -> Result { Ok(()) } } fn serialize_u8(self, _value: u8) -> Result { Ok(()) } fn serialize_u16(self, _value: u16) -> Result { Ok(()) } fn serialize_u32(self, _value: u32) -> Result { Ok(()) } fn serialize_u64(self, _value: u64) -> Result { Ok(()) } serde::serde_if_integer128! 
{ fn serialize_u128(self, _value: u128) -> Result { Ok(()) } } fn serialize_f32(self, _value: f32) -> Result { Err(core::fmt::Error) } fn serialize_f64(self, _value: f64) -> Result { Err(core::fmt::Error) } fn serialize_char(self, _value: char) -> Result { Ok(()) } fn serialize_bytes(self, _value: &[u8]) -> Result { Err(core::fmt::Error) } fn serialize_unit(self) -> Result { Err(core::fmt::Error) } fn serialize_unit_struct(self, _name: &'static str) -> Result { Err(core::fmt::Error) } fn serialize_newtype_variant( self, _name: &'static str, _variant_index: u32, _variant: &'static str, _value: &T, ) -> Result { Err(core::fmt::Error) } fn serialize_none(self) -> Result { Err(core::fmt::Error) } fn serialize_some( self, _value: &T, ) -> Result { Err(core::fmt::Error) } fn serialize_seq(self, _len: Option) -> Result { Err(core::fmt::Error) } fn serialize_tuple(self, _len: usize) -> Result { Err(core::fmt::Error) } fn serialize_tuple_struct( self, _name: &'static str, _len: usize, ) -> Result { Err(core::fmt::Error) } fn serialize_tuple_variant( self, _name: &'static str, _variant_index: u32, _variant: &'static str, _len: usize, ) -> Result { Err(core::fmt::Error) } fn serialize_map(self, _len: Option) -> Result { Err(core::fmt::Error) } fn serialize_struct( self, _name: &'static str, _len: usize, ) -> Result { Err(core::fmt::Error) } fn serialize_struct_variant( self, _name: &'static str, _variant_index: u32, _variant: &'static str, _len: usize, ) -> Result { Err(core::fmt::Error) } fn collect_str( self, _value: &T, ) -> Result { Ok(()) } } k.serialize(MapKeySerializerDryRun).is_ok() } zerovec-0.11.1/src/map/vecs.rs000064400000000000000000000450431046102023000142410ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
use crate::ule::*; use crate::varzerovec::owned::VarZeroVecOwned; use crate::varzerovec::vec::VarZeroVecInner; use crate::vecs::VarZeroVecFormat; use crate::{VarZeroSlice, VarZeroVec}; use crate::{ZeroSlice, ZeroVec}; use alloc::boxed::Box; use alloc::vec::Vec; use core::cmp::Ordering; use core::mem; use core::ops::Range; /// Trait abstracting over [`ZeroVec`] and [`VarZeroVec`], for use in [`ZeroMap`](super::ZeroMap). **You /// should not be implementing or calling this trait directly.** /// /// The T type is the type received by [`Self::zvl_binary_search()`], as well as the one used /// for human-readable serialization. /// /// Methods are prefixed with `zvl_*` to avoid clashes with methods on the types themselves pub trait ZeroVecLike { /// The type returned by `Self::get()` type GetType: ?Sized + 'static; /// A fully borrowed version of this type SliceVariant: ZeroVecLike + ?Sized; /// Create a new, empty borrowed variant fn zvl_new_borrowed() -> &'static Self::SliceVariant; /// Search for a key in a sorted vector, returns `Ok(index)` if found, /// returns `Err(insert_index)` if not found, where `insert_index` is the /// index where it should be inserted to maintain sort order. fn zvl_binary_search(&self, k: &T) -> Result where T: Ord; /// Search for a key within a certain range in a sorted vector. /// Returns `None` if the range is out of bounds, and /// `Ok` or `Err` in the same way as `zvl_binary_search`. /// Indices are returned relative to the start of the range. fn zvl_binary_search_in_range( &self, k: &T, range: Range, ) -> Option> where T: Ord; /// Search for a key in a sorted vector by a predicate, returns `Ok(index)` if found, /// returns `Err(insert_index)` if not found, where `insert_index` is the /// index where it should be inserted to maintain sort order. fn zvl_binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result; /// Search for a key within a certain range in a sorted vector by a predicate. 
/// Returns `None` if the range is out of bounds, and /// `Ok` or `Err` in the same way as `zvl_binary_search`. /// Indices are returned relative to the start of the range. fn zvl_binary_search_in_range_by( &self, predicate: impl FnMut(&T) -> Ordering, range: Range, ) -> Option>; /// Get element at `index` fn zvl_get(&self, index: usize) -> Option<&Self::GetType>; /// The length of this vector fn zvl_len(&self) -> usize; /// Check if this vector is in ascending order according to `T`s `Ord` impl fn zvl_is_ascending(&self) -> bool where T: Ord, { if let Some(first) = self.zvl_get(0) { let mut prev = first; for i in 1..self.zvl_len() { #[allow(clippy::unwrap_used)] // looping over the valid indices let curr = self.zvl_get(i).unwrap(); if Self::get_cmp_get(prev, curr) != Ordering::Less { return false; } prev = curr; } } true } /// Check if this vector is empty fn zvl_is_empty(&self) -> bool { self.zvl_len() == 0 } /// Construct a borrowed variant by borrowing from `&self`. /// /// This function behaves like `&'b self -> Self::SliceVariant<'b>`, /// where `'b` is the lifetime of the reference to this object. /// /// Note: We rely on the compiler recognizing `'a` and `'b` as covariant and /// casting `&'b Self<'a>` to `&'b Self<'b>` when this gets called, which works /// out for `ZeroVec` and `VarZeroVec` containers just fine. fn zvl_as_borrowed(&self) -> &Self::SliceVariant; /// Compare this type with a `Self::GetType`. This must produce the same result as /// if `g` were converted to `Self` #[inline] fn t_cmp_get(t: &T, g: &Self::GetType) -> Ordering where T: Ord, { Self::zvl_get_as_t(g, |g| t.cmp(g)) } /// Compare two values of `Self::GetType`. 
This must produce the same result as /// if both `a` and `b` were converted to `Self` #[inline] fn get_cmp_get(a: &Self::GetType, b: &Self::GetType) -> Ordering where T: Ord, { Self::zvl_get_as_t(a, |a| Self::zvl_get_as_t(b, |b| a.cmp(b))) } /// Obtain a reference to T, passed to a closure /// /// This uses a callback because it's not possible to return owned-or-borrowed /// types without GATs /// /// Impls should guarantee that the callback function is be called exactly once. fn zvl_get_as_t(g: &Self::GetType, f: impl FnOnce(&T) -> R) -> R; } /// Trait abstracting over [`ZeroVec`] and [`VarZeroVec`], for use in [`ZeroMap`](super::ZeroMap). **You /// should not be implementing or calling this trait directly.** /// /// This trait augments [`ZeroVecLike`] with methods allowing for mutation of the underlying /// vector for owned vector types. /// /// Methods are prefixed with `zvl_*` to avoid clashes with methods on the types themselves pub trait MutableZeroVecLike<'a, T: ?Sized>: ZeroVecLike { /// The type returned by `Self::remove()` and `Self::replace()` type OwnedType; /// Insert an element at `index` fn zvl_insert(&mut self, index: usize, value: &T); /// Remove the element at `index` (panicking if nonexistant) fn zvl_remove(&mut self, index: usize) -> Self::OwnedType; /// Replace the element at `index` with another one, returning the old element fn zvl_replace(&mut self, index: usize, value: &T) -> Self::OwnedType; /// Push an element to the end of this vector fn zvl_push(&mut self, value: &T); /// Create a new, empty vector, with given capacity fn zvl_with_capacity(cap: usize) -> Self; /// Remove all elements from the vector fn zvl_clear(&mut self); /// Reserve space for `addl` additional elements fn zvl_reserve(&mut self, addl: usize); /// Applies the permutation such that `before.zvl_get(permutation[i]) == after.zvl_get(i)`. /// /// # Panics /// If `permutation` is not a valid permutation of length `zvl_len()`. 
fn zvl_permute(&mut self, permutation: &mut [usize]); /// Convert an owned value to a borrowed T fn owned_as_t(o: &Self::OwnedType) -> &T; /// Construct from the borrowed version of the type /// /// These are useful to ensure serialization parity between borrowed and owned versions fn zvl_from_borrowed(b: &'a Self::SliceVariant) -> Self; /// Extract the inner borrowed variant if possible. Returns `None` if the data is owned. /// /// This function behaves like `&'_ self -> Self::SliceVariant<'a>`, /// where `'a` is the lifetime of this object's borrowed data. /// /// This function is similar to matching the `Borrowed` variant of `ZeroVec` /// or `VarZeroVec`, returning the inner borrowed type. fn zvl_as_borrowed_inner(&self) -> Option<&'a Self::SliceVariant>; } impl<'a, T> ZeroVecLike for ZeroVec<'a, T> where T: 'a + AsULE + Copy, { type GetType = T::ULE; type SliceVariant = ZeroSlice; fn zvl_new_borrowed() -> &'static Self::SliceVariant { ZeroSlice::::new_empty() } fn zvl_binary_search(&self, k: &T) -> Result where T: Ord, { ZeroSlice::binary_search(self, k) } fn zvl_binary_search_in_range(&self, k: &T, range: Range) -> Option> where T: Ord, { let zs: &ZeroSlice = self; zs.zvl_binary_search_in_range(k, range) } fn zvl_binary_search_by( &self, mut predicate: impl FnMut(&T) -> Ordering, ) -> Result { ZeroSlice::binary_search_by(self, |probe| predicate(&probe)) } fn zvl_binary_search_in_range_by( &self, predicate: impl FnMut(&T) -> Ordering, range: Range, ) -> Option> { let zs: &ZeroSlice = self; zs.zvl_binary_search_in_range_by(predicate, range) } fn zvl_get(&self, index: usize) -> Option<&T::ULE> { self.get_ule_ref(index) } fn zvl_len(&self) -> usize { ZeroSlice::len(self) } fn zvl_as_borrowed(&self) -> &ZeroSlice { self } #[inline] fn zvl_get_as_t(g: &Self::GetType, f: impl FnOnce(&T) -> R) -> R { f(&T::from_unaligned(*g)) } } impl ZeroVecLike for ZeroSlice where T: AsULE + Copy, { type GetType = T::ULE; type SliceVariant = ZeroSlice; fn zvl_new_borrowed() -> 
&'static Self::SliceVariant { ZeroSlice::::new_empty() } fn zvl_binary_search(&self, k: &T) -> Result where T: Ord, { ZeroSlice::binary_search(self, k) } fn zvl_binary_search_in_range(&self, k: &T, range: Range) -> Option> where T: Ord, { let subslice = self.get_subslice(range)?; Some(ZeroSlice::binary_search(subslice, k)) } fn zvl_binary_search_by( &self, mut predicate: impl FnMut(&T) -> Ordering, ) -> Result { ZeroSlice::binary_search_by(self, |probe| predicate(&probe)) } fn zvl_binary_search_in_range_by( &self, mut predicate: impl FnMut(&T) -> Ordering, range: Range, ) -> Option> { let subslice = self.get_subslice(range)?; Some(ZeroSlice::binary_search_by(subslice, |probe| { predicate(&probe) })) } fn zvl_get(&self, index: usize) -> Option<&T::ULE> { self.get_ule_ref(index) } fn zvl_len(&self) -> usize { ZeroSlice::len(self) } fn zvl_as_borrowed(&self) -> &ZeroSlice { self } #[inline] fn zvl_get_as_t(g: &Self::GetType, f: impl FnOnce(&T) -> R) -> R { f(&T::from_unaligned(*g)) } } impl<'a, T> MutableZeroVecLike<'a, T> for ZeroVec<'a, T> where T: AsULE + Copy + 'static, { type OwnedType = T; fn zvl_insert(&mut self, index: usize, value: &T) { self.with_mut(|v| v.insert(index, value.to_unaligned())) } fn zvl_remove(&mut self, index: usize) -> T { T::from_unaligned(self.with_mut(|v| v.remove(index))) } fn zvl_replace(&mut self, index: usize, value: &T) -> T { #[allow(clippy::indexing_slicing)] let unaligned = self.with_mut(|vec| { debug_assert!(index < vec.len()); mem::replace(&mut vec[index], value.to_unaligned()) }); T::from_unaligned(unaligned) } fn zvl_push(&mut self, value: &T) { self.with_mut(|v| v.push(value.to_unaligned())) } fn zvl_with_capacity(cap: usize) -> Self { if cap == 0 { ZeroVec::new() } else { ZeroVec::new_owned(Vec::with_capacity(cap)) } } fn zvl_clear(&mut self) { self.with_mut(|v| v.clear()) } fn zvl_reserve(&mut self, addl: usize) { self.with_mut(|v| v.reserve(addl)) } fn owned_as_t(o: &Self::OwnedType) -> &T { o } fn zvl_from_borrowed(b: &'a 
ZeroSlice) -> Self { b.as_zerovec() } fn zvl_as_borrowed_inner(&self) -> Option<&'a ZeroSlice> { self.as_maybe_borrowed() } #[allow(clippy::indexing_slicing)] // documented panic fn zvl_permute(&mut self, permutation: &mut [usize]) { assert_eq!(permutation.len(), self.zvl_len()); let vec = self.to_mut_slice(); for cycle_start in 0..permutation.len() { let mut curr = cycle_start; let mut next = permutation[curr]; while next != cycle_start { vec.swap(curr, next); // Make curr a self-cycle so we don't use it as a cycle_start later permutation[curr] = curr; curr = next; next = permutation[next]; } permutation[curr] = curr; } } } impl<'a, T, F> ZeroVecLike for VarZeroVec<'a, T, F> where T: VarULE, T: ?Sized, F: VarZeroVecFormat, { type GetType = T; type SliceVariant = VarZeroSlice; fn zvl_new_borrowed() -> &'static Self::SliceVariant { VarZeroSlice::::new_empty() } fn zvl_binary_search(&self, k: &T) -> Result where T: Ord, { self.binary_search(k) } fn zvl_binary_search_in_range(&self, k: &T, range: Range) -> Option> where T: Ord, { self.binary_search_in_range(k, range) } fn zvl_binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result { self.binary_search_by(predicate) } fn zvl_binary_search_in_range_by( &self, predicate: impl FnMut(&T) -> Ordering, range: Range, ) -> Option> { self.binary_search_in_range_by(predicate, range) } fn zvl_get(&self, index: usize) -> Option<&T> { self.get(index) } fn zvl_len(&self) -> usize { self.len() } fn zvl_as_borrowed(&self) -> &VarZeroSlice { self.as_slice() } #[inline] fn zvl_get_as_t(g: &Self::GetType, f: impl FnOnce(&T) -> R) -> R { f(g) } } impl ZeroVecLike for VarZeroSlice where T: VarULE, T: ?Sized, F: VarZeroVecFormat, { type GetType = T; type SliceVariant = VarZeroSlice; fn zvl_new_borrowed() -> &'static Self::SliceVariant { VarZeroSlice::::new_empty() } fn zvl_binary_search(&self, k: &T) -> Result where T: Ord, { self.binary_search(k) } fn zvl_binary_search_in_range(&self, k: &T, range: Range) -> Option> where 
T: Ord, { self.binary_search_in_range(k, range) } fn zvl_binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result { self.binary_search_by(predicate) } fn zvl_binary_search_in_range_by( &self, predicate: impl FnMut(&T) -> Ordering, range: Range, ) -> Option> { self.binary_search_in_range_by(predicate, range) } fn zvl_get(&self, index: usize) -> Option<&T> { self.get(index) } fn zvl_len(&self) -> usize { self.len() } fn zvl_as_borrowed(&self) -> &VarZeroSlice { self } #[inline] fn zvl_get_as_t(g: &Self::GetType, f: impl FnOnce(&T) -> R) -> R { f(g) } } impl<'a, T, F> MutableZeroVecLike<'a, T> for VarZeroVec<'a, T, F> where T: VarULE, T: ?Sized, F: VarZeroVecFormat, { type OwnedType = Box; fn zvl_insert(&mut self, index: usize, value: &T) { self.make_mut().insert(index, value) } fn zvl_remove(&mut self, index: usize) -> Box { let vec = self.make_mut(); debug_assert!(index < vec.len()); #[allow(clippy::unwrap_used)] let old = vec.get(index).unwrap().to_boxed(); vec.remove(index); old } fn zvl_replace(&mut self, index: usize, value: &T) -> Box { let vec = self.make_mut(); debug_assert!(index < vec.len()); #[allow(clippy::unwrap_used)] let old = vec.get(index).unwrap().to_boxed(); vec.replace(index, value); old } fn zvl_push(&mut self, value: &T) { let len = self.len(); self.make_mut().insert(len, value) } fn zvl_with_capacity(cap: usize) -> Self { if cap == 0 { VarZeroVec::new() } else { Self::from(VarZeroVecOwned::with_capacity(cap)) } } fn zvl_clear(&mut self) { self.make_mut().clear() } fn zvl_reserve(&mut self, addl: usize) { self.make_mut().reserve(addl) } fn owned_as_t(o: &Self::OwnedType) -> &T { o } fn zvl_from_borrowed(b: &'a VarZeroSlice) -> Self { b.as_varzerovec() } fn zvl_as_borrowed_inner(&self) -> Option<&'a VarZeroSlice> { if let Self(VarZeroVecInner::Borrowed(b)) = *self { Some(b) } else { None } } #[allow(clippy::unwrap_used)] // documented panic fn zvl_permute(&mut self, permutation: &mut [usize]) { assert_eq!(permutation.len(), 
self.zvl_len()); let mut result = VarZeroVecOwned::new(); for &i in permutation.iter() { result.push(self.get(i).unwrap()); } *self = Self(VarZeroVecInner::Owned(result)); } } #[cfg(test)] mod test { use super::*; #[test] fn test_zerovec_binary_search_in_range() { let zv: ZeroVec = ZeroVec::from_slice_or_alloc(&[11, 22, 33, 44, 55, 66, 77]); // Full range search assert_eq!(zv.zvl_binary_search_in_range(&11, 0..7), Some(Ok(0))); assert_eq!(zv.zvl_binary_search_in_range(&12, 0..7), Some(Err(1))); assert_eq!(zv.zvl_binary_search_in_range(&44, 0..7), Some(Ok(3))); assert_eq!(zv.zvl_binary_search_in_range(&45, 0..7), Some(Err(4))); assert_eq!(zv.zvl_binary_search_in_range(&77, 0..7), Some(Ok(6))); assert_eq!(zv.zvl_binary_search_in_range(&78, 0..7), Some(Err(7))); // Out-of-range search assert_eq!(zv.zvl_binary_search_in_range(&44, 0..2), Some(Err(2))); assert_eq!(zv.zvl_binary_search_in_range(&44, 5..7), Some(Err(0))); // Offset search assert_eq!(zv.zvl_binary_search_in_range(&44, 2..5), Some(Ok(1))); assert_eq!(zv.zvl_binary_search_in_range(&45, 2..5), Some(Err(2))); // Out-of-bounds assert_eq!(zv.zvl_binary_search_in_range(&44, 0..100), None); assert_eq!(zv.zvl_binary_search_in_range(&44, 100..200), None); } #[test] fn test_permute() { let mut zv: ZeroVec = ZeroVec::from_slice_or_alloc(&[11, 22, 33, 44, 55, 66, 77]); let mut permutation = vec![3, 2, 1, 0, 6, 5, 4]; zv.zvl_permute(&mut permutation); assert_eq!(&zv, &[44, 33, 22, 11, 77, 66, 55]); let mut vzv: VarZeroVec = VarZeroVec::from( VarZeroVecOwned::try_from_elements(&["11", "22", "33", "44", "55", "66", "77"]) .unwrap(), ); let mut permutation = vec![3, 2, 1, 0, 6, 5, 4]; vzv.zvl_permute(&mut permutation); assert_eq!(&vzv, &["44", "33", "22", "11", "77", "66", "55"]); } } zerovec-0.11.1/src/map2d/borrowed.rs000064400000000000000000000232641046102023000153530ustar 00000000000000// This file is part of ICU4X. 
For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::ZeroSlice; use core::cmp::Ordering; use core::fmt; use crate::map::ZeroMapKV; use crate::map::ZeroVecLike; use crate::map2d::ZeroMap2dCursor; /// A borrowed-only version of [`ZeroMap2d`](super::ZeroMap2d) /// /// This is useful for fully-zero-copy deserialization from non-human-readable /// serialization formats. It also has the advantage that it can return references that live for /// the lifetime of the backing buffer as opposed to that of the [`ZeroMap2dBorrowed`] instance. /// /// # Examples /// /// ``` /// use zerovec::maps::ZeroMap2dBorrowed; /// /// // Example byte buffer representing the map { 1: {2: "three" } } /// let BINCODE_BYTES: &[u8; 47] = &[ /// 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, /// 0, 0, 0, 0, 0, 0, 2, 0, 7, 0, 0, 0, 0, 0, 0, 0, 1, 0, 116, 104, 114, /// 101, 101, /// ]; /// /// // Deserializing to ZeroMap2d requires no heap allocations. 
/// let zero_map: ZeroMap2dBorrowed = /// bincode::deserialize(BINCODE_BYTES) /// .expect("Should deserialize successfully"); /// assert_eq!(zero_map.get_2d(&1, &2), Some("three")); /// ``` /// /// This can be obtained from a [`ZeroMap2d`](super::ZeroMap2d) via [`ZeroMap2d::as_borrowed`](super::ZeroMap2d::as_borrowed) pub struct ZeroMap2dBorrowed<'a, K0, K1, V> where K0: ZeroMapKV<'a>, K1: ZeroMapKV<'a>, V: ZeroMapKV<'a>, K0: ?Sized, K1: ?Sized, V: ?Sized, { pub(crate) keys0: &'a K0::Slice, pub(crate) joiner: &'a ZeroSlice, pub(crate) keys1: &'a K1::Slice, pub(crate) values: &'a V::Slice, } impl<'a, K0, K1, V> Copy for ZeroMap2dBorrowed<'a, K0, K1, V> where K0: ZeroMapKV<'a>, K1: ZeroMapKV<'a>, V: ZeroMapKV<'a>, K0: ?Sized, K1: ?Sized, V: ?Sized, { } impl<'a, K0, K1, V> Clone for ZeroMap2dBorrowed<'a, K0, K1, V> where K0: ZeroMapKV<'a>, K1: ZeroMapKV<'a>, V: ZeroMapKV<'a>, K0: ?Sized, K1: ?Sized, V: ?Sized, { fn clone(&self) -> Self { *self } } impl<'a, K0, K1, V> Default for ZeroMap2dBorrowed<'a, K0, K1, V> where K0: ZeroMapKV<'a>, K1: ZeroMapKV<'a>, V: ZeroMapKV<'a>, K0::Slice: 'static, K1::Slice: 'static, V::Slice: 'static, K0: ?Sized, K1: ?Sized, V: ?Sized, { fn default() -> Self { Self::new() } } impl<'a, K0, K1, V> ZeroMap2dBorrowed<'a, K0, K1, V> where K0: ZeroMapKV<'a>, K1: ZeroMapKV<'a>, V: ZeroMapKV<'a>, K0::Slice: 'static, K1::Slice: 'static, V::Slice: 'static, K0: ?Sized, K1: ?Sized, V: ?Sized, { /// Creates a new, empty `ZeroMap2dBorrowed`. /// /// Note: Since [`ZeroMap2dBorrowed`] is not mutable, the return value will be a stub unless /// converted into a [`ZeroMap2d`](super::ZeroMap2d). 
/// /// # Examples /// /// ``` /// use zerovec::maps::ZeroMap2dBorrowed; /// /// let zm: ZeroMap2dBorrowed = ZeroMap2dBorrowed::new(); /// assert!(zm.is_empty()); /// ``` pub fn new() -> Self { Self { keys0: K0::Container::zvl_new_borrowed(), joiner: Default::default(), keys1: K1::Container::zvl_new_borrowed(), values: V::Container::zvl_new_borrowed(), } } } impl<'a, K0, K1, V> ZeroMap2dBorrowed<'a, K0, K1, V> where K0: ZeroMapKV<'a>, K1: ZeroMapKV<'a>, V: ZeroMapKV<'a>, K0: ?Sized, K1: ?Sized, V: ?Sized, { #[doc(hidden)] // databake internal pub const unsafe fn from_parts_unchecked( keys0: &'a K0::Slice, joiner: &'a ZeroSlice, keys1: &'a K1::Slice, values: &'a V::Slice, ) -> Self { Self { keys0, joiner, keys1, values, } } /// The number of elements in the [`ZeroMap2dBorrowed`] pub fn len(&self) -> usize { self.values.zvl_len() } /// Whether the [`ZeroMap2dBorrowed`] is empty pub fn is_empty(&self) -> bool { self.values.zvl_len() == 0 } } impl<'a, K0, K1, V> ZeroMap2dBorrowed<'a, K0, K1, V> where K0: ZeroMapKV<'a> + Ord, K1: ZeroMapKV<'a> + Ord, V: ZeroMapKV<'a>, K0: ?Sized, K1: ?Sized, V: ?Sized, { /// Get the value associated with `key0` and `key1`, if it exists. /// /// This is able to return values that live longer than the map itself /// since they borrow directly from the backing buffer. This is the /// primary advantage of using [`ZeroMap2dBorrowed`](super::ZeroMap2dBorrowed) over [`ZeroMap2d`](super::ZeroMap2d). 
/// /// ```rust /// use zerovec::ZeroMap2d; /// /// let mut map = ZeroMap2d::new(); /// map.insert(&1, "one", "foo"); /// map.insert(&2, "one", "bar"); /// map.insert(&2, "two", "baz"); /// /// let borrowed = map.as_borrowed(); /// assert_eq!(borrowed.get_2d(&1, "one"), Some("foo")); /// assert_eq!(borrowed.get_2d(&1, "two"), None); /// assert_eq!(borrowed.get_2d(&2, "one"), Some("bar")); /// assert_eq!(borrowed.get_2d(&2, "two"), Some("baz")); /// assert_eq!(borrowed.get_2d(&3, "three"), None); /// ``` pub fn get_2d(&self, key0: &K0, key1: &K1) -> Option<&'a V::GetType> { self.get0(key0)?.get1(key1) } } impl<'a, K0, K1, V> ZeroMap2dBorrowed<'a, K0, K1, V> where K0: ZeroMapKV<'a> + Ord, K1: ZeroMapKV<'a>, V: ZeroMapKV<'a>, K0: ?Sized, K1: ?Sized, V: ?Sized, { /// Gets a cursor for `key0`. If `None`, then `key0` is not in the map. If `Some`, /// then `key0` is in the map, and `key1` can be queried. /// /// ```rust /// use zerovec::ZeroMap2d; /// /// let mut map = ZeroMap2d::new(); /// map.insert(&1, "one", "foo"); /// map.insert(&2, "two", "bar"); /// let borrowed = map.as_borrowed(); /// assert!(matches!(borrowed.get0(&1), Some(_))); /// assert!(matches!(borrowed.get0(&3), None)); /// ``` #[inline] pub fn get0<'l>(&'l self, key0: &K0) -> Option> { let key0_index = self.keys0.zvl_binary_search(key0).ok()?; Some(ZeroMap2dCursor::from_borrowed(self, key0_index)) } /// Binary search the map for `key0`, returning a cursor. 
/// /// ```rust /// use zerovec::ZeroMap2d; /// /// let mut map = ZeroMap2d::new(); /// map.insert(&1, "one", "foo"); /// map.insert(&2, "two", "bar"); /// let borrowed = map.as_borrowed(); /// assert!(matches!(borrowed.get0_by(|probe| probe.cmp(&1)), Some(_))); /// assert!(matches!(borrowed.get0_by(|probe| probe.cmp(&3)), None)); /// ``` pub fn get0_by<'l>( &'l self, predicate: impl FnMut(&K0) -> Ordering, ) -> Option> { let key0_index = self.keys0.zvl_binary_search_by(predicate).ok()?; Some(ZeroMap2dCursor::from_borrowed(self, key0_index)) } /// Returns whether `key0` is contained in this map /// /// ```rust /// use zerovec::ZeroMap2d; /// /// let mut map = ZeroMap2d::new(); /// map.insert(&1, "one", "foo"); /// map.insert(&2, "two", "bar"); /// let borrowed = map.as_borrowed(); /// assert!(borrowed.contains_key0(&1)); /// assert!(!borrowed.contains_key0(&3)); /// ``` pub fn contains_key0(&self, key0: &K0) -> bool { self.keys0.zvl_binary_search(key0).is_ok() } } impl<'a, K0, K1, V> ZeroMap2dBorrowed<'a, K0, K1, V> where K0: ZeroMapKV<'a>, K1: ZeroMapKV<'a>, V: ZeroMapKV<'a>, K0: ?Sized, K1: ?Sized, V: ?Sized, { /// Produce an ordered iterator over keys0 pub fn iter0<'l>(&'l self) -> impl Iterator> + 'l { (0..self.keys0.zvl_len()).map(move |idx| ZeroMap2dCursor::from_borrowed(self, idx)) } } impl<'a, K0, K1, V> ZeroMap2dBorrowed<'a, K0, K1, V> where K0: ZeroMapKV<'a> + Ord, K1: ZeroMapKV<'a> + Ord, V: ZeroMapKV<'a>, V: Copy, K0: ?Sized, K1: ?Sized, { /// For cases when `V` is fixed-size, obtain a direct copy of `V` instead of `V::ULE` pub fn get_copied_2d(&self, key0: &K0, key1: &K1) -> Option { self.get0(key0)?.get1_copied(key1) } } // We can't use the default PartialEq because ZeroMap2d is invariant // so otherwise rustc will not automatically allow you to compare ZeroMaps // with different lifetimes impl<'a, 'b, K0, K1, V> PartialEq> for ZeroMap2dBorrowed<'a, K0, K1, V> where K0: for<'c> ZeroMapKV<'c> + ?Sized, K1: for<'c> ZeroMapKV<'c> + ?Sized, V: for<'c> 
ZeroMapKV<'c> + ?Sized, >::Slice: PartialEq<>::Slice>, >::Slice: PartialEq<>::Slice>, >::Slice: PartialEq<>::Slice>, { fn eq(&self, other: &ZeroMap2dBorrowed<'b, K0, K1, V>) -> bool { self.keys0.eq(other.keys0) && self.joiner.eq(other.joiner) && self.keys1.eq(other.keys1) && self.values.eq(other.values) } } impl<'a, K0, K1, V> fmt::Debug for ZeroMap2dBorrowed<'a, K0, K1, V> where K0: ZeroMapKV<'a> + ?Sized, K1: ZeroMapKV<'a> + ?Sized, V: ZeroMapKV<'a> + ?Sized, K0::Slice: fmt::Debug, K1::Slice: fmt::Debug, V::Slice: fmt::Debug, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { f.debug_struct("ZeroMap2dBorrowed") .field("keys0", &self.keys0) .field("joiner", &self.joiner) .field("keys1", &self.keys1) .field("values", &self.values) .finish() } } zerovec-0.11.1/src/map2d/cursor.rs000064400000000000000000000312301046102023000150350ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::{ZeroMap2d, ZeroSlice}; use core::cmp::Ordering; use core::fmt; use core::ops::Range; use crate::map::ZeroMapKV; use crate::map::ZeroVecLike; use super::ZeroMap2dBorrowed; /// An intermediate state of queries over [`ZeroMap2d`] and [`ZeroMap2dBorrowed`]. 
pub struct ZeroMap2dCursor<'l, 'a, K0, K1, V>
where
    K0: ZeroMapKV<'a>,
    K1: ZeroMapKV<'a>,
    V: ZeroMapKV<'a>,
    K0: ?Sized,
    K1: ?Sized,
    V: ?Sized,
{
    // Invariant: these fields have the same invariants as they do in ZeroMap2d
    keys0: &'l K0::Slice,
    joiner: &'l ZeroSlice<u32>,
    keys1: &'l K1::Slice,
    values: &'l V::Slice,
    // Invariant: key0_index is in range
    key0_index: usize,
}

impl<'a, K0, K1, V> ZeroMap2dCursor<'a, 'a, K0, K1, V>
where
    K0: ZeroMapKV<'a>,
    K1: ZeroMapKV<'a>,
    V: ZeroMapKV<'a>,
    K0: ?Sized,
    K1: ?Sized,
    V: ?Sized,
{
    /// Creates a cursor over a borrowed map, positioned at `key0_index`.
    ///
    /// `key0_index` must be in range
    pub(crate) fn from_borrowed(
        borrowed: &ZeroMap2dBorrowed<'a, K0, K1, V>,
        key0_index: usize,
    ) -> Self {
        debug_assert!(key0_index < borrowed.joiner.len());
        ZeroMap2dCursor {
            keys0: borrowed.keys0,
            joiner: borrowed.joiner,
            keys1: borrowed.keys1,
            values: borrowed.values,
            key0_index,
        }
    }
}

impl<'l, 'a, K0, K1, V> ZeroMap2dCursor<'l, 'a, K0, K1, V>
where
    K0: ZeroMapKV<'a>,
    K1: ZeroMapKV<'a>,
    V: ZeroMapKV<'a>,
    K0: ?Sized,
    K1: ?Sized,
    V: ?Sized,
{
    /// Creates a cursor over an owned-or-borrowed map, positioned at `key0_index`.
    ///
    /// `key0_index` must be in range
    pub(crate) fn from_cow(cow: &'l ZeroMap2d<'a, K0, K1, V>, key0_index: usize) -> Self {
        debug_assert!(key0_index < cow.joiner.len());
        Self {
            keys0: cow.keys0.zvl_as_borrowed(),
            joiner: &cow.joiner,
            keys1: cow.keys1.zvl_as_borrowed(),
            values: cow.values.zvl_as_borrowed(),
            key0_index,
        }
    }

    /// Returns the key0 corresponding to the cursor position.
    ///
    /// ```rust
    /// use zerovec::ZeroMap2d;
    ///
    /// let mut map = ZeroMap2d::new();
    /// map.insert("one", &1u32, "foo");
    /// assert_eq!(map.get0("one").unwrap().key0(), "one");
    /// ```
    pub fn key0(&self) -> &'l K0::GetType {
        #[allow(clippy::unwrap_used)] // safe by invariant on `self.key0_index`
        self.keys0.zvl_get(self.key0_index).unwrap()
    }

    /// Borrow an ordered iterator over keys1 and values for a particular key0.
    ///
    /// To get the values as copy types, see [`Self::iter1_copied`].
    ///
    /// For an example, see [`ZeroMap2d::iter0()`].
pub fn iter1( &self, ) -> impl DoubleEndedIterator< Item = ( &'l >::GetType, &'l >::GetType, ), > + ExactSizeIterator + '_ { let range = self.get_range(); #[allow(clippy::unwrap_used)] // `self.get_range()` returns a valid range range.map(move |idx| { ( self.keys1.zvl_get(idx).unwrap(), self.values.zvl_get(idx).unwrap(), ) }) } /// Transform this cursor into an ordered iterator over keys1 for a particular key0. pub fn into_iter1( self, ) -> impl DoubleEndedIterator< Item = ( &'l >::GetType, &'l >::GetType, ), > + ExactSizeIterator { let range = self.get_range(); #[allow(clippy::unwrap_used)] // `self.get_range()` returns a valid range range.map(move |idx| { ( self.keys1.zvl_get(idx).unwrap(), self.values.zvl_get(idx).unwrap(), ) }) } /// Given key0_index, returns the corresponding range of keys1, which will be valid pub(super) fn get_range(&self) -> Range { debug_assert!(self.key0_index < self.joiner.len()); let start = if self.key0_index == 0 { 0 } else { #[allow(clippy::unwrap_used)] // protected by the debug_assert above self.joiner.get(self.key0_index - 1).unwrap() }; #[allow(clippy::unwrap_used)] // protected by the debug_assert above let limit = self.joiner.get(self.key0_index).unwrap(); // These two assertions are true based on the invariants of ZeroMap2d debug_assert!(start < limit); debug_assert!((limit as usize) <= self.values.zvl_len()); (start as usize)..(limit as usize) } } impl<'l, 'a, K0, K1, V> ZeroMap2dCursor<'l, 'a, K0, K1, V> where K0: ZeroMapKV<'a>, K1: ZeroMapKV<'a>, V: ZeroMapKV<'a>, K0: ?Sized, K1: ?Sized, V: Copy, { /// Borrow an ordered iterator over keys1 and values for a particular key0. /// /// The values are returned as copy types. 
/// /// # Examples /// /// ``` /// use zerovec::ZeroMap2d; /// /// let zm2d: ZeroMap2d = /// [("a", 0u8, 1u16), ("b", 1u8, 1000u16), ("b", 2u8, 2000u16)] /// .into_iter() /// .collect(); /// /// let mut total_value = 0; /// /// for cursor in zm2d.iter0() { /// for (_, value) in cursor.iter1_copied() { /// total_value += value; /// } /// } /// /// assert_eq!(total_value, 3001); /// ``` pub fn iter1_copied( &self, ) -> impl DoubleEndedIterator>::GetType, V)> + ExactSizeIterator + '_ { let range = self.get_range(); #[allow(clippy::unwrap_used)] // `self.get_range()` returns a valid range range.map(move |idx| { ( self.keys1.zvl_get(idx).unwrap(), self.get1_copied_at(idx).unwrap(), ) }) } /// Transform this cursor into an ordered iterator over keys1 for a particular key0. /// /// The values are returned as copy types. /// /// # Examples /// /// ``` /// use zerovec::ZeroMap2d; /// /// let zm2d: ZeroMap2d = /// [("a", 0u8, 1u16), ("b", 1u8, 1000u16), ("b", 2u8, 2000u16)] /// .into_iter() /// .collect(); /// /// let mut total_value = 0; /// /// for cursor in zm2d.iter0() { /// for (_, value) in cursor.into_iter1_copied() { /// total_value += value; /// } /// } /// /// assert_eq!(total_value, 3001); /// ``` pub fn into_iter1_copied( self, ) -> impl DoubleEndedIterator>::GetType, V)> + ExactSizeIterator { let range = self.get_range(); #[allow(clippy::unwrap_used)] // `self.get_range()` returns a valid range range.map(move |idx| { ( self.keys1.zvl_get(idx).unwrap(), self.get1_copied_at(idx).unwrap(), ) }) } fn get1_copied_at(&self, index: usize) -> Option { let ule = self.values.zvl_get(index)?; let mut result = Option::::None; V::Container::zvl_get_as_t(ule, |v| result.replace(*v)); #[allow(clippy::unwrap_used)] // `zvl_get_as_t` guarantees that the callback is invoked Some(result.unwrap()) } } impl<'l, 'a, K0, K1, V> ZeroMap2dCursor<'l, 'a, K0, K1, V> where K0: ZeroMapKV<'a>, K1: ZeroMapKV<'a> + Ord, V: ZeroMapKV<'a>, K0: ?Sized, K1: ?Sized, V: ?Sized, { /// Gets the value 
for a key1 from this cursor, or `None` if key1 is not in the map. /// /// ```rust /// use zerovec::ZeroMap2d; /// /// let mut map = ZeroMap2d::new(); /// map.insert("one", &1u32, "foo"); /// assert_eq!(map.get0("one").unwrap().get1(&1), Some("foo")); /// assert_eq!(map.get0("one").unwrap().get1(&2), None); /// ``` pub fn get1(&self, key1: &K1) -> Option<&'l V::GetType> { let key1_index = self.get_key1_index(key1)?; #[allow(clippy::unwrap_used)] // key1_index is valid Some(self.values.zvl_get(key1_index).unwrap()) } /// Gets the value for a predicate from this cursor, or `None` if key1 is not in the map. /// /// ```rust /// use zerovec::ZeroMap2d; /// /// let mut map = ZeroMap2d::new(); /// map.insert("one", &1u32, "foo"); /// assert_eq!(map.get0("one").unwrap().get1_by(|v| v.cmp(&1)), Some("foo")); /// assert_eq!(map.get0("one").unwrap().get1_by(|v| v.cmp(&2)), None); /// ``` pub fn get1_by(&self, predicate: impl FnMut(&K1) -> Ordering) -> Option<&'l V::GetType> { let key1_index = self.get_key1_index_by(predicate)?; #[allow(clippy::unwrap_used)] // key1_index is valid Some(self.values.zvl_get(key1_index).unwrap()) } /// Given key0_index and predicate, returns the index into the values array fn get_key1_index_by(&self, predicate: impl FnMut(&K1) -> Ordering) -> Option { let range = self.get_range(); debug_assert!(range.start < range.end); // '<' because every key0 should have a key1 debug_assert!(range.end <= self.keys1.zvl_len()); let start = range.start; #[allow(clippy::expect_used)] // protected by the debug_assert above let binary_search_result = self .keys1 .zvl_binary_search_in_range_by(predicate, range) .expect("in-bounds range"); binary_search_result.ok().map(move |s| s + start) } /// Given key0_index and key1, returns the index into the values array fn get_key1_index(&self, key1: &K1) -> Option { let range = self.get_range(); debug_assert!(range.start < range.end); // '<' because every key0 should have a key1 debug_assert!(range.end <= 
self.keys1.zvl_len()); let start = range.start; #[allow(clippy::expect_used)] // protected by the debug_assert above let binary_search_result = self .keys1 .zvl_binary_search_in_range(key1, range) .expect("in-bounds range"); binary_search_result.ok().map(move |s| s + start) } } impl<'l, 'a, K0, K1, V> ZeroMap2dCursor<'l, 'a, K0, K1, V> where K0: ZeroMapKV<'a>, K1: ZeroMapKV<'a> + Ord, V: ZeroMapKV<'a>, V: Copy, K0: ?Sized, K1: ?Sized, { /// For cases when `V` is fixed-size, obtain a direct copy of `V` instead of `V::ULE` /// /// ```rust /// use zerovec::ZeroMap2d; /// /// let mut map: ZeroMap2d = ZeroMap2d::new(); /// map.insert(&1, &2, &3); /// map.insert(&1, &4, &5); /// map.insert(&6, &7, &8); /// /// assert_eq!(map.get0(&6).unwrap().get1_copied(&7), Some(8)); /// ``` #[inline] pub fn get1_copied(&self, key1: &K1) -> Option { let key1_index = self.get_key1_index(key1)?; self.get1_copied_at(key1_index) } /// For cases when `V` is fixed-size, obtain a direct copy of `V` instead of `V::ULE` #[inline] pub fn get1_copied_by(&self, predicate: impl FnMut(&K1) -> Ordering) -> Option { let key1_index = self.get_key1_index_by(predicate)?; self.get1_copied_at(key1_index) } } // We can't use the default PartialEq because ZeroMap2d is invariant // so otherwise rustc will not automatically allow you to compare ZeroMaps // with different lifetimes impl<'m, 'n, 'a, 'b, K0, K1, V> PartialEq> for ZeroMap2dCursor<'m, 'a, K0, K1, V> where K0: for<'c> ZeroMapKV<'c> + ?Sized, K1: for<'c> ZeroMapKV<'c> + ?Sized, V: for<'c> ZeroMapKV<'c> + ?Sized, >::Slice: PartialEq<>::Slice>, >::Slice: PartialEq<>::Slice>, >::Slice: PartialEq<>::Slice>, { fn eq(&self, other: &ZeroMap2dCursor<'n, 'b, K0, K1, V>) -> bool { self.keys0.eq(other.keys0) && self.joiner.eq(other.joiner) && self.keys1.eq(other.keys1) && self.values.eq(other.values) && self.key0_index.eq(&other.key0_index) } } impl<'l, 'a, K0, K1, V> fmt::Debug for ZeroMap2dCursor<'l, 'a, K0, K1, V> where K0: ZeroMapKV<'a> + ?Sized, K1: 
ZeroMapKV<'a> + ?Sized, V: ZeroMapKV<'a> + ?Sized, K0::Slice: fmt::Debug, K1::Slice: fmt::Debug, V::Slice: fmt::Debug, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { f.debug_struct("ZeroMap2d") .field("keys0", &self.keys0) .field("joiner", &self.joiner) .field("keys1", &self.keys1) .field("values", &self.values) .field("key0_index", &self.key0_index) .finish() } } zerovec-0.11.1/src/map2d/databake.rs000064400000000000000000000133011046102023000152530ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::{maps::ZeroMap2dBorrowed, maps::ZeroMapKV, ZeroMap2d}; use databake::*; impl<'a, K0, K1, V> Bake for ZeroMap2d<'a, K0, K1, V> where K0: ZeroMapKV<'a> + ?Sized, K1: ZeroMapKV<'a> + ?Sized, V: ZeroMapKV<'a> + ?Sized, K0::Container: Bake, K1::Container: Bake, V::Container: Bake, { fn bake(&self, env: &CrateEnv) -> TokenStream { env.insert("zerovec"); let keys0 = self.keys0.bake(env); let joiner = self.joiner.bake(env); let keys1 = self.keys1.bake(env); let values = self.values.bake(env); quote! 
{ unsafe { #[allow(unused_unsafe)] zerovec::ZeroMap2d::from_parts_unchecked(#keys0, #joiner, #keys1, #values) } } } } impl<'a, K0, K1, V> BakeSize for ZeroMap2d<'a, K0, K1, V> where K0: ZeroMapKV<'a> + ?Sized, K1: ZeroMapKV<'a> + ?Sized, V: ZeroMapKV<'a> + ?Sized, K0::Container: BakeSize, K1::Container: BakeSize, V::Container: BakeSize, { fn borrows_size(&self) -> usize { self.keys0.borrows_size() + self.joiner.borrows_size() + self.keys1.borrows_size() + self.values.borrows_size() } } impl<'a, K0, K1, V> Bake for ZeroMap2dBorrowed<'a, K0, K1, V> where K0: ZeroMapKV<'a> + ?Sized, K1: ZeroMapKV<'a> + ?Sized, V: ZeroMapKV<'a> + ?Sized, &'a K0::Slice: Bake, &'a K1::Slice: Bake, &'a V::Slice: Bake, { fn bake(&self, env: &CrateEnv) -> TokenStream { env.insert("zerovec"); let keys0 = self.keys0.bake(env); let joiner = self.joiner.bake(env); let keys1 = self.keys1.bake(env); let values = self.values.bake(env); quote! { unsafe { #[allow(unused_unsafe)] zerovec::maps::ZeroMap2dBorrowed::from_parts_unchecked(#keys0, #joiner, #keys1, #values) } } } } impl<'a, K0, K1, V> BakeSize for ZeroMap2dBorrowed<'a, K0, K1, V> where K0: ZeroMapKV<'a> + ?Sized, K1: ZeroMapKV<'a> + ?Sized, V: ZeroMapKV<'a> + ?Sized, &'a K0::Slice: BakeSize, &'a K1::Slice: BakeSize, &'a V::Slice: BakeSize, { fn borrows_size(&self) -> usize { self.keys0.borrows_size() + self.joiner.borrows_size() + self.keys1.borrows_size() + self.values.borrows_size() } } #[test] fn test_baked_map() { test_bake!( ZeroMap2d, const, unsafe { #[allow(unused_unsafe)] crate::ZeroMap2d::from_parts_unchecked( unsafe { crate::vecs::VarZeroVec16::from_bytes_unchecked( b"\x0E\0\0\0\0\0\x05\0\x07\0\t\0\x0B\0\x10\0\x12\0\x14\0\x1C\0\x1E\0#\0%\0'\0,\0arcazcuenffgrckkkukylifmanmnpapalsdtgugunruzyuezh" ) }, unsafe { crate::ZeroVec::from_bytes_unchecked( 
b"\x02\0\0\0\x03\0\0\0\x04\0\0\0\x05\0\0\0\x06\0\0\0\x07\0\0\0\x08\0\0\0\n\0\0\0\x0C\0\0\0\r\0\0\0\x0E\0\0\0\x0F\0\0\0\x10\0\0\0\x11\0\0\0\x14\0\0\0\x15\0\0\0\x16\0\0\0\x17\0\0\0\x18\0\0\0\x19\0\0\0\x1C\0\0\0" ) }, unsafe { crate::vecs::VarZeroVec16::from_bytes_unchecked( b"\x1C\0\0\0\0\0\x04\0\x08\0\x0C\0\x10\0\x14\0\x18\0\x1C\0 \0$\0(\0,\x000\x004\08\0<\0@\0D\0H\0L\0P\0T\0X\0\\\0`\0d\0h\0l\0NbatPalmArabGlagShawAdlmLinbArabArabYeziArabLatnLimbNkooMongArabPhlpDevaKhojSindArabCyrlDevaArabHansBopoHanbHant" ) }, unsafe { crate::vecs::VarZeroVec16::from_bytes_unchecked( b"\x1C\0\0\0\0\0\x02\0\x04\0\x06\0\x08\0\n\0\x0C\0\x0E\0\x10\0\x12\0\x14\0\x16\0\x18\0\x1A\0\x1C\0\x1E\0 \0\"\0$\0&\0(\0*\0,\0.\x000\x002\x004\x006\0JOSYIRBGGBGNGRCNIQGECNTRINGNCNPKCNINININPKKZNPAFCNTWTWTW" ) }, ) }, zerovec ); } #[test] fn test_baked_borrowed_map() { test_bake!( ZeroMap2dBorrowed, const, unsafe { #[allow(unused_unsafe)] crate::maps::ZeroMap2dBorrowed::from_parts_unchecked( unsafe { crate::vecs::VarZeroSlice16::from_bytes_unchecked( b"\x0E\0\0\0\0\0\x05\0\x07\0\t\0\x0B\0\x10\0\x12\0\x14\0\x1C\0\x1E\0#\0%\0'\0,\0arcazcuenffgrckkkukylifmanmnpapalsdtgugunruzyuezh" ) }, unsafe { crate::ZeroSlice::from_bytes_unchecked( b"\x02\0\0\0\x03\0\0\0\x04\0\0\0\x05\0\0\0\x06\0\0\0\x07\0\0\0\x08\0\0\0\n\0\0\0\x0C\0\0\0\r\0\0\0\x0E\0\0\0\x0F\0\0\0\x10\0\0\0\x11\0\0\0\x14\0\0\0\x15\0\0\0\x16\0\0\0\x17\0\0\0\x18\0\0\0\x19\0\0\0\x1C\0\0\0" ) }, unsafe { crate::vecs::VarZeroSlice16::from_bytes_unchecked( b"\x1C\0\0\0\0\0\x04\0\x08\0\x0C\0\x10\0\x14\0\x18\0\x1C\0 \0$\0(\0,\x000\x004\08\0<\0@\0D\0H\0L\0P\0T\0X\0\\\0`\0d\0h\0l\0NbatPalmArabGlagShawAdlmLinbArabArabYeziArabLatnLimbNkooMongArabPhlpDevaKhojSindArabCyrlDevaArabHansBopoHanbHant" ) }, unsafe { crate::vecs::VarZeroSlice16::from_bytes_unchecked( b"\x1C\0\0\0\0\0\x02\0\x04\0\x06\0\x08\0\n\0\x0C\0\x0E\0\x10\0\x12\0\x14\0\x16\0\x18\0\x1A\0\x1C\0\x1E\0 \0\"\0$\0&\0(\0*\0,\0.\x000\x002\x004\x006\0JOSYIRBGGBGNGRCNIQGECNTRINGNCNPKCNINININPKKZNPAFCNTWTWTW" ) 
},
            )
        },
        zerovec
    );
}
zerovec-0.11.1/src/map2d/map.rs000064400000000000000000000754431046102023000143130ustar 00000000000000
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use crate::ule::AsULE;
use crate::ZeroVec;
use alloc::borrow::Borrow;
use core::cmp::Ordering;
use core::convert::TryFrom;
use core::fmt;
use core::iter::FromIterator;
use core::ops::Range;

use super::*;
use crate::map::ZeroMapKV;
use crate::map::{MutableZeroVecLike, ZeroVecLike};

/// A zero-copy, two-dimensional map data structure.
///
/// This is an extension of [`ZeroMap`] that supports two layers of keys. For example,
/// to map a pair of an integer and a string to a buffer, you can write:
///
/// ```no_run
/// # use zerovec::ZeroMap2d;
/// let _: ZeroMap2d<u32, str, [u8]> = unimplemented!();
/// ```
///
/// Internally, `ZeroMap2d` stores four zero-copy vectors, one for each type argument plus
/// one more to match between the two vectors of keys.
///
/// # Examples
///
/// ```
/// use zerovec::ZeroMap2d;
///
/// // Example byte buffer representing the map { 1: {2: "three" } }
/// let BINCODE_BYTES: &[u8; 47] = &[
///     2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0,
///     0, 0, 0, 0, 0, 0, 2, 0, 7, 0, 0, 0, 0, 0, 0, 0, 1, 0, 116, 104, 114,
///     101, 101,
/// ];
///
/// // Deserializing to ZeroMap2d requires no heap allocations.
/// let zero_map: ZeroMap2d = /// bincode::deserialize(BINCODE_BYTES) /// .expect("Should deserialize successfully"); /// assert_eq!(zero_map.get_2d(&1, &2), Some("three")); /// ``` /// /// [`VarZeroVec`]: crate::VarZeroVec /// [`ZeroMap`]: crate::ZeroMap // ZeroMap2d contains 4 fields: // // - keys0 = sorted list of all K0 in the map // - joiner = helper vec that maps from a K0 to a range of keys1 // - keys1 = list of all K1 in the map, sorted in ranges for each K0 // - values = list of all values in the map, sorted by (K0, K1) // // For a particular K0 at index i, the range of keys1 corresponding to K0 is // (joiner[i-1]..joiner[i]), where the first range starts at 0. // // Required Invariants: // // 1. len(keys0) == len(joiner) // 2. len(keys1) == len(values) // 3. joiner is sorted // 4. the last element of joiner is the length of keys1 // // Optional Invariants: // // 5. keys0 is sorted (for binary_search) // 6. ranges within keys1 are sorted (for binary_search) // 7. every K0 is associated with at least one K1 (no empty ranges) // // During deserialization, these three invariants are not checked, because they put the // ZeroMap2d in a deterministic state, even though it may have unexpected behavior. pub struct ZeroMap2d<'a, K0, K1, V> where K0: ZeroMapKV<'a>, K1: ZeroMapKV<'a>, V: ZeroMapKV<'a>, K0: ?Sized, K1: ?Sized, V: ?Sized, { pub(crate) keys0: K0::Container, pub(crate) joiner: ZeroVec<'a, u32>, pub(crate) keys1: K1::Container, pub(crate) values: V::Container, } impl<'a, K0, K1, V> Default for ZeroMap2d<'a, K0, K1, V> where K0: ZeroMapKV<'a>, K1: ZeroMapKV<'a>, V: ZeroMapKV<'a>, K0: ?Sized, K1: ?Sized, V: ?Sized, { fn default() -> Self { Self::new() } } impl<'a, K0, K1, V> ZeroMap2d<'a, K0, K1, V> where K0: ZeroMapKV<'a>, K1: ZeroMapKV<'a>, V: ZeroMapKV<'a>, K0: ?Sized, K1: ?Sized, V: ?Sized, { /// Creates a new, empty `ZeroMap2d`. 
///
    /// # Examples
    ///
    /// ```
    /// use zerovec::ZeroMap2d;
    ///
    /// let zm: ZeroMap2d<u16, u16, str> = ZeroMap2d::new();
    /// assert!(zm.is_empty());
    /// ```
    pub fn new() -> Self {
        Self {
            keys0: K0::Container::zvl_with_capacity(0),
            joiner: ZeroVec::new(),
            keys1: K1::Container::zvl_with_capacity(0),
            values: V::Container::zvl_with_capacity(0),
        }
    }

    /// Builds a map directly from its four backing containers.
    ///
    /// # Safety
    ///
    /// The arguments are stored as-is; nothing is validated here.
    /// NOTE(review): presumably the caller must pass containers that already satisfy the
    /// invariants listed above the `ZeroMap2d` struct definition -- confirm against the
    /// databake call sites.
    #[doc(hidden)] // databake internal
    pub const unsafe fn from_parts_unchecked(
        keys0: K0::Container,
        joiner: ZeroVec<'a, u32>,
        keys1: K1::Container,
        values: V::Container,
    ) -> Self {
        Self {
            keys0,
            joiner,
            keys1,
            values,
        }
    }

    /// Construct a new [`ZeroMap2d`] with a given capacity
    pub fn with_capacity(capacity: usize) -> Self {
        Self {
            keys0: K0::Container::zvl_with_capacity(capacity),
            joiner: ZeroVec::with_capacity(capacity),
            keys1: K1::Container::zvl_with_capacity(capacity),
            values: V::Container::zvl_with_capacity(capacity),
        }
    }

    /// Obtain a borrowed version of this map
    pub fn as_borrowed(&'a self) -> ZeroMap2dBorrowed<'a, K0, K1, V> {
        ZeroMap2dBorrowed {
            keys0: self.keys0.zvl_as_borrowed(),
            joiner: &self.joiner,
            keys1: self.keys1.zvl_as_borrowed(),
            values: self.values.zvl_as_borrowed(),
        }
    }

    /// The number of values in the [`ZeroMap2d`]
    pub fn len(&self) -> usize {
        self.values.zvl_len()
    }

    /// Whether the [`ZeroMap2d`] is empty
    pub fn is_empty(&self) -> bool {
        self.values.zvl_len() == 0
    }

    /// Remove all elements from the [`ZeroMap2d`]
    pub fn clear(&mut self) {
        self.keys0.zvl_clear();
        self.joiner.clear();
        self.keys1.zvl_clear();
        self.values.zvl_clear();
    }

    /// Reserve capacity for `additional` more elements to be inserted into
    /// the [`ZeroMap2d`] to avoid frequent reallocations.
    ///
    /// See [`Vec::reserve()`](alloc::vec::Vec::reserve) for more information.
pub fn reserve(&mut self, additional: usize) {
        self.keys0.zvl_reserve(additional);
        self.joiner.zvl_reserve(additional);
        self.keys1.zvl_reserve(additional);
        self.values.zvl_reserve(additional);
    }

    /// Produce an ordered iterator over keys0, which can then be used to get an iterator
    /// over keys1 for a particular key0.
    ///
    /// # Example
    ///
    /// Loop over all elements of a ZeroMap2d:
    ///
    /// ```
    /// use zerovec::ZeroMap2d;
    ///
    /// let mut map: ZeroMap2d<u16, u16, str> = ZeroMap2d::new();
    /// map.insert(&1, &1, "foo");
    /// map.insert(&2, &3, "bar");
    /// map.insert(&2, &4, "baz");
    ///
    /// let mut total_value = 0;
    ///
    /// for cursor in map.iter0() {
    ///     for (key1, value) in cursor.iter1() {
    ///         // This code runs for every (key0, key1) pair
    ///         total_value += cursor.key0().as_unsigned_int() as usize;
    ///         total_value += key1.as_unsigned_int() as usize;
    ///         total_value += value.len();
    ///     }
    /// }
    ///
    /// assert_eq!(total_value, 22);
    /// ```
    pub fn iter0<'l>(&'l self) -> impl Iterator<Item = ZeroMap2dCursor<'l, 'a, K0, K1, V>> + 'l {
        (0..self.keys0.zvl_len()).map(move |idx| ZeroMap2dCursor::from_cow(self, idx))
    }

    // INTERNAL ROUTINES FOLLOW //

    /// Given an index into the joiner array, returns the corresponding range of keys1
    fn get_range_for_key0_index(&self, key0_index: usize) -> Range<usize> {
        ZeroMap2dCursor::from_cow(self, key0_index).get_range()
    }

    /// Removes key0_index from the keys0 array and the joiner array
    fn remove_key0_index(&mut self, key0_index: usize) {
        self.keys0.zvl_remove(key0_index);
        self.joiner.with_mut(|v| v.remove(key0_index));
    }

    /// Shifts all joiner ranges from key0_index onward one index up
    fn joiner_expand(&mut self, key0_index: usize) {
        #[allow(clippy::expect_used)] // slice overflow
        self.joiner
            .to_mut_slice()
            .iter_mut()
            .skip(key0_index)
            .for_each(|ref mut v| {
                // TODO(#1410): Make this fallible
                **v = v
                    .as_unsigned_int()
                    .checked_add(1)
                    .expect("Attempted to add more than 2^32 elements to a ZeroMap2d")
                    .to_unaligned()
            })
    }

    /// Shifts all joiner ranges from key0_index onward one index down
    fn joiner_shrink(&mut self, key0_index: usize) {
        self.joiner
            .to_mut_slice()
            .iter_mut()
            .skip(key0_index)
            .for_each(|ref mut v| **v = (v.as_unsigned_int() - 1).to_unaligned())
    }
}

impl<'a, K0, K1, V> ZeroMap2d<'a, K0, K1, V>
where
    K0: ZeroMapKV<'a> + Ord,
    K1: ZeroMapKV<'a> + Ord,
    V: ZeroMapKV<'a>,
    K0: ?Sized,
    K1: ?Sized,
    V: ?Sized,
{
    /// Get the value associated with `key0` and `key1`, if it exists.
    ///
    /// For more fine-grained error handling, use [`ZeroMap2d::get0`].
    ///
    /// ```rust
    /// use zerovec::ZeroMap2d;
    ///
    /// let mut map = ZeroMap2d::new();
    /// map.insert(&1, "one", "foo");
    /// map.insert(&2, "one", "bar");
    /// map.insert(&2, "two", "baz");
    /// assert_eq!(map.get_2d(&1, "one"), Some("foo"));
    /// assert_eq!(map.get_2d(&1, "two"), None);
    /// assert_eq!(map.get_2d(&2, "one"), Some("bar"));
    /// assert_eq!(map.get_2d(&2, "two"), Some("baz"));
    /// assert_eq!(map.get_2d(&3, "three"), None);
    /// ```
    pub fn get_2d(&self, key0: &K0, key1: &K1) -> Option<&V::GetType> {
        self.get0(key0)?.get1(key1)
    }

    /// Insert `value` under `key0` and `key1`, returning the existing value if it exists.
    ///
    /// ```rust
    /// use zerovec::ZeroMap2d;
    ///
    /// let mut map = ZeroMap2d::new();
    /// assert_eq!(map.insert(&0, "zero", "foo"), None,);
    /// assert_eq!(map.insert(&1, "one", "bar"), None,);
    /// assert_eq!(map.insert(&1, "one", "baz").as_deref(), Some("bar"),);
    /// assert_eq!(map.get_2d(&1, "one").as_deref(), Some("baz"));
    /// assert_eq!(map.len(), 2);
    /// ```
    pub fn insert(&mut self, key0: &K0, key1: &K1, value: &V) -> Option<V::OwnedType> {
        let (key0_index, range) = self.get_or_insert_range_for_key0(key0);
        debug_assert!(range.start <= range.end); // '<=' because we may have inserted a new key0
        debug_assert!(range.end <= self.keys1.zvl_len());
        let range_start = range.start;
        #[allow(clippy::unwrap_used)] // by debug_assert! invariants
        let index = range_start
            + match self.keys1.zvl_binary_search_in_range(key1, range).unwrap() {
                // The pair already exists: overwrite in place and hand back the old value.
                Ok(index) => return Some(self.values.zvl_replace(range_start + index, value)),
                Err(index) => index,
            };
        self.keys1.zvl_insert(index, key1);
        self.values.zvl_insert(index, value);
        // One more key1 now sits at or before every joiner boundary from key0_index onward.
        self.joiner_expand(key0_index);
        #[cfg(debug_assertions)]
        self.check_invariants();
        None
    }

    /// Remove the value at `key0` and `key1`, returning it if it exists.
    ///
    /// ```rust
    /// use zerovec::ZeroMap2d;
    ///
    /// let mut map = ZeroMap2d::new();
    /// map.insert(&1, "one", "foo");
    /// map.insert(&2, "two", "bar");
    /// assert_eq!(
    ///     map.remove(&1, "one"),
    ///     Some("foo".to_owned().into_boxed_str())
    /// );
    /// assert_eq!(map.get_2d(&1, "one"), None);
    /// assert_eq!(map.remove(&1, "one"), None);
    /// ```
    pub fn remove(&mut self, key0: &K0, key1: &K1) -> Option<V::OwnedType> {
        let key0_index = self.keys0.zvl_binary_search(key0).ok()?;
        let range = self.get_range_for_key0_index(key0_index);
        debug_assert!(range.start < range.end); // '<' because every key0 should have a key1
        debug_assert!(range.end <= self.keys1.zvl_len());
        // Remember whether this key0 owns a single key1, so key0 can be dropped afterwards.
        let is_singleton_range = range.start + 1 == range.end;
        #[allow(clippy::unwrap_used)] // by debug_assert invariants
        let index = range.start
            + self
                .keys1
                .zvl_binary_search_in_range(key1, range)
                .unwrap()
                .ok()?;
        self.keys1.zvl_remove(index);
        let removed = self.values.zvl_remove(index);
        self.joiner_shrink(key0_index);
        if is_singleton_range {
            self.remove_key0_index(key0_index);
        }
        #[cfg(debug_assertions)]
        self.check_invariants();
        Some(removed)
    }

    /// Appends `value` with `key0` and `key1` to the end of the underlying vectors, returning
    /// the keys and `value` _if it failed_. Useful for extending with an existing
    /// sorted list.
/// /// ```rust /// use zerovec::ZeroMap2d; /// /// let mut map = ZeroMap2d::new(); /// assert!(map.try_append(&1, "one", "uno").is_none()); /// assert!(map.try_append(&3, "three", "tres").is_none()); /// /// let unsuccessful = map.try_append(&3, "three", "tres-updated"); /// assert!(unsuccessful.is_some(), "append duplicate of last key"); /// /// let unsuccessful = map.try_append(&2, "two", "dos"); /// assert!(unsuccessful.is_some(), "append out of order"); /// /// assert_eq!(map.get_2d(&1, "one"), Some("uno")); /// /// // contains the original value for the key: 3 /// assert_eq!(map.get_2d(&3, "three"), Some("tres")); /// /// // not appended since it wasn't in order /// assert_eq!(map.get_2d(&2, "two"), None); /// ``` #[must_use] pub fn try_append<'b>( &mut self, key0: &'b K0, key1: &'b K1, value: &'b V, ) -> Option<(&'b K0, &'b K1, &'b V)> { if self.is_empty() { self.keys0.zvl_push(key0); self.joiner.with_mut(|v| v.push(1u32.to_unaligned())); self.keys1.zvl_push(key1); self.values.zvl_push(value); return None; } // The unwraps are protected by the fact that we are not empty #[allow(clippy::unwrap_used)] let last_key0 = self.keys0.zvl_get(self.keys0.zvl_len() - 1).unwrap(); let key0_cmp = K0::Container::t_cmp_get(key0, last_key0); #[allow(clippy::unwrap_used)] let last_key1 = self.keys1.zvl_get(self.keys1.zvl_len() - 1).unwrap(); let key1_cmp = K1::Container::t_cmp_get(key1, last_key1); // Check for error case (out of order) match key0_cmp { Ordering::Less => { // Error case return Some((key0, key1, value)); } Ordering::Equal => { match key1_cmp { Ordering::Less | Ordering::Equal => { // Error case return Some((key0, key1, value)); } _ => {} } } _ => {} } #[allow(clippy::expect_used)] // slice overflow let joiner_value = u32::try_from(self.keys1.zvl_len() + 1) .expect("Attempted to add more than 2^32 elements to a ZeroMap2d"); // All OK to append #[allow(clippy::unwrap_used)] if key0_cmp == Ordering::Greater { self.keys0.zvl_push(key0); self.joiner .with_mut(|v| 
v.push(joiner_value.to_unaligned())); } else { // This unwrap is protected because we are not empty *self.joiner.to_mut_slice().last_mut().unwrap() = joiner_value.to_unaligned(); } self.keys1.zvl_push(key1); self.values.zvl_push(value); #[cfg(debug_assertions)] self.check_invariants(); None } // INTERNAL ROUTINES FOLLOW // #[cfg(debug_assertions)] #[allow(clippy::unwrap_used)] // this is an assertion function pub(crate) fn check_invariants(&self) { debug_assert_eq!(self.keys0.zvl_len(), self.joiner.len()); debug_assert_eq!(self.keys1.zvl_len(), self.values.zvl_len()); debug_assert!(self.keys0.zvl_is_ascending()); debug_assert!(self.joiner.zvl_is_ascending()); if let Some(last_joiner) = self.joiner.last() { debug_assert_eq!(last_joiner as usize, self.keys1.zvl_len()); } for i in 0..self.joiner.len() { let j0 = if i == 0 { 0 } else { self.joiner.get(i - 1).unwrap() as usize }; let j1 = self.joiner.get(i).unwrap() as usize; debug_assert_ne!(j0, j1); for j in (j0 + 1)..j1 { let m0 = self.keys1.zvl_get(j - 1).unwrap(); let m1 = self.keys1.zvl_get(j).unwrap(); debug_assert_eq!(Ordering::Less, K1::Container::get_cmp_get(m0, m1)); } } } } impl<'a, K0, K1, V> ZeroMap2d<'a, K0, K1, V> where K0: ZeroMapKV<'a> + Ord, K1: ZeroMapKV<'a>, V: ZeroMapKV<'a>, K0: ?Sized, K1: ?Sized, V: ?Sized, { /// Gets a cursor for `key0`. If `None`, then `key0` is not in the map. If `Some`, /// then `key0` is in the map, and `key1` can be queried. 
/// /// ```rust /// use zerovec::ZeroMap2d; /// /// let mut map = ZeroMap2d::new(); /// map.insert(&1u32, "one", "foo"); /// map.insert(&2, "one", "bar"); /// map.insert(&2, "two", "baz"); /// assert_eq!(map.get0(&1).unwrap().get1("one").unwrap(), "foo"); /// assert_eq!(map.get0(&1).unwrap().get1("two"), None); /// assert_eq!(map.get0(&2).unwrap().get1("one").unwrap(), "bar"); /// assert_eq!(map.get0(&2).unwrap().get1("two").unwrap(), "baz"); /// assert_eq!(map.get0(&3), None); /// ``` #[inline] pub fn get0<'l>(&'l self, key0: &K0) -> Option> { let key0_index = self.keys0.zvl_binary_search(key0).ok()?; Some(ZeroMap2dCursor::from_cow(self, key0_index)) } /// Binary search the map for `key0`, returning a cursor. /// /// ```rust /// use zerovec::ZeroMap2d; /// /// let mut map = ZeroMap2d::new(); /// map.insert(&1, "one", "foo"); /// map.insert(&2, "two", "bar"); /// assert!(matches!(map.get0_by(|probe| probe.cmp(&1)), Some(_))); /// assert!(matches!(map.get0_by(|probe| probe.cmp(&3)), None)); /// ``` pub fn get0_by<'l>( &'l self, predicate: impl FnMut(&K0) -> Ordering, ) -> Option> { let key0_index = self.keys0.zvl_binary_search_by(predicate).ok()?; Some(ZeroMap2dCursor::from_cow(self, key0_index)) } /// Returns whether `key0` is contained in this map /// /// ```rust /// use zerovec::ZeroMap2d; /// /// let mut map = ZeroMap2d::new(); /// map.insert(&1, "one", "foo"); /// map.insert(&2, "two", "bar"); /// assert!(map.contains_key0(&1)); /// assert!(!map.contains_key0(&3)); /// ``` pub fn contains_key0(&self, key0: &K0) -> bool { self.keys0.zvl_binary_search(key0).is_ok() } // INTERNAL ROUTINES FOLLOW // /// Same as `get_range_for_key0`, but creates key0 if it doesn't already exist fn get_or_insert_range_for_key0(&mut self, key0: &K0) -> (usize, Range) { match self.keys0.zvl_binary_search(key0) { Ok(key0_index) => (key0_index, self.get_range_for_key0_index(key0_index)), Err(key0_index) => { // Add an entry to self.keys0 and self.joiner let joiner_value = if key0_index 
== 0 { 0 } else { debug_assert!(key0_index <= self.joiner.len()); // The unwrap is protected by the debug_assert above and key0_index != 0 #[allow(clippy::unwrap_used)] self.joiner.get(key0_index - 1).unwrap() }; self.keys0.zvl_insert(key0_index, key0); self.joiner .with_mut(|v| v.insert(key0_index, joiner_value.to_unaligned())); (key0_index, (joiner_value as usize)..(joiner_value as usize)) } } } } impl<'a, K0, K1, V> ZeroMap2d<'a, K0, K1, V> where K0: ZeroMapKV<'a> + Ord, K1: ZeroMapKV<'a> + Ord, V: ZeroMapKV<'a>, V: Copy, K0: ?Sized, K1: ?Sized, { /// For cases when `V` is fixed-size, obtain a direct copy of `V` instead of `V::ULE` /// /// # Examples /// /// ``` /// # use zerovec::ZeroMap2d; /// let mut map: ZeroMap2d = ZeroMap2d::new(); /// map.insert(&1, &2, &3); /// map.insert(&1, &4, &5); /// map.insert(&6, &7, &8); /// /// assert_eq!(map.get_copied_2d(&6, &7), Some(8)); /// ``` #[inline] pub fn get_copied_2d(&self, key0: &K0, key1: &K1) -> Option { self.get0(key0)?.get1_copied(key1) } } impl<'a, K0, K1, V> From> for ZeroMap2d<'a, K0, K1, V> where K0: ZeroMapKV<'a>, K1: ZeroMapKV<'a>, V: ZeroMapKV<'a>, K0: ?Sized, K1: ?Sized, V: ?Sized, { fn from(other: ZeroMap2dBorrowed<'a, K0, K1, V>) -> Self { Self { keys0: K0::Container::zvl_from_borrowed(other.keys0), joiner: other.joiner.as_zerovec(), keys1: K1::Container::zvl_from_borrowed(other.keys1), values: V::Container::zvl_from_borrowed(other.values), } } } // We can't use the default PartialEq because ZeroMap2d is invariant // so otherwise rustc will not automatically allow you to compare ZeroMaps // with different lifetimes impl<'a, 'b, K0, K1, V> PartialEq> for ZeroMap2d<'a, K0, K1, V> where K0: for<'c> ZeroMapKV<'c> + ?Sized, K1: for<'c> ZeroMapKV<'c> + ?Sized, V: for<'c> ZeroMapKV<'c> + ?Sized, >::Container: PartialEq<>::Container>, >::Container: PartialEq<>::Container>, >::Container: PartialEq<>::Container>, { fn eq(&self, other: &ZeroMap2d<'b, K0, K1, V>) -> bool { self.keys0.eq(&other.keys0) && 
self.joiner.eq(&other.joiner) && self.keys1.eq(&other.keys1) && self.values.eq(&other.values) } } impl<'a, K0, K1, V> fmt::Debug for ZeroMap2d<'a, K0, K1, V> where K0: ZeroMapKV<'a> + ?Sized, K1: ZeroMapKV<'a> + ?Sized, V: ZeroMapKV<'a> + ?Sized, >::Container: fmt::Debug, >::Container: fmt::Debug, >::Container: fmt::Debug, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { f.debug_struct("ZeroMap2d") .field("keys0", &self.keys0) .field("joiner", &self.joiner) .field("keys1", &self.keys1) .field("values", &self.values) .finish() } } impl<'a, K0, K1, V> Clone for ZeroMap2d<'a, K0, K1, V> where K0: ZeroMapKV<'a> + ?Sized, K1: ZeroMapKV<'a> + ?Sized, V: ZeroMapKV<'a> + ?Sized, >::Container: Clone, >::Container: Clone, >::Container: Clone, { fn clone(&self) -> Self { Self { keys0: self.keys0.clone(), joiner: self.joiner.clone(), keys1: self.keys1.clone(), values: self.values.clone(), } } } impl<'a, A, B, C, K0, K1, V> FromIterator<(A, B, C)> for ZeroMap2d<'a, K0, K1, V> where A: Borrow, B: Borrow, C: Borrow, K0: ZeroMapKV<'a> + ?Sized + Ord, K1: ZeroMapKV<'a> + ?Sized + Ord, V: ZeroMapKV<'a> + ?Sized, { fn from_iter(iter: T) -> Self where T: IntoIterator, { let iter = iter.into_iter(); let mut map = match iter.size_hint() { (_, Some(upper)) => Self::with_capacity(upper), (lower, None) => Self::with_capacity(lower), }; for (key0, key1, value) in iter { if let Some((key0, key1, value)) = map.try_append(key0.borrow(), key1.borrow(), value.borrow()) { map.insert(key0, key1, value); } } #[cfg(debug_assertions)] map.check_invariants(); map } } #[cfg(test)] mod test { use super::*; use alloc::collections::BTreeMap; #[test] fn stress_test() { let mut zm2d = ZeroMap2d::::new(); assert_eq!( format!("{zm2d:?}"), "ZeroMap2d { keys0: ZeroVec([]), joiner: ZeroVec([]), keys1: [], values: [] }" ); assert_eq!(zm2d.get0(&0), None); let result = zm2d.try_append(&3, "ccc", "CCC"); assert!(result.is_none()); assert_eq!(format!("{zm2d:?}"), "ZeroMap2d { keys0: 
ZeroVec([3]), joiner: ZeroVec([1]), keys1: [\"ccc\"], values: [\"CCC\"] }"); assert_eq!(zm2d.get0(&0), None); assert_eq!(zm2d.get0(&3).unwrap().get1(""), None); assert_eq!(zm2d.get_2d(&3, "ccc"), Some("CCC")); assert_eq!(zm2d.get0(&99), None); let result = zm2d.try_append(&3, "eee", "EEE"); assert!(result.is_none()); assert_eq!(format!("{zm2d:?}"), "ZeroMap2d { keys0: ZeroVec([3]), joiner: ZeroVec([2]), keys1: [\"ccc\", \"eee\"], values: [\"CCC\", \"EEE\"] }"); assert_eq!(zm2d.get0(&0), None); assert_eq!(zm2d.get0(&3).unwrap().get1(""), None); assert_eq!(zm2d.get_2d(&3, "ccc"), Some("CCC")); assert_eq!(zm2d.get_2d(&3, "eee"), Some("EEE")); assert_eq!(zm2d.get0(&3).unwrap().get1("five"), None); assert_eq!(zm2d.get0(&99), None); // Out of order let result = zm2d.try_append(&3, "ddd", "DD0"); assert!(result.is_some()); // Append a few more elements let result = zm2d.try_append(&5, "ddd", "DD1"); assert!(result.is_none()); let result = zm2d.try_append(&7, "ddd", "DD2"); assert!(result.is_none()); let result = zm2d.try_append(&7, "eee", "EEE"); assert!(result.is_none()); let result = zm2d.try_append(&7, "www", "WWW"); assert!(result.is_none()); let result = zm2d.try_append(&9, "yyy", "YYY"); assert!(result.is_none()); assert_eq!(format!("{zm2d:?}"), "ZeroMap2d { keys0: ZeroVec([3, 5, 7, 9]), joiner: ZeroVec([2, 3, 6, 7]), keys1: [\"ccc\", \"eee\", \"ddd\", \"ddd\", \"eee\", \"www\", \"yyy\"], values: [\"CCC\", \"EEE\", \"DD1\", \"DD2\", \"EEE\", \"WWW\", \"YYY\"] }"); assert_eq!(zm2d.get0(&0), None); assert_eq!(zm2d.get0(&3).unwrap().get1(""), None); assert_eq!(zm2d.get_2d(&3, "ccc"), Some("CCC")); assert_eq!(zm2d.get_2d(&3, "eee"), Some("EEE")); assert_eq!(zm2d.get0(&3).unwrap().get1("zzz"), None); assert_eq!(zm2d.get0(&4), None); assert_eq!(zm2d.get0(&5).unwrap().get1("aaa"), None); assert_eq!(zm2d.get_2d(&5, "ddd"), Some("DD1")); assert_eq!(zm2d.get0(&5).unwrap().get1("zzz"), None); assert_eq!(zm2d.get0(&6), None); assert_eq!(zm2d.get0(&7).unwrap().get1("aaa"), 
None); assert_eq!(zm2d.get_2d(&7, "ddd"), Some("DD2")); assert_eq!(zm2d.get_2d(&7, "eee"), Some("EEE")); assert_eq!(zm2d.get_2d(&7, "www"), Some("WWW")); assert_eq!(zm2d.get0(&7).unwrap().get1("yyy"), None); assert_eq!(zm2d.get0(&7).unwrap().get1("zzz"), None); assert_eq!(zm2d.get0(&8), None); assert_eq!(zm2d.get0(&9).unwrap().get1("aaa"), None); assert_eq!(zm2d.get0(&9).unwrap().get1("www"), None); assert_eq!(zm2d.get_2d(&9, "yyy"), Some("YYY")); assert_eq!(zm2d.get0(&9).unwrap().get1("zzz"), None); assert_eq!(zm2d.get0(&10), None); assert_eq!(zm2d.get0(&99), None); // Insert some elements zm2d.insert(&3, "mmm", "MM0"); zm2d.insert(&6, "ddd", "DD3"); zm2d.insert(&6, "mmm", "MM1"); zm2d.insert(&6, "nnn", "NNN"); assert_eq!(format!("{zm2d:?}"), "ZeroMap2d { keys0: ZeroVec([3, 5, 6, 7, 9]), joiner: ZeroVec([3, 4, 7, 10, 11]), keys1: [\"ccc\", \"eee\", \"mmm\", \"ddd\", \"ddd\", \"mmm\", \"nnn\", \"ddd\", \"eee\", \"www\", \"yyy\"], values: [\"CCC\", \"EEE\", \"MM0\", \"DD1\", \"DD3\", \"MM1\", \"NNN\", \"DD2\", \"EEE\", \"WWW\", \"YYY\"] }"); assert_eq!(zm2d.get0(&0), None); assert_eq!(zm2d.get0(&3).unwrap().get1(""), None); assert_eq!(zm2d.get_2d(&3, "ccc"), Some("CCC")); assert_eq!(zm2d.get_2d(&3, "eee"), Some("EEE")); assert_eq!(zm2d.get_2d(&3, "mmm"), Some("MM0")); assert_eq!(zm2d.get0(&3).unwrap().get1("zzz"), None); assert_eq!(zm2d.get0(&4), None); assert_eq!(zm2d.get0(&5).unwrap().get1("aaa"), None); assert_eq!(zm2d.get_2d(&5, "ddd"), Some("DD1")); assert_eq!(zm2d.get0(&5).unwrap().get1("zzz"), None); assert_eq!(zm2d.get0(&6).unwrap().get1("aaa"), None); assert_eq!(zm2d.get_2d(&6, "ddd"), Some("DD3")); assert_eq!(zm2d.get_2d(&6, "mmm"), Some("MM1")); assert_eq!(zm2d.get_2d(&6, "nnn"), Some("NNN")); assert_eq!(zm2d.get0(&6).unwrap().get1("zzz"), None); assert_eq!(zm2d.get0(&7).unwrap().get1("aaa"), None); assert_eq!(zm2d.get_2d(&7, "ddd"), Some("DD2")); assert_eq!(zm2d.get_2d(&7, "eee"), Some("EEE")); assert_eq!(zm2d.get_2d(&7, "www"), Some("WWW")); 
assert_eq!(zm2d.get0(&7).unwrap().get1("yyy"), None); assert_eq!(zm2d.get0(&7).unwrap().get1("zzz"), None); assert_eq!(zm2d.get0(&8), None); assert_eq!(zm2d.get0(&9).unwrap().get1("aaa"), None); assert_eq!(zm2d.get0(&9).unwrap().get1("www"), None); assert_eq!(zm2d.get_2d(&9, "yyy"), Some("YYY")); assert_eq!(zm2d.get0(&9).unwrap().get1("zzz"), None); assert_eq!(zm2d.get0(&10), None); assert_eq!(zm2d.get0(&99), None); // Remove some elements let result = zm2d.remove(&3, "ccc"); // first element assert_eq!(result.as_deref(), Some("CCC")); let result = zm2d.remove(&3, "mmm"); // middle element assert_eq!(result.as_deref(), Some("MM0")); let result = zm2d.remove(&5, "ddd"); // singleton K0 assert_eq!(result.as_deref(), Some("DD1")); let result = zm2d.remove(&9, "yyy"); // last element assert_eq!(result.as_deref(), Some("YYY")); assert_eq!(format!("{zm2d:?}"), "ZeroMap2d { keys0: ZeroVec([3, 6, 7]), joiner: ZeroVec([1, 4, 7]), keys1: [\"eee\", \"ddd\", \"mmm\", \"nnn\", \"ddd\", \"eee\", \"www\"], values: [\"EEE\", \"DD3\", \"MM1\", \"NNN\", \"DD2\", \"EEE\", \"WWW\"] }"); } #[test] fn zeromap2d_metazone() { let source_data = [ (*b"aedxb", 0, Some(*b"gulf")), (*b"afkbl", 0, Some(*b"afgh")), (*b"ushnl", 0, None), (*b"ushnl", 7272660, Some(*b"haal")), (*b"ushnl", 0, None), (*b"ushnl", 7272660, Some(*b"haal")), ]; let btreemap: BTreeMap<([u8; 5], i32), Option<[u8; 4]>> = source_data .iter() .copied() .map(|(a, b, c)| ((a, b), c)) .collect(); let zeromap2d: ZeroMap2d<[u8; 5], i32, Option<[u8; 4]>> = source_data.iter().copied().collect(); let mut btreemap_iter = btreemap.iter(); for cursor in zeromap2d.iter0() { for (key1, value) in cursor.iter1() { // This code runs for every (key0, key1) pair in order let expected = btreemap_iter.next().unwrap(); assert_eq!( (expected.0 .0, expected.0 .1, expected.1), (*cursor.key0(), key1.as_unsigned_int() as i32, &value.get()) ); } } assert!(btreemap_iter.next().is_none()); } } 
zerovec-0.11.1/src/map2d/mod.rs000064400000000000000000000007411046102023000143020ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). //! See [`ZeroMap2d`](crate::ZeroMap2d) for details. mod borrowed; mod cursor; pub(crate) mod map; #[cfg(feature = "databake")] mod databake; #[cfg(feature = "serde")] mod serde; pub use crate::ZeroMap2d; pub use borrowed::ZeroMap2dBorrowed; pub use cursor::ZeroMap2dCursor; zerovec-0.11.1/src/map2d/serde.rs000064400000000000000000000347621046102023000146370ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use super::{ZeroMap2d, ZeroMap2dBorrowed, ZeroMap2dCursor}; use crate::map::{MutableZeroVecLike, ZeroMapKV, ZeroVecLike}; use crate::ZeroVec; use alloc::vec::Vec; use core::fmt; use core::marker::PhantomData; use serde::de::{self, Deserialize, Deserializer, MapAccess, Visitor}; #[cfg(feature = "serde")] use serde::ser::{Serialize, SerializeMap, Serializer}; /// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate #[cfg(feature = "serde")] impl<'a, K0, K1, V> Serialize for ZeroMap2d<'a, K0, K1, V> where K0: ZeroMapKV<'a> + Serialize + ?Sized + Ord, K1: ZeroMapKV<'a> + Serialize + ?Sized + Ord, V: ZeroMapKV<'a> + Serialize + ?Sized, K0::Container: Serialize, K1::Container: Serialize, V::Container: Serialize, { fn serialize(&self, serializer: S) -> Result where S: Serializer, { if serializer.is_human_readable() { let mut serde_map = serializer.serialize_map(None)?; for cursor in self.iter0() { K0::Container::zvl_get_as_t(cursor.key0(), |k| serde_map.serialize_key(k))?; let inner_map = ZeroMap2dInnerMapSerialize { cursor }; serde_map.serialize_value(&inner_map)?; } 
serde_map.end() } else { (&self.keys0, &self.joiner, &self.keys1, &self.values).serialize(serializer) } } } /// Helper struct for human-serializing the inner map of a ZeroMap2d #[cfg(feature = "serde")] struct ZeroMap2dInnerMapSerialize<'a, 'l, K0, K1, V> where K0: ZeroMapKV<'a> + ?Sized + Ord, K1: ZeroMapKV<'a> + ?Sized + Ord, V: ZeroMapKV<'a> + ?Sized, { pub cursor: ZeroMap2dCursor<'l, 'a, K0, K1, V>, } #[cfg(feature = "serde")] impl<'a, 'l, K0, K1, V> Serialize for ZeroMap2dInnerMapSerialize<'a, 'l, K0, K1, V> where K0: ZeroMapKV<'a> + Serialize + ?Sized + Ord, K1: ZeroMapKV<'a> + Serialize + ?Sized + Ord, V: ZeroMapKV<'a> + Serialize + ?Sized, K0::Container: Serialize, K1::Container: Serialize, V::Container: Serialize, { fn serialize(&self, serializer: S) -> Result where S: Serializer, { let mut serde_map = serializer.serialize_map(None)?; for (key1, v) in self.cursor.iter1() { K1::Container::zvl_get_as_t(key1, |k| serde_map.serialize_key(k))?; V::Container::zvl_get_as_t(v, |v| serde_map.serialize_value(v))?; } serde_map.end() } } /// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate #[cfg(feature = "serde")] impl<'a, K0, K1, V> Serialize for ZeroMap2dBorrowed<'a, K0, K1, V> where K0: ZeroMapKV<'a> + Serialize + ?Sized + Ord, K1: ZeroMapKV<'a> + Serialize + ?Sized + Ord, V: ZeroMapKV<'a> + Serialize + ?Sized, K0::Container: Serialize, K1::Container: Serialize, V::Container: Serialize, { fn serialize(&self, serializer: S) -> Result where S: Serializer, { ZeroMap2d::::from(*self).serialize(serializer) } } /// Modified example from https://serde.rs/deserialize-map.html struct ZeroMap2dMapVisitor<'a, K0, K1, V> where K0: ZeroMapKV<'a> + ?Sized + Ord, K1: ZeroMapKV<'a> + ?Sized + Ord, V: ZeroMapKV<'a> + ?Sized, { #[allow(clippy::type_complexity)] // it's a marker type, complexity doesn't matter marker: PhantomData (&'a K0::OwnedType, &'a K1::OwnedType, &'a V::OwnedType)>, } impl<'a, K0, K1, V> ZeroMap2dMapVisitor<'a, K0, K1, V> 
where K0: ZeroMapKV<'a> + ?Sized + Ord, K1: ZeroMapKV<'a> + ?Sized + Ord, V: ZeroMapKV<'a> + ?Sized, { fn new() -> Self { ZeroMap2dMapVisitor { marker: PhantomData, } } } impl<'a, 'de, K0, K1, V> Visitor<'de> for ZeroMap2dMapVisitor<'a, K0, K1, V> where K0: ZeroMapKV<'a> + Ord + ?Sized + Ord, K1: ZeroMapKV<'a> + Ord + ?Sized + Ord, V: ZeroMapKV<'a> + ?Sized, K1::Container: Deserialize<'de>, V::Container: Deserialize<'de>, K0::OwnedType: Deserialize<'de>, K1::OwnedType: Deserialize<'de>, V::OwnedType: Deserialize<'de>, { type Value = ZeroMap2d<'a, K0, K1, V>; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("a map produced by ZeroMap2d") } fn visit_map(self, mut access: M) -> Result where M: MapAccess<'de>, { let mut map = ZeroMap2d::with_capacity(access.size_hint().unwrap_or(0)); // On the first level, pull out the K0s and a TupleVecMap of the // K1s and Vs, and then collect them into a ZeroMap2d while let Some((key0, inner_map)) = access.next_entry::>()? 
{ for (key1, value) in inner_map.entries.iter() { if map .try_append( K0::Container::owned_as_t(&key0), K1::Container::owned_as_t(key1), V::Container::owned_as_t(value), ) .is_some() { return Err(de::Error::custom( "ZeroMap2d's keys must be sorted while deserializing", )); } } } Ok(map) } } /// Helper struct for human-deserializing the inner map of a ZeroMap2d struct TupleVecMap { pub entries: Vec<(K1, V)>, } struct TupleVecMapVisitor { #[allow(clippy::type_complexity)] // it's a marker type, complexity doesn't matter marker: PhantomData (K1, V)>, } impl TupleVecMapVisitor { fn new() -> Self { TupleVecMapVisitor { marker: PhantomData, } } } impl<'de, K1, V> Visitor<'de> for TupleVecMapVisitor where K1: Deserialize<'de>, V: Deserialize<'de>, { type Value = TupleVecMap; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("an inner map produced by ZeroMap2d") } fn visit_map(self, mut access: M) -> Result where M: MapAccess<'de>, { let mut result = Vec::with_capacity(access.size_hint().unwrap_or(0)); while let Some((key1, value)) = access.next_entry::()? 
{ result.push((key1, value)); } Ok(TupleVecMap { entries: result }) } } impl<'de, K1, V> Deserialize<'de> for TupleVecMap where K1: Deserialize<'de>, V: Deserialize<'de>, { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { deserializer.deserialize_map(TupleVecMapVisitor::::new()) } } /// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate impl<'de, 'a, K0, K1, V> Deserialize<'de> for ZeroMap2d<'a, K0, K1, V> where K0: ZeroMapKV<'a> + Ord + ?Sized, K1: ZeroMapKV<'a> + Ord + ?Sized, V: ZeroMapKV<'a> + ?Sized, K0::Container: Deserialize<'de>, K1::Container: Deserialize<'de>, V::Container: Deserialize<'de>, K0::OwnedType: Deserialize<'de>, K1::OwnedType: Deserialize<'de>, V::OwnedType: Deserialize<'de>, 'de: 'a, { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { if deserializer.is_human_readable() { deserializer.deserialize_map(ZeroMap2dMapVisitor::<'a, K0, K1, V>::new()) } else { let (keys0, joiner, keys1, values): ( K0::Container, ZeroVec, K1::Container, V::Container, ) = Deserialize::deserialize(deserializer)?; // Invariant 1: len(keys0) == len(joiner) if keys0.zvl_len() != joiner.len() { return Err(de::Error::custom( "Mismatched keys0 and joiner sizes in ZeroMap2d", )); } // Invariant 2: len(keys1) == len(values) if keys1.zvl_len() != values.zvl_len() { return Err(de::Error::custom( "Mismatched keys1 and value sizes in ZeroMap2d", )); } // Invariant 3: joiner is sorted if !joiner.zvl_is_ascending() { return Err(de::Error::custom( "ZeroMap2d deserializing joiner array out of order", )); } // Invariant 4: the last element of joiner is the length of keys1 if let Some(last_joiner0) = joiner.last() { if keys1.zvl_len() != last_joiner0 as usize { return Err(de::Error::custom( "ZeroMap2d deserializing joiner array malformed", )); } } let result = Self { keys0, joiner, keys1, values, }; // In debug mode, check the optional invariants, too #[cfg(debug_assertions)] result.check_invariants(); 
Ok(result) } } } /// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate impl<'de, 'a, K0, K1, V> Deserialize<'de> for ZeroMap2dBorrowed<'a, K0, K1, V> where K0: ZeroMapKV<'a> + Ord + ?Sized, K1: ZeroMapKV<'a> + Ord + ?Sized, V: ZeroMapKV<'a> + ?Sized, K0::Container: Deserialize<'de>, K1::Container: Deserialize<'de>, V::Container: Deserialize<'de>, K0::OwnedType: Deserialize<'de>, K1::OwnedType: Deserialize<'de>, V::OwnedType: Deserialize<'de>, 'de: 'a, { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { if deserializer.is_human_readable() { Err(de::Error::custom( "ZeroMap2dBorrowed cannot be deserialized from human-readable formats", )) } else { let deserialized: ZeroMap2d<'a, K0, K1, V> = ZeroMap2d::deserialize(deserializer)?; let keys0 = if let Some(keys0) = deserialized.keys0.zvl_as_borrowed_inner() { keys0 } else { return Err(de::Error::custom( "ZeroMap2dBorrowed can only deserialize in zero-copy ways", )); }; let joiner = if let Some(joiner) = deserialized.joiner.zvl_as_borrowed_inner() { joiner } else { return Err(de::Error::custom( "ZeroMap2dBorrowed can only deserialize in zero-copy ways", )); }; let keys1 = if let Some(keys1) = deserialized.keys1.zvl_as_borrowed_inner() { keys1 } else { return Err(de::Error::custom( "ZeroMap2dBorrowed can only deserialize in zero-copy ways", )); }; let values = if let Some(values) = deserialized.values.zvl_as_borrowed_inner() { values } else { return Err(de::Error::custom( "ZeroMap2dBorrowed can only deserialize in zero-copy ways", )); }; Ok(Self { keys0, joiner, keys1, values, }) } } } #[cfg(test)] #[allow(non_camel_case_types)] mod test { use crate::map2d::{ZeroMap2d, ZeroMap2dBorrowed}; #[derive(serde::Serialize, serde::Deserialize)] struct DeriveTest_ZeroMap2d<'data> { #[serde(borrow)] _data: ZeroMap2d<'data, u16, str, [u8]>, } #[derive(serde::Serialize, serde::Deserialize)] struct DeriveTest_ZeroMap2dBorrowed<'data> { #[serde(borrow)] _data: 
ZeroMap2dBorrowed<'data, u16, str, [u8]>, } const JSON_STR: &str = "{\"1\":{\"1\":\"uno\"},\"2\":{\"2\":\"dos\",\"3\":\"tres\"}}"; const BINCODE_BYTES: &[u8] = &[ 8, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 3, 0, 16, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 6, 0, 117, 110, 111, 100, 111, 115, 116, 114, 101, 115, ]; fn make_map() -> ZeroMap2d<'static, u32, u16, str> { let mut map = ZeroMap2d::new(); map.insert(&1, &1, "uno"); map.insert(&2, &2, "dos"); map.insert(&2, &3, "tres"); map } #[test] fn test_serde_json() { let map = make_map(); let json_str = serde_json::to_string(&map).expect("serialize"); assert_eq!(JSON_STR, json_str); let new_map: ZeroMap2d = serde_json::from_str(&json_str).expect("deserialize"); assert_eq!(format!("{new_map:?}"), format!("{map:?}")); } #[test] fn test_bincode() { let map = make_map(); let bincode_bytes = bincode::serialize(&map).expect("serialize"); assert_eq!(BINCODE_BYTES, bincode_bytes); let new_map: ZeroMap2d = bincode::deserialize(&bincode_bytes).expect("deserialize"); assert_eq!( format!("{new_map:?}"), format!("{map:?}").replace("Owned", "Borrowed"), ); let new_map: ZeroMap2dBorrowed = bincode::deserialize(&bincode_bytes).expect("deserialize"); assert_eq!( format!("{new_map:?}"), format!("{map:?}") .replace("Owned", "Borrowed") .replace("ZeroMap2d", "ZeroMap2dBorrowed") ); } #[test] fn test_serde_rmp() { let map = make_map(); let rmp_buf = rmp_serde::to_vec(&map).expect("serialize"); let new_map: ZeroMap2d = rmp_serde::from_slice(&rmp_buf).unwrap(); assert_eq!(map, new_map); } #[test] fn test_sample_bincode() { // This is the map from the main docs page for ZeroMap2d let mut map: ZeroMap2d = ZeroMap2d::new(); map.insert(&1, &2, "three"); let bincode_bytes: Vec = bincode::serialize(&map).expect("serialize"); assert_eq!( bincode_bytes.as_slice(), &[ 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 7, 0, 
0, 0, 0, 0, 0, 0, 1, 0, 116, 104, 114, 101, 101 ] ); } } zerovec-0.11.1/src/samples.rs000064400000000000000000000057721046102023000141750ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). //! Example data useful for testing ZeroVec. // This module is included directly in tests and can trigger the dead_code // warning since not all samples are used in each test #![allow(dead_code)] #[repr(align(8))] struct Aligned(pub T); // This is aligned so that we can test unaligned behavior at odd offsets const ALIGNED_TEST_BUFFER_LE: Aligned<[u8; 80]> = Aligned([ 0x00, 0x01, 0x02, 0x00, 0x04, 0x05, 0x06, 0x00, 0x08, 0x09, 0x0a, 0x00, 0x0c, 0x0d, 0x0e, 0x00, 0x10, 0x11, 0x12, 0x00, 0x14, 0x15, 0x16, 0x00, 0x18, 0x19, 0x1a, 0x00, 0x1c, 0x1d, 0x1e, 0x00, 0x20, 0x21, 0x22, 0x00, 0x24, 0x25, 0x26, 0x00, 0x28, 0x29, 0x2a, 0x00, 0x2c, 0x2d, 0x2e, 0x00, 0x30, 0x31, 0x32, 0x00, 0x34, 0x35, 0x36, 0x00, 0x38, 0x39, 0x3a, 0x00, 0x3c, 0x3d, 0x3e, 0x00, 0x40, 0x41, 0x42, 0x00, 0x44, 0x45, 0x46, 0x00, 0x48, 0x49, 0x4a, 0x00, 0x4c, 0x4d, 0x4e, 0x00, ]); /// An example byte array intended to be used in `ZeroVec`. pub const TEST_BUFFER_LE: &[u8] = &ALIGNED_TEST_BUFFER_LE.0; /// u32 numbers corresponding to the above byte array. pub const TEST_SLICE: &[u32] = &[ 0x020100, 0x060504, 0x0a0908, 0x0e0d0c, 0x121110, 0x161514, 0x1a1918, 0x1e1d1c, 0x222120, 0x262524, 0x2a2928, 0x2e2d2c, 0x323130, 0x363534, 0x3a3938, 0x3e3d3c, 0x424140, 0x464544, 0x4a4948, 0x4e4d4c, ]; /// The sum of the numbers in TEST_SLICE. pub const TEST_SUM: u32 = 52629240; /// Representation of TEST_SLICE in JSON. pub const JSON_STR: &str = "[131328,394500,657672,920844,1184016,1447188,1710360,1973532,2236704,2499876,2763048,3026220,3289392,3552564,3815736,4078908,4342080,4605252,4868424,5131596]"; /// Representation of TEST_SLICE in Bincode. 
pub const BINCODE_BUF: &[u8] = &[ 80, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 4, 5, 6, 0, 8, 9, 10, 0, 12, 13, 14, 0, 16, 17, 18, 0, 20, 21, 22, 0, 24, 25, 26, 0, 28, 29, 30, 0, 32, 33, 34, 0, 36, 37, 38, 0, 40, 41, 42, 0, 44, 45, 46, 0, 48, 49, 50, 0, 52, 53, 54, 0, 56, 57, 58, 0, 60, 61, 62, 0, 64, 65, 66, 0, 68, 69, 70, 0, 72, 73, 74, 0, 76, 77, 78, 0, ]; /// Representation of a VarZeroVec with contents ["w", "ω", "文", "𑄃"] pub const TEST_VARZEROSLICE_BYTES: &[u8] = &[ 4, 0, 0, 0, 0, 0, 1, 0, 3, 0, 6, 0, 119, 207, 137, 230, 150, 135, 240, 145, 132, 131, ]; #[test] fn validate() { use crate::{VarZeroVec, ZeroVec}; assert_eq!( ZeroVec::::parse_bytes(TEST_BUFFER_LE).unwrap(), ZeroVec::alloc_from_slice(TEST_SLICE) ); assert_eq!(TEST_SLICE.iter().sum::(), TEST_SUM); assert_eq!( serde_json::from_str::>(JSON_STR).unwrap(), ZeroVec::alloc_from_slice(TEST_SLICE) ); assert_eq!( bincode::deserialize::>(BINCODE_BUF).unwrap(), ZeroVec::alloc_from_slice(TEST_SLICE) ); VarZeroVec::::parse_bytes(TEST_VARZEROSLICE_BYTES).unwrap(); } zerovec-0.11.1/src/ule/chars.rs000064400000000000000000000144151046102023000144100ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). #![allow(clippy::upper_case_acronyms)] //! ULE implementation for the `char` type. use super::*; use crate::impl_ule_from_array; use core::cmp::Ordering; use core::convert::TryFrom; /// A u8 array of little-endian data corresponding to a Unicode scalar value. /// /// The bytes of a `CharULE` are guaranteed to represent a little-endian-encoded u32 that is a /// valid `char` and can be converted without validation. 
/// /// # Examples /// /// Convert a `char` to a `CharULE` and back again: /// /// ``` /// use zerovec::ule::{AsULE, CharULE, ULE}; /// /// let c1 = '𑄃'; /// let ule = c1.to_unaligned(); /// assert_eq!(CharULE::slice_as_bytes(&[ule]), &[0x03, 0x11, 0x01]); /// let c2 = char::from_unaligned(ule); /// assert_eq!(c1, c2); /// ``` /// /// Attempt to parse invalid bytes to a `CharULE`: /// /// ``` /// use zerovec::ule::{CharULE, ULE}; /// /// let bytes: &[u8] = &[0xFF, 0xFF, 0xFF, 0xFF]; /// CharULE::parse_bytes_to_slice(bytes).expect_err("Invalid bytes"); /// ``` #[repr(transparent)] #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] pub struct CharULE([u8; 3]); impl CharULE { /// Converts a [`char`] to a [`CharULE`]. This is equivalent to calling /// [`AsULE::to_unaligned()`] /// /// See the type-level documentation for [`CharULE`] for more information. #[inline] pub const fn from_aligned(c: char) -> Self { let [u0, u1, u2, _u3] = (c as u32).to_le_bytes(); Self([u0, u1, u2]) } /// Converts this [`CharULE`] to a [`char`]. This is equivalent to calling /// [`AsULE::from_unaligned`] /// /// See the type-level documentation for [`CharULE`] for more information. #[inline] pub fn to_char(self) -> char { let [b0, b1, b2] = self.0; // Safe because the bytes of CharULE are defined to represent a valid Unicode scalar value. unsafe { char::from_u32_unchecked(u32::from_le_bytes([b0, b1, b2, 0])) } } impl_ule_from_array!(char, CharULE, Self([0; 3])); } // Safety (based on the safety checklist on the ULE trait): // 1. CharULE does not include any uninitialized or padding bytes. // (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) // 2. CharULE is aligned to 1 byte. // (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) // 3. The impl of validate_bytes() returns an error if any byte is not valid. // 4. The impl of validate_bytes() returns an error if there are extra bytes. // 5. The other ULE methods use the default impl. // 6. 
CharULE byte equality is semantic equality unsafe impl ULE for CharULE { #[inline] fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> { if bytes.len() % 3 != 0 { return Err(UleError::length::(bytes.len())); } // Validate the bytes for chunk in bytes.chunks_exact(3) { // TODO: Use slice::as_chunks() when stabilized #[allow(clippy::indexing_slicing)] // Won't panic because the chunks are always 3 bytes long let u = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], 0]); char::try_from(u).map_err(|_| UleError::parse::())?; } Ok(()) } } impl AsULE for char { type ULE = CharULE; #[inline] fn to_unaligned(self) -> Self::ULE { CharULE::from_aligned(self) } #[inline] fn from_unaligned(unaligned: Self::ULE) -> Self { unaligned.to_char() } } impl PartialOrd for CharULE { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl Ord for CharULE { fn cmp(&self, other: &Self) -> Ordering { char::from_unaligned(*self).cmp(&char::from_unaligned(*other)) } } #[cfg(test)] mod test { use super::*; #[test] fn test_from_array() { const CHARS: [char; 2] = ['a', '🙃']; const CHARS_ULE: [CharULE; 2] = CharULE::from_array(CHARS); assert_eq!( CharULE::slice_as_bytes(&CHARS_ULE), &[0x61, 0x00, 0x00, 0x43, 0xF6, 0x01] ); } #[test] fn test_from_array_zst() { const CHARS: [char; 0] = []; const CHARS_ULE: [CharULE; 0] = CharULE::from_array(CHARS); let bytes = CharULE::slice_as_bytes(&CHARS_ULE); let empty: &[u8] = &[]; assert_eq!(bytes, empty); } #[test] fn test_parse() { // 1-byte, 2-byte, 3-byte, and two 4-byte character in UTF-8 (not as relevant in UTF-32) let chars = ['w', 'ω', '文', '𑄃', '🙃']; let char_ules: Vec = chars.iter().copied().map(char::to_unaligned).collect(); let char_bytes: &[u8] = CharULE::slice_as_bytes(&char_ules); // Check parsing let parsed_ules: &[CharULE] = CharULE::parse_bytes_to_slice(char_bytes).unwrap(); assert_eq!(char_ules, parsed_ules); let parsed_chars: Vec = parsed_ules .iter() .copied() .map(char::from_unaligned) .collect(); 
assert_eq!(&chars, parsed_chars.as_slice()); // Compare to golden expected data assert_eq!( &[119, 0, 0, 201, 3, 0, 135, 101, 0, 3, 17, 1, 67, 246, 1], char_bytes ); } #[test] fn test_failures() { // 119 and 120 are valid, but not 0xD800 (high surrogate) let u32s = [119, 0xD800, 120]; let u32_ules: Vec> = u32s .iter() .copied() .map(::to_unaligned) .collect(); let u32_bytes: &[u8] = RawBytesULE::<4>::slice_as_bytes(&u32_ules); let parsed_ules_result = CharULE::parse_bytes_to_slice(u32_bytes); assert!(parsed_ules_result.is_err()); // 0x20FFFF is out of range for a char let u32s = [0x20FFFF]; let u32_ules: Vec> = u32s .iter() .copied() .map(::to_unaligned) .collect(); let u32_bytes: &[u8] = RawBytesULE::<4>::slice_as_bytes(&u32_ules); let parsed_ules_result = CharULE::parse_bytes_to_slice(u32_bytes); assert!(parsed_ules_result.is_err()); } } zerovec-0.11.1/src/ule/custom.rs000064400000000000000000000132371046102023000146230ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). //! Documentation on implementing custom VarULE types. //! //! This module contains documentation for defining custom VarULE types, //! especially those using complex custom dynamically sized types. //! //! In *most cases* you should be able to create custom VarULE types using //! [`#[make_varule]`](crate::make_ule). //! //! # Example //! //! For example, if your regular stack type is: //! //! ```rust //! use zerofrom::ZeroFrom; //! use zerovec::ule::*; //! use zerovec::ZeroVec; //! //! #[derive(serde::Serialize, serde::Deserialize)] //! struct Foo<'a> { //! field1: char, //! field2: u32, //! #[serde(borrow)] //! field3: ZeroVec<'a, u32>, //! } //! ``` //! //! then the ULE type will be implemented as follows. Ideally, you should have //! `EncodeAsVarULE` and `ZeroFrom` implementations on `Foo` pertaining to `FooULE`, //! 
as well as a `Serialize` impl on `FooULE` and a `Deserialize` impl on `Box` //! to enable human-readable serialization and deserialization. //! //! ```rust //! use zerovec::{ZeroVec, VarZeroVec, ZeroSlice}; //! use zerovec::ule::*; //! use zerofrom::ZeroFrom; //! use core::mem; //! //! # #[derive(serde::Serialize, serde::Deserialize)] //! # struct Foo<'a> { //! # field1: char, //! # field2: u32, //! # #[serde(borrow)] //! # field3: ZeroVec<'a, u32> //! # } //! //! // Must be repr(C, packed) for safety of VarULE! //! // Must also only contain ULE types //! #[repr(C, packed)] //! struct FooULE { //! field1: ::ULE, //! field2: ::ULE, //! field3: ZeroSlice, //! } //! //! // Safety (based on the safety checklist on the VarULE trait): //! // 1. FooULE does not include any uninitialized or padding bytes. (achieved by `#[repr(C, packed)]` on //! // a struct with only ULE fields) //! // 2. FooULE is aligned to 1 byte. (achieved by `#[repr(C, packed)]` on //! // a struct with only ULE fields) //! // 3. The impl of `validate_bytes()` returns an error if any byte is not valid. //! // 4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety //! // 5. The impl of `from_bytes_unchecked()` returns a reference to the same data. //! // 6. The other VarULE methods use the default impl. //! // 7. FooULE byte equality is semantic equality //! unsafe impl VarULE for FooULE { //! fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> { //! // validate each field //! ::ULE::validate_bytes(&bytes[0..3]).map_err(|_| UleError::parse::())?; //! ::ULE::validate_bytes(&bytes[3..7]).map_err(|_| UleError::parse::())?; //! let _ = ZeroVec::::parse_bytes(&bytes[7..]).map_err(|_| UleError::parse::())?; //! Ok(()) //! } //! unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { //! let ptr = bytes.as_ptr(); //! let len = bytes.len(); //! // subtract the length of the char and u32 to get the length of the array //! let len_new = (len - 7) / 4; //! 
// it's hard constructing custom DSTs, we fake a pointer/length construction //! // eventually we can use the Pointer::Metadata APIs when they stabilize //! let fake_slice = core::ptr::slice_from_raw_parts(ptr as *const ::ULE, len_new); //! &*(fake_slice as *const Self) //! } //! } //! //! unsafe impl EncodeAsVarULE for Foo<'_> { //! fn encode_var_ule_as_slices(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { //! // take each field, convert to ULE byte slices, and pass them through //! cb(&[::ULE::slice_as_bytes(&[self.field1.to_unaligned()]), //! ::ULE::slice_as_bytes(&[self.field2.to_unaligned()]), //! // the ZeroVec is already in the correct slice format //! self.field3.as_bytes()]) //! } //! } //! //! impl<'a> ZeroFrom<'a, FooULE> for Foo<'a> { //! fn zero_from(other: &'a FooULE) -> Self { //! Self { //! field1: AsULE::from_unaligned(other.field1), //! field2: AsULE::from_unaligned(other.field2), //! field3: ZeroFrom::zero_from(&other.field3), //! } //! } //! } //! //! //! impl serde::Serialize for FooULE //! { //! fn serialize(&self, serializer: S) -> Result //! where //! S: serde::Serializer, //! { //! Foo::zero_from(self).serialize(serializer) //! } //! } //! //! impl<'de> serde::Deserialize<'de> for Box //! { //! fn deserialize(deserializer: D) -> Result //! where //! D: serde::Deserializer<'de>, //! { //! let mut foo = Foo::deserialize(deserializer)?; //! Ok(encode_varule_to_box(&foo)) //! } //! } //! //! fn main() { //! let mut foos = [Foo {field1: 'u', field2: 983, field3: ZeroVec::alloc_from_slice(&[1212,2309,500,7000])}, //! Foo {field1: 'l', field2: 1010, field3: ZeroVec::alloc_from_slice(&[1932, 0, 8888, 91237])}]; //! //! let vzv = VarZeroVec::<_>::from(&foos); //! //! assert_eq!(char::from_unaligned(vzv.get(0).unwrap().field1), 'u'); //! assert_eq!(u32::from_unaligned(vzv.get(0).unwrap().field2), 983); //! assert_eq!(&vzv.get(0).unwrap().field3, &[1212,2309,500,7000][..]); //! //! assert_eq!(char::from_unaligned(vzv.get(1).unwrap().field1), 'l'); //! 
assert_eq!(u32::from_unaligned(vzv.get(1).unwrap().field2), 1010); //! assert_eq!(&vzv.get(1).unwrap().field3, &[1932, 0, 8888, 91237][..]); //! } //! ``` zerovec-0.11.1/src/ule/encode.rs000064400000000000000000000370731046102023000145520ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::ule::*; use crate::varzerovec::VarZeroVecFormat; use crate::{VarZeroSlice, VarZeroVec, ZeroSlice, ZeroVec}; #[cfg(feature = "alloc")] use alloc::borrow::{Cow, ToOwned}; #[cfg(feature = "alloc")] use alloc::boxed::Box; #[cfg(feature = "alloc")] use alloc::string::String; #[cfg(feature = "alloc")] use alloc::{vec, vec::Vec}; #[cfg(feature = "alloc")] use core::mem; /// Allows types to be encoded as VarULEs. This is highly useful for implementing VarULE on /// custom DSTs where the type cannot be obtained as a reference to some other type. /// /// [`Self::encode_var_ule_as_slices()`] should be implemented by providing an encoded slice for each field /// of the VarULE type to the callback, in order. For an implementation to be safe, the slices /// to the callback must, when concatenated, be a valid instance of the VarULE type. /// /// See the [custom VarULEdocumentation](crate::ule::custom) for examples. /// /// [`Self::encode_var_ule_as_slices()`] is only used to provide default implementations for [`Self::encode_var_ule_write()`] /// and [`Self::encode_var_ule_len()`]. If you override the default implementations it is totally valid to /// replace [`Self::encode_var_ule_as_slices()`]'s body with `unreachable!()`. This can be done for cases where /// it is not possible to implement [`Self::encode_var_ule_as_slices()`] but the other methods still work. 
/// /// A typical implementation will take each field in the order found in the [`VarULE`] type, /// convert it to ULE, call [`ULE::slice_as_bytes()`] on them, and pass the slices to `cb` in order. /// A trailing [`ZeroVec`](crate::ZeroVec) or [`VarZeroVec`](crate::VarZeroVec) can have their underlying /// byte representation passed through. /// /// In case the compiler is not optimizing [`Self::encode_var_ule_len()`], it can be overridden. A typical /// implementation will add up the sizes of each field on the [`VarULE`] type and then add in the byte length of the /// dynamically-sized part. /// /// # Safety /// /// The safety invariants of [`Self::encode_var_ule_as_slices()`] are: /// - It must call `cb` (only once) /// - The slices passed to `cb`, if concatenated, should be a valid instance of the `T` [`VarULE`] type /// (i.e. if fed to [`VarULE::validate_bytes()`] they must produce a successful result) /// - It must return the return value of `cb` to the caller /// /// One or more of [`Self::encode_var_ule_len()`] and [`Self::encode_var_ule_write()`] may be provided. /// If both are, then `zerovec` code is guaranteed to not call [`Self::encode_var_ule_as_slices()`], and it may be replaced /// with `unreachable!()`. /// /// The safety invariants of [`Self::encode_var_ule_len()`] are: /// - It must return the length of the corresponding VarULE type /// /// The safety invariants of [`Self::encode_var_ule_write()`] are: /// - The slice written to `dst` must be a valid instance of the `T` [`VarULE`] type pub unsafe trait EncodeAsVarULE { /// Calls `cb` with a piecewise list of byte slices that when concatenated /// produce the memory pattern of the corresponding instance of `T`. /// /// Do not call this function directly; instead use the other two. Some implementors /// may define this function to panic. 
fn encode_var_ule_as_slices(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R; /// Return the length, in bytes, of the corresponding [`VarULE`] type fn encode_var_ule_len(&self) -> usize { self.encode_var_ule_as_slices(|slices| slices.iter().map(|s| s.len()).sum()) } /// Write the corresponding [`VarULE`] type to the `dst` buffer. `dst` should /// be the size of [`Self::encode_var_ule_len()`] fn encode_var_ule_write(&self, mut dst: &mut [u8]) { debug_assert_eq!(self.encode_var_ule_len(), dst.len()); self.encode_var_ule_as_slices(move |slices| { #[allow(clippy::indexing_slicing)] // by debug_assert for slice in slices { dst[..slice.len()].copy_from_slice(slice); dst = &mut dst[slice.len()..]; } }); } } /// Given an [`EncodeAsVarULE`] type `S`, encode it into a `Box` /// /// This is primarily useful for generating `Deserialize` impls for VarULE types #[cfg(feature = "alloc")] pub fn encode_varule_to_box + ?Sized, T: VarULE + ?Sized>(x: &S) -> Box { // zero-fill the vector to avoid uninitialized data UB let mut vec: Vec = vec![0; x.encode_var_ule_len()]; x.encode_var_ule_write(&mut vec); let boxed = mem::ManuallyDrop::new(vec.into_boxed_slice()); unsafe { // Safety: `ptr` is a box, and `T` is a VarULE which guarantees it has the same memory layout as `[u8]` // and can be recouped via from_bytes_unchecked() let ptr: *mut T = T::from_bytes_unchecked(&boxed) as *const T as *mut T; // Safety: we can construct an owned version since we have mem::forgotten the older owner Box::from_raw(ptr) } } unsafe impl EncodeAsVarULE for T { fn encode_var_ule_as_slices(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { cb(&[T::as_bytes(self)]) } } unsafe impl EncodeAsVarULE for &'_ T { fn encode_var_ule_as_slices(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { cb(&[T::as_bytes(self)]) } } unsafe impl EncodeAsVarULE for &'_ &'_ T { fn encode_var_ule_as_slices(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { cb(&[T::as_bytes(self)]) } } #[cfg(feature = "alloc")] unsafe impl EncodeAsVarULE for Cow<'_, T> 
where T: ToOwned, { fn encode_var_ule_as_slices(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { cb(&[T::as_bytes(self.as_ref())]) } } #[cfg(feature = "alloc")] unsafe impl EncodeAsVarULE for Box { fn encode_var_ule_as_slices(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { cb(&[T::as_bytes(self)]) } } #[cfg(feature = "alloc")] unsafe impl EncodeAsVarULE for &'_ Box { fn encode_var_ule_as_slices(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { cb(&[T::as_bytes(self)]) } } #[cfg(feature = "alloc")] unsafe impl EncodeAsVarULE for String { fn encode_var_ule_as_slices(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { cb(&[self.as_bytes()]) } } #[cfg(feature = "alloc")] unsafe impl EncodeAsVarULE for &'_ String { fn encode_var_ule_as_slices(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { cb(&[self.as_bytes()]) } } // Note: This impl could technically use `T: AsULE`, but we want users to prefer `ZeroSlice` // for cases where T is not a ULE. Therefore, we can use the more efficient `memcpy` impl here. #[cfg(feature = "alloc")] unsafe impl EncodeAsVarULE<[T]> for Vec where T: ULE, { fn encode_var_ule_as_slices(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { cb(&[<[T] as VarULE>::as_bytes(self)]) } } unsafe impl EncodeAsVarULE> for &'_ [T] where T: AsULE + 'static, { fn encode_var_ule_as_slices(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { // unnecessary if the other two are implemented unreachable!() } #[inline] fn encode_var_ule_len(&self) -> usize { self.len() * core::mem::size_of::() } fn encode_var_ule_write(&self, dst: &mut [u8]) { #[allow(non_snake_case)] let S = core::mem::size_of::(); debug_assert_eq!(self.len() * S, dst.len()); for (item, ref mut chunk) in self.iter().zip(dst.chunks_mut(S)) { let ule = item.to_unaligned(); chunk.copy_from_slice(ULE::slice_as_bytes(core::slice::from_ref(&ule))); } } } #[cfg(feature = "alloc")] unsafe impl EncodeAsVarULE> for Vec where T: AsULE + 'static, { fn encode_var_ule_as_slices(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { // unnecessary if the 
other two are implemented unreachable!() } #[inline] fn encode_var_ule_len(&self) -> usize { self.as_slice().encode_var_ule_len() } #[inline] fn encode_var_ule_write(&self, dst: &mut [u8]) { self.as_slice().encode_var_ule_write(dst) } } unsafe impl EncodeAsVarULE> for ZeroVec<'_, T> where T: AsULE + 'static, { fn encode_var_ule_as_slices(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { // unnecessary if the other two are implemented unreachable!() } #[inline] fn encode_var_ule_len(&self) -> usize { self.as_bytes().len() } fn encode_var_ule_write(&self, dst: &mut [u8]) { debug_assert_eq!(self.as_bytes().len(), dst.len()); dst.copy_from_slice(self.as_bytes()); } } unsafe impl EncodeAsVarULE> for &'_ [E] where T: VarULE + ?Sized, E: EncodeAsVarULE, F: VarZeroVecFormat, { fn encode_var_ule_as_slices(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { // unnecessary if the other two are implemented unimplemented!() } #[allow(clippy::unwrap_used)] // TODO(#1410): Rethink length errors in VZV. fn encode_var_ule_len(&self) -> usize { crate::varzerovec::components::compute_serializable_len::(self).unwrap() as usize } fn encode_var_ule_write(&self, dst: &mut [u8]) { crate::varzerovec::components::write_serializable_bytes::(self, dst) } } #[cfg(feature = "alloc")] unsafe impl EncodeAsVarULE> for Vec where T: VarULE + ?Sized, E: EncodeAsVarULE, F: VarZeroVecFormat, { fn encode_var_ule_as_slices(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { // unnecessary if the other two are implemented unreachable!() } #[inline] fn encode_var_ule_len(&self) -> usize { <_ as EncodeAsVarULE>>::encode_var_ule_len(&self.as_slice()) } #[inline] fn encode_var_ule_write(&self, dst: &mut [u8]) { <_ as EncodeAsVarULE>>::encode_var_ule_write(&self.as_slice(), dst) } } unsafe impl EncodeAsVarULE> for VarZeroVec<'_, T, F> where T: VarULE + ?Sized, F: VarZeroVecFormat, { fn encode_var_ule_as_slices(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { // unnecessary if the other two are implemented unreachable!() } #[inline] 
fn encode_var_ule_len(&self) -> usize { self.as_bytes().len() } #[inline] fn encode_var_ule_write(&self, dst: &mut [u8]) { debug_assert_eq!(self.as_bytes().len(), dst.len()); dst.copy_from_slice(self.as_bytes()); } } #[cfg(test)] mod test { use super::*; const STRING_ARRAY: [&str; 2] = ["hello", "world"]; const STRING_SLICE: &[&str] = &STRING_ARRAY; const U8_ARRAY: [u8; 8] = [0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07]; const U8_2D_ARRAY: [&[u8]; 2] = [&U8_ARRAY, &U8_ARRAY]; const U8_2D_SLICE: &[&[u8]] = &[&U8_ARRAY, &U8_ARRAY]; const U8_3D_ARRAY: [&[&[u8]]; 2] = [U8_2D_SLICE, U8_2D_SLICE]; const U8_3D_SLICE: &[&[&[u8]]] = &[U8_2D_SLICE, U8_2D_SLICE]; const U32_ARRAY: [u32; 4] = [0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F]; const U32_2D_ARRAY: [&[u32]; 2] = [&U32_ARRAY, &U32_ARRAY]; const U32_2D_SLICE: &[&[u32]] = &[&U32_ARRAY, &U32_ARRAY]; const U32_3D_ARRAY: [&[&[u32]]; 2] = [U32_2D_SLICE, U32_2D_SLICE]; const U32_3D_SLICE: &[&[&[u32]]] = &[U32_2D_SLICE, U32_2D_SLICE]; #[test] fn test_vzv_from() { type VZV<'a, T> = VarZeroVec<'a, T>; type ZS = ZeroSlice; type VZS = VarZeroSlice; let u8_zerovec: ZeroVec = ZeroVec::from_slice_or_alloc(&U8_ARRAY); let u8_2d_zerovec: [ZeroVec; 2] = [u8_zerovec.clone(), u8_zerovec.clone()]; let u8_2d_vec: Vec> = vec![U8_ARRAY.into(), U8_ARRAY.into()]; let u8_3d_vec: Vec>> = vec![u8_2d_vec.clone(), u8_2d_vec.clone()]; let u32_zerovec: ZeroVec = ZeroVec::from_slice_or_alloc(&U32_ARRAY); let u32_2d_zerovec: [ZeroVec; 2] = [u32_zerovec.clone(), u32_zerovec.clone()]; let u32_2d_vec: Vec> = vec![U32_ARRAY.into(), U32_ARRAY.into()]; let u32_3d_vec: Vec>> = vec![u32_2d_vec.clone(), u32_2d_vec.clone()]; let a: VZV = VarZeroVec::from(&STRING_ARRAY); let b: VZV = VarZeroVec::from(STRING_SLICE); let c: VZV = VarZeroVec::from(&Vec::from(STRING_SLICE)); assert_eq!(a, STRING_SLICE); assert_eq!(a, b); assert_eq!(a, c); let a: VZV<[u8]> = VarZeroVec::from(&U8_2D_ARRAY); let b: VZV<[u8]> = VarZeroVec::from(U8_2D_SLICE); let c: VZV<[u8]> = 
VarZeroVec::from(&u8_2d_vec); assert_eq!(a, U8_2D_SLICE); assert_eq!(a, b); assert_eq!(a, c); let u8_3d_vzv_brackets = &[a.clone(), a.clone()]; let a: VZV> = VarZeroVec::from(&U8_2D_ARRAY); let b: VZV> = VarZeroVec::from(U8_2D_SLICE); let c: VZV> = VarZeroVec::from(&u8_2d_vec); let d: VZV> = VarZeroVec::from(&u8_2d_zerovec); assert_eq!(a, U8_2D_SLICE); assert_eq!(a, b); assert_eq!(a, c); assert_eq!(a, d); let u8_3d_vzv_zeroslice = &[a.clone(), a.clone()]; let a: VZV> = VarZeroVec::from(&U8_3D_ARRAY); let b: VZV> = VarZeroVec::from(U8_3D_SLICE); let c: VZV> = VarZeroVec::from(&u8_3d_vec); let d: VZV> = VarZeroVec::from(u8_3d_vzv_brackets); assert_eq!( a.iter() .map(|x| x.iter().map(|y| y.to_vec()).collect::>>()) .collect::>>>(), u8_3d_vec ); assert_eq!(a, b); assert_eq!(a, c); assert_eq!(a, d); let a: VZV>> = VarZeroVec::from(&U8_3D_ARRAY); let b: VZV>> = VarZeroVec::from(U8_3D_SLICE); let c: VZV>> = VarZeroVec::from(&u8_3d_vec); let d: VZV>> = VarZeroVec::from(u8_3d_vzv_zeroslice); assert_eq!( a.iter() .map(|x| x .iter() .map(|y| y.iter().collect::>()) .collect::>>()) .collect::>>>(), u8_3d_vec ); assert_eq!(a, b); assert_eq!(a, c); assert_eq!(a, d); let a: VZV> = VarZeroVec::from(&U32_2D_ARRAY); let b: VZV> = VarZeroVec::from(U32_2D_SLICE); let c: VZV> = VarZeroVec::from(&u32_2d_vec); let d: VZV> = VarZeroVec::from(&u32_2d_zerovec); assert_eq!(a, u32_2d_zerovec); assert_eq!(a, b); assert_eq!(a, c); assert_eq!(a, d); let u32_3d_vzv = &[a.clone(), a.clone()]; let a: VZV>> = VarZeroVec::from(&U32_3D_ARRAY); let b: VZV>> = VarZeroVec::from(U32_3D_SLICE); let c: VZV>> = VarZeroVec::from(&u32_3d_vec); let d: VZV>> = VarZeroVec::from(u32_3d_vzv); assert_eq!( a.iter() .map(|x| x .iter() .map(|y| y.iter().collect::>()) .collect::>>()) .collect::>>>(), u32_3d_vec ); assert_eq!(a, b); assert_eq!(a, c); assert_eq!(a, d); } } zerovec-0.11.1/src/ule/macros.rs000064400000000000000000000024251046102023000145720ustar 00000000000000// This file is part of ICU4X. 
For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). /// Given `Self` (`$aligned`), `Self::ULE` (`$unaligned`), and a conversion function (`$single` or /// `Self::from_aligned`), implement `from_array` for arrays of `$aligned` to `$unaligned`. /// /// The `$default` argument is due to current compiler limitations. /// Pass any (cheap to construct) value. #[macro_export] macro_rules! impl_ule_from_array { ($aligned:ty, $unaligned:ty, $default:expr, $single:path) => { #[doc = concat!("Convert an array of `", stringify!($aligned), "` to an array of `", stringify!($unaligned), "`.")] pub const fn from_array(arr: [$aligned; N]) -> [Self; N] { let mut result = [$default; N]; let mut i = 0; // Won't panic because i < N and arr has length N #[allow(clippy::indexing_slicing)] while i < N { result[i] = $single(arr[i]); i += 1; } result } }; ($aligned:ty, $unaligned:ty, $default:expr) => { impl_ule_from_array!($aligned, $unaligned, $default, Self::from_aligned); }; } zerovec-0.11.1/src/ule/mod.rs000064400000000000000000000447011046102023000140700ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). #![allow(clippy::upper_case_acronyms)] //! Traits over unaligned little-endian data (ULE, pronounced "yule"). //! //! The main traits for this module are [`ULE`], [`AsULE`] and, [`VarULE`]. //! //! See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for details on how these traits //! works under the hood. 
mod chars; #[cfg(doc)] pub mod custom; mod encode; mod macros; mod multi; mod niche; mod option; mod plain; mod slices; #[cfg(test)] pub mod test_utils; pub mod tuple; pub mod tuplevar; pub mod vartuple; pub use chars::CharULE; #[cfg(feature = "alloc")] pub use encode::encode_varule_to_box; pub use encode::EncodeAsVarULE; pub use multi::MultiFieldsULE; pub use niche::{NicheBytes, NichedOption, NichedOptionULE}; pub use option::{OptionULE, OptionVarULE}; pub use plain::RawBytesULE; use core::{any, fmt, mem, slice}; /// Fixed-width, byte-aligned data that can be cast to and from a little-endian byte slice. /// /// If you need to implement this trait, consider using [`#[make_ule]`](crate::make_ule) or /// [`#[derive(ULE)]`](macro@ULE) instead. /// /// Types that are not fixed-width can implement [`VarULE`] instead. /// /// "ULE" stands for "Unaligned little-endian" /// /// # Safety /// /// Safety checklist for `ULE`: /// /// 1. The type *must not* include any uninitialized or padding bytes. /// 2. The type must have an alignment of 1 byte, or it is a ZST that is safe to construct. /// 3. The impl of [`ULE::validate_bytes()`] *must* return an error if the given byte slice /// would not represent a valid slice of this type. /// 4. The impl of [`ULE::validate_bytes()`] *must* return an error if the given byte slice /// cannot be used in its entirety (if its length is not a multiple of `size_of::()`). /// 5. All other methods *must* be left with their default impl, or else implemented according to /// their respective safety guidelines. /// 6. Acknowledge the following note about the equality invariant. /// /// If the ULE type is a struct only containing other ULE types (or other types which satisfy invariants 1 and 2, /// like `[u8; N]`), invariants 1 and 2 can be achieved via `#[repr(C, packed)]` or `#[repr(transparent)]`. 
/// /// # Equality invariant /// /// A non-safety invariant is that if `Self` implements `PartialEq`, the it *must* be logically /// equivalent to byte equality on [`Self::slice_as_bytes()`]. /// /// It may be necessary to introduce a "canonical form" of the ULE if logical equality does not /// equal byte equality. In such a case, [`Self::validate_bytes()`] should return an error /// for any values that are not in canonical form. For example, the decimal strings "1.23e4" and /// "12.3e3" are logically equal, but not byte-for-byte equal, so we could define a canonical form /// where only a single digit is allowed before `.`. /// /// Failure to follow this invariant will cause surprising behavior in `PartialEq`, which may /// result in unpredictable operations on `ZeroVec`, `VarZeroVec`, and `ZeroMap`. pub unsafe trait ULE where Self: Sized, Self: Copy + 'static, { /// Validates a byte slice, `&[u8]`. /// /// If `Self` is not well-defined for all possible bit values, the bytes should be validated. /// If the bytes can be transmuted, *in their entirety*, to a valid slice of `Self`, then `Ok` /// should be returned; otherwise, `Err` should be returned. fn validate_bytes(bytes: &[u8]) -> Result<(), UleError>; /// Parses a byte slice, `&[u8]`, and return it as `&[Self]` with the same lifetime. /// /// If `Self` is not well-defined for all possible bit values, the bytes should be validated, /// and an error should be returned in the same cases as [`Self::validate_bytes()`]. /// /// The default implementation executes [`Self::validate_bytes()`] followed by /// [`Self::slice_from_bytes_unchecked`]. /// /// Note: The following equality should hold: `bytes.len() % size_of::() == 0`. This /// means that the returned slice can span the entire byte slice. 
fn parse_bytes_to_slice(bytes: &[u8]) -> Result<&[Self], UleError> { Self::validate_bytes(bytes)?; debug_assert_eq!(bytes.len() % mem::size_of::(), 0); Ok(unsafe { Self::slice_from_bytes_unchecked(bytes) }) } /// Takes a byte slice, `&[u8]`, and return it as `&[Self]` with the same lifetime, assuming /// that this byte slice has previously been run through [`Self::parse_bytes_to_slice()`] with /// success. /// /// The default implementation performs a pointer cast to the same region of memory. /// /// # Safety /// /// ## Callers /// /// Callers of this method must take care to ensure that `bytes` was previously passed through /// [`Self::validate_bytes()`] with success (and was not changed since then). /// /// ## Implementors /// /// Implementations of this method may call unsafe functions to cast the pointer to the correct /// type, assuming the "Callers" invariant above. /// /// Keep in mind that `&[Self]` and `&[u8]` may have different lengths. /// /// Safety checklist: /// /// 1. This method *must* return the same result as [`Self::parse_bytes_to_slice()`]. /// 2. This method *must* return a slice to the same region of memory as the argument. #[inline] unsafe fn slice_from_bytes_unchecked(bytes: &[u8]) -> &[Self] { let data = bytes.as_ptr(); let len = bytes.len() / mem::size_of::(); debug_assert_eq!(bytes.len() % mem::size_of::(), 0); core::slice::from_raw_parts(data as *const Self, len) } /// Given `&[Self]`, returns a `&[u8]` with the same lifetime. /// /// The default implementation performs a pointer cast to the same region of memory. /// /// # Safety /// /// Implementations of this method should call potentially unsafe functions to cast the /// pointer to the correct type. /// /// Keep in mind that `&[Self]` and `&[u8]` may have different lengths. 
#[inline] #[allow(clippy::wrong_self_convention)] // https://github.com/rust-lang/rust-clippy/issues/7219 fn slice_as_bytes(slice: &[Self]) -> &[u8] { unsafe { slice::from_raw_parts(slice as *const [Self] as *const u8, mem::size_of_val(slice)) } } } /// A trait for any type that has a 1:1 mapping with an unaligned little-endian (ULE) type. /// /// If you need to implement this trait, consider using [`#[make_ule]`](crate::make_ule) instead. pub trait AsULE: Copy { /// The ULE type corresponding to `Self`. /// /// Types having infallible conversions from all bit values (Plain Old Data) can use /// `RawBytesULE` with the desired width; for example, `u32` uses `RawBytesULE<4>`. /// /// Types that are not well-defined for all bit values should implement a custom ULE. type ULE: ULE; /// Converts from `Self` to `Self::ULE`. /// /// This function may involve byte order swapping (native-endian to little-endian). /// /// For best performance, mark your implementation of this function `#[inline]`. fn to_unaligned(self) -> Self::ULE; /// Converts from `Self::ULE` to `Self`. /// /// This function may involve byte order swapping (little-endian to native-endian). /// /// For best performance, mark your implementation of this function `#[inline]`. /// /// # Safety /// /// This function is infallible because bit validation should have occurred when `Self::ULE` /// was first constructed. An implementation may therefore involve an `unsafe{}` block, like /// `from_bytes_unchecked()`. fn from_unaligned(unaligned: Self::ULE) -> Self; } /// A type whose byte sequence equals the byte sequence of its ULE type on /// little-endian platforms. /// /// This enables certain performance optimizations, such as /// [`ZeroVec::try_from_slice`](crate::ZeroVec::try_from_slice). 
/// /// # Implementation safety /// /// This trait is safe to implement if the type's ULE (as defined by `impl `[`AsULE`]` for T`) /// has an equal byte sequence as the type itself on little-endian platforms; i.e., one where /// `*const T` can be cast to a valid `*const T::ULE`. pub unsafe trait EqULE: AsULE {} /// A trait for a type where aligned slices can be cast to unaligned slices. /// /// Auto-implemented on all types implementing [`EqULE`]. pub trait SliceAsULE where Self: AsULE + Sized, { /// Converts from `&[Self]` to `&[Self::ULE]` if possible. /// /// In general, this function returns `Some` on little-endian and `None` on big-endian. fn slice_to_unaligned(slice: &[Self]) -> Option<&[Self::ULE]>; } #[cfg(target_endian = "little")] impl SliceAsULE for T where T: EqULE, { #[inline] fn slice_to_unaligned(slice: &[Self]) -> Option<&[Self::ULE]> { // This is safe because on little-endian platforms, the byte sequence of &[T] // is equivalent to the byte sequence of &[T::ULE] by the contract of EqULE, // and &[T::ULE] has equal or looser alignment than &[T]. let ule_slice = unsafe { core::slice::from_raw_parts(slice.as_ptr() as *const Self::ULE, slice.len()) }; Some(ule_slice) } } #[cfg(not(target_endian = "little"))] impl SliceAsULE for T where T: EqULE, { #[inline] fn slice_to_unaligned(_: &[Self]) -> Option<&[Self::ULE]> { None } } /// Variable-width, byte-aligned data that can be cast to and from a little-endian byte slice. /// /// If you need to implement this trait, consider using [`#[make_varule]`](crate::make_varule) or /// [`#[derive(VarULE)]`](macro@VarULE) instead. /// /// This trait is mostly for unsized types like `str` and `[T]`. It can be implemented on sized types; /// however, it is much more preferable to use [`ULE`] for that purpose. The [`custom`] module contains /// additional documentation on how this type can be implemented on custom types. 
/// /// If deserialization with `VarZeroVec` is desired is recommended to implement `Deserialize` for /// `Box` (serde does not do this automatically for unsized `T`). /// /// For convenience it is typically desired to implement [`EncodeAsVarULE`] and [`ZeroFrom`](zerofrom::ZeroFrom) /// on some stack type to convert to and from the ULE type efficiently when necessary. /// /// # Safety /// /// Safety checklist for `VarULE`: /// /// 1. The type *must not* include any uninitialized or padding bytes. /// 2. The type must have an alignment of 1 byte. /// 3. The impl of [`VarULE::validate_bytes()`] *must* return an error if the given byte slice /// would not represent a valid slice of this type. /// 4. The impl of [`VarULE::validate_bytes()`] *must* return an error if the given byte slice /// cannot be used in its entirety. /// 5. The impl of [`VarULE::from_bytes_unchecked()`] must produce a reference to the same /// underlying data assuming that the given bytes previously passed validation. /// 6. All other methods *must* be left with their default impl, or else implemented according to /// their respective safety guidelines. /// 7. Acknowledge the following note about the equality invariant. /// /// If the ULE type is a struct only containing other ULE/VarULE types (or other types which satisfy invariants 1 and 2, /// like `[u8; N]`), invariants 1 and 2 can be achieved via `#[repr(C, packed)]` or `#[repr(transparent)]`. /// /// # Equality invariant /// /// A non-safety invariant is that if `Self` implements `PartialEq`, the it *must* be logically /// equivalent to byte equality on [`Self::as_bytes()`]. /// /// It may be necessary to introduce a "canonical form" of the ULE if logical equality does not /// equal byte equality. In such a case, [`Self::validate_bytes()`] should return an error /// for any values that are not in canonical form. 
For example, the decimal strings "1.23e4" and /// "12.3e3" are logically equal, but not byte-for-byte equal, so we could define a canonical form /// where only a single digit is allowed before `.`. /// /// There may also be cases where a `VarULE` has muiltiple canonical forms, such as a faster /// version and a smaller version. The cleanest way to handle this case would be separate types. /// However, if this is not feasible, then the application should ensure that the data it is /// deserializing is in the expected form. For example, if the data is being loaded from an /// external source, then requests could carry information about the expected form of the data. /// /// Failure to follow this invariant will cause surprising behavior in `PartialEq`, which may /// result in unpredictable operations on `ZeroVec`, `VarZeroVec`, and `ZeroMap`. pub unsafe trait VarULE: 'static { /// Validates a byte slice, `&[u8]`. /// /// If `Self` is not well-defined for all possible bit values, the bytes should be validated. /// If the bytes can be transmuted, *in their entirety*, to a valid `&Self`, then `Ok` should /// be returned; otherwise, `Self::Error` should be returned. fn validate_bytes(_bytes: &[u8]) -> Result<(), UleError>; /// Parses a byte slice, `&[u8]`, and return it as `&Self` with the same lifetime. /// /// If `Self` is not well-defined for all possible bit values, the bytes should be validated, /// and an error should be returned in the same cases as [`Self::validate_bytes()`]. /// /// The default implementation executes [`Self::validate_bytes()`] followed by /// [`Self::from_bytes_unchecked`]. /// /// Note: The following equality should hold: `size_of_val(result) == size_of_val(bytes)`, /// where `result` is the successful return value of the method. This means that the return /// value spans the entire byte slice. 
fn parse_bytes(bytes: &[u8]) -> Result<&Self, UleError> { Self::validate_bytes(bytes)?; let result = unsafe { Self::from_bytes_unchecked(bytes) }; debug_assert_eq!(mem::size_of_val(result), mem::size_of_val(bytes)); Ok(result) } /// Takes a byte slice, `&[u8]`, and return it as `&Self` with the same lifetime, assuming /// that this byte slice has previously been run through [`Self::parse_bytes()`] with /// success. /// /// # Safety /// /// ## Callers /// /// Callers of this method must take care to ensure that `bytes` was previously passed through /// [`Self::validate_bytes()`] with success (and was not changed since then). /// /// ## Implementors /// /// Implementations of this method may call unsafe functions to cast the pointer to the correct /// type, assuming the "Callers" invariant above. /// /// Safety checklist: /// /// 1. This method *must* return the same result as [`Self::parse_bytes()`]. /// 2. This method *must* return a slice to the same region of memory as the argument. unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self; /// Given `&Self`, returns a `&[u8]` with the same lifetime. /// /// The default implementation performs a pointer cast to the same region of memory. /// /// # Safety /// /// Implementations of this method should call potentially unsafe functions to cast the /// pointer to the correct type. 
#[inline] fn as_bytes(&self) -> &[u8] { unsafe { slice::from_raw_parts(self as *const Self as *const u8, mem::size_of_val(self)) } } /// Allocate on the heap as a `Box` #[inline] #[cfg(feature = "alloc")] fn to_boxed(&self) -> alloc::boxed::Box { use alloc::borrow::ToOwned; use alloc::boxed::Box; use core::alloc::Layout; let bytesvec = self.as_bytes().to_owned().into_boxed_slice(); let bytesvec = mem::ManuallyDrop::new(bytesvec); unsafe { // Get the pointer representation let ptr: *mut Self = Self::from_bytes_unchecked(&bytesvec) as *const Self as *mut Self; assert_eq!(Layout::for_value(&*ptr), Layout::for_value(&**bytesvec)); // Transmute the pointer to an owned pointer Box::from_raw(ptr) } } } // Proc macro reexports // // These exist so that our docs can use intra-doc links. // Due to quirks of how rustdoc does documentation on reexports, these must be in this module and not reexported from // a submodule /// Custom derive for [`ULE`]. /// /// This can be attached to [`Copy`] structs containing only [`ULE`] types. /// /// Most of the time, it is recommended one use [`#[make_ule]`](crate::make_ule) instead of defining /// a custom ULE type. #[cfg(feature = "derive")] pub use zerovec_derive::ULE; /// Custom derive for [`VarULE`] /// /// This can be attached to structs containing only [`ULE`] types with one [`VarULE`] type at the end. /// /// Most of the time, it is recommended one use [`#[make_varule]`](crate::make_varule) instead of defining /// a custom [`VarULE`] type. #[cfg(feature = "derive")] pub use zerovec_derive::VarULE; /// An error type to be used for decoding slices of ULE types #[derive(Copy, Clone, Debug, PartialEq, Eq)] #[non_exhaustive] pub enum UleError { /// Attempted to parse a buffer into a slice of the given ULE type but its /// length was not compatible. /// /// Typically created by a [`ULE`] impl via [`UleError::length()`]. 
/// /// [`ULE`]: crate::ule::ULE InvalidLength { ty: &'static str, len: usize }, /// The byte sequence provided for `ty` failed to parse correctly in the /// given ULE type. /// /// Typically created by a [`ULE`] impl via [`UleError::parse()`]. /// /// [`ULE`]: crate::ule::ULE ParseError { ty: &'static str }, } impl fmt::Display for UleError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { match *self { UleError::InvalidLength { ty, len } => { write!(f, "Invalid length {len} for slice of type {ty}") } UleError::ParseError { ty } => { write!(f, "Could not parse bytes to slice of type {ty}") } } } } impl UleError { /// Construct a parse error for the given type pub fn parse() -> UleError { UleError::ParseError { ty: any::type_name::(), } } /// Construct an "invalid length" error for the given type and length pub fn length(len: usize) -> UleError { UleError::InvalidLength { ty: any::type_name::(), len, } } } impl core::error::Error for UleError {} zerovec-0.11.1/src/ule/multi.rs000064400000000000000000000146131046102023000144420ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use super::*; use crate::varzerovec::lengthless::VarZeroLengthlessSlice; use crate::vecs::VarZeroVecFormat; use core::{fmt, mem}; /// This type is used by the custom derive to represent multiple [`VarULE`] /// fields packed into a single end-of-struct field. It is not recommended /// to use this type directly, use [`Tuple2VarULE`](crate::ule::tuplevar::Tuple2VarULE) etc instead. /// /// Logically, consider it to be `(, , , ..)` /// where `` etc are potentially different [`VarULE`] types. /// /// Internally, it is represented by a VarZeroSlice without the length part. 
#[derive(PartialEq, Eq)] #[repr(transparent)] pub struct MultiFieldsULE( VarZeroLengthlessSlice<[u8], Format>, ); impl MultiFieldsULE { /// Compute the amount of bytes needed to support elements with lengths `lengths` #[inline] #[allow(clippy::expect_used)] // See #1410 pub fn compute_encoded_len_for(lengths: [usize; LEN]) -> usize { let lengths = lengths.map(BlankSliceEncoder); crate::varzerovec::components::compute_serializable_len_without_length::<_, _, Format>( &lengths, ) .expect("Too many bytes to encode") as usize } /// Construct a partially initialized MultiFieldsULE backed by a mutable byte buffer pub fn new_from_lengths_partially_initialized<'a>( lengths: [usize; LEN], output: &'a mut [u8], ) -> &'a mut Self { let lengths = lengths.map(BlankSliceEncoder); crate::varzerovec::components::write_serializable_bytes_without_length::<_, _, Format>( &lengths, output, ); debug_assert!( >::parse_bytes(LEN as u32, output).is_ok(), "Encoded slice must be valid VarZeroSlice" ); unsafe { // Safe since write_serializable_bytes produces a valid VarZeroLengthlessSlice buffer with the right format let slice = >::from_bytes_unchecked_mut(output); // safe since `Self` is transparent over VarZeroLengthlessSlice<[u8], Format> mem::transmute::<&mut VarZeroLengthlessSlice<[u8], Format>, &mut Self>(slice) } } /// Given a buffer of size obtained by [`Self::compute_encoded_len_for()`], write element A to index idx /// /// # Safety /// - `idx` must be in range /// - `T` must be the appropriate type expected by the custom derive in this usage of this type #[inline] pub unsafe fn set_field_at + ?Sized>( &mut self, idx: usize, value: &A, ) { value.encode_var_ule_write(self.0.get_bytes_at_mut(LEN as u32, idx)) } /// Validate field at `index` to see if it is a valid `T` VarULE type /// /// # Safety /// /// - `index` must be in range #[inline] pub unsafe fn validate_field(&self, index: usize) -> Result<(), UleError> { T::validate_bytes(self.0.get_unchecked(LEN as u32, index)) } /// Get 
field at `index` as a value of type T /// /// # Safety /// /// - `index` must be in range /// - Element at `index` must have been created with the VarULE type T #[inline] pub unsafe fn get_field(&self, index: usize) -> &T { T::from_bytes_unchecked(self.0.get_unchecked(LEN as u32, index)) } /// Construct from a byte slice /// /// # Safety /// - byte slice must be a valid VarZeroLengthlessSlice<[u8], Format> with length LEN #[inline] pub unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { // &Self is transparent over &VZS<..> with the right format mem::transmute(>::from_bytes_unchecked(bytes)) } /// Get the bytes behind this value pub fn as_bytes(&self) -> &[u8] { self.0.as_bytes() } } impl fmt::Debug for MultiFieldsULE { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "MultiFieldsULE<{LEN}>({:?})", self.0.as_bytes()) } } /// This lets us conveniently use the EncodeAsVarULE functionality to create /// `VarZeroVec<[u8]>`s that have the right amount of space for elements /// without having to duplicate any unsafe code #[repr(transparent)] struct BlankSliceEncoder(usize); unsafe impl EncodeAsVarULE<[u8]> for BlankSliceEncoder { fn encode_var_ule_as_slices(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { // unnecessary if the other two are implemented unreachable!() } #[inline] fn encode_var_ule_len(&self) -> usize { self.0 } #[inline] fn encode_var_ule_write(&self, _dst: &mut [u8]) { // do nothing } } // Safety (based on the safety checklist on the VarULE trait): // 1. MultiFieldsULE does not include any uninitialized or padding bytes (achieved by being transparent over a VarULE type) // 2. MultiFieldsULE is aligned to 1 byte (achieved by being transparent over a VarULE type) // 3. The impl of `validate_bytes()` returns an error if any byte is not valid. // 4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety // 5. The impl of `from_bytes_unchecked()` returns a reference to the same data. // 6. 
All other methods are defaulted // 7. `MultiFieldsULE` byte equality is semantic equality (achieved by being transparent over a VarULE type) unsafe impl VarULE for MultiFieldsULE { /// Note: MultiFieldsULE is usually used in cases where one should be calling .validate_field() directly for /// each field, rather than using the regular VarULE impl. /// /// This impl exists so that EncodeAsVarULE can work. #[inline] fn validate_bytes(slice: &[u8]) -> Result<(), UleError> { >::parse_bytes(LEN as u32, slice).map(|_| ()) } #[inline] unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { // &Self is transparent over &VZS<..> mem::transmute(>::from_bytes_unchecked(bytes)) } } zerovec-0.11.1/src/ule/niche.rs000064400000000000000000000162031046102023000143730ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use core::{marker::Copy, mem::size_of}; #[cfg(feature = "alloc")] use crate::map::ZeroMapKV; #[cfg(feature = "alloc")] use crate::{ZeroSlice, ZeroVec}; use super::{AsULE, ULE}; /// The [`ULE`] types implementing this trait guarantee that [`NicheBytes::NICHE_BIT_PATTERN`] /// can never occur as a valid byte representation of the type. /// /// Guarantees for a valid implementation. /// 1. N must be equal to `core::mem::sizeo_of::()` or else it will /// cause panics. /// 2. The bit pattern [`NicheBytes::NICHE_BIT_PATTERN`] must not be incorrect as it would lead to /// weird behaviour. /// 3. The abstractions built on top of this trait must panic on an invalid N. /// 4. The abstractions built on this trait that use type punning must ensure that type being /// punned is [`ULE`]. pub trait NicheBytes { const NICHE_BIT_PATTERN: [u8; N]; } /// [`ULE`] type for [`NichedOption`] where U implements [`NicheBytes`]. /// The invalid bit pattern is used as the niche. 
/// /// This uses 1 byte less than [`crate::ule::OptionULE`] to represent [`NichedOption`]. /// /// # Example /// /// ``` /// use core::num::NonZeroI8; /// use zerovec::ule::NichedOption; /// use zerovec::ZeroVec; /// /// let bytes = &[0x00, 0x01, 0x02, 0x00]; /// let zv_no: ZeroVec> = /// ZeroVec::parse_bytes(bytes).expect("Unable to parse as NichedOption."); /// /// assert_eq!(zv_no.get(0).map(|e| e.0), Some(None)); /// assert_eq!(zv_no.get(1).map(|e| e.0), Some(NonZeroI8::new(1))); /// assert_eq!(zv_no.get(2).map(|e| e.0), Some(NonZeroI8::new(2))); /// assert_eq!(zv_no.get(3).map(|e| e.0), Some(None)); /// ``` // Invariants: // The union stores [`NicheBytes::NICHE_BIT_PATTERN`] when None. // Any other bit pattern is a valid. #[repr(C)] pub union NichedOptionULE + ULE, const N: usize> { /// Invariant: The value is `niche` only if the bytes equal NICHE_BIT_PATTERN. niche: [u8; N], /// Invariant: The value is `valid` if the `niche` field does not match NICHE_BIT_PATTERN. valid: U, } impl + ULE + core::fmt::Debug, const N: usize> core::fmt::Debug for NichedOptionULE { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { self.get().fmt(f) } } impl + ULE, const N: usize> NichedOptionULE { /// New `NichedOptionULE` from `Option` pub fn new(opt: Option) -> Self { assert!(N == core::mem::size_of::()); match opt { Some(u) => Self { valid: u }, None => Self { niche: >::NICHE_BIT_PATTERN, }, } } /// Convert to an `Option` pub fn get(self) -> Option { // Safety: The union stores NICHE_BIT_PATTERN when None otherwise a valid U unsafe { if self.niche == >::NICHE_BIT_PATTERN { None } else { Some(self.valid) } } } /// Borrows as an `Option<&U>`. 
pub fn as_ref(&self) -> Option<&U> { // Safety: The union stores NICHE_BIT_PATTERN when None otherwise a valid U unsafe { if self.niche == >::NICHE_BIT_PATTERN { None } else { Some(&self.valid) } } } } impl + ULE, const N: usize> Copy for NichedOptionULE {} impl + ULE, const N: usize> Clone for NichedOptionULE { fn clone(&self) -> Self { *self } } impl + ULE + PartialEq, const N: usize> PartialEq for NichedOptionULE { fn eq(&self, other: &Self) -> bool { self.get().eq(&other.get()) } } impl + ULE + Eq, const N: usize> Eq for NichedOptionULE {} /// Safety for ULE trait /// 1. NichedOptionULE does not have any padding bytes due to `#[repr(C)]` on a struct /// containing only ULE fields. /// NichedOptionULE either contains NICHE_BIT_PATTERN or valid U byte sequences. /// In both cases the data is initialized. /// 2. NichedOptionULE is aligned to 1 byte due to `#[repr(C, packed)]` on a struct containing only /// ULE fields. /// 3. validate_bytes impl returns an error if invalid bytes are encountered. /// 4. validate_bytes impl returns an error there are extra bytes. /// 5. The other ULE methods are left to their default impl. /// 6. NichedOptionULE equality is based on ULE equality of the subfield, assuming that NicheBytes /// has been implemented correctly (this is a correctness but not a safety guarantee). unsafe impl + ULE, const N: usize> ULE for NichedOptionULE { fn validate_bytes(bytes: &[u8]) -> Result<(), crate::ule::UleError> { let size = size_of::(); // The implemention is only correct if NICHE_BIT_PATTERN has same number of bytes as the // type. 
debug_assert!(N == core::mem::size_of::()); // The bytes should fully transmute to a collection of Self if bytes.len() % size != 0 { return Err(crate::ule::UleError::length::(bytes.len())); } bytes.chunks(size).try_for_each(|chunk| { // Associated const cannot be referenced in a pattern // https://doc.rust-lang.org/error-index.html#E0158 if chunk == >::NICHE_BIT_PATTERN { Ok(()) } else { U::validate_bytes(chunk) } }) } } /// Optional type which uses [`NichedOptionULE`] as ULE type. /// /// The implementors guarantee that `N == core::mem::size_of::()` /// [`repr(transparent)`] guarantees that the layout is same as [`Option`] #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] #[repr(transparent)] #[allow(clippy::exhaustive_structs)] // newtype #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct NichedOption(pub Option); impl Default for NichedOption { fn default() -> Self { Self(None) } } impl AsULE for NichedOption where U::ULE: NicheBytes, { type ULE = NichedOptionULE; fn to_unaligned(self) -> Self::ULE { NichedOptionULE::new(self.0.map(U::to_unaligned)) } fn from_unaligned(unaligned: Self::ULE) -> Self { Self(unaligned.get().map(U::from_unaligned)) } } #[cfg(feature = "alloc")] impl<'a, T: AsULE + 'static, const N: usize> ZeroMapKV<'a> for NichedOption where T::ULE: NicheBytes, { type Container = ZeroVec<'a, NichedOption>; type Slice = ZeroSlice>; type GetType = as AsULE>::ULE; type OwnedType = Self; } impl IntoIterator for NichedOption { type IntoIter = as IntoIterator>::IntoIter; type Item = T; fn into_iter(self) -> Self::IntoIter { self.0.into_iter() } } zerovec-0.11.1/src/ule/option.rs000064400000000000000000000211421046102023000146130ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
use super::*; use core::cmp::Ordering; use core::marker::PhantomData; use core::mem::{self, MaybeUninit}; /// This type is the [`ULE`] type for `Option` where `U` is a [`ULE`] type /// /// # Example /// /// ```rust /// use zerovec::ZeroVec; /// /// let z = ZeroVec::alloc_from_slice(&[ /// Some('a'), /// Some('á'), /// Some('ø'), /// None, /// Some('ł'), /// ]); /// /// assert_eq!(z.get(2), Some(Some('ø'))); /// assert_eq!(z.get(3), Some(None)); /// ``` // Invariants: // The MaybeUninit is zeroed when None (bool = false), // and is valid when Some (bool = true) #[repr(C, packed)] pub struct OptionULE(bool, MaybeUninit); impl OptionULE { /// Obtain this as an `Option` pub fn get(self) -> Option { if self.0 { unsafe { // safety: self.0 is true so the MaybeUninit is valid Some(self.1.assume_init()) } } else { None } } /// Construct an `OptionULE` from an equivalent `Option` pub fn new(opt: Option) -> Self { if let Some(inner) = opt { Self(true, MaybeUninit::new(inner)) } else { Self(false, MaybeUninit::zeroed()) } } } impl core::fmt::Debug for OptionULE { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { self.get().fmt(f) } } // Safety (based on the safety checklist on the ULE trait): // 1. OptionULE does not include any uninitialized or padding bytes. // (achieved by `#[repr(C, packed)]` on a struct containing only ULE fields, // in the context of this impl. The MaybeUninit is valid for all byte sequences, and we only generate /// zeroed or valid-T byte sequences to fill it) // 2. OptionULE is aligned to 1 byte. // (achieved by `#[repr(C, packed)]` on a struct containing only ULE fields, in the context of this impl) // 3. The impl of validate_bytes() returns an error if any byte is not valid. // 4. The impl of validate_bytes() returns an error if there are extra bytes. // 5. The other ULE methods use the default impl. // 6. 
OptionULE byte equality is semantic equality by relying on the ULE equality // invariant on the subfields unsafe impl ULE for OptionULE { fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> { let size = mem::size_of::(); if bytes.len() % size != 0 { return Err(UleError::length::(bytes.len())); } for chunk in bytes.chunks(size) { #[allow(clippy::indexing_slicing)] // `chunk` will have enough bytes to fit Self match chunk[0] { // https://doc.rust-lang.org/reference/types/boolean.html // Rust booleans are always size 1, align 1 values with valid bit patterns 0x0 or 0x1 0 => { if !chunk[1..].iter().all(|x| *x == 0) { return Err(UleError::parse::()); } } 1 => U::validate_bytes(&chunk[1..])?, _ => return Err(UleError::parse::()), } } Ok(()) } } impl AsULE for Option { type ULE = OptionULE; fn to_unaligned(self) -> OptionULE { OptionULE::new(self.map(T::to_unaligned)) } fn from_unaligned(other: OptionULE) -> Self { other.get().map(T::from_unaligned) } } impl Copy for OptionULE {} impl Clone for OptionULE { fn clone(&self) -> Self { *self } } impl PartialEq for OptionULE { fn eq(&self, other: &Self) -> bool { self.get().eq(&other.get()) } } impl Eq for OptionULE {} /// A type allowing one to represent `Option` for [`VarULE`] `U` types. 
/// /// ```rust /// use zerovec::ule::OptionVarULE; /// use zerovec::VarZeroVec; /// /// let mut zv: VarZeroVec> = VarZeroVec::new(); /// /// zv.make_mut().push(&None::<&str>); /// zv.make_mut().push(&Some("hello")); /// zv.make_mut().push(&Some("world")); /// zv.make_mut().push(&None::<&str>); /// /// assert_eq!(zv.get(0).unwrap().as_ref(), None); /// assert_eq!(zv.get(1).unwrap().as_ref(), Some("hello")); /// ``` // The slice field is empty when None (bool = false), // and is a valid T when Some (bool = true) #[repr(C, packed)] pub struct OptionVarULE(PhantomData, bool, [u8]); impl OptionVarULE { /// Obtain this as an `Option<&U>` pub fn as_ref(&self) -> Option<&U> { if self.1 { unsafe { // Safety: byte field is a valid T if boolean field is true Some(U::from_bytes_unchecked(&self.2)) } } else { None } } } impl core::fmt::Debug for OptionVarULE { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { self.as_ref().fmt(f) } } // Safety (based on the safety checklist on the VarULE trait): // 1. OptionVarULE does not include any uninitialized or padding bytes // (achieved by being repr(C, packed) on ULE types) // 2. OptionVarULE is aligned to 1 byte (achieved by being repr(C, packed) on ULE types) // 3. The impl of `validate_bytes()` returns an error if any byte is not valid. // 4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety // 5. The impl of `from_bytes_unchecked()` returns a reference to the same data. // 6. All other methods are defaulted // 7. 
OptionVarULE byte equality is semantic equality (achieved by being an aggregate) unsafe impl VarULE for OptionVarULE { #[inline] fn validate_bytes(slice: &[u8]) -> Result<(), UleError> { if slice.is_empty() { return Err(UleError::length::(slice.len())); } #[allow(clippy::indexing_slicing)] // slice already verified to be nonempty match slice[0] { // https://doc.rust-lang.org/reference/types/boolean.html // Rust booleans are always size 1, align 1 values with valid bit patterns 0x0 or 0x1 0 => { if slice.len() != 1 { Err(UleError::length::(slice.len())) } else { Ok(()) } } 1 => U::validate_bytes(&slice[1..]), _ => Err(UleError::parse::()), } } #[inline] unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { let entire_struct_as_slice: *const [u8] = ::core::ptr::slice_from_raw_parts(bytes.as_ptr(), bytes.len() - 1); &*(entire_struct_as_slice as *const Self) } } unsafe impl EncodeAsVarULE> for Option where T: EncodeAsVarULE, U: VarULE + ?Sized, { fn encode_var_ule_as_slices(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { // unnecessary if the other two are implemented unreachable!() } #[inline] fn encode_var_ule_len(&self) -> usize { if let Some(ref inner) = *self { // slice + boolean 1 + inner.encode_var_ule_len() } else { // boolean + empty slice 1 } } #[allow(clippy::indexing_slicing)] // This method is allowed to panic when lengths are invalid fn encode_var_ule_write(&self, dst: &mut [u8]) { if let Some(ref inner) = *self { debug_assert!( !dst.is_empty(), "OptionVarULE must have at least one byte when Some" ); dst[0] = 1; inner.encode_var_ule_write(&mut dst[1..]); } else { debug_assert!( dst.len() == 1, "OptionVarULE must have exactly one byte when None" ); dst[0] = 0; } } } impl PartialEq for OptionVarULE { fn eq(&self, other: &Self) -> bool { self.as_ref().eq(&other.as_ref()) } } impl Eq for OptionVarULE {} impl PartialOrd for OptionVarULE { fn partial_cmp(&self, other: &Self) -> Option { self.as_ref().partial_cmp(&other.as_ref()) } } impl Ord for OptionVarULE { 
fn cmp(&self, other: &Self) -> Ordering { self.as_ref().cmp(&other.as_ref()) } } zerovec-0.11.1/src/ule/plain.rs000064400000000000000000000312731046102023000144140ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). #![allow(clippy::upper_case_acronyms)] //! ULE implementation for Plain Old Data types, including all sized integers. use super::*; use crate::impl_ule_from_array; use crate::ZeroSlice; use core::num::{NonZeroI8, NonZeroU8}; /// A u8 array of little-endian data with infallible conversions to and from &[u8]. #[repr(transparent)] #[derive(Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord, Hash)] #[allow(clippy::exhaustive_structs)] // newtype pub struct RawBytesULE(pub [u8; N]); impl RawBytesULE { #[inline] pub fn as_bytes(&self) -> &[u8] { &self.0 } #[inline] pub fn from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] { let data = bytes.as_mut_ptr(); let len = bytes.len() / N; // Safe because Self is transparent over [u8; N] unsafe { core::slice::from_raw_parts_mut(data as *mut Self, len) } } } // Safety (based on the safety checklist on the ULE trait): // 1. RawBytesULE does not include any uninitialized or padding bytes. // (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) // 2. RawBytesULE is aligned to 1 byte. // (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) // 3. The impl of validate_bytes() returns an error if any byte is not valid (never). // 4. The impl of validate_bytes() returns an error if there are leftover bytes. // 5. The other ULE methods use the default impl. // 6. 
RawBytesULE byte equality is semantic equality unsafe impl ULE for RawBytesULE { #[inline] fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> { if bytes.len() % N == 0 { // Safe because Self is transparent over [u8; N] Ok(()) } else { Err(UleError::length::(bytes.len())) } } } impl From<[u8; N]> for RawBytesULE { #[inline] fn from(le_bytes: [u8; N]) -> Self { Self(le_bytes) } } macro_rules! impl_byte_slice_size { ($unsigned:ty, $size:literal) => { impl RawBytesULE<$size> { #[doc = concat!("Gets this `RawBytesULE` as a `", stringify!($unsigned), "`. This is equivalent to calling [`AsULE::from_unaligned()`] on the appropriately sized type.")] #[inline] pub fn as_unsigned_int(&self) -> $unsigned { <$unsigned as $crate::ule::AsULE>::from_unaligned(*self) } #[doc = concat!("Converts a `", stringify!($unsigned), "` to a `RawBytesULE`. This is equivalent to calling [`AsULE::to_unaligned()`] on the appropriately sized type.")] #[inline] pub const fn from_aligned(value: $unsigned) -> Self { Self(value.to_le_bytes()) } impl_ule_from_array!( $unsigned, RawBytesULE<$size>, RawBytesULE([0; $size]) ); } }; } macro_rules! impl_const_constructors { ($base:ty, $size:literal) => { impl ZeroSlice<$base> { /// This function can be used for constructing ZeroVecs in a const context, avoiding /// parsing checks. /// /// This cannot be generic over T because of current limitations in `const`, but if /// this method is needed in a non-const context, check out [`ZeroSlice::parse_bytes()`] /// instead. /// /// See [`ZeroSlice::cast()`] for an example. pub const fn try_from_bytes(bytes: &[u8]) -> Result<&Self, UleError> { let len = bytes.len(); #[allow(clippy::modulo_one)] if len % $size == 0 { Ok(unsafe { Self::from_bytes_unchecked(bytes) }) } else { Err(UleError::InvalidLength { ty: concat!(""), len, }) } } } }; } macro_rules! 
impl_byte_slice_type { ($single_fn:ident, $type:ty, $size:literal) => { impl From<$type> for RawBytesULE<$size> { #[inline] fn from(value: $type) -> Self { Self(value.to_le_bytes()) } } impl AsULE for $type { type ULE = RawBytesULE<$size>; #[inline] fn to_unaligned(self) -> Self::ULE { RawBytesULE(self.to_le_bytes()) } #[inline] fn from_unaligned(unaligned: Self::ULE) -> Self { <$type>::from_le_bytes(unaligned.0) } } // EqULE is true because $type and RawBytesULE<$size> // have the same byte sequence on little-endian unsafe impl EqULE for $type {} impl RawBytesULE<$size> { pub const fn $single_fn(v: $type) -> Self { RawBytesULE(v.to_le_bytes()) } } }; } macro_rules! impl_byte_slice_unsigned_type { ($type:ty, $size:literal) => { impl_byte_slice_type!(from_unsigned, $type, $size); }; } macro_rules! impl_byte_slice_signed_type { ($type:ty, $size:literal) => { impl_byte_slice_type!(from_signed, $type, $size); }; } impl_byte_slice_size!(u16, 2); impl_byte_slice_size!(u32, 4); impl_byte_slice_size!(u64, 8); impl_byte_slice_size!(u128, 16); impl_byte_slice_unsigned_type!(u16, 2); impl_byte_slice_unsigned_type!(u32, 4); impl_byte_slice_unsigned_type!(u64, 8); impl_byte_slice_unsigned_type!(u128, 16); impl_byte_slice_signed_type!(i16, 2); impl_byte_slice_signed_type!(i32, 4); impl_byte_slice_signed_type!(i64, 8); impl_byte_slice_signed_type!(i128, 16); impl_const_constructors!(u8, 1); impl_const_constructors!(u16, 2); impl_const_constructors!(u32, 4); impl_const_constructors!(u64, 8); impl_const_constructors!(u128, 16); // Note: The f32 and f64 const constructors currently have limited use because // `f32::to_le_bytes` is not yet const. impl_const_constructors!(bool, 1); // Safety (based on the safety checklist on the ULE trait): // 1. u8 does not include any uninitialized or padding bytes. // 2. u8 is aligned to 1 byte. // 3. The impl of validate_bytes() returns an error if any byte is not valid (never). // 4. 
The impl of validate_bytes() returns an error if there are leftover bytes (never). // 5. The other ULE methods use the default impl. // 6. u8 byte equality is semantic equality unsafe impl ULE for u8 { #[inline] fn validate_bytes(_bytes: &[u8]) -> Result<(), UleError> { Ok(()) } } impl AsULE for u8 { type ULE = Self; #[inline] fn to_unaligned(self) -> Self::ULE { self } #[inline] fn from_unaligned(unaligned: Self::ULE) -> Self { unaligned } } // EqULE is true because u8 is its own ULE. unsafe impl EqULE for u8 {} // Safety (based on the safety checklist on the ULE trait): // 1. NonZeroU8 does not include any uninitialized or padding bytes. // 2. NonZeroU8 is aligned to 1 byte. // 3. The impl of validate_bytes() returns an error if any byte is not valid (0x00). // 4. The impl of validate_bytes() returns an error if there are leftover bytes (never). // 5. The other ULE methods use the default impl. // 6. NonZeroU8 byte equality is semantic equality unsafe impl ULE for NonZeroU8 { #[inline] fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> { bytes.iter().try_for_each(|b| { if *b == 0x00 { Err(UleError::parse::()) } else { Ok(()) } }) } } impl AsULE for NonZeroU8 { type ULE = Self; #[inline] fn to_unaligned(self) -> Self::ULE { self } #[inline] fn from_unaligned(unaligned: Self::ULE) -> Self { unaligned } } unsafe impl EqULE for NonZeroU8 {} impl NicheBytes<1> for NonZeroU8 { const NICHE_BIT_PATTERN: [u8; 1] = [0x00]; } // Safety (based on the safety checklist on the ULE trait): // 1. i8 does not include any uninitialized or padding bytes. // 2. i8 is aligned to 1 byte. // 3. The impl of validate_bytes() returns an error if any byte is not valid (never). // 4. The impl of validate_bytes() returns an error if there are leftover bytes (never). // 5. The other ULE methods use the default impl. // 6. 
i8 byte equality is semantic equality unsafe impl ULE for i8 { #[inline] fn validate_bytes(_bytes: &[u8]) -> Result<(), UleError> { Ok(()) } } impl AsULE for i8 { type ULE = Self; #[inline] fn to_unaligned(self) -> Self::ULE { self } #[inline] fn from_unaligned(unaligned: Self::ULE) -> Self { unaligned } } // EqULE is true because i8 is its own ULE. unsafe impl EqULE for i8 {} impl AsULE for NonZeroI8 { type ULE = NonZeroU8; #[inline] fn to_unaligned(self) -> Self::ULE { // Safety: NonZeroU8 and NonZeroI8 have same size unsafe { core::mem::transmute(self) } } #[inline] fn from_unaligned(unaligned: Self::ULE) -> Self { // Safety: NonZeroU8 and NonZeroI8 have same size unsafe { core::mem::transmute(unaligned) } } } // These impls are actually safe and portable due to Rust always using IEEE 754, see the documentation // on f32::from_bits: https://doc.rust-lang.org/stable/std/primitive.f32.html#method.from_bits // // The only potential problem is that some older platforms treat signaling NaNs differently. This is // still quite portable, signalingness is not typically super important. 
impl AsULE for f32 { type ULE = RawBytesULE<4>; #[inline] fn to_unaligned(self) -> Self::ULE { self.to_bits().to_unaligned() } #[inline] fn from_unaligned(unaligned: Self::ULE) -> Self { Self::from_bits(u32::from_unaligned(unaligned)) } } impl AsULE for f64 { type ULE = RawBytesULE<8>; #[inline] fn to_unaligned(self) -> Self::ULE { self.to_bits().to_unaligned() } #[inline] fn from_unaligned(unaligned: Self::ULE) -> Self { Self::from_bits(u64::from_unaligned(unaligned)) } } // The from_bits documentation mentions that they have identical byte representations to integers // and EqULE only cares about LE systems unsafe impl EqULE for f32 {} unsafe impl EqULE for f64 {} // The bool impl is not as efficient as it could be // We can, in the future, have https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md#bitpacking // for better bitpacking // Safety (based on the safety checklist on the ULE trait): // 1. bool does not include any uninitialized or padding bytes (the remaining 7 bytes in bool are by definition zero) // 2. bool is aligned to 1 byte. // 3. The impl of validate_bytes() returns an error if any byte is not valid (bytes that are not 0 or 1). // 4. The impl of validate_bytes() returns an error if there are leftover bytes (never). // 5. The other ULE methods use the default impl. // 6. bool byte equality is semantic equality unsafe impl ULE for bool { #[inline] fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> { for byte in bytes { // https://doc.rust-lang.org/reference/types/boolean.html // Rust booleans are always size 1, align 1 values with valid bit patterns 0x0 or 0x1 if *byte > 1 { return Err(UleError::parse::()); } } Ok(()) } } impl AsULE for bool { type ULE = Self; #[inline] fn to_unaligned(self) -> Self::ULE { self } #[inline] fn from_unaligned(unaligned: Self::ULE) -> Self { unaligned } } // EqULE is true because bool is its own ULE. 
unsafe impl EqULE for bool {} // Safety (based on the safety checklist on the ULE trait): // 1. () does not include any uninitialized or padding bytes (it has no bytes) // 2. () is a ZST that is safe to construct // 3. The impl of validate_bytes() returns an error if any byte is not valid (any byte). // 4. The impl of validate_bytes() returns an error if there are leftover bytes (always). // 5. The other ULE methods use the default impl. // 6. () byte equality is semantic equality unsafe impl ULE for () { #[inline] fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> { if bytes.is_empty() { Ok(()) } else { Err(UleError::length::(bytes.len())) } } } impl AsULE for () { type ULE = Self; #[inline] fn to_unaligned(self) -> Self::ULE { self } #[inline] fn from_unaligned(unaligned: Self::ULE) -> Self { unaligned } } // EqULE is true because () is its own ULE. unsafe impl EqULE for () {} zerovec-0.11.1/src/ule/slices.rs000064400000000000000000000076351046102023000146000ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::ule::*; // Safety (based on the safety checklist on the ULE trait): // 1. [T; N] does not include any uninitialized or padding bytes since T is ULE // 2. [T; N] is aligned to 1 byte since T is ULE // 3. The impl of validate_bytes() returns an error if any byte is not valid. // 4. The impl of validate_bytes() returns an error if there are leftover bytes. // 5. The other ULE methods use the default impl. // 6. 
[T; N] byte equality is semantic equality since T is ULE unsafe impl ULE for [T; N] { #[inline] fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> { // a slice of multiple Selfs is equivalent to just a larger slice of Ts T::validate_bytes(bytes) } } impl AsULE for [T; N] { type ULE = [T::ULE; N]; #[inline] fn to_unaligned(self) -> Self::ULE { self.map(T::to_unaligned) } #[inline] fn from_unaligned(unaligned: Self::ULE) -> Self { unaligned.map(T::from_unaligned) } } unsafe impl EqULE for [T; N] {} // Safety (based on the safety checklist on the VarULE trait): // 1. str does not include any uninitialized or padding bytes. // 2. str is aligned to 1 byte. // 3. The impl of `validate_bytes()` returns an error if any byte is not valid. // 4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety // 5. The impl of `from_bytes_unchecked()` returns a reference to the same data. // 6. `parse_bytes()` is equivalent to `validate_bytes()` followed by `from_bytes_unchecked()` // 7. str byte equality is semantic equality unsafe impl VarULE for str { #[inline] fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> { core::str::from_utf8(bytes).map_err(|_| UleError::parse::())?; Ok(()) } #[inline] fn parse_bytes(bytes: &[u8]) -> Result<&Self, UleError> { core::str::from_utf8(bytes).map_err(|_| UleError::parse::()) } /// Invariant: must be safe to call when called on a slice that previously /// succeeded with `parse_bytes` #[inline] unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { core::str::from_utf8_unchecked(bytes) } } /// Note: VarULE is well-defined for all `[T]` where `T: ULE`, but [`ZeroSlice`] is more ergonomic /// when `T` is a low-level ULE type. 
For example: /// /// ```no_run /// # use zerovec::ZeroSlice; /// # use zerovec::VarZeroVec; /// # use zerovec::ule::AsULE; /// // OK: [u8] is a useful type /// let _: VarZeroVec<[u8]> = unimplemented!(); /// /// // Technically works, but [u32::ULE] is not very useful /// let _: VarZeroVec<[::ULE]> = unimplemented!(); /// /// // Better: ZeroSlice /// let _: VarZeroVec> = unimplemented!(); /// ``` /// /// [`ZeroSlice`]: crate::ZeroSlice // Safety (based on the safety checklist on the VarULE trait): // 1. [T] does not include any uninitialized or padding bytes (achieved by being a slice of a ULE type) // 2. [T] is aligned to 1 byte (achieved by being a slice of a ULE type) // 3. The impl of `validate_bytes()` returns an error if any byte is not valid. // 4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety // 5. The impl of `from_bytes_unchecked()` returns a reference to the same data. // 6. All other methods are defaulted // 7. `[T]` byte equality is semantic equality (achieved by being a slice of a ULE type) unsafe impl VarULE for [T] where T: ULE, { #[inline] fn validate_bytes(slice: &[u8]) -> Result<(), UleError> { T::validate_bytes(slice) } #[inline] unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { T::slice_from_bytes_unchecked(bytes) } } zerovec-0.11.1/src/ule/test_utils.rs000064400000000000000000000024441046102023000155060ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
/// Take a VarULE type and serialize it both in human and machine readable contexts, /// and ensure it roundtrips correctly /// /// Note that the concrete type may need to be explicitly specified to prevent issues with /// https://github.com/rust-lang/rust/issues/130180 #[cfg(feature = "serde")] pub(crate) fn assert_serde_roundtrips(var: &T) where T: crate::ule::VarULE + ?Sized + serde::Serialize, for<'a> Box: serde::Deserialize<'a>, for<'a> &'a T: serde::Deserialize<'a>, T: core::fmt::Debug + PartialEq, { let bincode = bincode::serialize(var).unwrap(); let deserialized: &T = bincode::deserialize(&bincode).unwrap(); let deserialized_box: Box = bincode::deserialize(&bincode).unwrap(); assert_eq!(var, deserialized, "Single element roundtrips with bincode"); assert_eq!( var, &*deserialized_box, "Single element roundtrips with bincode" ); let json = serde_json::to_string(var).unwrap(); let deserialized: Box = serde_json::from_str(&json).unwrap(); assert_eq!(var, &*deserialized, "Single element roundtrips with serde"); } zerovec-0.11.1/src/ule/tuple.rs000064400000000000000000000151161046102023000144400ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). //! ULE impls for tuples. //! //! Rust does not guarantee the layout of tuples, so ZeroVec defines its own tuple ULE types. //! //! Impls are defined for tuples of up to 6 elements. For longer tuples, use a custom struct //! with [`#[make_ule]`](crate::make_ule). //! //! # Examples //! //! ``` //! use zerovec::ZeroVec; //! //! // ZeroVec of tuples! //! let zerovec: ZeroVec<(u32, char)> = [(1, 'a'), (1234901, '啊'), (100, 'अ')] //! .iter() //! .copied() //! .collect(); //! //! assert_eq!(zerovec.get(1), Some((1234901, '啊'))); //! ``` use super::*; use core::fmt; use core::mem; macro_rules! 
tuple_ule { ($name:ident, $len:literal, [ $($t:ident $i:tt),+ ]) => { #[doc = concat!("ULE type for tuples with ", $len, " elements.")] #[repr(C, packed)] #[allow(clippy::exhaustive_structs)] // stable pub struct $name<$($t),+>($(pub $t),+); // Safety (based on the safety checklist on the ULE trait): // 1. TupleULE does not include any uninitialized or padding bytes. // (achieved by `#[repr(C, packed)]` on a struct containing only ULE fields) // 2. TupleULE is aligned to 1 byte. // (achieved by `#[repr(C, packed)]` on a struct containing only ULE fields) // 3. The impl of validate_bytes() returns an error if any byte is not valid. // 4. The impl of validate_bytes() returns an error if there are extra bytes. // 5. The other ULE methods use the default impl. // 6. TupleULE byte equality is semantic equality by relying on the ULE equality // invariant on the subfields unsafe impl<$($t: ULE),+> ULE for $name<$($t),+> { fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> { // expands to: 0size + mem::size_of::() + mem::size_of::(); let ule_bytes = 0usize $(+ mem::size_of::<$t>())+; if bytes.len() % ule_bytes != 0 { return Err(UleError::length::(bytes.len())); } for chunk in bytes.chunks(ule_bytes) { let mut i = 0; $( let j = i; i += mem::size_of::<$t>(); #[allow(clippy::indexing_slicing)] // length checked <$t>::validate_bytes(&chunk[j..i])?; )+ } Ok(()) } } impl<$($t: AsULE),+> AsULE for ($($t),+) { type ULE = $name<$(<$t>::ULE),+>; #[inline] fn to_unaligned(self) -> Self::ULE { $name($( self.$i.to_unaligned() ),+) } #[inline] fn from_unaligned(unaligned: Self::ULE) -> Self { ($( <$t>::from_unaligned(unaligned.$i) ),+) } } impl<$($t: fmt::Debug + ULE),+> fmt::Debug for $name<$($t),+> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { ($(self.$i),+).fmt(f) } } // We need manual impls since `#[derive()]` is disallowed on packed types impl<$($t: PartialEq + ULE),+> PartialEq for $name<$($t),+> { fn eq(&self, other: &Self) -> bool { 
($(self.$i),+).eq(&($(other.$i),+)) } } impl<$($t: Eq + ULE),+> Eq for $name<$($t),+> {} impl<$($t: PartialOrd + ULE),+> PartialOrd for $name<$($t),+> { fn partial_cmp(&self, other: &Self) -> Option { ($(self.$i),+).partial_cmp(&($(other.$i),+)) } } impl<$($t: Ord + ULE),+> Ord for $name<$($t),+> { fn cmp(&self, other: &Self) -> core::cmp::Ordering { ($(self.$i),+).cmp(&($(other.$i),+)) } } impl<$($t: ULE),+> Clone for $name<$($t),+> { fn clone(&self) -> Self { *self } } impl<$($t: ULE),+> Copy for $name<$($t),+> {} #[cfg(feature = "alloc")] impl<'a, $($t: Ord + AsULE + 'static),+> crate::map::ZeroMapKV<'a> for ($($t),+) { type Container = crate::ZeroVec<'a, ($($t),+)>; type Slice = crate::ZeroSlice<($($t),+)>; type GetType = $name<$(<$t>::ULE),+>; type OwnedType = ($($t),+); } }; } tuple_ule!(Tuple2ULE, "2", [ A 0, B 1 ]); tuple_ule!(Tuple3ULE, "3", [ A 0, B 1, C 2 ]); tuple_ule!(Tuple4ULE, "4", [ A 0, B 1, C 2, D 3 ]); tuple_ule!(Tuple5ULE, "5", [ A 0, B 1, C 2, D 3, E 4 ]); tuple_ule!(Tuple6ULE, "6", [ A 0, B 1, C 2, D 3, E 4, F 5 ]); #[test] fn test_pairule_validate() { use crate::ZeroVec; let vec: Vec<(u32, char)> = vec![(1, 'a'), (1234901, '啊'), (100, 'अ')]; let zerovec: ZeroVec<(u32, char)> = vec.iter().copied().collect(); let bytes = zerovec.as_bytes(); let zerovec2 = ZeroVec::parse_bytes(bytes).unwrap(); assert_eq!(zerovec, zerovec2); // Test failed validation with a correctly sized but differently constrained tuple // Note: 1234901 is not a valid char let zerovec3 = ZeroVec::<(char, u32)>::parse_bytes(bytes); assert!(zerovec3.is_err()); } #[test] fn test_tripleule_validate() { use crate::ZeroVec; let vec: Vec<(u32, char, i8)> = vec![(1, 'a', -5), (1234901, '啊', 3), (100, 'अ', -127)]; let zerovec: ZeroVec<(u32, char, i8)> = vec.iter().copied().collect(); let bytes = zerovec.as_bytes(); let zerovec2 = ZeroVec::parse_bytes(bytes).unwrap(); assert_eq!(zerovec, zerovec2); // Test failed validation with a correctly sized but differently constrained tuple // 
Note: 1234901 is not a valid char let zerovec3 = ZeroVec::<(char, i8, u32)>::parse_bytes(bytes); assert!(zerovec3.is_err()); } #[test] fn test_quadule_validate() { use crate::ZeroVec; let vec: Vec<(u32, char, i8, u16)> = vec![(1, 'a', -5, 3), (1234901, '啊', 3, 11), (100, 'अ', -127, 0)]; let zerovec: ZeroVec<(u32, char, i8, u16)> = vec.iter().copied().collect(); let bytes = zerovec.as_bytes(); let zerovec2 = ZeroVec::parse_bytes(bytes).unwrap(); assert_eq!(zerovec, zerovec2); // Test failed validation with a correctly sized but differently constrained tuple // Note: 1234901 is not a valid char let zerovec3 = ZeroVec::<(char, i8, u16, u32)>::parse_bytes(bytes); assert!(zerovec3.is_err()); } zerovec-0.11.1/src/ule/tuplevar.rs000064400000000000000000000330301046102023000151440ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). //! [`VarULE`] impls for tuples. //! //! This module exports [`Tuple2VarULE`], [`Tuple3VarULE`], ..., the corresponding [`VarULE`] types //! of tuples containing purely [`VarULE`] types. //! //! This can be paired with [`VarTupleULE`] to make arbitrary combinations of [`ULE`] and [`VarULE`] types. //! //! [`VarTupleULE`]: crate::ule::vartuple::VarTupleULE use super::*; use crate::varzerovec::{Index16, VarZeroVecFormat}; use core::fmt; use core::marker::PhantomData; use core::mem; use zerofrom::ZeroFrom; macro_rules! tuple_varule { // Invocation: Should be called like `tuple_ule!(Tuple2VarULE, 2, [ A a AX 0, B b BX 1 ])` // // $T is a generic name, $t is a lowercase version of it, $T_alt is an "alternate" name to use when we need two types referring // to the same input field, $i is an index. 
// // $name is the name of the type, $len MUST be the total number of fields, and then $i must be an integer going from 0 to (n - 1) in sequence // (This macro code can rely on $i < $len) ($name:ident, $len:literal, [ $($T:ident $t:ident $T_alt: ident $i:tt),+ ]) => { #[doc = concat!("VarULE type for tuples with ", $len, " elements. See module docs for more information")] #[repr(transparent)] #[allow(clippy::exhaustive_structs)] // stable pub struct $name<$($T: ?Sized,)+ Format: VarZeroVecFormat = Index16> { $($t: PhantomData<$T>,)+ // Safety invariant: Each "field" $i of the MultiFieldsULE is a valid instance of $t // // In other words, calling `.get_field::<$T>($i)` is always safe. // // This invariant is upheld when this type is constructed during VarULE parsing/validation multi: MultiFieldsULE<$len, Format> } impl<$($T: VarULE + ?Sized,)+ Format: VarZeroVecFormat> $name<$($T,)+ Format> { $( #[doc = concat!("Get field ", $i, "of this tuple")] pub fn $t(&self) -> &$T { // Safety: See invariant of `multi`. unsafe { self.multi.get_field::<$T>($i) } } )+ } // # Safety // // ## Checklist // // Safety checklist for `VarULE`: // // 1. align(1): repr(transparent) around an align(1) VarULE type: MultiFieldsULE // 2. No padding: see previous point // 3. `validate_bytes` validates that this type is a valid MultiFieldsULE, and that each field is the correct type from the tuple. // 4. `validate_bytes` checks length by deferring to the inner ULEs // 5. `from_bytes_unchecked` returns a fat pointer to the bytes. // 6. All other methods are left at their default impl. // 7. The inner ULEs have byte equality, so this composition has byte equality. 
unsafe impl<$($T: VarULE + ?Sized,)+ Format: VarZeroVecFormat> VarULE for $name<$($T,)+ Format> { fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> { // Safety: We validate that this type is the same kind of MultiFieldsULE (with $len, Format) // as in the type def let multi = as VarULE>::parse_bytes(bytes)?; $( // Safety invariant: $i < $len, from the macro invocation unsafe { multi.validate_field::<$T>($i)?; } )+ Ok(()) } unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { // Safety: We validate that this type is the same kind of MultiFieldsULE (with $len, Format) // as in the type def let multi = as VarULE>::from_bytes_unchecked(bytes); // This type is repr(transparent) over MultiFieldsULE<$len>, so its slices can be transmuted // Field invariant upheld here: validate_bytes above validates every field for being the right type mem::transmute::<&MultiFieldsULE<$len, Format>, &Self>(multi) } } impl<$($T: fmt::Debug + VarULE + ?Sized,)+ Format: VarZeroVecFormat> fmt::Debug for $name<$($T,)+ Format> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { ($(self.$t(),)+).fmt(f) } } // We need manual impls since `#[derive()]` is disallowed on packed types impl<$($T: PartialEq + VarULE + ?Sized,)+ Format: VarZeroVecFormat> PartialEq for $name<$($T,)+ Format> { fn eq(&self, other: &Self) -> bool { ($(self.$t(),)+).eq(&($(other.$t(),)+)) } } impl<$($T: Eq + VarULE + ?Sized,)+ Format: VarZeroVecFormat> Eq for $name<$($T,)+ Format> {} impl<$($T: PartialOrd + VarULE + ?Sized,)+ Format: VarZeroVecFormat> PartialOrd for $name<$($T,)+ Format> { fn partial_cmp(&self, other: &Self) -> Option { ($(self.$t(),)+).partial_cmp(&($(other.$t(),)+)) } } impl<$($T: Ord + VarULE + ?Sized,)+ Format: VarZeroVecFormat> Ord for $name<$($T,)+ Format> { fn cmp(&self, other: &Self) -> core::cmp::Ordering { ($(self.$t(),)+).cmp(&($(other.$t(),)+)) } } // # Safety // // encode_var_ule_len: returns the length of the individual VarULEs together. 
// // encode_var_ule_write: writes bytes by deferring to the inner VarULE impls. unsafe impl<$($T,)+ $($T_alt,)+ Format> EncodeAsVarULE<$name<$($T,)+ Format>> for ( $($T_alt),+ ) where $($T: VarULE + ?Sized,)+ $($T_alt: EncodeAsVarULE<$T>,)+ Format: VarZeroVecFormat, { fn encode_var_ule_as_slices(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { // unnecessary if the other two are implemented unreachable!() } #[inline] fn encode_var_ule_len(&self) -> usize { // Safety: We validate that this type is the same kind of MultiFieldsULE (with $len, Format) // as in the type def MultiFieldsULE::<$len, Format>::compute_encoded_len_for([$(self.$i.encode_var_ule_len()),+]) } #[inline] fn encode_var_ule_write(&self, dst: &mut [u8]) { let lengths = [$(self.$i.encode_var_ule_len()),+]; // Safety: We validate that this type is the same kind of MultiFieldsULE (with $len, Format) // as in the type def let multi = MultiFieldsULE::<$len, Format>::new_from_lengths_partially_initialized(lengths, dst); $( // Safety: $i < $len, from the macro invocation, and field $i is supposed to be of type $T unsafe { multi.set_field_at::<$T, $T_alt>($i, &self.$i); } )+ } } #[cfg(feature = "alloc")] impl<$($T: VarULE + ?Sized,)+ Format: VarZeroVecFormat> alloc::borrow::ToOwned for $name<$($T,)+ Format> { type Owned = alloc::boxed::Box; fn to_owned(&self) -> Self::Owned { encode_varule_to_box(self) } } impl<'a, $($T,)+ $($T_alt,)+ Format> ZeroFrom <'a, $name<$($T,)+ Format>> for ($($T_alt),+) where $($T: VarULE + ?Sized,)+ $($T_alt: ZeroFrom<'a, $T>,)+ Format: VarZeroVecFormat { fn zero_from(other: &'a $name<$($T,)+ Format>) -> Self { ( $($T_alt::zero_from(other.$t()),)+ ) } } #[cfg(feature = "serde")] impl<$($T: serde::Serialize,)+ Format> serde::Serialize for $name<$($T,)+ Format> where $($T: VarULE + ?Sized,)+ // This impl should be present on almost all VarULE types. 
if it isn't, that is a bug $(for<'a> &'a $T: ZeroFrom<'a, $T>,)+ Format: VarZeroVecFormat { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer { if serializer.is_human_readable() { let this = ( $(self.$t()),+ ); <($(&$T),+) as serde::Serialize>::serialize(&this, serializer) } else { serializer.serialize_bytes(self.multi.as_bytes()) } } } #[cfg(feature = "serde")] impl<'de, $($T: VarULE + ?Sized,)+ Format> serde::Deserialize<'de> for alloc::boxed::Box<$name<$($T,)+ Format>> where // This impl should be present on almost all deserializable VarULE types $( alloc::boxed::Box<$T>: serde::Deserialize<'de>,)+ Format: VarZeroVecFormat { fn deserialize(deserializer: Des) -> Result where Des: serde::Deserializer<'de> { if deserializer.is_human_readable() { let this = <( $(alloc::boxed::Box<$T>),+) as serde::Deserialize>::deserialize(deserializer)?; let this_ref = ( $(&*this.$i),+ ); Ok(crate::ule::encode_varule_to_box(&this_ref)) } else { // This branch should usually not be hit, since Cow-like use cases will hit the Deserialize impl for &'a TupleNVarULE instead. 
let deserialized = <&$name<$($T,)+ Format>>::deserialize(deserializer)?; Ok(deserialized.to_boxed()) } } } #[cfg(feature = "serde")] impl<'a, 'de: 'a, $($T: VarULE + ?Sized,)+ Format: VarZeroVecFormat> serde::Deserialize<'de> for &'a $name<$($T,)+ Format> { fn deserialize(deserializer: Des) -> Result where Des: serde::Deserializer<'de> { if deserializer.is_human_readable() { Err(serde::de::Error::custom( concat!("&", stringify!($name), " can only deserialize in zero-copy ways"), )) } else { let bytes = <&[u8]>::deserialize(deserializer)?; $name::<$($T,)+ Format>::parse_bytes(bytes).map_err(serde::de::Error::custom) } } } }; } tuple_varule!(Tuple2VarULE, 2, [ A a AE 0, B b BE 1 ]); tuple_varule!(Tuple3VarULE, 3, [ A a AE 0, B b BE 1, C c CE 2 ]); tuple_varule!(Tuple4VarULE, 4, [ A a AE 0, B b BE 1, C c CE 2, D d DE 3 ]); tuple_varule!(Tuple5VarULE, 5, [ A a AE 0, B b BE 1, C c CE 2, D d DE 3, E e EE 4 ]); tuple_varule!(Tuple6VarULE, 6, [ A a AE 0, B b BE 1, C c CE 2, D d DE 3, E e EE 4, F f FE 5 ]); #[cfg(test)] mod tests { use super::*; use crate::varzerovec::{Index16, Index32, Index8, VarZeroVecFormat}; use crate::VarZeroSlice; use crate::VarZeroVec; #[test] fn test_pairvarule_validate() { let vec: Vec<(&str, &[u8])> = vec![("a", b"b"), ("foo", b"bar"), ("lorem", b"ipsum\xFF")]; let zerovec: VarZeroVec> = (&vec).into(); let bytes = zerovec.as_bytes(); let zerovec2 = VarZeroVec::parse_bytes(bytes).unwrap(); assert_eq!(zerovec, zerovec2); // Test failed validation with a correctly sized but differently constrained tuple // Note: ipsum\xFF is not a valid str let zerovec3 = VarZeroVec::>::parse_bytes(bytes); assert!(zerovec3.is_err()); #[cfg(feature = "serde")] for val in zerovec.iter() { // Can't use inference due to https://github.com/rust-lang/rust/issues/130180 crate::ule::test_utils::assert_serde_roundtrips::>(val); } } fn test_tripleule_validate_inner() { let vec: Vec<(&str, &[u8], VarZeroVec)> = vec![ ("a", b"b", (&vec!["a", "b", "c"]).into()), ("foo", b"bar", 
(&vec!["baz", "quux"]).into()), ( "lorem", b"ipsum\xFF", (&vec!["dolor", "sit", "amet"]).into(), ), ]; let zerovec: VarZeroVec, Format>> = (&vec).into(); let bytes = zerovec.as_bytes(); let zerovec2 = VarZeroVec::parse_bytes(bytes).unwrap(); assert_eq!(zerovec, zerovec2); // Test failed validation with a correctly sized but differently constrained tuple // Note: the str is unlikely to be a valid varzerovec let zerovec3 = VarZeroVec::, [u8], VarZeroSlice, Format>>::parse_bytes(bytes); assert!(zerovec3.is_err()); #[cfg(feature = "serde")] for val in zerovec.iter() { // Can't use inference due to https://github.com/rust-lang/rust/issues/130180 crate::ule::test_utils::assert_serde_roundtrips::< Tuple3VarULE, Format>, >(val); } } #[test] fn test_tripleule_validate() { test_tripleule_validate_inner::(); test_tripleule_validate_inner::(); test_tripleule_validate_inner::(); } } zerovec-0.11.1/src/ule/vartuple.rs000064400000000000000000000244451046102023000151560ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). //! Types to help compose fixed-size [`ULE`] and variable-size [`VarULE`] primitives. //! //! This module exports [`VarTuple`] and [`VarTupleULE`], which allow a single sized type and //! a single unsized type to be stored together as a [`VarULE`]. //! //! # Examples //! //! ``` //! use zerovec::ule::vartuple::{VarTuple, VarTupleULE}; //! use zerovec::VarZeroVec; //! //! struct Employee<'a> { //! id: u32, //! name: &'a str, //! }; //! //! let employees = [ //! Employee { //! id: 12345, //! name: "Jane Doe", //! }, //! Employee { //! id: 67890, //! name: "John Doe", //! }, //! ]; //! //! let employees_as_var_tuples = employees //! .into_iter() //! .map(|x| VarTuple { //! sized: x.id, //! variable: x.name, //! }) //! .collect::>(); //! //! let employees_vzv: VarZeroVec> = //! 
employees_as_var_tuples.as_slice().into(); //! //! assert_eq!(employees_vzv.len(), 2); //! //! assert_eq!(employees_vzv.get(0).unwrap().sized.as_unsigned_int(), 12345); //! assert_eq!(&employees_vzv.get(0).unwrap().variable, "Jane Doe"); //! //! assert_eq!(employees_vzv.get(1).unwrap().sized.as_unsigned_int(), 67890); //! assert_eq!(&employees_vzv.get(1).unwrap().variable, "John Doe"); //! ``` use core::mem::{size_of, transmute_copy}; use zerofrom::ZeroFrom; use super::{AsULE, EncodeAsVarULE, UleError, VarULE, ULE}; /// A sized type that can be converted to a [`VarTupleULE`]. /// /// See the module for examples. #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] #[allow(clippy::exhaustive_structs)] // well-defined type #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct VarTuple { pub sized: A, pub variable: B, } /// A dynamically-sized type combining a sized and an unsized type. /// /// See the module for examples. #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] #[allow(clippy::exhaustive_structs)] // well-defined type #[repr(C)] pub struct VarTupleULE { pub sized: A::ULE, pub variable: V, } // # Safety // // ## Representation // // The type `VarTupleULE` is align(1) because it is repr(C) and its fields // are all align(1), since they are themselves ULE and VarULE, which have // this same safety constraint. Further, there is no padding, because repr(C) // does not add padding when all fields are align(1). // // // // Pointers to `VarTupleULE` are fat pointers with metadata equal to the // metadata of the inner DST field V. // // // // ## Checklist // // Safety checklist for `VarULE`: // // 1. align(1): see "Representation" above. // 2. No padding: see "Representation" above. // 3. `validate_bytes` checks length and defers to the inner ULEs. // 4. `validate_bytes` checks length and defers to the inner ULEs. // 5. `from_bytes_unchecked` returns a fat pointer to the bytes. // 6. 
All other methods are left at their default impl. // 7. The two ULEs have byte equality, so this composition has byte equality. unsafe impl VarULE for VarTupleULE where A: AsULE + 'static, V: VarULE + ?Sized, { fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> { let (sized_chunk, variable_chunk) = bytes .split_at_checked(size_of::()) .ok_or(UleError::length::(bytes.len()))?; A::ULE::validate_bytes(sized_chunk)?; V::validate_bytes(variable_chunk)?; Ok(()) } unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { let (_sized_chunk, variable_chunk) = bytes.split_at_unchecked(size_of::()); // Safety: variable_chunk is a valid V because of this function's precondition: bytes is a valid Self, // and a valid Self contains a valid V after the space needed for A::ULE. let variable_ref = V::from_bytes_unchecked(variable_chunk); let variable_ptr: *const V = variable_ref; // Safety: The DST of VarTupleULE is a pointer to the `sized` element and has a metadata // equal to the metadata of the `variable` field (see "Representation" comments on the impl). // We should use the pointer metadata APIs here when they are stable: https://github.com/rust-lang/rust/issues/81513 // For now we rely on all DST metadata being a usize. // Extract metadata from V's DST // Rust doesn't know that `&V` is a fat pointer so we have to use transmute_copy assert_eq!(size_of::<*const V>(), size_of::<(*const u8, usize)>()); // Safety: We have asserted that the transmute Src and Dst are the same size. Furthermore, // DST pointers are a pointer and usize length metadata let (_v_ptr, metadata) = transmute_copy::<*const V, (*const u8, usize)>(&variable_ptr); // Construct a new DST with the same metadata as V assert_eq!(size_of::<*const Self>(), size_of::<(*const u8, usize)>()); // Safety: Same as above but in the other direction. 
let composed_ptr = transmute_copy::<(*const u8, usize), *const Self>(&(bytes.as_ptr(), metadata)); &*(composed_ptr) } } // # Safety // // encode_var_ule_len: returns the length of the two ULEs together. // // encode_var_ule_write: writes bytes by deferring to the inner ULE impls. unsafe impl EncodeAsVarULE> for VarTuple where A: AsULE + 'static, B: EncodeAsVarULE, V: VarULE + ?Sized, { fn encode_var_ule_as_slices(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { // unnecessary if the other two are implemented unreachable!() } #[inline] fn encode_var_ule_len(&self) -> usize { size_of::() + self.variable.encode_var_ule_len() } #[inline] fn encode_var_ule_write(&self, dst: &mut [u8]) { // TODO: use split_first_chunk_mut in 1.77 let (sized_chunk, variable_chunk) = dst.split_at_mut(size_of::()); sized_chunk.clone_from_slice([self.sized.to_unaligned()].as_bytes()); self.variable.encode_var_ule_write(variable_chunk); } } #[cfg(feature = "alloc")] impl alloc::borrow::ToOwned for VarTupleULE where A: AsULE + 'static, V: VarULE + ?Sized, { type Owned = alloc::boxed::Box; fn to_owned(&self) -> Self::Owned { crate::ule::encode_varule_to_box(self) } } impl<'a, A, B, V> ZeroFrom<'a, VarTupleULE> for VarTuple where A: AsULE + 'static, V: VarULE + ?Sized, B: ZeroFrom<'a, V>, { fn zero_from(other: &'a VarTupleULE) -> Self { VarTuple { sized: AsULE::from_unaligned(other.sized), variable: B::zero_from(&other.variable), } } } #[cfg(feature = "serde")] impl serde::Serialize for VarTupleULE where A: AsULE + 'static, V: VarULE + ?Sized, A: serde::Serialize, V: serde::Serialize, { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { if serializer.is_human_readable() { let this = VarTuple { sized: A::from_unaligned(self.sized), variable: &self.variable, }; this.serialize(serializer) } else { serializer.serialize_bytes(self.as_bytes()) } } } #[cfg(feature = "serde")] impl<'a, 'de: 'a, A, V> serde::Deserialize<'de> for &'a VarTupleULE where A: AsULE + 'static, V: VarULE + 
?Sized, A: serde::Deserialize<'de>, { fn deserialize(deserializer: Des) -> Result where Des: serde::Deserializer<'de>, { if !deserializer.is_human_readable() { let bytes = <&[u8]>::deserialize(deserializer)?; VarTupleULE::::parse_bytes(bytes).map_err(serde::de::Error::custom) } else { Err(serde::de::Error::custom( "&VarTupleULE can only deserialize in zero-copy ways", )) } } } #[cfg(feature = "serde")] impl<'de, A, V> serde::Deserialize<'de> for alloc::boxed::Box> where A: AsULE + 'static, V: VarULE + ?Sized, A: serde::Deserialize<'de>, alloc::boxed::Box: serde::Deserialize<'de>, { fn deserialize(deserializer: Des) -> Result where Des: serde::Deserializer<'de>, { if deserializer.is_human_readable() { let this = VarTuple::>::deserialize(deserializer)?; Ok(crate::ule::encode_varule_to_box(&this)) } else { // This branch should usually not be hit, since Cow-like use cases will hit the Deserialize impl for &'a TupleNVarULE instead. let deserialized = <&VarTupleULE>::deserialize(deserializer)?; Ok(deserialized.to_boxed()) } } } #[test] fn test_simple() { let var_tuple = VarTuple { sized: 1500u16, variable: "hello", }; let var_tuple_ule = super::encode_varule_to_box(&var_tuple); assert_eq!(var_tuple_ule.sized.as_unsigned_int(), 1500); assert_eq!(&var_tuple_ule.variable, "hello"); // Can't use inference due to https://github.com/rust-lang/rust/issues/130180 #[cfg(feature = "serde")] crate::ule::test_utils::assert_serde_roundtrips::>(&var_tuple_ule); } #[test] fn test_nested() { use crate::{ZeroSlice, ZeroVec}; let var_tuple = VarTuple { sized: 2000u16, variable: VarTuple { sized: '🦙', variable: ZeroVec::alloc_from_slice(b"ICU"), }, }; let var_tuple_ule = super::encode_varule_to_box(&var_tuple); assert_eq!(var_tuple_ule.sized.as_unsigned_int(), 2000u16); assert_eq!(var_tuple_ule.variable.sized.to_char(), '🦙'); assert_eq!( &var_tuple_ule.variable.variable, ZeroSlice::from_ule_slice(b"ICU") ); // Can't use inference due to https://github.com/rust-lang/rust/issues/130180 
#[cfg(feature = "serde")] crate::ule::test_utils::assert_serde_roundtrips::< VarTupleULE>>, >(&var_tuple_ule); } zerovec-0.11.1/src/varzerovec/components.rs000064400000000000000000000737541046102023000171110ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use super::VarZeroVecFormatError; use crate::ule::*; use core::cmp::Ordering; use core::convert::TryFrom; use core::marker::PhantomData; use core::mem; use core::ops::Range; /// This trait allows switching between different possible internal /// representations of VarZeroVec. /// /// Currently this crate supports three formats: [`Index8`], [`Index16`] and [`Index32`], /// with [`Index16`] being the default for all [`VarZeroVec`](super::VarZeroVec) /// types unless explicitly specified otherwise. /// /// Do not implement this trait, its internals may be changed in the future, /// and all of its associated items are hidden from the docs. pub trait VarZeroVecFormat: 'static + Sized { /// The type to use for the indexing array /// /// Safety: must be a ULE for which all byte sequences are allowed #[doc(hidden)] type Index: IntegerULE; /// The type to use for the length segment /// /// Safety: must be a ULE for which all byte sequences are allowed #[doc(hidden)] type Len: IntegerULE; } /// This trait represents various ULE types that can be used to represent an integer /// /// Do not implement this trait, its internals may be changed in the future, /// and all of its associated items are hidden from the docs. 
#[allow(clippy::missing_safety_doc)] // no safety section for you, don't implement this trait period #[doc(hidden)] pub unsafe trait IntegerULE: ULE { /// The error to show when unable to construct a vec #[doc(hidden)] const TOO_LARGE_ERROR: &'static str; /// Safety: must be sizeof(self) #[doc(hidden)] const SIZE: usize; /// Safety: must be maximum integral value represented here #[doc(hidden)] const MAX_VALUE: u32; /// Safety: Must roundtrip with from_usize and represent the correct /// integral value #[doc(hidden)] fn iule_to_usize(self) -> usize; #[doc(hidden)] fn iule_from_usize(x: usize) -> Option; /// Safety: Should always convert a buffer into an array of Self with the correct length #[doc(hidden)] #[cfg(feature = "alloc")] fn iule_from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut [Self]; } /// This is a [`VarZeroVecFormat`] that stores u8s in the index array, and a u8 for a length. /// /// Will have a smaller data size, but it's *extremely* likely for larger arrays /// to be unrepresentable (and error on construction). Should probably be used /// for known-small arrays, where all but the last field are known-small. #[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] #[allow(clippy::exhaustive_structs)] // marker pub struct Index8; /// This is a [`VarZeroVecFormat`] that stores u16s in the index array, and a u16 for a length. /// /// Will have a smaller data size, but it's more likely for larger arrays /// to be unrepresentable (and error on construction) /// /// This is the default index size used by all [`VarZeroVec`](super::VarZeroVec) types. #[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] #[allow(clippy::exhaustive_structs)] // marker pub struct Index16; /// This is a [`VarZeroVecFormat`] that stores u32s in the index array, and a u32 for a length. /// Will have a larger data size, but will support large arrays without /// problems. 
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] #[allow(clippy::exhaustive_structs)] // marker pub struct Index32; impl VarZeroVecFormat for Index8 { type Index = u8; type Len = u8; } impl VarZeroVecFormat for Index16 { type Index = RawBytesULE<2>; type Len = RawBytesULE<2>; } impl VarZeroVecFormat for Index32 { type Index = RawBytesULE<4>; type Len = RawBytesULE<4>; } unsafe impl IntegerULE for u8 { const TOO_LARGE_ERROR: &'static str = "Attempted to build VarZeroVec out of elements that \ cumulatively are larger than a u8 in size"; const SIZE: usize = mem::size_of::(); const MAX_VALUE: u32 = u8::MAX as u32; #[inline] fn iule_to_usize(self) -> usize { self as usize } #[inline] fn iule_from_usize(u: usize) -> Option { u8::try_from(u).ok() } #[inline] #[cfg(feature = "alloc")] fn iule_from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] { bytes } } unsafe impl IntegerULE for RawBytesULE<2> { const TOO_LARGE_ERROR: &'static str = "Attempted to build VarZeroVec out of elements that \ cumulatively are larger than a u16 in size"; const SIZE: usize = mem::size_of::(); const MAX_VALUE: u32 = u16::MAX as u32; #[inline] fn iule_to_usize(self) -> usize { self.as_unsigned_int() as usize } #[inline] fn iule_from_usize(u: usize) -> Option { u16::try_from(u).ok().map(u16::to_unaligned) } #[inline] #[cfg(feature = "alloc")] fn iule_from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] { Self::from_bytes_unchecked_mut(bytes) } } unsafe impl IntegerULE for RawBytesULE<4> { const TOO_LARGE_ERROR: &'static str = "Attempted to build VarZeroVec out of elements that \ cumulatively are larger than a u32 in size"; const SIZE: usize = mem::size_of::(); const MAX_VALUE: u32 = u32::MAX; #[inline] fn iule_to_usize(self) -> usize { self.as_unsigned_int() as usize } #[inline] fn iule_from_usize(u: usize) -> Option { u32::try_from(u).ok().map(u32::to_unaligned) } #[inline] #[cfg(feature = "alloc")] fn iule_from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] { 
Self::from_bytes_unchecked_mut(bytes) } } /// A more parsed version of `VarZeroSlice`. This type is where most of the VarZeroVec /// internal representation code lies. /// /// This is *basically* an `&'a [u8]` to a zero copy buffer, but split out into /// the buffer components. Logically this is capable of behaving as /// a `&'a [T::VarULE]`, but since `T::VarULE` is unsized that type does not actually /// exist. /// /// See [`VarZeroVecComponents::parse_bytes()`] for information on the internal invariants involved #[derive(Debug)] pub struct VarZeroVecComponents<'a, T: ?Sized, F> { /// The number of elements len: u32, /// The list of indices into the `things` slice /// Since the first element is always at things[0], the first element of the indices array is for the *second* element indices: &'a [u8], /// The contiguous list of `T::VarULE`s things: &'a [u8], marker: PhantomData<(&'a T, F)>, } // #[derive()] won't work here since we do not want it to be // bound on T: Copy impl<'a, T: ?Sized, F> Copy for VarZeroVecComponents<'a, T, F> {} impl<'a, T: ?Sized, F> Clone for VarZeroVecComponents<'a, T, F> { fn clone(&self) -> Self { *self } } impl<'a, T: VarULE + ?Sized, F> Default for VarZeroVecComponents<'a, T, F> { #[inline] fn default() -> Self { Self::new() } } impl<'a, T: VarULE + ?Sized, F> VarZeroVecComponents<'a, T, F> { #[inline] pub fn new() -> Self { Self { len: 0, indices: &[], things: &[], marker: PhantomData, } } } impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecComponents<'a, T, F> { /// Construct a new VarZeroVecComponents, checking invariants about the overall buffer size: /// /// - There must be either zero or at least four bytes (if four, this is the "length" parsed as a usize) /// - There must be at least `4*(length - 1) + 4` bytes total, to form the array `indices` of indices /// - `0..indices[0]` must index into a valid section of /// `things` (the data after `indices`), such that it parses to a `T::VarULE` /// - `indices[i - 
1]..indices[i]` must index into a valid section of /// `things` (the data after `indices`), such that it parses to a `T::VarULE` /// - `indices[len - 2]..things.len()` must index into a valid section of /// `things`, such that it parses to a `T::VarULE` #[inline] pub fn parse_bytes(slice: &'a [u8]) -> Result { // The empty VZV is special-cased to the empty slice if slice.is_empty() { return Ok(VarZeroVecComponents { len: 0, indices: &[], things: &[], marker: PhantomData, }); } let len_bytes = slice .get(0..F::Len::SIZE) .ok_or(VarZeroVecFormatError::Metadata)?; let len_ule = F::Len::parse_bytes_to_slice(len_bytes).map_err(|_| VarZeroVecFormatError::Metadata)?; let len = len_ule .first() .ok_or(VarZeroVecFormatError::Metadata)? .iule_to_usize(); let rest = slice .get(F::Len::SIZE..) .ok_or(VarZeroVecFormatError::Metadata)?; let len_u32 = u32::try_from(len).map_err(|_| VarZeroVecFormatError::Metadata); // We pass down the rest of the invariants Self::parse_bytes_with_length(len_u32?, rest) } /// Construct a new VarZeroVecComponents, checking invariants about the overall buffer size: /// /// - There must be at least `4*len` bytes total, to form the array `indices` of indices. 
/// - `indices[i]..indices[i+1]` must index into a valid section of /// `things` (the data after `indices`), such that it parses to a `T::VarULE` /// - `indices[len - 1]..things.len()` must index into a valid section of /// `things`, such that it parses to a `T::VarULE` #[inline] pub fn parse_bytes_with_length( len: u32, slice: &'a [u8], ) -> Result { let len_minus_one = len.checked_sub(1); // The empty VZV is special-cased to the empty slice let Some(len_minus_one) = len_minus_one else { return Ok(VarZeroVecComponents { len: 0, indices: &[], things: &[], marker: PhantomData, }); }; // The indices array is one element shorter since the first index is always 0, // so we use len_minus_one let indices_bytes = slice .get(..F::Index::SIZE * (len_minus_one as usize)) .ok_or(VarZeroVecFormatError::Metadata)?; let things = slice .get(F::Index::SIZE * (len_minus_one as usize)..) .ok_or(VarZeroVecFormatError::Metadata)?; let borrowed = VarZeroVecComponents { len, indices: indices_bytes, things, marker: PhantomData, }; borrowed.check_indices_and_things()?; Ok(borrowed) } /// Construct a [`VarZeroVecComponents`] from a byte slice that has previously /// successfully returned a [`VarZeroVecComponents`] when passed to /// [`VarZeroVecComponents::parse_bytes()`]. Will return the same /// object as one would get from calling [`VarZeroVecComponents::parse_bytes()`]. 
/// /// # Safety /// The bytes must have previously successfully run through /// [`VarZeroVecComponents::parse_bytes()`] pub unsafe fn from_bytes_unchecked(slice: &'a [u8]) -> Self { // The empty VZV is special-cased to the empty slice if slice.is_empty() { return VarZeroVecComponents { len: 0, indices: &[], things: &[], marker: PhantomData, }; } let (len_bytes, data_bytes) = unsafe { slice.split_at_unchecked(F::Len::SIZE) }; // Safety: F::Len allows all byte sequences let len_ule = F::Len::slice_from_bytes_unchecked(len_bytes); let len = len_ule.get_unchecked(0).iule_to_usize(); let len_u32 = len as u32; // Safety: This method requires the bytes to have passed through `parse_bytes()` // whereas we're calling something that asks for `parse_bytes_with_length()`. // The two methods perform similar validation, with parse_bytes() validating an additional // 4-byte `length` header. Self::from_bytes_unchecked_with_length(len_u32, data_bytes) } /// Construct a [`VarZeroVecComponents`] from a byte slice that has previously /// successfully returned a [`VarZeroVecComponents`] when passed to /// [`VarZeroVecComponents::parse_bytes()`]. Will return the same /// object as one would get from calling [`VarZeroVecComponents::parse_bytes()`]. 
/// /// # Safety /// The len,bytes must have previously successfully run through /// [`VarZeroVecComponents::parse_bytes_with_length()`] pub unsafe fn from_bytes_unchecked_with_length(len: u32, slice: &'a [u8]) -> Self { let len_minus_one = len.checked_sub(1); // The empty VZV is special-cased to the empty slice let Some(len_minus_one) = len_minus_one else { return VarZeroVecComponents { len: 0, indices: &[], things: &[], marker: PhantomData, }; }; // The indices array is one element shorter since the first index is always 0, // so we use len_minus_one let indices_bytes = slice.get_unchecked(..F::Index::SIZE * (len_minus_one as usize)); let things = slice.get_unchecked(F::Index::SIZE * (len_minus_one as usize)..); VarZeroVecComponents { len, indices: indices_bytes, things, marker: PhantomData, } } /// Get the number of elements in this vector #[inline] pub fn len(self) -> usize { self.len as usize } /// Returns `true` if the vector contains no elements. #[inline] pub fn is_empty(self) -> bool { self.len == 0 } /// Get the idx'th element out of this slice. Returns `None` if out of bounds. #[inline] pub fn get(self, idx: usize) -> Option<&'a T> { if idx >= self.len() { return None; } Some(unsafe { self.get_unchecked(idx) }) } /// Get the idx'th element out of this slice. Does not bounds check. /// /// Safety: /// - `idx` must be in bounds (`idx < self.len()`) #[inline] pub(crate) unsafe fn get_unchecked(self, idx: usize) -> &'a T { let range = self.get_things_range(idx); let things_slice = self.things.get_unchecked(range); T::from_bytes_unchecked(things_slice) } /// Get the range in `things` for the element at `idx`. Does not bounds check. 
/// /// Safety: /// - `idx` must be in bounds (`idx < self.len()`) #[inline] pub(crate) unsafe fn get_things_range(self, idx: usize) -> Range { let start = if let Some(idx_minus_one) = idx.checked_sub(1) { self.indices_slice() .get_unchecked(idx_minus_one) .iule_to_usize() } else { 0 }; let end = if idx + 1 == self.len() { self.things.len() } else { self.indices_slice().get_unchecked(idx).iule_to_usize() }; debug_assert!(start <= end); start..end } /// Get the size, in bytes, of the indices array pub(crate) unsafe fn get_indices_size(self) -> usize { self.indices.len() } /// Check the internal invariants of VarZeroVecComponents: /// /// - `indices[i]..indices[i+1]` must index into a valid section of /// `things`, such that it parses to a `T::VarULE` /// - `indices[len - 1]..things.len()` must index into a valid section of /// `things`, such that it parses to a `T::VarULE` /// - `indices` is monotonically increasing /// /// This method is NOT allowed to call any other methods on VarZeroVecComponents since all other methods /// assume that the slice has been passed through check_indices_and_things #[inline] #[allow(clippy::len_zero)] // more explicit to enforce safety invariants fn check_indices_and_things(self) -> Result<(), VarZeroVecFormatError> { if self.len() == 0 { if self.things.len() > 0 { return Err(VarZeroVecFormatError::Metadata); } else { return Ok(()); } } let indices_slice = self.indices_slice(); assert_eq!(self.len(), indices_slice.len() + 1); // Safety: i is in bounds (assertion above) let mut start = 0; for i in 0..self.len() { // The indices array is offset by 1: indices[0] is the end of the first // element and the start of the next, since the start of the first element // is always things[0]. So to get the end we get element `i`. let end = if let Some(end) = indices_slice.get(i) { end.iule_to_usize() } else { // This only happens at i = self.len() - 1 = indices_slice.len() + 1 - 1 // = indices_slice.len(). 
This is the last `end`, which is always the size of // `things` and thus never stored in the array self.things.len() }; if start > end { return Err(VarZeroVecFormatError::Metadata); } if end > self.things.len() { return Err(VarZeroVecFormatError::Metadata); } // Safety: start..end is a valid range in self.things let bytes = unsafe { self.things.get_unchecked(start..end) }; T::parse_bytes(bytes).map_err(VarZeroVecFormatError::Values)?; start = end; } Ok(()) } /// Create an iterator over the Ts contained in VarZeroVecComponents #[inline] pub fn iter(self) -> VarZeroSliceIter<'a, T, F> { VarZeroSliceIter::new(self) } #[cfg(feature = "alloc")] pub fn to_vec(self) -> alloc::vec::Vec> { self.iter().map(T::to_boxed).collect() } #[inline] fn indices_slice(&self) -> &'a [F::Index] { unsafe { F::Index::slice_from_bytes_unchecked(self.indices) } } // Dump a debuggable representation of this type #[allow(unused)] // useful for debugging #[cfg(feature = "alloc")] pub(crate) fn dump(&self) -> alloc::string::String { let indices = self .indices_slice() .iter() .copied() .map(IntegerULE::iule_to_usize) .collect::>(); alloc::format!("VarZeroVecComponents {{ indices: {indices:?} }}") } } /// An iterator over VarZeroSlice #[derive(Debug)] pub struct VarZeroSliceIter<'a, T: ?Sized, F = Index16> { components: VarZeroVecComponents<'a, T, F>, index: usize, // Safety invariant: must be a valid index into the data segment of `components`, or an index at the end // i.e. 
start_index <= components.things.len() // // It must be a valid index into the `things` array of components, coming from `components.indices_slice()` start_index: usize, } impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroSliceIter<'a, T, F> { fn new(c: VarZeroVecComponents<'a, T, F>) -> Self { Self { components: c, index: 0, // Invariant upheld, 0 is always a valid index-or-end start_index: 0, } } } impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> Iterator for VarZeroSliceIter<'a, T, F> { type Item = &'a T; fn next(&mut self) -> Option { // Note: the indices array doesn't contain 0 or len, we need to specially handle those edges. The 0 is handled // by start_index, and the len is handled by the code for `end`. if self.index >= self.components.len() { return None; } // Invariant established: self.index is in bounds for self.components.len(), // which means it is in bounds for self.components.indices_slice() since that has the same length let end = if self.index + 1 == self.components.len() { // We don't store the end index since it is computable, so the last element should use self.components.things.len() self.components.things.len() } else { // Safety: self.index was known to be in bounds from the bounds check above. 
unsafe { self.components .indices_slice() .get_unchecked(self.index) .iule_to_usize() } }; // Invariant established: end has the same invariant as self.start_index since it comes from indices_slice, which is guaranteed // to only contain valid indexes let item = unsafe { // Safety: self.start_index and end both have in-range invariants, plus they are valid indices from indices_slice // which means we can treat this data as a T T::from_bytes_unchecked(self.components.things.get_unchecked(self.start_index..end)) }; self.index += 1; // Invariant upheld: end has the same invariant as self.start_index self.start_index = end; Some(item) } fn size_hint(&self) -> (usize, Option) { let remainder = self.components.len() - self.index; (remainder, Some(remainder)) } } impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> ExactSizeIterator for VarZeroSliceIter<'a, T, F> { fn len(&self) -> usize { self.components.len() - self.index } } impl<'a, T, F> VarZeroVecComponents<'a, T, F> where T: VarULE, T: ?Sized, T: Ord, F: VarZeroVecFormat, { /// Binary searches a sorted `VarZeroVecComponents` for the given element. For more information, see /// the primitive function [`binary_search`](slice::binary_search). pub fn binary_search(&self, needle: &T) -> Result { self.binary_search_by(|probe| probe.cmp(needle)) } pub fn binary_search_in_range( &self, needle: &T, range: Range, ) -> Option> { self.binary_search_in_range_by(|probe| probe.cmp(needle), range) } } impl<'a, T, F> VarZeroVecComponents<'a, T, F> where T: VarULE, T: ?Sized, F: VarZeroVecFormat, { /// Binary searches a sorted `VarZeroVecComponents` for the given predicate. For more information, see /// the primitive function [`binary_search_by`](slice::binary_search_by). pub fn binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result { // Safety: 0 and len are in range unsafe { self.binary_search_in_range_unchecked(predicate, 0..self.len()) } } // Binary search within a range. 
// Values returned are relative to the range start! pub fn binary_search_in_range_by( &self, predicate: impl FnMut(&T) -> Ordering, range: Range, ) -> Option> { if range.end > self.len() { return None; } if range.end < range.start { return None; } // Safety: We bounds checked above: end is in-bounds or len, and start is <= end let range_absolute = unsafe { self.binary_search_in_range_unchecked(predicate, range.clone()) }; // The values returned are relative to the range start Some( range_absolute .map(|o| o - range.start) .map_err(|e| e - range.start), ) } /// Safety: range must be in range for the slice (start <= len, end <= len, start <= end) unsafe fn binary_search_in_range_unchecked( &self, mut predicate: impl FnMut(&T) -> Ordering, range: Range, ) -> Result { // Function invariant: size is always end - start let mut start = range.start; let mut end = range.end; let mut size; // Loop invariant: 0 <= start < end <= len // This invariant is initialized by the function safety invariants and the loop condition while start < end { size = end - start; // This establishes mid < end (which implies mid < len) // size is end - start. start + size is end (which is <= len). // mid = start + size/2 will be less than end let mid = start + size / 2; // Safety: mid is < end <= len, so in-range let cmp = predicate(self.get_unchecked(mid)); match cmp { Ordering::Less => { // This retains the loop invariant since it // increments start, and we already have 0 <= start // start < end is enforced by the loop condition start = mid + 1; } Ordering::Greater => { // mid < end, so this decreases end. // This means end <= len is still true, and // end > start is enforced by the loop condition end = mid; } Ordering::Equal => return Ok(mid), } } Err(start) } } /// Collects the bytes for a VarZeroSlice into a Vec. 
#[cfg(feature = "alloc")] pub fn get_serializable_bytes_non_empty(elements: &[A]) -> Option> where T: VarULE + ?Sized, A: EncodeAsVarULE, F: VarZeroVecFormat, { debug_assert!(!elements.is_empty()); let len = compute_serializable_len::(elements)?; debug_assert!( len >= F::Len::SIZE as u32, "Must have at least F::Len::SIZE bytes to hold the length of the vector" ); let mut output = alloc::vec![0u8; len as usize]; write_serializable_bytes::(elements, &mut output); Some(output) } /// Writes the bytes for a VarZeroLengthlessSlice into an output buffer. /// Usable for a VarZeroSlice if you first write the length bytes. /// /// Every byte in the buffer will be initialized after calling this function. /// /// # Panics /// /// Panics if the buffer is not exactly the correct length. pub fn write_serializable_bytes_without_length(elements: &[A], output: &mut [u8]) where T: VarULE + ?Sized, A: EncodeAsVarULE, F: VarZeroVecFormat, { assert!(elements.len() <= F::Len::MAX_VALUE as usize); if elements.is_empty() { return; } // idx_offset = offset from the start of the buffer for the next index let mut idx_offset: usize = 0; // first_dat_offset = offset from the start of the buffer of the first data block let first_dat_offset: usize = idx_offset + (elements.len() - 1) * F::Index::SIZE; // dat_offset = offset from the start of the buffer of the next data block let mut dat_offset: usize = first_dat_offset; for (i, element) in elements.iter().enumerate() { let element_len = element.encode_var_ule_len(); // The first index is always 0. We don't write it, or update the idx offset. 
if i != 0 { let idx_limit = idx_offset + F::Index::SIZE; #[allow(clippy::indexing_slicing)] // Function contract allows panicky behavior let idx_slice = &mut output[idx_offset..idx_limit]; // VZV expects data offsets to be stored relative to the first data block let idx = dat_offset - first_dat_offset; assert!(idx <= F::Index::MAX_VALUE as usize); #[allow(clippy::expect_used)] // this function is explicitly panicky let bytes_to_write = F::Index::iule_from_usize(idx).expect(F::Index::TOO_LARGE_ERROR); idx_slice.copy_from_slice(ULE::slice_as_bytes(&[bytes_to_write])); idx_offset = idx_limit; } let dat_limit = dat_offset + element_len; #[allow(clippy::indexing_slicing)] // Function contract allows panicky behavior let dat_slice = &mut output[dat_offset..dat_limit]; element.encode_var_ule_write(dat_slice); debug_assert_eq!(T::validate_bytes(dat_slice), Ok(())); dat_offset = dat_limit; } debug_assert_eq!(idx_offset, F::Index::SIZE * (elements.len() - 1)); assert_eq!(dat_offset, output.len()); } /// Writes the bytes for a VarZeroSlice into an output buffer. /// /// Every byte in the buffer will be initialized after calling this function. /// /// # Panics /// /// Panics if the buffer is not exactly the correct length. 
pub fn write_serializable_bytes(elements: &[A], output: &mut [u8]) where T: VarULE + ?Sized, A: EncodeAsVarULE, F: VarZeroVecFormat, { if elements.is_empty() { return; } assert!(elements.len() <= F::Len::MAX_VALUE as usize); #[allow(clippy::expect_used)] // This function is explicitly panicky let num_elements_ule = F::Len::iule_from_usize(elements.len()).expect(F::Len::TOO_LARGE_ERROR); #[allow(clippy::indexing_slicing)] // Function contract allows panicky behavior output[0..F::Len::SIZE].copy_from_slice(ULE::slice_as_bytes(&[num_elements_ule])); #[allow(clippy::indexing_slicing)] // Function contract allows panicky behavior write_serializable_bytes_without_length::(elements, &mut output[F::Len::SIZE..]); } pub fn compute_serializable_len_without_length(elements: &[A]) -> Option where T: VarULE + ?Sized, A: EncodeAsVarULE, F: VarZeroVecFormat, { let elements_len = elements.len(); let Some(elements_len_minus_one) = elements_len.checked_sub(1) else { // Empty vec is optimized to an empty byte representation return Some(0); }; let idx_len: u32 = u32::try_from(elements_len_minus_one) .ok()? .checked_mul(F::Index::SIZE as u32)?; let data_len: u32 = elements .iter() .map(|v| u32::try_from(v.encode_var_ule_len()).ok()) .try_fold(0u32, |s, v| s.checked_add(v?))?; let ret = idx_len.checked_add(data_len); if let Some(r) = ret { if r >= F::Index::MAX_VALUE { return None; } } ret } pub fn compute_serializable_len(elements: &[A]) -> Option where T: VarULE + ?Sized, A: EncodeAsVarULE, F: VarZeroVecFormat, { compute_serializable_len_without_length::(elements).map(|x| x + F::Len::SIZE as u32) } zerovec-0.11.1/src/varzerovec/databake.rs000064400000000000000000000072241046102023000164450ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
use crate::vecs::{Index16, Index32}; use crate::{ule::VarULE, VarZeroSlice, VarZeroVec}; use databake::*; impl Bake for VarZeroVec<'_, T, Index16> { fn bake(&self, env: &CrateEnv) -> TokenStream { env.insert("zerovec"); if self.is_empty() { quote! { zerovec::vecs::VarZeroVec16::new() } } else { let bytes = databake::Bake::bake(&self.as_bytes(), env); // Safe because self.as_bytes is a safe input quote! { unsafe { zerovec::vecs::VarZeroVec16::from_bytes_unchecked(#bytes) } } } } } impl Bake for VarZeroVec<'_, T, Index32> { fn bake(&self, env: &CrateEnv) -> TokenStream { env.insert("zerovec"); if self.is_empty() { quote! { zerovec::vecs::VarZeroVec32::new() } } else { let bytes = databake::Bake::bake(&self.as_bytes(), env); // Safe because self.as_bytes is a safe input quote! { unsafe { zerovec::vecs::VarZeroVec32::from_bytes_unchecked(#bytes) } } } } } impl BakeSize for VarZeroVec<'_, T, Index16> { fn borrows_size(&self) -> usize { self.as_bytes().len() } } impl BakeSize for VarZeroVec<'_, T, Index32> { fn borrows_size(&self) -> usize { self.as_bytes().len() } } impl Bake for &VarZeroSlice { fn bake(&self, env: &CrateEnv) -> TokenStream { env.insert("zerovec"); if self.is_empty() { quote! { zerovec::vecs::VarZeroSlice16::new_empty() } } else { let bytes = databake::Bake::bake(&self.as_bytes(), env); // Safe because self.as_bytes is a safe input quote! { unsafe { zerovec::vecs::VarZeroSlice16::from_bytes_unchecked(#bytes) } } } } } impl Bake for &VarZeroSlice { fn bake(&self, env: &CrateEnv) -> TokenStream { env.insert("zerovec"); if self.is_empty() { quote! { zerovec::vecs::VarZeroSlice32::new_empty() } } else { let bytes = databake::Bake::bake(&self.as_bytes(), env); // Safe because self.as_bytes is a safe input quote! 
{ unsafe { zerovec::vecs::VarZeroSlice32::from_bytes_unchecked(#bytes) } } } } } impl BakeSize for &VarZeroSlice { fn borrows_size(&self) -> usize { if self.is_empty() { 0 } else { self.as_bytes().len() } } } impl BakeSize for &VarZeroSlice { fn borrows_size(&self) -> usize { if self.is_empty() { 0 } else { self.as_bytes().len() } } } #[test] fn test_baked_vec() { test_bake!( VarZeroVec, const, crate::vecs::VarZeroVec16::new(), zerovec ); test_bake!( VarZeroVec, const, unsafe { crate::vecs::VarZeroVec16::from_bytes_unchecked(b"\x02\0\0\0\0\0\x05\0helloworld") }, zerovec ); } #[test] fn test_baked_slice() { test_bake!( &VarZeroSlice, const, crate::vecs::VarZeroSlice16::new_empty(), zerovec ); test_bake!( &VarZeroSlice, const, unsafe { crate::vecs::VarZeroSlice16::from_bytes_unchecked(b"\x02\0\0\0\0\0\x05\0helloworld") }, zerovec ); } zerovec-0.11.1/src/varzerovec/error.rs000064400000000000000000000014501046102023000160350ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use core::fmt::Display; #[derive(Debug)] pub enum VarZeroVecFormatError { /// The byte buffer was not in the appropriate format for VarZeroVec. Metadata, /// One of the values could not be decoded. Values(crate::ule::UleError), } impl core::error::Error for VarZeroVecFormatError {} impl Display for VarZeroVecFormatError { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { match self { Self::Metadata => write!(f, "VarZeroVecFormatError: metadata"), Self::Values(e) => write!(f, "VarZeroVecFormatError: {e}"), } } } zerovec-0.11.1/src/varzerovec/lengthless.rs000064400000000000000000000111161046102023000170540ustar 00000000000000// This file is part of ICU4X. 
For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use super::components::VarZeroVecComponents; use super::*; use crate::ule::*; use core::marker::PhantomData; use core::mem; /// A slice representing the index and data tables of a VarZeroVec, /// *without* any length fields. The length field is expected to be stored elsewhere. /// /// Without knowing the length this is of course unsafe to use directly. #[repr(transparent)] #[derive(PartialEq, Eq)] pub(crate) struct VarZeroLengthlessSlice { marker: PhantomData<(F, T)>, /// The original slice this was constructed from // Safety invariant: This field must have successfully passed through // VarZeroVecComponents::parse_bytes_with_length() with the length // associated with this value. entire_slice: [u8], } impl VarZeroLengthlessSlice { /// Obtain a [`VarZeroVecComponents`] borrowing from the internal buffer /// /// Safety: `len` must be the length associated with this value #[inline] pub(crate) unsafe fn as_components<'a>(&'a self, len: u32) -> VarZeroVecComponents<'a, T, F> { unsafe { // safety: VarZeroSlice is guaranteed to parse here VarZeroVecComponents::from_bytes_unchecked_with_length(len, &self.entire_slice) } } /// Parse a VarZeroLengthlessSlice from a slice of the appropriate format /// /// Slices of the right format can be obtained via [`VarZeroSlice::as_bytes()`] pub fn parse_bytes<'a>(len: u32, slice: &'a [u8]) -> Result<&'a Self, UleError> { let _ = VarZeroVecComponents::::parse_bytes_with_length(len, slice) .map_err(|_| UleError::parse::())?; unsafe { // Safety: We just verified that it is of the correct format. Ok(Self::from_bytes_unchecked(slice)) } } /// Uses a `&[u8]` buffer as a `VarZeroLengthlessSlice` without any verification. 
/// /// # Safety /// /// `bytes` need to be an output from [`VarZeroLengthlessSlice::as_bytes()`], or alternatively /// successfully pass through `parse_bytes` (with `len`) /// /// The length associated with this value will be the length associated with the original slice. pub(crate) const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { // self is really just a wrapper around a byte slice mem::transmute(bytes) } /// Uses a `&mut [u8]` buffer as a `VarZeroLengthlessSlice` without any verification. /// /// # Safety /// /// `bytes` need to be an output from [`VarZeroLengthlessSlice::as_bytes()`], or alternatively /// be valid to be passed to `from_bytes_unchecked_with_length` /// /// The length associated with this value will be the length associated with the original slice. pub(crate) unsafe fn from_bytes_unchecked_mut(bytes: &mut [u8]) -> &mut Self { // self is really just a wrapper around a byte slice mem::transmute(bytes) } /// Get one of this slice's elements /// /// # Safety /// /// `index` must be in range, and `len` must be the length associated with this /// instance of VarZeroLengthlessSlice. pub(crate) unsafe fn get_unchecked(&self, len: u32, idx: usize) -> &T { self.as_components(len).get_unchecked(idx) } /// Get a reference to the entire encoded backing buffer of this slice /// /// The bytes can be passed back to [`Self::parse_bytes()`]. /// /// To take the bytes as a vector, see [`VarZeroVec::into_bytes()`]. 
#[inline] pub(crate) const fn as_bytes(&self) -> &[u8] { &self.entire_slice } /// Get the bytes behind this as a mutable slice /// /// # Safety /// /// - `len` is the length associated with this VarZeroLengthlessSlice /// - The resultant slice is only mutated in a way such that it remains a valid `T` /// /// # Panics /// /// Panics when idx is not in bounds for this slice pub(crate) unsafe fn get_bytes_at_mut(&mut self, len: u32, idx: usize) -> &mut [u8] { let components = self.as_components(len); let range = components.get_things_range(idx); let offset = components.get_indices_size(); // get_indices_size() returns the start of the things slice, and get_things_range() // returns a range in-bounds of the things slice #[allow(clippy::indexing_slicing)] &mut self.entire_slice[offset..][range] } } zerovec-0.11.1/src/varzerovec/mod.rs000064400000000000000000000014271046102023000154670ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). //! See [`VarZeroVec`](crate::VarZeroVec) for details pub(crate) mod components; pub(crate) mod error; pub(crate) mod lengthless; #[cfg(feature = "alloc")] pub(crate) mod owned; pub(crate) mod slice; pub(crate) mod vec; #[cfg(feature = "databake")] mod databake; #[cfg(feature = "serde")] mod serde; pub use crate::{VarZeroSlice, VarZeroVec}; #[doc(hidden)] pub use components::VarZeroVecComponents; pub use components::{Index16, Index32, Index8, VarZeroSliceIter, VarZeroVecFormat}; #[cfg(feature = "alloc")] pub use owned::VarZeroVecOwned; pub use error::VarZeroVecFormatError; zerovec-0.11.1/src/varzerovec/owned.rs000064400000000000000000000642101046102023000160230ustar 00000000000000// This file is part of ICU4X. 
For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). // The mutation operations in this file should panic to prevent undefined behavior #![allow(clippy::unwrap_used)] #![allow(clippy::expect_used)] #![allow(clippy::indexing_slicing)] #![allow(clippy::panic)] use super::*; use crate::ule::*; use alloc::vec::Vec; use core::any; use core::convert::TryInto; use core::marker::PhantomData; use core::ops::Deref; use core::ops::Range; use core::{fmt, ptr, slice}; use super::components::IntegerULE; /// A fully-owned [`VarZeroVec`]. This type has no lifetime but has the same /// internal buffer representation of [`VarZeroVec`], making it cheaply convertible to /// [`VarZeroVec`] and [`VarZeroSlice`]. /// /// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the /// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`]. pub struct VarZeroVecOwned { marker1: PhantomData, marker2: PhantomData, // safety invariant: must parse into a valid VarZeroVecComponents entire_slice: Vec, } impl Clone for VarZeroVecOwned { fn clone(&self) -> Self { VarZeroVecOwned { marker1: PhantomData, marker2: PhantomData, entire_slice: self.entire_slice.clone(), } } } // The effect of a shift on the indices in the varzerovec. 
#[derive(PartialEq)] enum ShiftType { Insert, Replace, Remove, } impl Deref for VarZeroVecOwned { type Target = VarZeroSlice; fn deref(&self) -> &VarZeroSlice { self.as_slice() } } impl VarZeroVecOwned { /// Construct an empty VarZeroVecOwned pub fn new() -> Self { Self { marker1: PhantomData, marker2: PhantomData, entire_slice: Vec::new(), } } } impl VarZeroVecOwned { /// Construct a VarZeroVecOwned from a [`VarZeroSlice`] by cloning the internal data pub fn from_slice(slice: &VarZeroSlice) -> Self { Self { marker1: PhantomData, marker2: PhantomData, entire_slice: slice.as_bytes().into(), } } /// Construct a VarZeroVecOwned from a list of elements pub fn try_from_elements(elements: &[A]) -> Result where A: EncodeAsVarULE, { Ok(if elements.is_empty() { Self::from_slice(VarZeroSlice::new_empty()) } else { Self { marker1: PhantomData, marker2: PhantomData, // TODO(#1410): Rethink length errors in VZV. entire_slice: components::get_serializable_bytes_non_empty::(elements) .ok_or(F::Index::TOO_LARGE_ERROR)?, } }) } /// Obtain this `VarZeroVec` as a [`VarZeroSlice`] pub fn as_slice(&self) -> &VarZeroSlice { let slice: &[u8] = &self.entire_slice; unsafe { // safety: the slice is known to come from a valid parsed VZV VarZeroSlice::from_bytes_unchecked(slice) } } /// Try to allocate a buffer with enough capacity for `capacity` /// elements. Since `T` can take up an arbitrary size this will /// just allocate enough space for 4-byte Ts pub(crate) fn with_capacity(capacity: usize) -> Self { Self { marker1: PhantomData, marker2: PhantomData, entire_slice: Vec::with_capacity(capacity * (F::Index::SIZE + 4)), } } /// Try to reserve space for `capacity` /// elements. Since `T` can take up an arbitrary size this will /// just allocate enough space for 4-byte Ts pub(crate) fn reserve(&mut self, capacity: usize) { self.entire_slice.reserve(capacity * (F::Index::SIZE + 4)) } /// Get the position of a specific element in the data segment. 
/// /// If `idx == self.len()`, it will return the size of the data segment (where a new element would go). /// /// ## Safety /// `idx <= self.len()` and `self.as_encoded_bytes()` is well-formed. unsafe fn element_position_unchecked(&self, idx: usize) -> usize { let len = self.len(); let out = if idx == len { self.entire_slice.len() - F::Len::SIZE - (F::Index::SIZE * (len - 1)) } else if let Some(idx) = self.index_data(idx) { idx.iule_to_usize() } else { 0 }; debug_assert!(out + F::Len::SIZE + (len - 1) * F::Index::SIZE <= self.entire_slice.len()); out } /// Get the range of a specific element in the data segment. /// /// ## Safety /// `idx < self.len()` and `self.as_encoded_bytes()` is well-formed. unsafe fn element_range_unchecked(&self, idx: usize) -> core::ops::Range { let start = self.element_position_unchecked(idx); let end = self.element_position_unchecked(idx + 1); debug_assert!(start <= end, "{start} > {end}"); start..end } /// Set the number of elements in the list without any checks. /// /// ## Safety /// No safe functions may be called until `self.as_encoded_bytes()` is well-formed. unsafe fn set_len(&mut self, len: usize) { assert!(len <= F::Len::MAX_VALUE as usize); let len_bytes = len.to_le_bytes(); let len_ule = F::Len::iule_from_usize(len).expect(F::Len::TOO_LARGE_ERROR); self.entire_slice[0..F::Len::SIZE].copy_from_slice(ULE::slice_as_bytes(&[len_ule])); // Double-check that the length fits in the length field assert_eq!(len_bytes[F::Len::SIZE..].iter().sum::(), 0); } /// Get the range in the full data for a given index. Returns None for index 0 /// since there is no stored index for it. fn index_range(index: usize) -> Option> { let index_minus_one = index.checked_sub(1)?; let pos = F::Len::SIZE + F::Index::SIZE * index_minus_one; Some(pos..pos + F::Index::SIZE) } /// Return the raw bytes representing the given `index`. 
Returns None when given index 0 /// /// ## Safety /// The index must be valid, and self.as_encoded_bytes() must be well-formed unsafe fn index_data(&self, index: usize) -> Option<&F::Index> { let index_range = Self::index_range(index)?; Some(&F::Index::slice_from_bytes_unchecked(&self.entire_slice[index_range])[0]) } /// Return the mutable slice representing the given `index`. Returns None when given index 0 /// /// ## Safety /// The index must be valid. self.as_encoded_bytes() must have allocated space /// for this index, but need not have its length appropriately set. unsafe fn index_data_mut(&mut self, index: usize) -> Option<&mut F::Index> { let ptr = self.entire_slice.as_mut_ptr(); let range = Self::index_range(index)?; // Doing this instead of just `get_unchecked_mut()` because it's unclear // if `get_unchecked_mut()` can be called out of bounds on a slice even // if we know the buffer is larger. let data = slice::from_raw_parts_mut(ptr.add(range.start), F::Index::SIZE); Some(&mut F::Index::iule_from_bytes_unchecked_mut(data)[0]) } /// Shift the indices starting with and after `starting_index` by the provided `amount`. /// /// ## Panics /// Should never be called with a starting index of 0, since that index cannot be shifted. /// /// ## Safety /// Adding `amount` to each index after `starting_index` must not result in the slice from becoming malformed. /// The length of the slice must be correctly set. unsafe fn shift_indices(&mut self, starting_index: usize, amount: i32) { let normalized_idx = starting_index .checked_sub(1) .expect("shift_indices called with a 0 starting index"); let len = self.len(); let indices = F::Index::iule_from_bytes_unchecked_mut( &mut self.entire_slice[F::Len::SIZE..F::Len::SIZE + F::Index::SIZE * (len - 1)], ); for idx in &mut indices[normalized_idx..] 
{ let mut new_idx = idx.iule_to_usize(); if amount > 0 { new_idx = new_idx.checked_add(amount.try_into().unwrap()).unwrap(); } else { new_idx = new_idx.checked_sub((-amount).try_into().unwrap()).unwrap(); } *idx = F::Index::iule_from_usize(new_idx).expect(F::Index::TOO_LARGE_ERROR); } } /// Get this [`VarZeroVecOwned`] as a borrowed [`VarZeroVec`] /// /// If you wish to repeatedly call methods on this [`VarZeroVecOwned`], /// it is more efficient to perform this conversion first pub fn as_varzerovec<'a>(&'a self) -> VarZeroVec<'a, T, F> { self.as_slice().into() } /// Empty the vector pub fn clear(&mut self) { self.entire_slice.clear() } /// Consume this vector and return the backing buffer #[inline] pub fn into_bytes(self) -> Vec { self.entire_slice } /// Invalidate and resize the data at an index, optionally inserting or removing the index. /// Also updates affected indices and the length. /// /// `new_size` is the encoded byte size of the element that is going to be inserted /// /// Returns a slice to the new element data - it doesn't contain uninitialized data but its value is indeterminate. /// /// ## Safety /// - `index` must be a valid index, or, if `shift_type == ShiftType::Insert`, `index == self.len()` is allowed. /// - `new_size` musn't result in the data segment growing larger than `F::Index::MAX_VALUE`. unsafe fn shift(&mut self, index: usize, new_size: usize, shift_type: ShiftType) -> &mut [u8] { // The format of the encoded data is: // - four bytes of "len" // - len*4 bytes for an array of indices // - the actual data to which the indices point // // When inserting or removing an element, the size of the indices segment must be changed, // so the data before the target element must be shifted by 4 bytes in addition to the // shifting needed for the new element size. 
let len = self.len(); let slice_len = self.entire_slice.len(); let prev_element = match shift_type { ShiftType::Insert => { let pos = self.element_position_unchecked(index); // In the case of an insert, there's no previous element, // so it's an empty range at the new position. pos..pos } _ => self.element_range_unchecked(index), }; // How much shifting must be done in bytes due to removal/insertion of an index. let index_shift: i64 = match shift_type { ShiftType::Insert => F::Index::SIZE as i64, ShiftType::Replace => 0, ShiftType::Remove => -(F::Index::SIZE as i64), }; // The total shift in byte size of the owned slice. let shift: i64 = new_size as i64 - (prev_element.end - prev_element.start) as i64 + index_shift; let new_slice_len = slice_len.wrapping_add(shift as usize); if shift > 0 { if new_slice_len > F::Index::MAX_VALUE as usize { panic!( "Attempted to grow VarZeroVec to an encoded size that does not fit within the length size used by {}", any::type_name::() ); } self.entire_slice.resize(new_slice_len, 0); } // Now that we've ensured there's enough space, we can shift the data around. { // Note: There are no references introduced between pointer creation and pointer use, and all // raw pointers are derived from a single &mut. This preserves pointer provenance. let slice_range = self.entire_slice.as_mut_ptr_range(); // The start of the indices buffer let indices_start = slice_range.start.add(F::Len::SIZE); let old_slice_end = slice_range.start.add(slice_len); let data_start = indices_start.add((len - 1) * F::Index::SIZE); let prev_element_p = data_start.add(prev_element.start)..data_start.add(prev_element.end); // The memory range of the affected index. // When inserting: where the new index goes. // When removing: where the index being removed is. // When replacing: unused. 
// Will be None when the affected index is index 0, which is special let index_range = if let Some(index_minus_one) = index.checked_sub(1) { let index_start = indices_start.add(F::Index::SIZE * index_minus_one); Some(index_start..index_start.add(F::Index::SIZE)) } else { None }; unsafe fn shift_bytes(block: Range<*const u8>, to: *mut u8) { debug_assert!(block.end >= block.start); ptr::copy(block.start, to, block.end.offset_from(block.start) as usize); } if shift_type == ShiftType::Remove { if let Some(ref index_range) = index_range { shift_bytes(index_range.end..prev_element_p.start, index_range.start); } else { // We are removing the first index, so we skip the second index and copy it over. The second index // is now zero and unnecessary. shift_bytes( indices_start.add(F::Index::SIZE)..prev_element_p.start, indices_start, ) } } // Shift data after the element to its new position. shift_bytes( prev_element_p.end..old_slice_end, prev_element_p .start .offset((new_size as i64 + index_shift) as isize), ); let first_affected_index = match shift_type { ShiftType::Insert => { if let Some(index_range) = index_range { // Move data before the element forward by 4 to make space for a new index. shift_bytes(index_range.start..prev_element_p.start, index_range.end); let index_data = self .index_data_mut(index) .expect("If index_range is some, index is > 0 and should not panic in index_data_mut"); *index_data = F::Index::iule_from_usize(prev_element.start) .expect(F::Index::TOO_LARGE_ERROR); } else { // We are adding a new index 0. 
There's nothing in the indices array for index 0, but the element // that is currently at index 0 will become index 1 and need a value // We first shift bytes to make space shift_bytes( indices_start..prev_element_p.start, indices_start.add(F::Index::SIZE), ); // And then we write a temporary zero to the zeroeth index, which will get shifted later let index_data = self .index_data_mut(1) .expect("Should be able to write to index 1"); *index_data = F::Index::iule_from_usize(0).expect("0 is always valid!"); } self.set_len(len + 1); index + 1 } ShiftType::Remove => { self.set_len(len - 1); if index == 0 { // We don't need to shift index 0 since index 0 is not stored in the indices buffer index + 1 } else { index } } ShiftType::Replace => index + 1, }; // No raw pointer use should occur after this point (because of self.index_data and self.set_len). // Set the new slice length. This must be done after shifting data around to avoid uninitialized data. self.entire_slice.set_len(new_slice_len); // Shift the affected indices. self.shift_indices(first_affected_index, (shift - index_shift) as i32); }; debug_assert!(self.verify_integrity()); // Return a mut slice to the new element data. let element_pos = F::Len::SIZE + (self.len() - 1) * F::Index::SIZE + self.element_position_unchecked(index); &mut self.entire_slice[element_pos..element_pos + new_size] } /// Checks the internal invariants of the vec to ensure safe code will not cause UB. /// Returns whether integrity was verified. /// /// Note: an index is valid if it doesn't point to data past the end of the slice and is /// less than or equal to all future indices. The length of the index segment is not part of each index. 
fn verify_integrity(&self) -> bool { if self.is_empty() { if self.entire_slice.is_empty() { return true; } else { panic!( "VarZeroVecOwned integrity: Found empty VarZeroVecOwned with a nonempty slice" ); } } let len = unsafe { ::slice_from_bytes_unchecked(&self.entire_slice[..F::Len::SIZE])[0] .iule_to_usize() }; if len == 0 { // An empty vec must have an empty slice: there is only a single valid byte representation. panic!("VarZeroVecOwned integrity: Found empty VarZeroVecOwned with a nonempty slice"); } if self.entire_slice.len() < F::Len::SIZE + (len - 1) * F::Index::SIZE { panic!("VarZeroVecOwned integrity: Not enough room for the indices"); } let data_len = self.entire_slice.len() - F::Len::SIZE - (len - 1) * F::Index::SIZE; if data_len > F::Index::MAX_VALUE as usize { panic!("VarZeroVecOwned integrity: Data segment is too long"); } // Test index validity. let indices = unsafe { F::Index::slice_from_bytes_unchecked( &self.entire_slice[F::Len::SIZE..F::Len::SIZE + (len - 1) * F::Index::SIZE], ) }; for idx in indices { if idx.iule_to_usize() > data_len { panic!("VarZeroVecOwned integrity: Indices must not point past the data segment"); } } for window in indices.windows(2) { if window[0].iule_to_usize() > window[1].iule_to_usize() { panic!("VarZeroVecOwned integrity: Indices must be in non-decreasing order"); } } true } /// Insert an element at the end of this vector pub fn push + ?Sized>(&mut self, element: &A) { self.insert(self.len(), element) } /// Insert an element at index `idx` pub fn insert + ?Sized>(&mut self, index: usize, element: &A) { let len = self.len(); if index > len { panic!("Called out-of-bounds insert() on VarZeroVec, index {index} len {len}"); } let value_len = element.encode_var_ule_len(); if len == 0 { let header_len = F::Len::SIZE; // Index array is size 0 for len = 1 let cap = header_len + value_len; self.entire_slice.resize(cap, 0); self.entire_slice[0] = 1; // set length element.encode_var_ule_write(&mut 
self.entire_slice[header_len..]); return; } assert!(value_len < F::Index::MAX_VALUE as usize); unsafe { let place = self.shift(index, value_len, ShiftType::Insert); element.encode_var_ule_write(place); } } /// Remove the element at index `idx` pub fn remove(&mut self, index: usize) { let len = self.len(); if index >= len { panic!("Called out-of-bounds remove() on VarZeroVec, index {index} len {len}"); } if len == 1 { // This is removing the last element. Set the slice to empty to ensure all empty vecs have empty data slices. self.entire_slice.clear(); return; } unsafe { self.shift(index, 0, ShiftType::Remove); } } /// Replace the element at index `idx` with another pub fn replace + ?Sized>(&mut self, index: usize, element: &A) { let len = self.len(); if index >= len { panic!("Called out-of-bounds replace() on VarZeroVec, index {index} len {len}"); } let value_len = element.encode_var_ule_len(); assert!(value_len < F::Index::MAX_VALUE as usize); unsafe { let place = self.shift(index, value_len, ShiftType::Replace); element.encode_var_ule_write(place); } } } impl fmt::Debug for VarZeroVecOwned where T: fmt::Debug, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { VarZeroSlice::fmt(self, f) } } impl Default for VarZeroVecOwned { fn default() -> Self { Self::new() } } impl PartialEq<&'_ [A]> for VarZeroVecOwned where T: VarULE + ?Sized, T: PartialEq, A: AsRef, F: VarZeroVecFormat, { #[inline] fn eq(&self, other: &&[A]) -> bool { self.iter().eq(other.iter().map(|t| t.as_ref())) } } impl<'a, T: ?Sized + VarULE, F: VarZeroVecFormat> From<&'a VarZeroSlice> for VarZeroVecOwned { fn from(other: &'a VarZeroSlice) -> Self { Self::from_slice(other) } } #[cfg(test)] mod test { use super::VarZeroVecOwned; #[test] fn test_insert_integrity() { let mut items: Vec = Vec::new(); let mut zerovec = VarZeroVecOwned::::new(); // Insert into an empty vec. 
items.insert(0, "1234567890".into()); zerovec.insert(0, "1234567890"); assert_eq!(zerovec, &*items); zerovec.insert(1, "foo3"); items.insert(1, "foo3".into()); assert_eq!(zerovec, &*items); // Insert at the end. items.insert(items.len(), "qwertyuiop".into()); zerovec.insert(zerovec.len(), "qwertyuiop"); assert_eq!(zerovec, &*items); items.insert(0, "asdfghjkl;".into()); zerovec.insert(0, "asdfghjkl;"); assert_eq!(zerovec, &*items); items.insert(2, "".into()); zerovec.insert(2, ""); assert_eq!(zerovec, &*items); } #[test] // ensure that inserting empty items works fn test_empty_inserts() { let mut items: Vec = Vec::new(); let mut zerovec = VarZeroVecOwned::::new(); // Insert into an empty vec. items.insert(0, "".into()); zerovec.insert(0, ""); assert_eq!(zerovec, &*items); items.insert(0, "".into()); zerovec.insert(0, ""); assert_eq!(zerovec, &*items); items.insert(0, "1234567890".into()); zerovec.insert(0, "1234567890"); assert_eq!(zerovec, &*items); items.insert(0, "".into()); zerovec.insert(0, ""); assert_eq!(zerovec, &*items); } #[test] fn test_small_insert_integrity() { // Tests that insert() works even when there // is not enough space for the new index in entire_slice.len() let mut items: Vec = Vec::new(); let mut zerovec = VarZeroVecOwned::::new(); // Insert into an empty vec. 
items.insert(0, "abc".into()); zerovec.insert(0, "abc"); assert_eq!(zerovec, &*items); zerovec.insert(1, "def"); items.insert(1, "def".into()); assert_eq!(zerovec, &*items); } #[test] #[should_panic] fn test_insert_past_end() { VarZeroVecOwned::::new().insert(1, ""); } #[test] fn test_remove_integrity() { let mut items: Vec<&str> = vec!["apples", "bananas", "eeples", "", "baneenees", "five", ""]; let mut zerovec = VarZeroVecOwned::::try_from_elements(&items).unwrap(); for index in [0, 2, 4, 0, 1, 1, 0] { items.remove(index); zerovec.remove(index); assert_eq!(zerovec, &*items, "index {}, len {}", index, items.len()); } } #[test] fn test_removing_last_element_clears() { let mut zerovec = VarZeroVecOwned::::try_from_elements(&["buy some apples"]).unwrap(); assert!(!zerovec.as_bytes().is_empty()); zerovec.remove(0); assert!(zerovec.as_bytes().is_empty()); } #[test] #[should_panic] fn test_remove_past_end() { VarZeroVecOwned::::new().remove(0); } #[test] fn test_replace_integrity() { let mut items: Vec<&str> = vec!["apples", "bananas", "eeples", "", "baneenees", "five", ""]; let mut zerovec = VarZeroVecOwned::::try_from_elements(&items).unwrap(); // Replace with an element of the same size (and the first element) items[0] = "blablah"; zerovec.replace(0, "blablah"); assert_eq!(zerovec, &*items); // Replace with a smaller element items[1] = "twily"; zerovec.replace(1, "twily"); assert_eq!(zerovec, &*items); // Replace an empty element items[3] = "aoeuidhtns"; zerovec.replace(3, "aoeuidhtns"); assert_eq!(zerovec, &*items); // Replace the last element items[6] = "0123456789"; zerovec.replace(6, "0123456789"); assert_eq!(zerovec, &*items); // Replace with an empty element items[2] = ""; zerovec.replace(2, ""); assert_eq!(zerovec, &*items); } #[test] #[should_panic] fn test_replace_past_end() { VarZeroVecOwned::::new().replace(0, ""); } } zerovec-0.11.1/src/varzerovec/serde.rs000064400000000000000000000212011046102023000160020ustar 00000000000000// This file is part of ICU4X. 
For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use super::{VarZeroSlice, VarZeroVec, VarZeroVecFormat}; use crate::ule::*; use alloc::boxed::Box; use alloc::vec::Vec; use core::fmt; use core::marker::PhantomData; use serde::de::{self, Deserialize, Deserializer, SeqAccess, Visitor}; #[cfg(feature = "serde")] use serde::ser::{Serialize, SerializeSeq, Serializer}; struct VarZeroVecVisitor { #[allow(clippy::type_complexity)] // this is a private marker type, who cares marker: PhantomData<(fn() -> Box, F)>, } impl Default for VarZeroVecVisitor { fn default() -> Self { Self { marker: PhantomData, } } } impl<'de, T, F> Visitor<'de> for VarZeroVecVisitor where T: VarULE + ?Sized, Box: Deserialize<'de>, F: VarZeroVecFormat, { type Value = VarZeroVec<'de, T, F>; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("a sequence or borrowed buffer of bytes") } fn visit_borrowed_bytes(self, bytes: &'de [u8]) -> Result where E: de::Error, { VarZeroVec::parse_bytes(bytes).map_err(de::Error::custom) } fn visit_seq(self, mut seq: A) -> Result where A: SeqAccess<'de>, { let mut vec: Vec> = if let Some(capacity) = seq.size_hint() { Vec::with_capacity(capacity) } else { Vec::new() }; while let Some(value) = seq.next_element::>()? 
{ vec.push(value); } Ok(VarZeroVec::from(&vec)) } } /// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate impl<'de, 'a, T, F> Deserialize<'de> for VarZeroVec<'a, T, F> where T: VarULE + ?Sized, Box: Deserialize<'de>, F: VarZeroVecFormat, 'de: 'a, { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { let visitor = VarZeroVecVisitor::::default(); if deserializer.is_human_readable() { deserializer.deserialize_seq(visitor) } else { deserializer.deserialize_bytes(visitor) } } } /// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate impl<'de, 'a, T, F> Deserialize<'de> for &'a VarZeroSlice where T: VarULE + ?Sized, F: VarZeroVecFormat, 'de: 'a, { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { if deserializer.is_human_readable() { Err(de::Error::custom( "&VarZeroSlice cannot be deserialized from human-readable formats", )) } else { let bytes = <&[u8]>::deserialize(deserializer)?; VarZeroSlice::::parse_bytes(bytes).map_err(de::Error::custom) } } } /// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate impl<'de, T, F> Deserialize<'de> for Box> where T: VarULE + ?Sized, Box: Deserialize<'de>, F: VarZeroVecFormat, { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { let deserialized = VarZeroVec::::deserialize(deserializer)?; Ok(deserialized.to_boxed()) } } /// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate #[cfg(feature = "serde")] impl Serialize for VarZeroVec<'_, T, F> where T: Serialize + VarULE + ?Sized, F: VarZeroVecFormat, { fn serialize(&self, serializer: S) -> Result where S: Serializer, { if serializer.is_human_readable() { let mut seq = serializer.serialize_seq(Some(self.len()))?; for value in self.iter() { seq.serialize_element(value)?; } seq.end() } else { serializer.serialize_bytes(self.as_bytes()) } } } /// This impl requires enabling the optional 
`serde` Cargo feature of the `zerovec` crate #[cfg(feature = "serde")] impl Serialize for VarZeroSlice where T: Serialize + VarULE + ?Sized, F: VarZeroVecFormat, { fn serialize(&self, serializer: S) -> Result where S: Serializer, { self.as_varzerovec().serialize(serializer) } } #[cfg(test)] #[allow(non_camel_case_types)] mod test { use crate::{VarZeroSlice, VarZeroVec}; #[derive(serde::Serialize, serde::Deserialize)] struct DeriveTest_VarZeroVec<'data> { #[serde(borrow)] _data: VarZeroVec<'data, str>, } #[derive(serde::Serialize, serde::Deserialize)] struct DeriveTest_VarZeroSlice<'data> { #[serde(borrow)] _data: &'data VarZeroSlice, } #[derive(serde::Serialize, serde::Deserialize)] struct DeriveTest_VarZeroVec_of_VarZeroSlice<'data> { #[serde(borrow)] _data: VarZeroVec<'data, VarZeroSlice>, } // ["foo", "bar", "baz", "dolor", "quux", "lorem ipsum"]; const BYTES: &[u8] = &[ 6, 0, 3, 0, 6, 0, 9, 0, 14, 0, 18, 0, 102, 111, 111, 98, 97, 114, 98, 97, 122, 100, 111, 108, 111, 114, 113, 117, 117, 120, 108, 111, 114, 101, 109, 32, 105, 112, 115, 117, 109, ]; const JSON_STR: &str = "[\"foo\",\"bar\",\"baz\",\"dolor\",\"quux\",\"lorem ipsum\"]"; const BINCODE_BUF: &[u8] = &[ 41, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3, 0, 6, 0, 9, 0, 14, 0, 18, 0, 102, 111, 111, 98, 97, 114, 98, 97, 122, 100, 111, 108, 111, 114, 113, 117, 117, 120, 108, 111, 114, 101, 109, 32, 105, 112, 115, 117, 109, ]; // ["w", "ω", "文", "𑄃"] const NONASCII_STR: &[&str] = &["w", "ω", "文", "𑄃"]; const NONASCII_BYTES: &[u8] = &[ 4, 0, 1, 0, 3, 0, 6, 0, 119, 207, 137, 230, 150, 135, 240, 145, 132, 131, ]; #[test] fn test_serde_json() { let zerovec_orig: VarZeroVec = VarZeroVec::parse_bytes(BYTES).expect("parse"); let json_str = serde_json::to_string(&zerovec_orig).expect("serialize"); assert_eq!(JSON_STR, json_str); // VarZeroVec should deserialize from JSON to either Vec or VarZeroVec let vec_new: Vec> = serde_json::from_str(&json_str).expect("deserialize from buffer to Vec"); assert_eq!(zerovec_orig.to_vec(), 
vec_new); let zerovec_new: VarZeroVec = serde_json::from_str(&json_str).expect("deserialize from buffer to VarZeroVec"); assert_eq!(zerovec_orig.to_vec(), zerovec_new.to_vec()); assert!(zerovec_new.is_owned()); } #[test] fn test_serde_bincode() { let zerovec_orig: VarZeroVec = VarZeroVec::parse_bytes(BYTES).expect("parse"); let bincode_buf = bincode::serialize(&zerovec_orig).expect("serialize"); assert_eq!(BINCODE_BUF, bincode_buf); let zerovec_new: VarZeroVec = bincode::deserialize(&bincode_buf).expect("deserialize from buffer to VarZeroVec"); assert_eq!(zerovec_orig.to_vec(), zerovec_new.to_vec()); assert!(!zerovec_new.is_owned()); } #[test] fn test_vzv_borrowed() { let zerovec_orig: &VarZeroSlice = VarZeroSlice::parse_bytes(BYTES).expect("parse"); let bincode_buf = bincode::serialize(&zerovec_orig).expect("serialize"); assert_eq!(BINCODE_BUF, bincode_buf); let zerovec_new: &VarZeroSlice = bincode::deserialize(&bincode_buf).expect("deserialize from buffer to VarZeroSlice"); assert_eq!(zerovec_orig.to_vec(), zerovec_new.to_vec()); } #[test] fn test_nonascii_bincode() { let src_vec = NONASCII_STR .iter() .copied() .map(Box::::from) .collect::>(); let mut zerovec: VarZeroVec = VarZeroVec::parse_bytes(NONASCII_BYTES).expect("parse"); assert_eq!(zerovec.to_vec(), src_vec); let bincode_buf = bincode::serialize(&zerovec).expect("serialize"); let zerovec_result = bincode::deserialize::>(&bincode_buf).expect("deserialize"); assert_eq!(zerovec_result.to_vec(), src_vec); // try again with owned zerovec zerovec.make_mut(); let bincode_buf = bincode::serialize(&zerovec).expect("serialize"); let zerovec_result = bincode::deserialize::>(&bincode_buf).expect("deserialize"); assert_eq!(zerovec_result.to_vec(), src_vec); } } zerovec-0.11.1/src/varzerovec/slice.rs000064400000000000000000000431371046102023000160130ustar 00000000000000// This file is part of ICU4X. 
For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use super::components::{VarZeroSliceIter, VarZeroVecComponents}; use super::vec::VarZeroVecInner; use super::*; use crate::ule::*; use core::cmp::{Ord, Ordering, PartialOrd}; use core::fmt; use core::marker::PhantomData; use core::mem; use core::ops::Index; use core::ops::Range; /// A zero-copy "slice", that works for unsized types, i.e. the zero-copy version of `[T]` /// where `T` is not `Sized`. /// /// This behaves similarly to [`VarZeroVec`], however [`VarZeroVec`] is allowed to contain /// owned data and as such is ideal for deserialization since most human readable /// serialization formats cannot unconditionally deserialize zero-copy. /// /// This type can be used inside [`VarZeroVec`](crate::VarZeroVec) and [`ZeroMap`](crate::ZeroMap): /// This essentially allows for the construction of zero-copy types isomorphic to `Vec>` by instead /// using `VarZeroVec>`. /// /// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the /// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`]. /// /// This type can be nested within itself to allow for multi-level nested `Vec`s. 
/// /// # Examples /// /// ## Nested Slices /// /// The following code constructs the conceptual zero-copy equivalent of `Vec>>` /// /// ```rust /// use zerovec::{VarZeroSlice, VarZeroVec}; /// let strings_1: Vec<&str> = vec!["foo", "bar", "baz"]; /// let strings_2: Vec<&str> = vec!["twelve", "seventeen", "forty two"]; /// let strings_3: Vec<&str> = vec!["我", "喜歡", "烏龍茶"]; /// let strings_4: Vec<&str> = vec!["w", "ω", "文", "𑄃"]; /// let strings_12 = vec![&*strings_1, &*strings_2]; /// let strings_34 = vec![&*strings_3, &*strings_4]; /// let all_strings = vec![strings_12, strings_34]; /// /// let vzv_1: VarZeroVec = VarZeroVec::from(&strings_1); /// let vzv_2: VarZeroVec = VarZeroVec::from(&strings_2); /// let vzv_3: VarZeroVec = VarZeroVec::from(&strings_3); /// let vzv_4: VarZeroVec = VarZeroVec::from(&strings_4); /// let vzv_12 = VarZeroVec::from(&[vzv_1.as_slice(), vzv_2.as_slice()]); /// let vzv_34 = VarZeroVec::from(&[vzv_3.as_slice(), vzv_4.as_slice()]); /// let vzv_all = VarZeroVec::from(&[vzv_12.as_slice(), vzv_34.as_slice()]); /// /// let reconstructed: Vec>> = vzv_all /// .iter() /// .map(|v: &VarZeroSlice>| { /// v.iter() /// .map(|x: &VarZeroSlice<_>| { /// x.as_varzerovec() /// .iter() /// .map(|s| s.to_owned()) /// .collect::>() /// }) /// .collect::>() /// }) /// .collect::>(); /// assert_eq!(reconstructed, all_strings); /// /// let bytes = vzv_all.as_bytes(); /// let vzv_from_bytes: VarZeroVec>> = /// VarZeroVec::parse_bytes(bytes).unwrap(); /// assert_eq!(vzv_from_bytes, vzv_all); /// ``` /// /// ## Iterate over Windows /// /// Although [`VarZeroSlice`] does not itself have a `.windows` iterator like /// [core::slice::Windows], this behavior can be easily modeled using an iterator: /// /// ``` /// use zerovec::VarZeroVec; /// /// let vzv = VarZeroVec::::from(&["a", "b", "c", "d"]); /// # let mut pairs: Vec<(&str, &str)> = Vec::new(); /// /// let mut it = vzv.iter().peekable(); /// while let (Some(x), Some(y)) = (it.next(), it.peek()) { /// // 
Evaluate (x, y) here. /// # pairs.push((x, y)); /// } /// # assert_eq!(pairs, &[("a", "b"), ("b", "c"), ("c", "d")]); /// ``` // // safety invariant: The slice MUST be one which parses to // a valid VarZeroVecComponents #[repr(transparent)] pub struct VarZeroSlice { marker: PhantomData<(F, T)>, /// The original slice this was constructed from entire_slice: [u8], } impl VarZeroSlice { /// Construct a new empty VarZeroSlice pub const fn new_empty() -> &'static Self { // The empty VZV is special-cased to the empty slice unsafe { mem::transmute(&[] as &[u8]) } } /// Obtain a [`VarZeroVecComponents`] borrowing from the internal buffer #[inline] pub(crate) fn as_components<'a>(&'a self) -> VarZeroVecComponents<'a, T, F> { unsafe { // safety: VarZeroSlice is guaranteed to parse here VarZeroVecComponents::from_bytes_unchecked(&self.entire_slice) } } /// Uses a `&[u8]` buffer as a `VarZeroSlice` without any verification. /// /// # Safety /// /// `bytes` need to be an output from [`VarZeroSlice::as_bytes()`]. pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { // self is really just a wrapper around a byte slice mem::transmute(bytes) } /// Get the number of elements in this slice /// /// # Example /// /// ```rust /// # use zerovec::VarZeroVec; /// /// let strings = vec!["foo", "bar", "baz", "quux"]; /// let vec = VarZeroVec::::from(&strings); /// /// assert_eq!(vec.len(), 4); /// ``` pub fn len(&self) -> usize { self.as_components().len() } /// Returns `true` if the slice contains no elements. 
/// /// # Examples /// /// ``` /// # use zerovec::VarZeroVec; /// /// let strings: Vec = vec![]; /// let vec = VarZeroVec::::from(&strings); /// /// assert!(vec.is_empty()); /// ``` pub fn is_empty(&self) -> bool { self.as_components().is_empty() } /// Obtain an iterator over this slice's elements /// /// # Example /// /// ```rust /// # use zerovec::VarZeroVec; /// /// let strings = vec!["foo", "bar", "baz", "quux"]; /// let vec = VarZeroVec::::from(&strings); /// /// let mut iter_results: Vec<&str> = vec.iter().collect(); /// assert_eq!(iter_results[0], "foo"); /// assert_eq!(iter_results[1], "bar"); /// assert_eq!(iter_results[2], "baz"); /// assert_eq!(iter_results[3], "quux"); /// ``` pub fn iter<'b>(&'b self) -> VarZeroSliceIter<'b, T, F> { self.as_components().iter() } /// Get one of this slice's elements, returning `None` if the index is out of bounds /// /// # Example /// /// ```rust /// # use zerovec::VarZeroVec; /// /// let strings = vec!["foo", "bar", "baz", "quux"]; /// let vec = VarZeroVec::::from(&strings); /// /// let mut iter_results: Vec<&str> = vec.iter().collect(); /// assert_eq!(vec.get(0), Some("foo")); /// assert_eq!(vec.get(1), Some("bar")); /// assert_eq!(vec.get(2), Some("baz")); /// assert_eq!(vec.get(3), Some("quux")); /// assert_eq!(vec.get(4), None); /// ``` pub fn get(&self, idx: usize) -> Option<&T> { self.as_components().get(idx) } /// Get one of this slice's elements /// /// # Safety /// /// `index` must be in range /// /// # Example /// /// ```rust /// # use zerovec::VarZeroVec; /// /// let strings = vec!["foo", "bar", "baz", "quux"]; /// let vec = VarZeroVec::::from(&strings); /// /// let mut iter_results: Vec<&str> = vec.iter().collect(); /// unsafe { /// assert_eq!(vec.get_unchecked(0), "foo"); /// assert_eq!(vec.get_unchecked(1), "bar"); /// assert_eq!(vec.get_unchecked(2), "baz"); /// assert_eq!(vec.get_unchecked(3), "quux"); /// } /// ``` pub unsafe fn get_unchecked(&self, idx: usize) -> &T { 
self.as_components().get_unchecked(idx) } /// Obtain an owned `Vec>` out of this #[cfg(feature = "alloc")] pub fn to_vec(&self) -> alloc::vec::Vec> { self.as_components().to_vec() } /// Get a reference to the entire encoded backing buffer of this slice /// /// The bytes can be passed back to [`Self::parse_bytes()`]. /// /// To take the bytes as a vector, see [`VarZeroVec::into_bytes()`]. /// /// # Example /// /// ```rust /// # use zerovec::VarZeroVec; /// /// let strings = vec!["foo", "bar", "baz"]; /// let vzv = VarZeroVec::::from(&strings); /// /// assert_eq!(vzv, VarZeroVec::parse_bytes(vzv.as_bytes()).unwrap()); /// ``` #[inline] pub const fn as_bytes(&self) -> &[u8] { &self.entire_slice } /// Get this [`VarZeroSlice`] as a borrowed [`VarZeroVec`] /// /// If you wish to repeatedly call methods on this [`VarZeroSlice`], /// it is more efficient to perform this conversion first pub const fn as_varzerovec<'a>(&'a self) -> VarZeroVec<'a, T, F> { VarZeroVec(VarZeroVecInner::Borrowed(self)) } /// Parse a VarZeroSlice from a slice of the appropriate format /// /// Slices of the right format can be obtained via [`VarZeroSlice::as_bytes()`] pub fn parse_bytes<'a>(slice: &'a [u8]) -> Result<&'a Self, UleError> { ::parse_bytes(slice) } } impl VarZeroSlice where T: VarULE, T: ?Sized, T: Ord, F: VarZeroVecFormat, { /// Binary searches a sorted `VarZeroVec` for the given element. For more information, see /// the standard library function [`binary_search`]. 
/// /// # Example /// /// ``` /// # use zerovec::VarZeroVec; /// /// let strings = vec!["a", "b", "f", "g"]; /// let vec = VarZeroVec::::from(&strings); /// /// assert_eq!(vec.binary_search("f"), Ok(2)); /// assert_eq!(vec.binary_search("e"), Err(2)); /// ``` /// /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search #[inline] pub fn binary_search(&self, x: &T) -> Result { self.as_components().binary_search(x) } /// Binary searches a `VarZeroVec` for the given element within a certain sorted range. /// /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according /// to the behavior of the standard library function [`binary_search`]. /// /// The index is returned relative to the start of the range. /// /// # Example /// /// ``` /// # use zerovec::VarZeroVec; /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"]; /// let vec = VarZeroVec::::from(&strings); /// /// // Same behavior as binary_search when the range covers the whole slice: /// assert_eq!(vec.binary_search_in_range("g", 0..7), Some(Ok(3))); /// assert_eq!(vec.binary_search_in_range("h", 0..7), Some(Err(4))); /// /// // Will not look outside of the range: /// assert_eq!(vec.binary_search_in_range("g", 0..1), Some(Err(1))); /// assert_eq!(vec.binary_search_in_range("g", 6..7), Some(Err(0))); /// /// // Will return indices relative to the start of the range: /// assert_eq!(vec.binary_search_in_range("g", 1..6), Some(Ok(2))); /// assert_eq!(vec.binary_search_in_range("h", 1..6), Some(Err(3))); /// /// // Will return `None` if the range is out of bounds: /// assert_eq!(vec.binary_search_in_range("x", 100..200), None); /// assert_eq!(vec.binary_search_in_range("x", 0..200), None); /// ``` /// /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search #[inline] pub fn binary_search_in_range( &self, x: &T, range: Range, ) -> Option> { self.as_components().binary_search_in_range(x, range) } } impl 
VarZeroSlice where T: VarULE, T: ?Sized, F: VarZeroVecFormat, { /// Binary searches a sorted `VarZeroVec` for the given predicate. For more information, see /// the standard library function [`binary_search_by`]. /// /// # Example /// /// ``` /// # use zerovec::VarZeroVec; /// let strings = vec!["a", "b", "f", "g"]; /// let vec = VarZeroVec::::from(&strings); /// /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("f")), Ok(2)); /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("e")), Err(2)); /// ``` /// /// [`binary_search_by`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search_by #[inline] pub fn binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result { self.as_components().binary_search_by(predicate) } /// Binary searches a `VarZeroVec` for the given predicate within a certain sorted range. /// /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according /// to the behavior of the standard library function [`binary_search`]. /// /// The index is returned relative to the start of the range. 
/// /// # Example /// /// ``` /// # use zerovec::VarZeroVec; /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"]; /// let vec = VarZeroVec::::from(&strings); /// /// // Same behavior as binary_search when the range covers the whole slice: /// assert_eq!( /// vec.binary_search_in_range_by(|v| v.cmp("g"), 0..7), /// Some(Ok(3)) /// ); /// assert_eq!( /// vec.binary_search_in_range_by(|v| v.cmp("h"), 0..7), /// Some(Err(4)) /// ); /// /// // Will not look outside of the range: /// assert_eq!( /// vec.binary_search_in_range_by(|v| v.cmp("g"), 0..1), /// Some(Err(1)) /// ); /// assert_eq!( /// vec.binary_search_in_range_by(|v| v.cmp("g"), 6..7), /// Some(Err(0)) /// ); /// /// // Will return indices relative to the start of the range: /// assert_eq!( /// vec.binary_search_in_range_by(|v| v.cmp("g"), 1..6), /// Some(Ok(2)) /// ); /// assert_eq!( /// vec.binary_search_in_range_by(|v| v.cmp("h"), 1..6), /// Some(Err(3)) /// ); /// /// // Will return `None` if the range is out of bounds: /// assert_eq!( /// vec.binary_search_in_range_by(|v| v.cmp("x"), 100..200), /// None /// ); /// assert_eq!(vec.binary_search_in_range_by(|v| v.cmp("x"), 0..200), None); /// ``` /// /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search pub fn binary_search_in_range_by( &self, predicate: impl FnMut(&T) -> Ordering, range: Range, ) -> Option> { self.as_components() .binary_search_in_range_by(predicate, range) } } // Safety (based on the safety checklist on the VarULE trait): // 1. VarZeroSlice does not include any uninitialized or padding bytes (achieved by `#[repr(transparent)]` on a // `[u8]` slice which satisfies this invariant) // 2. VarZeroSlice is aligned to 1 byte (achieved by `#[repr(transparent)]` on a // `[u8]` slice which satisfies this invariant) // 3. The impl of `validate_bytes()` returns an error if any byte is not valid. // 4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety // 5. 
The impl of `from_bytes_unchecked()` returns a reference to the same data. // 6. `as_bytes()` is equivalent to a regular transmute of the underlying data // 7. VarZeroSlice byte equality is semantic equality (relying on the guideline of the underlying VarULE type) unsafe impl VarULE for VarZeroSlice { fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> { let _: VarZeroVecComponents = VarZeroVecComponents::parse_bytes(bytes).map_err(|_| UleError::parse::())?; Ok(()) } unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { // self is really just a wrapper around a byte slice mem::transmute(bytes) } fn as_bytes(&self) -> &[u8] { &self.entire_slice } } impl Index for VarZeroSlice { type Output = T; fn index(&self, index: usize) -> &Self::Output { #[allow(clippy::panic)] // documented match self.get(index) { Some(x) => x, None => panic!( "index out of bounds: the len is {} but the index is {index}", self.len() ), } } } impl PartialEq> for VarZeroSlice where T: VarULE, T: ?Sized, T: PartialEq, F: VarZeroVecFormat, { #[inline] fn eq(&self, other: &VarZeroSlice) -> bool { // VarULE has an API guarantee that this is equivalent // to `T::VarULE::eq()` self.entire_slice.eq(&other.entire_slice) } } impl Eq for VarZeroSlice where T: VarULE, T: ?Sized, T: Eq, F: VarZeroVecFormat, { } impl PartialOrd for VarZeroSlice { #[inline] fn partial_cmp(&self, other: &Self) -> Option { self.iter().partial_cmp(other.iter()) } } impl Ord for VarZeroSlice { #[inline] fn cmp(&self, other: &Self) -> Ordering { self.iter().cmp(other.iter()) } } impl fmt::Debug for VarZeroSlice where T: fmt::Debug, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_list().entries(self.iter()).finish() } } impl AsRef> for VarZeroSlice { fn as_ref(&self) -> &VarZeroSlice { self } } zerovec-0.11.1/src/varzerovec/vec.rs000064400000000000000000000401651046102023000154670ustar 00000000000000// This file is part of ICU4X. 
For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::ule::*; use core::cmp::{Ord, Ordering, PartialOrd}; use core::fmt; use core::ops::Deref; use super::*; /// A zero-copy, byte-aligned vector for variable-width types. /// /// `VarZeroVec` is designed as a drop-in replacement for `Vec` in situations where it is /// desirable to borrow data from an unaligned byte slice, such as zero-copy deserialization, and /// where `T`'s data is variable-length (e.g. `String`) /// /// `T` must implement [`VarULE`], which is already implemented for [`str`] and `[u8]`. For storing more /// complicated series of elements, it is implemented on `ZeroSlice` as well as `VarZeroSlice` /// for nesting. [`zerovec::make_varule`](crate::make_varule) may be used to generate /// a dynamically-sized [`VarULE`] type and conversions to and from a custom type. /// /// For example, here are some owned types and their zero-copy equivalents: /// /// - `Vec`: `VarZeroVec<'a, str>` /// - `Vec>>`: `VarZeroVec<'a, [u8]>` /// - `Vec>`: `VarZeroVec<'a, ZeroSlice>` /// - `Vec>`: `VarZeroVec<'a, VarZeroSlice>` /// /// Most of the methods on `VarZeroVec<'a, T>` come from its [`Deref`] implementation to [`VarZeroSlice`](VarZeroSlice). /// /// For creating zero-copy vectors of fixed-size types, see [`ZeroVec`](crate::ZeroVec). /// /// `VarZeroVec` behaves much like [`Cow`](alloc::borrow::Cow), where it can be constructed from /// owned data (and then mutated!) but can also borrow from some buffer. /// /// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the /// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`]. /// /// # Bytes and Equality /// /// Two [`VarZeroVec`]s are equal if and only if their bytes are equal, as described in the trait /// [`VarULE`]. 
/// However, we do not guarantee stability of byte equality or serialization format
/// across major SemVer releases.
///
/// To compare a [`Vec`] to a [`VarZeroVec`], it is generally recommended to use
/// [`Iterator::eq`], since it is somewhat expensive at runtime to convert from a [`Vec`] to a
/// [`VarZeroVec`] or vice-versa.
///
/// Prior to zerovec reaching 1.0, the precise byte representation of [`VarZeroVec`] is still
/// under consideration, with different options along the space-time spectrum. See
/// [#1410](https://github.com/unicode-org/icu4x/issues/1410).
///
/// # Example
///
/// ```rust
/// use zerovec::VarZeroVec;
///
/// // The little-endian bytes correspond to the list of strings.
/// let strings = vec!["w", "ω", "文", "𑄃"];
///
/// #[derive(serde::Serialize, serde::Deserialize)]
/// struct Data<'a> {
///     #[serde(borrow)]
///     strings: VarZeroVec<'a, str>,
/// }
///
/// let data = Data {
///     strings: VarZeroVec::from(&strings),
/// };
///
/// let bincode_bytes =
///     bincode::serialize(&data).expect("Serialization should be successful");
///
/// // Will deserialize without allocations
/// let deserialized: Data = bincode::deserialize(&bincode_bytes)
///     .expect("Deserialization should be successful");
///
/// assert_eq!(deserialized.strings.get(2), Some("文"));
/// assert_eq!(deserialized.strings, &*strings);
/// ```
///
/// Here's another example with `ZeroSlice<T>` (similar to `[T]`):
///
/// ```rust
/// use zerovec::VarZeroVec;
/// use zerovec::ZeroSlice;
///
/// // The structured list corresponds to the list of integers.
/// let numbers: &[&[u32]] = &[
///     &[12, 25, 38],
///     &[39179, 100],
///     &[42, 55555],
///     &[12345, 54321, 9],
/// ];
///
/// #[derive(serde::Serialize, serde::Deserialize)]
/// struct Data<'a> {
///     #[serde(borrow)]
///     vecs: VarZeroVec<'a, ZeroSlice<u32>>,
/// }
///
/// let data = Data {
///     vecs: VarZeroVec::from(numbers),
/// };
///
/// let bincode_bytes =
///     bincode::serialize(&data).expect("Serialization should be successful");
///
/// let deserialized: Data = bincode::deserialize(&bincode_bytes)
///     .expect("Deserialization should be successful");
///
/// assert_eq!(deserialized.vecs[0].get(1).unwrap(), 25);
/// assert_eq!(deserialized.vecs[1], *numbers[1]);
/// ```
///
/// [`VarZeroVec`]s can be nested infinitely via a similar mechanism; see the docs of [`VarZeroSlice`]
/// for more information.
///
/// # How it Works
///
/// `VarZeroVec`, when used with non-human-readable serializers (like `bincode`), will
/// serialize to a specially formatted list of bytes. The format is:
///
/// - 2 bytes for `length` (interpreted as a little-endian u16)
/// - `2 * (length - 1)` bytes of `indices` (interpreted as little-endian u16s)
/// - Remaining bytes for actual `data`
///
/// The format is tweakable by setting the `F` parameter; by default it uses u16 indices and lengths, but other
/// `VarZeroVecFormat` types can set other sizes.
///
/// Each element in the `indices` array points to the ending index of its corresponding
/// data part in the `data` list. The starting index can be calculated from the ending index
/// of the previous element (or 0 for the first element). The last ending index, not stored in the array, is
/// the length of the `data` segment.
///
/// See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for more details.
/// /// [`ule`]: crate::ule pub struct VarZeroVec<'a, T: ?Sized, F = Index16>(pub(crate) VarZeroVecInner<'a, T, F>); pub(crate) enum VarZeroVecInner<'a, T: ?Sized, F = Index16> { #[cfg(feature = "alloc")] Owned(VarZeroVecOwned), Borrowed(&'a VarZeroSlice), } impl<'a, T: ?Sized, F> Clone for VarZeroVec<'a, T, F> { fn clone(&self) -> Self { match self.0 { #[cfg(feature = "alloc")] VarZeroVecInner::Owned(ref o) => o.clone().into(), VarZeroVecInner::Borrowed(b) => b.into(), } } } impl fmt::Debug for VarZeroVec<'_, T, F> where T: fmt::Debug, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { VarZeroSlice::fmt(self, f) } } #[cfg(feature = "alloc")] impl<'a, T: ?Sized, F> From> for VarZeroVec<'a, T, F> { #[inline] fn from(other: VarZeroVecOwned) -> Self { Self(VarZeroVecInner::Owned(other)) } } impl<'a, T: ?Sized, F> From<&'a VarZeroSlice> for VarZeroVec<'a, T, F> { fn from(other: &'a VarZeroSlice) -> Self { Self(VarZeroVecInner::Borrowed(other)) } } #[cfg(feature = "alloc")] impl<'a, T: ?Sized + VarULE, F: VarZeroVecFormat> From> for VarZeroVecOwned { #[inline] fn from(other: VarZeroVec<'a, T, F>) -> Self { match other.0 { VarZeroVecInner::Owned(o) => o, VarZeroVecInner::Borrowed(b) => b.into(), } } } impl Default for VarZeroVec<'_, T> { #[inline] fn default() -> Self { Self::new() } } impl Deref for VarZeroVec<'_, T, F> { type Target = VarZeroSlice; fn deref(&self) -> &VarZeroSlice { self.as_slice() } } impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVec<'a, T, F> { /// Creates a new, empty `VarZeroVec`. /// /// # Examples /// /// ``` /// use zerovec::VarZeroVec; /// /// let vzv: VarZeroVec = VarZeroVec::new(); /// assert!(vzv.is_empty()); /// ``` #[inline] pub const fn new() -> Self { Self(VarZeroVecInner::Borrowed(VarZeroSlice::new_empty())) } /// Parse a VarZeroVec from a slice of the appropriate format /// /// Slices of the right format can be obtained via [`VarZeroSlice::as_bytes()`]. 
/// /// # Example /// /// ```rust /// # use zerovec::VarZeroVec; /// /// let strings = vec!["foo", "bar", "baz", "quux"]; /// let vec = VarZeroVec::::from(&strings); /// /// assert_eq!(&vec[0], "foo"); /// assert_eq!(&vec[1], "bar"); /// assert_eq!(&vec[2], "baz"); /// assert_eq!(&vec[3], "quux"); /// ``` pub fn parse_bytes(slice: &'a [u8]) -> Result { let borrowed = VarZeroSlice::::parse_bytes(slice)?; Ok(Self(VarZeroVecInner::Borrowed(borrowed))) } /// Uses a `&[u8]` buffer as a `VarZeroVec` without any verification. /// /// # Safety /// /// `bytes` need to be an output from [`VarZeroSlice::as_bytes()`]. pub const unsafe fn from_bytes_unchecked(bytes: &'a [u8]) -> Self { Self(VarZeroVecInner::Borrowed(core::mem::transmute::< &[u8], &VarZeroSlice, >(bytes))) } /// Convert this into a mutable vector of the owned `T` type, cloning if necessary. /// /// /// # Example /// /// ```rust,ignore /// # use zerovec::VarZeroVec; /// let strings = vec!["foo", "bar", "baz", "quux"]; /// let mut vec = VarZeroVec::::from(&strings); /// /// assert_eq!(vec.len(), 4); /// let mutvec = vec.make_mut(); /// mutvec.push("lorem ipsum".into()); /// mutvec[2] = "dolor sit".into(); /// assert_eq!(&vec[0], "foo"); /// assert_eq!(&vec[1], "bar"); /// assert_eq!(&vec[2], "dolor sit"); /// assert_eq!(&vec[3], "quux"); /// assert_eq!(&vec[4], "lorem ipsum"); /// ``` // // This function is crate-public for now since we don't yet want to stabilize // the internal implementation details #[cfg(feature = "alloc")] pub fn make_mut(&mut self) -> &mut VarZeroVecOwned { match self.0 { VarZeroVecInner::Owned(ref mut vec) => vec, VarZeroVecInner::Borrowed(slice) => { let new_self = VarZeroVecOwned::from_slice(slice); *self = new_self.into(); // recursion is limited since we are guaranteed to hit the Owned branch self.make_mut() } } } /// Converts a borrowed ZeroVec to an owned ZeroVec. No-op if already owned. 
/// /// # Example /// /// ``` /// # use zerovec::VarZeroVec; /// /// let strings = vec!["foo", "bar", "baz", "quux"]; /// let vec = VarZeroVec::::from(&strings); /// /// assert_eq!(vec.len(), 4); /// // has 'static lifetime /// let owned = vec.into_owned(); /// ``` #[cfg(feature = "alloc")] pub fn into_owned(mut self) -> VarZeroVec<'static, T, F> { self.make_mut(); match self.0 { VarZeroVecInner::Owned(vec) => vec.into(), _ => unreachable!(), } } /// Obtain this `VarZeroVec` as a [`VarZeroSlice`] pub fn as_slice(&self) -> &VarZeroSlice { match self.0 { #[cfg(feature = "alloc")] VarZeroVecInner::Owned(ref owned) => owned, VarZeroVecInner::Borrowed(b) => b, } } /// Takes the byte vector representing the encoded data of this VarZeroVec. If borrowed, /// this function allocates a byte vector and copies the borrowed bytes into it. /// /// The bytes can be passed back to [`Self::parse_bytes()`]. /// /// To get a reference to the bytes without moving, see [`VarZeroSlice::as_bytes()`]. /// /// # Example /// /// ```rust /// # use zerovec::VarZeroVec; /// /// let strings = vec!["foo", "bar", "baz"]; /// let bytes = VarZeroVec::::from(&strings).into_bytes(); /// /// let mut borrowed: VarZeroVec = /// VarZeroVec::parse_bytes(&bytes).unwrap(); /// assert_eq!(borrowed, &*strings); /// ``` #[cfg(feature = "alloc")] pub fn into_bytes(self) -> alloc::vec::Vec { match self.0 { #[cfg(feature = "alloc")] VarZeroVecInner::Owned(vec) => vec.into_bytes(), VarZeroVecInner::Borrowed(vec) => vec.as_bytes().to_vec(), } } /// Return whether the [`VarZeroVec`] is operating on owned or borrowed /// data. [`VarZeroVec::into_owned()`] and [`VarZeroVec::make_mut()`] can /// be used to force it into an owned type pub fn is_owned(&self) -> bool { match self.0 { #[cfg(feature = "alloc")] VarZeroVecInner::Owned(..) => true, VarZeroVecInner::Borrowed(..) 
=> false, } } #[doc(hidden)] pub fn as_components<'b>(&'b self) -> VarZeroVecComponents<'b, T, F> { self.as_slice().as_components() } } #[cfg(feature = "alloc")] impl From<&alloc::vec::Vec> for VarZeroVec<'static, T, F> where T: VarULE + ?Sized, A: EncodeAsVarULE, F: VarZeroVecFormat, { #[inline] fn from(elements: &alloc::vec::Vec) -> Self { Self::from(elements.as_slice()) } } #[cfg(feature = "alloc")] impl From<&[A]> for VarZeroVec<'static, T, F> where T: VarULE + ?Sized, A: EncodeAsVarULE, F: VarZeroVecFormat, { #[inline] fn from(elements: &[A]) -> Self { if elements.is_empty() { VarZeroSlice::new_empty().into() } else { #[allow(clippy::unwrap_used)] // TODO(#1410) Better story for fallibility VarZeroVecOwned::try_from_elements(elements).unwrap().into() } } } #[cfg(feature = "alloc")] impl From<&[A; N]> for VarZeroVec<'static, T, F> where T: VarULE + ?Sized, A: EncodeAsVarULE, F: VarZeroVecFormat, { #[inline] fn from(elements: &[A; N]) -> Self { Self::from(elements.as_slice()) } } impl<'a, 'b, T, F> PartialEq> for VarZeroVec<'a, T, F> where T: VarULE, T: ?Sized, T: PartialEq, F: VarZeroVecFormat, { #[inline] fn eq(&self, other: &VarZeroVec<'b, T, F>) -> bool { // VZV::from_elements used to produce a non-canonical representation of the // empty VZV, so we cannot use byte equality for empty vecs. if self.is_empty() || other.is_empty() { return self.is_empty() && other.is_empty(); } // VarULE has an API guarantee that byte equality is semantic equality. // For non-empty VZVs, there's only a single metadata representation, // so this guarantee extends to the whole VZV representation. 
self.as_bytes().eq(other.as_bytes()) } } impl<'a, T, F> Eq for VarZeroVec<'a, T, F> where T: VarULE, T: ?Sized, T: Eq, F: VarZeroVecFormat, { } impl PartialEq<&'_ [A]> for VarZeroVec<'_, T, F> where T: VarULE + ?Sized, T: PartialEq, A: AsRef, F: VarZeroVecFormat, { #[inline] fn eq(&self, other: &&[A]) -> bool { self.iter().eq(other.iter().map(|t| t.as_ref())) } } impl PartialEq<[A; N]> for VarZeroVec<'_, T, F> where T: VarULE + ?Sized, T: PartialEq, A: AsRef, F: VarZeroVecFormat, { #[inline] fn eq(&self, other: &[A; N]) -> bool { self.iter().eq(other.iter().map(|t| t.as_ref())) } } impl<'a, T: VarULE + ?Sized + PartialOrd, F: VarZeroVecFormat> PartialOrd for VarZeroVec<'a, T, F> { fn partial_cmp(&self, other: &Self) -> Option { self.iter().partial_cmp(other.iter()) } } impl<'a, T: VarULE + ?Sized + Ord, F: VarZeroVecFormat> Ord for VarZeroVec<'a, T, F> { fn cmp(&self, other: &Self) -> Ordering { self.iter().cmp(other.iter()) } } #[test] fn assert_single_empty_representation() { assert_eq!( VarZeroVec::::new().as_bytes(), VarZeroVec::::from(&[] as &[&str]).as_bytes() ); use crate::map::MutableZeroVecLike; let mut vzv = VarZeroVec::::from(&["hello", "world"][..]); assert_eq!(vzv.len(), 2); assert!(!vzv.as_bytes().is_empty()); vzv.zvl_remove(0); assert_eq!(vzv.len(), 1); assert!(!vzv.as_bytes().is_empty()); vzv.zvl_remove(0); assert_eq!(vzv.len(), 0); assert!(vzv.as_bytes().is_empty()); vzv.zvl_insert(0, "something"); assert_eq!(vzv.len(), 1); assert!(!vzv.as_bytes().is_empty()); } #[test] fn weird_empty_representation_equality() { assert_eq!( VarZeroVec::::parse_bytes(&[0, 0, 0, 0]).unwrap(), VarZeroVec::::parse_bytes(&[]).unwrap() ); } zerovec-0.11.1/src/yoke_impls.rs000064400000000000000000000464121046102023000147000ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
// This way we can copy-paste Yokeable impls #![allow(unknown_lints)] // forgetting_copy_types #![allow(renamed_and_removed_lints)] // forgetting_copy_types #![allow(forgetting_copy_types)] #![allow(clippy::forget_copy)] #![allow(clippy::forget_non_drop)] #[cfg(feature = "alloc")] use crate::map::ZeroMapBorrowed; #[cfg(feature = "alloc")] use crate::map::ZeroMapKV; #[cfg(feature = "alloc")] use crate::map2d::ZeroMap2dBorrowed; use crate::ule::*; use crate::{VarZeroCow, VarZeroVec, ZeroVec}; #[cfg(feature = "alloc")] use crate::{ZeroMap, ZeroMap2d}; use core::{mem, ptr}; use yoke::*; // This impl is similar to the impl on Cow and is safe for the same reasons /// This impl requires enabling the optional `yoke` Cargo feature of the `zerovec` crate unsafe impl<'a, T: 'static + AsULE> Yokeable<'a> for ZeroVec<'static, T> { type Output = ZeroVec<'a, T>; #[inline] fn transform(&'a self) -> &'a Self::Output { self } #[inline] fn transform_owned(self) -> Self::Output { self } #[inline] unsafe fn make(from: Self::Output) -> Self { debug_assert!(mem::size_of::() == mem::size_of::()); let from = mem::ManuallyDrop::new(from); let ptr: *const Self = (&*from as *const Self::Output).cast(); ptr::read(ptr) } #[inline] fn transform_mut(&'a mut self, f: F) where F: 'static + for<'b> FnOnce(&'b mut Self::Output), { unsafe { f(mem::transmute::<&mut Self, &mut Self::Output>(self)) } } } // This impl is similar to the impl on Cow and is safe for the same reasons /// This impl requires enabling the optional `yoke` Cargo feature of the `zerovec` crate unsafe impl<'a, T: 'static + VarULE + ?Sized> Yokeable<'a> for VarZeroVec<'static, T> { type Output = VarZeroVec<'a, T>; #[inline] fn transform(&'a self) -> &'a Self::Output { self } #[inline] fn transform_owned(self) -> Self::Output { self } #[inline] unsafe fn make(from: Self::Output) -> Self { debug_assert!(mem::size_of::() == mem::size_of::()); let from = mem::ManuallyDrop::new(from); let ptr: *const Self = (&*from as *const 
Self::Output).cast(); ptr::read(ptr) } #[inline] fn transform_mut(&'a mut self, f: F) where F: 'static + for<'b> FnOnce(&'b mut Self::Output), { unsafe { f(mem::transmute::<&mut Self, &mut Self::Output>(self)) } } } // This impl is similar to the impl on Cow and is safe for the same reasons /// This impl requires enabling the optional `yoke` Cargo feature of the `zerovec` crate unsafe impl<'a, T: 'static + ?Sized> Yokeable<'a> for VarZeroCow<'static, T> { type Output = VarZeroCow<'a, T>; #[inline] fn transform(&'a self) -> &'a Self::Output { self } #[inline] fn transform_owned(self) -> Self::Output { self } #[inline] unsafe fn make(from: Self::Output) -> Self { debug_assert!(mem::size_of::() == mem::size_of::()); let from = mem::ManuallyDrop::new(from); let ptr: *const Self = (&*from as *const Self::Output).cast(); ptr::read(ptr) } #[inline] fn transform_mut(&'a mut self, f: F) where F: 'static + for<'b> FnOnce(&'b mut Self::Output), { unsafe { f(mem::transmute::<&mut Self, &mut Self::Output>(self)) } } } /// This impl requires enabling the optional `yoke` Cargo feature of the `zerovec` crate #[allow(clippy::transmute_ptr_to_ptr)] #[cfg(feature = "alloc")] unsafe impl<'a, K, V> Yokeable<'a> for ZeroMap<'static, K, V> where K: 'static + for<'b> ZeroMapKV<'b> + ?Sized, V: 'static + for<'b> ZeroMapKV<'b> + ?Sized, >::Container: for<'b> Yokeable<'b>, >::Container: for<'b> Yokeable<'b>, { type Output = ZeroMap<'a, K, V>; #[inline] fn transform(&'a self) -> &'a Self::Output { unsafe { // Unfortunately, because K and V are generic, rustc is // unaware that these are covariant types, and cannot perform this cast automatically. 
// We transmute it instead, and enforce the lack of a lifetime with the `K, V: 'static` bound mem::transmute::<&Self, &Self::Output>(self) } } #[inline] fn transform_owned(self) -> Self::Output { debug_assert!(mem::size_of::() == mem::size_of::()); unsafe { // Similar problem as transform(), but we need to use ptr::read since // the compiler isn't sure of the sizes let this = mem::ManuallyDrop::new(self); let ptr: *const Self::Output = (&*this as *const Self).cast(); ptr::read(ptr) } } #[inline] unsafe fn make(from: Self::Output) -> Self { debug_assert!(mem::size_of::() == mem::size_of::()); let from = mem::ManuallyDrop::new(from); let ptr: *const Self = (&*from as *const Self::Output).cast(); ptr::read(ptr) } #[inline] fn transform_mut(&'a mut self, f: F) where F: 'static + for<'b> FnOnce(&'b mut Self::Output), { unsafe { f(mem::transmute::<&mut Self, &mut Self::Output>(self)) } } } /// This impl requires enabling the optional `yoke` Cargo feature of the `zerovec` crate #[allow(clippy::transmute_ptr_to_ptr)] #[cfg(feature = "alloc")] unsafe impl<'a, K, V> Yokeable<'a> for ZeroMapBorrowed<'static, K, V> where K: 'static + for<'b> ZeroMapKV<'b> + ?Sized, V: 'static + for<'b> ZeroMapKV<'b> + ?Sized, &'static >::Slice: for<'b> Yokeable<'b>, &'static >::Slice: for<'b> Yokeable<'b>, { type Output = ZeroMapBorrowed<'a, K, V>; #[inline] fn transform(&'a self) -> &'a Self::Output { unsafe { // Unfortunately, because K and V are generic, rustc is // unaware that these are covariant types, and cannot perform this cast automatically. 
// We transmute it instead, and enforce the lack of a lifetime with the `K, V: 'static` bound mem::transmute::<&Self, &Self::Output>(self) } } #[inline] fn transform_owned(self) -> Self::Output { debug_assert!(mem::size_of::() == mem::size_of::()); unsafe { // Similar problem as transform(), but we need to use ptr::read since // the compiler isn't sure of the sizes let this = mem::ManuallyDrop::new(self); let ptr: *const Self::Output = (&*this as *const Self).cast(); ptr::read(ptr) } } #[inline] unsafe fn make(from: Self::Output) -> Self { debug_assert!(mem::size_of::() == mem::size_of::()); let from = mem::ManuallyDrop::new(from); let ptr: *const Self = (&*from as *const Self::Output).cast(); ptr::read(ptr) } #[inline] fn transform_mut(&'a mut self, f: F) where F: 'static + for<'b> FnOnce(&'b mut Self::Output), { unsafe { f(mem::transmute::<&mut Self, &mut Self::Output>(self)) } } } /// This impl requires enabling the optional `yoke` Cargo feature of the `zerovec` crate #[allow(clippy::transmute_ptr_to_ptr)] #[cfg(feature = "alloc")] unsafe impl<'a, K0, K1, V> Yokeable<'a> for ZeroMap2d<'static, K0, K1, V> where K0: 'static + for<'b> ZeroMapKV<'b> + ?Sized, K1: 'static + for<'b> ZeroMapKV<'b> + ?Sized, V: 'static + for<'b> ZeroMapKV<'b> + ?Sized, >::Container: for<'b> Yokeable<'b>, >::Container: for<'b> Yokeable<'b>, >::Container: for<'b> Yokeable<'b>, { type Output = ZeroMap2d<'a, K0, K1, V>; #[inline] fn transform(&'a self) -> &'a Self::Output { unsafe { // Unfortunately, because K and V are generic, rustc is // unaware that these are covariant types, and cannot perform this cast automatically. 
// We transmute it instead, and enforce the lack of a lifetime with the `K0, K1, V: 'static` bound mem::transmute::<&Self, &Self::Output>(self) } } #[inline] fn transform_owned(self) -> Self::Output { debug_assert!(mem::size_of::() == mem::size_of::()); unsafe { // Similar problem as transform(), but we need to use ptr::read since // the compiler isn't sure of the sizes let this = mem::ManuallyDrop::new(self); let ptr: *const Self::Output = (&*this as *const Self).cast(); ptr::read(ptr) } } #[inline] unsafe fn make(from: Self::Output) -> Self { debug_assert!(mem::size_of::() == mem::size_of::()); let from = mem::ManuallyDrop::new(from); let ptr: *const Self = (&*from as *const Self::Output).cast(); ptr::read(ptr) } #[inline] fn transform_mut(&'a mut self, f: F) where F: 'static + for<'b> FnOnce(&'b mut Self::Output), { unsafe { f(mem::transmute::<&mut Self, &mut Self::Output>(self)) } } } /// This impl requires enabling the optional `yoke` Cargo feature of the `zerovec` crate #[allow(clippy::transmute_ptr_to_ptr)] #[cfg(feature = "alloc")] unsafe impl<'a, K0, K1, V> Yokeable<'a> for ZeroMap2dBorrowed<'static, K0, K1, V> where K0: 'static + for<'b> ZeroMapKV<'b> + ?Sized, K1: 'static + for<'b> ZeroMapKV<'b> + ?Sized, V: 'static + for<'b> ZeroMapKV<'b> + ?Sized, &'static >::Slice: for<'b> Yokeable<'b>, &'static >::Slice: for<'b> Yokeable<'b>, &'static >::Slice: for<'b> Yokeable<'b>, { type Output = ZeroMap2dBorrowed<'a, K0, K1, V>; #[inline] fn transform(&'a self) -> &'a Self::Output { unsafe { // Unfortunately, because K and V are generic, rustc is // unaware that these are covariant types, and cannot perform this cast automatically. 
// We transmute it instead, and enforce the lack of a lifetime with the `K0, K1, V: 'static` bound mem::transmute::<&Self, &Self::Output>(self) } } #[inline] fn transform_owned(self) -> Self::Output { debug_assert!(mem::size_of::() == mem::size_of::()); unsafe { // Similar problem as transform(), but we need to use ptr::read since // the compiler isn't sure of the sizes let this = mem::ManuallyDrop::new(self); let ptr: *const Self::Output = (&*this as *const Self).cast(); ptr::read(ptr) } } #[inline] unsafe fn make(from: Self::Output) -> Self { debug_assert!(mem::size_of::() == mem::size_of::()); let from = mem::ManuallyDrop::new(from); let ptr: *const Self = (&*from as *const Self::Output).cast(); ptr::read(ptr) } #[inline] fn transform_mut(&'a mut self, f: F) where F: 'static + for<'b> FnOnce(&'b mut Self::Output), { unsafe { f(mem::transmute::<&mut Self, &mut Self::Output>(self)) } } } #[cfg(test)] #[allow(non_camel_case_types, non_snake_case)] mod test { use super::*; use crate::{VarZeroSlice, ZeroSlice}; use databake::*; // Note: The following derives cover Yoke as well as Serde and databake. These may partially // duplicate tests elsewhere in this crate, but they are here for completeness. 
#[derive(yoke::Yokeable, zerofrom::ZeroFrom)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] #[cfg_attr(feature = "databake", derive(databake::Bake))] #[cfg_attr(feature = "databake", databake(path = zerovec::yoke_impls::test))] struct DeriveTest_ZeroVec<'data> { #[cfg_attr(feature = "serde", serde(borrow))] _data: ZeroVec<'data, u16>, } #[test] fn bake_ZeroVec() { test_bake!( DeriveTest_ZeroVec<'static>, crate::yoke_impls::test::DeriveTest_ZeroVec { _data: crate::ZeroVec::new(), }, zerovec, ); } #[derive(yoke::Yokeable)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] #[cfg_attr(feature = "databake", derive(databake::Bake))] #[cfg_attr(feature = "databake", databake(path = zerovec::yoke_impls::test))] struct DeriveTest_ZeroSlice<'data> { #[cfg_attr(feature = "serde", serde(borrow))] _data: &'data ZeroSlice, } #[test] fn bake_ZeroSlice() { test_bake!( DeriveTest_ZeroSlice<'static>, crate::yoke_impls::test::DeriveTest_ZeroSlice { _data: crate::ZeroSlice::new_empty(), }, zerovec, ); } #[derive(yoke::Yokeable, zerofrom::ZeroFrom)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] #[cfg_attr(feature = "databake", derive(databake::Bake))] #[cfg_attr(feature = "databake", databake(path = zerovec::yoke_impls::test))] struct DeriveTest_VarZeroVec<'data> { #[cfg_attr(feature = "serde", serde(borrow))] _data: VarZeroVec<'data, str>, } #[test] fn bake_VarZeroVec() { test_bake!( DeriveTest_VarZeroVec<'static>, crate::yoke_impls::test::DeriveTest_VarZeroVec { _data: crate::vecs::VarZeroVec16::new(), }, zerovec, ); } #[derive(yoke::Yokeable)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] #[cfg_attr(feature = "databake", derive(databake::Bake))] #[cfg_attr(feature = "databake", databake(path = zerovec::yoke_impls::test))] struct DeriveTest_VarZeroSlice<'data> { #[cfg_attr(feature = "serde", serde(borrow))] _data: &'data VarZeroSlice, } #[test] fn 
bake_VarZeroSlice() { test_bake!( DeriveTest_VarZeroSlice<'static>, crate::yoke_impls::test::DeriveTest_VarZeroSlice { _data: crate::vecs::VarZeroSlice16::new_empty() }, zerovec, ); } #[derive(yoke::Yokeable, zerofrom::ZeroFrom)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] #[cfg_attr(feature = "databake", derive(databake::Bake))] #[cfg_attr(feature = "databake", databake(path = zerovec::yoke_impls::test))] #[yoke(prove_covariance_manually)] struct DeriveTest_ZeroMap<'data> { #[cfg_attr(feature = "serde", serde(borrow))] _data: ZeroMap<'data, [u8], str>, } #[test] fn bake_ZeroMap() { test_bake!( DeriveTest_ZeroMap<'static>, crate::yoke_impls::test::DeriveTest_ZeroMap { _data: unsafe { #[allow(unused_unsafe)] crate::ZeroMap::from_parts_unchecked( crate::vecs::VarZeroVec16::new(), crate::vecs::VarZeroVec16::new(), ) }, }, zerovec, ); } #[derive(yoke::Yokeable)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] #[cfg_attr(feature = "databake", derive(databake::Bake))] #[cfg_attr(feature = "databake", databake(path = zerovec::yoke_impls::test))] #[yoke(prove_covariance_manually)] struct DeriveTest_ZeroMapBorrowed<'data> { #[cfg_attr(feature = "serde", serde(borrow))] _data: ZeroMapBorrowed<'data, [u8], str>, } #[test] fn bake_ZeroMapBorrowed() { test_bake!( DeriveTest_ZeroMapBorrowed<'static>, crate::yoke_impls::test::DeriveTest_ZeroMapBorrowed { _data: unsafe { #[allow(unused_unsafe)] crate::maps::ZeroMapBorrowed::from_parts_unchecked( crate::vecs::VarZeroSlice16::new_empty(), crate::vecs::VarZeroSlice16::new_empty(), ) }, }, zerovec, ); } #[derive(yoke::Yokeable, zerofrom::ZeroFrom)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] #[cfg_attr(feature = "databake", derive(databake::Bake))] #[cfg_attr(feature = "databake", databake(path = zerovec::yoke_impls::test))] #[yoke(prove_covariance_manually)] struct DeriveTest_ZeroMapWithULE<'data> { #[cfg_attr(feature = "serde", 
serde(borrow))] _data: ZeroMap<'data, ZeroSlice, str>, } #[test] fn bake_ZeroMapWithULE() { test_bake!( DeriveTest_ZeroMapWithULE<'static>, crate::yoke_impls::test::DeriveTest_ZeroMapWithULE { _data: unsafe { #[allow(unused_unsafe)] crate::ZeroMap::from_parts_unchecked( crate::vecs::VarZeroVec16::new(), crate::vecs::VarZeroVec16::new(), ) }, }, zerovec, ); } #[derive(yoke::Yokeable, zerofrom::ZeroFrom)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] #[cfg_attr(feature = "databake", derive(databake::Bake))] #[cfg_attr(feature = "databake", databake(path = zerovec::yoke_impls::test))] #[yoke(prove_covariance_manually)] struct DeriveTest_ZeroMap2d<'data> { #[cfg_attr(feature = "serde", serde(borrow))] _data: ZeroMap2d<'data, u16, u16, str>, } #[test] fn bake_ZeroMap2d() { test_bake!( DeriveTest_ZeroMap2d<'static>, crate::yoke_impls::test::DeriveTest_ZeroMap2d { _data: unsafe { #[allow(unused_unsafe)] crate::ZeroMap2d::from_parts_unchecked( crate::ZeroVec::new(), crate::ZeroVec::new(), crate::ZeroVec::new(), crate::vecs::VarZeroVec16::new(), ) }, }, zerovec, ); } #[derive(yoke::Yokeable)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] #[cfg_attr(feature = "databake", derive(databake::Bake))] #[cfg_attr(feature = "databake", databake(path = zerovec::yoke_impls::test))] #[yoke(prove_covariance_manually)] struct DeriveTest_ZeroMap2dBorrowed<'data> { #[cfg_attr(feature = "serde", serde(borrow))] _data: ZeroMap2dBorrowed<'data, u16, u16, str>, } #[test] fn bake_ZeroMap2dBorrowed() { test_bake!( DeriveTest_ZeroMap2dBorrowed<'static>, crate::yoke_impls::test::DeriveTest_ZeroMap2dBorrowed { _data: unsafe { #[allow(unused_unsafe)] crate::maps::ZeroMap2dBorrowed::from_parts_unchecked( crate::ZeroSlice::new_empty(), crate::ZeroSlice::new_empty(), crate::ZeroSlice::new_empty(), crate::vecs::VarZeroSlice16::new_empty(), ) }, }, zerovec, ); } } 
zerovec-0.11.1/src/zerofrom_impls.rs000064400000000000000000000062601046102023000155710ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). #[cfg(feature = "alloc")] use crate::map::ZeroMapKV; use crate::ule::*; use crate::vecs::VarZeroVecFormat; use crate::{VarZeroSlice, VarZeroVec, ZeroSlice, ZeroVec}; #[cfg(feature = "alloc")] use crate::{ZeroMap, ZeroMap2d}; use zerofrom::ZeroFrom; impl<'zf, T> ZeroFrom<'zf, ZeroVec<'_, T>> for ZeroVec<'zf, T> where T: 'static + AsULE, { #[inline] fn zero_from(other: &'zf ZeroVec<'_, T>) -> Self { ZeroVec::new_borrowed(other.as_ule_slice()) } } impl<'zf, T> ZeroFrom<'zf, ZeroSlice> for ZeroVec<'zf, T> where T: 'static + AsULE, { #[inline] fn zero_from(other: &'zf ZeroSlice) -> Self { ZeroVec::new_borrowed(other.as_ule_slice()) } } impl<'zf, T> ZeroFrom<'zf, ZeroSlice> for &'zf ZeroSlice where T: 'static + AsULE, { #[inline] fn zero_from(other: &'zf ZeroSlice) -> Self { other } } impl<'zf, T, F: VarZeroVecFormat> ZeroFrom<'zf, VarZeroSlice> for VarZeroVec<'zf, T, F> where T: 'static + VarULE + ?Sized, { #[inline] fn zero_from(other: &'zf VarZeroSlice) -> Self { other.into() } } impl<'zf, T, F: VarZeroVecFormat> ZeroFrom<'zf, VarZeroVec<'_, T, F>> for VarZeroVec<'zf, T, F> where T: 'static + VarULE + ?Sized, { #[inline] fn zero_from(other: &'zf VarZeroVec<'_, T, F>) -> Self { other.as_slice().into() } } impl<'zf, T> ZeroFrom<'zf, VarZeroSlice> for &'zf VarZeroSlice where T: 'static + VarULE + ?Sized, { #[inline] fn zero_from(other: &'zf VarZeroSlice) -> Self { other } } #[cfg(feature = "alloc")] impl<'zf, 's, K, V> ZeroFrom<'zf, ZeroMap<'s, K, V>> for ZeroMap<'zf, K, V> where K: 'static + for<'b> ZeroMapKV<'b> + ?Sized, V: 'static + for<'b> ZeroMapKV<'b> + ?Sized, >::Container: ZeroFrom<'zf, >::Container>, >::Container: ZeroFrom<'zf, >::Container>, { fn 
zero_from(other: &'zf ZeroMap<'s, K, V>) -> Self { ZeroMap { keys: K::Container::zero_from(&other.keys), values: V::Container::zero_from(&other.values), } } } #[cfg(feature = "alloc")] impl<'zf, 's, K0, K1, V> ZeroFrom<'zf, ZeroMap2d<'s, K0, K1, V>> for ZeroMap2d<'zf, K0, K1, V> where K0: 'static + for<'b> ZeroMapKV<'b> + ?Sized, K1: 'static + for<'b> ZeroMapKV<'b> + ?Sized, V: 'static + for<'b> ZeroMapKV<'b> + ?Sized, >::Container: ZeroFrom<'zf, >::Container>, >::Container: ZeroFrom<'zf, >::Container>, >::Container: ZeroFrom<'zf, >::Container>, { fn zero_from(other: &'zf ZeroMap2d<'s, K0, K1, V>) -> Self { ZeroMap2d { keys0: K0::Container::zero_from(&other.keys0), joiner: ZeroVec::zero_from(&other.joiner), keys1: K1::Container::zero_from(&other.keys1), values: V::Container::zero_from(&other.values), } } } zerovec-0.11.1/src/zerovec/databake.rs000064400000000000000000000035321046102023000157320ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use super::ZeroVec; use crate::{ule::AsULE, ZeroSlice}; use databake::*; impl Bake for ZeroVec<'_, T> { fn bake(&self, env: &CrateEnv) -> TokenStream { env.insert("zerovec"); if self.is_empty() { quote! { zerovec::ZeroVec::new() } } else { let bytes = databake::Bake::bake(&self.as_bytes(), env); quote! { unsafe { zerovec::ZeroVec::from_bytes_unchecked(#bytes) } } } } } impl BakeSize for ZeroVec<'_, T> { fn borrows_size(&self) -> usize { self.as_bytes().len() } } impl Bake for &ZeroSlice { fn bake(&self, env: &CrateEnv) -> TokenStream { env.insert("zerovec"); if self.is_empty() { quote! { zerovec::ZeroSlice::new_empty() } } else { let bytes = databake::Bake::bake(&self.as_bytes(), env); quote! 
{ unsafe { zerovec::ZeroSlice::from_bytes_unchecked(#bytes) } } } } } impl BakeSize for &ZeroSlice { fn borrows_size(&self) -> usize { self.as_bytes().len() } } #[test] fn test_baked_vec() { test_bake!(ZeroVec, const, crate::ZeroVec::new(), zerovec); test_bake!( ZeroVec, const, unsafe { crate::ZeroVec::from_bytes_unchecked(b"\x02\x01\0\x16\0M\x01\\") }, zerovec ); } #[test] fn test_baked_slice() { test_bake!( &ZeroSlice, const, crate::ZeroSlice::new_empty(), zerovec ); test_bake!( &ZeroSlice, const, unsafe { crate::ZeroSlice::from_bytes_unchecked(b"\x02\x01\0\x16\0M\x01\\") }, zerovec ); } zerovec-0.11.1/src/zerovec/mod.rs000064400000000000000000001211621046102023000147550ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). #[cfg(feature = "databake")] mod databake; #[cfg(feature = "serde")] mod serde; mod slice; pub use slice::ZeroSlice; pub use slice::ZeroSliceIter; use crate::ule::*; #[cfg(feature = "alloc")] use alloc::borrow::Cow; #[cfg(feature = "alloc")] use alloc::vec::Vec; use core::cmp::{Ord, Ordering, PartialOrd}; use core::fmt; #[cfg(feature = "alloc")] use core::iter::FromIterator; use core::marker::PhantomData; use core::num::NonZeroUsize; use core::ops::Deref; use core::ptr::NonNull; /// A zero-copy, byte-aligned vector for fixed-width types. /// /// `ZeroVec` is designed as a drop-in replacement for `Vec` in situations where it is /// desirable to borrow data from an unaligned byte slice, such as zero-copy deserialization. /// /// `T` must implement [`AsULE`], which is auto-implemented for a number of built-in types, /// including all fixed-width multibyte integers. For variable-width types like [`str`], /// see [`VarZeroVec`](crate::VarZeroVec). 
[`zerovec::make_ule`](crate::make_ule) may /// be used to automatically implement [`AsULE`] for a type and generate the underlying [`ULE`] type. /// /// Typically, the zero-copy equivalent of a `Vec<T>` will simply be `ZeroVec<'a, T>`. /// /// Most of the methods on `ZeroVec<'a, T>` come from its [`Deref`] implementation to [`ZeroSlice`](ZeroSlice). /// /// For creating zero-copy vectors of variable-size types, see [`VarZeroVec`](crate::VarZeroVec). /// /// `ZeroVec` behaves much like [`Cow`](alloc::borrow::Cow), where it can be constructed from /// owned data (and then mutated!) but can also borrow from some buffer. /// /// # Example /// /// ``` /// use zerovec::ZeroVec; /// /// // The little-endian bytes correspond to the numbers on the following line. /// let nums: &[u16] = &[211, 281, 421, 461]; /// /// #[derive(serde::Serialize, serde::Deserialize)] /// struct Data<'a> { /// #[serde(borrow)] /// nums: ZeroVec<'a, u16>, /// } /// /// // The owned version will allocate /// let data = Data { /// nums: ZeroVec::alloc_from_slice(nums), /// }; /// let bincode_bytes = /// bincode::serialize(&data).expect("Serialization should be successful"); /// /// // Will deserialize without allocations /// let deserialized: Data = bincode::deserialize(&bincode_bytes) /// .expect("Deserialization should be successful"); /// /// // This deserializes without allocation! /// assert!(!deserialized.nums.is_owned()); /// assert_eq!(deserialized.nums.get(2), Some(421)); /// assert_eq!(deserialized.nums, nums); /// ``` /// /// [`ule`]: crate::ule /// /// # How it Works /// /// `ZeroVec` represents a slice of `T` as a slice of `T::ULE`. The difference between `T` and /// `T::ULE` is that `T::ULE` must be encoded in little-endian with 1-byte alignment. When accessing /// items from `ZeroVec`, we fetch the `T::ULE`, convert it on the fly to `T`, and return `T` by /// value. /// /// Benchmarks can be found in the project repository, with some results found in the [crate-level documentation](crate). 
/// /// See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for more details. pub struct ZeroVec<'a, T> where T: AsULE, { vector: EyepatchHackVector, /// Marker type, signalling variance and dropck behavior /// by containing all potential types this type represents marker1: PhantomData, marker2: PhantomData<&'a T::ULE>, } // Send inherits as long as all fields are Send, but also references are Send only // when their contents are Sync (this is the core purpose of Sync), so // we need a Send+Sync bound since this struct can logically be a vector or a slice. unsafe impl<'a, T: AsULE> Send for ZeroVec<'a, T> where T::ULE: Send + Sync {} // Sync typically inherits as long as all fields are Sync unsafe impl<'a, T: AsULE> Sync for ZeroVec<'a, T> where T::ULE: Sync {} impl<'a, T: AsULE> Deref for ZeroVec<'a, T> { type Target = ZeroSlice; #[inline] fn deref(&self) -> &Self::Target { self.as_slice() } } // Represents an unsafe potentially-owned vector/slice type, without a lifetime // working around dropck limitations. // // Must either be constructed by deconstructing a Vec, or from &[U] with capacity set to // zero. Should not outlive its source &[U] in the borrowed case; this type does not in // and of itself uphold this guarantee, but the .as_slice() method assumes it. // // After https://github.com/rust-lang/rust/issues/34761 stabilizes, // we should remove this type and use #[may_dangle] struct EyepatchHackVector { /// Pointer to data /// This pointer is *always* valid, the reason it is represented as a raw pointer /// is that it may logically represent an `&[T::ULE]` or the ptr,len of a `Vec` buf: NonNull<[U]>, #[cfg(feature = "alloc")] /// Borrowed if zero. 
Capacity of buffer above if not capacity: usize, } impl EyepatchHackVector { // Return a slice to the inner data for an arbitrary caller-specified lifetime #[inline] unsafe fn as_arbitrary_slice<'a>(&self) -> &'a [U] { self.buf.as_ref() } // Return a slice to the inner data #[inline] const fn as_slice<'a>(&'a self) -> &'a [U] { // Note: self.buf.as_ref() is not const until 1.73 unsafe { &*(self.buf.as_ptr() as *const [U]) } } /// Return this type as a vector /// /// Data MUST be known to be owned beforehand /// /// Because this borrows self, this is effectively creating two owners to the same /// data, make sure that `self` is cleaned up after this /// /// (this does not simply take `self` since then it wouldn't be usable from the Drop impl) #[cfg(feature = "alloc")] unsafe fn get_vec(&self) -> Vec { debug_assert!(self.capacity != 0); let slice: &[U] = self.as_slice(); let len = slice.len(); // Safety: we are assuming owned, and in owned cases // this always represents a valid vector Vec::from_raw_parts(self.buf.as_ptr() as *mut U, len, self.capacity) } } #[cfg(feature = "alloc")] impl Drop for EyepatchHackVector { #[inline] fn drop(&mut self) { if self.capacity != 0 { unsafe { // we don't need to clean up self here since we're already in a Drop impl let _ = self.get_vec(); } } } } impl<'a, T: AsULE> Clone for ZeroVec<'a, T> { fn clone(&self) -> Self { #[cfg(feature = "alloc")] if self.is_owned() { return ZeroVec::new_owned(self.as_ule_slice().into()); } Self { vector: EyepatchHackVector { buf: self.vector.buf, #[cfg(feature = "alloc")] capacity: 0, }, marker1: PhantomData, marker2: PhantomData, } } } impl<'a, T: AsULE> AsRef> for ZeroVec<'a, T> { fn as_ref(&self) -> &ZeroSlice { self.as_slice() } } impl fmt::Debug for ZeroVec<'_, T> where T: AsULE + fmt::Debug, { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "ZeroVec([")?; let mut first = true; for el in self.iter() { if !first { write!(f, ", ")?; } write!(f, "{el:?}")?; first = false; } 
write!(f, "])") } } impl Eq for ZeroVec<'_, T> where T: AsULE + Eq {} impl<'a, 'b, T> PartialEq> for ZeroVec<'a, T> where T: AsULE + PartialEq, { #[inline] fn eq(&self, other: &ZeroVec<'b, T>) -> bool { // Note: T implements PartialEq but not T::ULE self.iter().eq(other.iter()) } } impl PartialEq<&[T]> for ZeroVec<'_, T> where T: AsULE + PartialEq, { #[inline] fn eq(&self, other: &&[T]) -> bool { self.iter().eq(other.iter().copied()) } } impl PartialEq<[T; N]> for ZeroVec<'_, T> where T: AsULE + PartialEq, { #[inline] fn eq(&self, other: &[T; N]) -> bool { self.iter().eq(other.iter().copied()) } } impl<'a, T: AsULE> Default for ZeroVec<'a, T> { #[inline] fn default() -> Self { Self::new() } } impl<'a, T: AsULE + PartialOrd> PartialOrd for ZeroVec<'a, T> { fn partial_cmp(&self, other: &Self) -> Option { self.iter().partial_cmp(other.iter()) } } impl<'a, T: AsULE + Ord> Ord for ZeroVec<'a, T> { fn cmp(&self, other: &Self) -> Ordering { self.iter().cmp(other.iter()) } } impl<'a, T: AsULE> AsRef<[T::ULE]> for ZeroVec<'a, T> { fn as_ref(&self) -> &[T::ULE] { self.as_ule_slice() } } impl<'a, T: AsULE> From<&'a [T::ULE]> for ZeroVec<'a, T> { fn from(other: &'a [T::ULE]) -> Self { ZeroVec::new_borrowed(other) } } #[cfg(feature = "alloc")] impl<'a, T: AsULE> From> for ZeroVec<'a, T> { fn from(other: Vec) -> Self { ZeroVec::new_owned(other) } } impl<'a, T: AsULE> ZeroVec<'a, T> { /// Creates a new, borrowed, empty `ZeroVec`. /// /// # Examples /// /// ``` /// use zerovec::ZeroVec; /// /// let zv: ZeroVec = ZeroVec::new(); /// assert!(zv.is_empty()); /// ``` #[inline] pub const fn new() -> Self { Self::new_borrowed(&[]) } /// Same as `ZeroSlice::len`, which is available through `Deref` and not `const`. pub const fn const_len(&self) -> usize { self.vector.as_slice().len() } /// Creates a new owned `ZeroVec` using an existing /// allocated backing buffer /// /// If you have a slice of `&[T]`s, prefer using /// [`Self::alloc_from_slice()`]. 
#[inline] #[cfg(feature = "alloc")] pub fn new_owned(vec: Vec) -> Self { // Deconstruct the vector into parts // This is the only part of the code that goes from Vec // to ZeroVec, all other such operations should use this function let capacity = vec.capacity(); let len = vec.len(); let ptr = core::mem::ManuallyDrop::new(vec).as_mut_ptr(); // Safety: `ptr` comes from Vec::as_mut_ptr, which says: // "Returns an unsafe mutable pointer to the vector’s buffer, // or a dangling raw pointer valid for zero sized reads" let ptr = unsafe { NonNull::new_unchecked(ptr) }; let buf = NonNull::slice_from_raw_parts(ptr, len); Self { vector: EyepatchHackVector { buf, capacity }, marker1: PhantomData, marker2: PhantomData, } } /// Creates a new borrowed `ZeroVec` using an existing /// backing buffer #[inline] pub const fn new_borrowed(slice: &'a [T::ULE]) -> Self { // Safety: references in Rust cannot be null. // The safe function `impl From<&T> for NonNull` is not const. let slice = unsafe { NonNull::new_unchecked(slice as *const [_] as *mut [_]) }; Self { vector: EyepatchHackVector { buf: slice, #[cfg(feature = "alloc")] capacity: 0, }, marker1: PhantomData, marker2: PhantomData, } } /// Creates a new, owned, empty `ZeroVec`, with a certain capacity pre-allocated. #[cfg(feature = "alloc")] pub fn with_capacity(capacity: usize) -> Self { Self::new_owned(Vec::with_capacity(capacity)) } /// Parses a `&[u8]` buffer into a `ZeroVec`. /// /// This function is infallible for built-in integer types, but fallible for other types, /// such as `char`. For more information, see [`ULE::parse_bytes_to_slice`]. /// /// The bytes within the byte buffer must remain constant for the life of the ZeroVec. /// /// # Endianness /// /// The byte buffer must be encoded in little-endian, even if running in a big-endian /// environment. This ensures a consistent representation of data across platforms. 
/// /// # Example /// /// ``` /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; /// let zerovec: ZeroVec = /// ZeroVec::parse_bytes(bytes).expect("infallible"); /// /// assert!(!zerovec.is_owned()); /// assert_eq!(zerovec.get(2), Some(421)); /// ``` pub fn parse_bytes(bytes: &'a [u8]) -> Result { let slice: &'a [T::ULE] = T::ULE::parse_bytes_to_slice(bytes)?; Ok(Self::new_borrowed(slice)) } /// Uses a `&[u8]` buffer as a `ZeroVec` without any verification. /// /// # Safety /// /// `bytes` need to be an output from [`ZeroSlice::as_bytes()`]. pub const unsafe fn from_bytes_unchecked(bytes: &'a [u8]) -> Self { // &[u8] and &[T::ULE] are the same slice with different length metadata. Self::new_borrowed(core::slice::from_raw_parts( bytes.as_ptr() as *const T::ULE, bytes.len() / core::mem::size_of::(), )) } /// Converts a `ZeroVec` into a `ZeroVec`, retaining the current ownership model. /// /// Note that the length of the ZeroVec may change. 
/// /// # Examples /// /// Convert a borrowed `ZeroVec`: /// /// ``` /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; /// let zerovec: ZeroVec = /// ZeroVec::parse_bytes(bytes).expect("infallible"); /// let zv_bytes = zerovec.into_bytes(); /// /// assert!(!zv_bytes.is_owned()); /// assert_eq!(zv_bytes.get(0), Some(0xD3)); /// ``` /// /// Convert an owned `ZeroVec`: /// /// ``` /// use zerovec::ZeroVec; /// /// let nums: &[u16] = &[211, 281, 421, 461]; /// let zerovec = ZeroVec::alloc_from_slice(nums); /// let zv_bytes = zerovec.into_bytes(); /// /// assert!(zv_bytes.is_owned()); /// assert_eq!(zv_bytes.get(0), Some(0xD3)); /// ``` #[cfg(feature = "alloc")] pub fn into_bytes(self) -> ZeroVec<'a, u8> { use alloc::borrow::Cow; match self.into_cow() { Cow::Borrowed(slice) => { let bytes: &'a [u8] = T::ULE::slice_as_bytes(slice); ZeroVec::new_borrowed(bytes) } Cow::Owned(vec) => { let bytes = Vec::from(T::ULE::slice_as_bytes(&vec)); ZeroVec::new_owned(bytes) } } } /// Returns this [`ZeroVec`] as a [`ZeroSlice`]. /// /// To get a reference with a longer lifetime from a borrowed [`ZeroVec`], /// use [`ZeroVec::as_maybe_borrowed`]. #[inline] pub const fn as_slice(&self) -> &ZeroSlice { let slice: &[T::ULE] = self.vector.as_slice(); ZeroSlice::from_ule_slice(slice) } /// Casts a `ZeroVec` to a compatible `ZeroVec

`. /// /// `T` and `P` are compatible if they have the same `ULE` representation. /// /// If the `ULE`s of `T` and `P` are different types but have the same size, /// use [`Self::try_into_converted()`]. /// /// # Examples /// /// ``` /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80]; /// /// let zerovec_u16: ZeroVec = /// ZeroVec::parse_bytes(bytes).expect("infallible"); /// assert_eq!(zerovec_u16.get(3), Some(32973)); /// /// let zerovec_i16: ZeroVec = zerovec_u16.cast(); /// assert_eq!(zerovec_i16.get(3), Some(-32563)); /// ``` #[cfg(feature = "alloc")] pub fn cast

(self) -> ZeroVec<'a, P> where P: AsULE, { match self.into_cow() { Cow::Owned(v) => ZeroVec::new_owned(v), Cow::Borrowed(v) => ZeroVec::new_borrowed(v), } } /// Converts a `ZeroVec` into a `ZeroVec

`, retaining the current ownership model. /// /// If `T` and `P` have the exact same `ULE`, use [`Self::cast()`]. /// /// # Panics /// /// Panics if `T::ULE` and `P::ULE` are not the same size. /// /// # Examples /// /// Convert a borrowed `ZeroVec`: /// /// ``` /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0x7F, 0xF3, 0x01, 0x49, 0xF6, 0x01]; /// let zv_char: ZeroVec = /// ZeroVec::parse_bytes(bytes).expect("valid code points"); /// let zv_u8_3: ZeroVec<[u8; 3]> = /// zv_char.try_into_converted().expect("infallible conversion"); /// /// assert!(!zv_u8_3.is_owned()); /// assert_eq!(zv_u8_3.get(0), Some([0x7F, 0xF3, 0x01])); /// ``` /// /// Convert an owned `ZeroVec`: /// /// ``` /// use zerovec::ZeroVec; /// /// let chars: &[char] = &['🍿', '🙉']; /// let zv_char = ZeroVec::alloc_from_slice(chars); /// let zv_u8_3: ZeroVec<[u8; 3]> = /// zv_char.try_into_converted().expect("length is divisible"); /// /// assert!(zv_u8_3.is_owned()); /// assert_eq!(zv_u8_3.get(0), Some([0x7F, 0xF3, 0x01])); /// ``` /// /// If the types are not the same size, we refuse to convert: /// /// ```should_panic /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0x7F, 0xF3, 0x01, 0x49, 0xF6, 0x01]; /// let zv_char: ZeroVec = /// ZeroVec::parse_bytes(bytes).expect("valid code points"); /// /// // Panics! core::mem::size_of:: != core::mem::size_of:: /// zv_char.try_into_converted::(); /// ``` /// /// Instead, convert to bytes and then parse: /// /// ``` /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0x7F, 0xF3, 0x01, 0x49, 0xF6, 0x01]; /// let zv_char: ZeroVec = /// ZeroVec::parse_bytes(bytes).expect("valid code points"); /// let zv_u16: ZeroVec = /// zv_char.into_bytes().try_into_parsed().expect("infallible"); /// /// assert!(!zv_u16.is_owned()); /// assert_eq!(zv_u16.get(0), Some(0xF37F)); /// ``` #[cfg(feature = "alloc")] pub fn try_into_converted(self) -> Result, UleError> { assert_eq!( core::mem::size_of::<::ULE>(), core::mem::size_of::<

::ULE>() ); match self.into_cow() { Cow::Borrowed(old_slice) => { let bytes: &'a [u8] = T::ULE::slice_as_bytes(old_slice); let new_slice = P::ULE::parse_bytes_to_slice(bytes)?; Ok(ZeroVec::new_borrowed(new_slice)) } Cow::Owned(old_vec) => { let bytes: &[u8] = T::ULE::slice_as_bytes(&old_vec); P::ULE::validate_bytes(bytes)?; // Feature "vec_into_raw_parts" is not yet stable (#65816). Polyfill: let (ptr, len, cap) = { // Take ownership of the pointer let mut v = core::mem::ManuallyDrop::new(old_vec); // Fetch the pointer, length, and capacity (v.as_mut_ptr(), v.len(), v.capacity()) }; // Safety checklist for Vec::from_raw_parts: // 1. ptr came from a Vec // 2. P and T are asserted above to be the same size // 3. length is what it was before // 4. capacity is what it was before let new_vec = unsafe { let ptr = ptr as *mut P::ULE; Vec::from_raw_parts(ptr, len, cap) }; Ok(ZeroVec::new_owned(new_vec)) } } } /// Check if this type is fully owned #[inline] pub fn is_owned(&self) -> bool { #[cfg(feature = "alloc")] return self.vector.capacity != 0; #[cfg(not(feature = "alloc"))] return false; } /// If this is a borrowed [`ZeroVec`], return it as a slice that covers /// its lifetime parameter. /// /// To infallibly get a [`ZeroSlice`] with a shorter lifetime, use /// [`ZeroVec::as_slice`]. #[inline] pub fn as_maybe_borrowed(&self) -> Option<&'a ZeroSlice> { if self.is_owned() { None } else { // We can extend the lifetime of the slice to 'a // since we know it is borrowed let ule_slice = unsafe { self.vector.as_arbitrary_slice() }; Some(ZeroSlice::from_ule_slice(ule_slice)) } } /// If the ZeroVec is owned, returns the capacity of the vector. /// /// Otherwise, if the ZeroVec is borrowed, returns `None`. 
/// /// # Examples /// /// ``` /// use zerovec::ZeroVec; /// /// let mut zv = ZeroVec::::new_borrowed(&[0, 1, 2, 3]); /// assert!(!zv.is_owned()); /// assert_eq!(zv.owned_capacity(), None); /// /// // Convert to owned without appending anything /// zv.with_mut(|v| ()); /// assert!(zv.is_owned()); /// assert_eq!(zv.owned_capacity(), Some(4.try_into().unwrap())); /// /// // Double the size by appending /// zv.with_mut(|v| v.push(0)); /// assert!(zv.is_owned()); /// assert_eq!(zv.owned_capacity(), Some(8.try_into().unwrap())); /// ``` #[inline] pub fn owned_capacity(&self) -> Option { #[cfg(feature = "alloc")] return NonZeroUsize::try_from(self.vector.capacity).ok(); #[cfg(not(feature = "alloc"))] return None; } } impl<'a> ZeroVec<'a, u8> { /// Converts a `ZeroVec` into a `ZeroVec`, retaining the current ownership model. /// /// Note that the length of the ZeroVec may change. /// /// # Examples /// /// Convert a borrowed `ZeroVec`: /// /// ``` /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; /// let zv_bytes = ZeroVec::new_borrowed(bytes); /// let zerovec: ZeroVec = zv_bytes.try_into_parsed().expect("infallible"); /// /// assert!(!zerovec.is_owned()); /// assert_eq!(zerovec.get(0), Some(211)); /// ``` /// /// Convert an owned `ZeroVec`: /// /// ``` /// use zerovec::ZeroVec; /// /// let bytes: Vec = vec![0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; /// let zv_bytes = ZeroVec::new_owned(bytes); /// let zerovec: ZeroVec = zv_bytes.try_into_parsed().expect("infallible"); /// /// assert!(zerovec.is_owned()); /// assert_eq!(zerovec.get(0), Some(211)); /// ``` #[cfg(feature = "alloc")] pub fn try_into_parsed(self) -> Result, UleError> { match self.into_cow() { Cow::Borrowed(bytes) => { let slice: &'a [T::ULE] = T::ULE::parse_bytes_to_slice(bytes)?; Ok(ZeroVec::new_borrowed(slice)) } Cow::Owned(vec) => { let slice = Vec::from(T::ULE::parse_bytes_to_slice(&vec)?); Ok(ZeroVec::new_owned(slice)) } } } } impl<'a, T> 
ZeroVec<'a, T>
where
    T: AsULE,
{
    /// Creates a `ZeroVec<T>` from a `&[T]` by allocating memory.
    ///
    /// This function results in an `Owned` instance of `ZeroVec<T>`.
    ///
    /// # Example
    ///
    /// ```
    /// use zerovec::ZeroVec;
    ///
    /// // The little-endian bytes correspond to the numbers on the following line.
    /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01];
    /// let nums: &[u16] = &[211, 281, 421, 461];
    ///
    /// let zerovec = ZeroVec::alloc_from_slice(nums);
    ///
    /// assert!(zerovec.is_owned());
    /// assert_eq!(bytes, zerovec.as_bytes());
    /// ```
    #[inline]
    #[cfg(feature = "alloc")]
    pub fn alloc_from_slice(other: &[T]) -> Self {
        Self::new_owned(other.iter().copied().map(T::to_unaligned).collect())
    }

    /// Creates a `Vec<T>` from a `ZeroVec<T>`.
    ///
    /// # Example
    ///
    /// ```
    /// use zerovec::ZeroVec;
    ///
    /// let nums: &[u16] = &[211, 281, 421, 461];
    /// let vec: Vec<u16> = ZeroVec::alloc_from_slice(nums).to_vec();
    ///
    /// assert_eq!(nums, vec.as_slice());
    /// ```
    #[inline]
    #[cfg(feature = "alloc")]
    pub fn to_vec(&self) -> Vec<T> {
        self.iter().collect()
    }
}

impl<'a, T> ZeroVec<'a, T>
where
    T: EqULE,
{
    /// Attempts to create a `ZeroVec<'a, T>` from a `&'a [T]` by borrowing the argument.
    ///
    /// If this is not possible, such as on a big-endian platform, `None` is returned.
    ///
    /// # Example
    ///
    /// ```
    /// use zerovec::ZeroVec;
    ///
    /// // The little-endian bytes correspond to the numbers on the following line.
    /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01];
    /// let nums: &[u16] = &[211, 281, 421, 461];
    ///
    /// if let Some(zerovec) = ZeroVec::try_from_slice(nums) {
    ///     assert!(!zerovec.is_owned());
    ///     assert_eq!(bytes, zerovec.as_bytes());
    /// }
    /// ```
    #[inline]
    pub fn try_from_slice(slice: &'a [T]) -> Option<Self> {
        T::slice_to_unaligned(slice).map(|ule_slice| Self::new_borrowed(ule_slice))
    }

    /// Creates a `ZeroVec<'a, T>` from a `&'a [T]`, either by borrowing the argument or by
    /// allocating a new vector.
    ///
    /// This is a cheap operation on little-endian platforms, falling back to a more expensive
    /// operation on big-endian platforms.
    ///
    /// # Example
    ///
    /// ```
    /// use zerovec::ZeroVec;
    ///
    /// // The little-endian bytes correspond to the numbers on the following line.
    /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01];
    /// let nums: &[u16] = &[211, 281, 421, 461];
    ///
    /// let zerovec = ZeroVec::from_slice_or_alloc(nums);
    ///
    /// // Note: zerovec could be either borrowed or owned.
    /// assert_eq!(bytes, zerovec.as_bytes());
    /// ```
    #[inline]
    #[cfg(feature = "alloc")]
    pub fn from_slice_or_alloc(slice: &'a [T]) -> Self {
        Self::try_from_slice(slice).unwrap_or_else(|| Self::alloc_from_slice(slice))
    }
}

impl<'a, T> ZeroVec<'a, T>
where
    T: AsULE,
{
    /// Mutates each element according to a given function, meant to be
    /// a more convenient version of calling `.iter_mut()` with
    /// [`ZeroVec::with_mut()`] which serves fewer use cases.
    ///
    /// This will convert the ZeroVec into an owned ZeroVec if not already the case.
    ///
    /// # Example
    ///
    /// ```
    /// use zerovec::ZeroVec;
    ///
    /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01];
    /// let mut zerovec: ZeroVec<u16> =
    ///     ZeroVec::parse_bytes(bytes).expect("infallible");
    ///
    /// zerovec.for_each_mut(|item| *item += 1);
    ///
    /// assert_eq!(zerovec.to_vec(), &[212, 282, 422, 462]);
    /// assert!(zerovec.is_owned());
    /// ```
    #[inline]
    #[cfg(feature = "alloc")]
    pub fn for_each_mut(&mut self, mut f: impl FnMut(&mut T)) {
        self.to_mut_slice().iter_mut().for_each(|item| {
            // Round-trip through the aligned form so `f` can work on a plain `T`.
            let mut aligned = T::from_unaligned(*item);
            f(&mut aligned);
            *item = aligned.to_unaligned()
        })
    }

    /// Same as [`ZeroVec::for_each_mut()`], but bubbles up errors.
    ///
    /// Iteration stops at the first error returned by `f`; elements visited before
    /// that point keep their updated values.
    ///
    /// # Example
    ///
    /// ```
    /// use zerovec::ZeroVec;
    ///
    /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01];
    /// let mut zerovec: ZeroVec<u16> =
    ///     ZeroVec::parse_bytes(bytes).expect("infallible");
    ///
    /// zerovec.try_for_each_mut(|item| {
    ///     *item = item.checked_add(1).ok_or(())?;
    ///     Ok(())
    /// })?;
    ///
    /// assert_eq!(zerovec.to_vec(), &[212, 282, 422, 462]);
    /// assert!(zerovec.is_owned());
    /// # Ok::<(), ()>(())
    /// ```
    #[inline]
    #[cfg(feature = "alloc")]
    pub fn try_for_each_mut<E>(
        &mut self,
        mut f: impl FnMut(&mut T) -> Result<(), E>,
    ) -> Result<(), E> {
        self.to_mut_slice().iter_mut().try_for_each(|item| {
            let mut aligned = T::from_unaligned(*item);
            // On error the current element is left untouched.
            f(&mut aligned)?;
            *item = aligned.to_unaligned();
            Ok(())
        })
    }

    /// Converts a borrowed ZeroVec to an owned ZeroVec. No-op if already owned.
    ///
    /// # Example
    ///
    /// ```
    /// use zerovec::ZeroVec;
    ///
    /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01];
    /// let zerovec: ZeroVec<u16> =
    ///     ZeroVec::parse_bytes(bytes).expect("infallible");
    /// assert!(!zerovec.is_owned());
    ///
    /// let owned = zerovec.into_owned();
    /// assert!(owned.is_owned());
    /// ```
    #[cfg(feature = "alloc")]
    pub fn into_owned(self) -> ZeroVec<'static, T> {
        use alloc::borrow::Cow;
        match self.into_cow() {
            Cow::Owned(vec) => ZeroVec::new_owned(vec),
            Cow::Borrowed(b) => ZeroVec::new_owned(b.into()),
        }
    }

    /// Allows the ZeroVec to be mutated by converting it to an owned variant, and producing
    /// a mutable vector of ULEs. If you only need a mutable slice, consider using [`Self::to_mut_slice()`]
    /// instead.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use crate::zerovec::ule::AsULE;
    /// use zerovec::ZeroVec;
    ///
    /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01];
    /// let mut zerovec: ZeroVec<u16> =
    ///     ZeroVec::parse_bytes(bytes).expect("infallible");
    /// assert!(!zerovec.is_owned());
    ///
    /// zerovec.with_mut(|v| v.push(12_u16.to_unaligned()));
    /// assert!(zerovec.is_owned());
    /// ```
    #[cfg(feature = "alloc")]
    pub fn with_mut<R>(&mut self, f: impl FnOnce(&mut alloc::vec::Vec<T::ULE>) -> R) -> R {
        use alloc::borrow::Cow;
        // We're in danger if f() panics whilst we've moved a vector out of self;
        // replace it with an empty dummy vector for now
        let this = core::mem::take(self);
        let mut vec = match this.into_cow() {
            Cow::Owned(v) => v,
            Cow::Borrowed(s) => s.into(),
        };
        let ret = f(&mut vec);
        *self = Self::new_owned(vec);
        ret
    }

    /// Allows the ZeroVec to be mutated by converting it to an owned variant (if necessary)
    /// and returning a slice to its backing buffer. [`Self::with_mut()`] allows for mutation
    /// of the vector itself.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use crate::zerovec::ule::AsULE;
    /// use zerovec::ZeroVec;
    ///
    /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01];
    /// let mut zerovec: ZeroVec<u16> =
    ///     ZeroVec::parse_bytes(bytes).expect("infallible");
    /// assert!(!zerovec.is_owned());
    ///
    /// zerovec.to_mut_slice()[1] = 5u16.to_unaligned();
    /// assert!(zerovec.is_owned());
    /// ```
    #[cfg(feature = "alloc")]
    pub fn to_mut_slice(&mut self) -> &mut [T::ULE] {
        if !self.is_owned() {
            // `buf` is either a valid vector or slice of `T::ULE`s, either
            // way it's always valid
            let slice = self.vector.as_slice();
            *self = ZeroVec::new_owned(slice.into());
        }
        // SAFETY: the branch above replaces any borrowed value with one built by
        // `new_owned`, so `buf` is not borrowed data here, and `&mut self` makes the
        // returned slice the only live reference to it.
        // NOTE(review): relies on `new_owned` storing the vector's allocation in `buf`;
        // that constructor is defined outside this section — confirm.
        unsafe { self.vector.buf.as_mut() }
    }

    /// Remove all elements from this ZeroVec and reset it to an empty borrowed state.
    pub fn clear(&mut self) {
        *self = Self::new_borrowed(&[])
    }

    /// Removes the first element of the ZeroVec. The ZeroVec remains in the same
    /// borrowed or owned state.
/// /// # Examples /// /// ``` /// # use crate::zerovec::ule::AsULE; /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; /// let mut zerovec: ZeroVec = /// ZeroVec::parse_bytes(bytes).expect("infallible"); /// assert!(!zerovec.is_owned()); /// /// let first = zerovec.take_first().unwrap(); /// assert_eq!(first, 0x00D3); /// assert!(!zerovec.is_owned()); /// /// let mut zerovec = zerovec.into_owned(); /// assert!(zerovec.is_owned()); /// let first = zerovec.take_first().unwrap(); /// assert_eq!(first, 0x0119); /// assert!(zerovec.is_owned()); /// ``` #[cfg(feature = "alloc")] pub fn take_first(&mut self) -> Option { match core::mem::take(self).into_cow() { Cow::Owned(mut vec) => { if vec.is_empty() { return None; } let ule = vec.remove(0); let rv = T::from_unaligned(ule); *self = ZeroVec::new_owned(vec); Some(rv) } Cow::Borrowed(b) => { let (ule, remainder) = b.split_first()?; let rv = T::from_unaligned(*ule); *self = ZeroVec::new_borrowed(remainder); Some(rv) } } } /// Removes the last element of the ZeroVec. The ZeroVec remains in the same /// borrowed or owned state. 
/// /// # Examples /// /// ``` /// # use crate::zerovec::ule::AsULE; /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; /// let mut zerovec: ZeroVec = /// ZeroVec::parse_bytes(bytes).expect("infallible"); /// assert!(!zerovec.is_owned()); /// /// let last = zerovec.take_last().unwrap(); /// assert_eq!(last, 0x01CD); /// assert!(!zerovec.is_owned()); /// /// let mut zerovec = zerovec.into_owned(); /// assert!(zerovec.is_owned()); /// let last = zerovec.take_last().unwrap(); /// assert_eq!(last, 0x01A5); /// assert!(zerovec.is_owned()); /// ``` #[cfg(feature = "alloc")] pub fn take_last(&mut self) -> Option { match core::mem::take(self).into_cow() { Cow::Owned(mut vec) => { let ule = vec.pop()?; let rv = T::from_unaligned(ule); *self = ZeroVec::new_owned(vec); Some(rv) } Cow::Borrowed(b) => { let (ule, remainder) = b.split_last()?; let rv = T::from_unaligned(*ule); *self = ZeroVec::new_borrowed(remainder); Some(rv) } } } /// Converts the type into a `Cow<'a, [T::ULE]>`, which is /// the logical equivalent of this type's internal representation #[inline] #[cfg(feature = "alloc")] pub fn into_cow(self) -> Cow<'a, [T::ULE]> { let this = core::mem::ManuallyDrop::new(self); if this.is_owned() { let vec = unsafe { // safe to call: we know it's owned, // and `self`/`this` are thenceforth no longer used or dropped { this }.vector.get_vec() }; Cow::Owned(vec) } else { // We can extend the lifetime of the slice to 'a // since we know it is borrowed let slice = unsafe { { this }.vector.as_arbitrary_slice() }; Cow::Borrowed(slice) } } } #[cfg(feature = "alloc")] impl FromIterator for ZeroVec<'_, T> { /// Creates an owned [`ZeroVec`] from an iterator of values. fn from_iter(iter: I) -> Self where I: IntoIterator, { ZeroVec::new_owned(iter.into_iter().map(|t| t.to_unaligned()).collect()) } } /// Convenience wrapper for [`ZeroSlice::from_ule_slice`]. 
The value will be created at compile-time, /// meaning that all arguments must also be constant. /// /// # Arguments /// /// * `$aligned` - The type of an element in its canonical, aligned form, e.g., `char`. /// * `$convert` - A const function that converts an `$aligned` into its unaligned equivalent, e.g., /// `const fn from_aligned(a: CanonicalType) -> CanonicalType::ULE`. /// * `$x` - The elements that the `ZeroSlice` will hold. /// /// # Examples /// /// Using array-conversion functions provided by this crate: /// /// ``` /// use zerovec::{ZeroSlice, zeroslice, ule::AsULE}; /// /// const SIGNATURE: &ZeroSlice = zeroslice!(char; ::ULE::from_aligned; ['b', 'y', 'e', '✌']); /// const EMPTY: &ZeroSlice = zeroslice![]; /// /// let empty: &ZeroSlice = zeroslice![]; /// let nums = zeroslice!(u32; ::ULE::from_unsigned; [1, 2, 3, 4, 5]); /// assert_eq!(nums.last().unwrap(), 5); /// ``` /// /// Using a custom array-conversion function: /// /// ``` /// use zerovec::{ule::AsULE, ule::RawBytesULE, zeroslice, ZeroSlice}; /// /// const fn be_convert(num: i16) -> ::ULE { /// RawBytesULE(num.to_be_bytes()) /// } /// /// const NUMBERS_BE: &ZeroSlice = /// zeroslice!(i16; be_convert; [1, -2, 3, -4, 5]); /// ``` #[macro_export] macro_rules! zeroslice { () => ( $crate::ZeroSlice::new_empty() ); ($aligned:ty; $convert:expr; [$($x:expr),+ $(,)?]) => ( $crate::ZeroSlice::<$aligned>::from_ule_slice( {const X: &[<$aligned as $crate::ule::AsULE>::ULE] = &[ $($convert($x)),* ]; X} ) ); } /// Creates a borrowed `ZeroVec`. Convenience wrapper for `zeroslice!(...).as_zerovec()`. The value /// will be created at compile-time, meaning that all arguments must also be constant. /// /// See [`zeroslice!`](crate::zeroslice) for more information. 
/// /// # Examples /// /// ``` /// use zerovec::{ZeroVec, zerovec, ule::AsULE}; /// /// const SIGNATURE: ZeroVec = zerovec!(char; ::ULE::from_aligned; ['a', 'y', 'e', '✌']); /// assert!(!SIGNATURE.is_owned()); /// /// const EMPTY: ZeroVec = zerovec![]; /// assert!(!EMPTY.is_owned()); /// ``` #[macro_export] macro_rules! zerovec { () => ( $crate::ZeroVec::new() ); ($aligned:ty; $convert:expr; [$($x:expr),+ $(,)?]) => ( $crate::zeroslice![$aligned; $convert; [$($x),+]].as_zerovec() ); } #[cfg(test)] mod tests { use super::*; use crate::samples::*; #[test] fn test_get() { { let zerovec = ZeroVec::from_slice_or_alloc(TEST_SLICE); assert_eq!(zerovec.get(0), Some(TEST_SLICE[0])); assert_eq!(zerovec.get(1), Some(TEST_SLICE[1])); assert_eq!(zerovec.get(2), Some(TEST_SLICE[2])); } { let zerovec = ZeroVec::::parse_bytes(TEST_BUFFER_LE).unwrap(); assert_eq!(zerovec.get(0), Some(TEST_SLICE[0])); assert_eq!(zerovec.get(1), Some(TEST_SLICE[1])); assert_eq!(zerovec.get(2), Some(TEST_SLICE[2])); } } #[test] fn test_binary_search() { { let zerovec = ZeroVec::from_slice_or_alloc(TEST_SLICE); assert_eq!(Ok(3), zerovec.binary_search(&0x0e0d0c)); assert_eq!(Err(3), zerovec.binary_search(&0x0c0d0c)); } { let zerovec = ZeroVec::::parse_bytes(TEST_BUFFER_LE).unwrap(); assert_eq!(Ok(3), zerovec.binary_search(&0x0e0d0c)); assert_eq!(Err(3), zerovec.binary_search(&0x0c0d0c)); } } #[test] fn test_odd_alignment() { assert_eq!( Some(0x020100), ZeroVec::::parse_bytes(TEST_BUFFER_LE).unwrap().get(0) ); assert_eq!( Some(0x04000201), ZeroVec::::parse_bytes(&TEST_BUFFER_LE[1..77]) .unwrap() .get(0) ); assert_eq!( Some(0x05040002), ZeroVec::::parse_bytes(&TEST_BUFFER_LE[2..78]) .unwrap() .get(0) ); assert_eq!( Some(0x06050400), ZeroVec::::parse_bytes(&TEST_BUFFER_LE[3..79]) .unwrap() .get(0) ); assert_eq!( Some(0x060504), ZeroVec::::parse_bytes(&TEST_BUFFER_LE[4..]) .unwrap() .get(0) ); assert_eq!( Some(0x4e4d4c00), ZeroVec::::parse_bytes(&TEST_BUFFER_LE[75..79]) .unwrap() .get(0) ); assert_eq!( 
Some(0x4e4d4c00), ZeroVec::::parse_bytes(&TEST_BUFFER_LE[3..79]) .unwrap() .get(18) ); assert_eq!( Some(0x4e4d4c), ZeroVec::::parse_bytes(&TEST_BUFFER_LE[76..]) .unwrap() .get(0) ); assert_eq!( Some(0x4e4d4c), ZeroVec::::parse_bytes(TEST_BUFFER_LE).unwrap().get(19) ); // TODO(#1144): Check for correct slice length in RawBytesULE // assert_eq!( // None, // ZeroVec::::parse_bytes(&TEST_BUFFER_LE[77..]) // .unwrap() // .get(0) // ); assert_eq!( None, ZeroVec::::parse_bytes(TEST_BUFFER_LE).unwrap().get(20) ); assert_eq!( None, ZeroVec::::parse_bytes(&TEST_BUFFER_LE[3..79]) .unwrap() .get(19) ); } } zerovec-0.11.1/src/zerovec/serde.rs000064400000000000000000000170321046102023000153000ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use super::{ZeroSlice, ZeroVec}; use crate::ule::*; use alloc::boxed::Box; use alloc::vec::Vec; use core::fmt; use core::marker::PhantomData; use core::mem; use serde::de::{self, Deserialize, Deserializer, SeqAccess, Visitor}; #[cfg(feature = "serde")] use serde::ser::{Serialize, SerializeSeq, Serializer}; struct ZeroVecVisitor { marker: PhantomData T>, } impl Default for ZeroVecVisitor { fn default() -> Self { Self { marker: PhantomData, } } } impl<'de, T> Visitor<'de> for ZeroVecVisitor where T: 'de + Deserialize<'de> + AsULE, { type Value = ZeroVec<'de, T>; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("a sequence or borrowed buffer of fixed-width elements") } fn visit_borrowed_bytes(self, bytes: &'de [u8]) -> Result where E: de::Error, { ZeroVec::parse_bytes(bytes).map_err(de::Error::custom) } fn visit_seq(self, mut seq: A) -> Result where A: SeqAccess<'de>, { let mut vec: Vec = if let Some(capacity) = seq.size_hint() { Vec::with_capacity(capacity) } else { Vec::new() }; while let Some(value) = seq.next_element::()? 
{ vec.push(T::to_unaligned(value)); } Ok(ZeroVec::new_owned(vec)) } } /// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate impl<'de, 'a, T> Deserialize<'de> for ZeroVec<'a, T> where T: 'de + Deserialize<'de> + AsULE, 'de: 'a, { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { let visitor = ZeroVecVisitor::default(); if deserializer.is_human_readable() { deserializer.deserialize_seq(visitor) } else { deserializer.deserialize_bytes(visitor) } } } /// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate impl Serialize for ZeroVec<'_, T> where T: Serialize + AsULE, { fn serialize(&self, serializer: S) -> Result where S: Serializer, { if serializer.is_human_readable() { let mut seq = serializer.serialize_seq(Some(self.len()))?; for value in self.iter() { seq.serialize_element(&value)?; } seq.end() } else { serializer.serialize_bytes(self.as_bytes()) } } } /// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate impl<'de, T> Deserialize<'de> for Box> where T: Deserialize<'de> + AsULE + 'static, { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { let mut zv = ZeroVec::::deserialize(deserializer)?; let vec = zv.with_mut(mem::take); Ok(ZeroSlice::from_boxed_slice(vec.into_boxed_slice())) } } /// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate impl<'de, 'a, T> Deserialize<'de> for &'a ZeroSlice where T: Deserialize<'de> + AsULE + 'static, 'de: 'a, { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { if deserializer.is_human_readable() { Err(de::Error::custom( "&ZeroSlice cannot be deserialized from human-readable formats", )) } else { let deserialized: ZeroVec<'a, T> = ZeroVec::deserialize(deserializer)?; let borrowed = if let Some(b) = deserialized.as_maybe_borrowed() { b } else { return Err(de::Error::custom( "&ZeroSlice can only deserialize in zero-copy ways", )); }; 
Ok(borrowed) } } } /// This impl requires enabling the optional `serde` Cargo feature of the `zerovec` crate impl Serialize for ZeroSlice where T: Serialize + AsULE, { fn serialize(&self, serializer: S) -> Result where S: Serializer, { self.as_zerovec().serialize(serializer) } } #[cfg(test)] #[allow(non_camel_case_types)] mod test { use crate::samples::*; use crate::ZeroVec; #[derive(serde::Serialize, serde::Deserialize)] struct DeriveTest_ZeroVec<'data> { #[serde(borrow)] _data: ZeroVec<'data, u16>, } #[test] fn test_serde_json() { let zerovec_orig = ZeroVec::from_slice_or_alloc(TEST_SLICE); let json_str = serde_json::to_string(&zerovec_orig).expect("serialize"); assert_eq!(JSON_STR, json_str); // ZeroVec should deserialize from JSON to either Vec or ZeroVec let vec_new: Vec = serde_json::from_str(&json_str).expect("deserialize from buffer to Vec"); assert_eq!( zerovec_orig, ZeroVec::::from_slice_or_alloc(vec_new.as_slice()) ); let zerovec_new: ZeroVec = serde_json::from_str(&json_str).expect("deserialize from buffer to ZeroVec"); assert_eq!(zerovec_orig, zerovec_new); assert!(zerovec_new.is_owned()); } #[test] fn test_serde_bincode() { let zerovec_orig = ZeroVec::from_slice_or_alloc(TEST_SLICE); let bincode_buf = bincode::serialize(&zerovec_orig).expect("serialize"); assert_eq!(BINCODE_BUF, bincode_buf); // ZeroVec should deserialize from Bincode to ZeroVec but not Vec bincode::deserialize::>(&bincode_buf).expect_err("deserialize from buffer to Vec"); let zerovec_new: ZeroVec = bincode::deserialize(&bincode_buf).expect("deserialize from buffer to ZeroVec"); assert_eq!(zerovec_orig, zerovec_new); assert!(!zerovec_new.is_owned()); } #[test] fn test_serde_rmp() { let zerovec_orig = ZeroVec::from_slice_or_alloc(TEST_SLICE); let rmp_buf = rmp_serde::to_vec(&zerovec_orig).expect("serialize"); // ZeroVec should deserialize from Bincode to ZeroVec but not Vec bincode::deserialize::>(&rmp_buf).expect_err("deserialize from buffer to Vec"); let zerovec_new: ZeroVec = 
rmp_serde::from_slice(&rmp_buf).expect("deserialize from buffer to ZeroVec"); assert_eq!(zerovec_orig, zerovec_new); assert!(!zerovec_new.is_owned()); } #[test] fn test_chars_valid() { // 1-byte, 2-byte, 3-byte, and 4-byte character in UTF-8 (not as relevant in UTF-32) let zerovec_orig = ZeroVec::alloc_from_slice(&['w', 'ω', '文', '𑄃']); let bincode_buf = bincode::serialize(&zerovec_orig).expect("serialize"); let zerovec_new: ZeroVec = bincode::deserialize(&bincode_buf).expect("deserialize from buffer to ZeroVec"); assert_eq!(zerovec_orig, zerovec_new); assert!(!zerovec_new.is_owned()); } #[test] fn test_chars_invalid() { // 119 and 120 are valid, but not 0xD800 (high surrogate) let zerovec_orig: ZeroVec = ZeroVec::from_slice_or_alloc(&[119, 0xD800, 120]); let bincode_buf = bincode::serialize(&zerovec_orig).expect("serialize"); let zerovec_result = bincode::deserialize::>(&bincode_buf); assert!(zerovec_result.is_err()); } } zerovec-0.11.1/src/zerovec/slice.rs000064400000000000000000000454331046102023000153030ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use super::*; use core::cmp::Ordering; use core::ops::Range; /// A zero-copy "slice", i.e. the zero-copy version of `[T]`. /// /// This behaves /// similarly to [`ZeroVec`], however [`ZeroVec`] is allowed to contain /// owned data and as such is ideal for deserialization since most human readable /// serialization formats cannot unconditionally deserialize zero-copy. /// /// This type can be used inside [`VarZeroVec`](crate::VarZeroVec) and [`ZeroMap`](crate::ZeroMap): /// This essentially allows for the construction of zero-copy types isomorphic to `Vec>` by instead /// using `VarZeroVec>`. See the [`VarZeroVec`](crate::VarZeroVec) docs for an example. 
///
/// # Examples
///
/// Const-construct a ZeroSlice of u16:
///
/// ```
/// use zerovec::ule::AsULE;
/// use zerovec::ZeroSlice;
///
/// const DATA: &ZeroSlice<u16> =
///     ZeroSlice::<u16>::from_ule_slice(&<u16 as AsULE>::ULE::from_array([
///         211, 281, 421, 32973,
///     ]));
///
/// assert_eq!(DATA.get(1), Some(281));
/// ```
#[repr(transparent)]
pub struct ZeroSlice<T: AsULE>([T::ULE]);

impl<T> ZeroSlice<T>
where
    T: AsULE,
{
    /// Returns an empty slice.
    pub const fn new_empty() -> &'static Self {
        Self::from_ule_slice(&[])
    }

    /// Get this [`ZeroSlice`] as a borrowed [`ZeroVec`]
    ///
    /// [`ZeroSlice`] does not have most of the methods that [`ZeroVec`] does,
    /// so it is recommended to convert it to a [`ZeroVec`] before doing anything.
    #[inline]
    pub const fn as_zerovec(&self) -> ZeroVec<'_, T> {
        ZeroVec::new_borrowed(&self.0)
    }

    /// Attempt to construct a `&ZeroSlice<T>` from a byte slice, returning an error
    /// if it's not a valid byte sequence
    pub fn parse_bytes(bytes: &[u8]) -> Result<&Self, UleError> {
        T::ULE::parse_bytes_to_slice(bytes).map(Self::from_ule_slice)
    }

    /// Uses a `&[u8]` buffer as a `ZeroSlice<T>` without any verification.
    ///
    /// # Safety
    ///
    /// `bytes` need to be an output from [`ZeroSlice::as_bytes()`].
    pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
        // &[u8] and &[T::ULE] are the same slice with different length metadata.
        Self::from_ule_slice(core::slice::from_raw_parts(
            bytes.as_ptr() as *const T::ULE,
            bytes.len() / core::mem::size_of::<T::ULE>(),
        ))
    }

    /// Construct a `&ZeroSlice<T>` from a slice of ULEs.
    ///
    /// This function can be used for constructing ZeroVecs in a const context, avoiding
    /// parsing checks.
    ///
    /// See [`ZeroSlice`] for an example.
    #[inline]
    pub const fn from_ule_slice(slice: &[T::ULE]) -> &Self {
        // This is safe because ZeroSlice is transparent over [T::ULE]
        // so &ZeroSlice<T> can be safely cast from &[T::ULE]
        unsafe { &*(slice as *const _ as *const Self) }
    }

    /// Construct a `Box<ZeroSlice<T>>` from a boxed slice of ULEs
    #[inline]
    #[cfg(feature = "alloc")]
    pub fn from_boxed_slice(slice: alloc::boxed::Box<[T::ULE]>) -> alloc::boxed::Box<Self> {
        // This is safe because ZeroSlice is transparent over [T::ULE]
        // so Box<ZeroSlice<T>> can be safely cast from Box<[T::ULE]>
        unsafe { alloc::boxed::Box::from_raw(alloc::boxed::Box::into_raw(slice) as *mut Self) }
    }

    /// Returns this slice as its underlying `&[u8]` byte buffer representation.
    ///
    /// Useful for serialization.
    ///
    /// # Example
    ///
    /// ```
    /// use zerovec::ZeroVec;
    ///
    /// // The little-endian bytes correspond to the numbers on the following line.
    /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80];
    /// let nums: &[u16] = &[211, 281, 421, 32973];
    ///
    /// let zerovec = ZeroVec::alloc_from_slice(nums);
    ///
    /// assert_eq!(bytes, zerovec.as_bytes());
    /// ```
    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        T::ULE::slice_as_bytes(self.as_ule_slice())
    }

    /// Dereferences this slice as `&[T::ULE]`.
    #[inline]
    pub const fn as_ule_slice(&self) -> &[T::ULE] {
        &self.0
    }

    /// Returns the number of elements in this slice.
    ///
    /// # Example
    ///
    /// ```
    /// use zerovec::ule::AsULE;
    /// use zerovec::ZeroVec;
    ///
    /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80];
    /// let zerovec: ZeroVec<u16> =
    ///     ZeroVec::parse_bytes(bytes).expect("infallible");
    ///
    /// assert_eq!(4, zerovec.len());
    /// assert_eq!(
    ///     bytes.len(),
    ///     zerovec.len() * std::mem::size_of::<<u16 as AsULE>::ULE>()
    /// );
    /// ```
    #[inline]
    pub const fn len(&self) -> usize {
        self.as_ule_slice().len()
    }

    /// Returns whether this slice is empty.
    ///
    /// # Example
    ///
    /// ```
    /// use zerovec::ZeroVec;
    ///
    /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80];
    /// let zerovec: ZeroVec<u16> =
    ///     ZeroVec::parse_bytes(bytes).expect("infallible");
    /// assert!(!zerovec.is_empty());
    ///
    /// let emptyvec: ZeroVec<u16> = ZeroVec::parse_bytes(&[]).expect("infallible");
    /// assert!(emptyvec.is_empty());
    /// ```
    #[inline]
    pub const fn is_empty(&self) -> bool {
        self.as_ule_slice().is_empty()
    }
}

impl<T> ZeroSlice<T>
where
    T: AsULE,
{
    /// Gets the element at the specified index. Returns `None` if out of range.
    ///
    /// # Example
    ///
    /// ```
    /// use zerovec::ZeroVec;
    ///
    /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80];
    /// let zerovec: ZeroVec<u16> =
    ///     ZeroVec::parse_bytes(bytes).expect("infallible");
    ///
    /// assert_eq!(zerovec.get(2), Some(421));
    /// assert_eq!(zerovec.get(4), None);
    /// ```
    #[inline]
    pub fn get(&self, index: usize) -> Option<T> {
        self.as_ule_slice()
            .get(index)
            .copied()
            .map(T::from_unaligned)
    }

    /// Gets the entire slice as an array of length `N`. Returns `None` if the slice
    /// does not have exactly `N` elements.
    ///
    /// # Example
    ///
    /// ```
    /// use zerovec::ZeroVec;
    ///
    /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80];
    /// let zerovec: ZeroVec<u16> =
    ///     ZeroVec::parse_bytes(bytes).expect("infallible");
    /// let array: [u16; 4] =
    ///     zerovec.get_as_array().expect("should be 4 items in array");
    ///
    /// assert_eq!(array[2], 421);
    /// ```
    pub fn get_as_array<const N: usize>(&self) -> Option<[T; N]> {
        // The conversion to a fixed-size array reference fails unless the length is exactly `N`.
        let ule_array = <&[T::ULE; N]>::try_from(self.as_ule_slice()).ok()?;
        Some(ule_array.map(|u| T::from_unaligned(u)))
    }

    /// Gets a subslice of elements within a certain range. Returns `None` if the range
    /// is out of bounds of this `ZeroSlice`.
/// /// # Example /// /// ``` /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80]; /// let zerovec: ZeroVec = /// ZeroVec::parse_bytes(bytes).expect("infallible"); /// /// assert_eq!( /// zerovec.get_subslice(1..3), /// Some(&*ZeroVec::from_slice_or_alloc(&[0x0119, 0x01A5])) /// ); /// assert_eq!(zerovec.get_subslice(3..5), None); /// ``` #[inline] pub fn get_subslice(&self, range: Range) -> Option<&ZeroSlice> { self.0.get(range).map(ZeroSlice::from_ule_slice) } /// Get a borrowed reference to the underlying ULE type at a specified index. /// /// Prefer [`Self::get()`] over this method where possible since working /// directly with `ULE` types is less ergonomic pub fn get_ule_ref(&self, index: usize) -> Option<&T::ULE> { self.as_ule_slice().get(index) } /// Casts a `ZeroSlice` to a compatible `ZeroSlice

`. /// /// `T` and `P` are compatible if they have the same `ULE` representation. /// /// If the `ULE`s of `T` and `P` are different, use [`Self::try_as_converted()`]. /// /// # Examples /// /// ``` /// use zerovec::ZeroSlice; /// /// const BYTES: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80]; /// const ZS_U16: &ZeroSlice = { /// match ZeroSlice::::try_from_bytes(BYTES) { /// Ok(s) => s, /// Err(_) => unreachable!(), /// } /// }; /// /// let zs_i16: &ZeroSlice = ZS_U16.cast(); /// /// assert_eq!(ZS_U16.get(3), Some(32973)); /// assert_eq!(zs_i16.get(3), Some(-32563)); /// ``` #[inline] pub const fn cast

(&self) -> &ZeroSlice

where P: AsULE, { ZeroSlice::

::from_ule_slice(self.as_ule_slice()) } /// Converts a `&ZeroSlice` into a `&ZeroSlice

`. /// /// The resulting slice will have the same length as the original slice /// if and only if `T::ULE` and `P::ULE` are the same size. /// /// If `T` and `P` have the exact same `ULE`, use [`Self::cast()`]. /// /// # Examples /// /// ``` /// use zerovec::ZeroSlice; /// /// const BYTES: &[u8] = &[0x7F, 0xF3, 0x01, 0x00, 0x49, 0xF6, 0x01, 0x00]; /// const ZS_U32: &ZeroSlice = { /// match ZeroSlice::::try_from_bytes(BYTES) { /// Ok(s) => s, /// Err(_) => unreachable!(), /// } /// }; /// /// let zs_u8_4: &ZeroSlice<[u8; 4]> = /// ZS_U32.try_as_converted().expect("valid code points"); /// /// assert_eq!(ZS_U32.get(0), Some(127871)); /// assert_eq!(zs_u8_4.get(0), Some([0x7F, 0xF3, 0x01, 0x00])); /// ``` #[inline] pub fn try_as_converted(&self) -> Result<&ZeroSlice

, UleError> { let new_slice = P::ULE::parse_bytes_to_slice(self.as_bytes())?; Ok(ZeroSlice::from_ule_slice(new_slice)) } /// Gets the first element. Returns `None` if empty. /// /// # Example /// /// ``` /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80]; /// let zerovec: ZeroVec = /// ZeroVec::parse_bytes(bytes).expect("infallible"); /// /// assert_eq!(zerovec.first(), Some(211)); /// ``` #[inline] pub fn first(&self) -> Option { self.as_ule_slice().first().copied().map(T::from_unaligned) } /// Gets the last element. Returns `None` if empty. /// /// # Example /// /// ``` /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80]; /// let zerovec: ZeroVec = /// ZeroVec::parse_bytes(bytes).expect("infallible"); /// /// assert_eq!(zerovec.last(), Some(32973)); /// ``` #[inline] pub fn last(&self) -> Option { self.as_ule_slice().last().copied().map(T::from_unaligned) } /// Gets an iterator over the elements. /// /// # Example /// /// ``` /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80]; /// let zerovec: ZeroVec = /// ZeroVec::parse_bytes(bytes).expect("infallible"); /// let mut it = zerovec.iter(); /// /// assert_eq!(it.next(), Some(211)); /// assert_eq!(it.next(), Some(281)); /// assert_eq!(it.next(), Some(421)); /// assert_eq!(it.next(), Some(32973)); /// assert_eq!(it.next(), None); /// ``` #[inline] pub fn iter<'a>(&'a self) -> ZeroSliceIter<'a, T> { ZeroSliceIter(self.as_ule_slice().iter()) } /// Returns a tuple with the first element and a subslice of the remaining elements. 
/// /// # Example /// /// ``` /// use zerovec::ule::AsULE; /// use zerovec::ZeroSlice; /// /// const DATA: &ZeroSlice = /// ZeroSlice::::from_ule_slice(&::ULE::from_array([ /// 211, 281, 421, 32973, /// ])); /// const EXPECTED_VALUE: (u16, &ZeroSlice) = ( /// 211, /// ZeroSlice::::from_ule_slice(&::ULE::from_array([ /// 281, 421, 32973, /// ])), /// ); /// assert_eq!(EXPECTED_VALUE, DATA.split_first().unwrap()); /// ``` #[inline] pub fn split_first(&self) -> Option<(T, &ZeroSlice)> { if let Some(first) = self.first() { return Some(( first, // `unwrap()` must succeed, because `first()` returned `Some`. #[allow(clippy::unwrap_used)] self.get_subslice(1..self.len()).unwrap(), )); } None } } /// An iterator over elements in a VarZeroVec #[derive(Debug)] pub struct ZeroSliceIter<'a, T: AsULE>(core::slice::Iter<'a, T::ULE>); impl<'a, T: AsULE> Iterator for ZeroSliceIter<'a, T> { type Item = T; fn next(&mut self) -> Option { self.0.next().copied().map(T::from_unaligned) } } impl<'a, T: AsULE> ExactSizeIterator for ZeroSliceIter<'a, T> { fn len(&self) -> usize { self.0.len() } } impl<'a, T: AsULE> DoubleEndedIterator for ZeroSliceIter<'a, T> { fn next_back(&mut self) -> Option { self.0.next_back().copied().map(T::from_unaligned) } } impl ZeroSlice where T: AsULE + Ord, { /// Binary searches a sorted `ZeroVec` for the given element. For more information, see /// the primitive function [`binary_search`]. 
/// /// # Example /// /// ``` /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80]; /// let zerovec: ZeroVec = /// ZeroVec::parse_bytes(bytes).expect("infallible"); /// /// assert_eq!(zerovec.binary_search(&281), Ok(1)); /// assert_eq!(zerovec.binary_search(&282), Err(2)); /// ``` /// /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search #[inline] pub fn binary_search(&self, x: &T) -> Result { self.as_ule_slice() .binary_search_by(|probe| T::from_unaligned(*probe).cmp(x)) } } impl ZeroSlice where T: AsULE, { /// Binary searches a sorted `ZeroVec` based on a given predicate. For more information, see /// the primitive function [`binary_search_by`]. /// /// # Example /// /// ``` /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80]; /// let zerovec: ZeroVec = /// ZeroVec::parse_bytes(bytes).expect("infallible"); /// /// assert_eq!(zerovec.binary_search_by(|x| x.cmp(&281)), Ok(1)); /// assert_eq!(zerovec.binary_search_by(|x| x.cmp(&282)), Err(2)); /// ``` /// /// [`binary_search_by`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search_by #[inline] pub fn binary_search_by( &self, mut predicate: impl FnMut(T) -> Ordering, ) -> Result { self.as_ule_slice() .binary_search_by(|probe| predicate(T::from_unaligned(*probe))) } } // Safety (based on the safety checklist on the VarULE trait): // (`ZeroSlice` is a transparent wrapper around [T::ULE]) // 1. [T::ULE] does not include any uninitialized or padding bytes (achieved by being a slice of a ULE type) // 2. [T::ULE] is aligned to 1 byte (achieved by being a slice of a ULE type) // 3. The impl of `validate_bytes()` returns an error if any byte is not valid. // 4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety // 5. The impl of `from_bytes_unchecked()` returns a reference to the same data. // 6. 
`as_bytes()` and `parse_bytes()` are defaulted // 7. `[T::ULE]` byte equality is semantic equality (relying on the guideline of the underlying `ULE` type) unsafe impl VarULE for ZeroSlice { #[inline] fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> { T::ULE::validate_bytes(bytes) } #[inline] unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self { Self::from_ule_slice(T::ULE::slice_from_bytes_unchecked(bytes)) } } impl Eq for ZeroSlice where T: AsULE + Eq {} impl PartialEq> for ZeroSlice where T: AsULE + PartialEq, { #[inline] fn eq(&self, other: &ZeroSlice) -> bool { self.as_zerovec().eq(&other.as_zerovec()) } } impl PartialEq<[T]> for ZeroSlice where T: AsULE + PartialEq, { #[inline] fn eq(&self, other: &[T]) -> bool { self.iter().eq(other.iter().copied()) } } impl<'a, T> PartialEq> for ZeroSlice where T: AsULE + PartialEq, { #[inline] fn eq(&self, other: &ZeroVec<'a, T>) -> bool { self.as_zerovec().eq(other) } } impl<'a, T> PartialEq> for ZeroVec<'a, T> where T: AsULE + PartialEq, { #[inline] fn eq(&self, other: &ZeroSlice) -> bool { self.eq(&other.as_zerovec()) } } impl fmt::Debug for ZeroSlice where T: AsULE + fmt::Debug, { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { self.as_zerovec().fmt(f) } } impl PartialOrd for ZeroSlice { fn partial_cmp(&self, other: &Self) -> Option { self.iter().partial_cmp(other.iter()) } } impl Ord for ZeroSlice { fn cmp(&self, other: &Self) -> Ordering { self.iter().cmp(other.iter()) } } #[cfg(feature = "alloc")] impl AsRef> for alloc::vec::Vec { fn as_ref(&self) -> &ZeroSlice { ZeroSlice::::from_ule_slice(self) } } impl AsRef> for &[T::ULE] { fn as_ref(&self) -> &ZeroSlice { ZeroSlice::::from_ule_slice(self) } } impl Default for &ZeroSlice where T: AsULE, { fn default() -> Self { ZeroSlice::from_ule_slice(&[]) } } #[cfg(test)] mod test { use super::*; use crate::zeroslice; #[test] fn test_split_first() { { // empty slice. 
assert_eq!(None, ZeroSlice::::new_empty().split_first()); } { // single element slice const DATA: &ZeroSlice = zeroslice!(u16; ::ULE::from_unsigned; [211]); assert_eq!((211, zeroslice![]), DATA.split_first().unwrap()); } { // slice with many elements. const DATA: &ZeroSlice = zeroslice!(u16; ::ULE::from_unsigned; [211, 281, 421, 32973]); const EXPECTED_VALUE: (u16, &ZeroSlice) = ( 211, zeroslice!(u16; ::ULE::from_unsigned; [281, 421, 32973]), ); assert_eq!(EXPECTED_VALUE, DATA.split_first().unwrap()); } } }