tinystr-0.8.2/.cargo_vcs_info.json0000644000000001530000000000100125560ustar { "git": { "sha1": "29dfe2790b6cfdab94ca6a6b69f58ce54802dbf7" }, "path_in_vcs": "utils/tinystr" }tinystr-0.8.2/Cargo.lock0000644000000456150000000000100105450ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 3 [[package]] name = "aho-corasick" version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", ] [[package]] name = "anes" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstyle" version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" [[package]] name = "autocfg" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "bincode" version = "1.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" dependencies = [ "serde", ] [[package]] name = "bumpalo" version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" [[package]] name = "cast" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cfg-if" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "ciborium" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" dependencies = [ "ciborium-io", "ciborium-ll", "serde", ] [[package]] name = "ciborium-io" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" [[package]] name = "ciborium-ll" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" dependencies = [ "ciborium-io", "half", ] [[package]] name = "clap" version = "4.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e578d6ec4194633722ccf9544794b71b1385c3c027efe0c55db226fc880865c" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" version = "4.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4df4df40ec50c46000231c914968278b1eb05098cf8f1b3a518a95030e71d1c7" dependencies = [ "anstyle", "clap_lex", ] [[package]] name = "clap_lex" version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" [[package]] name = "cobs" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fa961b519f0b462e3a3b4a34b64d119eeaca1d59af726fe450bbba07a9fc0a1" dependencies = [ "thiserror", ] [[package]] name = "criterion" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" dependencies = [ "anes", "cast", "ciborium", "clap", "criterion-plot", "is-terminal", "itertools", "num-traits", "once_cell", "oorandom", "plotters", "rayon", "regex", "serde", "serde_derive", "serde_json", "tinytemplate", "walkdir", ] [[package]] name = "criterion-plot" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" dependencies = [ "cast", "itertools", ] [[package]] name = "crossbeam-deque" version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" dependencies = [ "crossbeam-epoch", "crossbeam-utils", ] [[package]] name = "crossbeam-epoch" version = "0.9.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" dependencies = [ "crossbeam-utils", ] [[package]] name = "crossbeam-utils" version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "crunchy" version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" [[package]] name = "databake" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff6ee9e2d2afb173bcdeee45934c89ec341ab26f91c9933774fc15c2b58f83ef" dependencies = [ "proc-macro2", "quote", ] [[package]] name = "displaydoc" version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "either" version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "embedded-io" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" [[package]] name = "embedded-io" version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" [[package]] name = "getrandom" version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", "libc", "r-efi", "wasip2", ] [[package]] name = "half" version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" dependencies = [ "cfg-if", "crunchy", ] [[package]] name = "hermit-abi" version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" [[package]] name = "is-terminal" version = "0.4.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" dependencies = [ "hermit-abi", "libc", "windows-sys", ] [[package]] name = "itertools" version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" dependencies = [ "either", ] [[package]] name = "itoa" version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "js-sys" version = "0.3.81" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305" dependencies = [ "once_cell", "wasm-bindgen", ] [[package]] name = "libc" version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" [[package]] name = "log" version = "0.4.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" [[package]] name = "memchr" version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" [[package]] name = "num-traits" version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", ] [[package]] name = "once_cell" version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "oorandom" version = "11.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" [[package]] name = "plotters" version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" dependencies = [ "num-traits", "plotters-backend", "plotters-svg", "wasm-bindgen", "web-sys", ] [[package]] name = "plotters-backend" version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" [[package]] name = "plotters-svg" version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" dependencies = [ "plotters-backend", ] [[package]] name = "postcard" version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6764c3b5dd454e283a30e6dfe78e9b31096d9e32036b5d1eaac7a6119ccb9a24" dependencies = [ "cobs", "embedded-io 0.4.0", "embedded-io 0.6.1", "serde", ] [[package]] name = "ppv-lite86" version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" dependencies = [ "zerocopy", ] [[package]] name = "proc-macro2" version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" dependencies = [ "unicode-ident", ] [[package]] name = "quote" version = "1.0.41" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" dependencies = [ "proc-macro2", ] [[package]] name = "r-efi" version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" [[package]] name = "rand" version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ "rand_chacha", "rand_core", ] [[package]] name = "rand_chacha" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", "rand_core", ] [[package]] name = "rand_core" version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ "getrandom", ] [[package]] name = "rayon" version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" dependencies = [ "either", "rayon-core", ] [[package]] name = "rayon-core" version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" dependencies = [ "crossbeam-deque", "crossbeam-utils", ] [[package]] name = "regex" version = "1.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" dependencies = [ "aho-corasick", "memchr", "regex-automata", "regex-syntax", ] [[package]] name = "regex-automata" version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" dependencies = [ "aho-corasick", "memchr", "regex-syntax", ] [[package]] name = "regex-syntax" version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" [[package]] name = "rustversion" version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "ryu" version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" [[package]] name = "same-file" version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" dependencies = [ "winapi-util", ] [[package]] name = "serde" version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" dependencies = [ "serde_core", "serde_derive", ] [[package]] name = "serde_core" version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "serde_json" version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" dependencies = [ "itoa", "memchr", "ryu", "serde", "serde_core", ] [[package]] name = "syn" version = "2.0.108" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "thiserror" version = "2.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" version = "2.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "tinystr" version = "0.8.2" dependencies = [ "bincode", "criterion", "databake", "displaydoc", "postcard", "rand", "serde_core", "serde_json", "zerovec", ] [[package]] name = "tinytemplate" version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" dependencies = [ "serde", "serde_json", ] [[package]] name = "unicode-ident" version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "462eeb75aeb73aea900253ce739c8e18a67423fadf006037cd3ff27e82748a06" [[package]] name = "walkdir" version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" dependencies = [ "same-file", "winapi-util", ] [[package]] name = "wasip2" version = "1.0.1+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" dependencies = [ "wit-bindgen", ] [[package]] name = "wasm-bindgen" version = "0.2.104" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1da10c01ae9f1ae40cbfac0bac3b1e724b320abfcf52229f80b547c0d250e2d" dependencies = [ "cfg-if", "once_cell", "rustversion", "wasm-bindgen-macro", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-backend" version = "0.2.104" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "671c9a5a66f49d8a47345ab942e2cb93c7d1d0339065d4f8139c486121b43b19" dependencies = [ "bumpalo", "log", "proc-macro2", "quote", "syn", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" version = "0.2.104" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ca60477e4c59f5f2986c50191cd972e3a50d8a95603bc9434501cf156a9a119" dependencies = [ "quote", "wasm-bindgen-macro-support", ] [[package]] name = "wasm-bindgen-macro-support" version = "0.2.104" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7" dependencies = [ "proc-macro2", "quote", "syn", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" version = "0.2.104" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bad67dc8b2a1a6e5448428adec4c3e84c43e561d8c9ee8a9e5aabeb193ec41d1" dependencies = [ "unicode-ident", ] [[package]] name = "web-sys" version = "0.3.81" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9367c417a924a74cae129e6a2ae3b47fabb1f8995595ab474029da749a8be120" dependencies = [ "js-sys", "wasm-bindgen", ] [[package]] name = "winapi-util" version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ "windows-sys", ] [[package]] name = "windows-link" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" [[package]] name = "windows-sys" version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ "windows-link", ] [[package]] name = "wit-bindgen" version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" [[package]] name = "zerocopy" version = "0.8.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" version = "0.8.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "zerofrom" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" [[package]] name = "zerovec" version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" dependencies = [ "serde", "zerofrom", ] tinystr-0.8.2/Cargo.toml0000644000000050220000000000100105540ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.82" name = "tinystr" version = "0.8.2" authors = ["The ICU4X Project Developers"] build = false include = [ "data/**/*", "src/**/*", "examples/**/*", "benches/**/*", "tests/**/*", "Cargo.toml", "LICENSE", "README.md", "build.rs", ] autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "A small ASCII-only bounded length string representation." readme = "README.md" keywords = [ "string", "str", "small", "tiny", "no_std", ] categories = ["data-structures"] license = "Unicode-3.0" repository = "https://github.com/unicode-org/icu4x" [package.metadata.workspaces] independent = true [package.metadata.docs.rs] all-features = true [features] alloc = [ "serde_core?/alloc", "zerovec?/alloc", ] databake = ["dep:databake"] default = ["alloc"] serde = ["dep:serde_core"] std = [] zerovec = ["dep:zerovec"] [lib] name = "tinystr" path = "src/lib.rs" bench = false [[test]] name = "serde" path = "tests/serde.rs" required-features = ["serde"] [[bench]] name = "construct" path = "benches/construct.rs" harness = false [[bench]] name = "overview" path = "benches/overview.rs" harness = false [[bench]] name = "read" path = "benches/read.rs" harness = false [[bench]] name = "serde" path = "benches/serde.rs" harness = false required-features = ["serde"] [dependencies.databake] version = "0.2.0" optional = true default-features = false [dependencies.displaydoc] version = "0.2.3" default-features = false [dependencies.serde_core] version = "1.0.220" optional = true default-features = false [dependencies.zerovec] version = "0.11.3" optional = true default-features = false [dev-dependencies.bincode] version = "1.3.1" [dev-dependencies.postcard] version = "1.0.3" features = ["use-std"] default-features = false [dev-dependencies.rand] version = "0.9" features = ["small_rng"] [dev-dependencies.serde_json] version = "1.0.45" features = ["alloc"] [target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies.criterion] version = "0.5.0" tinystr-0.8.2/Cargo.toml.orig000064400000000000000000000032271046102023000142420ustar 00000000000000# This file is part of ICU4X. For terms of use, please see the file # called LICENSE at the top level of the ICU4X source tree # (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). [package] name = "tinystr" description = "A small ASCII-only bounded length string representation." version = "0.8.2" keywords = ["string", "str", "small", "tiny", "no_std"] categories = ["data-structures"] authors.workspace = true edition.workspace = true include.workspace = true license.workspace = true repository.workspace = true rust-version = "1.82" [package.metadata.workspaces] independent = true [package.metadata.docs.rs] all-features = true [dependencies] displaydoc = { workspace = true } serde_core = { workspace = true, optional = true } zerovec = { workspace = true, optional = true } databake = { workspace = true, optional = true } [dev-dependencies] bincode = { workspace = true } postcard = { workspace = true, features = ["use-std"] } rand = { workspace = true, features = ["small_rng"] } serde_json = { workspace = true, features = ["alloc"] } [target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies] criterion = { workspace = true } [features] default = ["alloc"] alloc = ["serde_core?/alloc", "zerovec?/alloc"] zerovec = ["dep:zerovec"] databake = ["dep:databake"] serde = ["dep:serde_core"] # No longer does anything std = [] [lib] bench = false # This option is required for Benchmark CI [[test]] name = "serde" required-features = ["serde"] [[bench]] name = "overview" harness = false [[bench]] name = "construct" harness = false [[bench]] name = "read" harness = false [[bench]] name = "serde" harness = false required-features = ["serde"] tinystr-0.8.2/LICENSE000064400000000000000000000042231046102023000123550ustar 00000000000000UNICODE LICENSE V3 COPYRIGHT AND PERMISSION NOTICE Copyright © 2020-2024 Unicode, Inc. NOTICE TO USER: Carefully read the following legal agreement. BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE. Permission is hereby granted, free of charge, to any person obtaining a copy of data files and any associated documentation (the "Data Files") or software and any associated documentation (the "Software") to deal in the Data Files or Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, and/or sell copies of the Data Files or Software, and to permit persons to whom the Data Files or Software are furnished to do so, provided that either (a) this copyright and permission notice appear with all copies of the Data Files or Software, or (b) this copyright and permission notice appear in associated Documentation. THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA FILES OR SOFTWARE. Except as contained in this notice, the name of a copyright holder shall not be used in advertising or otherwise to promote the sale, use or other dealings in these Data Files or Software without prior written authorization of the copyright holder. SPDX-License-Identifier: Unicode-3.0 — Portions of ICU4X may have been adapted from ICU4C and/or ICU4J. ICU 1.8.1 to ICU 57.1 © 1995-2016 International Business Machines Corporation and others. tinystr-0.8.2/README.md000064400000000000000000000034501046102023000126300ustar 00000000000000# tinystr [![crates.io](https://img.shields.io/crates/v/tinystr)](https://crates.io/crates/tinystr) `tinystr` is a utility crate of the [`ICU4X`] project. It includes [`TinyAsciiStr`], a core API for representing small ASCII-only bounded length strings. It is optimized for operations on strings of size 8 or smaller. When use cases involve comparison and conversion of strings for lowercase/uppercase/titlecase, or checking numeric/alphabetic/alphanumeric, `TinyAsciiStr` is the edge performance library. ## Examples ```rust use tinystr::TinyAsciiStr; let s1: TinyAsciiStr<4> = "tEsT".parse().expect("Failed to parse."); assert_eq!(s1, "tEsT"); assert_eq!(s1.to_ascii_uppercase(), "TEST"); assert_eq!(s1.to_ascii_lowercase(), "test"); assert_eq!(s1.to_ascii_titlecase(), "Test"); assert!(s1.is_ascii_alphanumeric()); assert!(!s1.is_ascii_numeric()); let s2 = TinyAsciiStr::<8>::try_from_raw(*b"New York") .expect("Failed to parse."); assert_eq!(s2, "New York"); assert_eq!(s2.to_ascii_uppercase(), "NEW YORK"); assert_eq!(s2.to_ascii_lowercase(), "new york"); assert_eq!(s2.to_ascii_titlecase(), "New york"); assert!(!s2.is_ascii_alphanumeric()); ``` ## Details When strings are of size 8 or smaller, the struct transforms the strings as `u32`/`u64` and uses bitmasking to provide basic string manipulation operations: * `is_ascii_numeric` * `is_ascii_alphabetic` * `is_ascii_alphanumeric` * `to_ascii_lowercase` * `to_ascii_uppercase` * `to_ascii_titlecase` * `PartialEq` `TinyAsciiStr` will fall back to `u8` character manipulation for strings of length greater than 8. [`ICU4X`]: ../icu/index.html ## More Information For more information on development, authorship, contributing etc. please visit [`ICU4X home page`](https://github.com/unicode-org/icu4x). tinystr-0.8.2/benches/common/mod.rs000064400000000000000000000036301046102023000153750ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). // This file was adapted from parts of https://github.com/zbraniecki/tinystr pub static STRINGS_4: &[&str] = &[ "US", "GB", "AR", "Hans", "CN", "AT", "PL", "FR", "AT", "Cyrl", "SR", "NO", "FR", "MK", "UK", ]; pub static STRINGS_8: &[&str] = &[ "Latn", "windows", "AR", "Hans", "macos", "AT", "pl", "FR", "en", "Cyrl", "SR", "NO", "419", "und", "UK", ]; pub static STRINGS_16: &[&str] = &[ "Latn", "windows", "AR", "Hans", "macos", "AT", "infiniband", "FR", "en", "Cyrl", "FromIntegral", "NO", "419", "MacintoshOSX2019", "UK", ]; #[macro_export] macro_rules! bench_block { ($c:expr, $name:expr, $action:ident) => { let mut group4 = $c.benchmark_group(&format!("{}/4", $name)); group4.bench_function("String", $action!(String, STRINGS_4)); group4.bench_function("TinyAsciiStr<4>", $action!(TinyAsciiStr<4>, STRINGS_4)); group4.bench_function("TinyAsciiStr<8>", $action!(TinyAsciiStr<8>, STRINGS_4)); group4.bench_function("TinyAsciiStr<16>", $action!(TinyAsciiStr<16>, STRINGS_4)); group4.finish(); let mut group8 = $c.benchmark_group(&format!("{}/8", $name)); group8.bench_function("String", $action!(String, STRINGS_8)); group8.bench_function("TinyAsciiStr<8>", $action!(TinyAsciiStr<8>, STRINGS_8)); group8.bench_function("TinyAsciiStr<16>", $action!(TinyAsciiStr<16>, STRINGS_8)); group8.finish(); let mut group16 = $c.benchmark_group(&format!("{}/16", $name)); group16.bench_function("String", $action!(String, STRINGS_16)); group16.bench_function("TinyAsciiStr<16>", $action!(TinyAsciiStr<16>, STRINGS_16)); group16.finish(); }; } tinystr-0.8.2/benches/construct.rs000064400000000000000000000041251046102023000153520ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). // This file was adapted from https://github.com/zbraniecki/tinystr mod common; use common::*; use criterion::black_box; use criterion::criterion_group; use criterion::criterion_main; use criterion::Bencher; use criterion::Criterion; use tinystr::TinyAsciiStr; fn construct_from_str(c: &mut Criterion) { macro_rules! cfs { ($r:ty, $inputs:expr) => { |b: &mut Bencher| { b.iter(|| { for s in $inputs { let _: $r = black_box(s.parse().unwrap()); } }) } }; } bench_block!(c, "construct_from_str", cfs); } fn construct_from_utf8(c: &mut Criterion) { macro_rules! cfu { ($r:ty, $inputs:expr) => { |b| { let raw: Vec<&[u8]> = $inputs.iter().map(|s| s.as_bytes()).collect(); b.iter(move || { for u in &raw { let _ = black_box(<$r>::try_from_utf8(*u).unwrap()); } }) } }; } let mut group4 = c.benchmark_group("construct_from_utf8/4"); group4.bench_function("TinyAsciiStr<4>", cfu!(TinyAsciiStr<4>, STRINGS_4)); group4.bench_function("TinyAsciiStr<8>", cfu!(TinyAsciiStr<8>, STRINGS_4)); group4.bench_function("TinyAsciiStr<16>", cfu!(TinyAsciiStr<16>, STRINGS_4)); group4.finish(); let mut group8 = c.benchmark_group("construct_from_utf8/8"); group8.bench_function("TinyAsciiStr<8>", cfu!(TinyAsciiStr<8>, STRINGS_8)); group8.bench_function("TinyAsciiStr<16>", cfu!(TinyAsciiStr<16>, STRINGS_8)); group8.finish(); let mut group16 = c.benchmark_group("construct_from_utf8/16"); group16.bench_function("TinyAsciiStr<16>", cfu!(TinyAsciiStr<16>, STRINGS_16)); group16.finish(); } criterion_group!(benches, construct_from_str, construct_from_utf8,); criterion_main!(benches); tinystr-0.8.2/benches/overview.rs000064400000000000000000000107531046102023000152000ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). mod common; use common::*; use criterion::black_box; use criterion::criterion_group; use criterion::criterion_main; use criterion::Criterion; use tinystr::TinyAsciiStr; fn overview(c: &mut Criterion) { let mut g = c.benchmark_group("overview"); g.bench_function("construct/utf8/TinyAsciiStr", |b| { b.iter(|| { for s in STRINGS_4 { let _: TinyAsciiStr<4> = TinyAsciiStr::try_from_utf8(black_box(s.as_bytes())).unwrap(); let _: TinyAsciiStr<8> = TinyAsciiStr::try_from_utf8(black_box(s.as_bytes())).unwrap(); let _: TinyAsciiStr<16> = TinyAsciiStr::try_from_utf8(black_box(s.as_bytes())).unwrap(); } for s in STRINGS_8 { let _: TinyAsciiStr<8> = TinyAsciiStr::try_from_utf8(black_box(s.as_bytes())).unwrap(); let _: TinyAsciiStr<16> = TinyAsciiStr::try_from_utf8(black_box(s.as_bytes())).unwrap(); } for s in STRINGS_16 { let _: TinyAsciiStr<16> = TinyAsciiStr::try_from_utf8(black_box(s.as_bytes())).unwrap(); } }); }); let strings_4_utf16: Vec> = STRINGS_4 .iter() .map(|s| s.encode_utf16().collect()) .collect(); let strings_8_utf16: Vec> = STRINGS_8 .iter() .map(|s| s.encode_utf16().collect()) .collect(); let strings_16_utf16: Vec> = STRINGS_16 .iter() .map(|s| s.encode_utf16().collect()) .collect(); g.bench_function("construct/utf16/TinyAsciiStr", |b| { b.iter(|| { for s in strings_4_utf16.iter() { let _: TinyAsciiStr<4> = TinyAsciiStr::try_from_utf16(black_box(s)).unwrap(); let _: TinyAsciiStr<8> = TinyAsciiStr::try_from_utf16(black_box(s)).unwrap(); let _: TinyAsciiStr<16> = TinyAsciiStr::try_from_utf16(black_box(s)).unwrap(); } for s in strings_8_utf16.iter() { let _: TinyAsciiStr<8> = TinyAsciiStr::try_from_utf16(black_box(s)).unwrap(); let _: TinyAsciiStr<16> = TinyAsciiStr::try_from_utf16(black_box(s)).unwrap(); } for s in strings_16_utf16.iter() { let _: TinyAsciiStr<16> = TinyAsciiStr::try_from_utf16(black_box(s)).unwrap(); } }); }); let parsed_ascii_4: Vec> = STRINGS_4 .iter() .map(|s| s.parse::>().unwrap()) .collect(); let parsed_ascii_8: Vec> = STRINGS_4 .iter() .chain(STRINGS_8) .map(|s| s.parse::>().unwrap()) .collect(); let parsed_ascii_16: Vec> = STRINGS_4 .iter() .chain(STRINGS_8) .chain(STRINGS_16) .map(|s| s.parse::>().unwrap()) .collect(); g.bench_function("read/TinyAsciiStr", |b| { b.iter(|| { let mut collector: usize = 0; for t in black_box(&parsed_ascii_4) { let s: &str = t; collector += s.bytes().map(usize::from).sum::(); } for t in black_box(&parsed_ascii_8) { let s: &str = t; collector += s.bytes().map(usize::from).sum::(); } for t in black_box(&parsed_ascii_16) { let s: &str = t; collector += s.bytes().map(usize::from).sum::(); } collector }); }); g.bench_function("compare/TinyAsciiStr", |b| { b.iter(|| { let mut collector: usize = 0; for ts in black_box(&parsed_ascii_4).windows(2) { let o = ts[0].cmp(&ts[1]); collector ^= o as usize; } for ts in black_box(&parsed_ascii_8).windows(2) { let o = ts[0].cmp(&ts[1]); collector ^= o as usize; } for ts in black_box(&parsed_ascii_16).windows(2) { let o = ts[0].cmp(&ts[1]); collector ^= o as usize; } collector }); }); } criterion_group!(benches, overview,); criterion_main!(benches); tinystr-0.8.2/benches/read.rs000064400000000000000000000016061046102023000142420ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). mod common; use common::*; use criterion::black_box; use criterion::criterion_group; use criterion::criterion_main; use criterion::Bencher; use criterion::Criterion; use tinystr::TinyAsciiStr; fn read(c: &mut Criterion) { macro_rules! cfs { ($r:ty, $inputs:expr) => { |b: &mut Bencher| { let parsed: Vec<$r> = $inputs.iter().map(|s| s.parse().unwrap()).collect(); b.iter(|| { for s in &parsed { let _: &str = black_box(&**s); } }) } }; } bench_block!(c, "read", cfs); } criterion_group!(benches, read,); criterion_main!(benches); tinystr-0.8.2/benches/serde.rs000064400000000000000000000020601046102023000144240ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). mod common; use common::*; use criterion::black_box; use criterion::criterion_group; use criterion::criterion_main; use criterion::Bencher; use criterion::Criterion; use tinystr::TinyAsciiStr; fn deserialize(c: &mut Criterion) { macro_rules! cfs { ($r:ty, $inputs:expr) => { |b: &mut Bencher| { let serialized: Vec> = $inputs .iter() .map(|s| postcard::to_stdvec(&s.parse::<$r>().unwrap()).unwrap()) .collect(); b.iter(|| { for bytes in &serialized { let _: Result<$r, _> = black_box(postcard::from_bytes(bytes)); } }) } }; } bench_block!(c, "deserialize", cfs); } criterion_group!(benches, deserialize,); criterion_main!(benches); tinystr-0.8.2/src/ascii.rs000064400000000000000000001124431046102023000136010ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::asciibyte::AsciiByte; use crate::int_ops::{Aligned4, Aligned8}; use crate::ParseError; use core::borrow::Borrow; use core::fmt; use core::ops::Deref; use core::str::{self, FromStr}; #[repr(transparent)] #[derive(PartialEq, Eq, Ord, PartialOrd, Copy, Clone, Hash)] pub struct TinyAsciiStr { bytes: [AsciiByte; N], } impl TinyAsciiStr { #[inline] pub const fn try_from_str(s: &str) -> Result { Self::try_from_utf8(s.as_bytes()) } /// Creates a `TinyAsciiStr` from the given UTF-8 slice. /// `code_units` may contain at most `N` non-null ASCII code points. #[inline] pub const fn try_from_utf8(code_units: &[u8]) -> Result { Self::try_from_utf8_inner(code_units, false) } /// Creates a `TinyAsciiStr` from the given UTF-16 slice. /// `code_units` may contain at most `N` non-null ASCII code points. #[inline] pub const fn try_from_utf16(code_units: &[u16]) -> Result { Self::try_from_utf16_inner(code_units, 0, code_units.len(), false) } /// Creates a `TinyAsciiStr` from a UTF-8 slice, replacing invalid code units. /// /// Invalid code units, as well as null or non-ASCII code points /// (i.e. those outside the range U+0001..=U+007F`) /// will be replaced with the replacement byte. /// /// The input slice will be truncated if its length exceeds `N`. pub const fn from_utf8_lossy(code_units: &[u8], replacement: u8) -> Self { let mut out = [0; N]; let mut i = 0; // Ord is not available in const, so no `.min(N)` let len = if code_units.len() > N { N } else { code_units.len() }; // Indexing is protected by the len check above #[expect(clippy::indexing_slicing)] while i < len { let b = code_units[i]; if b > 0 && b < 0x80 { out[i] = b; } else { out[i] = replacement; } i += 1; } Self { // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes` bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) }, } } /// Creates a `TinyAsciiStr` from a UTF-16 slice, replacing invalid code units. /// /// Invalid code units, as well as null or non-ASCII code points /// (i.e. those outside the range U+0001..=U+007F`) /// will be replaced with the replacement byte. /// /// The input slice will be truncated if its length exceeds `N`. pub const fn from_utf16_lossy(code_units: &[u16], replacement: u8) -> Self { let mut out = [0; N]; let mut i = 0; // Ord is not available in const, so no `.min(N)` let len = if code_units.len() > N { N } else { code_units.len() }; // Indexing is protected by the len check above #[expect(clippy::indexing_slicing)] while i < len { let b = code_units[i]; if b > 0 && b < 0x80 { out[i] = b as u8; } else { out[i] = replacement; } i += 1; } Self { // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes` bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) }, } } /// Attempts to parse a fixed-length byte array to a `TinyAsciiStr`. /// /// The byte array may contain trailing NUL bytes. /// /// # Example /// /// ``` /// use tinystr::tinystr; /// use tinystr::TinyAsciiStr; /// /// assert_eq!( /// TinyAsciiStr::<3>::try_from_raw(*b"GB\0"), /// Ok(tinystr!(3, "GB")) /// ); /// assert_eq!( /// TinyAsciiStr::<3>::try_from_raw(*b"USD"), /// Ok(tinystr!(3, "USD")) /// ); /// assert!(TinyAsciiStr::<3>::try_from_raw(*b"\0A\0").is_err()); /// ``` pub const fn try_from_raw(raw: [u8; N]) -> Result { Self::try_from_utf8_inner(&raw, true) } pub(crate) const fn try_from_utf8_inner( code_units: &[u8], allow_trailing_null: bool, ) -> Result { if code_units.len() > N { return Err(ParseError::TooLong { max: N, len: code_units.len(), }); } let mut out = [0; N]; let mut i = 0; let mut found_null = false; // Indexing is protected by TinyStrError::TooLarge #[expect(clippy::indexing_slicing)] while i < code_units.len() { let b = code_units[i]; if b == 0 { found_null = true; } else if b >= 0x80 { return Err(ParseError::NonAscii); } else if found_null { // Error if there are contentful bytes after null return Err(ParseError::ContainsNull); } out[i] = b; i += 1; } if !allow_trailing_null && found_null { // We found some trailing nulls, error return Err(ParseError::ContainsNull); } Ok(Self { // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes` bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) }, }) } pub(crate) const fn try_from_utf16_inner( code_units: &[u16], start: usize, end: usize, allow_trailing_null: bool, ) -> Result { let len = end - start; if len > N { return Err(ParseError::TooLong { max: N, len }); } let mut out = [0; N]; let mut i = 0; let mut found_null = false; // Indexing is protected by TinyStrError::TooLarge #[expect(clippy::indexing_slicing)] while i < len { let b = code_units[start + i]; if b == 0 { found_null = true; } else if b >= 0x80 { return Err(ParseError::NonAscii); } else if found_null { // Error if there are contentful bytes after null return Err(ParseError::ContainsNull); } out[i] = b as u8; i += 1; } if !allow_trailing_null && found_null { // We found some trailing nulls, error return Err(ParseError::ContainsNull); } Ok(Self { // SAFETY: `out` only contains ASCII bytes and has same size as `self.bytes` bytes: unsafe { AsciiByte::to_ascii_byte_array(&out) }, }) } /// Creates a `TinyAsciiStr` containing the decimal representation of /// the given unsigned integer. /// /// If the number of decimal digits exceeds `N`, the highest-magnitude /// digits are truncated, and the lowest-magnitude digits are returned /// as the error. /// /// Note: this function takes a u32. Larger integer types should probably /// not be stored in a `TinyAsciiStr`. /// /// # Examples /// /// ``` /// use tinystr::tinystr; /// use tinystr::TinyAsciiStr; /// /// let s0_4 = TinyAsciiStr::<4>::new_unsigned_decimal(0).unwrap(); /// let s456_4 = TinyAsciiStr::<4>::new_unsigned_decimal(456).unwrap(); /// let s456_3 = TinyAsciiStr::<3>::new_unsigned_decimal(456).unwrap(); /// let s456_2 = TinyAsciiStr::<2>::new_unsigned_decimal(456).unwrap_err(); /// /// assert_eq!(s0_4, tinystr!(4, "0")); /// assert_eq!(s456_4, tinystr!(4, "456")); /// assert_eq!(s456_3, tinystr!(3, "456")); /// assert_eq!(s456_2, tinystr!(2, "56")); /// ``` /// /// Example with saturating the value: /// /// ``` /// use tinystr::tinystr; /// use tinystr::TinyAsciiStr; /// /// let str_truncated = /// TinyAsciiStr::<2>::new_unsigned_decimal(456).unwrap_or_else(|s| s); /// let str_saturated = TinyAsciiStr::<2>::new_unsigned_decimal(456) /// .unwrap_or(tinystr!(2, "99")); /// /// assert_eq!(str_truncated, tinystr!(2, "56")); /// assert_eq!(str_saturated, tinystr!(2, "99")); /// ``` pub fn new_unsigned_decimal(number: u32) -> Result { let mut bytes = [AsciiByte::B0; N]; let mut x = number; let mut i = 0usize; #[expect(clippy::indexing_slicing)] // in-range: i < N while i < N && (x != 0 || i == 0) { bytes[N - i - 1] = AsciiByte::from_decimal_digit((x % 10) as u8); x /= 10; i += 1; } if i < N { bytes.copy_within((N - i)..N, 0); bytes[i..N].fill(AsciiByte::B0); } let s = Self { bytes }; if x != 0 { Err(s) } else { Ok(s) } } #[inline] pub const fn as_str(&self) -> &str { // as_utf8 is valid utf8 unsafe { str::from_utf8_unchecked(self.as_utf8()) } } #[inline] #[must_use] pub const fn len(&self) -> usize { if N <= 4 { Aligned4::from_ascii_bytes(&self.bytes).len() } else if N <= 8 { Aligned8::from_ascii_bytes(&self.bytes).len() } else { let mut i = 0; #[expect(clippy::indexing_slicing)] // < N is safe while i < N && self.bytes[i] as u8 != AsciiByte::B0 as u8 { i += 1 } i } } #[inline] #[must_use] pub const fn is_empty(&self) -> bool { self.bytes[0] as u8 == AsciiByte::B0 as u8 } #[inline] #[must_use] pub const fn as_utf8(&self) -> &[u8] { // Safe because `self.bytes.as_slice()` pointer-casts to `&[u8]`, // and changing the length of that slice to self.len() < N is safe. unsafe { core::slice::from_raw_parts(self.bytes.as_slice().as_ptr() as *const u8, self.len()) } } #[inline] #[must_use] pub const fn all_bytes(&self) -> &[u8; N] { // SAFETY: `self.bytes` has same size as [u8; N] unsafe { &*(self.bytes.as_ptr() as *const [u8; N]) } } #[inline] #[must_use] /// Resizes a `TinyAsciiStr` to a `TinyAsciiStr`. /// /// If `M < len()` the string gets truncated, otherwise only the /// memory representation changes. pub const fn resize(self) -> TinyAsciiStr { let mut bytes = [0; M]; let mut i = 0; // Indexing is protected by the loop guard #[expect(clippy::indexing_slicing)] while i < M && i < N { bytes[i] = self.bytes[i] as u8; i += 1; } // `self.bytes` only contains ASCII bytes, with no null bytes between // ASCII characters, so this also holds for `bytes`. unsafe { TinyAsciiStr::from_utf8_unchecked(bytes) } } #[inline] #[must_use] /// Returns a `TinyAsciiStr` with the concatenation of this string, /// `TinyAsciiStr`, and another string, `TinyAsciiStr`. /// /// If `Q < N + M`, the string gets truncated. /// /// # Examples /// /// ``` /// use tinystr::tinystr; /// use tinystr::TinyAsciiStr; /// /// let abc = tinystr!(6, "abc"); /// let defg = tinystr!(6, "defg"); /// /// // The concatenation is successful if Q is large enough... /// assert_eq!(abc.concat(defg), tinystr!(16, "abcdefg")); /// assert_eq!(abc.concat(defg), tinystr!(12, "abcdefg")); /// assert_eq!(abc.concat(defg), tinystr!(8, "abcdefg")); /// assert_eq!(abc.concat(defg), tinystr!(7, "abcdefg")); /// /// /// ...but it truncates of Q is too small. /// assert_eq!(abc.concat(defg), tinystr!(6, "abcdef")); /// assert_eq!(abc.concat(defg), tinystr!(2, "ab")); /// ``` pub const fn concat( self, other: TinyAsciiStr, ) -> TinyAsciiStr { let mut result = self.resize::(); let mut i = self.len(); let mut j = 0; // Indexing is protected by the loop guard #[expect(clippy::indexing_slicing)] while i < Q && j < M { result.bytes[i] = other.bytes[j]; i += 1; j += 1; } result } /// # Safety /// Must be called with a bytes array made of valid ASCII bytes, with no null bytes /// between ASCII characters #[must_use] pub const unsafe fn from_utf8_unchecked(code_units: [u8; N]) -> Self { Self { bytes: AsciiByte::to_ascii_byte_array(&code_units), } } } macro_rules! check_is { ($self:ident, $check_int:ident, $check_u8:ident) => { if N <= 4 { Aligned4::from_ascii_bytes(&$self.bytes).$check_int() } else if N <= 8 { Aligned8::from_ascii_bytes(&$self.bytes).$check_int() } else { let mut i = 0; while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 { if !($self.bytes[i] as u8).$check_u8() { return false; } i += 1; } true } }; ($self:ident, $check_int:ident, !$check_u8_0_inv:ident, !$check_u8_1_inv:ident) => { if N <= 4 { Aligned4::from_ascii_bytes(&$self.bytes).$check_int() } else if N <= 8 { Aligned8::from_ascii_bytes(&$self.bytes).$check_int() } else { // Won't panic because N is > 8 if ($self.bytes[0] as u8).$check_u8_0_inv() { return false; } let mut i = 1; while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 { if ($self.bytes[i] as u8).$check_u8_1_inv() { return false; } i += 1; } true } }; ($self:ident, $check_int:ident, $check_u8_0_inv:ident, $check_u8_1_inv:ident) => { if N <= 4 { Aligned4::from_ascii_bytes(&$self.bytes).$check_int() } else if N <= 8 { Aligned8::from_ascii_bytes(&$self.bytes).$check_int() } else { // Won't panic because N is > 8 if !($self.bytes[0] as u8).$check_u8_0_inv() { return false; } let mut i = 1; while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 { if !($self.bytes[i] as u8).$check_u8_1_inv() { return false; } i += 1; } true } }; } impl TinyAsciiStr { /// Checks if the value is composed of ASCII alphabetic characters: /// /// * U+0041 'A' ..= U+005A 'Z', or /// * U+0061 'a' ..= U+007A 'z'. /// /// # Examples /// /// ``` /// use tinystr::TinyAsciiStr; /// /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse."); /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse."); /// /// assert!(s1.is_ascii_alphabetic()); /// assert!(!s2.is_ascii_alphabetic()); /// ``` #[inline] #[must_use] pub const fn is_ascii_alphabetic(&self) -> bool { check_is!(self, is_ascii_alphabetic, is_ascii_alphabetic) } /// Checks if the value is composed of ASCII alphanumeric characters: /// /// * U+0041 'A' ..= U+005A 'Z', or /// * U+0061 'a' ..= U+007A 'z', or /// * U+0030 '0' ..= U+0039 '9'. /// /// # Examples /// /// ``` /// use tinystr::TinyAsciiStr; /// /// let s1: TinyAsciiStr<4> = "A15b".parse().expect("Failed to parse."); /// let s2: TinyAsciiStr<4> = "[3@w".parse().expect("Failed to parse."); /// /// assert!(s1.is_ascii_alphanumeric()); /// assert!(!s2.is_ascii_alphanumeric()); /// ``` #[inline] #[must_use] pub const fn is_ascii_alphanumeric(&self) -> bool { check_is!(self, is_ascii_alphanumeric, is_ascii_alphanumeric) } /// Checks if the value is composed of ASCII decimal digits: /// /// * U+0030 '0' ..= U+0039 '9'. /// /// # Examples /// /// ``` /// use tinystr::TinyAsciiStr; /// /// let s1: TinyAsciiStr<4> = "312".parse().expect("Failed to parse."); /// let s2: TinyAsciiStr<4> = "3d".parse().expect("Failed to parse."); /// /// assert!(s1.is_ascii_numeric()); /// assert!(!s2.is_ascii_numeric()); /// ``` #[inline] #[must_use] pub const fn is_ascii_numeric(&self) -> bool { check_is!(self, is_ascii_numeric, is_ascii_digit) } /// Checks if the value is in ASCII lower case. /// /// All letter characters are checked for case. Non-letter characters are ignored. /// /// # Examples /// /// ``` /// use tinystr::TinyAsciiStr; /// /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse."); /// let s2: TinyAsciiStr<4> = "test".parse().expect("Failed to parse."); /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse."); /// /// assert!(!s1.is_ascii_lowercase()); /// assert!(s2.is_ascii_lowercase()); /// assert!(s3.is_ascii_lowercase()); /// ``` #[inline] #[must_use] pub const fn is_ascii_lowercase(&self) -> bool { check_is!( self, is_ascii_lowercase, !is_ascii_uppercase, !is_ascii_uppercase ) } /// Checks if the value is in ASCII title case. /// /// This verifies that the first character is ASCII uppercase and all others ASCII lowercase. /// Non-letter characters are ignored. /// /// # Examples /// /// ``` /// use tinystr::TinyAsciiStr; /// /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse."); /// let s2: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse."); /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse."); /// /// assert!(!s1.is_ascii_titlecase()); /// assert!(s2.is_ascii_titlecase()); /// assert!(s3.is_ascii_titlecase()); /// ``` #[inline] #[must_use] pub const fn is_ascii_titlecase(&self) -> bool { check_is!( self, is_ascii_titlecase, !is_ascii_lowercase, !is_ascii_uppercase ) } /// Checks if the value is in ASCII upper case. /// /// All letter characters are checked for case. Non-letter characters are ignored. /// /// # Examples /// /// ``` /// use tinystr::TinyAsciiStr; /// /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse."); /// let s2: TinyAsciiStr<4> = "TEST".parse().expect("Failed to parse."); /// let s3: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse."); /// /// assert!(!s1.is_ascii_uppercase()); /// assert!(s2.is_ascii_uppercase()); /// assert!(!s3.is_ascii_uppercase()); /// ``` #[inline] #[must_use] pub const fn is_ascii_uppercase(&self) -> bool { check_is!( self, is_ascii_uppercase, !is_ascii_lowercase, !is_ascii_lowercase ) } /// Checks if the value is composed of ASCII alphabetic lower case characters: /// /// * U+0061 'a' ..= U+007A 'z', /// /// # Examples /// /// ``` /// use tinystr::TinyAsciiStr; /// /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse."); /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse."); /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse."); /// let s4: TinyAsciiStr<4> = "test".parse().expect("Failed to parse."); /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse."); /// /// assert!(!s1.is_ascii_alphabetic_lowercase()); /// assert!(!s2.is_ascii_alphabetic_lowercase()); /// assert!(!s3.is_ascii_alphabetic_lowercase()); /// assert!(s4.is_ascii_alphabetic_lowercase()); /// assert!(!s5.is_ascii_alphabetic_lowercase()); /// ``` #[inline] #[must_use] pub const fn is_ascii_alphabetic_lowercase(&self) -> bool { check_is!( self, is_ascii_alphabetic_lowercase, is_ascii_lowercase, is_ascii_lowercase ) } /// Checks if the value is composed of ASCII alphabetic, with the first character being ASCII uppercase, and all others ASCII lowercase. /// /// # Examples /// /// ``` /// use tinystr::TinyAsciiStr; /// /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse."); /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse."); /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse."); /// let s4: TinyAsciiStr<4> = "test".parse().expect("Failed to parse."); /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse."); /// /// assert!(s1.is_ascii_alphabetic_titlecase()); /// assert!(!s2.is_ascii_alphabetic_titlecase()); /// assert!(!s3.is_ascii_alphabetic_titlecase()); /// assert!(!s4.is_ascii_alphabetic_titlecase()); /// assert!(!s5.is_ascii_alphabetic_titlecase()); /// ``` #[inline] #[must_use] pub const fn is_ascii_alphabetic_titlecase(&self) -> bool { check_is!( self, is_ascii_alphabetic_titlecase, is_ascii_uppercase, is_ascii_lowercase ) } /// Checks if the value is composed of ASCII alphabetic upper case characters: /// /// * U+0041 'A' ..= U+005A 'Z', /// /// # Examples /// /// ``` /// use tinystr::TinyAsciiStr; /// /// let s1: TinyAsciiStr<4> = "Test".parse().expect("Failed to parse."); /// let s2: TinyAsciiStr<4> = "Te3t".parse().expect("Failed to parse."); /// let s3: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse."); /// let s4: TinyAsciiStr<4> = "TEST".parse().expect("Failed to parse."); /// let s5: TinyAsciiStr<4> = "001z".parse().expect("Failed to parse."); /// /// assert!(!s1.is_ascii_alphabetic_uppercase()); /// assert!(!s2.is_ascii_alphabetic_uppercase()); /// assert!(!s3.is_ascii_alphabetic_uppercase()); /// assert!(s4.is_ascii_alphabetic_uppercase()); /// assert!(!s5.is_ascii_alphabetic_uppercase()); /// ``` #[inline] #[must_use] pub const fn is_ascii_alphabetic_uppercase(&self) -> bool { check_is!( self, is_ascii_alphabetic_uppercase, is_ascii_uppercase, is_ascii_uppercase ) } } macro_rules! to { ($self:ident, $to:ident, $later_char_to:ident $(,$first_char_to:ident)?) => {{ let mut i = 0; if N <= 4 { let aligned = Aligned4::from_ascii_bytes(&$self.bytes).$to().to_ascii_bytes(); // Won't panic because self.bytes has length N and aligned has length >= N #[expect(clippy::indexing_slicing)] while i < N { $self.bytes[i] = aligned[i]; i += 1; } } else if N <= 8 { let aligned = Aligned8::from_ascii_bytes(&$self.bytes).$to().to_ascii_bytes(); // Won't panic because self.bytes has length N and aligned has length >= N #[expect(clippy::indexing_slicing)] while i < N { $self.bytes[i] = aligned[i]; i += 1; } } else { while i < N && $self.bytes[i] as u8 != AsciiByte::B0 as u8 { // SAFETY: AsciiByte is repr(u8) and has same size as u8 unsafe { $self.bytes[i] = core::mem::transmute::( ($self.bytes[i] as u8).$later_char_to() ); } i += 1; } // SAFETY: AsciiByte is repr(u8) and has same size as u8 $( $self.bytes[0] = unsafe { core::mem::transmute::(($self.bytes[0] as u8).$first_char_to()) }; )? } $self }}; } impl TinyAsciiStr { /// Converts this type to its ASCII lower case equivalent in-place. /// /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', other characters are unchanged. /// /// # Examples /// /// ``` /// use tinystr::TinyAsciiStr; /// /// let s1: TinyAsciiStr<4> = "TeS3".parse().expect("Failed to parse."); /// /// assert_eq!(&*s1.to_ascii_lowercase(), "tes3"); /// ``` #[inline] #[must_use] pub const fn to_ascii_lowercase(mut self) -> Self { to!(self, to_ascii_lowercase, to_ascii_lowercase) } /// Converts this type to its ASCII title case equivalent in-place. /// /// The first character is converted to ASCII uppercase; the remaining characters /// are converted to ASCII lowercase. /// /// # Examples /// /// ``` /// use tinystr::TinyAsciiStr; /// /// let s1: TinyAsciiStr<4> = "teSt".parse().expect("Failed to parse."); /// /// assert_eq!(&*s1.to_ascii_titlecase(), "Test"); /// ``` #[inline] #[must_use] pub const fn to_ascii_titlecase(mut self) -> Self { to!( self, to_ascii_titlecase, to_ascii_lowercase, to_ascii_uppercase ) } /// Converts this type to its ASCII upper case equivalent in-place. /// /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', other characters are unchanged. /// /// # Examples /// /// ``` /// use tinystr::TinyAsciiStr; /// /// let s1: TinyAsciiStr<4> = "Tes3".parse().expect("Failed to parse."); /// /// assert_eq!(&*s1.to_ascii_uppercase(), "TES3"); /// ``` #[inline] #[must_use] pub const fn to_ascii_uppercase(mut self) -> Self { to!(self, to_ascii_uppercase, to_ascii_uppercase) } } impl fmt::Debug for TinyAsciiStr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fmt::Debug::fmt(self.as_str(), f) } } impl fmt::Display for TinyAsciiStr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fmt::Display::fmt(self.as_str(), f) } } impl Deref for TinyAsciiStr { type Target = str; #[inline] fn deref(&self) -> &str { self.as_str() } } impl Borrow for TinyAsciiStr { #[inline] fn borrow(&self) -> &str { self.as_str() } } impl FromStr for TinyAsciiStr { type Err = ParseError; #[inline] fn from_str(s: &str) -> Result { Self::try_from_str(s) } } impl PartialEq for TinyAsciiStr { fn eq(&self, other: &str) -> bool { self.deref() == other } } impl PartialEq<&str> for TinyAsciiStr { fn eq(&self, other: &&str) -> bool { self.deref() == *other } } #[cfg(feature = "alloc")] impl PartialEq for TinyAsciiStr { fn eq(&self, other: &alloc::string::String) -> bool { self.deref() == other.deref() } } #[cfg(feature = "alloc")] impl PartialEq> for alloc::string::String { fn eq(&self, other: &TinyAsciiStr) -> bool { self.deref() == other.deref() } } #[cfg(test)] mod test { use super::*; use rand::distr::Distribution; use rand::distr::StandardUniform; use rand::rngs::SmallRng; use rand::SeedableRng; const STRINGS: [&str; 26] = [ "Latn", "laTn", "windows", "AR", "Hans", "macos", "AT", "infiniband", "FR", "en", "Cyrl", "FromIntegral", "NO", "419", "MacintoshOSX2019", "a3z", "A3z", "A3Z", "a3Z", "3A", "3Z", "3a", "3z", "@@[`{", "UK", "E12", ]; fn gen_strings(num_strings: usize, allowed_lengths: &[usize]) -> Vec { use rand::seq::IndexedRandom; let mut rng = SmallRng::seed_from_u64(2022); // Need to do this in 2 steps since the RNG is needed twice let string_lengths = core::iter::repeat_with(|| *allowed_lengths.choose(&mut rng).unwrap()) .take(num_strings) .collect::>(); string_lengths .iter() .map(|len| { StandardUniform .sample_iter(&mut rng) .filter(|b: &u8| *b > 0 && *b < 0x80) .take(*len) .collect::>() }) .map(|byte_vec| String::from_utf8(byte_vec).expect("All ASCII")) .collect() } fn check_operation(reference_f: F1, tinystr_f: F2) where F1: Fn(&str) -> T, F2: Fn(TinyAsciiStr) -> T, T: core::fmt::Debug + core::cmp::PartialEq, { for s in STRINGS .into_iter() .map(str::to_owned) .chain(gen_strings(100, &[3, 4, 5, 8, 12])) { let t = match TinyAsciiStr::::from_str(&s) { Ok(t) => t, Err(ParseError::TooLong { .. }) => continue, Err(e) => panic!("{}", e), }; let expected = reference_f(&s); let actual = tinystr_f(t); assert_eq!(expected, actual, "TinyAsciiStr<{N}>: {s:?}"); let s_utf16: Vec = s.encode_utf16().collect(); let t = match TinyAsciiStr::::try_from_utf16(&s_utf16) { Ok(t) => t, Err(ParseError::TooLong { .. }) => continue, Err(e) => panic!("{}", e), }; let expected = reference_f(&s); let actual = tinystr_f(t); assert_eq!(expected, actual, "TinyAsciiStr<{N}>: {s:?}"); } } #[test] fn test_is_ascii_alphabetic() { fn check() { check_operation( |s| s.chars().all(|c| c.is_ascii_alphabetic()), |t: TinyAsciiStr| TinyAsciiStr::is_ascii_alphabetic(&t), ) } check::<2>(); check::<3>(); check::<4>(); check::<5>(); check::<8>(); check::<16>(); } #[test] fn test_is_ascii_alphanumeric() { fn check() { check_operation( |s| s.chars().all(|c| c.is_ascii_alphanumeric()), |t: TinyAsciiStr| TinyAsciiStr::is_ascii_alphanumeric(&t), ) } check::<2>(); check::<3>(); check::<4>(); check::<5>(); check::<8>(); check::<16>(); } #[test] fn test_is_ascii_numeric() { fn check() { check_operation( |s| s.chars().all(|c| c.is_ascii_digit()), |t: TinyAsciiStr| TinyAsciiStr::is_ascii_numeric(&t), ) } check::<2>(); check::<3>(); check::<4>(); check::<5>(); check::<8>(); check::<16>(); } #[test] fn test_is_ascii_lowercase() { fn check() { check_operation( |s| { s == TinyAsciiStr::<16>::try_from_str(s) .unwrap() .to_ascii_lowercase() .as_str() }, |t: TinyAsciiStr| TinyAsciiStr::is_ascii_lowercase(&t), ) } check::<2>(); check::<3>(); check::<4>(); check::<5>(); check::<8>(); check::<16>(); } #[test] fn test_is_ascii_titlecase() { fn check() { check_operation( |s| { s == TinyAsciiStr::<16>::try_from_str(s) .unwrap() .to_ascii_titlecase() .as_str() }, |t: TinyAsciiStr| TinyAsciiStr::is_ascii_titlecase(&t), ) } check::<2>(); check::<3>(); check::<4>(); check::<5>(); check::<8>(); check::<16>(); } #[test] fn test_is_ascii_uppercase() { fn check() { check_operation( |s| { s == TinyAsciiStr::<16>::try_from_str(s) .unwrap() .to_ascii_uppercase() .as_str() }, |t: TinyAsciiStr| TinyAsciiStr::is_ascii_uppercase(&t), ) } check::<2>(); check::<3>(); check::<4>(); check::<5>(); check::<8>(); check::<16>(); } #[test] fn test_is_ascii_alphabetic_lowercase() { fn check() { check_operation( |s| { // Check alphabetic s.chars().all(|c| c.is_ascii_alphabetic()) && // Check lowercase s == TinyAsciiStr::<16>::try_from_str(s) .unwrap() .to_ascii_lowercase() .as_str() }, |t: TinyAsciiStr| TinyAsciiStr::is_ascii_alphabetic_lowercase(&t), ) } check::<2>(); check::<3>(); check::<4>(); check::<5>(); check::<8>(); check::<16>(); } #[test] fn test_is_ascii_alphabetic_titlecase() { fn check() { check_operation( |s| { // Check alphabetic s.chars().all(|c| c.is_ascii_alphabetic()) && // Check titlecase s == TinyAsciiStr::<16>::try_from_str(s) .unwrap() .to_ascii_titlecase() .as_str() }, |t: TinyAsciiStr| TinyAsciiStr::is_ascii_alphabetic_titlecase(&t), ) } check::<2>(); check::<3>(); check::<4>(); check::<5>(); check::<8>(); check::<16>(); } #[test] fn test_is_ascii_alphabetic_uppercase() { fn check() { check_operation( |s| { // Check alphabetic s.chars().all(|c| c.is_ascii_alphabetic()) && // Check uppercase s == TinyAsciiStr::<16>::try_from_str(s) .unwrap() .to_ascii_uppercase() .as_str() }, |t: TinyAsciiStr| TinyAsciiStr::is_ascii_alphabetic_uppercase(&t), ) } check::<2>(); check::<3>(); check::<4>(); check::<5>(); check::<8>(); check::<16>(); } #[test] fn test_to_ascii_lowercase() { fn check() { check_operation( |s| { s.chars() .map(|c| c.to_ascii_lowercase()) .collect::() }, |t: TinyAsciiStr| TinyAsciiStr::to_ascii_lowercase(t).as_str().to_owned(), ) } check::<2>(); check::<3>(); check::<4>(); check::<5>(); check::<8>(); check::<16>(); } #[test] fn test_to_ascii_titlecase() { fn check() { check_operation( |s| { let mut r = s .chars() .map(|c| c.to_ascii_lowercase()) .collect::(); // Safe because the string is nonempty and an ASCII string unsafe { r.as_bytes_mut()[0].make_ascii_uppercase() }; r }, |t: TinyAsciiStr| TinyAsciiStr::to_ascii_titlecase(t).as_str().to_owned(), ) } check::<2>(); check::<3>(); check::<4>(); check::<5>(); check::<8>(); check::<16>(); } #[test] fn test_to_ascii_uppercase() { fn check() { check_operation( |s| { s.chars() .map(|c| c.to_ascii_uppercase()) .collect::() }, |t: TinyAsciiStr| TinyAsciiStr::to_ascii_uppercase(t).as_str().to_owned(), ) } check::<2>(); check::<3>(); check::<4>(); check::<5>(); check::<8>(); check::<16>(); } #[test] fn lossy_constructor() { assert_eq!(TinyAsciiStr::<4>::from_utf8_lossy(b"", b'?').as_str(), ""); assert_eq!( TinyAsciiStr::<4>::from_utf8_lossy(b"oh\0o", b'?').as_str(), "oh?o" ); assert_eq!( TinyAsciiStr::<4>::from_utf8_lossy(b"\0", b'?').as_str(), "?" ); assert_eq!( TinyAsciiStr::<4>::from_utf8_lossy(b"toolong", b'?').as_str(), "tool" ); assert_eq!( TinyAsciiStr::<4>::from_utf8_lossy(&[b'a', 0x80, 0xFF, b'1'], b'?').as_str(), "a??1" ); } } tinystr-0.8.2/src/asciibyte.rs000064400000000000000000000060701046102023000144630ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). #[repr(u8)] #[allow(dead_code)] #[derive(PartialEq, Eq, Ord, PartialOrd, Copy, Clone, Hash)] pub enum AsciiByte { B0 = 0, B1 = 1, B2 = 2, B3 = 3, B4 = 4, B5 = 5, B6 = 6, B7 = 7, B8 = 8, B9 = 9, B10 = 10, B11 = 11, B12 = 12, B13 = 13, B14 = 14, B15 = 15, B16 = 16, B17 = 17, B18 = 18, B19 = 19, B20 = 20, B21 = 21, B22 = 22, B23 = 23, B24 = 24, B25 = 25, B26 = 26, B27 = 27, B28 = 28, B29 = 29, B30 = 30, B31 = 31, B32 = 32, B33 = 33, B34 = 34, B35 = 35, B36 = 36, B37 = 37, B38 = 38, B39 = 39, B40 = 40, B41 = 41, B42 = 42, B43 = 43, B44 = 44, B45 = 45, B46 = 46, B47 = 47, B48 = 48, B49 = 49, B50 = 50, B51 = 51, B52 = 52, B53 = 53, B54 = 54, B55 = 55, B56 = 56, B57 = 57, B58 = 58, B59 = 59, B60 = 60, B61 = 61, B62 = 62, B63 = 63, B64 = 64, B65 = 65, B66 = 66, B67 = 67, B68 = 68, B69 = 69, B70 = 70, B71 = 71, B72 = 72, B73 = 73, B74 = 74, B75 = 75, B76 = 76, B77 = 77, B78 = 78, B79 = 79, B80 = 80, B81 = 81, B82 = 82, B83 = 83, B84 = 84, B85 = 85, B86 = 86, B87 = 87, B88 = 88, B89 = 89, B90 = 90, B91 = 91, B92 = 92, B93 = 93, B94 = 94, B95 = 95, B96 = 96, B97 = 97, B98 = 98, B99 = 99, B100 = 100, B101 = 101, B102 = 102, B103 = 103, B104 = 104, B105 = 105, B106 = 106, B107 = 107, B108 = 108, B109 = 109, B110 = 110, B111 = 111, B112 = 112, B113 = 113, B114 = 114, B115 = 115, B116 = 116, B117 = 117, B118 = 118, B119 = 119, B120 = 120, B121 = 121, B122 = 122, B123 = 123, B124 = 124, B125 = 125, B126 = 126, B127 = 127, } impl AsciiByte { /// Convert [u8; N] to [AsciiByte; N] /// /// # Safety /// /// All bytes MUST be in the range 0 to 127 inclusive. #[inline] pub const unsafe fn to_ascii_byte_array(bytes: &[u8; N]) -> [AsciiByte; N] { *(bytes as *const [u8; N] as *const [AsciiByte; N]) } #[inline] pub(crate) fn from_decimal_digit(digit: u8) -> AsciiByte { // Note: This code optimizes nicely with no branches. match digit { 0 => AsciiByte::B48, 1 => AsciiByte::B49, 2 => AsciiByte::B50, 3 => AsciiByte::B51, 4 => AsciiByte::B52, 5 => AsciiByte::B53, 6 => AsciiByte::B54, 7 => AsciiByte::B55, 8 => AsciiByte::B56, 9 => AsciiByte::B57, _ => { debug_assert!(false, "not a single digit: {digit}"); AsciiByte::B32 // Space } } } } tinystr-0.8.2/src/databake.rs000064400000000000000000000035461046102023000142500ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::TinyAsciiStr; use crate::UnvalidatedTinyAsciiStr; use databake::*; impl Bake for TinyAsciiStr { fn bake(&self, env: &CrateEnv) -> TokenStream { env.insert("tinystr"); let string = self.as_str(); quote! { tinystr::tinystr!(#N, #string) } } } impl BakeSize for TinyAsciiStr { fn borrows_size(&self) -> usize { 0 } } impl databake::Bake for UnvalidatedTinyAsciiStr { fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream { match self.try_into_tinystr() { Ok(tiny) => { let tiny = tiny.bake(env); databake::quote! { #tiny.to_unvalidated() } } Err(_) => { let bytes = self.0.bake(env); env.insert("tinystr"); databake::quote! { tinystr::UnvalidatedTinyAsciiStr::from_utf8_unchecked(#bytes) } } } } } impl databake::BakeSize for UnvalidatedTinyAsciiStr { fn borrows_size(&self) -> usize { 0 } } #[test] fn test() { test_bake!( TinyAsciiStr<10>, const, crate::tinystr!(10usize, "foo"), tinystr ); } #[test] fn test_unvalidated() { test_bake!( UnvalidatedTinyAsciiStr<10>, const, crate::tinystr!(10usize, "foo").to_unvalidated(), tinystr ); test_bake!( UnvalidatedTinyAsciiStr<3>, const, crate::UnvalidatedTinyAsciiStr::from_utf8_unchecked(*b"AB\xCD"), tinystr ); } tinystr-0.8.2/src/error.rs000064400000000000000000000012501046102023000136330ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use displaydoc::Display; impl core::error::Error for ParseError {} #[derive(Display, Debug, PartialEq, Eq)] #[non_exhaustive] pub enum ParseError { #[displaydoc("found string of larger length {len} when constructing string of length {max}")] TooLong { max: usize, len: usize }, #[displaydoc("tinystr types do not support strings with null bytes")] ContainsNull, #[displaydoc("attempted to construct TinyAsciiStr from a non-ASCII string")] NonAscii, } tinystr-0.8.2/src/int_ops.rs000064400000000000000000000275411046102023000141700ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::asciibyte::AsciiByte; /// Internal helper struct that performs operations on aligned integers. /// Supports strings up to 4 bytes long. #[repr(transparent)] pub struct Aligned4(u32); impl Aligned4 { /// # Panics /// Panics if N is greater than 4 #[inline] pub const fn from_utf8(src: &[u8; N]) -> Self { let mut bytes = [0; 4]; let mut i = 0; // The function documentation defines when panics may occur #[expect(clippy::indexing_slicing)] while i < N { bytes[i] = src[i]; i += 1; } Self(u32::from_ne_bytes(bytes)) } #[inline] pub const fn from_ascii_bytes(src: &[AsciiByte; N]) -> Self { Self::from_utf8::(unsafe { core::mem::transmute::<&[AsciiByte; N], &[u8; N]>(src) }) } #[inline] pub const fn to_bytes(&self) -> [u8; 4] { self.0.to_ne_bytes() } #[inline] pub const fn to_ascii_bytes(&self) -> [AsciiByte; 4] { unsafe { core::mem::transmute(self.to_bytes()) } } pub const fn len(&self) -> usize { let word = self.0; #[cfg(target_endian = "little")] let len = (4 - word.leading_zeros() / 8) as usize; #[cfg(target_endian = "big")] let len = (4 - word.trailing_zeros() / 8) as usize; len } pub const fn is_ascii_alphabetic(&self) -> bool { let word = self.0; // Each of the following bitmasks set *the high bit* (0x8) to 0 for valid and 1 for invalid. // `mask` sets all NUL bytes to 0. let mask = (word + 0x7f7f_7f7f) & 0x8080_8080; // `lower` converts the string to lowercase. It may also change the value of non-alpha // characters, but this does not matter for the alphabetic test that follows. let lower = word | 0x2020_2020; // `alpha` sets all alphabetic bytes to 0. We only need check for lowercase characters. let alpha = !(lower + 0x1f1f_1f1f) | (lower + 0x0505_0505); // The overall string is valid if every character passes at least one test. // We performed two tests here: non-NUL (`mask`) and alphabetic (`alpha`). (alpha & mask) == 0 } pub const fn is_ascii_alphanumeric(&self) -> bool { let word = self.0; // See explanatory comments in is_ascii_alphabetic let mask = (word + 0x7f7f_7f7f) & 0x8080_8080; let numeric = !(word + 0x5050_5050) | (word + 0x4646_4646); let lower = word | 0x2020_2020; let alpha = !(lower + 0x1f1f_1f1f) | (lower + 0x0505_0505); (alpha & numeric & mask) == 0 } pub const fn is_ascii_numeric(&self) -> bool { let word = self.0; // See explanatory comments in is_ascii_alphabetic let mask = (word + 0x7f7f_7f7f) & 0x8080_8080; let numeric = !(word + 0x5050_5050) | (word + 0x4646_4646); (numeric & mask) == 0 } pub const fn is_ascii_lowercase(&self) -> bool { let word = self.0; // For efficiency, this function tests for an invalid string rather than a valid string. // A string is ASCII lowercase iff it contains no uppercase ASCII characters. // `invalid_case` sets all uppercase ASCII characters to 0 and all others to 1. let invalid_case = !(word + 0x3f3f_3f3f) | (word + 0x2525_2525); // The string is valid if it contains no invalid characters (if all high bits are 1). (invalid_case & 0x8080_8080) == 0x8080_8080 } pub const fn is_ascii_titlecase(&self) -> bool { let word = self.0; // See explanatory comments in is_ascii_lowercase let invalid_case = if cfg!(target_endian = "little") { !(word + 0x3f3f_3f1f) | (word + 0x2525_2505) } else { !(word + 0x1f3f_3f3f) | (word + 0x0525_2525) }; (invalid_case & 0x8080_8080) == 0x8080_8080 } pub const fn is_ascii_uppercase(&self) -> bool { let word = self.0; // See explanatory comments in is_ascii_lowercase let invalid_case = !(word + 0x1f1f_1f1f) | (word + 0x0505_0505); (invalid_case & 0x8080_8080) == 0x8080_8080 } pub const fn is_ascii_alphabetic_lowercase(&self) -> bool { let word = self.0; // `mask` sets all NUL bytes to 0. let mask = (word + 0x7f7f_7f7f) & 0x8080_8080; // `lower_alpha` sets all lowercase ASCII characters to 0 and all others to 1. let lower_alpha = !(word + 0x1f1f_1f1f) | (word + 0x0505_0505); // The overall string is valid if every character passes at least one test. // We performed two tests here: non-NUL (`mask`) and lowercase ASCII character (`alpha`). (lower_alpha & mask) == 0 } pub const fn is_ascii_alphabetic_titlecase(&self) -> bool { let word = self.0; // See explanatory comments in is_ascii_alphabetic_lowercase let mask = (word + 0x7f7f_7f7f) & 0x8080_8080; let title_case = if cfg!(target_endian = "little") { !(word + 0x1f1f_1f3f) | (word + 0x0505_0525) } else { !(word + 0x3f1f_1f1f) | (word + 0x2505_0505) }; (title_case & mask) == 0 } pub const fn is_ascii_alphabetic_uppercase(&self) -> bool { let word = self.0; // See explanatory comments in is_ascii_alphabetic_lowercase let mask = (word + 0x7f7f_7f7f) & 0x8080_8080; let upper_alpha = !(word + 0x3f3f_3f3f) | (word + 0x2525_2525); (upper_alpha & mask) == 0 } pub const fn to_ascii_lowercase(&self) -> Self { let word = self.0; let result = word | (((word + 0x3f3f_3f3f) & !(word + 0x2525_2525) & 0x8080_8080) >> 2); Self(result) } pub const fn to_ascii_titlecase(&self) -> Self { let word = self.0.to_le(); let mask = ((word + 0x3f3f_3f1f) & !(word + 0x2525_2505) & 0x8080_8080) >> 2; let result = (word | mask) & !(0x20 & mask); Self(u32::from_le(result)) } pub const fn to_ascii_uppercase(&self) -> Self { let word = self.0; let result = word & !(((word + 0x1f1f_1f1f) & !(word + 0x0505_0505) & 0x8080_8080) >> 2); Self(result) } } /// Internal helper struct that performs operations on aligned integers. /// Supports strings up to 8 bytes long. #[repr(transparent)] pub struct Aligned8(u64); impl Aligned8 { /// # Panics /// Panics if N is greater than 8 #[inline] pub const fn from_utf8(src: &[u8; N]) -> Self { let mut bytes = [0; 8]; let mut i = 0; // The function documentation defines when panics may occur #[expect(clippy::indexing_slicing)] while i < N { bytes[i] = src[i]; i += 1; } Self(u64::from_ne_bytes(bytes)) } #[inline] pub const fn from_ascii_bytes(src: &[AsciiByte; N]) -> Self { Self::from_utf8::(unsafe { core::mem::transmute::<&[AsciiByte; N], &[u8; N]>(src) }) } #[inline] pub const fn to_bytes(&self) -> [u8; 8] { self.0.to_ne_bytes() } #[inline] pub const fn to_ascii_bytes(&self) -> [AsciiByte; 8] { unsafe { core::mem::transmute(self.to_bytes()) } } pub const fn len(&self) -> usize { let word = self.0; #[cfg(target_endian = "little")] let len = (8 - word.leading_zeros() / 8) as usize; #[cfg(target_endian = "big")] let len = (8 - word.trailing_zeros() / 8) as usize; len } pub const fn is_ascii_alphabetic(&self) -> bool { let word = self.0; let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080; let lower = word | 0x2020_2020_2020_2020; let alpha = !(lower + 0x1f1f_1f1f_1f1f_1f1f) | (lower + 0x0505_0505_0505_0505); (alpha & mask) == 0 } pub const fn is_ascii_alphanumeric(&self) -> bool { let word = self.0; let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080; let numeric = !(word + 0x5050_5050_5050_5050) | (word + 0x4646_4646_4646_4646); let lower = word | 0x2020_2020_2020_2020; let alpha = !(lower + 0x1f1f_1f1f_1f1f_1f1f) | (lower + 0x0505_0505_0505_0505); (alpha & numeric & mask) == 0 } pub const fn is_ascii_numeric(&self) -> bool { let word = self.0; let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080; let numeric = !(word + 0x5050_5050_5050_5050) | (word + 0x4646_4646_4646_4646); (numeric & mask) == 0 } pub const fn is_ascii_lowercase(&self) -> bool { let word = self.0; let invalid_case = !(word + 0x3f3f_3f3f_3f3f_3f3f) | (word + 0x2525_2525_2525_2525); (invalid_case & 0x8080_8080_8080_8080) == 0x8080_8080_8080_8080 } pub const fn is_ascii_titlecase(&self) -> bool { let word = self.0; let invalid_case = if cfg!(target_endian = "little") { !(word + 0x3f3f_3f3f_3f3f_3f1f) | (word + 0x2525_2525_2525_2505) } else { !(word + 0x1f3f_3f3f_3f3f_3f3f) | (word + 0x0525_2525_2525_2525) }; (invalid_case & 0x8080_8080_8080_8080) == 0x8080_8080_8080_8080 } pub const fn is_ascii_uppercase(&self) -> bool { let word = self.0; let invalid_case = !(word + 0x1f1f_1f1f_1f1f_1f1f) | (word + 0x0505_0505_0505_0505); (invalid_case & 0x8080_8080_8080_8080) == 0x8080_8080_8080_8080 } pub const fn is_ascii_alphabetic_lowercase(&self) -> bool { let word = self.0; // `mask` sets all NUL bytes to 0. let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080; // `lower_alpha` sets all lowercase ASCII characters to 0 and all others to 1. let lower_alpha = !(word + 0x1f1f_1f1f_1f1f_1f1f) | (word + 0x0505_0505_0505_0505); // The overall string is valid if every character passes at least one test. // We performed two tests here: non-NUL (`mask`) and lowercase ASCII character (`alpha`). (lower_alpha & mask) == 0 } pub const fn is_ascii_alphabetic_titlecase(&self) -> bool { let word = self.0; // See explanatory comments in is_ascii_alphabetic_lowercase let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080; let title_case = if cfg!(target_endian = "little") { !(word + 0x1f1f_1f1f_1f1f_1f3f) | (word + 0x0505_0505_0505_0525) } else { !(word + 0x3f1f_1f1f_1f1f_1f1f) | (word + 0x2505_0505_0505_0505) }; (title_case & mask) == 0 } pub const fn is_ascii_alphabetic_uppercase(&self) -> bool { let word = self.0; // See explanatory comments in is_ascii_alphabetic_lowercase let mask = (word + 0x7f7f_7f7f_7f7f_7f7f) & 0x8080_8080_8080_8080; let upper_alpha = !(word + 0x3f3f_3f3f_3f3f_3f3f) | (word + 0x2525_2525_2525_2525); (upper_alpha & mask) == 0 } pub const fn to_ascii_lowercase(&self) -> Self { let word = self.0; let result = word | (((word + 0x3f3f_3f3f_3f3f_3f3f) & !(word + 0x2525_2525_2525_2525) & 0x8080_8080_8080_8080) >> 2); Self(result) } pub const fn to_ascii_titlecase(&self) -> Self { let word = self.0.to_le(); let mask = ((word + 0x3f3f_3f3f_3f3f_3f1f) & !(word + 0x2525_2525_2525_2505) & 0x8080_8080_8080_8080) >> 2; let result = (word | mask) & !(0x20 & mask); Self(u64::from_le(result)) } pub const fn to_ascii_uppercase(&self) -> Self { let word = self.0; let result = word & !(((word + 0x1f1f_1f1f_1f1f_1f1f) & !(word + 0x0505_0505_0505_0505) & 0x8080_8080_8080_8080) >> 2); Self(result) } } tinystr-0.8.2/src/lib.rs000064400000000000000000000064401046102023000132560ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). //! `tinystr` is a utility crate of the [`ICU4X`] project. //! //! It includes [`TinyAsciiStr`], a core API for representing small ASCII-only bounded length strings. //! //! It is optimized for operations on strings of size 8 or smaller. When use cases involve comparison //! and conversion of strings for lowercase/uppercase/titlecase, or checking //! numeric/alphabetic/alphanumeric, `TinyAsciiStr` is the edge performance library. //! //! # Examples //! //! ```rust //! use tinystr::TinyAsciiStr; //! //! let s1: TinyAsciiStr<4> = "tEsT".parse().expect("Failed to parse."); //! //! assert_eq!(s1, "tEsT"); //! assert_eq!(s1.to_ascii_uppercase(), "TEST"); //! assert_eq!(s1.to_ascii_lowercase(), "test"); //! assert_eq!(s1.to_ascii_titlecase(), "Test"); //! assert!(s1.is_ascii_alphanumeric()); //! assert!(!s1.is_ascii_numeric()); //! //! let s2 = TinyAsciiStr::<8>::try_from_raw(*b"New York") //! .expect("Failed to parse."); //! //! assert_eq!(s2, "New York"); //! assert_eq!(s2.to_ascii_uppercase(), "NEW YORK"); //! assert_eq!(s2.to_ascii_lowercase(), "new york"); //! assert_eq!(s2.to_ascii_titlecase(), "New york"); //! assert!(!s2.is_ascii_alphanumeric()); //! ``` //! //! # Details //! //! When strings are of size 8 or smaller, the struct transforms the strings as `u32`/`u64` and uses //! bitmasking to provide basic string manipulation operations: //! * `is_ascii_numeric` //! * `is_ascii_alphabetic` //! * `is_ascii_alphanumeric` //! * `to_ascii_lowercase` //! * `to_ascii_uppercase` //! * `to_ascii_titlecase` //! * `PartialEq` //! //! `TinyAsciiStr` will fall back to `u8` character manipulation for strings of length greater than 8. //! //! [`ICU4X`]: ../icu/index.html // https://github.com/unicode-org/icu4x/blob/main/documents/process/boilerplate.md#library-annotations #![cfg_attr(not(any(test, doc)), no_std)] #![cfg_attr( not(test), deny( clippy::indexing_slicing, clippy::unwrap_used, clippy::expect_used, clippy::panic, clippy::exhaustive_structs, clippy::exhaustive_enums, clippy::trivially_copy_pass_by_ref, missing_debug_implementations, ) )] mod macros; mod ascii; mod asciibyte; mod error; mod int_ops; mod unvalidated; #[cfg(feature = "serde")] mod serde; #[cfg(feature = "databake")] mod databake; #[cfg(feature = "zerovec")] mod ule; #[cfg(feature = "alloc")] extern crate alloc; pub use ascii::TinyAsciiStr; pub use error::ParseError; pub use unvalidated::UnvalidatedTinyAsciiStr; /// These are temporary compatability reexports that will be removed /// in a future version. pub type TinyStr4 = TinyAsciiStr<4>; /// These are temporary compatability reexports that will be removed /// in a future version. pub type TinyStr8 = TinyAsciiStr<8>; /// These are temporary compatability reexports that will be removed /// in a future version. pub type TinyStr16 = TinyAsciiStr<16>; #[test] fn test_size() { assert_eq!( core::mem::size_of::(), core::mem::size_of::>() ); assert_eq!( core::mem::size_of::(), core::mem::size_of::>() ); } tinystr-0.8.2/src/macros.rs000064400000000000000000000017451046102023000137770ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). #[macro_export] macro_rules! tinystr { ($n:literal, $s:literal) => {{ // Force it into a const context; otherwise it may get evaluated at runtime instead. const TINYSTR_MACRO_CONST: $crate::TinyAsciiStr<$n> = { match $crate::TinyAsciiStr::try_from_utf8($s.as_bytes()) { Ok(s) => s, // Cannot format the error since formatting isn't const yet Err(_) => panic!(concat!("Failed to construct tinystr from ", $s)), } }; TINYSTR_MACRO_CONST }}; } #[cfg(test)] mod tests { #[test] fn test_macro_construction() { let s1 = tinystr!(8, "foobar"); assert_eq!(&*s1, "foobar"); let s1 = tinystr!(12, "foobarbaz"); assert_eq!(&*s1, "foobarbaz"); } } tinystr-0.8.2/src/serde.rs000064400000000000000000000062111046102023000136060ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::TinyAsciiStr; use core::fmt; use core::marker::PhantomData; use core::ops::Deref; use serde_core::de::{Error, SeqAccess, Visitor}; use serde_core::ser::SerializeTuple; use serde_core::{Deserialize, Deserializer, Serialize, Serializer}; impl Serialize for TinyAsciiStr { #[inline] fn serialize(&self, serializer: S) -> Result where S: Serializer, { if serializer.is_human_readable() { self.deref().serialize(serializer) } else { let mut seq = serializer.serialize_tuple(N)?; for byte in self.all_bytes() { seq.serialize_element(byte)?; } seq.end() } } } struct TinyAsciiStrVisitor { marker: PhantomData>, } impl TinyAsciiStrVisitor { fn new() -> Self { TinyAsciiStrVisitor { marker: PhantomData, } } } impl<'de, const N: usize> Visitor<'de> for TinyAsciiStrVisitor { type Value = TinyAsciiStr; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { write!(formatter, "a TinyAsciiStr<{N}>") } #[inline] fn visit_seq(self, mut seq: A) -> Result where A: SeqAccess<'de>, { let mut bytes = [0u8; N]; let mut zeroes = false; for out in &mut bytes.iter_mut().take(N) { let byte = seq .next_element()? .ok_or_else(|| Error::invalid_length(N, &self))?; if byte == 0 { zeroes = true; } else if zeroes { return Err(Error::custom("TinyAsciiStr cannot contain null bytes")); } if byte >= 0x80 { return Err(Error::custom("TinyAsciiStr cannot contain non-ascii bytes")); } *out = byte; } Ok(unsafe { TinyAsciiStr::from_utf8_unchecked(bytes) }) } } impl<'de, const N: usize> Deserialize<'de> for TinyAsciiStr { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { if deserializer.is_human_readable() { struct HumanVisitor; impl<'de, const M: usize> Visitor<'de> for HumanVisitor { type Value = TinyAsciiStr; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { write!(formatter, "a TinyAsciiStr<{M}>") } fn visit_str(self, v: &str) -> Result where E: Error, { TinyAsciiStr::try_from_str(v).map_err(|_| Error::custom("invalid str")) } } deserializer.deserialize_str(HumanVisitor::) } else { deserializer.deserialize_tuple(N, TinyAsciiStrVisitor::::new()) } } } tinystr-0.8.2/src/ule.rs000064400000000000000000000101631046102023000132720ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::{TinyAsciiStr, UnvalidatedTinyAsciiStr}; #[cfg(feature = "alloc")] use zerovec::maps::ZeroMapKV; use zerovec::ule::*; #[cfg(feature = "alloc")] use zerovec::{ZeroSlice, ZeroVec}; // Safety (based on the safety checklist on the ULE trait): // 1. TinyAsciiStr does not include any uninitialized or padding bytes. // (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) // 2. TinyAsciiStr is aligned to 1 byte. // (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) // 3. The impl of validate_bytes() returns an error if any byte is not valid. // 4. The impl of validate_bytes() returns an error if there are extra bytes. // 5. The other ULE methods use the default impl. // 6. TinyAsciiStr byte equality is semantic equality unsafe impl ULE for TinyAsciiStr { #[inline] fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> { if bytes.len() % N != 0 { return Err(UleError::length::(bytes.len())); } // Validate the bytes for chunk in bytes.chunks_exact(N) { let _ = TinyAsciiStr::::try_from_utf8_inner(chunk, true) .map_err(|_| UleError::parse::())?; } Ok(()) } } impl NicheBytes for TinyAsciiStr { // AsciiByte is 0..128 const NICHE_BIT_PATTERN: [u8; N] = [255; N]; } impl AsULE for TinyAsciiStr { type ULE = Self; #[inline] fn to_unaligned(self) -> Self::ULE { self } #[inline] fn from_unaligned(unaligned: Self::ULE) -> Self { unaligned } } #[cfg(feature = "alloc")] impl<'a, const N: usize> ZeroMapKV<'a> for TinyAsciiStr { type Container = ZeroVec<'a, TinyAsciiStr>; type Slice = ZeroSlice>; type GetType = TinyAsciiStr; type OwnedType = TinyAsciiStr; } // Safety (based on the safety checklist on the ULE trait): // 1. UnvalidatedTinyAsciiStr does not include any uninitialized or padding bytes. // (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) // 2. UnvalidatedTinyAsciiStr is aligned to 1 byte. // (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) // 3. The impl of validate_bytes() returns an error if any byte is not valid. // 4. The impl of validate_bytes() returns an error if there are extra bytes. // 5. The other ULE methods use the default impl. // 6. UnvalidatedTinyAsciiStr byte equality is semantic equality unsafe impl ULE for UnvalidatedTinyAsciiStr { #[inline] fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> { if bytes.len() % N != 0 { return Err(UleError::length::(bytes.len())); } Ok(()) } } impl AsULE for UnvalidatedTinyAsciiStr { type ULE = Self; #[inline] fn to_unaligned(self) -> Self::ULE { self } #[inline] fn from_unaligned(unaligned: Self::ULE) -> Self { unaligned } } #[cfg(feature = "alloc")] impl<'a, const N: usize> ZeroMapKV<'a> for UnvalidatedTinyAsciiStr { type Container = ZeroVec<'a, UnvalidatedTinyAsciiStr>; type Slice = ZeroSlice>; type GetType = UnvalidatedTinyAsciiStr; type OwnedType = UnvalidatedTinyAsciiStr; } #[cfg(test)] mod test { use crate::*; use zerovec::*; #[test] fn test_zerovec() { let mut vec = ZeroVec::>::new(); vec.with_mut(|v| v.push("foobar".parse().unwrap())); vec.with_mut(|v| v.push("baz".parse().unwrap())); vec.with_mut(|v| v.push("quux".parse().unwrap())); let bytes = vec.as_bytes(); let vec: ZeroVec> = ZeroVec::parse_bytes(bytes).unwrap(); assert_eq!(&*vec.get(0).unwrap(), "foobar"); assert_eq!(&*vec.get(1).unwrap(), "baz"); assert_eq!(&*vec.get(2).unwrap(), "quux"); } } tinystr-0.8.2/src/unvalidated.rs000064400000000000000000000070571046102023000150150ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::ParseError; use crate::TinyAsciiStr; use core::fmt; /// A fixed-length bytes array that is expected to be an ASCII string but does not enforce that invariant. /// /// Use this type instead of `TinyAsciiStr` if you don't need to enforce ASCII during deserialization. For /// example, strings that are keys of a map don't need to ever be reified as `TinyAsciiStr`s. /// /// The main advantage of this type over `[u8; N]` is that it serializes as a string in /// human-readable formats like JSON. #[derive(PartialEq, PartialOrd, Eq, Ord, Clone, Copy)] pub struct UnvalidatedTinyAsciiStr(pub(crate) [u8; N]); impl fmt::Debug for UnvalidatedTinyAsciiStr { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { // Debug as a string if possible match self.try_into_tinystr() { Ok(s) => fmt::Debug::fmt(&s, f), Err(_) => fmt::Debug::fmt(&self.0, f), } } } impl UnvalidatedTinyAsciiStr { #[inline] /// Converts into a [`TinyAsciiStr`]. Fails if the bytes are not valid ASCII. pub fn try_into_tinystr(self) -> Result, ParseError> { TinyAsciiStr::try_from_raw(self.0) } #[inline] /// Unsafely converts into a [`TinyAsciiStr`]. pub const fn from_utf8_unchecked(bytes: [u8; N]) -> Self { Self(bytes) } } impl TinyAsciiStr { #[inline] // Converts into a [`UnvalidatedTinyAsciiStr`] pub const fn to_unvalidated(self) -> UnvalidatedTinyAsciiStr { UnvalidatedTinyAsciiStr(*self.all_bytes()) } } impl From> for UnvalidatedTinyAsciiStr { fn from(other: TinyAsciiStr) -> Self { other.to_unvalidated() } } #[cfg(feature = "serde")] impl serde_core::Serialize for UnvalidatedTinyAsciiStr { fn serialize(&self, serializer: S) -> Result where S: serde_core::Serializer, { use serde_core::ser::Error; self.try_into_tinystr() .map_err(|_| S::Error::custom("invalid ascii in UnvalidatedTinyAsciiStr"))? .serialize(serializer) } } macro_rules! deserialize { ($size:literal) => { #[cfg(feature = "serde")] impl<'de, 'a> serde_core::Deserialize<'de> for UnvalidatedTinyAsciiStr<$size> where 'de: 'a, { fn deserialize(deserializer: D) -> Result where D: serde_core::Deserializer<'de>, { if deserializer.is_human_readable() { Ok(TinyAsciiStr::deserialize(deserializer)?.to_unvalidated()) } else { Ok(Self(<[u8; $size]>::deserialize(deserializer)?)) } } } }; } deserialize!(1); deserialize!(2); deserialize!(3); deserialize!(4); deserialize!(5); deserialize!(6); deserialize!(7); deserialize!(8); deserialize!(9); deserialize!(10); deserialize!(11); deserialize!(12); deserialize!(13); deserialize!(14); deserialize!(15); deserialize!(16); deserialize!(17); deserialize!(18); deserialize!(19); deserialize!(20); deserialize!(21); deserialize!(22); deserialize!(23); deserialize!(24); deserialize!(25); deserialize!(26); deserialize!(27); deserialize!(28); deserialize!(29); deserialize!(30); deserialize!(31); deserialize!(32); tinystr-0.8.2/tests/serde.rs000064400000000000000000000031711046102023000141630ustar 00000000000000// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use tinystr::*; // Tests largely adapted from `tinystr` crate // https://github.com/zbraniecki/tinystr/blob/4e4eab55dd6bded7f29a18b41452c506c461716c/tests/serde.rs macro_rules! test_roundtrip { ($f:ident, $n:literal, $val:expr) => { #[test] fn $f() { let tiny: TinyAsciiStr<$n> = $val.parse().unwrap(); let json_string = serde_json::to_string(&tiny).unwrap(); let expected_json = concat!("\"", $val, "\""); assert_eq!(json_string, expected_json); let recover: TinyAsciiStr<$n> = serde_json::from_str(&json_string).unwrap(); assert_eq!(&*tiny, &*recover); let bin = bincode::serialize(&tiny).unwrap(); assert_eq!(bin, &tiny.all_bytes()[..]); let debin: TinyAsciiStr<$n> = bincode::deserialize(&bin).unwrap(); assert_eq!(&*tiny, &*debin); let post = postcard::to_stdvec(&tiny).unwrap(); assert_eq!(post, &tiny.all_bytes()[..]); let unpost: TinyAsciiStr<$n> = postcard::from_bytes(&post).unwrap(); assert_eq!(&*tiny, &*unpost); } }; } test_roundtrip!(test_roundtrip4_1, 4, "en"); test_roundtrip!(test_roundtrip4_2, 4, "Latn"); test_roundtrip!(test_roundtrip8, 8, "calendar"); test_roundtrip!(test_roundtrip16, 16, "verylongstring"); test_roundtrip!(test_roundtrip10, 11, "shortstring"); test_roundtrip!(test_roundtrip30, 24, "veryveryverylongstring");