unic-langid-impl-0.9.0/.cargo_vcs_info.json0000644000000001121365450372000142700ustar00{ "git": { "sha1": "b318e9cd30edd629b61867041a81237e5282b3e1" } } unic-langid-impl-0.9.0/Cargo.lock0000644000000357001365450372000122560ustar00# This file is automatically @generated by Cargo. # It is not intended for manual editing. [[package]] name = "atty" version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ "hermit-abi", "libc", "winapi", ] [[package]] name = "autocfg" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d" [[package]] name = "bitflags" version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" [[package]] name = "bstr" version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2889e6d50f394968c8bf4240dc3f2a7eb4680844d27308f798229ac9d4725f41" dependencies = [ "lazy_static", "memchr", "regex-automata", "serde", ] [[package]] name = "bumpalo" version = "3.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "12ae9db68ad7fac5fe51304d20f016c911539251075a214f8e663babefa35187" [[package]] name = "byteorder" version = "1.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" [[package]] name = "cast" version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b9434b9a5aa1450faa3f9cb14ea0e8c53bb5d2b3c1bfd1ab4fc03e9f33fbfb0" dependencies = [ "rustc_version", ] [[package]] name = "cfg-if" version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" [[package]] name = "clap" version = "2.33.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9" dependencies = [ "bitflags", "textwrap", "unicode-width", ] [[package]] name = "criterion" version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63f696897c88b57f4ffe3c69d8e1a0613c7d0e6c4833363c8560fbde9c47b966" dependencies = [ "atty", "cast", "clap", "criterion-plot", "csv", "itertools", "lazy_static", "num-traits", "oorandom", "plotters", "rayon", "regex", "serde", "serde_derive", "serde_json", "tinytemplate", "walkdir", ] [[package]] name = "criterion-plot" version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ddeaf7989f00f2e1d871a26a110f3ed713632feac17f65f03ca938c542618b60" dependencies = [ "cast", "itertools", ] [[package]] name = "crossbeam-deque" version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f02af974daeee82218205558e51ec8768b48cf524bd01d550abe5573a608285" dependencies = [ "crossbeam-epoch", "crossbeam-utils", "maybe-uninit", ] [[package]] name = "crossbeam-epoch" version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "058ed274caafc1f60c4997b5fc07bf7dc7cca454af7c6e81edffe5f33f70dace" dependencies = [ "autocfg", "cfg-if", "crossbeam-utils", "lazy_static", "maybe-uninit", "memoffset", "scopeguard", ] [[package]] name = "crossbeam-queue" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c695eeca1e7173472a32221542ae469b3e9aac3a4fc81f7696bcad82029493db" dependencies = [ "cfg-if", "crossbeam-utils", ] [[package]] name = "crossbeam-utils" version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8" dependencies = [ "autocfg", "cfg-if", "lazy_static", ] [[package]] name = "csv" version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "00affe7f6ab566df61b4be3ce8cf16bc2576bca0963ceb0955e45d514bf9a279" dependencies = [ "bstr", "csv-core", "itoa", "ryu", "serde", ] [[package]] name = "csv-core" version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" dependencies = [ "memchr", ] [[package]] name = "either" version = "1.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3" [[package]] name = "hermit-abi" version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61565ff7aaace3525556587bd2dc31d4a07071957be715e63ce7b1eccf51a8f4" dependencies = [ "libc", ] [[package]] name = "itertools" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b" dependencies = [ "either", ] [[package]] name = "itoa" version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8b7a7c0c47db5545ed3fef7468ee7bb5b74691498139e4b3f6a20685dc6dd8e" [[package]] name = "js-sys" version = "0.3.39" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa5a448de267e7358beaf4a5d849518fe9a0c13fce7afd44b06e68550e5562a7" dependencies = [ "wasm-bindgen", ] [[package]] name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" version = "0.2.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99e85c08494b21a9054e7fe1374a732aeadaff3980b6990b94bfd3a70f690005" [[package]] name = "log" version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7" dependencies = [ "cfg-if", ] [[package]] name = "maybe-uninit" version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00" [[package]] name = "memchr" version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" [[package]] name = "memoffset" version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4fc2c02a7e374099d4ee95a193111f72d2110197fe200272371758f6c3643d8" dependencies = [ "autocfg", ] [[package]] name = "num-traits" version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c62be47e61d1842b9170f0fdeec8eba98e60e90e5446449a0545e5152acd7096" dependencies = [ "autocfg", ] [[package]] name = "num_cpus" version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" dependencies = [ "hermit-abi", "libc", ] [[package]] name = "oorandom" version = "11.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94af325bc33c7f60191be4e2c984d48aaa21e2854f473b85398344b60c9b6358" [[package]] name = "plotters" version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9b1d9ca091d370ea3a78d5619145d1b59426ab0c9eedbad2514a4cee08bf389" dependencies = [ "js-sys", "num-traits", "wasm-bindgen", "web-sys", ] [[package]] name = "proc-macro2" version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8872cf6f48eee44265156c111456a700ab3483686b3f96df4cf5481c89157319" dependencies = [ "unicode-xid", ] [[package]] name = "quote" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c1f4b0efa5fc5e8ceb705136bfee52cfdb6a4e3509f770b478cd6ed434232a7" dependencies = [ "proc-macro2", ] [[package]] name = "rayon" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db6ce3297f9c85e16621bb8cca38a06779ffc31bb8184e1be4bed2be4678a098" dependencies = [ "crossbeam-deque", "either", "rayon-core", ] [[package]] name = "rayon-core" version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08a89b46efaf957e52b18062fb2f4660f8b8a4dde1807ca002690868ef2c85a9" dependencies = [ "crossbeam-deque", "crossbeam-queue", "crossbeam-utils", "lazy_static", "num_cpus", ] [[package]] name = "regex" version = "1.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6020f034922e3194c711b82a627453881bc4682166cabb07134a10c26ba7692" dependencies = [ "regex-syntax", ] [[package]] name = "regex-automata" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae1ded71d66a4a97f5e961fd0cb25a5f366a42a41570d16a763a69c092c26ae4" dependencies = [ "byteorder", ] [[package]] name = "regex-syntax" version = "0.6.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fe5bd57d1d7414c6b5ed48563a2c855d995ff777729dcd91c369ec7fea395ae" [[package]] name = "rustc_version" version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" dependencies = [ "semver", ] [[package]] name = "ryu" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed3d612bc64430efeb3f7ee6ef26d590dce0c43249217bddc62112540c7941e1" [[package]] name = "same-file" version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" dependencies = [ "winapi-util", ] [[package]] name = "scopeguard" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" [[package]] name = "semver" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" dependencies = [ "semver-parser", ] [[package]] name = "semver-parser" version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" [[package]] name = "serde" version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "36df6ac6412072f67cf767ebbde4133a5b2e88e76dc6187fa7104cd16f783399" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e549e3abf4fb8621bd1609f11dfc9f5e50320802273b12f3811a67e6716ea6c" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "serde_json" version = "1.0.52" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7894c8ed05b7a3a279aeb79025fdec1d3158080b75b98a08faf2806bb799edd" dependencies = [ "itoa", "ryu", "serde", ] [[package]] name = "syn" version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "410a7488c0a728c7ceb4ad59b9567eb4053d02e8cc7f5c0e0eeeb39518369213" dependencies = [ "proc-macro2", "quote", "unicode-xid", ] [[package]] name = "textwrap" version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" dependencies = [ "unicode-width", ] [[package]] name = "tinystr" version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4bac79c4b51eda1b090b1edebfb667821bbb51f713855164dc7cec2cb8ac2ba3" [[package]] name = "tinytemplate" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45e4bc5ac99433e0dcb8b9f309dd271a165ae37dde129b9e0ce1bfdd8bfe4891" dependencies = [ "serde", "serde_json", ] [[package]] name = "unic-langid-impl" version = "0.9.0" dependencies = [ "criterion", "serde", "serde_json", "tinystr", ] [[package]] name = "unicode-width" version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479" [[package]] name = "unicode-xid" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" [[package]] name = "walkdir" version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "777182bc735b6424e1a57516d35ed72cb8019d85c8c9bf536dccb3445c1a2f7d" dependencies = [ "same-file", "winapi", "winapi-util", ] [[package]] name = "wasm-bindgen" version = "0.2.62" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c7d40d09cdbf0f4895ae58cf57d92e1e57a9dd8ed2e8390514b54a47cc5551" dependencies = [ "cfg-if", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" version = "0.2.62" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3972e137ebf830900db522d6c8fd74d1900dcfc733462e9a12e942b00b4ac94" dependencies = [ "bumpalo", "lazy_static", "log", "proc-macro2", "quote", "syn", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" version = "0.2.62" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2cd85aa2c579e8892442954685f0d801f9129de24fa2136b2c6a539c76b65776" dependencies = [ "quote", "wasm-bindgen-macro-support", ] [[package]] name = "wasm-bindgen-macro-support" version = "0.2.62" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8eb197bd3a47553334907ffd2f16507b4f4f01bbec3ac921a7719e0decdfe72a" dependencies = [ "proc-macro2", "quote", "syn", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" version = "0.2.62" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a91c2916119c17a8e316507afaaa2dd94b47646048014bbdf6bef098c1bb58ad" [[package]] name = "web-sys" version = "0.3.39" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8bc359e5dd3b46cb9687a051d50a2fdd228e4ba7cf6fcf861a5365c3d671a642" dependencies = [ "js-sys", "wasm-bindgen", ] [[package]] name = "winapi" version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6" dependencies = [ "winapi-i686-pc-windows-gnu", "winapi-x86_64-pc-windows-gnu", ] [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" dependencies = [ "winapi", ] [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" unic-langid-impl-0.9.0/Cargo.toml0000644000000033341365450372000122770ustar00# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies # # If you believe there's an error in this file please file an # issue against the rust-lang/cargo repository. If you're # editing this file be aware that the upstream Cargo.toml # will likely look very different (and much more reasonable) [package] edition = "2018" name = "unic-langid-impl" version = "0.9.0" authors = ["Zibi Braniecki "] include = ["src/**/*", "benches/*.rs", "Cargo.toml", "README.md"] description = "API for managing Unicode Language Identifiers" readme = "README.md" categories = ["internationalization"] license = "MIT/Apache-2.0" repository = "https://github.com/zbraniecki/unic-locale" [[bin]] name = "generate_likelysubtags" required-features = ["binary"] [[bin]] name = "generate_layout" required-features = ["binary"] [[test]] name = "likelysubtags" path = "tests/likelysubtags.rs" required-features = ["likelysubtags"] [[bench]] name = "parser" harness = false [[bench]] name = "langid" harness = false [[bench]] name = "canonicalize" harness = false [[bench]] name = "likely_subtags" harness = false required-features = ["likelysubtags"] [dependencies.serde] version = "1.0" optional = true [dependencies.serde_json] version = "1.0" optional = true [dependencies.tinystr] version = "0.3.2" [dev-dependencies.criterion] version = "0.3" [dev-dependencies.serde] version = "1.0" features = ["derive"] [dev-dependencies.serde_json] version = "1.0" [features] binary = ["serde", "serde_json"] likelysubtags = [] unic-langid-impl-0.9.0/Cargo.toml.orig010064400017500001731000000022141365450346500157720ustar0000000000000000[package] name = "unic-langid-impl" description = "API for managing Unicode Language Identifiers" version = "0.9.0" authors = ["Zibi Braniecki "] edition = "2018" readme = "README.md" repository = "https://github.com/zbraniecki/unic-locale" license = "MIT/Apache-2.0" categories = ["internationalization"] include = [ "src/**/*", "benches/*.rs", "Cargo.toml", "README.md" ] [dependencies] tinystr = "0.3.2" serde = { version = "1.0", optional = true } serde_json = { version = "1.0", optional = true } [dev-dependencies] criterion = "0.3" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" [features] likelysubtags = [] binary = ["serde", "serde_json"] [[bin]] name = "generate_likelysubtags" required-features = ["binary"] [[bin]] name = "generate_layout" required-features = ["binary"] [[test]] name = "likelysubtags" path = "tests/likelysubtags.rs" required-features = ["likelysubtags"] [[bench]] name = "parser" harness = false [[bench]] name = "langid" harness = false [[bench]] name = "canonicalize" harness = false [[bench]] name = "likely_subtags" harness = false required-features = ["likelysubtags"] unic-langid-impl-0.9.0/README.md010064400017500001731000000001261356261012300143460ustar0000000000000000This is an internal implementation crate for `unic-langid`. Please use `unic-langid`. unic-langid-impl-0.9.0/benches/canonicalize.rs010064400017500001731000000021621365450332700175160ustar0000000000000000use criterion::black_box; use criterion::criterion_group; use criterion::criterion_main; use criterion::Criterion; use unic_langid_impl::canonicalize; fn langid_canonicalize_bench(c: &mut Criterion) { let strings = &[ "En_uS", "EN-GB", "ES-aR", "iT", "zH_HaNs_cN", "dE-aT", "Pl", "FR-FR", "de_AT", "sR-CyrL_sr", "NB-NO", "fr_fr", "Mk", "uK", "en-us", "en_gb", "ES-AR", "tH", "DE", "ZH_cyrl_hN", "eN-lAtN-uS", ]; c.bench_function("langid_canonicalize", |b| { b.iter(|| { for s in strings { let _ = canonicalize(black_box(s)); } }) }); c.bench_function("langid_canonicalize_from_bytes", |b| { let slices: Vec<&[u8]> = strings.iter().map(|s| s.as_bytes()).collect(); b.iter(|| { for s in &slices { let _ = canonicalize(black_box(s)); } }) }); } criterion_group!(benches, langid_canonicalize_bench,); criterion_main!(benches); unic-langid-impl-0.9.0/benches/langid.rs010064400017500001731000000043031365450332700163140ustar0000000000000000use criterion::black_box; use criterion::criterion_group; use criterion::criterion_main; use criterion::Criterion; use criterion::Fun; use unic_langid_impl::subtags; use unic_langid_impl::LanguageIdentifier; static STRINGS: &[&str] = &[ "en-US", "en-GB", "es-AR", "it", "zh-Hans-CN", "de-AT", "pl", "fr-FR", "de-AT", "sr-Cyrl-SR", "nb-NO", "fr-FR", "mk", "uk", "en-US", "en-GB", "es-AR", "th", "de", "zh-Cyrl-HN", "en-Latn-US", ]; fn language_identifier_construct_bench(c: &mut Criterion) { let langids: Vec = STRINGS .iter() .map(|s| -> LanguageIdentifier { s.parse().unwrap() }) .collect(); let funcs = vec![ Fun::new("from_str", |b, _| { b.iter(|| { for s in STRINGS { let _: Result = black_box(s).parse(); } }) }), Fun::new("from_bytes", |b, _| { let slices: Vec<&[u8]> = STRINGS.iter().map(|s| s.as_bytes()).collect(); b.iter(|| { for s in &slices { let _ = LanguageIdentifier::from_bytes(black_box(s)); } }) }), Fun::new("from_parts", |b, langids: &Vec| { let entries: Vec<( subtags::Language, Option, Option, Vec, )> = langids .iter() .cloned() .map(|langid| langid.into_parts()) .collect(); b.iter(|| { for (language, script, region, variants) in &entries { let _ = LanguageIdentifier::from_parts( language.clone(), script.clone(), region.clone(), variants, ); } }) }), ]; c.bench_functions("language_identifier_construct", funcs, langids); } criterion_group!(benches, language_identifier_construct_bench,); criterion_main!(benches); unic-langid-impl-0.9.0/benches/likely_subtags.rs010064400017500001731000000046761356730621600201150ustar0000000000000000use criterion::black_box; use criterion::criterion_group; use criterion::criterion_main; use criterion::Criterion; use tinystr::{TinyStr4, TinyStr8}; use unic_langid_impl::LanguageIdentifier; static STRINGS: &[&str] = &[ "en-US", "en-GB", "es-AR", "it", "zh-Hans-CN", "de-AT", "pl", "fr-FR", "de-AT", "sr-Cyrl-SR", "nb-NO", "fr-FR", "mk", "uk", "und-PL", "und-Latn-AM", "ug-Cyrl", "sr-ME", "mn-Mong", "lif-Limb", "gan", "zh-Hant", "yue-Hans", "unr", "unr-Deva", "und-Thai-CN", "ug-Cyrl", "en-Latn-DE", "pl-FR", "de-CH", "tuq", "sr-ME", "ng", "klx", "kk-Arab", "en-Cyrl", "und-Cyrl-UK", "und-Arab", "und-Arab-FO", ]; fn maximize_bench(c: &mut Criterion) { c.bench_function("maximize", move |b| { b.iter(|| { let langids: Vec = STRINGS .iter() .map(|s| -> LanguageIdentifier { s.parse().unwrap() }) .collect(); for mut s in langids { s.maximize(); let _ = black_box(s.to_string()); } }) }); } fn extract_input(s: &str) -> (Option, Option, Option) { let chunks: Vec<&str> = s.split("-").collect(); let mut lang: Option = chunks.get(0).map(|s| s.parse().unwrap()); let mut script: Option = chunks.get(1).map(|s| s.parse().unwrap()); let mut region: Option = chunks.get(2).map(|s| s.parse().unwrap()); if let Some(l) = lang { if l.as_str() == "und" { lang = None; } } if let Some(s) = script { if s.as_str().chars().count() == 2 { region = script; script = None; } } (lang, script, region) } fn raw_maximize_bench(c: &mut Criterion) { let entries: Vec<(Option, Option, Option)> = STRINGS.iter().map(|s| extract_input(s)).collect(); c.bench_function("raw_maximize", move |b| { b.iter(|| { for (lang, script, region) in &entries { let _ = unic_langid_impl::likelysubtags::maximize( lang.clone(), script.clone(), region.clone(), ); } }) }); } criterion_group!(benches, maximize_bench, raw_maximize_bench,); criterion_main!(benches); unic-langid-impl-0.9.0/benches/parser.rs010064400017500001731000000030221365432642400163500ustar0000000000000000use criterion::black_box; use criterion::criterion_group; use criterion::criterion_main; use criterion::Criterion; use unic_langid_impl::parser::parse_language_identifier; fn language_identifier_parser_bench(c: &mut Criterion) { let strings = &[ "en-US", "en-GB", "es-AR", "it", "zh-Hans-CN", "de-AT", "pl", "fr-FR", "de-AT", "sr-Cyrl-SR", "nb-NO", "fr-FR", "mk", "uk", ]; c.bench_function("language_identifier_parser", |b| { let slices: Vec<&[u8]> = strings.iter().map(|s| s.as_bytes()).collect(); b.iter(|| { for s in &slices { let _ = parse_language_identifier(black_box(s)); } }) }); } fn language_identifier_parser_casing_bench(c: &mut Criterion) { let strings = &[ "En_uS", "EN-GB", "ES-aR", "iT", "zH_HaNs_cN", "dE-aT", "Pl", "FR-FR", "de_AT", "sR-CyrL_sr", "NB-NO", "fr_fr", "Mk", "uK", ]; c.bench_function("language_identifier_parser_casing", |b| { let slices: Vec<&[u8]> = strings.iter().map(|s| s.as_bytes()).collect(); b.iter(|| { for s in &slices { let _ = parse_language_identifier(black_box(s)); } }) }); } criterion_group!( benches, language_identifier_parser_bench, language_identifier_parser_casing_bench, ); criterion_main!(benches); unic-langid-impl-0.9.0/data/cldr-misc-full/README.md010064400017500001731000000003301365450332700201020ustar0000000000000000# cldr-misc-full This repository provides the a portion of the JSON distribution of CLDR locale data for internationalization. Refer to the README at https://github.com/unicode-cldr/cldr-json for complete details. unic-langid-impl-0.9.0/src/bin/generate_layout.rs010064400017500001731000000073151365450333000201750ustar0000000000000000use serde_json::Value; use std::collections::HashMap; use std::collections::HashSet; use std::fs; use unic_langid_impl::subtags::{Language, Script}; use unic_langid_impl::CharacterDirection; use unic_langid_impl::LanguageIdentifier; fn langid_to_direction_map(path: &str) -> HashMap { let mut result = HashMap::new(); for entry in fs::read_dir(path).unwrap() { let entry = entry.unwrap(); let mut path = entry.path(); path.push("layout.json"); let contents = fs::read_to_string(path).expect("Something went wrong reading the file"); let v: Value = serde_json::from_str(&contents).unwrap(); let langid_key = v["main"].as_object().unwrap().keys().nth(0).unwrap(); if langid_key == "root" { continue; } let langid: LanguageIdentifier = langid_key.parse().unwrap(); let character_order = match v["main"][langid_key]["layout"]["orientation"]["characterOrder"] .as_str() .unwrap() { "right-to-left" => CharacterDirection::RTL, "left-to-right" => CharacterDirection::LTR, _ => unimplemented!("Encountered unknown directionality!"), }; result.insert(langid, character_order); } result } fn check_all_variants_rtl( map: &HashMap, lang: Option, script: Option