unicode-bidi-0.3.13/.appveyor.yml000064400000000000000000000007261046102023000147160ustar 00000000000000install: - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe - rustup-init -yv --default-toolchain nightly - set PATH=%PATH%;%USERPROFILE%\.cargo\bin - rustc -V - cargo -V - git submodule update --init --recursive build: false environment: RUST_BACKTRACE: full test_script: - cargo build --verbose --all - cargo doc --verbose --all --no-deps - cargo test --verbose --all - cargo test --verbose --all --features serde unicode-bidi-0.3.13/.cargo_vcs_info.json0000644000000001360000000000100134530ustar { "git": { "sha1": "3b7a02fd6006a0f83c78a3f59405e97dfd75e311" }, "path_in_vcs": "" }unicode-bidi-0.3.13/.github/workflows/main.yml000064400000000000000000000030041046102023000173040ustar 00000000000000name: CI on: push: branches: ['master', 'auto'] pull_request: jobs: Test: strategy: matrix: os: [ubuntu-latest] rust: [1.36.0, stable, beta, nightly] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v2 - uses: actions-rs/toolchain@v1 with: toolchain: ${{ matrix.rust }} override: true profile: minimal - uses: actions-rs/cargo@v1 with: command: build args: --all-targets - uses: actions-rs/cargo@v1 with: command: test - uses: actions-rs/cargo@v1 with: command: test args: --features "serde" - uses: actions-rs/cargo@v1 with: command: test args: --no-default-features - uses: actions-rs/cargo@v1 with: command: test args: --no-default-features --features=hardcoded-data Fmt: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: actions-rs/toolchain@v1 with: toolchain: stable override: true components: rustfmt - uses: actions-rs/cargo@v1 with: command: fmt args: --check build_result: name: homu build finished runs-on: ubuntu-latest needs: - "Test" - "Fmt" steps: - name: Mark the job as successful run: exit 0 if: success() - name: Mark the job as unsuccessful run: exit 1 if: "!success()" 
unicode-bidi-0.3.13/.gitignore000064400000000000000000000000431046102023000142300ustar 00000000000000Cargo.lock /data/ /target/ /*.html unicode-bidi-0.3.13/.rustfmt.toml000064400000000000000000000000211046102023000147130ustar 00000000000000array_width = 80 unicode-bidi-0.3.13/AUTHORS000064400000000000000000000001711046102023000133120ustar 00000000000000This software was written by the following people: Matt Brubeck Behnam Esfahbod unicode-bidi-0.3.13/COPYRIGHT000064400000000000000000000006341046102023000135410ustar 00000000000000This project is copyright 2015, The Servo Project Developers (given in the file AUTHORS). Licensed under the Apache License, Version 2.0 or the MIT license , at your option. All files in the project carrying such notice may not be copied, modified, or distributed except according to those terms. unicode-bidi-0.3.13/Cargo.lock0000644000000106170000000000100114330ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 3 [[package]] name = "flame" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fc2706461e1ee94f55cab2ed2e3d34ae9536cfa830358ef80acff1a3dacab30" dependencies = [ "lazy_static", "serde", "serde_derive", "serde_json", "thread-id", ] [[package]] name = "flamer" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "36b732da54fd4ea34452f2431cf464ac7be94ca4b339c9cd3d3d12eb06fe7aab" dependencies = [ "flame", "quote", "syn", ] [[package]] name = "itoa" version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fad582f4b9e86b6caa621cabeb0963332d92eea04729ab12892c2533951e6440" [[package]] name = "lazy_static" version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76f033c7ad61445c5b347c7382dd1237847eb1bce590fe50365dcb33d546be73" [[package]] name = "libc" version = "0.2.138" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "db6d7e329c562c5dfab7a46a2afabc8b987ab9a4834c9d1ca04dc54c1546cef8" [[package]] name = "proc-macro2" version = "1.0.48" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e9d89e5dba24725ae5678020bf8f1357a9aa7ff10736b551adbcd3f8d17d766f" dependencies = [ "unicode-ident", ] [[package]] name = "quote" version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "556d0f47a940e895261e77dc200d5eadfc6ef644c179c6f5edfc105e3a2292c8" dependencies = [ "proc-macro2", ] [[package]] name = "redox_syscall" version = "0.1.57" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" [[package]] name = "ryu" version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b4b9743ed687d4b4bcedf9ff5eaa7398495ae14e61cba0a295704edbc7decde" [[package]] name = "serde" version = "1.0.151" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97fed41fc1a24994d044e6db6935e69511a1153b52c15eb42493b26fa87feba0" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" version = "1.0.151" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "255abe9a125a985c05190d687b320c12f9b1f0b99445e608c21ba0782c719ad8" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "serde_json" version = "1.0.90" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8778cc0b528968fe72abec38b5db5a20a70d148116cd9325d2bc5f5180ca3faf" dependencies = [ "itoa", "ryu", "serde", ] [[package]] name = "serde_test" version = "1.0.151" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8f77be7305dac4f250891d2f7444276315f3c288176d35746b6a4ca786dacb3" dependencies = [ "serde", ] [[package]] name = "syn" version = "1.0.106" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "09ee3a69cd2c7e06684677e5629b3878b253af05e4714964204279c6bc02cf0b" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "thread-id" version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7fbf4c9d56b320106cd64fd024dadfa0be7cb4706725fc44a7d7ce952d820c1" dependencies = [ "libc", "redox_syscall", "winapi", ] [[package]] name = "unicode-bidi" version = "0.3.13" dependencies = [ "flame", "flamer", "serde", "serde_test", ] [[package]] name = "unicode-ident" version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" [[package]] name = "winapi" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" dependencies = [ "winapi-i686-pc-windows-gnu", "winapi-x86_64-pc-windows-gnu", ] [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" unicode-bidi-0.3.13/Cargo.toml0000644000000032370000000000100114560ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. 
[package] edition = "2018" name = "unicode-bidi" version = "0.3.13" authors = ["The Servo Project Developers"] exclude = [ "benches/**", "data/**", "examples/**", "tests/**", "tools/**", ] description = "Implementation of the Unicode Bidirectional Algorithm" documentation = "https://docs.rs/unicode-bidi/" readme = "README.md" keywords = [ "rtl", "unicode", "text", "layout", "bidi", ] categories = [ "no-std", "encoding", "text-processing", ] license = "MIT OR Apache-2.0" repository = "https://github.com/servo/unicode-bidi" [lib] name = "unicode_bidi" [[test]] name = "conformance_tests" path = "tests/conformance_tests.rs" required-features = ["hardcoded-data"] [dependencies.flame] version = "0.2" optional = true [dependencies.flamer] version = "0.4" optional = true [dependencies.serde] version = ">=0.8, <2.0" features = ["derive"] optional = true default-features = false [dev-dependencies.serde_test] version = ">=0.8, <2.0" [features] bench_it = [] default = [ "std", "hardcoded-data", ] flame_it = [ "flame", "flamer", ] hardcoded-data = [] std = [] unstable = [] with_serde = ["serde"] [badges.appveyor] repository = "servo/unicode-bidi" unicode-bidi-0.3.13/Cargo.toml.orig000064400000000000000000000027151046102023000151370ustar 00000000000000[package] name = "unicode-bidi" version = "0.3.13" authors = ["The Servo Project Developers"] license = "MIT OR Apache-2.0" description = "Implementation of the Unicode Bidirectional Algorithm" repository = "https://github.com/servo/unicode-bidi" documentation = "https://docs.rs/unicode-bidi/" keywords = ["rtl", "unicode", "text", "layout", "bidi"] readme="README.md" edition = "2018" categories = [ "no-std", "encoding", "text-processing", ] # No data is shipped; benches, examples and tests also depend on data. 
exclude = [ "benches/**", "data/**", "examples/**", "tests/**", "tools/**", ] [badges] appveyor = { repository = "servo/unicode-bidi" } [lib] name = "unicode_bidi" [dependencies] flame = { version = "0.2", optional = true } flamer = { version = "0.4", optional = true } serde = { version = ">=0.8, <2.0", default-features = false, optional = true, features = ["derive"] } [dev-dependencies] serde_test = ">=0.8, <2.0" [features] # Note: We don't actually use the `std` feature for anything other than making # doctests work. But it may come in handy in the future. default = ["std", "hardcoded-data"] hardcoded-data = [] # Include hardcoded Bidi data std = [] unstable = [] # travis-cargo needs it bench_it = [] flame_it = ["flame", "flamer"] with_serde = ["serde"] # DEPRECATED, please use `serde` feature, instead. [[test]] name = "conformance_tests" required-features = ["hardcoded-data"] path = "tests/conformance_tests.rs" unicode-bidi-0.3.13/LICENSE-APACHE000064400000000000000000000251371046102023000141770ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. 
"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. unicode-bidi-0.3.13/LICENSE-MIT000064400000000000000000000020571046102023000137030ustar 00000000000000Copyright (c) 2015 The Rust Project Developers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. unicode-bidi-0.3.13/README.md000064400000000000000000000010721046102023000135220ustar 00000000000000# unicode-bidi This crate implements the [Unicode Bidirectional Algorithm][tr9] for display of mixed right-to-left and left-to-right text. 
It is written in safe Rust, compatible with the current stable release. [Documentation](https://docs.rs/unicode-bidi/) [![CI](https://github.com/servo/unicode-bidi/actions/workflows/main.yml/badge.svg)](https://github.com/servo/unicode-bidi/actions) [![AppVeyor](https://img.shields.io/appveyor/ci/servo/unicode-bidi/master.svg)](https://ci.appveyor.com/project/servo/unicode-bidi) [tr9]: https://www.unicode.org/reports/tr9/ unicode-bidi-0.3.13/src/char_data/mod.rs000064400000000000000000000132151046102023000160670ustar 00000000000000// Copyright 2015 The Servo Project Developers. See the // COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! Accessor for `Bidi_Class` property from Unicode Character Database (UCD) mod tables; pub use self::tables::{BidiClass, UNICODE_VERSION}; #[cfg(feature = "hardcoded-data")] use core::char; #[cfg(feature = "hardcoded-data")] use core::cmp::Ordering::{Equal, Greater, Less}; #[cfg(feature = "hardcoded-data")] use self::tables::bidi_class_table; use crate::data_source::BidiMatchedOpeningBracket; use crate::BidiClass::*; #[cfg(feature = "hardcoded-data")] use crate::BidiDataSource; /// Hardcoded Bidi data that ships with the unicode-bidi crate. /// /// This can be enabled with the default `hardcoded-data` Cargo feature. #[cfg(feature = "hardcoded-data")] pub struct HardcodedBidiData; #[cfg(feature = "hardcoded-data")] impl BidiDataSource for HardcodedBidiData { fn bidi_class(&self, c: char) -> BidiClass { bsearch_range_value_table(c, bidi_class_table) } } /// Find the `BidiClass` of a single char. 
#[cfg(feature = "hardcoded-data")] pub fn bidi_class(c: char) -> BidiClass { bsearch_range_value_table(c, bidi_class_table) } /// If this character is a bracket according to BidiBrackets.txt, /// return the corresponding *normalized* *opening bracket* of the pair, /// and whether or not it itself is an opening bracket. pub(crate) fn bidi_matched_opening_bracket(c: char) -> Option { for pair in self::tables::bidi_pairs_table { if pair.0 == c || pair.1 == c { let skeleton = pair.2.unwrap_or(pair.0); return Some(BidiMatchedOpeningBracket { opening: skeleton, is_open: pair.0 == c, }); } } None } pub fn is_rtl(bidi_class: BidiClass) -> bool { match bidi_class { RLE | RLO | RLI => true, _ => false, } } #[cfg(feature = "hardcoded-data")] fn bsearch_range_value_table(c: char, r: &'static [(char, char, BidiClass)]) -> BidiClass { match r.binary_search_by(|&(lo, hi, _)| { if lo <= c && c <= hi { Equal } else if hi < c { Less } else { Greater } }) { Ok(idx) => { let (_, _, cat) = r[idx]; cat } // UCD/extracted/DerivedBidiClass.txt: "All code points not explicitly listed // for Bidi_Class have the value Left_To_Right (L)." 
Err(_) => L, } } #[cfg(all(test, feature = "hardcoded-data"))] mod tests { use super::*; #[test] fn test_ascii() { assert_eq!(bidi_class('\u{0000}'), BN); assert_eq!(bidi_class('\u{0040}'), ON); assert_eq!(bidi_class('\u{0041}'), L); assert_eq!(bidi_class('\u{0062}'), L); assert_eq!(bidi_class('\u{007F}'), BN); } #[test] fn test_bmp() { // Hebrew assert_eq!(bidi_class('\u{0590}'), R); assert_eq!(bidi_class('\u{05D0}'), R); assert_eq!(bidi_class('\u{05D1}'), R); assert_eq!(bidi_class('\u{05FF}'), R); // Arabic assert_eq!(bidi_class('\u{0600}'), AN); assert_eq!(bidi_class('\u{0627}'), AL); assert_eq!(bidi_class('\u{07BF}'), AL); // Default R + Arabic Extras assert_eq!(bidi_class('\u{07C0}'), R); assert_eq!(bidi_class('\u{085F}'), R); assert_eq!(bidi_class('\u{0860}'), AL); assert_eq!(bidi_class('\u{0870}'), AL); assert_eq!(bidi_class('\u{089F}'), NSM); assert_eq!(bidi_class('\u{08A0}'), AL); assert_eq!(bidi_class('\u{089F}'), NSM); assert_eq!(bidi_class('\u{08FF}'), NSM); // Default ET assert_eq!(bidi_class('\u{20A0}'), ET); assert_eq!(bidi_class('\u{20CF}'), ET); // Arabic Presentation Forms assert_eq!(bidi_class('\u{FB1D}'), R); assert_eq!(bidi_class('\u{FB4F}'), R); assert_eq!(bidi_class('\u{FB50}'), AL); assert_eq!(bidi_class('\u{FDCF}'), ON); assert_eq!(bidi_class('\u{FDF0}'), AL); assert_eq!(bidi_class('\u{FDFF}'), ON); assert_eq!(bidi_class('\u{FE70}'), AL); assert_eq!(bidi_class('\u{FEFE}'), AL); assert_eq!(bidi_class('\u{FEFF}'), BN); // noncharacters assert_eq!(bidi_class('\u{FDD0}'), L); assert_eq!(bidi_class('\u{FDD1}'), L); assert_eq!(bidi_class('\u{FDEE}'), L); assert_eq!(bidi_class('\u{FDEF}'), L); assert_eq!(bidi_class('\u{FFFE}'), L); assert_eq!(bidi_class('\u{FFFF}'), L); } #[test] fn test_smp() { // Default AL + R assert_eq!(bidi_class('\u{10800}'), R); assert_eq!(bidi_class('\u{10FFF}'), R); assert_eq!(bidi_class('\u{1E800}'), R); assert_eq!(bidi_class('\u{1EDFF}'), R); assert_eq!(bidi_class('\u{1EE00}'), AL); assert_eq!(bidi_class('\u{1EEFF}'), 
AL); assert_eq!(bidi_class('\u{1EF00}'), R); assert_eq!(bidi_class('\u{1EFFF}'), R); } #[test] fn test_unassigned_planes() { assert_eq!(bidi_class('\u{30000}'), L); assert_eq!(bidi_class('\u{40000}'), L); assert_eq!(bidi_class('\u{50000}'), L); assert_eq!(bidi_class('\u{60000}'), L); assert_eq!(bidi_class('\u{70000}'), L); assert_eq!(bidi_class('\u{80000}'), L); assert_eq!(bidi_class('\u{90000}'), L); assert_eq!(bidi_class('\u{a0000}'), L); } } unicode-bidi-0.3.13/src/char_data/tables.rs000064400000000000000000001362311046102023000165660ustar 00000000000000// NOTE: // The following code was generated by "tools/generate.py". do not edit directly #![allow(missing_docs, non_upper_case_globals, non_snake_case)] #![cfg_attr(rustfmt, rustfmt_skip)] /// The [Unicode version](http://www.unicode.org/versions/) of data pub const UNICODE_VERSION: (u64, u64, u64) = (15, 0, 0); #[allow(non_camel_case_types)] #[derive(Clone, Copy, Debug, PartialEq, Eq)] /// Represents values of the Unicode character property /// [`Bidi_Class`](http://www.unicode.org/reports/tr44/#Bidi_Class), also /// known as the *bidirectional character type*. 
///
/// * <http://www.unicode.org/reports/tr9/#Bidirectional_Character_Types>
/// * <http://www.unicode.org/reports/tr44/#Bidi_Class_Values>
///
/// Each variant is named after the short property-value alias of the class;
/// the long names given on the variants are the aliases listed in UAX #44.
pub enum BidiClass {
    /// `Arabic_Letter`
    AL,
    /// `Arabic_Number`
    AN,
    /// `Paragraph_Separator`
    B,
    /// `Boundary_Neutral`
    BN,
    /// `Common_Separator`
    CS,
    /// `European_Number`
    EN,
    /// `European_Separator`
    ES,
    /// `European_Terminator`
    ET,
    /// `First_Strong_Isolate`
    FSI,
    /// `Left_To_Right`
    L,
    /// `Left_To_Right_Embedding`
    LRE,
    /// `Left_To_Right_Isolate`
    LRI,
    /// `Left_To_Right_Override`
    LRO,
    /// `Nonspacing_Mark`
    NSM,
    /// `Other_Neutral`
    ON,
    /// `Pop_Directional_Format`
    PDF,
    /// `Pop_Directional_Isolate`
    PDI,
    /// `Right_To_Left`
    R,
    /// `Right_To_Left_Embedding`
    RLE,
    /// `Right_To_Left_Isolate`
    RLI,
    /// `Right_To_Left_Override`
    RLO,
    /// `Segment_Separator`
    S,
    /// `White_Space`
    WS,
}
('\u{386}', '\u{386}', L), ('\u{387}', '\u{387}', ON), ('\u{388}', '\u{38a}', L), ('\u{38c}', '\u{38c}', L), ('\u{38e}', '\u{3a1}', L), ('\u{3a3}', '\u{3f5}', L), ('\u{3f6}', '\u{3f6}', ON), ('\u{3f7}', '\u{482}', L), ('\u{483}', '\u{489}', NSM), ('\u{48a}', '\u{52f}', L), ('\u{531}', '\u{556}', L), ('\u{559}', '\u{589}', L), ('\u{58a}', '\u{58a}', ON), ('\u{58d}', '\u{58e}', ON), ('\u{58f}', '\u{58f}', ET), ('\u{590}', '\u{590}', R), ('\u{591}', '\u{5bd}', NSM), ('\u{5be}', '\u{5be}', R), ('\u{5bf}', '\u{5bf}', NSM), ('\u{5c0}', '\u{5c0}', R), ('\u{5c1}', '\u{5c2}', NSM), ('\u{5c3}', '\u{5c3}', R), ('\u{5c4}', '\u{5c5}', NSM), ('\u{5c6}', '\u{5c6}', R), ('\u{5c7}', '\u{5c7}', NSM), ('\u{5c8}', '\u{5ff}', R), ('\u{600}', '\u{605}', AN), ('\u{606}', '\u{607}', ON), ('\u{608}', '\u{608}', AL), ('\u{609}', '\u{60a}', ET), ('\u{60b}', '\u{60b}', AL), ('\u{60c}', '\u{60c}', CS), ('\u{60d}', '\u{60d}', AL), ('\u{60e}', '\u{60f}', ON), ('\u{610}', '\u{61a}', NSM), ('\u{61b}', '\u{64a}', AL), ('\u{64b}', '\u{65f}', NSM), ('\u{660}', '\u{669}', AN), ('\u{66a}', '\u{66a}', ET), ('\u{66b}', '\u{66c}', AN), ('\u{66d}', '\u{66f}', AL), ('\u{670}', '\u{670}', NSM), ('\u{671}', '\u{6d5}', AL), ('\u{6d6}', '\u{6dc}', NSM), ('\u{6dd}', '\u{6dd}', AN), ('\u{6de}', '\u{6de}', ON), ('\u{6df}', '\u{6e4}', NSM), ('\u{6e5}', '\u{6e6}', AL), ('\u{6e7}', '\u{6e8}', NSM), ('\u{6e9}', '\u{6e9}', ON), ('\u{6ea}', '\u{6ed}', NSM), ('\u{6ee}', '\u{6ef}', AL), ('\u{6f0}', '\u{6f9}', EN), ('\u{6fa}', '\u{710}', AL), ('\u{711}', '\u{711}', NSM), ('\u{712}', '\u{72f}', AL), ('\u{730}', '\u{74a}', NSM), ('\u{74b}', '\u{7a5}', AL), ('\u{7a6}', '\u{7b0}', NSM), ('\u{7b1}', '\u{7bf}', AL), ('\u{7c0}', '\u{7ea}', R), ('\u{7eb}', '\u{7f3}', NSM), ('\u{7f4}', '\u{7f5}', R), ('\u{7f6}', '\u{7f9}', ON), ('\u{7fa}', '\u{7fc}', R), ('\u{7fd}', '\u{7fd}', NSM), ('\u{7fe}', '\u{815}', R), ('\u{816}', '\u{819}', NSM), ('\u{81a}', '\u{81a}', R), ('\u{81b}', '\u{823}', NSM), ('\u{824}', '\u{824}', R), ('\u{825}', 
'\u{827}', NSM), ('\u{828}', '\u{828}', R), ('\u{829}', '\u{82d}', NSM), ('\u{82e}', '\u{858}', R), ('\u{859}', '\u{85b}', NSM), ('\u{85c}', '\u{85f}', R), ('\u{860}', '\u{86a}', AL), ('\u{86b}', '\u{86f}', R), ('\u{870}', '\u{88e}', AL), ('\u{88f}', '\u{88f}', R), ('\u{890}', '\u{891}', AN), ('\u{892}', '\u{897}', R), ('\u{898}', '\u{89f}', NSM), ('\u{8a0}', '\u{8c9}', AL), ('\u{8ca}', '\u{8e1}', NSM), ('\u{8e2}', '\u{8e2}', AN), ('\u{8e3}', '\u{902}', NSM), ('\u{903}', '\u{939}', L), ('\u{93a}', '\u{93a}', NSM), ('\u{93b}', '\u{93b}', L), ('\u{93c}', '\u{93c}', NSM), ('\u{93d}', '\u{940}', L), ('\u{941}', '\u{948}', NSM), ('\u{949}', '\u{94c}', L), ('\u{94d}', '\u{94d}', NSM), ('\u{94e}', '\u{950}', L), ('\u{951}', '\u{957}', NSM), ('\u{958}', '\u{961}', L), ('\u{962}', '\u{963}', NSM), ('\u{964}', '\u{980}', L), ('\u{981}', '\u{981}', NSM), ('\u{982}', '\u{983}', L), ('\u{985}', '\u{98c}', L), ('\u{98f}', '\u{990}', L), ('\u{993}', '\u{9a8}', L), ('\u{9aa}', '\u{9b0}', L), ('\u{9b2}', '\u{9b2}', L), ('\u{9b6}', '\u{9b9}', L), ('\u{9bc}', '\u{9bc}', NSM), ('\u{9bd}', '\u{9c0}', L), ('\u{9c1}', '\u{9c4}', NSM), ('\u{9c7}', '\u{9c8}', L), ('\u{9cb}', '\u{9cc}', L), ('\u{9cd}', '\u{9cd}', NSM), ('\u{9ce}', '\u{9ce}', L), ('\u{9d7}', '\u{9d7}', L), ('\u{9dc}', '\u{9dd}', L), ('\u{9df}', '\u{9e1}', L), ('\u{9e2}', '\u{9e3}', NSM), ('\u{9e6}', '\u{9f1}', L), ('\u{9f2}', '\u{9f3}', ET), ('\u{9f4}', '\u{9fa}', L), ('\u{9fb}', '\u{9fb}', ET), ('\u{9fc}', '\u{9fd}', L), ('\u{9fe}', '\u{9fe}', NSM), ('\u{a01}', '\u{a02}', NSM), ('\u{a03}', '\u{a03}', L), ('\u{a05}', '\u{a0a}', L), ('\u{a0f}', '\u{a10}', L), ('\u{a13}', '\u{a28}', L), ('\u{a2a}', '\u{a30}', L), ('\u{a32}', '\u{a33}', L), ('\u{a35}', '\u{a36}', L), ('\u{a38}', '\u{a39}', L), ('\u{a3c}', '\u{a3c}', NSM), ('\u{a3e}', '\u{a40}', L), ('\u{a41}', '\u{a42}', NSM), ('\u{a47}', '\u{a48}', NSM), ('\u{a4b}', '\u{a4d}', NSM), ('\u{a51}', '\u{a51}', NSM), ('\u{a59}', '\u{a5c}', L), ('\u{a5e}', '\u{a5e}', L), ('\u{a66}', 
'\u{a6f}', L), ('\u{a70}', '\u{a71}', NSM), ('\u{a72}', '\u{a74}', L), ('\u{a75}', '\u{a75}', NSM), ('\u{a76}', '\u{a76}', L), ('\u{a81}', '\u{a82}', NSM), ('\u{a83}', '\u{a83}', L), ('\u{a85}', '\u{a8d}', L), ('\u{a8f}', '\u{a91}', L), ('\u{a93}', '\u{aa8}', L), ('\u{aaa}', '\u{ab0}', L), ('\u{ab2}', '\u{ab3}', L), ('\u{ab5}', '\u{ab9}', L), ('\u{abc}', '\u{abc}', NSM), ('\u{abd}', '\u{ac0}', L), ('\u{ac1}', '\u{ac5}', NSM), ('\u{ac7}', '\u{ac8}', NSM), ('\u{ac9}', '\u{ac9}', L), ('\u{acb}', '\u{acc}', L), ('\u{acd}', '\u{acd}', NSM), ('\u{ad0}', '\u{ad0}', L), ('\u{ae0}', '\u{ae1}', L), ('\u{ae2}', '\u{ae3}', NSM), ('\u{ae6}', '\u{af0}', L), ('\u{af1}', '\u{af1}', ET), ('\u{af9}', '\u{af9}', L), ('\u{afa}', '\u{aff}', NSM), ('\u{b01}', '\u{b01}', NSM), ('\u{b02}', '\u{b03}', L), ('\u{b05}', '\u{b0c}', L), ('\u{b0f}', '\u{b10}', L), ('\u{b13}', '\u{b28}', L), ('\u{b2a}', '\u{b30}', L), ('\u{b32}', '\u{b33}', L), ('\u{b35}', '\u{b39}', L), ('\u{b3c}', '\u{b3c}', NSM), ('\u{b3d}', '\u{b3e}', L), ('\u{b3f}', '\u{b3f}', NSM), ('\u{b40}', '\u{b40}', L), ('\u{b41}', '\u{b44}', NSM), ('\u{b47}', '\u{b48}', L), ('\u{b4b}', '\u{b4c}', L), ('\u{b4d}', '\u{b4d}', NSM), ('\u{b55}', '\u{b56}', NSM), ('\u{b57}', '\u{b57}', L), ('\u{b5c}', '\u{b5d}', L), ('\u{b5f}', '\u{b61}', L), ('\u{b62}', '\u{b63}', NSM), ('\u{b66}', '\u{b77}', L), ('\u{b82}', '\u{b82}', NSM), ('\u{b83}', '\u{b83}', L), ('\u{b85}', '\u{b8a}', L), ('\u{b8e}', '\u{b90}', L), ('\u{b92}', '\u{b95}', L), ('\u{b99}', '\u{b9a}', L), ('\u{b9c}', '\u{b9c}', L), ('\u{b9e}', '\u{b9f}', L), ('\u{ba3}', '\u{ba4}', L), ('\u{ba8}', '\u{baa}', L), ('\u{bae}', '\u{bb9}', L), ('\u{bbe}', '\u{bbf}', L), ('\u{bc0}', '\u{bc0}', NSM), ('\u{bc1}', '\u{bc2}', L), ('\u{bc6}', '\u{bc8}', L), ('\u{bca}', '\u{bcc}', L), ('\u{bcd}', '\u{bcd}', NSM), ('\u{bd0}', '\u{bd0}', L), ('\u{bd7}', '\u{bd7}', L), ('\u{be6}', '\u{bf2}', L), ('\u{bf3}', '\u{bf8}', ON), ('\u{bf9}', '\u{bf9}', ET), ('\u{bfa}', '\u{bfa}', ON), ('\u{c00}', '\u{c00}', 
NSM), ('\u{c01}', '\u{c03}', L), ('\u{c04}', '\u{c04}', NSM), ('\u{c05}', '\u{c0c}', L), ('\u{c0e}', '\u{c10}', L), ('\u{c12}', '\u{c28}', L), ('\u{c2a}', '\u{c39}', L), ('\u{c3c}', '\u{c3c}', NSM), ('\u{c3d}', '\u{c3d}', L), ('\u{c3e}', '\u{c40}', NSM), ('\u{c41}', '\u{c44}', L), ('\u{c46}', '\u{c48}', NSM), ('\u{c4a}', '\u{c4d}', NSM), ('\u{c55}', '\u{c56}', NSM), ('\u{c58}', '\u{c5a}', L), ('\u{c5d}', '\u{c5d}', L), ('\u{c60}', '\u{c61}', L), ('\u{c62}', '\u{c63}', NSM), ('\u{c66}', '\u{c6f}', L), ('\u{c77}', '\u{c77}', L), ('\u{c78}', '\u{c7e}', ON), ('\u{c7f}', '\u{c80}', L), ('\u{c81}', '\u{c81}', NSM), ('\u{c82}', '\u{c8c}', L), ('\u{c8e}', '\u{c90}', L), ('\u{c92}', '\u{ca8}', L), ('\u{caa}', '\u{cb3}', L), ('\u{cb5}', '\u{cb9}', L), ('\u{cbc}', '\u{cbc}', NSM), ('\u{cbd}', '\u{cc4}', L), ('\u{cc6}', '\u{cc8}', L), ('\u{cca}', '\u{ccb}', L), ('\u{ccc}', '\u{ccd}', NSM), ('\u{cd5}', '\u{cd6}', L), ('\u{cdd}', '\u{cde}', L), ('\u{ce0}', '\u{ce1}', L), ('\u{ce2}', '\u{ce3}', NSM), ('\u{ce6}', '\u{cef}', L), ('\u{cf1}', '\u{cf3}', L), ('\u{d00}', '\u{d01}', NSM), ('\u{d02}', '\u{d0c}', L), ('\u{d0e}', '\u{d10}', L), ('\u{d12}', '\u{d3a}', L), ('\u{d3b}', '\u{d3c}', NSM), ('\u{d3d}', '\u{d40}', L), ('\u{d41}', '\u{d44}', NSM), ('\u{d46}', '\u{d48}', L), ('\u{d4a}', '\u{d4c}', L), ('\u{d4d}', '\u{d4d}', NSM), ('\u{d4e}', '\u{d4f}', L), ('\u{d54}', '\u{d61}', L), ('\u{d62}', '\u{d63}', NSM), ('\u{d66}', '\u{d7f}', L), ('\u{d81}', '\u{d81}', NSM), ('\u{d82}', '\u{d83}', L), ('\u{d85}', '\u{d96}', L), ('\u{d9a}', '\u{db1}', L), ('\u{db3}', '\u{dbb}', L), ('\u{dbd}', '\u{dbd}', L), ('\u{dc0}', '\u{dc6}', L), ('\u{dca}', '\u{dca}', NSM), ('\u{dcf}', '\u{dd1}', L), ('\u{dd2}', '\u{dd4}', NSM), ('\u{dd6}', '\u{dd6}', NSM), ('\u{dd8}', '\u{ddf}', L), ('\u{de6}', '\u{def}', L), ('\u{df2}', '\u{df4}', L), ('\u{e01}', '\u{e30}', L), ('\u{e31}', '\u{e31}', NSM), ('\u{e32}', '\u{e33}', L), ('\u{e34}', '\u{e3a}', NSM), ('\u{e3f}', '\u{e3f}', ET), ('\u{e40}', '\u{e46}', L), 
('\u{e47}', '\u{e4e}', NSM), ('\u{e4f}', '\u{e5b}', L), ('\u{e81}', '\u{e82}', L), ('\u{e84}', '\u{e84}', L), ('\u{e86}', '\u{e8a}', L), ('\u{e8c}', '\u{ea3}', L), ('\u{ea5}', '\u{ea5}', L), ('\u{ea7}', '\u{eb0}', L), ('\u{eb1}', '\u{eb1}', NSM), ('\u{eb2}', '\u{eb3}', L), ('\u{eb4}', '\u{ebc}', NSM), ('\u{ebd}', '\u{ebd}', L), ('\u{ec0}', '\u{ec4}', L), ('\u{ec6}', '\u{ec6}', L), ('\u{ec8}', '\u{ece}', NSM), ('\u{ed0}', '\u{ed9}', L), ('\u{edc}', '\u{edf}', L), ('\u{f00}', '\u{f17}', L), ('\u{f18}', '\u{f19}', NSM), ('\u{f1a}', '\u{f34}', L), ('\u{f35}', '\u{f35}', NSM), ('\u{f36}', '\u{f36}', L), ('\u{f37}', '\u{f37}', NSM), ('\u{f38}', '\u{f38}', L), ('\u{f39}', '\u{f39}', NSM), ('\u{f3a}', '\u{f3d}', ON), ('\u{f3e}', '\u{f47}', L), ('\u{f49}', '\u{f6c}', L), ('\u{f71}', '\u{f7e}', NSM), ('\u{f7f}', '\u{f7f}', L), ('\u{f80}', '\u{f84}', NSM), ('\u{f85}', '\u{f85}', L), ('\u{f86}', '\u{f87}', NSM), ('\u{f88}', '\u{f8c}', L), ('\u{f8d}', '\u{f97}', NSM), ('\u{f99}', '\u{fbc}', NSM), ('\u{fbe}', '\u{fc5}', L), ('\u{fc6}', '\u{fc6}', NSM), ('\u{fc7}', '\u{fcc}', L), ('\u{fce}', '\u{fda}', L), ('\u{1000}', '\u{102c}', L), ('\u{102d}', '\u{1030}', NSM), ('\u{1031}', '\u{1031}', L), ('\u{1032}', '\u{1037}', NSM), ('\u{1038}', '\u{1038}', L), ('\u{1039}', '\u{103a}', NSM), ('\u{103b}', '\u{103c}', L), ('\u{103d}', '\u{103e}', NSM), ('\u{103f}', '\u{1057}', L), ('\u{1058}', '\u{1059}', NSM), ('\u{105a}', '\u{105d}', L), ('\u{105e}', '\u{1060}', NSM), ('\u{1061}', '\u{1070}', L), ('\u{1071}', '\u{1074}', NSM), ('\u{1075}', '\u{1081}', L), ('\u{1082}', '\u{1082}', NSM), ('\u{1083}', '\u{1084}', L), ('\u{1085}', '\u{1086}', NSM), ('\u{1087}', '\u{108c}', L), ('\u{108d}', '\u{108d}', NSM), ('\u{108e}', '\u{109c}', L), ('\u{109d}', '\u{109d}', NSM), ('\u{109e}', '\u{10c5}', L), ('\u{10c7}', '\u{10c7}', L), ('\u{10cd}', '\u{10cd}', L), ('\u{10d0}', '\u{1248}', L), ('\u{124a}', '\u{124d}', L), ('\u{1250}', '\u{1256}', L), ('\u{1258}', '\u{1258}', L), ('\u{125a}', '\u{125d}', 
L), ('\u{1260}', '\u{1288}', L), ('\u{128a}', '\u{128d}', L), ('\u{1290}', '\u{12b0}', L), ('\u{12b2}', '\u{12b5}', L), ('\u{12b8}', '\u{12be}', L), ('\u{12c0}', '\u{12c0}', L), ('\u{12c2}', '\u{12c5}', L), ('\u{12c8}', '\u{12d6}', L), ('\u{12d8}', '\u{1310}', L), ('\u{1312}', '\u{1315}', L), ('\u{1318}', '\u{135a}', L), ('\u{135d}', '\u{135f}', NSM), ('\u{1360}', '\u{137c}', L), ('\u{1380}', '\u{138f}', L), ('\u{1390}', '\u{1399}', ON), ('\u{13a0}', '\u{13f5}', L), ('\u{13f8}', '\u{13fd}', L), ('\u{1400}', '\u{1400}', ON), ('\u{1401}', '\u{167f}', L), ('\u{1680}', '\u{1680}', WS), ('\u{1681}', '\u{169a}', L), ('\u{169b}', '\u{169c}', ON), ('\u{16a0}', '\u{16f8}', L), ('\u{1700}', '\u{1711}', L), ('\u{1712}', '\u{1714}', NSM), ('\u{1715}', '\u{1715}', L), ('\u{171f}', '\u{1731}', L), ('\u{1732}', '\u{1733}', NSM), ('\u{1734}', '\u{1736}', L), ('\u{1740}', '\u{1751}', L), ('\u{1752}', '\u{1753}', NSM), ('\u{1760}', '\u{176c}', L), ('\u{176e}', '\u{1770}', L), ('\u{1772}', '\u{1773}', NSM), ('\u{1780}', '\u{17b3}', L), ('\u{17b4}', '\u{17b5}', NSM), ('\u{17b6}', '\u{17b6}', L), ('\u{17b7}', '\u{17bd}', NSM), ('\u{17be}', '\u{17c5}', L), ('\u{17c6}', '\u{17c6}', NSM), ('\u{17c7}', '\u{17c8}', L), ('\u{17c9}', '\u{17d3}', NSM), ('\u{17d4}', '\u{17da}', L), ('\u{17db}', '\u{17db}', ET), ('\u{17dc}', '\u{17dc}', L), ('\u{17dd}', '\u{17dd}', NSM), ('\u{17e0}', '\u{17e9}', L), ('\u{17f0}', '\u{17f9}', ON), ('\u{1800}', '\u{180a}', ON), ('\u{180b}', '\u{180d}', NSM), ('\u{180e}', '\u{180e}', BN), ('\u{180f}', '\u{180f}', NSM), ('\u{1810}', '\u{1819}', L), ('\u{1820}', '\u{1878}', L), ('\u{1880}', '\u{1884}', L), ('\u{1885}', '\u{1886}', NSM), ('\u{1887}', '\u{18a8}', L), ('\u{18a9}', '\u{18a9}', NSM), ('\u{18aa}', '\u{18aa}', L), ('\u{18b0}', '\u{18f5}', L), ('\u{1900}', '\u{191e}', L), ('\u{1920}', '\u{1922}', NSM), ('\u{1923}', '\u{1926}', L), ('\u{1927}', '\u{1928}', NSM), ('\u{1929}', '\u{192b}', L), ('\u{1930}', '\u{1931}', L), ('\u{1932}', '\u{1932}', NSM), 
('\u{1933}', '\u{1938}', L), ('\u{1939}', '\u{193b}', NSM), ('\u{1940}', '\u{1940}', ON), ('\u{1944}', '\u{1945}', ON), ('\u{1946}', '\u{196d}', L), ('\u{1970}', '\u{1974}', L), ('\u{1980}', '\u{19ab}', L), ('\u{19b0}', '\u{19c9}', L), ('\u{19d0}', '\u{19da}', L), ('\u{19de}', '\u{19ff}', ON), ('\u{1a00}', '\u{1a16}', L), ('\u{1a17}', '\u{1a18}', NSM), ('\u{1a19}', '\u{1a1a}', L), ('\u{1a1b}', '\u{1a1b}', NSM), ('\u{1a1e}', '\u{1a55}', L), ('\u{1a56}', '\u{1a56}', NSM), ('\u{1a57}', '\u{1a57}', L), ('\u{1a58}', '\u{1a5e}', NSM), ('\u{1a60}', '\u{1a60}', NSM), ('\u{1a61}', '\u{1a61}', L), ('\u{1a62}', '\u{1a62}', NSM), ('\u{1a63}', '\u{1a64}', L), ('\u{1a65}', '\u{1a6c}', NSM), ('\u{1a6d}', '\u{1a72}', L), ('\u{1a73}', '\u{1a7c}', NSM), ('\u{1a7f}', '\u{1a7f}', NSM), ('\u{1a80}', '\u{1a89}', L), ('\u{1a90}', '\u{1a99}', L), ('\u{1aa0}', '\u{1aad}', L), ('\u{1ab0}', '\u{1ace}', NSM), ('\u{1b00}', '\u{1b03}', NSM), ('\u{1b04}', '\u{1b33}', L), ('\u{1b34}', '\u{1b34}', NSM), ('\u{1b35}', '\u{1b35}', L), ('\u{1b36}', '\u{1b3a}', NSM), ('\u{1b3b}', '\u{1b3b}', L), ('\u{1b3c}', '\u{1b3c}', NSM), ('\u{1b3d}', '\u{1b41}', L), ('\u{1b42}', '\u{1b42}', NSM), ('\u{1b43}', '\u{1b4c}', L), ('\u{1b50}', '\u{1b6a}', L), ('\u{1b6b}', '\u{1b73}', NSM), ('\u{1b74}', '\u{1b7e}', L), ('\u{1b80}', '\u{1b81}', NSM), ('\u{1b82}', '\u{1ba1}', L), ('\u{1ba2}', '\u{1ba5}', NSM), ('\u{1ba6}', '\u{1ba7}', L), ('\u{1ba8}', '\u{1ba9}', NSM), ('\u{1baa}', '\u{1baa}', L), ('\u{1bab}', '\u{1bad}', NSM), ('\u{1bae}', '\u{1be5}', L), ('\u{1be6}', '\u{1be6}', NSM), ('\u{1be7}', '\u{1be7}', L), ('\u{1be8}', '\u{1be9}', NSM), ('\u{1bea}', '\u{1bec}', L), ('\u{1bed}', '\u{1bed}', NSM), ('\u{1bee}', '\u{1bee}', L), ('\u{1bef}', '\u{1bf1}', NSM), ('\u{1bf2}', '\u{1bf3}', L), ('\u{1bfc}', '\u{1c2b}', L), ('\u{1c2c}', '\u{1c33}', NSM), ('\u{1c34}', '\u{1c35}', L), ('\u{1c36}', '\u{1c37}', NSM), ('\u{1c3b}', '\u{1c49}', L), ('\u{1c4d}', '\u{1c88}', L), ('\u{1c90}', '\u{1cba}', L), ('\u{1cbd}', '\u{1cc7}', L), 
('\u{1cd0}', '\u{1cd2}', NSM), ('\u{1cd3}', '\u{1cd3}', L), ('\u{1cd4}', '\u{1ce0}', NSM), ('\u{1ce1}', '\u{1ce1}', L), ('\u{1ce2}', '\u{1ce8}', NSM), ('\u{1ce9}', '\u{1cec}', L), ('\u{1ced}', '\u{1ced}', NSM), ('\u{1cee}', '\u{1cf3}', L), ('\u{1cf4}', '\u{1cf4}', NSM), ('\u{1cf5}', '\u{1cf7}', L), ('\u{1cf8}', '\u{1cf9}', NSM), ('\u{1cfa}', '\u{1cfa}', L), ('\u{1d00}', '\u{1dbf}', L), ('\u{1dc0}', '\u{1dff}', NSM), ('\u{1e00}', '\u{1f15}', L), ('\u{1f18}', '\u{1f1d}', L), ('\u{1f20}', '\u{1f45}', L), ('\u{1f48}', '\u{1f4d}', L), ('\u{1f50}', '\u{1f57}', L), ('\u{1f59}', '\u{1f59}', L), ('\u{1f5b}', '\u{1f5b}', L), ('\u{1f5d}', '\u{1f5d}', L), ('\u{1f5f}', '\u{1f7d}', L), ('\u{1f80}', '\u{1fb4}', L), ('\u{1fb6}', '\u{1fbc}', L), ('\u{1fbd}', '\u{1fbd}', ON), ('\u{1fbe}', '\u{1fbe}', L), ('\u{1fbf}', '\u{1fc1}', ON), ('\u{1fc2}', '\u{1fc4}', L), ('\u{1fc6}', '\u{1fcc}', L), ('\u{1fcd}', '\u{1fcf}', ON), ('\u{1fd0}', '\u{1fd3}', L), ('\u{1fd6}', '\u{1fdb}', L), ('\u{1fdd}', '\u{1fdf}', ON), ('\u{1fe0}', '\u{1fec}', L), ('\u{1fed}', '\u{1fef}', ON), ('\u{1ff2}', '\u{1ff4}', L), ('\u{1ff6}', '\u{1ffc}', L), ('\u{1ffd}', '\u{1ffe}', ON), ('\u{2000}', '\u{200a}', WS), ('\u{200b}', '\u{200d}', BN), ('\u{200e}', '\u{200e}', L), ('\u{200f}', '\u{200f}', R), ('\u{2010}', '\u{2027}', ON), ('\u{2028}', '\u{2028}', WS), ('\u{2029}', '\u{2029}', B), ('\u{202a}', '\u{202a}', LRE), ('\u{202b}', '\u{202b}', RLE), ('\u{202c}', '\u{202c}', PDF), ('\u{202d}', '\u{202d}', LRO), ('\u{202e}', '\u{202e}', RLO), ('\u{202f}', '\u{202f}', CS), ('\u{2030}', '\u{2034}', ET), ('\u{2035}', '\u{2043}', ON), ('\u{2044}', '\u{2044}', CS), ('\u{2045}', '\u{205e}', ON), ('\u{205f}', '\u{205f}', WS), ('\u{2060}', '\u{2064}', BN), ('\u{2066}', '\u{2066}', LRI), ('\u{2067}', '\u{2067}', RLI), ('\u{2068}', '\u{2068}', FSI), ('\u{2069}', '\u{2069}', PDI), ('\u{206a}', '\u{206f}', BN), ('\u{2070}', '\u{2070}', EN), ('\u{2071}', '\u{2071}', L), ('\u{2074}', '\u{2079}', EN), ('\u{207a}', '\u{207b}', ES), 
('\u{207c}', '\u{207e}', ON), ('\u{207f}', '\u{207f}', L), ('\u{2080}', '\u{2089}', EN), ('\u{208a}', '\u{208b}', ES), ('\u{208c}', '\u{208e}', ON), ('\u{2090}', '\u{209c}', L), ('\u{20a0}', '\u{20cf}', ET), ('\u{20d0}', '\u{20f0}', NSM), ('\u{2100}', '\u{2101}', ON), ('\u{2102}', '\u{2102}', L), ('\u{2103}', '\u{2106}', ON), ('\u{2107}', '\u{2107}', L), ('\u{2108}', '\u{2109}', ON), ('\u{210a}', '\u{2113}', L), ('\u{2114}', '\u{2114}', ON), ('\u{2115}', '\u{2115}', L), ('\u{2116}', '\u{2118}', ON), ('\u{2119}', '\u{211d}', L), ('\u{211e}', '\u{2123}', ON), ('\u{2124}', '\u{2124}', L), ('\u{2125}', '\u{2125}', ON), ('\u{2126}', '\u{2126}', L), ('\u{2127}', '\u{2127}', ON), ('\u{2128}', '\u{2128}', L), ('\u{2129}', '\u{2129}', ON), ('\u{212a}', '\u{212d}', L), ('\u{212e}', '\u{212e}', ET), ('\u{212f}', '\u{2139}', L), ('\u{213a}', '\u{213b}', ON), ('\u{213c}', '\u{213f}', L), ('\u{2140}', '\u{2144}', ON), ('\u{2145}', '\u{2149}', L), ('\u{214a}', '\u{214d}', ON), ('\u{214e}', '\u{214f}', L), ('\u{2150}', '\u{215f}', ON), ('\u{2160}', '\u{2188}', L), ('\u{2189}', '\u{218b}', ON), ('\u{2190}', '\u{2211}', ON), ('\u{2212}', '\u{2212}', ES), ('\u{2213}', '\u{2213}', ET), ('\u{2214}', '\u{2335}', ON), ('\u{2336}', '\u{237a}', L), ('\u{237b}', '\u{2394}', ON), ('\u{2395}', '\u{2395}', L), ('\u{2396}', '\u{2426}', ON), ('\u{2440}', '\u{244a}', ON), ('\u{2460}', '\u{2487}', ON), ('\u{2488}', '\u{249b}', EN), ('\u{249c}', '\u{24e9}', L), ('\u{24ea}', '\u{26ab}', ON), ('\u{26ac}', '\u{26ac}', L), ('\u{26ad}', '\u{27ff}', ON), ('\u{2800}', '\u{28ff}', L), ('\u{2900}', '\u{2b73}', ON), ('\u{2b76}', '\u{2b95}', ON), ('\u{2b97}', '\u{2bff}', ON), ('\u{2c00}', '\u{2ce4}', L), ('\u{2ce5}', '\u{2cea}', ON), ('\u{2ceb}', '\u{2cee}', L), ('\u{2cef}', '\u{2cf1}', NSM), ('\u{2cf2}', '\u{2cf3}', L), ('\u{2cf9}', '\u{2cff}', ON), ('\u{2d00}', '\u{2d25}', L), ('\u{2d27}', '\u{2d27}', L), ('\u{2d2d}', '\u{2d2d}', L), ('\u{2d30}', '\u{2d67}', L), ('\u{2d6f}', '\u{2d70}', L), ('\u{2d7f}', 
'\u{2d7f}', NSM), ('\u{2d80}', '\u{2d96}', L), ('\u{2da0}', '\u{2da6}', L), ('\u{2da8}', '\u{2dae}', L), ('\u{2db0}', '\u{2db6}', L), ('\u{2db8}', '\u{2dbe}', L), ('\u{2dc0}', '\u{2dc6}', L), ('\u{2dc8}', '\u{2dce}', L), ('\u{2dd0}', '\u{2dd6}', L), ('\u{2dd8}', '\u{2dde}', L), ('\u{2de0}', '\u{2dff}', NSM), ('\u{2e00}', '\u{2e5d}', ON), ('\u{2e80}', '\u{2e99}', ON), ('\u{2e9b}', '\u{2ef3}', ON), ('\u{2f00}', '\u{2fd5}', ON), ('\u{2ff0}', '\u{2ffb}', ON), ('\u{3000}', '\u{3000}', WS), ('\u{3001}', '\u{3004}', ON), ('\u{3005}', '\u{3007}', L), ('\u{3008}', '\u{3020}', ON), ('\u{3021}', '\u{3029}', L), ('\u{302a}', '\u{302d}', NSM), ('\u{302e}', '\u{302f}', L), ('\u{3030}', '\u{3030}', ON), ('\u{3031}', '\u{3035}', L), ('\u{3036}', '\u{3037}', ON), ('\u{3038}', '\u{303c}', L), ('\u{303d}', '\u{303f}', ON), ('\u{3041}', '\u{3096}', L), ('\u{3099}', '\u{309a}', NSM), ('\u{309b}', '\u{309c}', ON), ('\u{309d}', '\u{309f}', L), ('\u{30a0}', '\u{30a0}', ON), ('\u{30a1}', '\u{30fa}', L), ('\u{30fb}', '\u{30fb}', ON), ('\u{30fc}', '\u{30ff}', L), ('\u{3105}', '\u{312f}', L), ('\u{3131}', '\u{318e}', L), ('\u{3190}', '\u{31bf}', L), ('\u{31c0}', '\u{31e3}', ON), ('\u{31f0}', '\u{321c}', L), ('\u{321d}', '\u{321e}', ON), ('\u{3220}', '\u{324f}', L), ('\u{3250}', '\u{325f}', ON), ('\u{3260}', '\u{327b}', L), ('\u{327c}', '\u{327e}', ON), ('\u{327f}', '\u{32b0}', L), ('\u{32b1}', '\u{32bf}', ON), ('\u{32c0}', '\u{32cb}', L), ('\u{32cc}', '\u{32cf}', ON), ('\u{32d0}', '\u{3376}', L), ('\u{3377}', '\u{337a}', ON), ('\u{337b}', '\u{33dd}', L), ('\u{33de}', '\u{33df}', ON), ('\u{33e0}', '\u{33fe}', L), ('\u{33ff}', '\u{33ff}', ON), ('\u{3400}', '\u{4dbf}', L), ('\u{4dc0}', '\u{4dff}', ON), ('\u{4e00}', '\u{a48c}', L), ('\u{a490}', '\u{a4c6}', ON), ('\u{a4d0}', '\u{a60c}', L), ('\u{a60d}', '\u{a60f}', ON), ('\u{a610}', '\u{a62b}', L), ('\u{a640}', '\u{a66e}', L), ('\u{a66f}', '\u{a672}', NSM), ('\u{a673}', '\u{a673}', ON), ('\u{a674}', '\u{a67d}', NSM), ('\u{a67e}', '\u{a67f}', ON), 
('\u{a680}', '\u{a69d}', L), ('\u{a69e}', '\u{a69f}', NSM), ('\u{a6a0}', '\u{a6ef}', L), ('\u{a6f0}', '\u{a6f1}', NSM), ('\u{a6f2}', '\u{a6f7}', L), ('\u{a700}', '\u{a721}', ON), ('\u{a722}', '\u{a787}', L), ('\u{a788}', '\u{a788}', ON), ('\u{a789}', '\u{a7ca}', L), ('\u{a7d0}', '\u{a7d1}', L), ('\u{a7d3}', '\u{a7d3}', L), ('\u{a7d5}', '\u{a7d9}', L), ('\u{a7f2}', '\u{a801}', L), ('\u{a802}', '\u{a802}', NSM), ('\u{a803}', '\u{a805}', L), ('\u{a806}', '\u{a806}', NSM), ('\u{a807}', '\u{a80a}', L), ('\u{a80b}', '\u{a80b}', NSM), ('\u{a80c}', '\u{a824}', L), ('\u{a825}', '\u{a826}', NSM), ('\u{a827}', '\u{a827}', L), ('\u{a828}', '\u{a82b}', ON), ('\u{a82c}', '\u{a82c}', NSM), ('\u{a830}', '\u{a837}', L), ('\u{a838}', '\u{a839}', ET), ('\u{a840}', '\u{a873}', L), ('\u{a874}', '\u{a877}', ON), ('\u{a880}', '\u{a8c3}', L), ('\u{a8c4}', '\u{a8c5}', NSM), ('\u{a8ce}', '\u{a8d9}', L), ('\u{a8e0}', '\u{a8f1}', NSM), ('\u{a8f2}', '\u{a8fe}', L), ('\u{a8ff}', '\u{a8ff}', NSM), ('\u{a900}', '\u{a925}', L), ('\u{a926}', '\u{a92d}', NSM), ('\u{a92e}', '\u{a946}', L), ('\u{a947}', '\u{a951}', NSM), ('\u{a952}', '\u{a953}', L), ('\u{a95f}', '\u{a97c}', L), ('\u{a980}', '\u{a982}', NSM), ('\u{a983}', '\u{a9b2}', L), ('\u{a9b3}', '\u{a9b3}', NSM), ('\u{a9b4}', '\u{a9b5}', L), ('\u{a9b6}', '\u{a9b9}', NSM), ('\u{a9ba}', '\u{a9bb}', L), ('\u{a9bc}', '\u{a9bd}', NSM), ('\u{a9be}', '\u{a9cd}', L), ('\u{a9cf}', '\u{a9d9}', L), ('\u{a9de}', '\u{a9e4}', L), ('\u{a9e5}', '\u{a9e5}', NSM), ('\u{a9e6}', '\u{a9fe}', L), ('\u{aa00}', '\u{aa28}', L), ('\u{aa29}', '\u{aa2e}', NSM), ('\u{aa2f}', '\u{aa30}', L), ('\u{aa31}', '\u{aa32}', NSM), ('\u{aa33}', '\u{aa34}', L), ('\u{aa35}', '\u{aa36}', NSM), ('\u{aa40}', '\u{aa42}', L), ('\u{aa43}', '\u{aa43}', NSM), ('\u{aa44}', '\u{aa4b}', L), ('\u{aa4c}', '\u{aa4c}', NSM), ('\u{aa4d}', '\u{aa4d}', L), ('\u{aa50}', '\u{aa59}', L), ('\u{aa5c}', '\u{aa7b}', L), ('\u{aa7c}', '\u{aa7c}', NSM), ('\u{aa7d}', '\u{aaaf}', L), ('\u{aab0}', '\u{aab0}', NSM), 
('\u{aab1}', '\u{aab1}', L), ('\u{aab2}', '\u{aab4}', NSM), ('\u{aab5}', '\u{aab6}', L), ('\u{aab7}', '\u{aab8}', NSM), ('\u{aab9}', '\u{aabd}', L), ('\u{aabe}', '\u{aabf}', NSM), ('\u{aac0}', '\u{aac0}', L), ('\u{aac1}', '\u{aac1}', NSM), ('\u{aac2}', '\u{aac2}', L), ('\u{aadb}', '\u{aaeb}', L), ('\u{aaec}', '\u{aaed}', NSM), ('\u{aaee}', '\u{aaf5}', L), ('\u{aaf6}', '\u{aaf6}', NSM), ('\u{ab01}', '\u{ab06}', L), ('\u{ab09}', '\u{ab0e}', L), ('\u{ab11}', '\u{ab16}', L), ('\u{ab20}', '\u{ab26}', L), ('\u{ab28}', '\u{ab2e}', L), ('\u{ab30}', '\u{ab69}', L), ('\u{ab6a}', '\u{ab6b}', ON), ('\u{ab70}', '\u{abe4}', L), ('\u{abe5}', '\u{abe5}', NSM), ('\u{abe6}', '\u{abe7}', L), ('\u{abe8}', '\u{abe8}', NSM), ('\u{abe9}', '\u{abec}', L), ('\u{abed}', '\u{abed}', NSM), ('\u{abf0}', '\u{abf9}', L), ('\u{ac00}', '\u{d7a3}', L), ('\u{d7b0}', '\u{d7c6}', L), ('\u{d7cb}', '\u{d7fb}', L), ('\u{e000}', '\u{fa6d}', L), ('\u{fa70}', '\u{fad9}', L), ('\u{fb00}', '\u{fb06}', L), ('\u{fb13}', '\u{fb17}', L), ('\u{fb1d}', '\u{fb1d}', R), ('\u{fb1e}', '\u{fb1e}', NSM), ('\u{fb1f}', '\u{fb28}', R), ('\u{fb29}', '\u{fb29}', ES), ('\u{fb2a}', '\u{fb4f}', R), ('\u{fb50}', '\u{fd3d}', AL), ('\u{fd3e}', '\u{fd4f}', ON), ('\u{fd50}', '\u{fdce}', AL), ('\u{fdcf}', '\u{fdcf}', ON), ('\u{fdf0}', '\u{fdfc}', AL), ('\u{fdfd}', '\u{fdff}', ON), ('\u{fe00}', '\u{fe0f}', NSM), ('\u{fe10}', '\u{fe19}', ON), ('\u{fe20}', '\u{fe2f}', NSM), ('\u{fe30}', '\u{fe4f}', ON), ('\u{fe50}', '\u{fe50}', CS), ('\u{fe51}', '\u{fe51}', ON), ('\u{fe52}', '\u{fe52}', CS), ('\u{fe54}', '\u{fe54}', ON), ('\u{fe55}', '\u{fe55}', CS), ('\u{fe56}', '\u{fe5e}', ON), ('\u{fe5f}', '\u{fe5f}', ET), ('\u{fe60}', '\u{fe61}', ON), ('\u{fe62}', '\u{fe63}', ES), ('\u{fe64}', '\u{fe66}', ON), ('\u{fe68}', '\u{fe68}', ON), ('\u{fe69}', '\u{fe6a}', ET), ('\u{fe6b}', '\u{fe6b}', ON), ('\u{fe70}', '\u{fefe}', AL), ('\u{feff}', '\u{feff}', BN), ('\u{ff01}', '\u{ff02}', ON), ('\u{ff03}', '\u{ff05}', ET), ('\u{ff06}', '\u{ff0a}', ON), 
('\u{ff0b}', '\u{ff0b}', ES), ('\u{ff0c}', '\u{ff0c}', CS), ('\u{ff0d}', '\u{ff0d}', ES), ('\u{ff0e}', '\u{ff0f}', CS), ('\u{ff10}', '\u{ff19}', EN), ('\u{ff1a}', '\u{ff1a}', CS), ('\u{ff1b}', '\u{ff20}', ON), ('\u{ff21}', '\u{ff3a}', L), ('\u{ff3b}', '\u{ff40}', ON), ('\u{ff41}', '\u{ff5a}', L), ('\u{ff5b}', '\u{ff65}', ON), ('\u{ff66}', '\u{ffbe}', L), ('\u{ffc2}', '\u{ffc7}', L), ('\u{ffca}', '\u{ffcf}', L), ('\u{ffd2}', '\u{ffd7}', L), ('\u{ffda}', '\u{ffdc}', L), ('\u{ffe0}', '\u{ffe1}', ET), ('\u{ffe2}', '\u{ffe4}', ON), ('\u{ffe5}', '\u{ffe6}', ET), ('\u{ffe8}', '\u{ffee}', ON), ('\u{fff9}', '\u{fffd}', ON), ('\u{10000}', '\u{1000b}', L), ('\u{1000d}', '\u{10026}', L), ('\u{10028}', '\u{1003a}', L), ('\u{1003c}', '\u{1003d}', L), ('\u{1003f}', '\u{1004d}', L), ('\u{10050}', '\u{1005d}', L), ('\u{10080}', '\u{100fa}', L), ('\u{10100}', '\u{10100}', L), ('\u{10101}', '\u{10101}', ON), ('\u{10102}', '\u{10102}', L), ('\u{10107}', '\u{10133}', L), ('\u{10137}', '\u{1013f}', L), ('\u{10140}', '\u{1018c}', ON), ('\u{1018d}', '\u{1018e}', L), ('\u{10190}', '\u{1019c}', ON), ('\u{101a0}', '\u{101a0}', ON), ('\u{101d0}', '\u{101fc}', L), ('\u{101fd}', '\u{101fd}', NSM), ('\u{10280}', '\u{1029c}', L), ('\u{102a0}', '\u{102d0}', L), ('\u{102e0}', '\u{102e0}', NSM), ('\u{102e1}', '\u{102fb}', EN), ('\u{10300}', '\u{10323}', L), ('\u{1032d}', '\u{1034a}', L), ('\u{10350}', '\u{10375}', L), ('\u{10376}', '\u{1037a}', NSM), ('\u{10380}', '\u{1039d}', L), ('\u{1039f}', '\u{103c3}', L), ('\u{103c8}', '\u{103d5}', L), ('\u{10400}', '\u{1049d}', L), ('\u{104a0}', '\u{104a9}', L), ('\u{104b0}', '\u{104d3}', L), ('\u{104d8}', '\u{104fb}', L), ('\u{10500}', '\u{10527}', L), ('\u{10530}', '\u{10563}', L), ('\u{1056f}', '\u{1057a}', L), ('\u{1057c}', '\u{1058a}', L), ('\u{1058c}', '\u{10592}', L), ('\u{10594}', '\u{10595}', L), ('\u{10597}', '\u{105a1}', L), ('\u{105a3}', '\u{105b1}', L), ('\u{105b3}', '\u{105b9}', L), ('\u{105bb}', '\u{105bc}', L), ('\u{10600}', '\u{10736}', L), 
('\u{10740}', '\u{10755}', L), ('\u{10760}', '\u{10767}', L), ('\u{10780}', '\u{10785}', L), ('\u{10787}', '\u{107b0}', L), ('\u{107b2}', '\u{107ba}', L), ('\u{10800}', '\u{1091e}', R), ('\u{1091f}', '\u{1091f}', ON), ('\u{10920}', '\u{10a00}', R), ('\u{10a01}', '\u{10a03}', NSM), ('\u{10a04}', '\u{10a04}', R), ('\u{10a05}', '\u{10a06}', NSM), ('\u{10a07}', '\u{10a0b}', R), ('\u{10a0c}', '\u{10a0f}', NSM), ('\u{10a10}', '\u{10a37}', R), ('\u{10a38}', '\u{10a3a}', NSM), ('\u{10a3b}', '\u{10a3e}', R), ('\u{10a3f}', '\u{10a3f}', NSM), ('\u{10a40}', '\u{10ae4}', R), ('\u{10ae5}', '\u{10ae6}', NSM), ('\u{10ae7}', '\u{10b38}', R), ('\u{10b39}', '\u{10b3f}', ON), ('\u{10b40}', '\u{10cff}', R), ('\u{10d00}', '\u{10d23}', AL), ('\u{10d24}', '\u{10d27}', NSM), ('\u{10d28}', '\u{10d2f}', R), ('\u{10d30}', '\u{10d39}', AN), ('\u{10d3a}', '\u{10e5f}', R), ('\u{10e60}', '\u{10e7e}', AN), ('\u{10e7f}', '\u{10eaa}', R), ('\u{10eab}', '\u{10eac}', NSM), ('\u{10ead}', '\u{10efc}', R), ('\u{10efd}', '\u{10eff}', NSM), ('\u{10f00}', '\u{10f2f}', R), ('\u{10f30}', '\u{10f45}', AL), ('\u{10f46}', '\u{10f50}', NSM), ('\u{10f51}', '\u{10f59}', AL), ('\u{10f5a}', '\u{10f81}', R), ('\u{10f82}', '\u{10f85}', NSM), ('\u{10f86}', '\u{10fff}', R), ('\u{11000}', '\u{11000}', L), ('\u{11001}', '\u{11001}', NSM), ('\u{11002}', '\u{11037}', L), ('\u{11038}', '\u{11046}', NSM), ('\u{11047}', '\u{1104d}', L), ('\u{11052}', '\u{11065}', ON), ('\u{11066}', '\u{1106f}', L), ('\u{11070}', '\u{11070}', NSM), ('\u{11071}', '\u{11072}', L), ('\u{11073}', '\u{11074}', NSM), ('\u{11075}', '\u{11075}', L), ('\u{1107f}', '\u{11081}', NSM), ('\u{11082}', '\u{110b2}', L), ('\u{110b3}', '\u{110b6}', NSM), ('\u{110b7}', '\u{110b8}', L), ('\u{110b9}', '\u{110ba}', NSM), ('\u{110bb}', '\u{110c1}', L), ('\u{110c2}', '\u{110c2}', NSM), ('\u{110cd}', '\u{110cd}', L), ('\u{110d0}', '\u{110e8}', L), ('\u{110f0}', '\u{110f9}', L), ('\u{11100}', '\u{11102}', NSM), ('\u{11103}', '\u{11126}', L), ('\u{11127}', '\u{1112b}', 
NSM), ('\u{1112c}', '\u{1112c}', L), ('\u{1112d}', '\u{11134}', NSM), ('\u{11136}', '\u{11147}', L), ('\u{11150}', '\u{11172}', L), ('\u{11173}', '\u{11173}', NSM), ('\u{11174}', '\u{11176}', L), ('\u{11180}', '\u{11181}', NSM), ('\u{11182}', '\u{111b5}', L), ('\u{111b6}', '\u{111be}', NSM), ('\u{111bf}', '\u{111c8}', L), ('\u{111c9}', '\u{111cc}', NSM), ('\u{111cd}', '\u{111ce}', L), ('\u{111cf}', '\u{111cf}', NSM), ('\u{111d0}', '\u{111df}', L), ('\u{111e1}', '\u{111f4}', L), ('\u{11200}', '\u{11211}', L), ('\u{11213}', '\u{1122e}', L), ('\u{1122f}', '\u{11231}', NSM), ('\u{11232}', '\u{11233}', L), ('\u{11234}', '\u{11234}', NSM), ('\u{11235}', '\u{11235}', L), ('\u{11236}', '\u{11237}', NSM), ('\u{11238}', '\u{1123d}', L), ('\u{1123e}', '\u{1123e}', NSM), ('\u{1123f}', '\u{11240}', L), ('\u{11241}', '\u{11241}', NSM), ('\u{11280}', '\u{11286}', L), ('\u{11288}', '\u{11288}', L), ('\u{1128a}', '\u{1128d}', L), ('\u{1128f}', '\u{1129d}', L), ('\u{1129f}', '\u{112a9}', L), ('\u{112b0}', '\u{112de}', L), ('\u{112df}', '\u{112df}', NSM), ('\u{112e0}', '\u{112e2}', L), ('\u{112e3}', '\u{112ea}', NSM), ('\u{112f0}', '\u{112f9}', L), ('\u{11300}', '\u{11301}', NSM), ('\u{11302}', '\u{11303}', L), ('\u{11305}', '\u{1130c}', L), ('\u{1130f}', '\u{11310}', L), ('\u{11313}', '\u{11328}', L), ('\u{1132a}', '\u{11330}', L), ('\u{11332}', '\u{11333}', L), ('\u{11335}', '\u{11339}', L), ('\u{1133b}', '\u{1133c}', NSM), ('\u{1133d}', '\u{1133f}', L), ('\u{11340}', '\u{11340}', NSM), ('\u{11341}', '\u{11344}', L), ('\u{11347}', '\u{11348}', L), ('\u{1134b}', '\u{1134d}', L), ('\u{11350}', '\u{11350}', L), ('\u{11357}', '\u{11357}', L), ('\u{1135d}', '\u{11363}', L), ('\u{11366}', '\u{1136c}', NSM), ('\u{11370}', '\u{11374}', NSM), ('\u{11400}', '\u{11437}', L), ('\u{11438}', '\u{1143f}', NSM), ('\u{11440}', '\u{11441}', L), ('\u{11442}', '\u{11444}', NSM), ('\u{11445}', '\u{11445}', L), ('\u{11446}', '\u{11446}', NSM), ('\u{11447}', '\u{1145b}', L), ('\u{1145d}', '\u{1145d}', 
L), ('\u{1145e}', '\u{1145e}', NSM), ('\u{1145f}', '\u{11461}', L), ('\u{11480}', '\u{114b2}', L), ('\u{114b3}', '\u{114b8}', NSM), ('\u{114b9}', '\u{114b9}', L), ('\u{114ba}', '\u{114ba}', NSM), ('\u{114bb}', '\u{114be}', L), ('\u{114bf}', '\u{114c0}', NSM), ('\u{114c1}', '\u{114c1}', L), ('\u{114c2}', '\u{114c3}', NSM), ('\u{114c4}', '\u{114c7}', L), ('\u{114d0}', '\u{114d9}', L), ('\u{11580}', '\u{115b1}', L), ('\u{115b2}', '\u{115b5}', NSM), ('\u{115b8}', '\u{115bb}', L), ('\u{115bc}', '\u{115bd}', NSM), ('\u{115be}', '\u{115be}', L), ('\u{115bf}', '\u{115c0}', NSM), ('\u{115c1}', '\u{115db}', L), ('\u{115dc}', '\u{115dd}', NSM), ('\u{11600}', '\u{11632}', L), ('\u{11633}', '\u{1163a}', NSM), ('\u{1163b}', '\u{1163c}', L), ('\u{1163d}', '\u{1163d}', NSM), ('\u{1163e}', '\u{1163e}', L), ('\u{1163f}', '\u{11640}', NSM), ('\u{11641}', '\u{11644}', L), ('\u{11650}', '\u{11659}', L), ('\u{11660}', '\u{1166c}', ON), ('\u{11680}', '\u{116aa}', L), ('\u{116ab}', '\u{116ab}', NSM), ('\u{116ac}', '\u{116ac}', L), ('\u{116ad}', '\u{116ad}', NSM), ('\u{116ae}', '\u{116af}', L), ('\u{116b0}', '\u{116b5}', NSM), ('\u{116b6}', '\u{116b6}', L), ('\u{116b7}', '\u{116b7}', NSM), ('\u{116b8}', '\u{116b9}', L), ('\u{116c0}', '\u{116c9}', L), ('\u{11700}', '\u{1171a}', L), ('\u{1171d}', '\u{1171f}', NSM), ('\u{11720}', '\u{11721}', L), ('\u{11722}', '\u{11725}', NSM), ('\u{11726}', '\u{11726}', L), ('\u{11727}', '\u{1172b}', NSM), ('\u{11730}', '\u{11746}', L), ('\u{11800}', '\u{1182e}', L), ('\u{1182f}', '\u{11837}', NSM), ('\u{11838}', '\u{11838}', L), ('\u{11839}', '\u{1183a}', NSM), ('\u{1183b}', '\u{1183b}', L), ('\u{118a0}', '\u{118f2}', L), ('\u{118ff}', '\u{11906}', L), ('\u{11909}', '\u{11909}', L), ('\u{1190c}', '\u{11913}', L), ('\u{11915}', '\u{11916}', L), ('\u{11918}', '\u{11935}', L), ('\u{11937}', '\u{11938}', L), ('\u{1193b}', '\u{1193c}', NSM), ('\u{1193d}', '\u{1193d}', L), ('\u{1193e}', '\u{1193e}', NSM), ('\u{1193f}', '\u{11942}', L), ('\u{11943}', '\u{11943}', 
NSM), ('\u{11944}', '\u{11946}', L), ('\u{11950}', '\u{11959}', L), ('\u{119a0}', '\u{119a7}', L), ('\u{119aa}', '\u{119d3}', L), ('\u{119d4}', '\u{119d7}', NSM), ('\u{119da}', '\u{119db}', NSM), ('\u{119dc}', '\u{119df}', L), ('\u{119e0}', '\u{119e0}', NSM), ('\u{119e1}', '\u{119e4}', L), ('\u{11a00}', '\u{11a00}', L), ('\u{11a01}', '\u{11a06}', NSM), ('\u{11a07}', '\u{11a08}', L), ('\u{11a09}', '\u{11a0a}', NSM), ('\u{11a0b}', '\u{11a32}', L), ('\u{11a33}', '\u{11a38}', NSM), ('\u{11a39}', '\u{11a3a}', L), ('\u{11a3b}', '\u{11a3e}', NSM), ('\u{11a3f}', '\u{11a46}', L), ('\u{11a47}', '\u{11a47}', NSM), ('\u{11a50}', '\u{11a50}', L), ('\u{11a51}', '\u{11a56}', NSM), ('\u{11a57}', '\u{11a58}', L), ('\u{11a59}', '\u{11a5b}', NSM), ('\u{11a5c}', '\u{11a89}', L), ('\u{11a8a}', '\u{11a96}', NSM), ('\u{11a97}', '\u{11a97}', L), ('\u{11a98}', '\u{11a99}', NSM), ('\u{11a9a}', '\u{11aa2}', L), ('\u{11ab0}', '\u{11af8}', L), ('\u{11b00}', '\u{11b09}', L), ('\u{11c00}', '\u{11c08}', L), ('\u{11c0a}', '\u{11c2f}', L), ('\u{11c30}', '\u{11c36}', NSM), ('\u{11c38}', '\u{11c3d}', NSM), ('\u{11c3e}', '\u{11c45}', L), ('\u{11c50}', '\u{11c6c}', L), ('\u{11c70}', '\u{11c8f}', L), ('\u{11c92}', '\u{11ca7}', NSM), ('\u{11ca9}', '\u{11ca9}', L), ('\u{11caa}', '\u{11cb0}', NSM), ('\u{11cb1}', '\u{11cb1}', L), ('\u{11cb2}', '\u{11cb3}', NSM), ('\u{11cb4}', '\u{11cb4}', L), ('\u{11cb5}', '\u{11cb6}', NSM), ('\u{11d00}', '\u{11d06}', L), ('\u{11d08}', '\u{11d09}', L), ('\u{11d0b}', '\u{11d30}', L), ('\u{11d31}', '\u{11d36}', NSM), ('\u{11d3a}', '\u{11d3a}', NSM), ('\u{11d3c}', '\u{11d3d}', NSM), ('\u{11d3f}', '\u{11d45}', NSM), ('\u{11d46}', '\u{11d46}', L), ('\u{11d47}', '\u{11d47}', NSM), ('\u{11d50}', '\u{11d59}', L), ('\u{11d60}', '\u{11d65}', L), ('\u{11d67}', '\u{11d68}', L), ('\u{11d6a}', '\u{11d8e}', L), ('\u{11d90}', '\u{11d91}', NSM), ('\u{11d93}', '\u{11d94}', L), ('\u{11d95}', '\u{11d95}', NSM), ('\u{11d96}', '\u{11d96}', L), ('\u{11d97}', '\u{11d97}', NSM), ('\u{11d98}', 
'\u{11d98}', L), ('\u{11da0}', '\u{11da9}', L), ('\u{11ee0}', '\u{11ef2}', L), ('\u{11ef3}', '\u{11ef4}', NSM), ('\u{11ef5}', '\u{11ef8}', L), ('\u{11f00}', '\u{11f01}', NSM), ('\u{11f02}', '\u{11f10}', L), ('\u{11f12}', '\u{11f35}', L), ('\u{11f36}', '\u{11f3a}', NSM), ('\u{11f3e}', '\u{11f3f}', L), ('\u{11f40}', '\u{11f40}', NSM), ('\u{11f41}', '\u{11f41}', L), ('\u{11f42}', '\u{11f42}', NSM), ('\u{11f43}', '\u{11f59}', L), ('\u{11fb0}', '\u{11fb0}', L), ('\u{11fc0}', '\u{11fd4}', L), ('\u{11fd5}', '\u{11fdc}', ON), ('\u{11fdd}', '\u{11fe0}', ET), ('\u{11fe1}', '\u{11ff1}', ON), ('\u{11fff}', '\u{12399}', L), ('\u{12400}', '\u{1246e}', L), ('\u{12470}', '\u{12474}', L), ('\u{12480}', '\u{12543}', L), ('\u{12f90}', '\u{12ff2}', L), ('\u{13000}', '\u{1343f}', L), ('\u{13440}', '\u{13440}', NSM), ('\u{13441}', '\u{13446}', L), ('\u{13447}', '\u{13455}', NSM), ('\u{14400}', '\u{14646}', L), ('\u{16800}', '\u{16a38}', L), ('\u{16a40}', '\u{16a5e}', L), ('\u{16a60}', '\u{16a69}', L), ('\u{16a6e}', '\u{16abe}', L), ('\u{16ac0}', '\u{16ac9}', L), ('\u{16ad0}', '\u{16aed}', L), ('\u{16af0}', '\u{16af4}', NSM), ('\u{16af5}', '\u{16af5}', L), ('\u{16b00}', '\u{16b2f}', L), ('\u{16b30}', '\u{16b36}', NSM), ('\u{16b37}', '\u{16b45}', L), ('\u{16b50}', '\u{16b59}', L), ('\u{16b5b}', '\u{16b61}', L), ('\u{16b63}', '\u{16b77}', L), ('\u{16b7d}', '\u{16b8f}', L), ('\u{16e40}', '\u{16e9a}', L), ('\u{16f00}', '\u{16f4a}', L), ('\u{16f4f}', '\u{16f4f}', NSM), ('\u{16f50}', '\u{16f87}', L), ('\u{16f8f}', '\u{16f92}', NSM), ('\u{16f93}', '\u{16f9f}', L), ('\u{16fe0}', '\u{16fe1}', L), ('\u{16fe2}', '\u{16fe2}', ON), ('\u{16fe3}', '\u{16fe3}', L), ('\u{16fe4}', '\u{16fe4}', NSM), ('\u{16ff0}', '\u{16ff1}', L), ('\u{17000}', '\u{187f7}', L), ('\u{18800}', '\u{18cd5}', L), ('\u{18d00}', '\u{18d08}', L), ('\u{1aff0}', '\u{1aff3}', L), ('\u{1aff5}', '\u{1affb}', L), ('\u{1affd}', '\u{1affe}', L), ('\u{1b000}', '\u{1b122}', L), ('\u{1b132}', '\u{1b132}', L), ('\u{1b150}', '\u{1b152}', L), 
('\u{1b155}', '\u{1b155}', L), ('\u{1b164}', '\u{1b167}', L), ('\u{1b170}', '\u{1b2fb}', L), ('\u{1bc00}', '\u{1bc6a}', L), ('\u{1bc70}', '\u{1bc7c}', L), ('\u{1bc80}', '\u{1bc88}', L), ('\u{1bc90}', '\u{1bc99}', L), ('\u{1bc9c}', '\u{1bc9c}', L), ('\u{1bc9d}', '\u{1bc9e}', NSM), ('\u{1bc9f}', '\u{1bc9f}', L), ('\u{1bca0}', '\u{1bca3}', BN), ('\u{1cf00}', '\u{1cf2d}', NSM), ('\u{1cf30}', '\u{1cf46}', NSM), ('\u{1cf50}', '\u{1cfc3}', L), ('\u{1d000}', '\u{1d0f5}', L), ('\u{1d100}', '\u{1d126}', L), ('\u{1d129}', '\u{1d166}', L), ('\u{1d167}', '\u{1d169}', NSM), ('\u{1d16a}', '\u{1d172}', L), ('\u{1d173}', '\u{1d17a}', BN), ('\u{1d17b}', '\u{1d182}', NSM), ('\u{1d183}', '\u{1d184}', L), ('\u{1d185}', '\u{1d18b}', NSM), ('\u{1d18c}', '\u{1d1a9}', L), ('\u{1d1aa}', '\u{1d1ad}', NSM), ('\u{1d1ae}', '\u{1d1e8}', L), ('\u{1d1e9}', '\u{1d1ea}', ON), ('\u{1d200}', '\u{1d241}', ON), ('\u{1d242}', '\u{1d244}', NSM), ('\u{1d245}', '\u{1d245}', ON), ('\u{1d2c0}', '\u{1d2d3}', L), ('\u{1d2e0}', '\u{1d2f3}', L), ('\u{1d300}', '\u{1d356}', ON), ('\u{1d360}', '\u{1d378}', L), ('\u{1d400}', '\u{1d454}', L), ('\u{1d456}', '\u{1d49c}', L), ('\u{1d49e}', '\u{1d49f}', L), ('\u{1d4a2}', '\u{1d4a2}', L), ('\u{1d4a5}', '\u{1d4a6}', L), ('\u{1d4a9}', '\u{1d4ac}', L), ('\u{1d4ae}', '\u{1d4b9}', L), ('\u{1d4bb}', '\u{1d4bb}', L), ('\u{1d4bd}', '\u{1d4c3}', L), ('\u{1d4c5}', '\u{1d505}', L), ('\u{1d507}', '\u{1d50a}', L), ('\u{1d50d}', '\u{1d514}', L), ('\u{1d516}', '\u{1d51c}', L), ('\u{1d51e}', '\u{1d539}', L), ('\u{1d53b}', '\u{1d53e}', L), ('\u{1d540}', '\u{1d544}', L), ('\u{1d546}', '\u{1d546}', L), ('\u{1d54a}', '\u{1d550}', L), ('\u{1d552}', '\u{1d6a5}', L), ('\u{1d6a8}', '\u{1d6da}', L), ('\u{1d6db}', '\u{1d6db}', ON), ('\u{1d6dc}', '\u{1d714}', L), ('\u{1d715}', '\u{1d715}', ON), ('\u{1d716}', '\u{1d74e}', L), ('\u{1d74f}', '\u{1d74f}', ON), ('\u{1d750}', '\u{1d788}', L), ('\u{1d789}', '\u{1d789}', ON), ('\u{1d78a}', '\u{1d7c2}', L), ('\u{1d7c3}', '\u{1d7c3}', ON), ('\u{1d7c4}', 
'\u{1d7cb}', L), ('\u{1d7ce}', '\u{1d7ff}', EN), ('\u{1d800}', '\u{1d9ff}', L), ('\u{1da00}', '\u{1da36}', NSM), ('\u{1da37}', '\u{1da3a}', L), ('\u{1da3b}', '\u{1da6c}', NSM), ('\u{1da6d}', '\u{1da74}', L), ('\u{1da75}', '\u{1da75}', NSM), ('\u{1da76}', '\u{1da83}', L), ('\u{1da84}', '\u{1da84}', NSM), ('\u{1da85}', '\u{1da8b}', L), ('\u{1da9b}', '\u{1da9f}', NSM), ('\u{1daa1}', '\u{1daaf}', NSM), ('\u{1df00}', '\u{1df1e}', L), ('\u{1df25}', '\u{1df2a}', L), ('\u{1e000}', '\u{1e006}', NSM), ('\u{1e008}', '\u{1e018}', NSM), ('\u{1e01b}', '\u{1e021}', NSM), ('\u{1e023}', '\u{1e024}', NSM), ('\u{1e026}', '\u{1e02a}', NSM), ('\u{1e030}', '\u{1e06d}', L), ('\u{1e08f}', '\u{1e08f}', NSM), ('\u{1e100}', '\u{1e12c}', L), ('\u{1e130}', '\u{1e136}', NSM), ('\u{1e137}', '\u{1e13d}', L), ('\u{1e140}', '\u{1e149}', L), ('\u{1e14e}', '\u{1e14f}', L), ('\u{1e290}', '\u{1e2ad}', L), ('\u{1e2ae}', '\u{1e2ae}', NSM), ('\u{1e2c0}', '\u{1e2eb}', L), ('\u{1e2ec}', '\u{1e2ef}', NSM), ('\u{1e2f0}', '\u{1e2f9}', L), ('\u{1e2ff}', '\u{1e2ff}', ET), ('\u{1e4d0}', '\u{1e4eb}', L), ('\u{1e4ec}', '\u{1e4ef}', NSM), ('\u{1e4f0}', '\u{1e4f9}', L), ('\u{1e7e0}', '\u{1e7e6}', L), ('\u{1e7e8}', '\u{1e7eb}', L), ('\u{1e7ed}', '\u{1e7ee}', L), ('\u{1e7f0}', '\u{1e7fe}', L), ('\u{1e800}', '\u{1e8cf}', R), ('\u{1e8d0}', '\u{1e8d6}', NSM), ('\u{1e8d7}', '\u{1e943}', R), ('\u{1e944}', '\u{1e94a}', NSM), ('\u{1e94b}', '\u{1ec70}', R), ('\u{1ec71}', '\u{1ecb4}', AL), ('\u{1ecb5}', '\u{1ed00}', R), ('\u{1ed01}', '\u{1ed3d}', AL), ('\u{1ed3e}', '\u{1edff}', R), ('\u{1ee00}', '\u{1eeef}', AL), ('\u{1eef0}', '\u{1eef1}', ON), ('\u{1eef2}', '\u{1eeff}', AL), ('\u{1ef00}', '\u{1efff}', R), ('\u{1f000}', '\u{1f02b}', ON), ('\u{1f030}', '\u{1f093}', ON), ('\u{1f0a0}', '\u{1f0ae}', ON), ('\u{1f0b1}', '\u{1f0bf}', ON), ('\u{1f0c1}', '\u{1f0cf}', ON), ('\u{1f0d1}', '\u{1f0f5}', ON), ('\u{1f100}', '\u{1f10a}', EN), ('\u{1f10b}', '\u{1f10f}', ON), ('\u{1f110}', '\u{1f12e}', L), ('\u{1f12f}', '\u{1f12f}', ON), 
/// Paired brackets, one `(opening, closing, normalized_opening)` entry per
/// pair, sorted in strictly increasing order of the opening bracket.
///
/// The third field is `Some(c)` only when the opening bracket has a distinct
/// normalized form `c` (the single such entry here is U+2329, normalized to
/// U+3008); it is `None` when the opening bracket is already its own
/// normalized form.
///
/// NOTE(review): presumably generated from `BidiBrackets.txt`; confirm against
/// the generator before editing entries by hand.
pub const bidi_pairs_table: &'static [(char, char, Option<char>)] = &[
    ('\u{28}', '\u{29}', None), ('\u{5b}', '\u{5d}', None), ('\u{7b}', '\u{7d}', None),
    ('\u{f3a}', '\u{f3b}', None), ('\u{f3c}', '\u{f3d}', None),
    ('\u{169b}', '\u{169c}', None), ('\u{2045}', '\u{2046}', None),
    ('\u{207d}', '\u{207e}', None), ('\u{208d}', '\u{208e}', None),
    ('\u{2308}', '\u{2309}', None), ('\u{230a}', '\u{230b}', None),
    ('\u{2329}', '\u{232a}', Some('\u{3008}')), ('\u{2768}', '\u{2769}', None),
    ('\u{276a}', '\u{276b}', None), ('\u{276c}', '\u{276d}', None),
    ('\u{276e}', '\u{276f}', None), ('\u{2770}', '\u{2771}', None),
    ('\u{2772}', '\u{2773}', None), ('\u{2774}', '\u{2775}', None),
    ('\u{27c5}', '\u{27c6}', None), ('\u{27e6}', '\u{27e7}', None),
    ('\u{27e8}', '\u{27e9}', None), ('\u{27ea}', '\u{27eb}', None),
    ('\u{27ec}', '\u{27ed}', None), ('\u{27ee}', '\u{27ef}', None),
    ('\u{2983}', '\u{2984}', None), ('\u{2985}', '\u{2986}', None),
    ('\u{2987}', '\u{2988}', None), ('\u{2989}', '\u{298a}', None),
    ('\u{298b}', '\u{298c}', None), ('\u{298d}', '\u{2990}', None),
    ('\u{298f}', '\u{298e}', None), ('\u{2991}', '\u{2992}', None),
    ('\u{2993}', '\u{2994}', None), ('\u{2995}', '\u{2996}', None),
    ('\u{2997}', '\u{2998}', None), ('\u{29d8}', '\u{29d9}', None),
    ('\u{29da}', '\u{29db}', None), ('\u{29fc}', '\u{29fd}', None),
    ('\u{2e22}', '\u{2e23}', None), ('\u{2e24}', '\u{2e25}', None),
    ('\u{2e26}', '\u{2e27}', None), ('\u{2e28}', '\u{2e29}', None),
    ('\u{2e55}', '\u{2e56}', None), ('\u{2e57}', '\u{2e58}', None),
    ('\u{2e59}', '\u{2e5a}', None), ('\u{2e5b}', '\u{2e5c}', None),
    ('\u{3008}', '\u{3009}', None), ('\u{300a}', '\u{300b}', None),
    ('\u{300c}', '\u{300d}', None), ('\u{300e}', '\u{300f}', None),
    ('\u{3010}', '\u{3011}', None), ('\u{3014}', '\u{3015}', None),
    ('\u{3016}', '\u{3017}', None), ('\u{3018}', '\u{3019}', None),
    ('\u{301a}', '\u{301b}', None), ('\u{fe59}', '\u{fe5a}', None),
    ('\u{fe5b}', '\u{fe5c}', None), ('\u{fe5d}', '\u{fe5e}', None),
    ('\u{ff08}', '\u{ff09}', None), ('\u{ff3b}', '\u{ff3d}', None),
    ('\u{ff5b}', '\u{ff5d}', None), ('\u{ff5f}', '\u{ff60}', None),
    ('\u{ff62}', '\u{ff63}', None),
];
This file may not be copied, modified, or distributed // except according to those terms. use crate::BidiClass; /// This is the return value of [`BidiDataSource::bidi_matched_opening_bracket()`]. /// /// It represents the matching *normalized* opening bracket for a given bracket in a bracket pair, /// and whether or not that bracket is opening. #[derive(Debug, Copy, Clone)] pub struct BidiMatchedOpeningBracket { /// The corresponding opening bracket in this bracket pair, normalized /// /// In case of opening brackets, this will be the bracket itself, except for when the bracket /// is not normalized, in which case it will be the normalized form. pub opening: char, /// Whether or not the requested bracket was an opening bracket. True for opening pub is_open: bool, } /// This trait abstracts over a data source that is able to produce the Unicode Bidi class for a given /// character pub trait BidiDataSource { fn bidi_class(&self, c: char) -> BidiClass; /// If this character is a bracket according to BidiBrackets.txt, /// return the corresponding *normalized* *opening bracket* of the pair, /// and whether or not it itself is an opening bracket. /// /// This effectively buckets brackets into equivalence classes keyed on the /// normalized opening bracket. /// /// The default implementation will pull in a small amount of hardcoded data, /// regardless of the `hardcoded-data` feature. This is in part for convenience /// (since this data is small and changes less often), and in part so that this method can be /// added without needing a breaking version bump. /// Override this method in your custom data source to prevent the use of hardcoded data. fn bidi_matched_opening_bracket(&self, c: char) -> Option { crate::char_data::bidi_matched_opening_bracket(c) } } unicode-bidi-0.3.13/src/deprecated.rs000064400000000000000000000057401046102023000155060ustar 00000000000000// Copyright 2015 The Servo Project Developers. 
See the // COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! This module holds deprecated assets only. use alloc::vec::Vec; use super::*; /// Find the level runs within a line and return them in visual order. /// /// NOTE: This implementation is incomplete. The algorithm needs information about the text, /// including original `BidiClass` property of each character, to be able to perform correctly. /// Please see [`BidiInfo::visual_runs()`](../struct.BidiInfo.html#method.visual_runs) for the /// improved implementation. /// /// `line` is a range of bytes indices within `levels`. /// /// #[deprecated( since = "0.3.0", note = "please use `BidiInfo::visual_runs()` instead." )] pub fn visual_runs(line: Range, levels: &[Level]) -> Vec { assert!(line.start <= levels.len()); assert!(line.end <= levels.len()); let mut runs = Vec::new(); // Find consecutive level runs. let mut start = line.start; let mut run_level = levels[start]; let mut min_level = run_level; let mut max_level = run_level; for (i, &new_level) in levels.iter().enumerate().take(line.end).skip(start + 1) { if new_level != run_level { // End of the previous run, start of a new one. runs.push(start..i); start = i; run_level = new_level; min_level = cmp::min(run_level, min_level); max_level = cmp::max(run_level, max_level); } } runs.push(start..line.end); let run_count = runs.len(); // Re-order the odd runs. // // Stop at the lowest *odd* level. min_level = min_level.new_lowest_ge_rtl().expect("Level error"); while max_level >= min_level { // Look for the start of a sequence of consecutive runs of max_level or higher. let mut seq_start = 0; while seq_start < run_count { if levels[runs[seq_start].start] < max_level { seq_start += 1; continue; } // Found the start of a sequence. Now find the end. 
let mut seq_end = seq_start + 1; while seq_end < run_count { if levels[runs[seq_end].start] < max_level { break; } seq_end += 1; } // Reverse the runs within this sequence. runs[seq_start..seq_end].reverse(); seq_start = seq_end; } max_level .lower(1) .expect("Lowering embedding level below zero"); } runs } unicode-bidi-0.3.13/src/explicit.rs000064400000000000000000000166451046102023000152350ustar 00000000000000// Copyright 2015 The Servo Project Developers. See the // COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! 3.3.2 Explicit Levels and Directions //! //! use alloc::vec::Vec; use super::char_data::{ is_rtl, BidiClass::{self, *}, }; use super::level::Level; /// Compute explicit embedding levels for one paragraph of text (X1-X8). /// /// `processing_classes[i]` must contain the `BidiClass` of the char at byte index `i`, /// for each char in `text`. #[cfg_attr(feature = "flame_it", flamer::flame)] pub fn compute( text: &str, para_level: Level, original_classes: &[BidiClass], levels: &mut [Level], processing_classes: &mut [BidiClass], ) { assert_eq!(text.len(), original_classes.len()); // let mut stack = DirectionalStatusStack::new(); stack.push(para_level, OverrideStatus::Neutral); let mut overflow_isolate_count = 0u32; let mut overflow_embedding_count = 0u32; let mut valid_isolate_count = 0u32; for (i, c) in text.char_indices() { match original_classes[i] { // Rules X2-X5c RLE | LRE | RLO | LRO | RLI | LRI | FSI => { let last_level = stack.last().level; // levels[i] = last_level; // X5a-X5c: Isolate initiators get the level of the last entry on the stack. let is_isolate = match original_classes[i] { RLI | LRI | FSI => true, _ => false, }; if is_isolate { // Redundant due to "Retaining explicit formatting characters" step. 
// levels[i] = last_level; match stack.last().status { OverrideStatus::RTL => processing_classes[i] = R, OverrideStatus::LTR => processing_classes[i] = L, _ => {} } } let new_level = if is_rtl(original_classes[i]) { last_level.new_explicit_next_rtl() } else { last_level.new_explicit_next_ltr() }; if new_level.is_ok() && overflow_isolate_count == 0 && overflow_embedding_count == 0 { let new_level = new_level.unwrap(); stack.push( new_level, match original_classes[i] { RLO => OverrideStatus::RTL, LRO => OverrideStatus::LTR, RLI | LRI | FSI => OverrideStatus::Isolate, _ => OverrideStatus::Neutral, }, ); if is_isolate { valid_isolate_count += 1; } else { // The spec doesn't explicitly mention this step, but it is necessary. // See the reference implementations for comparison. levels[i] = new_level; } } else if is_isolate { overflow_isolate_count += 1; } else if overflow_isolate_count == 0 { overflow_embedding_count += 1; } if !is_isolate { // X9 + // // (PDF handled below) processing_classes[i] = BN; } } // PDI => { if overflow_isolate_count > 0 { overflow_isolate_count -= 1; } else if valid_isolate_count > 0 { overflow_embedding_count = 0; loop { // Pop everything up to and including the last Isolate status. match stack.vec.pop() { None | Some(Status { status: OverrideStatus::Isolate, .. }) => break, _ => continue, } } valid_isolate_count -= 1; } let last = stack.last(); levels[i] = last.level; match last.status { OverrideStatus::RTL => processing_classes[i] = R, OverrideStatus::LTR => processing_classes[i] = L, _ => {} } } // PDF => { if overflow_isolate_count > 0 { // do nothing } else if overflow_embedding_count > 0 { overflow_embedding_count -= 1; } else if stack.last().status != OverrideStatus::Isolate && stack.vec.len() >= 2 { stack.vec.pop(); } // levels[i] = stack.last().level; // X9 part of retaining explicit formatting characters. processing_classes[i] = BN; } // Nothing. 
// BN case moved down to X6, see B => {} // _ => { let last = stack.last(); levels[i] = last.level; // This condition is not in the spec, but I am pretty sure that is a spec bug. // https://www.unicode.org/L2/L2023/23014-amd-to-uax9.pdf if original_classes[i] != BN { match last.status { OverrideStatus::RTL => processing_classes[i] = R, OverrideStatus::LTR => processing_classes[i] = L, _ => {} } } } } // Handle multi-byte characters. for j in 1..c.len_utf8() { levels[i + j] = levels[i]; processing_classes[i + j] = processing_classes[i]; } } } /// Entries in the directional status stack: struct Status { level: Level, status: OverrideStatus, } #[derive(PartialEq)] enum OverrideStatus { Neutral, RTL, LTR, Isolate, } struct DirectionalStatusStack { vec: Vec, } impl DirectionalStatusStack { fn new() -> Self { DirectionalStatusStack { vec: Vec::with_capacity(Level::max_explicit_depth() as usize + 2), } } fn push(&mut self, level: Level, status: OverrideStatus) { self.vec.push(Status { level, status }); } fn last(&self) -> &Status { self.vec.last().unwrap() } } unicode-bidi-0.3.13/src/format_chars.rs000064400000000000000000000025271046102023000160560ustar 00000000000000// Copyright 2017 The Servo Project Developers. See the // COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! Directional Formatting Characters //! //! 
// == Implicit == /// ARABIC LETTER MARK pub const ALM: char = '\u{061C}'; /// LEFT-TO-RIGHT MARK pub const LRM: char = '\u{200E}'; /// RIGHT-TO-LEFT MARK pub const RLM: char = '\u{200F}'; // == Explicit Isolates == /// LEFT‑TO‑RIGHT ISOLATE pub const LRI: char = '\u{2066}'; /// RIGHT‑TO‑LEFT ISOLATE pub const RLI: char = '\u{2067}'; /// FIRST STRONG ISOLATE pub const FSI: char = '\u{2068}'; /// POP DIRECTIONAL ISOLATE pub const PDI: char = '\u{2069}'; // == Explicit Embeddings and Overrides == /// LEFT-TO-RIGHT EMBEDDING pub const LRE: char = '\u{202A}'; /// RIGHT-TO-LEFT EMBEDDING pub const RLE: char = '\u{202B}'; /// POP DIRECTIONAL FORMATTING pub const PDF: char = '\u{202C}'; /// LEFT-TO-RIGHT OVERRIDE pub const LRO: char = '\u{202D}'; /// RIGHT-TO-LEFT OVERRIDE pub const RLO: char = '\u{202E}'; unicode-bidi-0.3.13/src/implicit.rs000064400000000000000000000626741046102023000152310ustar 00000000000000// Copyright 2015 The Servo Project Developers. See the // COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! 3.3.4 - 3.3.6. Resolve implicit levels and types. use alloc::vec::Vec; use core::cmp::max; use super::char_data::BidiClass::{self, *}; use super::level::Level; use super::prepare::{not_removed_by_x9, removed_by_x9, IsolatingRunSequence}; use super::BidiDataSource; /// 3.3.4 Resolving Weak Types /// /// #[cfg_attr(feature = "flame_it", flamer::flame)] pub fn resolve_weak( text: &str, sequence: &IsolatingRunSequence, processing_classes: &mut [BidiClass], ) { // Note: The spec treats these steps as individual passes that are applied one after the other // on the entire IsolatingRunSequence at once. 
We instead collapse it into a single iteration, // which is straightforward for rules that are based on the state of the current character, but not // for rules that care about surrounding characters. To deal with them, we retain additional state // about previous character classes that may have since been changed by later rules. // The previous class for the purposes of rule W4/W6, not tracking changes made after or during W4. let mut prev_class_before_w4 = sequence.sos; // The previous class for the purposes of rule W5. let mut prev_class_before_w5 = sequence.sos; // The previous class for the purposes of rule W1, not tracking changes from any other rules. let mut prev_class_before_w1 = sequence.sos; let mut last_strong_is_al = false; let mut et_run_indices = Vec::new(); // for W5 let mut bn_run_indices = Vec::new(); // for W5 + for (run_index, level_run) in sequence.runs.iter().enumerate() { for i in &mut level_run.clone() { if processing_classes[i] == BN { // // Keeps track of bn runs for W5 in case we see an ET. bn_run_indices.push(i); // BNs aren't real, skip over them. continue; } // Store the processing class of all rules before W2/W1. // Used to keep track of the last strong character for W2. W3 is able to insert new strong // characters, so we don't want to be misled by it. let mut w2_processing_class = processing_classes[i]; // // if processing_classes[i] == NSM { processing_classes[i] = match prev_class_before_w1 { RLI | LRI | FSI | PDI => ON, _ => prev_class_before_w1, }; // W1 occurs before W2, update this. w2_processing_class = processing_classes[i]; } prev_class_before_w1 = processing_classes[i]; // // // match processing_classes[i] { EN => { if last_strong_is_al { // W2. If previous strong char was AL, change EN to AN. processing_classes[i] = AN; } } // W3. AL => processing_classes[i] = R, _ => {} } // update last_strong_is_al. 
match w2_processing_class { L | R => { last_strong_is_al = false; } AL => { last_strong_is_al = true; } _ => {} } let class_before_w456 = processing_classes[i]; // // // (separators only) // (see below for W6 terminator code) // match processing_classes[i] { // EN => { // W5. If a run of ETs is adjacent to an EN, change the ETs to EN. for j in &et_run_indices { processing_classes[*j] = EN; } et_run_indices.clear(); } // // ES | CS => { // See https://github.com/servo/unicode-bidi/issues/86 for improving this. // We want to make sure we check the correct next character by skipping past the rest // of this one. if let Some(ch) = text.get(i..).and_then(|s| s.chars().next()) { let mut next_class = sequence .iter_forwards_from(i + ch.len_utf8(), run_index) .map(|j| processing_classes[j]) // .find(not_removed_by_x9) .unwrap_or(sequence.eos); if next_class == EN && last_strong_is_al { // Apply W2 to next_class. We know that last_strong_is_al // has no chance of changing on this character so we can still assume its value // will be the same by the time we get to it. next_class = AN; } processing_classes[i] = match (prev_class_before_w4, processing_classes[i], next_class) { // W4 (EN, ES, EN) | (EN, CS, EN) => EN, // W4 (AN, CS, AN) => AN, // W6 (separators only) (_, _, _) => ON, }; // W6 + // We have to do this before W5 gets its grubby hands on these characters and thinks // they're part of an ET run. // We check for ON to ensure that we had hit the W6 branch above, since this `ES | CS` match // arm handles both W4 and W6. 
if processing_classes[i] == ON { for idx in sequence.iter_backwards_from(i, run_index) { let class = &mut processing_classes[idx]; if *class != BN { break; } *class = ON; } for idx in sequence.iter_forwards_from(i + ch.len_utf8(), run_index) { let class = &mut processing_classes[idx]; if *class != BN { break; } *class = ON; } } } else { // We're in the middle of a character, copy over work done for previous bytes // since it's going to be the same answer. processing_classes[i] = processing_classes[i - 1]; } } // ET => { match prev_class_before_w5 { EN => processing_classes[i] = EN, _ => { // // If there was a BN run before this, that's now a part of this ET run. et_run_indices.extend(&bn_run_indices); // In case this is followed by an EN. et_run_indices.push(i); } } } _ => {} } // Common loop iteration code // // // BN runs would have already continued the loop, clear them before we get to the next one. bn_run_indices.clear(); // W6 above only deals with separators, so it doesn't change anything W5 cares about, // so we still can update this after running that part of W6. prev_class_before_w5 = processing_classes[i]; // (terminators only) // (see above for W6 separator code) // if prev_class_before_w5 != ET { // W6. If we didn't find an adjacent EN, turn any ETs into ON instead. for j in &et_run_indices { processing_classes[*j] = ON; } et_run_indices.clear(); } // We stashed this before W4/5/6 could get their grubby hands on it, and it's not // used in the W6 terminator code below so we can update it now. prev_class_before_w4 = class_before_w456; } } // Rerun this check in case we ended with a sequence of BNs (i.e., we'd never // hit the end of the for loop above). // W6. If we didn't find an adjacent EN, turn any ETs into ON instead. for j in &et_run_indices { processing_classes[*j] = ON; } et_run_indices.clear(); // W7. If the previous strong char was L, change EN to L. 
let mut last_strong_is_l = sequence.sos == L; for run in &sequence.runs { for i in run.clone() { match processing_classes[i] { EN if last_strong_is_l => { processing_classes[i] = L; } L => { last_strong_is_l = true; } R | AL => { last_strong_is_l = false; } // // Already scanning past BN here. _ => {} } } } } /// 3.3.5 Resolving Neutral Types /// /// #[cfg_attr(feature = "flame_it", flamer::flame)] pub fn resolve_neutral( text: &str, data_source: &D, sequence: &IsolatingRunSequence, levels: &[Level], original_classes: &[BidiClass], processing_classes: &mut [BidiClass], ) { // e = embedding direction let e: BidiClass = levels[sequence.runs[0].start].bidi_class(); let not_e = if e == BidiClass::L { BidiClass::R } else { BidiClass::L }; // N0. Process bracket pairs. // > Identify the bracket pairs in the current isolating run sequence according to BD16. // We use processing_classes, not original_classes, due to BD14/BD15 let bracket_pairs = identify_bracket_pairs(text, data_source, sequence, processing_classes); // > For each bracket-pair element in the list of pairs of text positions // // Note: Rust ranges are interpreted as [start..end), be careful using `pair` directly // for indexing as it will include the opening bracket pair but not the closing one. for pair in bracket_pairs { #[cfg(feature = "std")] debug_assert!( pair.start < processing_classes.len(), "identify_bracket_pairs returned a range that is out of bounds!" ); #[cfg(feature = "std")] debug_assert!( pair.end < processing_classes.len(), "identify_bracket_pairs returned a range that is out of bounds!" ); let mut found_e = false; let mut found_not_e = false; let mut class_to_set = None; let start_len_utf8 = text[pair.start..].chars().next().unwrap().len_utf8(); // > Inspect the bidirectional types of the characters enclosed within the bracket pair. // // `pair` is [start, end) so we will end up processing the opening character but not the closing one. 
// for enclosed_i in sequence.iter_forwards_from(pair.start + start_len_utf8, pair.start_run) { if enclosed_i >= pair.end { #[cfg(feature = "std")] debug_assert!( enclosed_i == pair.end, "If we skipped past this, the iterator is broken" ); break; } let class = processing_classes[enclosed_i]; if class == e { found_e = true; } else if class == not_e { found_not_e = true; } else if class == BidiClass::EN || class == BidiClass::AN { // > Within this scope, bidirectional types EN and AN are treated as R. if e == BidiClass::L { found_not_e = true; } else { found_e = true; } } // If we have found a character with the class of the embedding direction // we can bail early. if found_e { break; } } // > If any strong type (either L or R) matching the embedding direction is found if found_e { // > .. set the type for both brackets in the pair to match the embedding direction class_to_set = Some(e); // > Otherwise, if there is a strong type it must be opposite the embedding direction } else if found_not_e { // > Therefore, test for an established context with a preceding strong type by // > checking backwards before the opening paired bracket // > until the first strong type (L, R, or sos) is found. // (see note above about processing_classes and character boundaries) let mut previous_strong = sequence .iter_backwards_from(pair.start, pair.start_run) .map(|i| processing_classes[i]) .find(|class| { *class == BidiClass::L || *class == BidiClass::R || *class == BidiClass::EN || *class == BidiClass::AN }) .unwrap_or(sequence.sos); // > Within this scope, bidirectional types EN and AN are treated as R. if previous_strong == BidiClass::EN || previous_strong == BidiClass::AN { previous_strong = BidiClass::R; } // > If the preceding strong type is also opposite the embedding direction, // > context is established, // > so set the type for both brackets in the pair to that direction. // AND // > Otherwise set the type for both brackets in the pair to the embedding direction. 
// > Either way it gets set to previous_strong // // Both branches amount to setting the type to the strong type. class_to_set = Some(previous_strong); } if let Some(class_to_set) = class_to_set { // Update all processing classes corresponding to the start and end elements, as requested. // We should include all bytes of the character, not the first one. let end_len_utf8 = text[pair.end..].chars().next().unwrap().len_utf8(); for class in &mut processing_classes[pair.start..pair.start + start_len_utf8] { *class = class_to_set; } for class in &mut processing_classes[pair.end..pair.end + end_len_utf8] { *class = class_to_set; } // for idx in sequence.iter_backwards_from(pair.start, pair.start_run) { let class = &mut processing_classes[idx]; if *class != BN { break; } *class = class_to_set; } // > Any number of characters that had original bidirectional character type NSM prior to the application of // > W1 that immediately follow a paired bracket which changed to L or R under N0 should change to match the type of their preceding bracket. // This rule deals with sequences of NSMs, so we can just update them all at once, we don't need to worry // about character boundaries. We do need to be careful to skip the full set of bytes for the parentheses characters. let nsm_start = pair.start + start_len_utf8; for idx in sequence.iter_forwards_from(nsm_start, pair.start_run) { let class = original_classes[idx]; if class == BidiClass::NSM || processing_classes[idx] == BN { processing_classes[idx] = class_to_set; } else { break; } } let nsm_end = pair.end + end_len_utf8; for idx in sequence.iter_forwards_from(nsm_end, pair.end_run) { let class = original_classes[idx]; if class == BidiClass::NSM || processing_classes[idx] == BN { processing_classes[idx] = class_to_set; } else { break; } } } // > Otherwise, there are no strong types within the bracket pair // > Therefore, do not set the type for that bracket pair } // N1 and N2. 
// Indices of every byte in this isolating run sequence let mut indices = sequence.runs.iter().flat_map(Clone::clone); let mut prev_class = sequence.sos; while let Some(mut i) = indices.next() { // Process sequences of NI characters. let mut ni_run = Vec::new(); // The BN is for if is_NI(processing_classes[i]) || processing_classes[i] == BN { // Consume a run of consecutive NI characters. ni_run.push(i); let mut next_class; loop { match indices.next() { Some(j) => { i = j; next_class = processing_classes[j]; // The BN is for if is_NI(next_class) || next_class == BN { ni_run.push(i); } else { break; } } None => { next_class = sequence.eos; break; } }; } // N1-N2. // // // let new_class = match (prev_class, next_class) { (L, L) => L, (R, R) | (R, AN) | (R, EN) | (AN, R) | (AN, AN) | (AN, EN) | (EN, R) | (EN, AN) | (EN, EN) => R, (_, _) => e, }; for j in &ni_run { processing_classes[*j] = new_class; } ni_run.clear(); } prev_class = processing_classes[i]; } } struct BracketPair { /// The text-relative index of the opening bracket. start: usize, /// The text-relative index of the closing bracket. end: usize, /// The index of the run (in the run sequence) that the opening bracket is in. start_run: usize, /// The index of the run (in the run sequence) that the closing bracket is in. end_run: usize, } /// 3.1.3 Identifying Bracket Pairs /// /// Returns all paired brackets in the source, as indices into the /// text source. 
/// /// fn identify_bracket_pairs( text: &str, data_source: &D, run_sequence: &IsolatingRunSequence, original_classes: &[BidiClass], ) -> Vec { let mut ret = vec![]; let mut stack = vec![]; for (run_index, level_run) in run_sequence.runs.iter().enumerate() { let slice = if let Some(slice) = text.get(level_run.clone()) { slice } else { #[cfg(feature = "std")] std::debug_assert!( false, "Found broken indices in level run: found indices {}..{} for string of length {}", level_run.start, level_run.end, text.len() ); return ret; }; for (i, ch) in slice.char_indices() { let actual_index = level_run.start + i; // All paren characters are ON. // From BidiBrackets.txt: // > The Unicode property value stability policy guarantees that characters // > which have bpt=o or bpt=c also have bc=ON and Bidi_M=Y if original_classes[level_run.start + i] != BidiClass::ON { continue; } if let Some(matched) = data_source.bidi_matched_opening_bracket(ch) { if matched.is_open { // > If an opening paired bracket is found ... // > ... and there is no room in the stack, // > stop processing BD16 for the remainder of the isolating run sequence. if stack.len() >= 63 { break; } // > ... push its Bidi_Paired_Bracket property value and its text position onto the stack stack.push((matched.opening, actual_index, run_index)) } else { // > If a closing paired bracket is found, do the following // > Declare a variable that holds a reference to the current stack element // > and initialize it with the top element of the stack. // AND // > Else, if the current stack element is not at the bottom of the stack for (stack_index, element) in stack.iter().enumerate().rev() { // > Compare the closing paired bracket being inspected or its canonical // > equivalent to the bracket in the current stack element. 
if element.0 == matched.opening { // > If the values match, meaning the two characters form a bracket pair, then // > Append the text position in the current stack element together with the // > text position of the closing paired bracket to the list. let pair = BracketPair { start: element.1, end: actual_index, start_run: element.2, end_run: run_index, }; ret.push(pair); // > Pop the stack through the current stack element inclusively. stack.truncate(stack_index); break; } } } } } } // > Sort the list of pairs of text positions in ascending order based on // > the text position of the opening paired bracket. ret.sort_by_key(|r| r.start); ret } /// 3.3.6 Resolving Implicit Levels /// /// Returns the maximum embedding level in the paragraph. /// /// #[cfg_attr(feature = "flame_it", flamer::flame)] pub fn resolve_levels(original_classes: &[BidiClass], levels: &mut [Level]) -> Level { let mut max_level = Level::ltr(); assert_eq!(original_classes.len(), levels.len()); for i in 0..levels.len() { match (levels[i].is_rtl(), original_classes[i]) { (false, AN) | (false, EN) => levels[i].raise(2).expect("Level number error"), (false, R) | (true, L) | (true, EN) | (true, AN) => { levels[i].raise(1).expect("Level number error") } // handled here (_, _) => {} } max_level = max(max_level, levels[i]); } max_level } /// Neutral or Isolate formatting character (B, S, WS, ON, FSI, LRI, RLI, PDI) /// /// #[allow(non_snake_case)] fn is_NI(class: BidiClass) -> bool { match class { B | S | WS | ON | FSI | LRI | RLI | PDI => true, _ => false, } } unicode-bidi-0.3.13/src/level.rs000064400000000000000000000274001046102023000145120ustar 00000000000000// Copyright 2017 The Servo Project Developers. See the // COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! Bidi Embedding Level //! //! 
See [`Level`](struct.Level.html) for more details. //! //! use alloc::string::{String, ToString}; use alloc::vec::Vec; use core::convert::{From, Into}; use core::slice; use super::char_data::BidiClass; /// Embedding Level /// /// Embedding Levels are numbers between 0 and 126 (inclusive), where even values denote a /// left-to-right (LTR) direction and odd values a right-to-left (RTL) direction. /// /// This struct maintains a *valid* status for level numbers, meaning that creating a new level, or /// mutating an existing level, with the value smaller than `0` (before conversion to `u8`) or /// larger than 125 results in an `Error`. /// /// #[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[repr(transparent)] pub struct Level(u8); pub const LTR_LEVEL: Level = Level(0); pub const RTL_LEVEL: Level = Level(1); const MAX_DEPTH: u8 = 125; /// During explicit level resolution, embedding level can go as high as `max_depth`. pub const MAX_EXPLICIT_DEPTH: u8 = MAX_DEPTH; /// During implicit level resolution, embedding level can go as high as `max_depth + 1`. pub const MAX_IMPLICIT_DEPTH: u8 = MAX_DEPTH + 1; /// Errors that can occur on Level creation or mutation #[derive(Debug, PartialEq)] pub enum Error { /// Out-of-range (invalid) embedding level number. OutOfRangeNumber, } impl Level { /// New LTR level with smallest number value (0). #[inline] pub fn ltr() -> Level { LTR_LEVEL } /// New RTL level with smallest number value (1). #[inline] pub fn rtl() -> Level { RTL_LEVEL } /// Maximum depth of the directional status stack during implicit resolutions. pub fn max_implicit_depth() -> u8 { MAX_IMPLICIT_DEPTH } /// Maximum depth of the directional status stack during explicit resolutions. pub fn max_explicit_depth() -> u8 { MAX_EXPLICIT_DEPTH } // == Inquiries == /// Create new level, fail if number is larger than `max_depth + 1`. 
#[inline] pub fn new(number: u8) -> Result { if number <= MAX_IMPLICIT_DEPTH { Ok(Level(number)) } else { Err(Error::OutOfRangeNumber) } } /// Create new level, fail if number is larger than `max_depth`. #[inline] pub fn new_explicit(number: u8) -> Result { if number <= MAX_EXPLICIT_DEPTH { Ok(Level(number)) } else { Err(Error::OutOfRangeNumber) } } // == Inquiries == /// The level number. #[inline] pub fn number(&self) -> u8 { self.0 } /// If this level is left-to-right. #[inline] pub fn is_ltr(&self) -> bool { self.0 % 2 == 0 } /// If this level is right-to-left. #[inline] pub fn is_rtl(&self) -> bool { self.0 % 2 == 1 } // == Mutators == /// Raise level by `amount`, fail if number is larger than `max_depth + 1`. #[inline] pub fn raise(&mut self, amount: u8) -> Result<(), Error> { match self.0.checked_add(amount) { Some(number) => { if number <= MAX_IMPLICIT_DEPTH { self.0 = number; Ok(()) } else { Err(Error::OutOfRangeNumber) } } None => Err(Error::OutOfRangeNumber), } } /// Raise level by `amount`, fail if number is larger than `max_depth`. #[inline] pub fn raise_explicit(&mut self, amount: u8) -> Result<(), Error> { match self.0.checked_add(amount) { Some(number) => { if number <= MAX_EXPLICIT_DEPTH { self.0 = number; Ok(()) } else { Err(Error::OutOfRangeNumber) } } None => Err(Error::OutOfRangeNumber), } } /// Lower level by `amount`, fail if number goes below zero. #[inline] pub fn lower(&mut self, amount: u8) -> Result<(), Error> { match self.0.checked_sub(amount) { Some(number) => { self.0 = number; Ok(()) } None => Err(Error::OutOfRangeNumber), } } // == Helpers == /// The next LTR (even) level greater than this, or fail if number is larger than `max_depth`. #[inline] pub fn new_explicit_next_ltr(&self) -> Result { Level::new_explicit((self.0 + 2) & !1) } /// The next RTL (odd) level greater than this, or fail if number is larger than `max_depth`. 
#[inline] pub fn new_explicit_next_rtl(&self) -> Result { Level::new_explicit((self.0 + 1) | 1) } /// The lowest RTL (odd) level greater than or equal to this, or fail if number is larger than /// `max_depth + 1`. #[inline] pub fn new_lowest_ge_rtl(&self) -> Result { Level::new(self.0 | 1) } /// Generate a character type based on a level (as specified in steps X10 and N2). #[inline] pub fn bidi_class(&self) -> BidiClass { if self.is_rtl() { BidiClass::R } else { BidiClass::L } } pub fn vec(v: &[u8]) -> Vec { v.iter().map(|&x| x.into()).collect() } /// Converts a byte slice to a slice of Levels /// /// Does _not_ check if each level is within bounds (`<=` [`MAX_IMPLICIT_DEPTH`]), /// which is not a requirement for safety but is a requirement for correctness of the algorithm. pub fn from_slice_unchecked(v: &[u8]) -> &[Level] { debug_assert_eq!(core::mem::size_of::(), core::mem::size_of::()); unsafe { // Safety: The two arrays are the same size and layout-compatible since // Level is `repr(transparent)` over `u8` slice::from_raw_parts(v as *const [u8] as *const u8 as *const Level, v.len()) } } } /// If levels has any RTL (odd) level /// /// This information is usually used to skip re-ordering of text when no RTL level is present #[inline] pub fn has_rtl(levels: &[Level]) -> bool { levels.iter().any(|&lvl| lvl.is_rtl()) } impl Into for Level { /// Convert to the level number #[inline] fn into(self) -> u8 { self.number() } } impl From for Level { /// Create level by number #[inline] fn from(number: u8) -> Level { Level::new(number).expect("Level number error") } } /// Used for matching levels in conformance tests impl<'a> PartialEq<&'a str> for Level { #[inline] fn eq(&self, s: &&'a str) -> bool { *s == "x" || *s == self.0.to_string() } } /// Used for matching levels in conformance tests impl<'a> PartialEq for Level { #[inline] fn eq(&self, s: &String) -> bool { self == &s.as_str() } } #[cfg(test)] mod tests { use super::*; #[test] fn test_new() { 
assert_eq!(Level::new(0), Ok(Level(0))); assert_eq!(Level::new(1), Ok(Level(1))); assert_eq!(Level::new(10), Ok(Level(10))); assert_eq!(Level::new(125), Ok(Level(125))); assert_eq!(Level::new(126), Ok(Level(126))); assert_eq!(Level::new(127), Err(Error::OutOfRangeNumber)); assert_eq!(Level::new(255), Err(Error::OutOfRangeNumber)); } #[test] fn test_new_explicit() { assert_eq!(Level::new_explicit(0), Ok(Level(0))); assert_eq!(Level::new_explicit(1), Ok(Level(1))); assert_eq!(Level::new_explicit(10), Ok(Level(10))); assert_eq!(Level::new_explicit(125), Ok(Level(125))); assert_eq!(Level::new_explicit(126), Err(Error::OutOfRangeNumber)); assert_eq!(Level::new_explicit(255), Err(Error::OutOfRangeNumber)); } #[test] fn test_is_ltr() { assert_eq!(Level(0).is_ltr(), true); assert_eq!(Level(1).is_ltr(), false); assert_eq!(Level(10).is_ltr(), true); assert_eq!(Level(11).is_ltr(), false); assert_eq!(Level(124).is_ltr(), true); assert_eq!(Level(125).is_ltr(), false); } #[test] fn test_is_rtl() { assert_eq!(Level(0).is_rtl(), false); assert_eq!(Level(1).is_rtl(), true); assert_eq!(Level(10).is_rtl(), false); assert_eq!(Level(11).is_rtl(), true); assert_eq!(Level(124).is_rtl(), false); assert_eq!(Level(125).is_rtl(), true); } #[test] fn test_raise() { let mut level = Level::ltr(); assert_eq!(level.number(), 0); assert!(level.raise(100).is_ok()); assert_eq!(level.number(), 100); assert!(level.raise(26).is_ok()); assert_eq!(level.number(), 126); assert!(level.raise(1).is_err()); // invalid! assert!(level.raise(250).is_err()); // overflow! assert_eq!(level.number(), 126); } #[test] fn test_raise_explicit() { let mut level = Level::ltr(); assert_eq!(level.number(), 0); assert!(level.raise_explicit(100).is_ok()); assert_eq!(level.number(), 100); assert!(level.raise_explicit(25).is_ok()); assert_eq!(level.number(), 125); assert!(level.raise_explicit(1).is_err()); // invalid! assert!(level.raise_explicit(250).is_err()); // overflow! 
assert_eq!(level.number(), 125); } #[test] fn test_lower() { let mut level = Level::rtl(); assert_eq!(level.number(), 1); assert!(level.lower(1).is_ok()); assert_eq!(level.number(), 0); assert!(level.lower(1).is_err()); // underflow! assert!(level.lower(250).is_err()); // underflow! assert_eq!(level.number(), 0); } #[test] fn test_has_rtl() { assert_eq!(has_rtl(&Level::vec(&[0, 0, 0])), false); assert_eq!(has_rtl(&Level::vec(&[0, 1, 0])), true); assert_eq!(has_rtl(&Level::vec(&[0, 2, 0])), false); assert_eq!(has_rtl(&Level::vec(&[0, 125, 0])), true); assert_eq!(has_rtl(&Level::vec(&[0, 126, 0])), false); } #[test] fn test_into() { let level = Level::rtl(); let number: u8 = level.into(); assert_eq!(1u8, number); } #[test] fn test_vec() { assert_eq!( Level::vec(&[0, 1, 125]), vec![Level(0), Level(1), Level(125)] ); } #[test] fn test_str_eq() { assert_eq!(Level::vec(&[0, 1, 4, 125]), vec!["0", "1", "x", "125"]); assert_ne!(Level::vec(&[0, 1, 4, 125]), vec!["0", "1", "5", "125"]); } #[test] fn test_string_eq() { assert_eq!( Level::vec(&[0, 1, 4, 125]), vec!["0".to_string(), "1".to_string(), "x".to_string(), "125".to_string()] ); } } #[cfg(all(feature = "serde", test))] mod serde_tests { use super::*; use serde_test::{assert_tokens, Token}; #[test] fn test_statics() { assert_tokens( &Level::ltr(), &[Token::NewtypeStruct { name: "Level" }, Token::U8(0)], ); assert_tokens( &Level::rtl(), &[Token::NewtypeStruct { name: "Level" }, Token::U8(1)], ); } #[test] fn test_new() { let level = Level::new(42).unwrap(); assert_tokens( &level, &[Token::NewtypeStruct { name: "Level" }, Token::U8(42)], ); } } unicode-bidi-0.3.13/src/lib.rs000064400000000000000000001244171046102023000141570ustar 00000000000000// Copyright 2015 The Servo Project Developers. See the // COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. 
This file may not be copied, modified, or distributed // except according to those terms. //! This crate implements the [Unicode Bidirectional Algorithm][tr9] for display of mixed //! right-to-left and left-to-right text. It is written in safe Rust, compatible with the //! current stable release. //! //! ## Example //! //! ```rust //! # #[cfg(feature = "hardcoded-data")] { //! use unicode_bidi::BidiInfo; //! //! // This example text is defined using `concat!` because some browsers //! // and text editors have trouble displaying bidi strings. //! let text = concat![ //! "א", //! "ב", //! "ג", //! "a", //! "b", //! "c", //! ]; //! //! // Resolve embedding levels within the text. Pass `None` to detect the //! // paragraph level automatically. //! let bidi_info = BidiInfo::new(&text, None); //! //! // This paragraph has embedding level 1 because its first strong character is RTL. //! assert_eq!(bidi_info.paragraphs.len(), 1); //! let para = &bidi_info.paragraphs[0]; //! assert_eq!(para.level.number(), 1); //! assert_eq!(para.level.is_rtl(), true); //! //! // Re-ordering is done after wrapping each paragraph into a sequence of //! // lines. For this example, I'll just use a single line that spans the //! // entire paragraph. //! let line = para.range.clone(); //! //! let display = bidi_info.reorder_line(para, line); //! assert_eq!(display, concat![ //! "a", //! "b", //! "c", //! "ג", //! "ב", //! "א", //! ]); //! # } // feature = "hardcoded-data" //! ``` //! //! # Features //! //! - `std`: Enabled by default, but can be disabled to make `unicode_bidi` //! `#![no_std]` + `alloc` compatible. //! - `hardcoded-data`: Enabled by default. Includes hardcoded Unicode bidi data and more convenient APIs. //! - `serde`: Adds [`serde::Serialize`] and [`serde::Deserialize`] //! implementations to relevant types. //! //! 
[tr9]: #![no_std] // We need to link to std to make doc tests work on older Rust versions #[cfg(feature = "std")] extern crate std; #[macro_use] extern crate alloc; pub mod data_source; pub mod deprecated; pub mod format_chars; pub mod level; mod char_data; mod explicit; mod implicit; mod prepare; pub use crate::char_data::{BidiClass, UNICODE_VERSION}; pub use crate::data_source::BidiDataSource; pub use crate::level::{Level, LTR_LEVEL, RTL_LEVEL}; pub use crate::prepare::LevelRun; #[cfg(feature = "hardcoded-data")] pub use crate::char_data::{bidi_class, HardcodedBidiData}; use alloc::borrow::Cow; use alloc::string::String; use alloc::vec::Vec; use core::cmp; use core::iter::repeat; use core::ops::Range; use crate::format_chars as chars; use crate::BidiClass::*; #[derive(PartialEq, Debug)] pub enum Direction { Ltr, Rtl, Mixed, } /// Bidi information about a single paragraph #[derive(Debug, PartialEq)] pub struct ParagraphInfo { /// The paragraphs boundaries within the text, as byte indices. /// /// TODO: Shrink this to only include the starting index? pub range: Range, /// The paragraph embedding level. /// /// pub level: Level, } impl ParagraphInfo { /// Gets the length of the paragraph in the source text. pub fn len(&self) -> usize { self.range.end - self.range.start } } /// Initial bidi information of the text. /// /// Contains the text paragraphs and `BidiClass` of its characters. #[derive(PartialEq, Debug)] pub struct InitialInfo<'text> { /// The text pub text: &'text str, /// The BidiClass of the character at each byte in the text. /// If a character is multiple bytes, its class will appear multiple times in the vector. pub original_classes: Vec, /// The boundaries and level of each paragraph within the text. pub paragraphs: Vec, } impl<'text> InitialInfo<'text> { /// Find the paragraphs and BidiClasses in a string of text. 
/// /// /// /// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong /// character is found before the matching PDI. If no strong character is found, the class will /// remain FSI, and it's up to later stages to treat these as LRI when needed. /// /// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this. #[cfg_attr(feature = "flame_it", flamer::flame)] #[cfg(feature = "hardcoded-data")] pub fn new(text: &str, default_para_level: Option) -> InitialInfo<'_> { Self::new_with_data_source(&HardcodedBidiData, text, default_para_level) } /// Find the paragraphs and BidiClasses in a string of text, with a custom [`BidiDataSource`] /// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`InitialInfo::new()`] /// instead (enabled with tbe default `hardcoded-data` Cargo feature) /// /// /// /// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong /// character is found before the matching PDI. If no strong character is found, the class will /// remain FSI, and it's up to later stages to treat these as LRI when needed. #[cfg_attr(feature = "flame_it", flamer::flame)] pub fn new_with_data_source<'a, D: BidiDataSource>( data_source: &D, text: &'a str, default_para_level: Option, ) -> InitialInfo<'a> { let mut original_classes = Vec::with_capacity(text.len()); // The stack contains the starting byte index for each nested isolate we're inside. 
let mut isolate_stack = Vec::new(); let mut paragraphs = Vec::new(); let mut para_start = 0; let mut para_level = default_para_level; #[cfg(feature = "flame_it")] flame::start("InitialInfo::new(): iter text.char_indices()"); for (i, c) in text.char_indices() { let class = data_source.bidi_class(c); #[cfg(feature = "flame_it")] flame::start("original_classes.extend()"); original_classes.extend(repeat(class).take(c.len_utf8())); #[cfg(feature = "flame_it")] flame::end("original_classes.extend()"); match class { B => { // P1. Split the text into separate paragraphs. The paragraph separator is kept // with the previous paragraph. let para_end = i + c.len_utf8(); paragraphs.push(ParagraphInfo { range: para_start..para_end, // P3. If no character is found in p2, set the paragraph level to zero. level: para_level.unwrap_or(LTR_LEVEL), }); // Reset state for the start of the next paragraph. para_start = para_end; // TODO: Support defaulting to direction of previous paragraph // // para_level = default_para_level; isolate_stack.clear(); } L | R | AL => { match isolate_stack.last() { Some(&start) => { if original_classes[start] == FSI { // X5c. If the first strong character between FSI and its matching // PDI is R or AL, treat it as RLI. Otherwise, treat it as LRI. for j in 0..chars::FSI.len_utf8() { original_classes[start + j] = if class == L { LRI } else { RLI }; } } } None => { if para_level.is_none() { // P2. Find the first character of type L, AL, or R, while skipping // any characters between an isolate initiator and its matching // PDI. 
para_level = Some(if class != L { RTL_LEVEL } else { LTR_LEVEL }); } } } } RLI | LRI | FSI => { isolate_stack.push(i); } PDI => { isolate_stack.pop(); } _ => {} } } if para_start < text.len() { paragraphs.push(ParagraphInfo { range: para_start..text.len(), level: para_level.unwrap_or(LTR_LEVEL), }); } assert_eq!(original_classes.len(), text.len()); #[cfg(feature = "flame_it")] flame::end("InitialInfo::new(): iter text.char_indices()"); InitialInfo { text, original_classes, paragraphs, } } } /// Bidi information of the text. /// /// The `original_classes` and `levels` vectors are indexed by byte offsets into the text. If a /// character is multiple bytes wide, then its class and level will appear multiple times in these /// vectors. // TODO: Impl `struct StringProperty { values: Vec }` and use instead of Vec #[derive(Debug, PartialEq)] pub struct BidiInfo<'text> { /// The text pub text: &'text str, /// The BidiClass of the character at each byte in the text. pub original_classes: Vec, /// The directional embedding level of each byte in the text. pub levels: Vec, /// The boundaries and paragraph embedding level of each paragraph within the text. /// /// TODO: Use SmallVec or similar to avoid overhead when there are only one or two paragraphs? /// Or just don't include the first paragraph, which always starts at 0? pub paragraphs: Vec, } impl<'text> BidiInfo<'text> { /// Split the text into paragraphs and determine the bidi embedding levels for each paragraph. /// /// /// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this. /// /// TODO: In early steps, check for special cases that allow later steps to be skipped. like /// text that is entirely LTR. See the `nsBidi` class from Gecko for comparison. 
/// /// TODO: Support auto-RTL base direction #[cfg_attr(feature = "flame_it", flamer::flame)] #[cfg(feature = "hardcoded-data")] pub fn new(text: &str, default_para_level: Option) -> BidiInfo<'_> { Self::new_with_data_source(&HardcodedBidiData, text, default_para_level) } /// Split the text into paragraphs and determine the bidi embedding levels for each paragraph, with a custom [`BidiDataSource`] /// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`BidiInfo::new()`] /// instead (enabled with tbe default `hardcoded-data` Cargo feature). /// /// TODO: In early steps, check for special cases that allow later steps to be skipped. like /// text that is entirely LTR. See the `nsBidi` class from Gecko for comparison. /// /// TODO: Support auto-RTL base direction #[cfg_attr(feature = "flame_it", flamer::flame)] pub fn new_with_data_source<'a, D: BidiDataSource>( data_source: &D, text: &'a str, default_para_level: Option, ) -> BidiInfo<'a> { let InitialInfo { original_classes, paragraphs, .. 
} = InitialInfo::new_with_data_source(data_source, text, default_para_level); let mut levels = Vec::::with_capacity(text.len()); let mut processing_classes = original_classes.clone(); for para in ¶graphs { let text = &text[para.range.clone()]; let original_classes = &original_classes[para.range.clone()]; let processing_classes = &mut processing_classes[para.range.clone()]; let new_len = levels.len() + para.range.len(); levels.resize(new_len, para.level); let levels = &mut levels[para.range.clone()]; explicit::compute( text, para.level, original_classes, levels, processing_classes, ); let sequences = prepare::isolating_run_sequences(para.level, original_classes, levels); for sequence in &sequences { implicit::resolve_weak(text, sequence, processing_classes); implicit::resolve_neutral( text, data_source, sequence, levels, original_classes, processing_classes, ); } implicit::resolve_levels(processing_classes, levels); assign_levels_to_removed_chars(para.level, original_classes, levels); } BidiInfo { text, original_classes, paragraphs, levels, } } /// Re-order a line based on resolved levels and return only the embedding levels, one `Level` /// per *byte*. #[cfg_attr(feature = "flame_it", flamer::flame)] pub fn reordered_levels(&self, para: &ParagraphInfo, line: Range) -> Vec { let (levels, _) = self.visual_runs(para, line); levels } /// Re-order a line based on resolved levels and return only the embedding levels, one `Level` /// per *character*. #[cfg_attr(feature = "flame_it", flamer::flame)] pub fn reordered_levels_per_char( &self, para: &ParagraphInfo, line: Range, ) -> Vec { let levels = self.reordered_levels(para, line); self.text.char_indices().map(|(i, _)| levels[i]).collect() } /// Re-order a line based on resolved levels and return the line in display order. 
#[cfg_attr(feature = "flame_it", flamer::flame)] pub fn reorder_line(&self, para: &ParagraphInfo, line: Range) -> Cow<'text, str> { let (levels, runs) = self.visual_runs(para, line.clone()); // If all isolating run sequences are LTR, no reordering is needed if runs.iter().all(|run| levels[run.start].is_ltr()) { return self.text[line].into(); } let mut result = String::with_capacity(line.len()); for run in runs { if levels[run.start].is_rtl() { result.extend(self.text[run].chars().rev()); } else { result.push_str(&self.text[run]); } } result.into() } /// Reorders pre-calculated levels of a sequence of characters. /// /// NOTE: This is a convenience method that does not use a `Paragraph` object. It is /// intended to be used when an application has determined the levels of the objects (character sequences) /// and just needs to have them reordered. /// /// the index map will result in `indexMap[visualIndex]==logicalIndex`. /// /// This only runs [Rule L2](http://www.unicode.org/reports/tr9/#L2) as it does not have /// information about the actual text. /// /// Furthermore, if `levels` is an array that is aligned with code units, bytes within a codepoint may be /// reversed. You may need to fix up the map to deal with this. Alternatively, only pass in arrays where each `Level` /// is for a single code point. 
/// /// /// # # Example /// ``` /// use unicode_bidi::BidiInfo; /// use unicode_bidi::Level; /// /// let l0 = Level::from(0); /// let l1 = Level::from(1); /// let l2 = Level::from(2); /// /// let levels = vec![l0, l0, l0, l0]; /// let index_map = BidiInfo::reorder_visual(&levels); /// assert_eq!(levels.len(), index_map.len()); /// assert_eq!(index_map, [0, 1, 2, 3]); /// /// let levels: Vec = vec![l0, l0, l0, l1, l1, l1, l2, l2]; /// let index_map = BidiInfo::reorder_visual(&levels); /// assert_eq!(levels.len(), index_map.len()); /// assert_eq!(index_map, [0, 1, 2, 6, 7, 5, 4, 3]); /// ``` pub fn reorder_visual(levels: &[Level]) -> Vec { // Gets the next range of characters after start_index with a level greater // than or equal to `max` fn next_range(levels: &[level::Level], mut start_index: usize, max: Level) -> Range { if levels.is_empty() || start_index >= levels.len() { return start_index..start_index; } while let Some(l) = levels.get(start_index) { if *l >= max { break; } start_index += 1; } if levels.get(start_index).is_none() { // If at the end of the array, adding one will // produce an out-of-range end element return start_index..start_index; } let mut end_index = start_index + 1; while let Some(l) = levels.get(end_index) { if *l < max { return start_index..end_index; } end_index += 1; } start_index..end_index } // This implementation is similar to the L2 implementation in `visual_runs()` // but it cannot benefit from a precalculated LevelRun vector so needs to be different. 
if levels.is_empty() { return vec![]; } // Get the min and max levels let (mut min, mut max) = levels .iter() .fold((levels[0], levels[0]), |(min, max), &l| { (cmp::min(min, l), cmp::max(max, l)) }); // Initialize an index map let mut result: Vec = (0..levels.len()).collect(); if min == max && min.is_ltr() { // Everything is LTR and at the same level, do nothing return result; } // Stop at the lowest *odd* level, since everything below that // is LTR and does not need further reordering min = min.new_lowest_ge_rtl().expect("Level error"); // For each max level, take all contiguous chunks of // levels ≥ max and reverse them // // We can do this check with the original levels instead of checking reorderings because all // prior reorderings will have been for contiguous chunks of levels >> max, which will // be a subset of these chunks anyway. while min <= max { let mut range = 0..0; loop { range = next_range(levels, range.end, max); result[range.clone()].reverse(); if range.end >= levels.len() { break; } } max.lower(1).expect("Level error"); } result } /// Find the level runs within a line and return them in visual order. /// /// `line` is a range of bytes indices within `levels`. /// /// #[cfg_attr(feature = "flame_it", flamer::flame)] pub fn visual_runs( &self, para: &ParagraphInfo, line: Range, ) -> (Vec, Vec) { assert!(line.start <= self.levels.len()); assert!(line.end <= self.levels.len()); let mut levels = self.levels.clone(); let line_classes = &self.original_classes[line.clone()]; let line_levels = &mut levels[line.clone()]; // Reset some whitespace chars to paragraph level. 
// let line_str: &str = &self.text[line.clone()]; let mut reset_from: Option = Some(0); let mut reset_to: Option = None; let mut prev_level = para.level; for (i, c) in line_str.char_indices() { match line_classes[i] { // Segment separator, Paragraph separator B | S => { assert_eq!(reset_to, None); reset_to = Some(i + c.len_utf8()); if reset_from == None { reset_from = Some(i); } } // Whitespace, isolate formatting WS | FSI | LRI | RLI | PDI => { if reset_from == None { reset_from = Some(i); } } // // same as above + set the level RLE | LRE | RLO | LRO | PDF | BN => { if reset_from == None { reset_from = Some(i); } // also set the level to previous line_levels[i] = prev_level; } _ => { reset_from = None; } } if let (Some(from), Some(to)) = (reset_from, reset_to) { for level in &mut line_levels[from..to] { *level = para.level; } reset_from = None; reset_to = None; } prev_level = line_levels[i]; } if let Some(from) = reset_from { for level in &mut line_levels[from..] { *level = para.level; } } // Find consecutive level runs. let mut runs = Vec::new(); let mut start = line.start; let mut run_level = levels[start]; let mut min_level = run_level; let mut max_level = run_level; for (i, &new_level) in levels.iter().enumerate().take(line.end).skip(start + 1) { if new_level != run_level { // End of the previous run, start of a new one. runs.push(start..i); start = i; run_level = new_level; min_level = cmp::min(run_level, min_level); max_level = cmp::max(run_level, max_level); } } runs.push(start..line.end); let run_count = runs.len(); // Re-order the odd runs. // // Stop at the lowest *odd* level. min_level = min_level.new_lowest_ge_rtl().expect("Level error"); // This loop goes through contiguous chunks of level runs that have a level // ≥ max_level and reverses their contents, reducing max_level by 1 each time. 
// // It can do this check with the original levels instead of checking reorderings because all // prior reorderings will have been for contiguous chunks of levels >> max, which will // be a subset of these chunks anyway. while max_level >= min_level { // Look for the start of a sequence of consecutive runs of max_level or higher. let mut seq_start = 0; while seq_start < run_count { if self.levels[runs[seq_start].start] < max_level { seq_start += 1; continue; } // Found the start of a sequence. Now find the end. let mut seq_end = seq_start + 1; while seq_end < run_count { if self.levels[runs[seq_end].start] < max_level { break; } seq_end += 1; } // Reverse the runs within this sequence. runs[seq_start..seq_end].reverse(); seq_start = seq_end; } max_level .lower(1) .expect("Lowering embedding level below zero"); } (levels, runs) } /// If processed text has any computed RTL levels /// /// This information is usually used to skip re-ordering of text when no RTL level is present #[inline] pub fn has_rtl(&self) -> bool { level::has_rtl(&self.levels) } } /// Contains a reference of `BidiInfo` and one of its `paragraphs`. /// And it supports all operation in the `Paragraph` that needs also its /// `BidiInfo` such as `direction`. #[derive(Debug)] pub struct Paragraph<'a, 'text> { pub info: &'a BidiInfo<'text>, pub para: &'a ParagraphInfo, } impl<'a, 'text> Paragraph<'a, 'text> { pub fn new(info: &'a BidiInfo<'text>, para: &'a ParagraphInfo) -> Paragraph<'a, 'text> { Paragraph { info, para } } /// Returns if the paragraph is Left direction, right direction or mixed. pub fn direction(&self) -> Direction { let mut ltr = false; let mut rtl = false; for i in self.para.range.clone() { if self.info.levels[i].is_ltr() { ltr = true; } if self.info.levels[i].is_rtl() { rtl = true; } } if ltr && rtl { return Direction::Mixed; } if ltr { return Direction::Ltr; } Direction::Rtl } /// Returns the `Level` of a certain character in the paragraph. 
pub fn level_at(&self, pos: usize) -> Level {
        // `pos` is relative to the start of this paragraph; `info.levels` is indexed
        // relative to the whole text.
        let actual_position = self.para.range.start + pos;
        self.info.levels[actual_position]
    }
}

/// Assign levels to characters removed by rule X9.
///
/// The levels assigned to these characters are not specified by the algorithm.  This function
/// assigns each one the level of the previous character, to avoid breaking level runs.
/// A removed character at index 0 receives `para_level`.
#[cfg_attr(feature = "flame_it", flamer::flame)]
fn assign_levels_to_removed_chars(para_level: Level, classes: &[BidiClass], levels: &mut [Level]) {
    for i in 0..levels.len() {
        if prepare::removed_by_x9(classes[i]) {
            levels[i] = if i > 0 { levels[i - 1] } else { para_level };
        }
    }
}

#[cfg(test)]
#[cfg(feature = "hardcoded-data")]
mod tests {
    use super::*;

    #[test]
    fn test_initial_text_info() {
        let text = "a1";
        assert_eq!(
            InitialInfo::new(text, None),
            InitialInfo {
                text,
                original_classes: vec![L, EN],
                paragraphs: vec![ParagraphInfo {
                    range: 0..2,
                    level: LTR_LEVEL,
                },],
            }
        );

        let text = "غ א";
        assert_eq!(
            InitialInfo::new(text, None),
            InitialInfo {
                text,
                original_classes: vec![AL, AL, WS, R, R],
                paragraphs: vec![ParagraphInfo {
                    range: 0..5,
                    level: RTL_LEVEL,
                },],
            }
        );

        let text = "a\u{2029}b";
        assert_eq!(
            InitialInfo::new(text, None),
            InitialInfo {
                text,
                original_classes: vec![L, B, B, B, L],
                paragraphs: vec![
                    ParagraphInfo {
                        range: 0..4,
                        level: LTR_LEVEL,
                    },
                    ParagraphInfo {
                        range: 4..5,
                        level: LTR_LEVEL,
                    },
                ],
            }
        );

        let text = format!("{}א{}a", chars::FSI, chars::PDI);
        assert_eq!(
            InitialInfo::new(&text, None),
            InitialInfo {
                text: &text,
                original_classes: vec![RLI, RLI, RLI, R, R, PDI, PDI, PDI, L],
                paragraphs: vec![ParagraphInfo {
                    range: 0..9,
                    level: LTR_LEVEL,
                },],
            }
        );
    }

    #[test]
    #[cfg(feature = "hardcoded-data")]
    fn test_process_text() {
        let text = "abc123";
        assert_eq!(
            BidiInfo::new(text, Some(LTR_LEVEL)),
            BidiInfo {
                text,
                levels: Level::vec(&[0, 0, 0, 0, 0, 0]),
                original_classes: vec![L, L, L, EN, EN, EN],
                paragraphs: vec![ParagraphInfo {
                    range: 0..6,
                    level: LTR_LEVEL,
                },],
            }
        );

        let text = "abc אבג";
        assert_eq!(
            BidiInfo::new(text, Some(LTR_LEVEL)),
            BidiInfo {
                text,
                levels: Level::vec(&[0, 0, 0, 0, 1, 1, 1, 1, 1, 1]),
                original_classes: vec![L, L, L, WS, R, R, R, R, R, R],
                paragraphs: vec![ParagraphInfo {
                    range: 0..10,
                    level: LTR_LEVEL,
                },],
            }
        );
        assert_eq!(
            BidiInfo::new(text, Some(RTL_LEVEL)),
            BidiInfo {
                text,
                levels: Level::vec(&[2, 2, 2, 1, 1, 1, 1, 1, 1, 1]),
                original_classes: vec![L, L, L, WS, R, R, R, R, R, R],
                paragraphs: vec![ParagraphInfo {
                    range: 0..10,
                    level: RTL_LEVEL,
                },],
            }
        );

        let text = "אבג abc";
        assert_eq!(
            BidiInfo::new(text, Some(LTR_LEVEL)),
            BidiInfo {
                text,
                levels: Level::vec(&[1, 1, 1, 1, 1, 1, 0, 0, 0, 0]),
                original_classes: vec![R, R, R, R, R, R, WS, L, L, L],
                paragraphs: vec![ParagraphInfo {
                    range: 0..10,
                    level: LTR_LEVEL,
                },],
            }
        );
        assert_eq!(
            BidiInfo::new(text, None),
            BidiInfo {
                text,
                levels: Level::vec(&[1, 1, 1, 1, 1, 1, 1, 2, 2, 2]),
                original_classes: vec![R, R, R, R, R, R, WS, L, L, L],
                paragraphs: vec![ParagraphInfo {
                    range: 0..10,
                    level: RTL_LEVEL,
                },],
            }
        );

        let text = "غ2ظ א2ג";
        assert_eq!(
            BidiInfo::new(text, Some(LTR_LEVEL)),
            BidiInfo {
                text,
                levels: Level::vec(&[1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1]),
                original_classes: vec![AL, AL, EN, AL, AL, WS, R, R, EN, R, R],
                paragraphs: vec![ParagraphInfo {
                    range: 0..11,
                    level: LTR_LEVEL,
                },],
            }
        );

        let text = "a א.\nג";
        assert_eq!(
            BidiInfo::new(text, None),
            BidiInfo {
                text,
                original_classes: vec![L, WS, R, R, CS, B, R, R],
                levels: Level::vec(&[0, 0, 1, 1, 0, 0, 1, 1]),
                paragraphs: vec![
                    ParagraphInfo {
                        range: 0..6,
                        level: LTR_LEVEL,
                    },
                    ParagraphInfo {
                        range: 6..8,
                        level: RTL_LEVEL,
                    },
                ],
            }
        );

        // BidiTest:69635 (AL ET EN)
        let bidi_info = BidiInfo::new("\u{060B}\u{20CF}\u{06F9}", None);
        assert_eq!(bidi_info.original_classes, vec![AL, AL, ET, ET, ET, EN, EN]);
    }

    #[test]
    #[cfg(feature = "hardcoded-data")]
    fn test_bidi_info_has_rtl() {
        // ASCII only
        assert!(!BidiInfo::new("123", None).has_rtl());
        assert!(!BidiInfo::new("123", Some(LTR_LEVEL)).has_rtl());
        assert!(!BidiInfo::new("123", Some(RTL_LEVEL)).has_rtl());
        assert!(!BidiInfo::new("abc", None).has_rtl());
        assert!(!BidiInfo::new("abc", Some(LTR_LEVEL)).has_rtl());
        assert!(!BidiInfo::new("abc", Some(RTL_LEVEL)).has_rtl());
        assert!(!BidiInfo::new("abc 123", None).has_rtl());
        assert!(!BidiInfo::new("abc\n123", None).has_rtl());

        // With Hebrew
        assert!(BidiInfo::new("אבּג", None).has_rtl());
        assert!(BidiInfo::new("אבּג", Some(LTR_LEVEL)).has_rtl());
        assert!(BidiInfo::new("אבּג", Some(RTL_LEVEL)).has_rtl());
        assert!(BidiInfo::new("abc אבּג", None).has_rtl());
        assert!(BidiInfo::new("abc\nאבּג", None).has_rtl());
        assert!(BidiInfo::new("אבּג abc", None).has_rtl());
        assert!(BidiInfo::new("אבּג\nabc", None).has_rtl());
        assert!(BidiInfo::new("אבּג 123", None).has_rtl());
        assert!(BidiInfo::new("אבּג\n123", None).has_rtl());
    }

    /// Reorders every paragraph of `text` as a single line each.
    #[cfg(feature = "hardcoded-data")]
    fn reorder_paras(text: &str) -> Vec<Cow<'_, str>> {
        let bidi_info = BidiInfo::new(text, None);
        bidi_info
            .paragraphs
            .iter()
            .map(|para| bidi_info.reorder_line(para, para.range.clone()))
            .collect()
    }

    #[test]
    #[cfg(feature = "hardcoded-data")]
    fn test_reorder_line() {
        // Bidi_Class: L L L B L L L B L L L
        assert_eq!(
            reorder_paras("abc\ndef\nghi"),
            vec!["abc\n", "def\n", "ghi"]
        );

        // Bidi_Class: L L EN B L L EN B L L EN
        assert_eq!(
            reorder_paras("ab1\nde2\ngh3"),
            vec!["ab1\n", "de2\n", "gh3"]
        );

        // Bidi_Class: L L L B AL AL AL
        assert_eq!(reorder_paras("abc\nابج"), vec!["abc\n", "جبا"]);

        // Bidi_Class: AL AL AL B L L L
        assert_eq!(reorder_paras("ابج\nabc"), vec!["\nجبا", "abc"]);

        assert_eq!(reorder_paras("1.-2"), vec!["1.-2"]);
        assert_eq!(reorder_paras("1-.2"), vec!["1-.2"]);
        assert_eq!(reorder_paras("abc אבג"), vec!["abc גבא"]);

        // Numbers being weak LTR characters, cannot reorder strong RTL
        assert_eq!(reorder_paras("123 אבג"), vec!["גבא 123"]);

        assert_eq!(reorder_paras("abc\u{202A}def"), vec!["abc\u{202A}def"]);

        assert_eq!(
            reorder_paras("abc\u{202A}def\u{202C}ghi"),
            vec!["abc\u{202A}def\u{202C}ghi"]
        );

        assert_eq!(
            reorder_paras("abc\u{2066}def\u{2069}ghi"),
            vec!["abc\u{2066}def\u{2069}ghi"]
        );

        // Testing for RLE Character
        assert_eq!(
            reorder_paras("\u{202B}abc אבג\u{202C}"),
            vec!["\u{202B}\u{202C}גבא abc"]
        );

        // Testing neutral characters
        assert_eq!(reorder_paras("אבג? אבג"), vec!["גבא ?גבא"]);

        // Testing neutral characters with special case
        assert_eq!(reorder_paras("A אבג?"), vec!["A גבא?"]);

        // Testing neutral characters with Implicit RTL Marker
        assert_eq!(reorder_paras("A אבג?\u{200F}"), vec!["A \u{200F}?גבא"]);
        assert_eq!(reorder_paras("אבג abc"), vec!["abc גבא"]);
        assert_eq!(
            reorder_paras("abc\u{2067}.-\u{2069}ghi"),
            vec!["abc\u{2067}-.\u{2069}ghi"]
        );

        assert_eq!(
            reorder_paras("Hello, \u{2068}\u{202E}world\u{202C}\u{2069}!"),
            vec!["Hello, \u{2068}\u{202E}\u{202C}dlrow\u{2069}!"]
        );

        // With mirrorable characters in RTL run
        assert_eq!(reorder_paras("א(ב)ג."), vec![".ג)ב(א"]);

        // With mirrorable characters on level boundary
        assert_eq!(reorder_paras("אב(גד[&ef].)gh"), vec!["gh).]ef&[דג(בא"]);
    }

    /// Per-byte reordered levels for every paragraph of `text`.
    fn reordered_levels_for_paras(text: &str) -> Vec<Vec<Level>> {
        let bidi_info = BidiInfo::new(text, None);
        bidi_info
            .paragraphs
            .iter()
            .map(|para| bidi_info.reordered_levels(para, para.range.clone()))
            .collect()
    }

    /// Per-character reordered levels for every paragraph of `text`.
    fn reordered_levels_per_char_for_paras(text: &str) -> Vec<Vec<Level>> {
        let bidi_info = BidiInfo::new(text, None);
        bidi_info
            .paragraphs
            .iter()
            .map(|para| bidi_info.reordered_levels_per_char(para, para.range.clone()))
            .collect()
    }

    #[test]
    #[cfg(feature = "hardcoded-data")]
    fn test_reordered_levels() {
        // BidiTest:946 (LRI PDI)
        let text = "\u{2067}\u{2069}";
        assert_eq!(
            reordered_levels_for_paras(text),
            vec![Level::vec(&[0, 0, 0, 0, 0, 0])]
        );
        assert_eq!(
            reordered_levels_per_char_for_paras(text),
            vec![Level::vec(&[0, 0])]
        );

        let text = "aa טֶ";
        let bidi_info = BidiInfo::new(text, None);
        assert_eq!(
            bidi_info.reordered_levels(&bidi_info.paragraphs[0], 3..7),
            Level::vec(&[0, 0, 0, 1, 1, 1, 1]),
        );

        /* TODO
        /// BidiTest:69635 (AL ET EN)
        let text = "\u{060B}\u{20CF}\u{06F9}";
        assert_eq!(
            reordered_levels_for_paras(text),
            vec![Level::vec(&[1, 1, 1, 1, 1, 2, 2])]
        );
        assert_eq!(
            reordered_levels_per_char_for_paras(text),
            vec![Level::vec(&[1, 1, 2])]
        );
         */

        /* TODO
        // BidiTest:291284 (AN RLI PDF R)
        assert_eq!(
            reordered_levels_per_char_for_paras("\u{0605}\u{2067}\u{202C}\u{0590}"),
            vec![&["2", "0", "x", "1"]]
        );
         */
    }

    #[test]
    fn test_paragraph_info_len() {
        let text = "hello world";
        let bidi_info = BidiInfo::new(text, None);
        assert_eq!(bidi_info.paragraphs.len(), 1);
        assert_eq!(bidi_info.paragraphs[0].len(), text.len());

        let text2 = "How are you";
        let whole_text = format!("{}\n{}", text, text2);
        let bidi_info = BidiInfo::new(&whole_text, None);
        assert_eq!(bidi_info.paragraphs.len(), 2);

        // The first paragraph include the paragraph separator.
        // TODO: investigate if the paragraph separator character
        // should not be part of any paragraph.
        assert_eq!(bidi_info.paragraphs[0].len(), text.len() + 1);
        assert_eq!(bidi_info.paragraphs[1].len(), text2.len());
    }

    #[test]
    fn test_direction() {
        let ltr_text = "hello world";
        let rtl_text = "أهلا بكم";
        let all_paragraphs = format!("{}\n{}\n{}{}", ltr_text, rtl_text, ltr_text, rtl_text);
        let bidi_info = BidiInfo::new(&all_paragraphs, None);
        assert_eq!(bidi_info.paragraphs.len(), 3);
        let p_ltr = Paragraph::new(&bidi_info, &bidi_info.paragraphs[0]);
        let p_rtl = Paragraph::new(&bidi_info, &bidi_info.paragraphs[1]);
        let p_mixed = Paragraph::new(&bidi_info, &bidi_info.paragraphs[2]);
        assert_eq!(p_ltr.direction(), Direction::Ltr);
        assert_eq!(p_rtl.direction(), Direction::Rtl);
        assert_eq!(p_mixed.direction(), Direction::Mixed);
    }

    #[test]
    fn test_edge_cases_direction() {
        // No paragraphs for empty text.
        let empty = "";
        let bidi_info = BidiInfo::new(empty, Some(RTL_LEVEL));
        assert_eq!(bidi_info.paragraphs.len(), 0);

        // The paragraph separator will take the value of the default direction
        // which is left to right.
        let separator_only = "\n";
        let bidi_info = BidiInfo::new(separator_only, None);
        assert_eq!(bidi_info.paragraphs.len(), 1);
        let p = Paragraph::new(&bidi_info, &bidi_info.paragraphs[0]);
        assert_eq!(p.direction(), Direction::Ltr);

        // The paragraph separator will take the value of the given initial direction
        // which is left to right.
        let bidi_info = BidiInfo::new(separator_only, Some(LTR_LEVEL));
        assert_eq!(bidi_info.paragraphs.len(), 1);
        let p = Paragraph::new(&bidi_info, &bidi_info.paragraphs[0]);
        assert_eq!(p.direction(), Direction::Ltr);

        // The paragraph separator will take the value of the given initial direction
        // which is right to left.
        let bidi_info = BidiInfo::new(separator_only, Some(RTL_LEVEL));
        assert_eq!(bidi_info.paragraphs.len(), 1);
        let p = Paragraph::new(&bidi_info, &bidi_info.paragraphs[0]);
        assert_eq!(p.direction(), Direction::Rtl);
    }

    #[test]
    fn test_level_at() {
        let ltr_text = "hello world";
        let rtl_text = "أهلا بكم";
        let all_paragraphs = format!("{}\n{}\n{}{}", ltr_text, rtl_text, ltr_text, rtl_text);
        let bidi_info = BidiInfo::new(&all_paragraphs, None);
        assert_eq!(bidi_info.paragraphs.len(), 3);

        let p_ltr = Paragraph::new(&bidi_info, &bidi_info.paragraphs[0]);
        let p_rtl = Paragraph::new(&bidi_info, &bidi_info.paragraphs[1]);
        let p_mixed = Paragraph::new(&bidi_info, &bidi_info.paragraphs[2]);

        assert_eq!(p_ltr.level_at(0), LTR_LEVEL);
        assert_eq!(p_rtl.level_at(0), RTL_LEVEL);
        assert_eq!(p_mixed.level_at(0), LTR_LEVEL);
        assert_eq!(p_mixed.info.levels.len(), 54);
        assert_eq!(p_mixed.para.range.start, 28);
        assert_eq!(p_mixed.level_at(ltr_text.len()), RTL_LEVEL);
    }
}

#[cfg(all(feature = "serde", feature = "hardcoded-data", test))]
mod serde_tests {
    use super::*;
    use serde_test::{assert_tokens, Token};

    #[test]
    fn test_levels() {
        let text = "abc אבג";
        let bidi_info = BidiInfo::new(text, None);
        let levels = bidi_info.levels;
        assert_eq!(text.as_bytes().len(), 10);
        assert_eq!(levels.len(), 10);
        assert_tokens(
            &levels,
            &[
                Token::Seq { len: Some(10) },
                Token::NewtypeStruct { name: "Level" },
                Token::U8(0),
                Token::NewtypeStruct { name: "Level" },
                Token::U8(0),
                Token::NewtypeStruct { name: "Level" },
                Token::U8(0),
                Token::NewtypeStruct { name: "Level" },
                Token::U8(0),
                Token::NewtypeStruct { name: "Level" },
                Token::U8(1),
                Token::NewtypeStruct { name: "Level" },
                Token::U8(1),
                Token::NewtypeStruct { name: "Level" },
                Token::U8(1),
                Token::NewtypeStruct { name: "Level" },
                Token::U8(1),
                Token::NewtypeStruct { name: "Level" },
                Token::U8(1),
                Token::NewtypeStruct { name: "Level" },
                Token::U8(1),
                Token::SeqEnd,
            ],
        );
    }
}
unicode-bidi-0.3.13/src/prepare.rs000064400000000000000000000356331046102023000150500ustar  00000000000000
// Copyright 2015 The Servo Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! 3.3.3 Preparations for Implicit Processing
//!
//! <http://www.unicode.org/reports/tr9/#Preparations_for_Implicit_Processing>

use alloc::vec::Vec;
use core::cmp::max;
use core::ops::Range;

use super::level::Level;
use super::BidiClass::{self, *};

/// A maximal substring of characters with the same embedding level.
///
/// Represented as a range of byte indices.
pub type LevelRun = Range<usize>;

/// Output of `isolating_run_sequences` (steps X9-X10)
#[derive(Debug, PartialEq)]
pub struct IsolatingRunSequence {
    pub runs: Vec<LevelRun>,
    pub sos: BidiClass, // Start-of-sequence type.
    pub eos: BidiClass, // End-of-sequence type.
}

/// Compute the set of isolating run sequences.
/// /// An isolating run sequence is a maximal sequence of level runs such that for all level runs /// except the last one in the sequence, the last character of the run is an isolate initiator /// whose matching PDI is the first character of the next level run in the sequence. /// /// Note: This function does *not* return the sequences in order by their first characters. #[cfg_attr(feature = "flame_it", flamer::flame)] pub fn isolating_run_sequences( para_level: Level, original_classes: &[BidiClass], levels: &[Level], ) -> Vec { let runs = level_runs(levels, original_classes); // Compute the set of isolating run sequences. // let mut sequences = Vec::with_capacity(runs.len()); // When we encounter an isolate initiator, we push the current sequence onto the // stack so we can resume it after the matching PDI. let mut stack = vec![Vec::new()]; for run in runs { assert!(run.len() > 0); assert!(!stack.is_empty()); let start_class = original_classes[run.start]; let end_class = original_classes[run.end - 1]; let mut sequence = if start_class == PDI && stack.len() > 1 { // Continue a previous sequence interrupted by an isolate. stack.pop().unwrap() } else { // Start a new sequence. Vec::new() }; sequence.push(run); if let RLI | LRI | FSI = end_class { // Resume this sequence after the isolate. stack.push(sequence); } else { // This sequence is finished. sequences.push(sequence); } } // Pop any remaning sequences off the stack. sequences.extend(stack.into_iter().rev().filter(|seq| !seq.is_empty())); // Determine the `sos` and `eos` class for each sequence. 
// sequences .into_iter() .map(|sequence: Vec| { assert!(!sequence.is_empty()); let mut result = IsolatingRunSequence { runs: sequence, sos: L, eos: L, }; let start_of_seq = result.runs[0].start; let runs_len = result.runs.len(); let end_of_seq = result.runs[runs_len - 1].end; // > (not counting characters removed by X9) let seq_level = result .iter_forwards_from(start_of_seq, 0) .filter(|i| not_removed_by_x9(&original_classes[*i])) .map(|i| levels[i]) .next() .unwrap_or(levels[start_of_seq]); // XXXManishearth the spec talks of a start and end level, // but for a given IRS the two should be equivalent, yes? let end_level = result .iter_backwards_from(end_of_seq, runs_len - 1) .filter(|i| not_removed_by_x9(&original_classes[*i])) .map(|i| levels[i]) .next() .unwrap_or(levels[end_of_seq - 1]); #[cfg(test)] for run in result.runs.clone() { for idx in run { if not_removed_by_x9(&original_classes[idx]) { assert_eq!(seq_level, levels[idx]); } } } // Get the level of the last non-removed char before the runs. let pred_level = match original_classes[..start_of_seq] .iter() .rposition(not_removed_by_x9) { Some(idx) => levels[idx], None => para_level, }; // Get the last non-removed character to check if it is an isolate initiator. // The spec calls for an unmatched one, but matched isolate initiators // will never be at the end of a level run (otherwise there would be more to the run). // We unwrap_or(BN) because BN marks removed classes and it won't matter for the check. let last_non_removed = original_classes[..end_of_seq] .iter() .copied() .rev() .find(not_removed_by_x9) .unwrap_or(BN); // Get the level of the next non-removed char after the runs. let succ_level = if let RLI | LRI | FSI = last_non_removed { para_level } else { match original_classes[end_of_seq..] 
.iter() .position(not_removed_by_x9) { Some(idx) => levels[end_of_seq + idx], None => para_level, } }; result.sos = max(seq_level, pred_level).bidi_class(); result.eos = max(end_level, succ_level).bidi_class(); result }) .collect() } impl IsolatingRunSequence { /// Returns the full range of text represented by this isolating run sequence pub(crate) fn text_range(&self) -> Range { if let (Some(start), Some(end)) = (self.runs.first(), self.runs.last()) { start.start..end.end } else { return 0..0; } } /// Given a text-relative position `pos` and an index of the level run it is in, /// produce an iterator of all characters after and pos (`pos..`) that are in this /// run sequence pub(crate) fn iter_forwards_from( &self, pos: usize, level_run_index: usize, ) -> impl Iterator + '_ { let runs = &self.runs[level_run_index..]; // Check that it is in range // (we can't use contains() since we want an inclusive range) #[cfg(feature = "std")] debug_assert!(runs[0].start <= pos && pos <= runs[0].end); (pos..runs[0].end).chain(runs[1..].iter().flat_map(Clone::clone)) } /// Given a text-relative position `pos` and an index of the level run it is in, /// produce an iterator of all characters before and excludingpos (`..pos`) that are in this /// run sequence pub(crate) fn iter_backwards_from( &self, pos: usize, level_run_index: usize, ) -> impl Iterator + '_ { let prev_runs = &self.runs[..level_run_index]; let current = &self.runs[level_run_index]; // Check that it is in range // (we can't use contains() since we want an inclusive range) #[cfg(feature = "std")] debug_assert!(current.start <= pos && pos <= current.end); (current.start..pos) .rev() .chain(prev_runs.iter().rev().flat_map(Clone::clone)) } } /// Finds the level runs in a paragraph. 
/// /// fn level_runs(levels: &[Level], original_classes: &[BidiClass]) -> Vec { assert_eq!(levels.len(), original_classes.len()); let mut runs = Vec::new(); if levels.is_empty() { return runs; } let mut current_run_level = levels[0]; let mut current_run_start = 0; for i in 1..levels.len() { if !removed_by_x9(original_classes[i]) && levels[i] != current_run_level { // End the last run and start a new one. runs.push(current_run_start..i); current_run_level = levels[i]; current_run_start = i; } } runs.push(current_run_start..levels.len()); runs } /// Should this character be ignored in steps after X9? /// /// pub fn removed_by_x9(class: BidiClass) -> bool { match class { RLE | LRE | RLO | LRO | PDF | BN => true, _ => false, } } // For use as a predicate for `position` / `rposition` pub fn not_removed_by_x9(class: &BidiClass) -> bool { !removed_by_x9(*class) } #[cfg(test)] mod tests { use super::*; #[test] fn test_level_runs() { assert_eq!(level_runs(&Level::vec(&[]), &[]), &[]); assert_eq!( level_runs(&Level::vec(&[0, 0, 0, 1, 1, 2, 0, 0]), &[L; 8]), &[0..3, 3..5, 5..6, 6..8] ); } // From #[rustfmt::skip] #[test] fn test_isolating_run_sequences() { // == Example 1 == // text1·RLE·text2·PDF·RLE·text3·PDF·text4 // index 0 1 2 3 4 5 6 7 let classes = &[L, RLE, L, PDF, RLE, L, PDF, L]; let levels = &[0, 1, 1, 1, 1, 1, 1, 0]; let para_level = Level::ltr(); let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); assert_eq!( sequences.iter().map(|s| s.runs.clone()).collect::>(), vec![vec![0..2], vec![2..7], vec![7..8]] ); // == Example 2 == // text1·RLI·text2·PDI·RLI·text3·PDI·text4 // index 0 1 2 3 4 5 6 7 let classes = &[L, RLI, L, PDI, RLI, L, PDI, L]; let levels = &[0, 0, 1, 0, 0, 1, 0, 0]; let para_level = Level::ltr(); let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); 
assert_eq!( sequences.iter().map(|s| s.runs.clone()).collect::>(), vec![vec![0..2, 3..5, 6..8], vec![2..3], vec![5..6]] ); // == Example 3 == // text1·RLI·text2·LRI·text3·RLE·text4·PDF·text5·PDI·text6·PDI·text7 // index 0 1 2 3 4 5 6 7 8 9 10 11 12 let classes = &[L, RLI, L, LRI, L, RLE, L, PDF, L, PDI, L, PDI, L]; let levels = &[0, 0, 1, 1, 2, 3, 3, 3, 2, 1, 1, 0, 0]; let para_level = Level::ltr(); let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); assert_eq!( sequences.iter().map(|s| s.runs.clone()).collect::>(), vec![vec![0..2, 11..13], vec![2..4, 9..11], vec![4..6], vec![6..8], vec![8..9]] ); } // From #[rustfmt::skip] #[test] fn test_isolating_run_sequences_sos_and_eos() { // == Example 1 == // text1·RLE·text2·LRE·text3·PDF·text4·PDF·RLE·text5·PDF·text6 // index 0 1 2 3 4 5 6 7 8 9 10 11 let classes = &[L, RLE, L, LRE, L, PDF, L, PDF, RLE, L, PDF, L]; let levels = &[0, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 0]; let para_level = Level::ltr(); let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); // text1 assert_eq!( &sequences[0], &IsolatingRunSequence { runs: vec![0..2], sos: L, eos: R, } ); // text2 assert_eq!( &sequences[1], &IsolatingRunSequence { runs: vec![2..4], sos: R, eos: L, } ); // text3 assert_eq!( &sequences[2], &IsolatingRunSequence { runs: vec![4..6], sos: L, eos: L, } ); // text4 text5 assert_eq!( &sequences[3], &IsolatingRunSequence { runs: vec![6..11], sos: L, eos: R, } ); // text6 assert_eq!( &sequences[4], &IsolatingRunSequence { runs: vec![11..12], sos: R, eos: L, } ); // == Example 2 == // text1·RLI·text2·LRI·text3·PDI·text4·PDI·RLI·text5·PDI·text6 // index 0 1 2 3 4 5 6 7 8 9 10 11 let classes = &[L, RLI, L, LRI, L, PDI, L, PDI, RLI, L, PDI, L]; let levels = &[0, 0, 1, 1, 2, 1, 1, 0, 0, 1, 0, 0]; let para_level = Level::ltr(); let mut sequences = 
isolating_run_sequences(para_level, classes, &Level::vec(levels)); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); // text1·RLI·PDI·RLI·PDI·text6 assert_eq!( &sequences[0], &IsolatingRunSequence { runs: vec![0..2, 7..9, 10..12], sos: L, eos: L, } ); // text2·LRI·PDI·text4 assert_eq!( &sequences[1], &IsolatingRunSequence { runs: vec![2..4, 5..7], sos: R, eos: R, } ); // text3 assert_eq!( &sequences[2], &IsolatingRunSequence { runs: vec![4..5], sos: L, eos: L, } ); // text5 assert_eq!( &sequences[3], &IsolatingRunSequence { runs: vec![9..10], sos: R, eos: R, } ); } #[test] fn test_removed_by_x9() { let rem_classes = &[RLE, LRE, RLO, LRO, PDF, BN]; let not_classes = &[L, RLI, AL, LRI, PDI]; for x in rem_classes { assert_eq!(removed_by_x9(*x), true); } for x in not_classes { assert_eq!(removed_by_x9(*x), false); } } #[test] fn test_not_removed_by_x9() { let non_x9_classes = &[L, R, AL, EN, ES, ET, AN, CS, NSM, B, S, WS, ON, LRI, RLI, FSI, PDI]; for x in non_x9_classes { assert_eq!(not_removed_by_x9(&x), true); } } }