unicode-bidi-0.3.7/.appveyor.yml000064400000000000000000000010510072674642500146610ustar 00000000000000install: - ps: Start-FileDownload 'https://static.rust-lang.org/dist/rust-nightly-i686-pc-windows-gnu.exe' - rust-nightly-i686-pc-windows-gnu.exe /VERYSILENT /NORESTART /DIR="C:\Program Files (x86)\Rust" - SET PATH=%PATH%;C:\Program Files (x86)\Rust\bin - rustc -V - cargo -V - git submodule update --init --recursive build: false environment: RUST_BACKTRACE: full test_script: - cargo build --verbose --all - cargo doc --verbose --all --no-deps - cargo test --verbose --all - cargo test --verbose --all --features serde unicode-bidi-0.3.7/.cargo_vcs_info.json0000644000000001120000000000100133700ustar { "git": { "sha1": "121c26b55190e14cac4b56fea02bb1e4420c4c00" } } unicode-bidi-0.3.7/.github/workflows/main.yml000064400000000000000000000014270072674642500172660ustar 00000000000000name: CI on: push: branches: ['master'] pull_request: jobs: Test: strategy: matrix: os: [ubuntu-latest] rust: [1.36.0, stable, beta, nightly] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v2 - uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: ${{ matrix.rust }} override: true - uses: actions-rs/cargo@v1 with: command: build args: --all-targets - uses: actions-rs/cargo@v1 with: command: test - uses: actions-rs/cargo@v1 with: command: test args: --features "serde" - uses: actions-rs/cargo@v1 with: command: test args: --no-default-features unicode-bidi-0.3.7/.gitignore000064400000000000000000000000430072674642500142030ustar 00000000000000Cargo.lock /data/ /target/ /*.html unicode-bidi-0.3.7/.rustfmt.toml000064400000000000000000000002240072674642500146730ustar 00000000000000array_layout = "Block" array_width = 80 fn_args_layout = "Block" fn_brace_style = "SameLineWhere" fn_call_style = "Block" generics_indent = "Block" unicode-bidi-0.3.7/AUTHORS000064400000000000000000000001710072674642500132650ustar 00000000000000This software was written by the following people: Matt Brubeck Behnam Esfahbod unicode-bidi-0.3.7/COPYRIGHT000064400000000000000000000006340072674642500135140ustar 00000000000000This project is copyright 2015, The Servo Project Developers (given in the file AUTHORS). Licensed under the Apache License, Version 2.0 or the MIT license , at your option. All files in the project carrying such notice may not be copied, modified, or distributed except according to those terms. unicode-bidi-0.3.7/Cargo.lock0000644000000106040000000000100113520ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 3 [[package]] name = "flame" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fc2706461e1ee94f55cab2ed2e3d34ae9536cfa830358ef80acff1a3dacab30" dependencies = [ "lazy_static", "serde", "serde_derive", "serde_json", "thread-id", ] [[package]] name = "flamer" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "36b732da54fd4ea34452f2431cf464ac7be94ca4b339c9cd3d3d12eb06fe7aab" dependencies = [ "flame", "quote", "syn", ] [[package]] name = "itoa" version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" [[package]] name = "lazy_static" version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76f033c7ad61445c5b347c7382dd1237847eb1bce590fe50365dcb33d546be73" [[package]] name = "libc" version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9385f66bf6105b241aa65a61cb923ef20efc665cb9f9bb50ac2f0c4b7f378d41" [[package]] name = "proc-macro2" version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a152013215dca273577e18d2bf00fa862b89b24169fb78c4c95aeb07992c9cec" dependencies = [ "unicode-xid", ] [[package]] name = "quote" version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" dependencies = [ "proc-macro2", ] [[package]] name = "redox_syscall" version = "0.1.57" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" [[package]] name = "ryu" version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" [[package]] name = "serde" version = "1.0.125" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "558dc50e1a5a5fa7112ca2ce4effcb321b0300c0d4ccf0776a9f60cd89031171" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" version = "1.0.125" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b093b7a2bb58203b5da3056c05b4ec1fed827dcfdb37347a8841695263b3d06d" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "serde_json" version = "1.0.64" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "799e97dc9fdae36a5c8b8f2cae9ce2ee9fdce2058c57a93e6099d919fd982f79" dependencies = [ "itoa", "ryu", "serde", ] [[package]] name = "serde_test" version = "1.0.125" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4bb5fef7eaf5a97917567183607ac4224c5b451c15023930f23b937cce879fe" dependencies = [ "serde", ] [[package]] name = "syn" version = "1.0.68" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3ce15dd3ed8aa2f8eeac4716d6ef5ab58b6b9256db41d7e1a0224c2788e8fd87" dependencies = [ "proc-macro2", "quote", "unicode-xid", ] [[package]] name = "thread-id" version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7fbf4c9d56b320106cd64fd024dadfa0be7cb4706725fc44a7d7ce952d820c1" dependencies = [ "libc", "redox_syscall", "winapi", ] [[package]] name = "unicode-bidi" version = "0.3.7" dependencies = [ "flame", "flamer", "serde", "serde_test", ] [[package]] name = "unicode-xid" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" [[package]] name = "winapi" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" dependencies = [ "winapi-i686-pc-windows-gnu", "winapi-x86_64-pc-windows-gnu", ] [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" unicode-bidi-0.3.7/Cargo.toml0000644000000027470000000000100114060ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2018" name = "unicode-bidi" version = "0.3.7" authors = ["The Servo Project Developers"] exclude = ["benches/**", "data/**", "examples/**", "tests/**", "tools/**"] description = "Implementation of the Unicode Bidirectional Algorithm" documentation = "https://docs.rs/unicode-bidi/" readme = "README.md" keywords = ["rtl", "unicode", "text", "layout", "bidi"] categories = ["no-std", "encoding", "text-processing"] license = "MIT / Apache-2.0" repository = "https://github.com/servo/unicode-bidi" [lib] name = "unicode_bidi" [dependencies.flame] version = "0.2" optional = true [dependencies.flamer] version = "0.4" optional = true [dependencies.serde] version = ">=0.8, <2.0" features = ["derive"] optional = true default-features = false [dev-dependencies.serde_test] version = ">=0.8, <2.0" [features] bench_it = [] default = ["std"] flame_it = ["flame", "flamer"] std = [] unstable = [] with_serde = ["serde"] [badges.appveyor] repository = "servo/unicode-bidi" [badges.travis-ci] repository = "servo/unicode-bidi" unicode-bidi-0.3.7/Cargo.toml.orig000064400000000000000000000025110072674642500151040ustar 00000000000000[package] name = "unicode-bidi" version = "0.3.7" authors = ["The Servo Project Developers"] license = "MIT / Apache-2.0" description = "Implementation of the Unicode Bidirectional Algorithm" repository = "https://github.com/servo/unicode-bidi" documentation = "https://docs.rs/unicode-bidi/" keywords = ["rtl", "unicode", "text", "layout", "bidi"] readme="README.md" edition = "2018" categories = [ "no-std", "encoding", "text-processing", ] # No data is shipped; benches, examples and tests also depend on data. exclude = [ "benches/**", "data/**", "examples/**", "tests/**", "tools/**", ] [badges] travis-ci = { repository = "servo/unicode-bidi" } appveyor = { repository = "servo/unicode-bidi" } [lib] name = "unicode_bidi" [dependencies] flame = { version = "0.2", optional = true } flamer = { version = "0.4", optional = true } serde = { version = ">=0.8, <2.0", default-features = false, optional = true, features = ["derive"] } [dev-dependencies] serde_test = ">=0.8, <2.0" [features] # Note: We don't actually use the `std` feature for anything other than making # doctests work. But it may come in handy in the future. default = ["std"] std = [] unstable = [] # travis-cargo needs it bench_it = [] flame_it = ["flame", "flamer"] with_serde = ["serde"] # DEPRECATED, please use `serde` feature, instead. unicode-bidi-0.3.7/LICENSE-APACHE000064400000000000000000000251370072674642500141520ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. unicode-bidi-0.3.7/LICENSE-MIT000064400000000000000000000020570072674642500136560ustar 00000000000000Copyright (c) 2015 The Rust Project Developers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. unicode-bidi-0.3.7/README.md000064400000000000000000000010530072674642500134740ustar 00000000000000# unicode-bidi This crate implements the [Unicode Bidirectional Algorithm][tr9] for display of mixed right-to-left and left-to-right text. It is written in safe Rust, compatible with the current stable release. [Documentation](https://docs.rs/unicode-bidi/) [![Travis-CI](https://travis-ci.org/servo/unicode-bidi.svg?branch=master)](https://travis-ci.org/servo/unicode-bidi) [![AppVeyor](https://img.shields.io/appveyor/ci/servo/unicode-bidi/master.svg)](https://ci.appveyor.com/project/servo/unicode-bidi) [tr9]: http://www.unicode.org/reports/tr9/ unicode-bidi-0.3.7/src/char_data/mod.rs000064400000000000000000000104750072674642500160470ustar 00000000000000// Copyright 2015 The Servo Project Developers. See the // COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! Accessor for `Bidi_Class` property from Unicode Character Database (UCD) mod tables; pub use self::tables::{BidiClass, UNICODE_VERSION}; use core::cmp::Ordering::{Equal, Less, Greater}; use core::char; use self::tables::bidi_class_table; use crate::BidiClass::*; /// Find the `BidiClass` of a single char. pub fn bidi_class(c: char) -> BidiClass { bsearch_range_value_table(c, bidi_class_table) } pub fn is_rtl(bidi_class: BidiClass) -> bool { match bidi_class { RLE | RLO | RLI => true, _ => false, } } fn bsearch_range_value_table(c: char, r: &'static [(char, char, BidiClass)]) -> BidiClass { match r.binary_search_by(|&(lo, hi, _)| if lo <= c && c <= hi { Equal } else if hi < c { Less } else { Greater }) { Ok(idx) => { let (_, _, cat) = r[idx]; cat } // UCD/extracted/DerivedBidiClass.txt: "All code points not explicitly listed // for Bidi_Class have the value Left_To_Right (L)." Err(_) => L, } } #[cfg(test)] mod tests { use super::*; #[test] fn test_ascii() { assert_eq!(bidi_class('\u{0000}'), BN); assert_eq!(bidi_class('\u{0040}'), ON); assert_eq!(bidi_class('\u{0041}'), L); assert_eq!(bidi_class('\u{0062}'), L); assert_eq!(bidi_class('\u{007F}'), BN); } #[test] fn test_bmp() { // Hebrew assert_eq!(bidi_class('\u{0590}'), R); assert_eq!(bidi_class('\u{05D0}'), R); assert_eq!(bidi_class('\u{05D1}'), R); assert_eq!(bidi_class('\u{05FF}'), R); // Arabic assert_eq!(bidi_class('\u{0600}'), AN); assert_eq!(bidi_class('\u{0627}'), AL); assert_eq!(bidi_class('\u{07BF}'), AL); // Default R + Arabic Extras assert_eq!(bidi_class('\u{07C0}'), R); assert_eq!(bidi_class('\u{085F}'), R); assert_eq!(bidi_class('\u{0860}'), AL); assert_eq!(bidi_class('\u{0870}'), R); assert_eq!(bidi_class('\u{089F}'), R); assert_eq!(bidi_class('\u{08A0}'), AL); assert_eq!(bidi_class('\u{089F}'), R); assert_eq!(bidi_class('\u{08FF}'), NSM); // Default ET assert_eq!(bidi_class('\u{20A0}'), ET); assert_eq!(bidi_class('\u{20CF}'), ET); // Arabic Presentation Forms assert_eq!(bidi_class('\u{FB1D}'), R); assert_eq!(bidi_class('\u{FB4F}'), R); assert_eq!(bidi_class('\u{FB50}'), AL); assert_eq!(bidi_class('\u{FDCF}'), AL); assert_eq!(bidi_class('\u{FDF0}'), AL); assert_eq!(bidi_class('\u{FDFF}'), AL); assert_eq!(bidi_class('\u{FE70}'), AL); assert_eq!(bidi_class('\u{FEFE}'), AL); assert_eq!(bidi_class('\u{FEFF}'), BN); // noncharacters assert_eq!(bidi_class('\u{FDD0}'), L); assert_eq!(bidi_class('\u{FDD1}'), L); assert_eq!(bidi_class('\u{FDEE}'), L); assert_eq!(bidi_class('\u{FDEF}'), L); assert_eq!(bidi_class('\u{FFFE}'), L); assert_eq!(bidi_class('\u{FFFF}'), L); } #[test] fn test_smp() { // Default AL + R assert_eq!(bidi_class('\u{10800}'), R); assert_eq!(bidi_class('\u{10FFF}'), R); assert_eq!(bidi_class('\u{1E800}'), R); assert_eq!(bidi_class('\u{1EDFF}'), R); assert_eq!(bidi_class('\u{1EE00}'), AL); assert_eq!(bidi_class('\u{1EEFF}'), AL); assert_eq!(bidi_class('\u{1EF00}'), R); assert_eq!(bidi_class('\u{1EFFF}'), R); } #[test] fn test_unassigned_planes() { assert_eq!(bidi_class('\u{30000}'), L); assert_eq!(bidi_class('\u{40000}'), L); assert_eq!(bidi_class('\u{50000}'), L); assert_eq!(bidi_class('\u{60000}'), L); assert_eq!(bidi_class('\u{70000}'), L); assert_eq!(bidi_class('\u{80000}'), L); assert_eq!(bidi_class('\u{90000}'), L); assert_eq!(bidi_class('\u{a0000}'), L); } } unicode-bidi-0.3.7/src/char_data/tables.rs000064400000000000000000001200340072674642500165330ustar 00000000000000// NOTE: // The following code was generated by "tools/generate.py". do not edit directly #![allow(missing_docs, non_upper_case_globals, non_snake_case)] #![cfg_attr(rustfmt, rustfmt_skip)] /// The [Unicode version](http://www.unicode.org/versions/) of data pub const UNICODE_VERSION: (u64, u64, u64) = (10, 0, 0); #[allow(non_camel_case_types)] #[derive(Clone, Copy, Debug, PartialEq, Eq)] /// Represents values of the Unicode character property /// [`Bidi_Class`](http://www.unicode.org/reports/tr44/#Bidi_Class), also /// known as the *bidirectional character type*. /// /// * /// * pub enum BidiClass { AL, AN, B, BN, CS, EN, ES, ET, FSI, L, LRE, LRI, LRO, NSM, ON, PDF, PDI, R, RLE, RLI, RLO, S, WS, } use self::BidiClass::*; pub const bidi_class_table: &'static [(char, char, BidiClass)] = &[ ('\u{0}', '\u{8}', BN), ('\u{9}', '\u{9}', S), ('\u{a}', '\u{a}', B), ('\u{b}', '\u{b}', S), ('\u{c}', '\u{c}', WS), ('\u{d}', '\u{d}', B), ('\u{e}', '\u{1b}', BN), ('\u{1c}', '\u{1e}', B), ('\u{1f}', '\u{1f}', S), ('\u{20}', '\u{20}', WS), ('\u{21}', '\u{22}', ON), ('\u{23}', '\u{25}', ET), ('\u{26}', '\u{2a}', ON), ('\u{2b}', '\u{2b}', ES), ('\u{2c}', '\u{2c}', CS), ('\u{2d}', '\u{2d}', ES), ('\u{2e}', '\u{2f}', CS), ('\u{30}', '\u{39}', EN), ('\u{3a}', '\u{3a}', CS), ('\u{3b}', '\u{40}', ON), ('\u{41}', '\u{5a}', L), ('\u{5b}', '\u{60}', ON), ('\u{61}', '\u{7a}', L), ('\u{7b}', '\u{7e}', ON), ('\u{7f}', '\u{84}', BN), ('\u{85}', '\u{85}', B), ('\u{86}', '\u{9f}', BN), ('\u{a0}', '\u{a0}', CS), ('\u{a1}', '\u{a1}', ON), ('\u{a2}', '\u{a5}', ET), ('\u{a6}', '\u{a9}', ON), ('\u{aa}', '\u{aa}', L), ('\u{ab}', '\u{ac}', ON), ('\u{ad}', '\u{ad}', BN), ('\u{ae}', '\u{af}', ON), ('\u{b0}', '\u{b1}', ET), ('\u{b2}', '\u{b3}', EN), ('\u{b4}', '\u{b4}', ON), ('\u{b5}', '\u{b5}', L), ('\u{b6}', '\u{b8}', ON), ('\u{b9}', '\u{b9}', EN), ('\u{ba}', '\u{ba}', L), ('\u{bb}', '\u{bf}', ON), ('\u{c0}', '\u{d6}', L), ('\u{d7}', '\u{d7}', ON), ('\u{d8}', '\u{f6}', L), ('\u{f7}', '\u{f7}', ON), ('\u{f8}', '\u{2b8}', L), ('\u{2b9}', '\u{2ba}', ON), ('\u{2bb}', '\u{2c1}', L), ('\u{2c2}', '\u{2cf}', ON), ('\u{2d0}', '\u{2d1}', L), ('\u{2d2}', '\u{2df}', ON), ('\u{2e0}', '\u{2e4}', L), ('\u{2e5}', '\u{2ed}', ON), ('\u{2ee}', '\u{2ee}', L), ('\u{2ef}', '\u{2ff}', ON), ('\u{300}', '\u{36f}', NSM), ('\u{370}', '\u{373}', L), ('\u{374}', '\u{375}', ON), ('\u{376}', '\u{377}', L), ('\u{37a}', '\u{37d}', L), ('\u{37e}', '\u{37e}', ON), ('\u{37f}', '\u{37f}', L), ('\u{384}', '\u{385}', ON), ('\u{386}', '\u{386}', L), ('\u{387}', '\u{387}', ON), ('\u{388}', '\u{38a}', L), ('\u{38c}', '\u{38c}', L), ('\u{38e}', '\u{3a1}', L), ('\u{3a3}', '\u{3f5}', L), ('\u{3f6}', '\u{3f6}', ON), ('\u{3f7}', '\u{482}', L), ('\u{483}', '\u{489}', NSM), ('\u{48a}', '\u{52f}', L), ('\u{531}', '\u{556}', L), ('\u{559}', '\u{55f}', L), ('\u{561}', '\u{587}', L), ('\u{589}', '\u{589}', L), ('\u{58a}', '\u{58a}', ON), ('\u{58d}', '\u{58e}', ON), ('\u{58f}', '\u{58f}', ET), ('\u{590}', '\u{590}', R), ('\u{591}', '\u{5bd}', NSM), ('\u{5be}', '\u{5be}', R), ('\u{5bf}', '\u{5bf}', NSM), ('\u{5c0}', '\u{5c0}', R), ('\u{5c1}', '\u{5c2}', NSM), ('\u{5c3}', '\u{5c3}', R), ('\u{5c4}', '\u{5c5}', NSM), ('\u{5c6}', '\u{5c6}', R), ('\u{5c7}', '\u{5c7}', NSM), ('\u{5c8}', '\u{5ff}', R), ('\u{600}', '\u{605}', AN), ('\u{606}', '\u{607}', ON), ('\u{608}', '\u{608}', AL), ('\u{609}', '\u{60a}', ET), ('\u{60b}', '\u{60b}', AL), ('\u{60c}', '\u{60c}', CS), ('\u{60d}', '\u{60d}', AL), ('\u{60e}', '\u{60f}', ON), ('\u{610}', '\u{61a}', NSM), ('\u{61b}', '\u{64a}', AL), ('\u{64b}', '\u{65f}', NSM), ('\u{660}', '\u{669}', AN), ('\u{66a}', '\u{66a}', ET), ('\u{66b}', '\u{66c}', AN), ('\u{66d}', '\u{66f}', AL), ('\u{670}', '\u{670}', NSM), ('\u{671}', '\u{6d5}', AL), ('\u{6d6}', '\u{6dc}', NSM), ('\u{6dd}', '\u{6dd}', AN), ('\u{6de}', '\u{6de}', ON), ('\u{6df}', '\u{6e4}', NSM), ('\u{6e5}', '\u{6e6}', AL), ('\u{6e7}', '\u{6e8}', NSM), ('\u{6e9}', '\u{6e9}', ON), ('\u{6ea}', '\u{6ed}', NSM), ('\u{6ee}', '\u{6ef}', AL), ('\u{6f0}', '\u{6f9}', EN), ('\u{6fa}', '\u{710}', AL), ('\u{711}', '\u{711}', NSM), ('\u{712}', '\u{72f}', AL), ('\u{730}', '\u{74a}', NSM), ('\u{74b}', '\u{7a5}', AL), ('\u{7a6}', '\u{7b0}', NSM), ('\u{7b1}', '\u{7bf}', AL), ('\u{7c0}', '\u{7ea}', R), ('\u{7eb}', '\u{7f3}', NSM), ('\u{7f4}', '\u{7f5}', R), ('\u{7f6}', '\u{7f9}', ON), ('\u{7fa}', '\u{815}', R), ('\u{816}', '\u{819}', NSM), ('\u{81a}', '\u{81a}', R), ('\u{81b}', '\u{823}', NSM), ('\u{824}', '\u{824}', R), ('\u{825}', '\u{827}', NSM), ('\u{828}', '\u{828}', R), ('\u{829}', '\u{82d}', NSM), ('\u{82e}', '\u{858}', R), ('\u{859}', '\u{85b}', NSM), ('\u{85c}', '\u{85f}', R), ('\u{860}', '\u{86a}', AL), ('\u{86b}', '\u{89f}', R), ('\u{8a0}', '\u{8d3}', AL), ('\u{8d4}', '\u{8e1}', NSM), ('\u{8e2}', '\u{8e2}', AN), ('\u{8e3}', '\u{902}', NSM), ('\u{903}', '\u{939}', L), ('\u{93a}', '\u{93a}', NSM), ('\u{93b}', '\u{93b}', L), ('\u{93c}', '\u{93c}', NSM), ('\u{93d}', '\u{940}', L), ('\u{941}', '\u{948}', NSM), ('\u{949}', '\u{94c}', L), ('\u{94d}', '\u{94d}', NSM), ('\u{94e}', '\u{950}', L), ('\u{951}', '\u{957}', NSM), ('\u{958}', '\u{961}', L), ('\u{962}', '\u{963}', NSM), ('\u{964}', '\u{980}', L), ('\u{981}', '\u{981}', NSM), ('\u{982}', '\u{983}', L), ('\u{985}', '\u{98c}', L), ('\u{98f}', '\u{990}', L), ('\u{993}', '\u{9a8}', L), ('\u{9aa}', '\u{9b0}', L), ('\u{9b2}', '\u{9b2}', L), ('\u{9b6}', '\u{9b9}', L), ('\u{9bc}', '\u{9bc}', NSM), ('\u{9bd}', '\u{9c0}', L), ('\u{9c1}', '\u{9c4}', NSM), ('\u{9c7}', '\u{9c8}', L), ('\u{9cb}', '\u{9cc}', L), ('\u{9cd}', '\u{9cd}', NSM), ('\u{9ce}', '\u{9ce}', L), ('\u{9d7}', '\u{9d7}', L), ('\u{9dc}', '\u{9dd}', L), ('\u{9df}', '\u{9e1}', L), ('\u{9e2}', '\u{9e3}', NSM), ('\u{9e6}', '\u{9f1}', L), ('\u{9f2}', '\u{9f3}', ET), ('\u{9f4}', '\u{9fa}', L), ('\u{9fb}', '\u{9fb}', ET), ('\u{9fc}', '\u{9fd}', L), ('\u{a01}', '\u{a02}', NSM), ('\u{a03}', '\u{a03}', L), ('\u{a05}', '\u{a0a}', L), ('\u{a0f}', '\u{a10}', L), ('\u{a13}', '\u{a28}', L), ('\u{a2a}', '\u{a30}', L), ('\u{a32}', '\u{a33}', L), ('\u{a35}', '\u{a36}', L), ('\u{a38}', '\u{a39}', L), ('\u{a3c}', '\u{a3c}', NSM), ('\u{a3e}', '\u{a40}', L), ('\u{a41}', '\u{a42}', NSM), ('\u{a47}', '\u{a48}', NSM), ('\u{a4b}', '\u{a4d}', NSM), ('\u{a51}', '\u{a51}', NSM), ('\u{a59}', '\u{a5c}', L), ('\u{a5e}', '\u{a5e}', L), ('\u{a66}', '\u{a6f}', L), ('\u{a70}', '\u{a71}', NSM), ('\u{a72}', '\u{a74}', L), ('\u{a75}', '\u{a75}', NSM), ('\u{a81}', '\u{a82}', NSM), ('\u{a83}', '\u{a83}', L), ('\u{a85}', '\u{a8d}', L), ('\u{a8f}', '\u{a91}', L), ('\u{a93}', '\u{aa8}', L), ('\u{aaa}', '\u{ab0}', L), ('\u{ab2}', '\u{ab3}', L), ('\u{ab5}', '\u{ab9}', L), ('\u{abc}', '\u{abc}', NSM), ('\u{abd}', '\u{ac0}', L), ('\u{ac1}', '\u{ac5}', NSM), ('\u{ac7}', '\u{ac8}', NSM), ('\u{ac9}', '\u{ac9}', L), ('\u{acb}', '\u{acc}', L), ('\u{acd}', '\u{acd}', NSM), ('\u{ad0}', '\u{ad0}', L), ('\u{ae0}', '\u{ae1}', L), ('\u{ae2}', '\u{ae3}', NSM), ('\u{ae6}', '\u{af0}', L), ('\u{af1}', '\u{af1}', ET), ('\u{af9}', '\u{af9}', L), ('\u{afa}', '\u{aff}', NSM), ('\u{b01}', '\u{b01}', NSM), ('\u{b02}', '\u{b03}', L), ('\u{b05}', '\u{b0c}', L), ('\u{b0f}', '\u{b10}', L), ('\u{b13}', '\u{b28}', L), ('\u{b2a}', '\u{b30}', L), ('\u{b32}', '\u{b33}', L), ('\u{b35}', '\u{b39}', L), ('\u{b3c}', '\u{b3c}', NSM), ('\u{b3d}', '\u{b3e}', L), ('\u{b3f}', '\u{b3f}', NSM), ('\u{b40}', '\u{b40}', L), ('\u{b41}', '\u{b44}', NSM), ('\u{b47}', '\u{b48}', L), ('\u{b4b}', '\u{b4c}', L), ('\u{b4d}', '\u{b4d}', NSM), ('\u{b56}', '\u{b56}', NSM), ('\u{b57}', '\u{b57}', L), ('\u{b5c}', '\u{b5d}', L), ('\u{b5f}', '\u{b61}', L), ('\u{b62}', '\u{b63}', NSM), ('\u{b66}', '\u{b77}', L), ('\u{b82}', '\u{b82}', NSM), ('\u{b83}', '\u{b83}', L), ('\u{b85}', '\u{b8a}', L), ('\u{b8e}', '\u{b90}', L), ('\u{b92}', '\u{b95}', L), ('\u{b99}', '\u{b9a}', L), ('\u{b9c}', '\u{b9c}', L), ('\u{b9e}', '\u{b9f}', L), ('\u{ba3}', '\u{ba4}', L), ('\u{ba8}', '\u{baa}', L), ('\u{bae}', '\u{bb9}', L), ('\u{bbe}', '\u{bbf}', L), ('\u{bc0}', '\u{bc0}', NSM), ('\u{bc1}', '\u{bc2}', L), ('\u{bc6}', '\u{bc8}', L), ('\u{bca}', '\u{bcc}', L), ('\u{bcd}', '\u{bcd}', NSM), ('\u{bd0}', '\u{bd0}', L), ('\u{bd7}', '\u{bd7}', L), ('\u{be6}', '\u{bf2}', L), ('\u{bf3}', '\u{bf8}', ON), ('\u{bf9}', '\u{bf9}', ET), ('\u{bfa}', '\u{bfa}', ON), ('\u{c00}', '\u{c00}', NSM), ('\u{c01}', '\u{c03}', L), ('\u{c05}', '\u{c0c}', L), ('\u{c0e}', '\u{c10}', L), ('\u{c12}', '\u{c28}', L), ('\u{c2a}', '\u{c39}', L), ('\u{c3d}', '\u{c3d}', L), ('\u{c3e}', '\u{c40}', NSM), ('\u{c41}', '\u{c44}', L), ('\u{c46}', '\u{c48}', NSM), ('\u{c4a}', '\u{c4d}', NSM), ('\u{c55}', '\u{c56}', NSM), ('\u{c58}', '\u{c5a}', L), ('\u{c60}', '\u{c61}', L), ('\u{c62}', '\u{c63}', NSM), ('\u{c66}', '\u{c6f}', L), ('\u{c78}', '\u{c7e}', ON), ('\u{c7f}', '\u{c80}', L), ('\u{c81}', '\u{c81}', NSM), ('\u{c82}', '\u{c83}', L), ('\u{c85}', '\u{c8c}', L), ('\u{c8e}', '\u{c90}', L), ('\u{c92}', '\u{ca8}', L), ('\u{caa}', '\u{cb3}', L), ('\u{cb5}', '\u{cb9}', L), ('\u{cbc}', '\u{cbc}', NSM), ('\u{cbd}', '\u{cc4}', L), ('\u{cc6}', '\u{cc8}', L), ('\u{cca}', '\u{ccb}', L), ('\u{ccc}', '\u{ccd}', NSM), ('\u{cd5}', '\u{cd6}', L), ('\u{cde}', '\u{cde}', L), ('\u{ce0}', '\u{ce1}', L), ('\u{ce2}', '\u{ce3}', NSM), ('\u{ce6}', '\u{cef}', L), ('\u{cf1}', '\u{cf2}', L), ('\u{d00}', '\u{d01}', NSM), ('\u{d02}', '\u{d03}', L), ('\u{d05}', '\u{d0c}', L), ('\u{d0e}', '\u{d10}', L), ('\u{d12}', '\u{d3a}', L), ('\u{d3b}', '\u{d3c}', NSM), ('\u{d3d}', '\u{d40}', L), ('\u{d41}', '\u{d44}', NSM), ('\u{d46}', '\u{d48}', L), ('\u{d4a}', '\u{d4c}', L), ('\u{d4d}', '\u{d4d}', NSM), ('\u{d4e}', '\u{d4f}', L), ('\u{d54}', '\u{d61}', L), ('\u{d62}', '\u{d63}', NSM), ('\u{d66}', '\u{d7f}', L), ('\u{d82}', '\u{d83}', L), ('\u{d85}', '\u{d96}', L), ('\u{d9a}', '\u{db1}', L), ('\u{db3}', '\u{dbb}', L), ('\u{dbd}', '\u{dbd}', L), ('\u{dc0}', '\u{dc6}', L), ('\u{dca}', '\u{dca}', NSM), ('\u{dcf}', '\u{dd1}', L), ('\u{dd2}', '\u{dd4}', NSM), ('\u{dd6}', '\u{dd6}', NSM), ('\u{dd8}', '\u{ddf}', L), ('\u{de6}', '\u{def}', L), ('\u{df2}', '\u{df4}', L), ('\u{e01}', '\u{e30}', L), ('\u{e31}', '\u{e31}', NSM), ('\u{e32}', '\u{e33}', L), ('\u{e34}', '\u{e3a}', NSM), ('\u{e3f}', '\u{e3f}', ET), ('\u{e40}', '\u{e46}', L), ('\u{e47}', '\u{e4e}', NSM), ('\u{e4f}', '\u{e5b}', L), ('\u{e81}', '\u{e82}', L), ('\u{e84}', '\u{e84}', L), ('\u{e87}', '\u{e88}', L), ('\u{e8a}', '\u{e8a}', L), ('\u{e8d}', '\u{e8d}', L), ('\u{e94}', '\u{e97}', L), ('\u{e99}', '\u{e9f}', L), ('\u{ea1}', '\u{ea3}', L), ('\u{ea5}', '\u{ea5}', L), ('\u{ea7}', '\u{ea7}', L), ('\u{eaa}', '\u{eab}', L), ('\u{ead}', '\u{eb0}', L), ('\u{eb1}', '\u{eb1}', NSM), ('\u{eb2}', '\u{eb3}', L), ('\u{eb4}', '\u{eb9}', NSM), ('\u{ebb}', '\u{ebc}', NSM), ('\u{ebd}', '\u{ebd}', L), ('\u{ec0}', '\u{ec4}', L), ('\u{ec6}', '\u{ec6}', L), ('\u{ec8}', '\u{ecd}', NSM), ('\u{ed0}', '\u{ed9}', L), ('\u{edc}', '\u{edf}', L), ('\u{f00}', '\u{f17}', L), ('\u{f18}', '\u{f19}', NSM), ('\u{f1a}', '\u{f34}', L), ('\u{f35}', '\u{f35}', NSM), ('\u{f36}', '\u{f36}', L), ('\u{f37}', '\u{f37}', NSM), ('\u{f38}', '\u{f38}', L), ('\u{f39}', '\u{f39}', NSM), ('\u{f3a}', '\u{f3d}', ON), ('\u{f3e}', '\u{f47}', L), ('\u{f49}', '\u{f6c}', L), ('\u{f71}', '\u{f7e}', NSM), ('\u{f7f}', '\u{f7f}', L), ('\u{f80}', '\u{f84}', NSM), ('\u{f85}', '\u{f85}', L), ('\u{f86}', '\u{f87}', NSM), ('\u{f88}', '\u{f8c}', L), ('\u{f8d}', '\u{f97}', NSM), ('\u{f99}', '\u{fbc}', NSM), ('\u{fbe}', '\u{fc5}', L), ('\u{fc6}', '\u{fc6}', NSM), ('\u{fc7}', '\u{fcc}', L), ('\u{fce}', '\u{fda}', L), ('\u{1000}', '\u{102c}', L), ('\u{102d}', '\u{1030}', NSM), ('\u{1031}', '\u{1031}', L), ('\u{1032}', '\u{1037}', NSM), ('\u{1038}', '\u{1038}', L), ('\u{1039}', '\u{103a}', NSM), ('\u{103b}', '\u{103c}', L), ('\u{103d}', '\u{103e}', NSM), ('\u{103f}', '\u{1057}', L), ('\u{1058}', '\u{1059}', NSM), ('\u{105a}', '\u{105d}', L), ('\u{105e}', '\u{1060}', NSM), ('\u{1061}', '\u{1070}', L), ('\u{1071}', '\u{1074}', NSM), ('\u{1075}', '\u{1081}', L), ('\u{1082}', '\u{1082}', NSM), ('\u{1083}', '\u{1084}', L), ('\u{1085}', '\u{1086}', NSM), ('\u{1087}', '\u{108c}', L), ('\u{108d}', '\u{108d}', NSM), ('\u{108e}', '\u{109c}', L), ('\u{109d}', '\u{109d}', NSM), ('\u{109e}', '\u{10c5}', L), ('\u{10c7}', '\u{10c7}', L), ('\u{10cd}', '\u{10cd}', L), ('\u{10d0}', '\u{1248}', L), ('\u{124a}', '\u{124d}', L), ('\u{1250}', '\u{1256}', L), ('\u{1258}', '\u{1258}', L), ('\u{125a}', '\u{125d}', L), ('\u{1260}', '\u{1288}', L), ('\u{128a}', '\u{128d}', L), ('\u{1290}', '\u{12b0}', L), ('\u{12b2}', '\u{12b5}', L), ('\u{12b8}', '\u{12be}', L), ('\u{12c0}', '\u{12c0}', L), ('\u{12c2}', '\u{12c5}', L), ('\u{12c8}', '\u{12d6}', L), ('\u{12d8}', '\u{1310}', L), ('\u{1312}', '\u{1315}', L), ('\u{1318}', '\u{135a}', L), ('\u{135d}', '\u{135f}', NSM), ('\u{1360}', '\u{137c}', L), ('\u{1380}', '\u{138f}', L), ('\u{1390}', '\u{1399}', ON), ('\u{13a0}', '\u{13f5}', L), ('\u{13f8}', '\u{13fd}', L), ('\u{1400}', '\u{1400}', ON), ('\u{1401}', '\u{167f}', L), ('\u{1680}', '\u{1680}', WS), ('\u{1681}', '\u{169a}', L), ('\u{169b}', '\u{169c}', ON), ('\u{16a0}', '\u{16f8}', L), ('\u{1700}', '\u{170c}', L), ('\u{170e}', '\u{1711}', L), ('\u{1712}', '\u{1714}', NSM), ('\u{1720}', '\u{1731}', L), ('\u{1732}', '\u{1734}', NSM), ('\u{1735}', '\u{1736}', L), ('\u{1740}', '\u{1751}', L), ('\u{1752}', '\u{1753}', NSM), ('\u{1760}', '\u{176c}', L), ('\u{176e}', '\u{1770}', L), ('\u{1772}', '\u{1773}', NSM), ('\u{1780}', '\u{17b3}', L), ('\u{17b4}', '\u{17b5}', NSM), ('\u{17b6}', '\u{17b6}', L), ('\u{17b7}', '\u{17bd}', NSM), ('\u{17be}', '\u{17c5}', L), ('\u{17c6}', '\u{17c6}', NSM), ('\u{17c7}', '\u{17c8}', L), ('\u{17c9}', '\u{17d3}', NSM), ('\u{17d4}', '\u{17da}', L), ('\u{17db}', '\u{17db}', ET), ('\u{17dc}', '\u{17dc}', L), ('\u{17dd}', '\u{17dd}', NSM), ('\u{17e0}', '\u{17e9}', L), ('\u{17f0}', '\u{17f9}', ON), ('\u{1800}', '\u{180a}', ON), ('\u{180b}', '\u{180d}', NSM), ('\u{180e}', '\u{180e}', BN), ('\u{1810}', '\u{1819}', L), ('\u{1820}', '\u{1877}', L), ('\u{1880}', '\u{1884}', L), ('\u{1885}', '\u{1886}', NSM), ('\u{1887}', '\u{18a8}', L), ('\u{18a9}', '\u{18a9}', NSM), ('\u{18aa}', '\u{18aa}', L), ('\u{18b0}', '\u{18f5}', L), ('\u{1900}', '\u{191e}', L), ('\u{1920}', '\u{1922}', NSM), ('\u{1923}', '\u{1926}', L), ('\u{1927}', '\u{1928}', NSM), ('\u{1929}', '\u{192b}', L), ('\u{1930}', '\u{1931}', L), ('\u{1932}', '\u{1932}', NSM), ('\u{1933}', '\u{1938}', L), ('\u{1939}', '\u{193b}', NSM), ('\u{1940}', '\u{1940}', ON), ('\u{1944}', '\u{1945}', ON), ('\u{1946}', '\u{196d}', L), ('\u{1970}', '\u{1974}', L), ('\u{1980}', '\u{19ab}', L), ('\u{19b0}', '\u{19c9}', L), ('\u{19d0}', '\u{19da}', L), ('\u{19de}', '\u{19ff}', ON), ('\u{1a00}', '\u{1a16}', L), ('\u{1a17}', '\u{1a18}', NSM), ('\u{1a19}', '\u{1a1a}', L), ('\u{1a1b}', '\u{1a1b}', NSM), ('\u{1a1e}', '\u{1a55}', L), ('\u{1a56}', '\u{1a56}', NSM), ('\u{1a57}', '\u{1a57}', L), ('\u{1a58}', '\u{1a5e}', NSM), ('\u{1a60}', '\u{1a60}', NSM), ('\u{1a61}', '\u{1a61}', L), ('\u{1a62}', '\u{1a62}', NSM), ('\u{1a63}', '\u{1a64}', L), ('\u{1a65}', '\u{1a6c}', NSM), ('\u{1a6d}', '\u{1a72}', L), ('\u{1a73}', '\u{1a7c}', NSM), ('\u{1a7f}', '\u{1a7f}', NSM), ('\u{1a80}', '\u{1a89}', L), ('\u{1a90}', '\u{1a99}', L), ('\u{1aa0}', '\u{1aad}', L), ('\u{1ab0}', '\u{1abe}', NSM), ('\u{1b00}', '\u{1b03}', NSM), ('\u{1b04}', '\u{1b33}', L), ('\u{1b34}', '\u{1b34}', NSM), ('\u{1b35}', '\u{1b35}', L), ('\u{1b36}', '\u{1b3a}', NSM), ('\u{1b3b}', '\u{1b3b}', L), ('\u{1b3c}', '\u{1b3c}', NSM), ('\u{1b3d}', '\u{1b41}', L), ('\u{1b42}', '\u{1b42}', NSM), ('\u{1b43}', '\u{1b4b}', L), ('\u{1b50}', '\u{1b6a}', L), ('\u{1b6b}', '\u{1b73}', NSM), ('\u{1b74}', '\u{1b7c}', L), ('\u{1b80}', '\u{1b81}', NSM), ('\u{1b82}', '\u{1ba1}', L), ('\u{1ba2}', '\u{1ba5}', NSM), ('\u{1ba6}', '\u{1ba7}', L), ('\u{1ba8}', '\u{1ba9}', NSM), ('\u{1baa}', '\u{1baa}', L), ('\u{1bab}', '\u{1bad}', NSM), ('\u{1bae}', '\u{1be5}', L), ('\u{1be6}', '\u{1be6}', NSM), ('\u{1be7}', '\u{1be7}', L), ('\u{1be8}', '\u{1be9}', NSM), ('\u{1bea}', '\u{1bec}', L), ('\u{1bed}', '\u{1bed}', NSM), ('\u{1bee}', '\u{1bee}', L), ('\u{1bef}', '\u{1bf1}', NSM), ('\u{1bf2}', '\u{1bf3}', L), ('\u{1bfc}', '\u{1c2b}', L), ('\u{1c2c}', '\u{1c33}', NSM), ('\u{1c34}', '\u{1c35}', L), ('\u{1c36}', '\u{1c37}', NSM), ('\u{1c3b}', '\u{1c49}', L), ('\u{1c4d}', '\u{1c88}', L), ('\u{1cc0}', '\u{1cc7}', L), ('\u{1cd0}', '\u{1cd2}', NSM), ('\u{1cd3}', '\u{1cd3}', L), ('\u{1cd4}', '\u{1ce0}', NSM), ('\u{1ce1}', '\u{1ce1}', L), ('\u{1ce2}', '\u{1ce8}', NSM), ('\u{1ce9}', '\u{1cec}', L), ('\u{1ced}', '\u{1ced}', NSM), ('\u{1cee}', '\u{1cf3}', L), ('\u{1cf4}', '\u{1cf4}', NSM), ('\u{1cf5}', '\u{1cf7}', L), ('\u{1cf8}', '\u{1cf9}', NSM), ('\u{1d00}', '\u{1dbf}', L), ('\u{1dc0}', '\u{1df9}', NSM), ('\u{1dfb}', '\u{1dff}', NSM), ('\u{1e00}', '\u{1f15}', L), ('\u{1f18}', '\u{1f1d}', L), ('\u{1f20}', '\u{1f45}', L), ('\u{1f48}', '\u{1f4d}', L), ('\u{1f50}', '\u{1f57}', L), ('\u{1f59}', '\u{1f59}', L), ('\u{1f5b}', '\u{1f5b}', L), ('\u{1f5d}', '\u{1f5d}', L), ('\u{1f5f}', '\u{1f7d}', L), ('\u{1f80}', '\u{1fb4}', L), ('\u{1fb6}', '\u{1fbc}', L), ('\u{1fbd}', '\u{1fbd}', ON), ('\u{1fbe}', '\u{1fbe}', L), ('\u{1fbf}', '\u{1fc1}', ON), ('\u{1fc2}', '\u{1fc4}', L), ('\u{1fc6}', '\u{1fcc}', L), ('\u{1fcd}', '\u{1fcf}', ON), ('\u{1fd0}', '\u{1fd3}', L), ('\u{1fd6}', '\u{1fdb}', L), ('\u{1fdd}', '\u{1fdf}', ON), ('\u{1fe0}', '\u{1fec}', L), ('\u{1fed}', '\u{1fef}', ON), ('\u{1ff2}', '\u{1ff4}', L), ('\u{1ff6}', '\u{1ffc}', L), ('\u{1ffd}', '\u{1ffe}', ON), ('\u{2000}', '\u{200a}', WS), ('\u{200b}', '\u{200d}', BN), ('\u{200e}', '\u{200e}', L), ('\u{200f}', '\u{200f}', R), ('\u{2010}', '\u{2027}', ON), ('\u{2028}', '\u{2028}', WS), ('\u{2029}', '\u{2029}', B), ('\u{202a}', '\u{202a}', LRE), ('\u{202b}', '\u{202b}', RLE), ('\u{202c}', '\u{202c}', PDF), ('\u{202d}', '\u{202d}', LRO), ('\u{202e}', '\u{202e}', RLO), ('\u{202f}', '\u{202f}', CS), ('\u{2030}', '\u{2034}', ET), ('\u{2035}', '\u{2043}', ON), ('\u{2044}', '\u{2044}', CS), ('\u{2045}', '\u{205e}', ON), ('\u{205f}', '\u{205f}', WS), ('\u{2060}', '\u{2064}', BN), ('\u{2066}', '\u{2066}', LRI), ('\u{2067}', '\u{2067}', RLI), ('\u{2068}', '\u{2068}', FSI), ('\u{2069}', '\u{2069}', PDI), ('\u{206a}', '\u{206f}', BN), ('\u{2070}', '\u{2070}', EN), ('\u{2071}', '\u{2071}', L), ('\u{2074}', '\u{2079}', EN), ('\u{207a}', '\u{207b}', ES), ('\u{207c}', '\u{207e}', ON), ('\u{207f}', '\u{207f}', L), ('\u{2080}', '\u{2089}', EN), ('\u{208a}', '\u{208b}', ES), ('\u{208c}', '\u{208e}', ON), ('\u{2090}', '\u{209c}', L), ('\u{20a0}', '\u{20cf}', ET), ('\u{20d0}', '\u{20f0}', NSM), ('\u{2100}', '\u{2101}', ON), ('\u{2102}', '\u{2102}', L), ('\u{2103}', '\u{2106}', ON), ('\u{2107}', '\u{2107}', L), ('\u{2108}', '\u{2109}', ON), ('\u{210a}', '\u{2113}', L), ('\u{2114}', '\u{2114}', ON), ('\u{2115}', '\u{2115}', L), ('\u{2116}', '\u{2118}', ON), ('\u{2119}', '\u{211d}', L), ('\u{211e}', '\u{2123}', ON), ('\u{2124}', '\u{2124}', L), ('\u{2125}', '\u{2125}', ON), ('\u{2126}', '\u{2126}', L), ('\u{2127}', '\u{2127}', ON), ('\u{2128}', '\u{2128}', L), ('\u{2129}', '\u{2129}', ON), ('\u{212a}', '\u{212d}', L), ('\u{212e}', '\u{212e}', ET), ('\u{212f}', '\u{2139}', L), ('\u{213a}', '\u{213b}', ON), ('\u{213c}', '\u{213f}', L), ('\u{2140}', '\u{2144}', ON), ('\u{2145}', '\u{2149}', L), ('\u{214a}', '\u{214d}', ON), ('\u{214e}', '\u{214f}', L), ('\u{2150}', '\u{215f}', ON), ('\u{2160}', '\u{2188}', L), ('\u{2189}', '\u{218b}', ON), ('\u{2190}', '\u{2211}', ON), ('\u{2212}', '\u{2212}', ES), ('\u{2213}', '\u{2213}', ET), ('\u{2214}', '\u{2335}', ON), ('\u{2336}', '\u{237a}', L), ('\u{237b}', '\u{2394}', ON), ('\u{2395}', '\u{2395}', L), ('\u{2396}', '\u{2426}', ON), ('\u{2440}', '\u{244a}', ON), ('\u{2460}', '\u{2487}', ON), ('\u{2488}', '\u{249b}', EN), ('\u{249c}', '\u{24e9}', L), ('\u{24ea}', '\u{26ab}', ON), ('\u{26ac}', '\u{26ac}', L), ('\u{26ad}', '\u{27ff}', ON), ('\u{2800}', '\u{28ff}', L), ('\u{2900}', '\u{2b73}', ON), ('\u{2b76}', '\u{2b95}', ON), ('\u{2b98}', '\u{2bb9}', ON), ('\u{2bbd}', '\u{2bc8}', ON), ('\u{2bca}', '\u{2bd2}', ON), ('\u{2bec}', '\u{2bef}', ON), ('\u{2c00}', '\u{2c2e}', L), ('\u{2c30}', '\u{2c5e}', L), ('\u{2c60}', '\u{2ce4}', L), ('\u{2ce5}', '\u{2cea}', ON), ('\u{2ceb}', '\u{2cee}', L), ('\u{2cef}', '\u{2cf1}', NSM), ('\u{2cf2}', '\u{2cf3}', L), ('\u{2cf9}', '\u{2cff}', ON), ('\u{2d00}', '\u{2d25}', L), ('\u{2d27}', '\u{2d27}', L), ('\u{2d2d}', '\u{2d2d}', L), ('\u{2d30}', '\u{2d67}', L), ('\u{2d6f}', '\u{2d70}', L), ('\u{2d7f}', '\u{2d7f}', NSM), ('\u{2d80}', '\u{2d96}', L), ('\u{2da0}', '\u{2da6}', L), ('\u{2da8}', '\u{2dae}', L), ('\u{2db0}', '\u{2db6}', L), ('\u{2db8}', '\u{2dbe}', L), ('\u{2dc0}', '\u{2dc6}', L), ('\u{2dc8}', '\u{2dce}', L), ('\u{2dd0}', '\u{2dd6}', L), ('\u{2dd8}', '\u{2dde}', L), ('\u{2de0}', '\u{2dff}', NSM), ('\u{2e00}', '\u{2e49}', ON), ('\u{2e80}', '\u{2e99}', ON), ('\u{2e9b}', '\u{2ef3}', ON), ('\u{2f00}', '\u{2fd5}', ON), ('\u{2ff0}', '\u{2ffb}', ON), ('\u{3000}', '\u{3000}', WS), ('\u{3001}', '\u{3004}', ON), ('\u{3005}', '\u{3007}', L), ('\u{3008}', '\u{3020}', ON), ('\u{3021}', '\u{3029}', L), ('\u{302a}', '\u{302d}', NSM), ('\u{302e}', '\u{302f}', L), ('\u{3030}', '\u{3030}', ON), ('\u{3031}', '\u{3035}', L), ('\u{3036}', '\u{3037}', ON), ('\u{3038}', '\u{303c}', L), ('\u{303d}', '\u{303f}', ON), ('\u{3041}', '\u{3096}', L), ('\u{3099}', '\u{309a}', NSM), ('\u{309b}', '\u{309c}', ON), ('\u{309d}', '\u{309f}', L), ('\u{30a0}', '\u{30a0}', ON), ('\u{30a1}', '\u{30fa}', L), ('\u{30fb}', '\u{30fb}', ON), ('\u{30fc}', '\u{30ff}', L), ('\u{3105}', '\u{312e}', L), ('\u{3131}', '\u{318e}', L), ('\u{3190}', '\u{31ba}', L), ('\u{31c0}', '\u{31e3}', ON), ('\u{31f0}', '\u{321c}', L), ('\u{321d}', '\u{321e}', ON), ('\u{3220}', '\u{324f}', L), ('\u{3250}', '\u{325f}', ON), ('\u{3260}', '\u{327b}', L), ('\u{327c}', '\u{327e}', ON), ('\u{327f}', '\u{32b0}', L), ('\u{32b1}', '\u{32bf}', ON), ('\u{32c0}', '\u{32cb}', L), ('\u{32cc}', '\u{32cf}', ON), ('\u{32d0}', '\u{32fe}', L), ('\u{3300}', '\u{3376}', L), ('\u{3377}', '\u{337a}', ON), ('\u{337b}', '\u{33dd}', L), ('\u{33de}', '\u{33df}', ON), ('\u{33e0}', '\u{33fe}', L), ('\u{33ff}', '\u{33ff}', ON), ('\u{3400}', '\u{4db5}', L), ('\u{4dc0}', '\u{4dff}', ON), ('\u{4e00}', '\u{9fea}', L), ('\u{a000}', '\u{a48c}', L), ('\u{a490}', '\u{a4c6}', ON), ('\u{a4d0}', '\u{a60c}', L), ('\u{a60d}', '\u{a60f}', ON), ('\u{a610}', '\u{a62b}', L), ('\u{a640}', '\u{a66e}', L), ('\u{a66f}', '\u{a672}', NSM), ('\u{a673}', '\u{a673}', ON), ('\u{a674}', '\u{a67d}', NSM), ('\u{a67e}', '\u{a67f}', ON), ('\u{a680}', '\u{a69d}', L), ('\u{a69e}', '\u{a69f}', NSM), ('\u{a6a0}', '\u{a6ef}', L), ('\u{a6f0}', '\u{a6f1}', NSM), ('\u{a6f2}', '\u{a6f7}', L), ('\u{a700}', '\u{a721}', ON), ('\u{a722}', '\u{a787}', L), ('\u{a788}', '\u{a788}', ON), ('\u{a789}', '\u{a7ae}', L), ('\u{a7b0}', '\u{a7b7}', L), ('\u{a7f7}', '\u{a801}', L), ('\u{a802}', '\u{a802}', NSM), ('\u{a803}', '\u{a805}', L), ('\u{a806}', '\u{a806}', NSM), ('\u{a807}', '\u{a80a}', L), ('\u{a80b}', '\u{a80b}', NSM), ('\u{a80c}', '\u{a824}', L), ('\u{a825}', '\u{a826}', NSM), ('\u{a827}', '\u{a827}', L), ('\u{a828}', '\u{a82b}', ON), ('\u{a830}', '\u{a837}', L), ('\u{a838}', '\u{a839}', ET), ('\u{a840}', '\u{a873}', L), ('\u{a874}', '\u{a877}', ON), ('\u{a880}', '\u{a8c3}', L), ('\u{a8c4}', '\u{a8c5}', NSM), ('\u{a8ce}', '\u{a8d9}', L), ('\u{a8e0}', '\u{a8f1}', NSM), ('\u{a8f2}', '\u{a8fd}', L), ('\u{a900}', '\u{a925}', L), ('\u{a926}', '\u{a92d}', NSM), ('\u{a92e}', '\u{a946}', L), ('\u{a947}', '\u{a951}', NSM), ('\u{a952}', '\u{a953}', L), ('\u{a95f}', '\u{a97c}', L), ('\u{a980}', '\u{a982}', NSM), ('\u{a983}', '\u{a9b2}', L), ('\u{a9b3}', '\u{a9b3}', NSM), ('\u{a9b4}', '\u{a9b5}', L), ('\u{a9b6}', '\u{a9b9}', NSM), ('\u{a9ba}', '\u{a9bb}', L), ('\u{a9bc}', '\u{a9bc}', NSM), ('\u{a9bd}', '\u{a9cd}', L), ('\u{a9cf}', '\u{a9d9}', L), ('\u{a9de}', '\u{a9e4}', L), ('\u{a9e5}', '\u{a9e5}', NSM), ('\u{a9e6}', '\u{a9fe}', L), ('\u{aa00}', '\u{aa28}', L), ('\u{aa29}', '\u{aa2e}', NSM), ('\u{aa2f}', '\u{aa30}', L), ('\u{aa31}', '\u{aa32}', NSM), ('\u{aa33}', '\u{aa34}', L), ('\u{aa35}', '\u{aa36}', NSM), ('\u{aa40}', '\u{aa42}', L), ('\u{aa43}', '\u{aa43}', NSM), ('\u{aa44}', '\u{aa4b}', L), ('\u{aa4c}', '\u{aa4c}', NSM), ('\u{aa4d}', '\u{aa4d}', L), ('\u{aa50}', '\u{aa59}', L), ('\u{aa5c}', '\u{aa7b}', L), ('\u{aa7c}', '\u{aa7c}', NSM), ('\u{aa7d}', '\u{aaaf}', L), ('\u{aab0}', '\u{aab0}', NSM), ('\u{aab1}', '\u{aab1}', L), ('\u{aab2}', '\u{aab4}', NSM), ('\u{aab5}', '\u{aab6}', L), ('\u{aab7}', '\u{aab8}', NSM), ('\u{aab9}', '\u{aabd}', L), ('\u{aabe}', '\u{aabf}', NSM), ('\u{aac0}', '\u{aac0}', L), ('\u{aac1}', '\u{aac1}', NSM), ('\u{aac2}', '\u{aac2}', L), ('\u{aadb}', '\u{aaeb}', L), ('\u{aaec}', '\u{aaed}', NSM), ('\u{aaee}', '\u{aaf5}', L), ('\u{aaf6}', '\u{aaf6}', NSM), ('\u{ab01}', '\u{ab06}', L), ('\u{ab09}', '\u{ab0e}', L), ('\u{ab11}', '\u{ab16}', L), ('\u{ab20}', '\u{ab26}', L), ('\u{ab28}', '\u{ab2e}', L), ('\u{ab30}', '\u{ab65}', L), ('\u{ab70}', '\u{abe4}', L), ('\u{abe5}', '\u{abe5}', NSM), ('\u{abe6}', '\u{abe7}', L), ('\u{abe8}', '\u{abe8}', NSM), ('\u{abe9}', '\u{abec}', L), ('\u{abed}', '\u{abed}', NSM), ('\u{abf0}', '\u{abf9}', L), ('\u{ac00}', '\u{d7a3}', L), ('\u{d7b0}', '\u{d7c6}', L), ('\u{d7cb}', '\u{d7fb}', L), ('\u{e000}', '\u{fa6d}', L), ('\u{fa70}', '\u{fad9}', L), ('\u{fb00}', '\u{fb06}', L), ('\u{fb13}', '\u{fb17}', L), ('\u{fb1d}', '\u{fb1d}', R), ('\u{fb1e}', '\u{fb1e}', NSM), ('\u{fb1f}', '\u{fb28}', R), ('\u{fb29}', '\u{fb29}', ES), ('\u{fb2a}', '\u{fb4f}', R), ('\u{fb50}', '\u{fd3d}', AL), ('\u{fd3e}', '\u{fd3f}', ON), ('\u{fd40}', '\u{fdcf}', AL), ('\u{fdf0}', '\u{fdfc}', AL), ('\u{fdfd}', '\u{fdfd}', ON), ('\u{fdfe}', '\u{fdff}', AL), ('\u{fe00}', '\u{fe0f}', NSM), ('\u{fe10}', '\u{fe19}', ON), ('\u{fe20}', '\u{fe2f}', NSM), ('\u{fe30}', '\u{fe4f}', ON), ('\u{fe50}', '\u{fe50}', CS), ('\u{fe51}', '\u{fe51}', ON), ('\u{fe52}', '\u{fe52}', CS), ('\u{fe54}', '\u{fe54}', ON), ('\u{fe55}', '\u{fe55}', CS), ('\u{fe56}', '\u{fe5e}', ON), ('\u{fe5f}', '\u{fe5f}', ET), ('\u{fe60}', '\u{fe61}', ON), ('\u{fe62}', '\u{fe63}', ES), ('\u{fe64}', '\u{fe66}', ON), ('\u{fe68}', '\u{fe68}', ON), ('\u{fe69}', '\u{fe6a}', ET), ('\u{fe6b}', '\u{fe6b}', ON), ('\u{fe70}', '\u{fefe}', AL), ('\u{feff}', '\u{feff}', BN), ('\u{ff01}', '\u{ff02}', ON), ('\u{ff03}', '\u{ff05}', ET), ('\u{ff06}', '\u{ff0a}', ON), ('\u{ff0b}', '\u{ff0b}', ES), ('\u{ff0c}', '\u{ff0c}', CS), ('\u{ff0d}', '\u{ff0d}', ES), ('\u{ff0e}', '\u{ff0f}', CS), ('\u{ff10}', '\u{ff19}', EN), ('\u{ff1a}', '\u{ff1a}', CS), ('\u{ff1b}', '\u{ff20}', ON), ('\u{ff21}', '\u{ff3a}', L), ('\u{ff3b}', '\u{ff40}', ON), ('\u{ff41}', '\u{ff5a}', L), ('\u{ff5b}', '\u{ff65}', ON), ('\u{ff66}', '\u{ffbe}', L), ('\u{ffc2}', '\u{ffc7}', L), ('\u{ffca}', '\u{ffcf}', L), ('\u{ffd2}', '\u{ffd7}', L), ('\u{ffda}', '\u{ffdc}', L), ('\u{ffe0}', '\u{ffe1}', ET), ('\u{ffe2}', '\u{ffe4}', ON), ('\u{ffe5}', '\u{ffe6}', ET), ('\u{ffe8}', '\u{ffee}', ON), ('\u{fff9}', '\u{fffd}', ON), ('\u{10000}', '\u{1000b}', L), ('\u{1000d}', '\u{10026}', L), ('\u{10028}', '\u{1003a}', L), ('\u{1003c}', '\u{1003d}', L), ('\u{1003f}', '\u{1004d}', L), ('\u{10050}', '\u{1005d}', L), ('\u{10080}', '\u{100fa}', L), ('\u{10100}', '\u{10100}', L), ('\u{10101}', '\u{10101}', ON), ('\u{10102}', '\u{10102}', L), ('\u{10107}', '\u{10133}', L), ('\u{10137}', '\u{1013f}', L), ('\u{10140}', '\u{1018c}', ON), ('\u{1018d}', '\u{1018e}', L), ('\u{10190}', '\u{1019b}', ON), ('\u{101a0}', '\u{101a0}', ON), ('\u{101d0}', '\u{101fc}', L), ('\u{101fd}', '\u{101fd}', NSM), ('\u{10280}', '\u{1029c}', L), ('\u{102a0}', '\u{102d0}', L), ('\u{102e0}', '\u{102e0}', NSM), ('\u{102e1}', '\u{102fb}', EN), ('\u{10300}', '\u{10323}', L), ('\u{1032d}', '\u{1034a}', L), ('\u{10350}', '\u{10375}', L), ('\u{10376}', '\u{1037a}', NSM), ('\u{10380}', '\u{1039d}', L), ('\u{1039f}', '\u{103c3}', L), ('\u{103c8}', '\u{103d5}', L), ('\u{10400}', '\u{1049d}', L), ('\u{104a0}', '\u{104a9}', L), ('\u{104b0}', '\u{104d3}', L), ('\u{104d8}', '\u{104fb}', L), ('\u{10500}', '\u{10527}', L), ('\u{10530}', '\u{10563}', L), ('\u{1056f}', '\u{1056f}', L), ('\u{10600}', '\u{10736}', L), ('\u{10740}', '\u{10755}', L), ('\u{10760}', '\u{10767}', L), ('\u{10800}', '\u{1091e}', R), ('\u{1091f}', '\u{1091f}', ON), ('\u{10920}', '\u{10a00}', R), ('\u{10a01}', '\u{10a03}', NSM), ('\u{10a04}', '\u{10a04}', R), ('\u{10a05}', '\u{10a06}', NSM), ('\u{10a07}', '\u{10a0b}', R), ('\u{10a0c}', '\u{10a0f}', NSM), ('\u{10a10}', '\u{10a37}', R), ('\u{10a38}', '\u{10a3a}', NSM), ('\u{10a3b}', '\u{10a3e}', R), ('\u{10a3f}', '\u{10a3f}', NSM), ('\u{10a40}', '\u{10ae4}', R), ('\u{10ae5}', '\u{10ae6}', NSM), ('\u{10ae7}', '\u{10b38}', R), ('\u{10b39}', '\u{10b3f}', ON), ('\u{10b40}', '\u{10e5f}', R), ('\u{10e60}', '\u{10e7e}', AN), ('\u{10e7f}', '\u{10fff}', R), ('\u{11000}', '\u{11000}', L), ('\u{11001}', '\u{11001}', NSM), ('\u{11002}', '\u{11037}', L), ('\u{11038}', '\u{11046}', NSM), ('\u{11047}', '\u{1104d}', L), ('\u{11052}', '\u{11065}', ON), ('\u{11066}', '\u{1106f}', L), ('\u{1107f}', '\u{11081}', NSM), ('\u{11082}', '\u{110b2}', L), ('\u{110b3}', '\u{110b6}', NSM), ('\u{110b7}', '\u{110b8}', L), ('\u{110b9}', '\u{110ba}', NSM), ('\u{110bb}', '\u{110c1}', L), ('\u{110d0}', '\u{110e8}', L), ('\u{110f0}', '\u{110f9}', L), ('\u{11100}', '\u{11102}', NSM), ('\u{11103}', '\u{11126}', L), ('\u{11127}', '\u{1112b}', NSM), ('\u{1112c}', '\u{1112c}', L), ('\u{1112d}', '\u{11134}', NSM), ('\u{11136}', '\u{11143}', L), ('\u{11150}', '\u{11172}', L), ('\u{11173}', '\u{11173}', NSM), ('\u{11174}', '\u{11176}', L), ('\u{11180}', '\u{11181}', NSM), ('\u{11182}', '\u{111b5}', L), ('\u{111b6}', '\u{111be}', NSM), ('\u{111bf}', '\u{111c9}', L), ('\u{111ca}', '\u{111cc}', NSM), ('\u{111cd}', '\u{111cd}', L), ('\u{111d0}', '\u{111df}', L), ('\u{111e1}', '\u{111f4}', L), ('\u{11200}', '\u{11211}', L), ('\u{11213}', '\u{1122e}', L), ('\u{1122f}', '\u{11231}', NSM), ('\u{11232}', '\u{11233}', L), ('\u{11234}', '\u{11234}', NSM), ('\u{11235}', '\u{11235}', L), ('\u{11236}', '\u{11237}', NSM), ('\u{11238}', '\u{1123d}', L), ('\u{1123e}', '\u{1123e}', NSM), ('\u{11280}', '\u{11286}', L), ('\u{11288}', '\u{11288}', L), ('\u{1128a}', '\u{1128d}', L), ('\u{1128f}', '\u{1129d}', L), ('\u{1129f}', '\u{112a9}', L), ('\u{112b0}', '\u{112de}', L), ('\u{112df}', '\u{112df}', NSM), ('\u{112e0}', '\u{112e2}', L), ('\u{112e3}', '\u{112ea}', NSM), ('\u{112f0}', '\u{112f9}', L), ('\u{11300}', '\u{11301}', NSM), ('\u{11302}', '\u{11303}', L), ('\u{11305}', '\u{1130c}', L), ('\u{1130f}', '\u{11310}', L), ('\u{11313}', '\u{11328}', L), ('\u{1132a}', '\u{11330}', L), ('\u{11332}', '\u{11333}', L), ('\u{11335}', '\u{11339}', L), ('\u{1133c}', '\u{1133c}', NSM), ('\u{1133d}', '\u{1133f}', L), ('\u{11340}', '\u{11340}', NSM), ('\u{11341}', '\u{11344}', L), ('\u{11347}', '\u{11348}', L), ('\u{1134b}', '\u{1134d}', L), ('\u{11350}', '\u{11350}', L), ('\u{11357}', '\u{11357}', L), ('\u{1135d}', '\u{11363}', L), ('\u{11366}', '\u{1136c}', NSM), ('\u{11370}', '\u{11374}', NSM), ('\u{11400}', '\u{11437}', L), ('\u{11438}', '\u{1143f}', NSM), ('\u{11440}', '\u{11441}', L), ('\u{11442}', '\u{11444}', NSM), ('\u{11445}', '\u{11445}', L), ('\u{11446}', '\u{11446}', NSM), ('\u{11447}', '\u{11459}', L), ('\u{1145b}', '\u{1145b}', L), ('\u{1145d}', '\u{1145d}', L), ('\u{11480}', '\u{114b2}', L), ('\u{114b3}', '\u{114b8}', NSM), ('\u{114b9}', '\u{114b9}', L), ('\u{114ba}', '\u{114ba}', NSM), ('\u{114bb}', '\u{114be}', L), ('\u{114bf}', '\u{114c0}', NSM), ('\u{114c1}', '\u{114c1}', L), ('\u{114c2}', '\u{114c3}', NSM), ('\u{114c4}', '\u{114c7}', L), ('\u{114d0}', '\u{114d9}', L), ('\u{11580}', '\u{115b1}', L), ('\u{115b2}', '\u{115b5}', NSM), ('\u{115b8}', '\u{115bb}', L), ('\u{115bc}', '\u{115bd}', NSM), ('\u{115be}', '\u{115be}', L), ('\u{115bf}', '\u{115c0}', NSM), ('\u{115c1}', '\u{115db}', L), ('\u{115dc}', '\u{115dd}', NSM), ('\u{11600}', '\u{11632}', L), ('\u{11633}', '\u{1163a}', NSM), ('\u{1163b}', '\u{1163c}', L), ('\u{1163d}', '\u{1163d}', NSM), ('\u{1163e}', '\u{1163e}', L), ('\u{1163f}', '\u{11640}', NSM), ('\u{11641}', '\u{11644}', L), ('\u{11650}', '\u{11659}', L), ('\u{11660}', '\u{1166c}', ON), ('\u{11680}', '\u{116aa}', L), ('\u{116ab}', '\u{116ab}', NSM), ('\u{116ac}', '\u{116ac}', L), ('\u{116ad}', '\u{116ad}', NSM), ('\u{116ae}', '\u{116af}', L), ('\u{116b0}', '\u{116b5}', NSM), ('\u{116b6}', '\u{116b6}', L), ('\u{116b7}', '\u{116b7}', NSM), ('\u{116c0}', '\u{116c9}', L), ('\u{11700}', '\u{11719}', L), ('\u{1171d}', '\u{1171f}', NSM), ('\u{11720}', '\u{11721}', L), ('\u{11722}', '\u{11725}', NSM), ('\u{11726}', '\u{11726}', L), ('\u{11727}', '\u{1172b}', NSM), ('\u{11730}', '\u{1173f}', L), ('\u{118a0}', '\u{118f2}', L), ('\u{118ff}', '\u{118ff}', L), ('\u{11a00}', '\u{11a00}', L), ('\u{11a01}', '\u{11a06}', NSM), ('\u{11a07}', '\u{11a08}', L), ('\u{11a09}', '\u{11a0a}', NSM), ('\u{11a0b}', '\u{11a32}', L), ('\u{11a33}', '\u{11a38}', NSM), ('\u{11a39}', '\u{11a3a}', L), ('\u{11a3b}', '\u{11a3e}', NSM), ('\u{11a3f}', '\u{11a46}', L), ('\u{11a47}', '\u{11a47}', NSM), ('\u{11a50}', '\u{11a50}', L), ('\u{11a51}', '\u{11a56}', NSM), ('\u{11a57}', '\u{11a58}', L), ('\u{11a59}', '\u{11a5b}', NSM), ('\u{11a5c}', '\u{11a83}', L), ('\u{11a86}', '\u{11a89}', L), ('\u{11a8a}', '\u{11a96}', NSM), ('\u{11a97}', '\u{11a97}', L), ('\u{11a98}', '\u{11a99}', NSM), ('\u{11a9a}', '\u{11a9c}', L), ('\u{11a9e}', '\u{11aa2}', L), ('\u{11ac0}', '\u{11af8}', L), ('\u{11c00}', '\u{11c08}', L), ('\u{11c0a}', '\u{11c2f}', L), ('\u{11c30}', '\u{11c36}', NSM), ('\u{11c38}', '\u{11c3d}', NSM), ('\u{11c3e}', '\u{11c45}', L), ('\u{11c50}', '\u{11c6c}', L), ('\u{11c70}', '\u{11c8f}', L), ('\u{11c92}', '\u{11ca7}', NSM), ('\u{11ca9}', '\u{11ca9}', L), ('\u{11caa}', '\u{11cb0}', NSM), ('\u{11cb1}', '\u{11cb1}', L), ('\u{11cb2}', '\u{11cb3}', NSM), ('\u{11cb4}', '\u{11cb4}', L), ('\u{11cb5}', '\u{11cb6}', NSM), ('\u{11d00}', '\u{11d06}', L), ('\u{11d08}', '\u{11d09}', L), ('\u{11d0b}', '\u{11d30}', L), ('\u{11d31}', '\u{11d36}', NSM), ('\u{11d3a}', '\u{11d3a}', NSM), ('\u{11d3c}', '\u{11d3d}', NSM), ('\u{11d3f}', '\u{11d45}', NSM), ('\u{11d46}', '\u{11d46}', L), ('\u{11d47}', '\u{11d47}', NSM), ('\u{11d50}', '\u{11d59}', L), ('\u{12000}', '\u{12399}', L), ('\u{12400}', '\u{1246e}', L), ('\u{12470}', '\u{12474}', L), ('\u{12480}', '\u{12543}', L), ('\u{13000}', '\u{1342e}', L), ('\u{14400}', '\u{14646}', L), ('\u{16800}', '\u{16a38}', L), ('\u{16a40}', '\u{16a5e}', L), ('\u{16a60}', '\u{16a69}', L), ('\u{16a6e}', '\u{16a6f}', L), ('\u{16ad0}', '\u{16aed}', L), ('\u{16af0}', '\u{16af4}', NSM), ('\u{16af5}', '\u{16af5}', L), ('\u{16b00}', '\u{16b2f}', L), ('\u{16b30}', '\u{16b36}', NSM), ('\u{16b37}', '\u{16b45}', L), ('\u{16b50}', '\u{16b59}', L), ('\u{16b5b}', '\u{16b61}', L), ('\u{16b63}', '\u{16b77}', L), ('\u{16b7d}', '\u{16b8f}', L), ('\u{16f00}', '\u{16f44}', L), ('\u{16f50}', '\u{16f7e}', L), ('\u{16f8f}', '\u{16f92}', NSM), ('\u{16f93}', '\u{16f9f}', L), ('\u{16fe0}', '\u{16fe1}', L), ('\u{17000}', '\u{187ec}', L), ('\u{18800}', '\u{18af2}', L), ('\u{1b000}', '\u{1b11e}', L), ('\u{1b170}', '\u{1b2fb}', L), ('\u{1bc00}', '\u{1bc6a}', L), ('\u{1bc70}', '\u{1bc7c}', L), ('\u{1bc80}', '\u{1bc88}', L), ('\u{1bc90}', '\u{1bc99}', L), ('\u{1bc9c}', '\u{1bc9c}', L), ('\u{1bc9d}', '\u{1bc9e}', NSM), ('\u{1bc9f}', '\u{1bc9f}', L), ('\u{1bca0}', '\u{1bca3}', BN), ('\u{1d000}', '\u{1d0f5}', L), ('\u{1d100}', '\u{1d126}', L), ('\u{1d129}', '\u{1d166}', L), ('\u{1d167}', '\u{1d169}', NSM), ('\u{1d16a}', '\u{1d172}', L), ('\u{1d173}', '\u{1d17a}', BN), ('\u{1d17b}', '\u{1d182}', NSM), ('\u{1d183}', '\u{1d184}', L), ('\u{1d185}', '\u{1d18b}', NSM), ('\u{1d18c}', '\u{1d1a9}', L), ('\u{1d1aa}', '\u{1d1ad}', NSM), ('\u{1d1ae}', '\u{1d1e8}', L), ('\u{1d200}', '\u{1d241}', ON), ('\u{1d242}', '\u{1d244}', NSM), ('\u{1d245}', '\u{1d245}', ON), ('\u{1d300}', '\u{1d356}', ON), ('\u{1d360}', '\u{1d371}', L), ('\u{1d400}', '\u{1d454}', L), ('\u{1d456}', '\u{1d49c}', L), ('\u{1d49e}', '\u{1d49f}', L), ('\u{1d4a2}', '\u{1d4a2}', L), ('\u{1d4a5}', '\u{1d4a6}', L), ('\u{1d4a9}', '\u{1d4ac}', L), ('\u{1d4ae}', '\u{1d4b9}', L), ('\u{1d4bb}', '\u{1d4bb}', L), ('\u{1d4bd}', '\u{1d4c3}', L), ('\u{1d4c5}', '\u{1d505}', L), ('\u{1d507}', '\u{1d50a}', L), ('\u{1d50d}', '\u{1d514}', L), ('\u{1d516}', '\u{1d51c}', L), ('\u{1d51e}', '\u{1d539}', L), ('\u{1d53b}', '\u{1d53e}', L), ('\u{1d540}', '\u{1d544}', L), ('\u{1d546}', '\u{1d546}', L), ('\u{1d54a}', '\u{1d550}', L), ('\u{1d552}', '\u{1d6a5}', L), ('\u{1d6a8}', '\u{1d6da}', L), ('\u{1d6db}', '\u{1d6db}', ON), ('\u{1d6dc}', '\u{1d714}', L), ('\u{1d715}', '\u{1d715}', ON), ('\u{1d716}', '\u{1d74e}', L), ('\u{1d74f}', '\u{1d74f}', ON), ('\u{1d750}', '\u{1d788}', L), ('\u{1d789}', '\u{1d789}', ON), ('\u{1d78a}', '\u{1d7c2}', L), ('\u{1d7c3}', '\u{1d7c3}', ON), ('\u{1d7c4}', '\u{1d7cb}', L), ('\u{1d7ce}', '\u{1d7ff}', EN), ('\u{1d800}', '\u{1d9ff}', L), ('\u{1da00}', '\u{1da36}', NSM), ('\u{1da37}', '\u{1da3a}', L), ('\u{1da3b}', '\u{1da6c}', NSM), ('\u{1da6d}', '\u{1da74}', L), ('\u{1da75}', '\u{1da75}', NSM), ('\u{1da76}', '\u{1da83}', L), ('\u{1da84}', '\u{1da84}', NSM), ('\u{1da85}', '\u{1da8b}', L), ('\u{1da9b}', '\u{1da9f}', NSM), ('\u{1daa1}', '\u{1daaf}', NSM), ('\u{1e000}', '\u{1e006}', NSM), ('\u{1e008}', '\u{1e018}', NSM), ('\u{1e01b}', '\u{1e021}', NSM), ('\u{1e023}', '\u{1e024}', NSM), ('\u{1e026}', '\u{1e02a}', NSM), ('\u{1e800}', '\u{1e8cf}', R), ('\u{1e8d0}', '\u{1e8d6}', NSM), ('\u{1e8d7}', '\u{1e943}', R), ('\u{1e944}', '\u{1e94a}', NSM), ('\u{1e94b}', '\u{1edff}', R), ('\u{1ee00}', '\u{1eeef}', AL), ('\u{1eef0}', '\u{1eef1}', ON), ('\u{1eef2}', '\u{1eeff}', AL), ('\u{1ef00}', '\u{1efff}', R), ('\u{1f000}', '\u{1f02b}', ON), ('\u{1f030}', '\u{1f093}', ON), ('\u{1f0a0}', '\u{1f0ae}', ON), ('\u{1f0b1}', '\u{1f0bf}', ON), ('\u{1f0c1}', '\u{1f0cf}', ON), ('\u{1f0d1}', '\u{1f0f5}', ON), ('\u{1f100}', '\u{1f10a}', EN), ('\u{1f10b}', '\u{1f10c}', ON), ('\u{1f110}', '\u{1f12e}', L), ('\u{1f130}', '\u{1f169}', L), ('\u{1f16a}', '\u{1f16b}', ON), ('\u{1f170}', '\u{1f1ac}', L), ('\u{1f1e6}', '\u{1f202}', L), ('\u{1f210}', '\u{1f23b}', L), ('\u{1f240}', '\u{1f248}', L), ('\u{1f250}', '\u{1f251}', L), ('\u{1f260}', '\u{1f265}', ON), ('\u{1f300}', '\u{1f6d4}', ON), ('\u{1f6e0}', '\u{1f6ec}', ON), ('\u{1f6f0}', '\u{1f6f8}', ON), ('\u{1f700}', '\u{1f773}', ON), ('\u{1f780}', '\u{1f7d4}', ON), ('\u{1f800}', '\u{1f80b}', ON), ('\u{1f810}', '\u{1f847}', ON), ('\u{1f850}', '\u{1f859}', ON), ('\u{1f860}', '\u{1f887}', ON), ('\u{1f890}', '\u{1f8ad}', ON), ('\u{1f900}', '\u{1f90b}', ON), ('\u{1f910}', '\u{1f93e}', ON), ('\u{1f940}', '\u{1f94c}', ON), ('\u{1f950}', '\u{1f96b}', ON), ('\u{1f980}', '\u{1f997}', ON), ('\u{1f9c0}', '\u{1f9c0}', ON), ('\u{1f9d0}', '\u{1f9e6}', ON), ('\u{20000}', '\u{2a6d6}', L), ('\u{2a700}', '\u{2b734}', L), ('\u{2b740}', '\u{2b81d}', L), ('\u{2b820}', '\u{2cea1}', L), ('\u{2ceb0}', '\u{2ebe0}', L), ('\u{2f800}', '\u{2fa1d}', L), ('\u{e0001}', '\u{e0001}', BN), ('\u{e0020}', '\u{e007f}', BN), ('\u{e0100}', '\u{e01ef}', NSM), ('\u{f0000}', '\u{ffffd}', L), ('\u{100000}', '\u{10fffd}', L) ]; unicode-bidi-0.3.7/src/deprecated.rs000064400000000000000000000057110072674642500154570ustar 00000000000000// Copyright 2015 The Servo Project Developers. See the // COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! This module holds deprecated assets only. use alloc::vec::Vec; use super::*; /// Find the level runs within a line and return them in visual order. /// /// NOTE: This implementation is incomplete. The algorithm needs information about the text, /// including original `BidiClass` property of each character, to be able to perform correctly. /// Please see [`BidiInfo::visual_runs()`](../struct.BidiInfo.html#method.visual_runs) for the /// improved implementation. /// /// `line` is a range of bytes indices within `levels`. /// /// #[deprecated(since = "0.3.0", note = "please use `BidiInfo::visual_runs()` instead.")] pub fn visual_runs(line: Range, levels: &[Level]) -> Vec { assert!(line.start <= levels.len()); assert!(line.end <= levels.len()); let mut runs = Vec::new(); // Find consecutive level runs. let mut start = line.start; let mut run_level = levels[start]; let mut min_level = run_level; let mut max_level = run_level; for (i, &new_level) in levels.iter().enumerate().take(line.end).skip(start + 1) { if new_level != run_level { // End of the previous run, start of a new one. runs.push(start..i); start = i; run_level = new_level; min_level = min(run_level, min_level); max_level = max(run_level, max_level); } } runs.push(start..line.end); let run_count = runs.len(); // Re-order the odd runs. // // Stop at the lowest *odd* level. min_level = min_level.new_lowest_ge_rtl().expect("Level error"); while max_level >= min_level { // Look for the start of a sequence of consecutive runs of max_level or higher. let mut seq_start = 0; while seq_start < run_count { if levels[runs[seq_start].start] < max_level { seq_start += 1; continue; } // Found the start of a sequence. Now find the end. let mut seq_end = seq_start + 1; while seq_end < run_count { if levels[runs[seq_end].start] < max_level { break; } seq_end += 1; } // Reverse the runs within this sequence. runs[seq_start..seq_end].reverse(); seq_start = seq_end; } max_level.lower(1).expect( "Lowering embedding level below zero", ); } runs } unicode-bidi-0.3.7/src/explicit.rs000064400000000000000000000147020072674642500152000ustar 00000000000000// Copyright 2015 The Servo Project Developers. See the // COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! 3.3.2 Explicit Levels and Directions //! //! use alloc::vec::Vec; use super::char_data::{BidiClass::{self, *}, is_rtl}; use super::level::Level; /// Compute explicit embedding levels for one paragraph of text (X1-X8). /// /// `processing_classes[i]` must contain the `BidiClass` of the char at byte index `i`, /// for each char in `text`. #[cfg_attr(feature = "flame_it", flamer::flame)] pub fn compute( text: &str, para_level: Level, original_classes: &[BidiClass], levels: &mut [Level], processing_classes: &mut [BidiClass], ) { assert_eq!(text.len(), original_classes.len()); // let mut stack = DirectionalStatusStack::new(); stack.push(para_level, OverrideStatus::Neutral); let mut overflow_isolate_count = 0u32; let mut overflow_embedding_count = 0u32; let mut valid_isolate_count = 0u32; for (i, c) in text.char_indices() { match original_classes[i] { // Rules X2-X5c RLE | LRE | RLO | LRO | RLI | LRI | FSI => { let last_level = stack.last().level; // X5a-X5c: Isolate initiators get the level of the last entry on the stack. let is_isolate = match original_classes[i] { RLI | LRI | FSI => true, _ => false, }; if is_isolate { levels[i] = last_level; match stack.last().status { OverrideStatus::RTL => processing_classes[i] = R, OverrideStatus::LTR => processing_classes[i] = L, _ => {} } } let new_level = if is_rtl(original_classes[i]) { last_level.new_explicit_next_rtl() } else { last_level.new_explicit_next_ltr() }; if new_level.is_ok() && overflow_isolate_count == 0 && overflow_embedding_count == 0 { let new_level = new_level.unwrap(); stack.push( new_level, match original_classes[i] { RLO => OverrideStatus::RTL, LRO => OverrideStatus::LTR, RLI | LRI | FSI => OverrideStatus::Isolate, _ => OverrideStatus::Neutral, }, ); if is_isolate { valid_isolate_count += 1; } else { // The spec doesn't explicitly mention this step, but it is necessary. // See the reference implementations for comparison. levels[i] = new_level; } } else if is_isolate { overflow_isolate_count += 1; } else if overflow_isolate_count == 0 { overflow_embedding_count += 1; } } // PDI => { if overflow_isolate_count > 0 { overflow_isolate_count -= 1; } else if valid_isolate_count > 0 { overflow_embedding_count = 0; loop { // Pop everything up to and including the last Isolate status. match stack.vec.pop() { None | Some(Status { status: OverrideStatus::Isolate, .. }) => break, _ => continue, } } valid_isolate_count -= 1; } let last = stack.last(); levels[i] = last.level; match last.status { OverrideStatus::RTL => processing_classes[i] = R, OverrideStatus::LTR => processing_classes[i] = L, _ => {} } } // PDF => { if overflow_isolate_count > 0 { continue; } if overflow_embedding_count > 0 { overflow_embedding_count -= 1; continue; } if stack.last().status != OverrideStatus::Isolate && stack.vec.len() >= 2 { stack.vec.pop(); } // The spec doesn't explicitly mention this step, but it is necessary. // See the reference implementations for comparison. levels[i] = stack.last().level; } // Nothing B | BN => {} // _ => { let last = stack.last(); levels[i] = last.level; match last.status { OverrideStatus::RTL => processing_classes[i] = R, OverrideStatus::LTR => processing_classes[i] = L, _ => {} } } } // Handle multi-byte characters. for j in 1..c.len_utf8() { levels[i + j] = levels[i]; processing_classes[i + j] = processing_classes[i]; } } } /// Entries in the directional status stack: struct Status { level: Level, status: OverrideStatus, } #[derive(PartialEq)] enum OverrideStatus { Neutral, RTL, LTR, Isolate, } struct DirectionalStatusStack { vec: Vec, } impl DirectionalStatusStack { fn new() -> Self { DirectionalStatusStack { vec: Vec::with_capacity(Level::max_explicit_depth() as usize + 2) } } fn push(&mut self, level: Level, status: OverrideStatus) { self.vec.push(Status { level, status }); } fn last(&self) -> &Status { self.vec.last().unwrap() } } unicode-bidi-0.3.7/src/format_chars.rs000064400000000000000000000025270072674642500160310ustar 00000000000000// Copyright 2017 The Servo Project Developers. See the // COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! Directional Formatting Characters //! //! // == Implicit == /// ARABIC LETTER MARK pub const ALM: char = '\u{061C}'; /// LEFT-TO-RIGHT MARK pub const LRM: char = '\u{200E}'; /// RIGHT-TO-LEFT MARK pub const RLM: char = '\u{200F}'; // == Explicit Isolates == /// LEFT‑TO‑RIGHT ISOLATE pub const LRI: char = '\u{2066}'; /// RIGHT‑TO‑LEFT ISOLATE pub const RLI: char = '\u{2067}'; /// FIRST STRONG ISOLATE pub const FSI: char = '\u{2068}'; /// POP DIRECTIONAL ISOLATE pub const PDI: char = '\u{2069}'; // == Explicit Embeddings and Overrides == /// LEFT-TO-RIGHT EMBEDDING pub const LRE: char = '\u{202A}'; /// RIGHT-TO-LEFT EMBEDDING pub const RLE: char = '\u{202B}'; /// POP DIRECTIONAL FORMATTING pub const PDF: char = '\u{202C}'; /// LEFT-TO-RIGHT OVERRIDE pub const LRO: char = '\u{202D}'; /// RIGHT-TO-LEFT OVERRIDE pub const RLO: char = '\u{202E}'; unicode-bidi-0.3.7/src/implicit.rs000064400000000000000000000175020072674642500151720ustar 00000000000000// Copyright 2015 The Servo Project Developers. See the // COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! 3.3.4 - 3.3.6. Resolve implicit levels and types. use core::cmp::max; use alloc::vec::Vec; use super::char_data::BidiClass::{self, *}; use super::prepare::{IsolatingRunSequence, LevelRun, not_removed_by_x9, removed_by_x9}; use super::level::Level; /// 3.3.4 Resolving Weak Types /// /// #[cfg_attr(feature = "flame_it", flamer::flame)] pub fn resolve_weak(sequence: &IsolatingRunSequence, processing_classes: &mut [BidiClass]) { // FIXME (#8): This function applies steps W1-W6 in a single pass. This can produce // incorrect results in cases where a "later" rule changes the value of `prev_class` seen // by an "earlier" rule. We should either split this into separate passes, or preserve // extra state so each rule can see the correct previous class. // FIXME: Also, this could be the cause of increased failure for using longer-UTF-8 chars in // conformance tests, like BidiTest:69635 (AL ET EN) let mut prev_class = sequence.sos; let mut last_strong_is_al = false; let mut et_run_indices = Vec::new(); // for W5 // Like sequence.runs.iter().flat_map(Clone::clone), but make indices itself clonable. fn id(x: LevelRun) -> LevelRun { x } let mut indices = sequence.runs.iter().cloned().flat_map( id as fn(LevelRun) -> LevelRun, ); while let Some(i) = indices.next() { match processing_classes[i] { // NSM => { processing_classes[i] = match prev_class { RLI | LRI | FSI | PDI => ON, _ => prev_class, }; } EN => { if last_strong_is_al { // W2. If previous strong char was AL, change EN to AN. processing_classes[i] = AN; } else { // W5. If a run of ETs is adjacent to an EN, change the ETs to EN. for j in &et_run_indices { processing_classes[*j] = EN; } et_run_indices.clear(); } } // AL => processing_classes[i] = R, // ES | CS => { let next_class = indices .clone() .map(|j| processing_classes[j]) .find(not_removed_by_x9) .unwrap_or(sequence.eos); processing_classes[i] = match (prev_class, processing_classes[i], next_class) { (EN, ES, EN) | (EN, CS, EN) => EN, (AN, CS, AN) => AN, (_, _, _) => ON, } } // ET => { match prev_class { EN => processing_classes[i] = EN, _ => et_run_indices.push(i), // In case this is followed by an EN. } } class => { if removed_by_x9(class) { continue; } } } prev_class = processing_classes[i]; match prev_class { L | R => { last_strong_is_al = false; } AL => { last_strong_is_al = true; } _ => {} } if prev_class != ET { // W6. If we didn't find an adjacent EN, turn any ETs into ON instead. for j in &et_run_indices { processing_classes[*j] = ON; } et_run_indices.clear(); } } // W7. If the previous strong char was L, change EN to L. let mut last_strong_is_l = sequence.sos == L; for run in &sequence.runs { for i in run.clone() { match processing_classes[i] { EN if last_strong_is_l => { processing_classes[i] = L; } L => { last_strong_is_l = true; } R | AL => { last_strong_is_l = false; } _ => {} } } } } /// 3.3.5 Resolving Neutral Types /// /// #[cfg_attr(feature = "flame_it", flamer::flame)] pub fn resolve_neutral( sequence: &IsolatingRunSequence, levels: &[Level], processing_classes: &mut [BidiClass], ) { let e: BidiClass = levels[sequence.runs[0].start].bidi_class(); let mut indices = sequence.runs.iter().flat_map(Clone::clone); let mut prev_class = sequence.sos; while let Some(mut i) = indices.next() { // N0. Process bracket pairs. // TODO // Process sequences of NI characters. let mut ni_run = Vec::new(); if is_NI(processing_classes[i]) { // Consume a run of consecutive NI characters. ni_run.push(i); let mut next_class; loop { match indices.next() { Some(j) => { i = j; if removed_by_x9(processing_classes[i]) { continue; } next_class = processing_classes[j]; if is_NI(next_class) { ni_run.push(i); } else { break; } } None => { next_class = sequence.eos; break; } }; } // N1-N2. // // // let new_class = match (prev_class, next_class) { (L, L) => L, (R, R) | (R, AN) | (R, EN) | (AN, R) | (AN, AN) | (AN, EN) | (EN, R) | (EN, AN) | (EN, EN) => R, (_, _) => e, }; for j in &ni_run { processing_classes[*j] = new_class; } ni_run.clear(); } prev_class = processing_classes[i]; } } /// 3.3.6 Resolving Implicit Levels /// /// Returns the maximum embedding level in the paragraph. /// /// #[cfg_attr(feature = "flame_it", flamer::flame)] pub fn resolve_levels(original_classes: &[BidiClass], levels: &mut [Level]) -> Level { let mut max_level = Level::ltr(); assert_eq!(original_classes.len(), levels.len()); for i in 0..levels.len() { match (levels[i].is_rtl(), original_classes[i]) { (false, AN) | (false, EN) => levels[i].raise(2).expect("Level number error"), (false, R) | (true, L) | (true, EN) | (true, AN) => { levels[i].raise(1).expect("Level number error") } (_, _) => {} } max_level = max(max_level, levels[i]); } max_level } /// Neutral or Isolate formatting character (B, S, WS, ON, FSI, LRI, RLI, PDI) /// /// #[allow(non_snake_case)] fn is_NI(class: BidiClass) -> bool { match class { B | S | WS | ON | FSI | LRI | RLI | PDI => true, _ => false, } } unicode-bidi-0.3.7/src/level.rs000064400000000000000000000261200072674642500144630ustar 00000000000000// Copyright 2017 The Servo Project Developers. See the // COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! Bidi Embedding Level //! //! See [`Level`](struct.Level.html) for more details. //! //! use alloc::vec::Vec; use core::convert::{From, Into}; use alloc::string::{String, ToString}; use super::char_data::BidiClass; /// Embedding Level /// /// Embedding Levels are numbers between 0 and 126 (inclusive), where even values denote a /// left-to-right (LTR) direction and odd values a right-to-left (RTL) direction. /// /// This struct maintains a *valid* status for level numbers, meaning that creating a new level, or /// mutating an existing level, with the value smaller than `0` (before conversion to `u8`) or /// larger than 125 results in an `Error`. /// /// #[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Level(u8); pub const LTR_LEVEL: Level = Level(0); pub const RTL_LEVEL: Level = Level(1); const MAX_DEPTH: u8 = 125; /// During explicit level resolution, embedding level can go as high as `max_depth`. pub const MAX_EXPLICIT_DEPTH: u8 = MAX_DEPTH; /// During implicit level resolution, embedding level can go as high as `max_depth + 1`. pub const MAX_IMPLICIT_DEPTH: u8 = MAX_DEPTH + 1; /// Errors that can occur on Level creation or mutation #[derive(Debug, PartialEq)] pub enum Error { /// Out-of-range (invalid) embedding level number. OutOfRangeNumber, } impl Level { /// New LTR level with smallest number value (0). #[inline] pub fn ltr() -> Level { LTR_LEVEL } /// New RTL level with smallest number value (1). #[inline] pub fn rtl() -> Level { RTL_LEVEL } /// Maximum depth of the directional status stack during implicit resolutions. pub fn max_implicit_depth() -> u8 { MAX_IMPLICIT_DEPTH } /// Maximum depth of the directional status stack during explicit resolutions. pub fn max_explicit_depth() -> u8 { MAX_EXPLICIT_DEPTH } // == Inquiries == /// Create new level, fail if number is larger than `max_depth + 1`. #[inline] pub fn new(number: u8) -> Result { if number <= MAX_IMPLICIT_DEPTH { Ok(Level(number)) } else { Err(Error::OutOfRangeNumber) } } /// Create new level, fail if number is larger than `max_depth`. #[inline] pub fn new_explicit(number: u8) -> Result { if number <= MAX_EXPLICIT_DEPTH { Ok(Level(number)) } else { Err(Error::OutOfRangeNumber) } } // == Inquiries == /// The level number. #[inline] pub fn number(&self) -> u8 { self.0 } /// If this level is left-to-right. #[inline] pub fn is_ltr(&self) -> bool { self.0 % 2 == 0 } /// If this level is right-to-left. #[inline] pub fn is_rtl(&self) -> bool { self.0 % 2 == 1 } // == Mutators == /// Raise level by `amount`, fail if number is larger than `max_depth + 1`. #[inline] pub fn raise(&mut self, amount: u8) -> Result<(), Error> { match self.0.checked_add(amount) { Some(number) => { if number <= MAX_IMPLICIT_DEPTH { self.0 = number; Ok(()) } else { Err(Error::OutOfRangeNumber) } } None => Err(Error::OutOfRangeNumber), } } /// Raise level by `amount`, fail if number is larger than `max_depth`. #[inline] pub fn raise_explicit(&mut self, amount: u8) -> Result<(), Error> { match self.0.checked_add(amount) { Some(number) => { if number <= MAX_EXPLICIT_DEPTH { self.0 = number; Ok(()) } else { Err(Error::OutOfRangeNumber) } } None => Err(Error::OutOfRangeNumber), } } /// Lower level by `amount`, fail if number goes below zero. #[inline] pub fn lower(&mut self, amount: u8) -> Result<(), Error> { match self.0.checked_sub(amount) { Some(number) => { self.0 = number; Ok(()) } None => Err(Error::OutOfRangeNumber), } } // == Helpers == /// The next LTR (even) level greater than this, or fail if number is larger than `max_depth`. #[inline] pub fn new_explicit_next_ltr(&self) -> Result { Level::new_explicit((self.0 + 2) & !1) } /// The next RTL (odd) level greater than this, or fail if number is larger than `max_depth`. #[inline] pub fn new_explicit_next_rtl(&self) -> Result { Level::new_explicit((self.0 + 1) | 1) } /// The lowest RTL (odd) level greater than or equal to this, or fail if number is larger than /// `max_depth + 1`. #[inline] pub fn new_lowest_ge_rtl(&self) -> Result { Level::new(self.0 | 1) } /// Generate a character type based on a level (as specified in steps X10 and N2). #[inline] pub fn bidi_class(&self) -> BidiClass { if self.is_rtl() { BidiClass::R } else { BidiClass::L } } pub fn vec(v: &[u8]) -> Vec { v.iter().map(|&x| x.into()).collect() } } /// If levels has any RTL (odd) level /// /// This information is usually used to skip re-ordering of text when no RTL level is present #[inline] pub fn has_rtl(levels: &[Level]) -> bool { levels.iter().any(|&lvl| lvl.is_rtl()) } impl Into for Level { /// Convert to the level number #[inline] fn into(self) -> u8 { self.number() } } impl From for Level { /// Create level by number #[inline] fn from(number: u8) -> Level { Level::new(number).expect("Level number error") } } /// Used for matching levels in conformance tests impl<'a> PartialEq<&'a str> for Level { #[inline] fn eq(&self, s: &&'a str) -> bool { *s == "x" || *s == self.0.to_string() } } /// Used for matching levels in conformance tests impl<'a> PartialEq for Level { #[inline] fn eq(&self, s: &String) -> bool { self == &s.as_str() } } #[cfg(test)] mod tests { use super::*; #[test] fn test_new() { assert_eq!(Level::new(0), Ok(Level(0))); assert_eq!(Level::new(1), Ok(Level(1))); assert_eq!(Level::new(10), Ok(Level(10))); assert_eq!(Level::new(125), Ok(Level(125))); assert_eq!(Level::new(126), Ok(Level(126))); assert_eq!(Level::new(127), Err(Error::OutOfRangeNumber)); assert_eq!(Level::new(255), Err(Error::OutOfRangeNumber)); } #[test] fn test_new_explicit() { assert_eq!(Level::new_explicit(0), Ok(Level(0))); assert_eq!(Level::new_explicit(1), Ok(Level(1))); assert_eq!(Level::new_explicit(10), Ok(Level(10))); assert_eq!(Level::new_explicit(125), Ok(Level(125))); assert_eq!(Level::new_explicit(126), Err(Error::OutOfRangeNumber)); assert_eq!(Level::new_explicit(255), Err(Error::OutOfRangeNumber)); } #[test] fn test_is_ltr() { assert_eq!(Level(0).is_ltr(), true); assert_eq!(Level(1).is_ltr(), false); assert_eq!(Level(10).is_ltr(), true); assert_eq!(Level(11).is_ltr(), false); assert_eq!(Level(124).is_ltr(), true); assert_eq!(Level(125).is_ltr(), false); } #[test] fn test_is_rtl() { assert_eq!(Level(0).is_rtl(), false); assert_eq!(Level(1).is_rtl(), true); assert_eq!(Level(10).is_rtl(), false); assert_eq!(Level(11).is_rtl(), true); assert_eq!(Level(124).is_rtl(), false); assert_eq!(Level(125).is_rtl(), true); } #[test] fn test_raise() { let mut level = Level::ltr(); assert_eq!(level.number(), 0); assert!(level.raise(100).is_ok()); assert_eq!(level.number(), 100); assert!(level.raise(26).is_ok()); assert_eq!(level.number(), 126); assert!(level.raise(1).is_err()); // invalid! assert!(level.raise(250).is_err()); // overflow! assert_eq!(level.number(), 126); } #[test] fn test_raise_explicit() { let mut level = Level::ltr(); assert_eq!(level.number(), 0); assert!(level.raise_explicit(100).is_ok()); assert_eq!(level.number(), 100); assert!(level.raise_explicit(25).is_ok()); assert_eq!(level.number(), 125); assert!(level.raise_explicit(1).is_err()); // invalid! assert!(level.raise_explicit(250).is_err()); // overflow! assert_eq!(level.number(), 125); } #[test] fn test_lower() { let mut level = Level::rtl(); assert_eq!(level.number(), 1); assert!(level.lower(1).is_ok()); assert_eq!(level.number(), 0); assert!(level.lower(1).is_err()); // underflow! assert!(level.lower(250).is_err()); // underflow! assert_eq!(level.number(), 0); } #[test] fn test_has_rtl() { assert_eq!(has_rtl(&Level::vec(&[0, 0, 0])), false); assert_eq!(has_rtl(&Level::vec(&[0, 1, 0])), true); assert_eq!(has_rtl(&Level::vec(&[0, 2, 0])), false); assert_eq!(has_rtl(&Level::vec(&[0, 125, 0])), true); assert_eq!(has_rtl(&Level::vec(&[0, 126, 0])), false); } #[test] fn test_into() { let level = Level::rtl(); let number: u8 = level.into(); assert_eq!(1u8, number); } #[test] fn test_vec() { assert_eq!( Level::vec(&[0, 1, 125]), vec![Level(0), Level(1), Level(125)] ); } #[test] fn test_str_eq() { assert_eq!(Level::vec(&[0, 1, 4, 125]), vec!["0", "1", "x", "125"]); assert_ne!(Level::vec(&[0, 1, 4, 125]), vec!["0", "1", "5", "125"]); } #[test] fn test_string_eq() { assert_eq!( Level::vec(&[0, 1, 4, 125]), vec!["0".to_string(), "1".to_string(), "x".to_string(), "125".to_string()] ); } } #[cfg(all(feature = "serde", test))] mod serde_tests { use serde_test::{Token, assert_tokens}; use super::*; #[test] fn test_statics() { assert_tokens( &Level::ltr(), &[Token::NewtypeStruct { name: "Level" }, Token::U8(0)], ); assert_tokens( &Level::rtl(), &[Token::NewtypeStruct { name: "Level" }, Token::U8(1)], ); } #[test] fn test_new() { let level = Level::new(42).unwrap(); assert_tokens( &level, &[Token::NewtypeStruct { name: "Level" }, Token::U8(42)], ); } } unicode-bidi-0.3.7/src/lib.rs000064400000000000000000000733770072674642500141420ustar 00000000000000// Copyright 2015 The Servo Project Developers. See the // COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! This crate implements the [Unicode Bidirectional Algorithm][tr9] for display of mixed //! right-to-left and left-to-right text. It is written in safe Rust, compatible with the //! current stable release. //! //! ## Example //! //! ```rust //! use unicode_bidi::BidiInfo; //! //! // This example text is defined using `concat!` because some browsers //! // and text editors have trouble displaying bidi strings. //! let text = concat![ //! "א", //! "ב", //! "ג", //! "a", //! "b", //! "c", //! ]; //! //! // Resolve embedding levels within the text. Pass `None` to detect the //! // paragraph level automatically. //! let bidi_info = BidiInfo::new(&text, None); //! //! // This paragraph has embedding level 1 because its first strong character is RTL. //! assert_eq!(bidi_info.paragraphs.len(), 1); //! let para = &bidi_info.paragraphs[0]; //! assert_eq!(para.level.number(), 1); //! assert_eq!(para.level.is_rtl(), true); //! //! // Re-ordering is done after wrapping each paragraph into a sequence of //! // lines. For this example, I'll just use a single line that spans the //! // entire paragraph. //! let line = para.range.clone(); //! //! let display = bidi_info.reorder_line(para, line); //! assert_eq!(display, concat![ //! "a", //! "b", //! "c", //! "ג", //! "ב", //! "א", //! ]); //! ``` //! //! # Features //! //! - `std`: Enabled by default, but can be disabled to make `unicode_bidi` //! `#![no_std]` + `alloc` compatible. //! - `serde`: Adds [`serde::Serialize`] and [`serde::Deserialize`] //! implementations to relevant types. //! //! [tr9]: #![forbid(unsafe_code)] #![no_std] // We need to link to std to make doc tests work on older Rust versions #[cfg(feature = "std")] extern crate std; #[macro_use] extern crate alloc; pub mod deprecated; pub mod format_chars; pub mod level; mod char_data; mod explicit; mod implicit; mod prepare; pub use crate::char_data::{BidiClass, bidi_class, UNICODE_VERSION}; pub use crate::level::{Level, LTR_LEVEL, RTL_LEVEL}; pub use crate::prepare::LevelRun; use alloc::borrow::Cow; use alloc::vec::Vec; use alloc::string::String; use core::cmp::{max, min}; use core::iter::repeat; use core::ops::Range; use crate::BidiClass::*; use crate::format_chars as chars; /// Bidi information about a single paragraph #[derive(Debug, PartialEq)] pub struct ParagraphInfo { /// The paragraphs boundaries within the text, as byte indices. /// /// TODO: Shrink this to only include the starting index? pub range: Range, /// The paragraph embedding level. /// /// pub level: Level, } /// Initial bidi information of the text. /// /// Contains the text paragraphs and `BidiClass` of its characters. #[derive(PartialEq, Debug)] pub struct InitialInfo<'text> { /// The text pub text: &'text str, /// The BidiClass of the character at each byte in the text. /// If a character is multiple bytes, its class will appear multiple times in the vector. pub original_classes: Vec, /// The boundaries and level of each paragraph within the text. pub paragraphs: Vec, } impl<'text> InitialInfo<'text> { /// Find the paragraphs and BidiClasses in a string of text. /// /// /// /// Also sets the class for each First Strong Isolate initiator (FSI) to LRI or RLI if a strong /// character is found before the matching PDI. If no strong character is found, the class will /// remain FSI, and it's up to later stages to treat these as LRI when needed. #[cfg_attr(feature = "flame_it", flamer::flame)] pub fn new(text: &str, default_para_level: Option) -> InitialInfo<'_> { let mut original_classes = Vec::with_capacity(text.len()); // The stack contains the starting byte index for each nested isolate we're inside. let mut isolate_stack = Vec::new(); let mut paragraphs = Vec::new(); let mut para_start = 0; let mut para_level = default_para_level; #[cfg(feature = "flame_it")] flame::start("InitialInfo::new(): iter text.char_indices()"); for (i, c) in text.char_indices() { let class = bidi_class(c); #[cfg(feature = "flame_it")] flame::start("original_classes.extend()"); original_classes.extend(repeat(class).take(c.len_utf8())); #[cfg(feature = "flame_it")] flame::end("original_classes.extend()"); match class { B => { // P1. Split the text into separate paragraphs. The paragraph separator is kept // with the previous paragraph. let para_end = i + c.len_utf8(); paragraphs.push(ParagraphInfo { range: para_start..para_end, // P3. If no character is found in p2, set the paragraph level to zero. level: para_level.unwrap_or(LTR_LEVEL), }); // Reset state for the start of the next paragraph. para_start = para_end; // TODO: Support defaulting to direction of previous paragraph // // para_level = default_para_level; isolate_stack.clear(); } L | R | AL => { match isolate_stack.last() { Some(&start) => { if original_classes[start] == FSI { // X5c. If the first strong character between FSI and its matching // PDI is R or AL, treat it as RLI. Otherwise, treat it as LRI. for j in 0..chars::FSI.len_utf8() { original_classes[start + j] = if class == L { LRI } else { RLI }; } } } None => { if para_level.is_none() { // P2. Find the first character of type L, AL, or R, while skipping // any characters between an isolate initiator and its matching // PDI. para_level = Some(if class != L { RTL_LEVEL } else { LTR_LEVEL }); } } } } RLI | LRI | FSI => { isolate_stack.push(i); } PDI => { isolate_stack.pop(); } _ => {} } } if para_start < text.len() { paragraphs.push(ParagraphInfo { range: para_start..text.len(), level: para_level.unwrap_or(LTR_LEVEL), }); } assert_eq!(original_classes.len(), text.len()); #[cfg(feature = "flame_it")] flame::end("InitialInfo::new(): iter text.char_indices()"); InitialInfo { text, original_classes, paragraphs, } } } /// Bidi information of the text. /// /// The `original_classes` and `levels` vectors are indexed by byte offsets into the text. If a /// character is multiple bytes wide, then its class and level will appear multiple times in these /// vectors. // TODO: Impl `struct StringProperty { values: Vec }` and use instead of Vec #[derive(Debug, PartialEq)] pub struct BidiInfo<'text> { /// The text pub text: &'text str, /// The BidiClass of the character at each byte in the text. pub original_classes: Vec, /// The directional embedding level of each byte in the text. pub levels: Vec, /// The boundaries and paragraph embedding level of each paragraph within the text. /// /// TODO: Use SmallVec or similar to avoid overhead when there are only one or two paragraphs? /// Or just don't include the first paragraph, which always starts at 0? pub paragraphs: Vec, } impl<'text> BidiInfo<'text> { /// Split the text into paragraphs and determine the bidi embedding levels for each paragraph. /// /// TODO: In early steps, check for special cases that allow later steps to be skipped. like /// text that is entirely LTR. See the `nsBidi` class from Gecko for comparison. /// /// TODO: Support auto-RTL base direction #[cfg_attr(feature = "flame_it", flamer::flame)] pub fn new(text: &str, default_para_level: Option) -> BidiInfo<'_> { let InitialInfo { original_classes, paragraphs, .. } = InitialInfo::new(text, default_para_level); let mut levels = Vec::::with_capacity(text.len()); let mut processing_classes = original_classes.clone(); for para in ¶graphs { let text = &text[para.range.clone()]; let original_classes = &original_classes[para.range.clone()]; let processing_classes = &mut processing_classes[para.range.clone()]; let new_len = levels.len() + para.range.len(); levels.resize(new_len, para.level); let levels = &mut levels[para.range.clone()]; explicit::compute( text, para.level, original_classes, levels, processing_classes, ); let sequences = prepare::isolating_run_sequences(para.level, original_classes, levels); for sequence in &sequences { implicit::resolve_weak(sequence, processing_classes); implicit::resolve_neutral(sequence, levels, processing_classes); } implicit::resolve_levels(processing_classes, levels); assign_levels_to_removed_chars(para.level, original_classes, levels); } BidiInfo { text, original_classes, paragraphs, levels, } } /// Re-order a line based on resolved levels and return only the embedding levels, one `Level` /// per *byte*. #[cfg_attr(feature = "flame_it", flamer::flame)] pub fn reordered_levels(&self, para: &ParagraphInfo, line: Range) -> Vec { let (levels, _) = self.visual_runs(para, line); levels } /// Re-order a line based on resolved levels and return only the embedding levels, one `Level` /// per *character*. #[cfg_attr(feature = "flame_it", flamer::flame)] pub fn reordered_levels_per_char( &self, para: &ParagraphInfo, line: Range, ) -> Vec { let levels = self.reordered_levels(para, line); self.text.char_indices().map(|(i, _)| levels[i]).collect() } /// Re-order a line based on resolved levels and return the line in display order. #[cfg_attr(feature = "flame_it", flamer::flame)] pub fn reorder_line(&self, para: &ParagraphInfo, line: Range) -> Cow<'text, str> { let (levels, runs) = self.visual_runs(para, line.clone()); // If all isolating run sequences are LTR, no reordering is needed if runs.iter().all(|run| levels[run.start].is_ltr()) { return self.text[line].into(); } let mut result = String::with_capacity(line.len()); for run in runs { if levels[run.start].is_rtl() { result.extend(self.text[run].chars().rev()); } else { result.push_str(&self.text[run]); } } result.into() } /// Find the level runs within a line and return them in visual order. /// /// `line` is a range of bytes indices within `levels`. /// /// #[cfg_attr(feature = "flame_it", flamer::flame)] pub fn visual_runs( &self, para: &ParagraphInfo, line: Range, ) -> (Vec, Vec) { assert!(line.start <= self.levels.len()); assert!(line.end <= self.levels.len()); let mut levels = self.levels.clone(); let line_classes = &self.original_classes[line.clone()]; let line_levels = &mut levels[line.clone()]; // Reset some whitespace chars to paragraph level. // let line_str: &str = &self.text[line.clone()]; let mut reset_from: Option = Some(0); let mut reset_to: Option = None; for (i, c) in line_str.char_indices() { match line_classes[i] { // Ignored by X9 RLE | LRE | RLO | LRO | PDF | BN => {} // Segment separator, Paragraph separator B | S => { assert_eq!(reset_to, None); reset_to = Some(i + c.len_utf8()); if reset_from == None { reset_from = Some(i); } } // Whitespace, isolate formatting WS | FSI | LRI | RLI | PDI => { if reset_from == None { reset_from = Some(i); } } _ => { reset_from = None; } } if let (Some(from), Some(to)) = (reset_from, reset_to) { for level in &mut line_levels[from..to] { *level = para.level; } reset_from = None; reset_to = None; } } if let Some(from) = reset_from { for level in &mut line_levels[from..] { *level = para.level; } } // Find consecutive level runs. let mut runs = Vec::new(); let mut start = line.start; let mut run_level = levels[start]; let mut min_level = run_level; let mut max_level = run_level; for (i, &new_level) in levels.iter().enumerate().take(line.end).skip(start + 1) { if new_level != run_level { // End of the previous run, start of a new one. runs.push(start..i); start = i; run_level = new_level; min_level = min(run_level, min_level); max_level = max(run_level, max_level); } } runs.push(start..line.end); let run_count = runs.len(); // Re-order the odd runs. // // Stop at the lowest *odd* level. min_level = min_level.new_lowest_ge_rtl().expect("Level error"); while max_level >= min_level { // Look for the start of a sequence of consecutive runs of max_level or higher. let mut seq_start = 0; while seq_start < run_count { if self.levels[runs[seq_start].start] < max_level { seq_start += 1; continue; } // Found the start of a sequence. Now find the end. let mut seq_end = seq_start + 1; while seq_end < run_count { if self.levels[runs[seq_end].start] < max_level { break; } seq_end += 1; } // Reverse the runs within this sequence. runs[seq_start..seq_end].reverse(); seq_start = seq_end; } max_level.lower(1).expect( "Lowering embedding level below zero", ); } (levels, runs) } /// If processed text has any computed RTL levels /// /// This information is usually used to skip re-ordering of text when no RTL level is present #[inline] pub fn has_rtl(&self) -> bool { level::has_rtl(&self.levels) } } /// Assign levels to characters removed by rule X9. /// /// The levels assigned to these characters are not specified by the algorithm. This function /// assigns each one the level of the previous character, to avoid breaking level runs. #[cfg_attr(feature = "flame_it", flamer::flame)] fn assign_levels_to_removed_chars(para_level: Level, classes: &[BidiClass], levels: &mut [Level]) { for i in 0..levels.len() { if prepare::removed_by_x9(classes[i]) { levels[i] = if i > 0 { levels[i - 1] } else { para_level }; } } } #[cfg(test)] mod tests { use super::*; #[test] fn test_initial_text_info() { let text = "a1"; assert_eq!( InitialInfo::new(text, None), InitialInfo { text, original_classes: vec![L, EN], paragraphs: vec![ ParagraphInfo { range: 0..2, level: LTR_LEVEL, }, ], } ); let text = "غ א"; assert_eq!( InitialInfo::new(text, None), InitialInfo { text, original_classes: vec![AL, AL, WS, R, R], paragraphs: vec![ ParagraphInfo { range: 0..5, level: RTL_LEVEL, }, ], } ); let text = "a\u{2029}b"; assert_eq!( InitialInfo::new(text, None), InitialInfo { text, original_classes: vec![L, B, B, B, L], paragraphs: vec![ ParagraphInfo { range: 0..4, level: LTR_LEVEL, }, ParagraphInfo { range: 4..5, level: LTR_LEVEL, }, ], } ); let text = format!("{}א{}a", chars::FSI, chars::PDI); assert_eq!( InitialInfo::new(&text, None), InitialInfo { text: &text, original_classes: vec![RLI, RLI, RLI, R, R, PDI, PDI, PDI, L], paragraphs: vec![ ParagraphInfo { range: 0..9, level: LTR_LEVEL, }, ], } ); } #[test] fn test_process_text() { let text = "abc123"; assert_eq!( BidiInfo::new(text, Some(LTR_LEVEL)), BidiInfo { text, levels: Level::vec(&[0, 0, 0, 0, 0, 0]), original_classes: vec![L, L, L, EN, EN, EN], paragraphs: vec![ ParagraphInfo { range: 0..6, level: LTR_LEVEL, }, ], } ); let text = "abc אבג"; assert_eq!( BidiInfo::new(text, Some(LTR_LEVEL)), BidiInfo { text, levels: Level::vec(&[0, 0, 0, 0, 1, 1, 1, 1, 1, 1]), original_classes: vec![L, L, L, WS, R, R, R, R, R, R], paragraphs: vec![ ParagraphInfo { range: 0..10, level: LTR_LEVEL, }, ], } ); assert_eq!( BidiInfo::new(text, Some(RTL_LEVEL)), BidiInfo { text, levels: Level::vec(&[2, 2, 2, 1, 1, 1, 1, 1, 1, 1]), original_classes: vec![L, L, L, WS, R, R, R, R, R, R], paragraphs: vec![ ParagraphInfo { range: 0..10, level: RTL_LEVEL, }, ], } ); let text = "אבג abc"; assert_eq!( BidiInfo::new(text, Some(LTR_LEVEL)), BidiInfo { text, levels: Level::vec(&[1, 1, 1, 1, 1, 1, 0, 0, 0, 0]), original_classes: vec![R, R, R, R, R, R, WS, L, L, L], paragraphs: vec![ ParagraphInfo { range: 0..10, level: LTR_LEVEL, }, ], } ); assert_eq!( BidiInfo::new(text, None), BidiInfo { text, levels: Level::vec(&[1, 1, 1, 1, 1, 1, 1, 2, 2, 2]), original_classes: vec![R, R, R, R, R, R, WS, L, L, L], paragraphs: vec![ ParagraphInfo { range: 0..10, level: RTL_LEVEL, }, ], } ); let text = "غ2ظ א2ג"; assert_eq!( BidiInfo::new(text, Some(LTR_LEVEL)), BidiInfo { text, levels: Level::vec(&[1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1]), original_classes: vec![AL, AL, EN, AL, AL, WS, R, R, EN, R, R], paragraphs: vec![ ParagraphInfo { range: 0..11, level: LTR_LEVEL, }, ], } ); let text = "a א.\nג"; assert_eq!( BidiInfo::new(text, None), BidiInfo { text, original_classes: vec![L, WS, R, R, CS, B, R, R], levels: Level::vec(&[0, 0, 1, 1, 0, 0, 1, 1]), paragraphs: vec![ ParagraphInfo { range: 0..6, level: LTR_LEVEL, }, ParagraphInfo { range: 6..8, level: RTL_LEVEL, }, ], } ); // BidiTest:69635 (AL ET EN) let bidi_info = BidiInfo::new("\u{060B}\u{20CF}\u{06F9}", None); assert_eq!(bidi_info.original_classes, vec![AL, AL, ET, ET, ET, EN, EN]); } #[test] fn test_bidi_info_has_rtl() { // ASCII only assert_eq!(BidiInfo::new("123", None).has_rtl(), false); assert_eq!(BidiInfo::new("123", Some(LTR_LEVEL)).has_rtl(), false); assert_eq!(BidiInfo::new("123", Some(RTL_LEVEL)).has_rtl(), false); assert_eq!(BidiInfo::new("abc", None).has_rtl(), false); assert_eq!(BidiInfo::new("abc", Some(LTR_LEVEL)).has_rtl(), false); assert_eq!(BidiInfo::new("abc", Some(RTL_LEVEL)).has_rtl(), false); assert_eq!(BidiInfo::new("abc 123", None).has_rtl(), false); assert_eq!(BidiInfo::new("abc\n123", None).has_rtl(), false); // With Hebrew assert_eq!(BidiInfo::new("אבּג", None).has_rtl(), true); assert_eq!(BidiInfo::new("אבּג", Some(LTR_LEVEL)).has_rtl(), true); assert_eq!(BidiInfo::new("אבּג", Some(RTL_LEVEL)).has_rtl(), true); assert_eq!(BidiInfo::new("abc אבּג", None).has_rtl(), true); assert_eq!(BidiInfo::new("abc\nאבּג", None).has_rtl(), true); assert_eq!(BidiInfo::new("אבּג abc", None).has_rtl(), true); assert_eq!(BidiInfo::new("אבּג\nabc", None).has_rtl(), true); assert_eq!(BidiInfo::new("אבּג 123", None).has_rtl(), true); assert_eq!(BidiInfo::new("אבּג\n123", None).has_rtl(), true); } fn reorder_paras(text: &str) -> Vec> { let bidi_info = BidiInfo::new(text, None); bidi_info .paragraphs .iter() .map(|para| bidi_info.reorder_line(para, para.range.clone())) .collect() } #[test] fn test_reorder_line() { // Bidi_Class: L L L B L L L B L L L assert_eq!( reorder_paras("abc\ndef\nghi"), vec!["abc\n", "def\n", "ghi"] ); // Bidi_Class: L L EN B L L EN B L L EN assert_eq!( reorder_paras("ab1\nde2\ngh3"), vec!["ab1\n", "de2\n", "gh3"] ); // Bidi_Class: L L L B AL AL AL assert_eq!(reorder_paras("abc\nابج"), vec!["abc\n", "جبا"]); // Bidi_Class: AL AL AL B L L L assert_eq!(reorder_paras("ابج\nabc"), vec!["\nجبا", "abc"]); assert_eq!(reorder_paras("1.-2"), vec!["1.-2"]); assert_eq!(reorder_paras("1-.2"), vec!["1-.2"]); assert_eq!(reorder_paras("abc אבג"), vec!["abc גבא"]); // Numbers being weak LTR characters, cannot reorder strong RTL assert_eq!(reorder_paras("123 אבג"), vec!["גבא 123"]); assert_eq!(reorder_paras("abc\u{202A}def"), vec!["abc\u{202A}def"]); assert_eq!( reorder_paras("abc\u{202A}def\u{202C}ghi"), vec!["abc\u{202A}def\u{202C}ghi"] ); assert_eq!( reorder_paras("abc\u{2066}def\u{2069}ghi"), vec!["abc\u{2066}def\u{2069}ghi"] ); // Testing for RLE Character assert_eq!( reorder_paras("\u{202B}abc אבג\u{202C}"), vec!["\u{202B}\u{202C}גבא abc"] ); // Testing neutral characters assert_eq!(reorder_paras("אבג? אבג"), vec!["גבא ?גבא"]); // Testing neutral characters with special case assert_eq!(reorder_paras("A אבג?"), vec!["A גבא?"]); // Testing neutral characters with Implicit RTL Marker assert_eq!( reorder_paras("A אבג?\u{200F}"), vec!["A \u{200F}?גבא"] ); assert_eq!(reorder_paras("אבג abc"), vec!["abc גבא"]); assert_eq!( reorder_paras("abc\u{2067}.-\u{2069}ghi"), vec!["abc\u{2067}-.\u{2069}ghi"] ); assert_eq!( reorder_paras("Hello, \u{2068}\u{202E}world\u{202C}\u{2069}!"), vec!["Hello, \u{2068}\u{202E}\u{202C}dlrow\u{2069}!"] ); // With mirrorable characters in RTL run assert_eq!(reorder_paras("א(ב)ג."), vec![".ג)ב(א"]); // With mirrorable characters on level boundry assert_eq!( reorder_paras("אב(גד[&ef].)gh"), vec!["ef].)gh&[דג(בא"] ); } fn reordered_levels_for_paras(text: &str) -> Vec> { let bidi_info = BidiInfo::new(text, None); bidi_info .paragraphs .iter() .map(|para| bidi_info.reordered_levels(para, para.range.clone())) .collect() } fn reordered_levels_per_char_for_paras(text: &str) -> Vec> { let bidi_info = BidiInfo::new(text, None); bidi_info .paragraphs .iter() .map(|para| { bidi_info.reordered_levels_per_char(para, para.range.clone()) }) .collect() } #[test] fn test_reordered_levels() { // BidiTest:946 (LRI PDI) let text = "\u{2067}\u{2069}"; assert_eq!( reordered_levels_for_paras(text), vec![Level::vec(&[0, 0, 0, 0, 0, 0])] ); assert_eq!( reordered_levels_per_char_for_paras(text), vec![Level::vec(&[0, 0])] ); let text = "aa טֶ"; let bidi_info = BidiInfo::new(text, None); assert_eq!( bidi_info.reordered_levels(&bidi_info.paragraphs[0], 3..7), Level::vec(&[0, 0, 0, 1, 1, 1, 1]), ) /* TODO /// BidiTest:69635 (AL ET EN) let text = "\u{060B}\u{20CF}\u{06F9}"; assert_eq!( reordered_levels_for_paras(text), vec![Level::vec(&[1, 1, 1, 1, 1, 2, 2])] ); assert_eq!( reordered_levels_per_char_for_paras(text), vec![Level::vec(&[1, 1, 2])] ); */ /* TODO // BidiTest:291284 (AN RLI PDF R) assert_eq!( reordered_levels_per_char_for_paras("\u{0605}\u{2067}\u{202C}\u{0590}"), vec![&["2", "0", "x", "1"]] ); */ } } #[cfg(all(feature = "serde", test))] mod serde_tests { use serde_test::{Token, assert_tokens}; use super::*; #[test] fn test_levels() { let text = "abc אבג"; let bidi_info = BidiInfo::new(text, None); let levels = bidi_info.levels; assert_eq!(text.as_bytes().len(), 10); assert_eq!(levels.len(), 10); assert_tokens( &levels, &[ Token::Seq { len: Some(10) }, Token::NewtypeStruct { name: "Level" }, Token::U8(0), Token::NewtypeStruct { name: "Level" }, Token::U8(0), Token::NewtypeStruct { name: "Level" }, Token::U8(0), Token::NewtypeStruct { name: "Level" }, Token::U8(0), Token::NewtypeStruct { name: "Level" }, Token::U8(1), Token::NewtypeStruct { name: "Level" }, Token::U8(1), Token::NewtypeStruct { name: "Level" }, Token::U8(1), Token::NewtypeStruct { name: "Level" }, Token::U8(1), Token::NewtypeStruct { name: "Level" }, Token::U8(1), Token::NewtypeStruct { name: "Level" }, Token::U8(1), Token::SeqEnd, ], ); } } unicode-bidi-0.3.7/src/prepare.rs000064400000000000000000000273500072674642500150200ustar 00000000000000// Copyright 2015 The Servo Project Developers. See the // COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! 3.3.3 Preparations for Implicit Processing //! //! use core::cmp::max; use core::ops::Range; use alloc::vec::Vec; use super::BidiClass::{self, *}; use super::level::Level; /// A maximal substring of characters with the same embedding level. /// /// Represented as a range of byte indices. pub type LevelRun = Range; /// Output of `isolating_run_sequences` (steps X9-X10) #[derive(Debug, PartialEq)] pub struct IsolatingRunSequence { pub runs: Vec, pub sos: BidiClass, // Start-of-sequence type. pub eos: BidiClass, // End-of-sequence type. } /// Compute the set of isolating run sequences. /// /// An isolating run sequence is a maximal sequence of level runs such that for all level runs /// except the last one in the sequence, the last character of the run is an isolate initiator /// whose matching PDI is the first character of the next level run in the sequence. /// /// Note: This function does *not* return the sequences in order by their first characters. #[cfg_attr(feature = "flame_it", flamer::flame)] pub fn isolating_run_sequences( para_level: Level, original_classes: &[BidiClass], levels: &[Level], ) -> Vec { let runs = level_runs(levels, original_classes); // Compute the set of isolating run sequences. // let mut sequences = Vec::with_capacity(runs.len()); // When we encounter an isolate initiator, we push the current sequence onto the // stack so we can resume it after the matching PDI. let mut stack = vec![Vec::new()]; for run in runs { assert!(run.len() > 0); assert!(!stack.is_empty()); let start_class = original_classes[run.start]; let end_class = original_classes[run.end - 1]; let mut sequence = if start_class == PDI && stack.len() > 1 { // Continue a previous sequence interrupted by an isolate. stack.pop().unwrap() } else { // Start a new sequence. Vec::new() }; sequence.push(run); if let RLI | LRI | FSI = end_class { // Resume this sequence after the isolate. stack.push(sequence); } else { // This sequence is finished. sequences.push(sequence); } } // Pop any remaning sequences off the stack. sequences.extend(stack.into_iter().rev().filter(|seq| !seq.is_empty())); // Determine the `sos` and `eos` class for each sequence. // sequences .into_iter() .map(|sequence: Vec| { assert!(!sequence.is_empty()); let start_of_seq = sequence[0].start; let end_of_seq = sequence[sequence.len() - 1].end; let seq_level = levels[start_of_seq]; #[cfg(test)] for run in sequence.clone() { for idx in run { if not_removed_by_x9(&original_classes[idx]) { assert_eq!(seq_level, levels[idx]); } } } // Get the level of the last non-removed char before the runs. let pred_level = match original_classes[..start_of_seq].iter().rposition( not_removed_by_x9, ) { Some(idx) => levels[idx], None => para_level, }; // Get the level of the next non-removed char after the runs. let succ_level = if let RLI | LRI | FSI = original_classes[end_of_seq - 1] { para_level } else { match original_classes[end_of_seq..].iter().position( not_removed_by_x9, ) { Some(idx) => levels[end_of_seq + idx], None => para_level, } }; IsolatingRunSequence { runs: sequence, sos: max(seq_level, pred_level).bidi_class(), eos: max(seq_level, succ_level).bidi_class(), } }) .collect() } /// Finds the level runs in a paragraph. /// /// fn level_runs(levels: &[Level], original_classes: &[BidiClass]) -> Vec { assert_eq!(levels.len(), original_classes.len()); let mut runs = Vec::new(); if levels.is_empty() { return runs; } let mut current_run_level = levels[0]; let mut current_run_start = 0; for i in 1..levels.len() { if !removed_by_x9(original_classes[i]) && levels[i] != current_run_level { // End the last run and start a new one. runs.push(current_run_start..i); current_run_level = levels[i]; current_run_start = i; } } runs.push(current_run_start..levels.len()); runs } /// Should this character be ignored in steps after X9? /// /// pub fn removed_by_x9(class: BidiClass) -> bool { match class { RLE | LRE | RLO | LRO | PDF | BN => true, _ => false, } } // For use as a predicate for `position` / `rposition` pub fn not_removed_by_x9(class: &BidiClass) -> bool { !removed_by_x9(*class) } #[cfg(test)] mod tests { use super::*; #[test] fn test_level_runs() { assert_eq!(level_runs(&Level::vec(&[]), &[]), &[]); assert_eq!( level_runs(&Level::vec(&[0, 0, 0, 1, 1, 2, 0, 0]), &[L; 8]), &[0..3, 3..5, 5..6, 6..8] ); } // From #[rustfmt::skip] #[test] fn test_isolating_run_sequences() { // == Example 1 == // text1·RLE·text2·PDF·RLE·text3·PDF·text4 // index 0 1 2 3 4 5 6 7 let classes = &[L, RLE, L, PDF, RLE, L, PDF, L]; let levels = &[0, 1, 1, 1, 1, 1, 1, 0]; let para_level = Level::ltr(); let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); assert_eq!( sequences.iter().map(|s| s.runs.clone()).collect::>(), vec![vec![0..2], vec![2..7], vec![7..8]] ); // == Example 2 == // text1·RLI·text2·PDI·RLI·text3·PDI·text4 // index 0 1 2 3 4 5 6 7 let classes = &[L, RLI, L, PDI, RLI, L, PDI, L]; let levels = &[0, 0, 1, 0, 0, 1, 0, 0]; let para_level = Level::ltr(); let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); assert_eq!( sequences.iter().map(|s| s.runs.clone()).collect::>(), vec![vec![0..2, 3..5, 6..8], vec![2..3], vec![5..6]] ); // == Example 3 == // text1·RLI·text2·LRI·text3·RLE·text4·PDF·text5·PDI·text6·PDI·text7 // index 0 1 2 3 4 5 6 7 8 9 10 11 12 let classes = &[L, RLI, L, LRI, L, RLE, L, PDF, L, PDI, L, PDI, L]; let levels = &[0, 0, 1, 1, 2, 3, 3, 3, 2, 1, 1, 0, 0]; let para_level = Level::ltr(); let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); assert_eq!( sequences.iter().map(|s| s.runs.clone()).collect::>(), vec![vec![0..2, 11..13], vec![2..4, 9..11], vec![4..6], vec![6..8], vec![8..9]] ); } // From #[rustfmt::skip] #[test] fn test_isolating_run_sequences_sos_and_eos() { // == Example 1 == // text1·RLE·text2·LRE·text3·PDF·text4·PDF·RLE·text5·PDF·text6 // index 0 1 2 3 4 5 6 7 8 9 10 11 let classes = &[L, RLE, L, LRE, L, PDF, L, PDF, RLE, L, PDF, L]; let levels = &[0, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 0]; let para_level = Level::ltr(); let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); // text1 assert_eq!( &sequences[0], &IsolatingRunSequence { runs: vec![0..2], sos: L, eos: R, } ); // text2 assert_eq!( &sequences[1], &IsolatingRunSequence { runs: vec![2..4], sos: R, eos: L, } ); // text3 assert_eq!( &sequences[2], &IsolatingRunSequence { runs: vec![4..6], sos: L, eos: L, } ); // text4 text5 assert_eq!( &sequences[3], &IsolatingRunSequence { runs: vec![6..11], sos: L, eos: R, } ); // text6 assert_eq!( &sequences[4], &IsolatingRunSequence { runs: vec![11..12], sos: R, eos: L, } ); // == Example 2 == // text1·RLI·text2·LRI·text3·PDI·text4·PDI·RLI·text5·PDI·text6 // index 0 1 2 3 4 5 6 7 8 9 10 11 let classes = &[L, RLI, L, LRI, L, PDI, L, PDI, RLI, L, PDI, L]; let levels = &[0, 0, 1, 1, 2, 1, 1, 0, 0, 1, 0, 0]; let para_level = Level::ltr(); let mut sequences = isolating_run_sequences(para_level, classes, &Level::vec(levels)); sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone())); // text1·RLI·PDI·RLI·PDI·text6 assert_eq!( &sequences[0], &IsolatingRunSequence { runs: vec![0..2, 7..9, 10..12], sos: L, eos: L, } ); // text2·LRI·PDI·text4 assert_eq!( &sequences[1], &IsolatingRunSequence { runs: vec![2..4, 5..7], sos: R, eos: R, } ); // text3 assert_eq!( &sequences[2], &IsolatingRunSequence { runs: vec![4..5], sos: L, eos: L, } ); // text5 assert_eq!( &sequences[3], &IsolatingRunSequence { runs: vec![9..10], sos: R, eos: R, } ); } #[test] fn test_removed_by_x9() { let rem_classes = &[RLE, LRE, RLO, LRO, PDF, BN]; let not_classes = &[L, RLI, AL, LRI, PDI]; for x in rem_classes { assert_eq!(removed_by_x9(*x), true); } for x in not_classes { assert_eq!(removed_by_x9(*x), false); } } #[test] fn test_not_removed_by_x9() { let non_x9_classes = &[L, R, AL, EN, ES, ET, AN, CS, NSM, B, S, WS, ON, LRI, RLI, FSI, PDI]; for x in non_x9_classes { assert_eq!(not_removed_by_x9(&x), true); } } }