unic-ucd-hangul-0.9.0/Cargo.toml.orig010064400007650000024000000016211343665764000156270ustar0000000000000000[package] name = "unic-ucd-hangul" version = "0.9.0" edition = "2018" authors = ["The UNIC Project Developers"] repository = "https://github.com/open-i18n/rust-unic/" license = "MIT/Apache-2.0" description = "UNIC — Unicode Character Database — Hangul Syllable Composition & Decomposition" keywords = ["text", "unicode", "hangul"] categories = ["internationalization", "text-processing", "parsing", "rendering"] # No tests/benches that depends on /data/ exclude = [] [dependencies] unic-ucd-version = { path = "../version/", version = "0.9.0" } [badges] maintenance = { status = "actively-developed" } is-it-maintained-issue-resolution = { repository = "open-i18n/rust-unic" } is-it-maintained-open-issues = { repository = "open-i18n/rust-unic" } appveyor = { repository = "open-i18n/rust-unic", branch = "master", service = "github" } travis-ci = { repository = "open-i18n/rust-unic", branch = "master" } unic-ucd-hangul-0.9.0/Cargo.toml0000644000000025330000000000000120650ustar00# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g. crates.io) dependencies # # If you believe there's an error in this file please file an # issue against the rust-lang/cargo repository. If you're # editing this file be aware that the upstream Cargo.toml # will likely look very different (and much more reasonable) [package] edition = "2018" name = "unic-ucd-hangul" version = "0.9.0" authors = ["The UNIC Project Developers"] exclude = [] description = "UNIC — Unicode Character Database — Hangul Syllable Composition & Decomposition" keywords = ["text", "unicode", "hangul"] categories = ["internationalization", "text-processing", "parsing", "rendering"] license = "MIT/Apache-2.0" repository = "https://github.com/open-i18n/rust-unic/" [dependencies.unic-ucd-version] version = "0.9.0" [badges.appveyor] branch = "master" repository = "open-i18n/rust-unic" service = "github" [badges.is-it-maintained-issue-resolution] repository = "open-i18n/rust-unic" [badges.is-it-maintained-open-issues] repository = "open-i18n/rust-unic" [badges.maintenance] status = "actively-developed" [badges.travis-ci] branch = "master" repository = "open-i18n/rust-unic" unic-ucd-hangul-0.9.0/README.md010064400007650000024000000010021343520353600141760ustar0000000000000000# UNIC — Unicode Character Database — Hangul Syllable Composition & Decomposition [![Crates.io](https://img.shields.io/crates/v/unic-ucd-hangul.svg)](https://crates.io/crates/unic-ucd-hangul) [![Documentation](https://docs.rs/unic-ucd-hangul/badge.svg)](https://docs.rs/unic-ucd-hangul/) This UCD component provides algorithms described in [Unicode® Standard - 3.12 Conjoining Jamo Behavior](https://www.unicode.org/versions/latest/ch03.pdf), used for detecting, composing and decomposing Hangul syllables. unic-ucd-hangul-0.9.0/src/hangul.rs010064400007650000024000000052431343520353600153450ustar0000000000000000// Copyright 2012-2015 The Rust Project Developers. // Copyright 2017 The UNIC Project Developers. // // See the COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! Conjoining Jamo composition to/decomposition from Hangul syllables. //! //! Reference: Section 3.12 Conjoining Jamo Behavior, Unicode 9.0.0 //! use core::char; pub const S_BASE: u32 = 0xAC00; pub const L_BASE: u32 = 0x1100; pub const V_BASE: u32 = 0x1161; pub const T_BASE: u32 = 0x11A7; pub const L_COUNT: u32 = 19; pub const V_COUNT: u32 = 21; pub const T_COUNT: u32 = 28; pub const N_COUNT: u32 = (V_COUNT * T_COUNT); pub const S_COUNT: u32 = (L_COUNT * N_COUNT); /// Whether the character is a (precomposed) Hangul Syllable pub fn is_syllable(ch: char) -> bool { let cp = ch as u32; cp >= S_BASE && cp < (S_BASE + S_COUNT) } /// Decompose a precomposed Hangul syllable // FIXME: This is a workaround, we should use `F` instead of `&mut F` #[allow(unsafe_code)] #[inline] pub fn decompose_syllable(syllable: char, f: &mut F) where F: FnMut(char), { let si = syllable as u32 - S_BASE; let li = si / N_COUNT; unsafe { (*f)(char::from_u32_unchecked(L_BASE + li)); let vi = (si % N_COUNT) / T_COUNT; (*f)(char::from_u32_unchecked(V_BASE + vi)); let ti = si % T_COUNT; if ti > 0 { (*f)(char::from_u32_unchecked(T_BASE + ti)); } } } /// Compose a pair of Hangul Jamo #[allow(unsafe_code)] #[inline] pub fn compose_syllable(jamo1: char, jamo2: char) -> Option { let l = jamo1 as u32; let v = jamo2 as u32; // Compose an LPart and a VPart if L_BASE <= l && l < (L_BASE + L_COUNT) // l should be an L choseong jamo && V_BASE <= v && v < (V_BASE + V_COUNT) { // v should be a V jungseong jamo let r = S_BASE + (l - L_BASE) * N_COUNT + (v - V_BASE) * T_COUNT; return unsafe { Some(char::from_u32_unchecked(r)) }; } // Compose an LVPart and a TPart if S_BASE <= l && l <= (S_BASE+S_COUNT-T_COUNT) // l should be a syllable block && T_BASE <= v && v < (T_BASE+T_COUNT) // v should be a T jongseong jamo && (l - S_BASE) % T_COUNT == 0 { // l should be an LV syllable block (not LVT) let r = l + (v - T_BASE); return unsafe { Some(char::from_u32_unchecked(r)) }; } None } unic-ucd-hangul-0.9.0/src/lib.rs010064400007650000024000000036301343520353600146330ustar0000000000000000// Copyright 2012-2015 The Rust Project Developers. // Copyright 2017 The UNIC Project Developers. // // See the COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. #![no_std] #![warn( bad_style, missing_debug_implementations, missing_docs, unconditional_recursion )] #![deny(unsafe_code)] //! # UNIC - UCD - Hangul Syllable Composition & Decomposition //! //! This UCD component provides algorithms described in [Unicode® //! Standard - 3.12 Conjoining Jamo Behavior](https://www.unicode.org/versions/latest/ch03.pdf), //! used for detecting, composing and decomposing Hangul syllables. //! //! # Examples //! //! ```rust //! # use unic_ucd_hangul::{is_syllable, compose_syllable, decompose_syllable}; //! //! let sample_hangul = '갓'; //! assert!(is_syllable(sample_hangul)); //! //! let mut decomposed = vec![]; //! { //! let mut collect_decomposed = |chr| { //! decomposed.push(chr); //! }; //! decompose_syllable(sample_hangul, &mut collect_decomposed); //! } //! assert_eq!(decomposed, ['ᄀ', 'ᅡ', 'ᆺ']); //! //! let composed_lv = compose_syllable(decomposed[0], decomposed[1]).unwrap(); //! let composed = compose_syllable(composed_lv, decomposed[2]).unwrap(); //! assert_eq!(composed, sample_hangul); //! ``` use unic_ucd_version::UnicodeVersion; mod hangul; pub use crate::hangul::{compose_syllable, decompose_syllable, is_syllable}; mod pkg_info; pub use crate::pkg_info::{PKG_DESCRIPTION, PKG_NAME, PKG_VERSION}; /// The [Unicode version](https://www.unicode.org/versions/) of data pub const UNICODE_VERSION: UnicodeVersion = include!("../tables/unicode_version.rsv"); unic-ucd-hangul-0.9.0/src/pkg_info.rs010064400007650000024000000013121343520353600156540ustar0000000000000000// Copyright 2017 The UNIC Project Developers. // // See the COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! Package information /// UNIC component version. pub const PKG_VERSION: &str = env!("CARGO_PKG_VERSION"); /// UNIC component name. pub const PKG_NAME: &str = env!("CARGO_PKG_NAME"); /// UNIC component description. pub const PKG_DESCRIPTION: &str = env!("CARGO_PKG_DESCRIPTION"); unic-ucd-hangul-0.9.0/tables/unicode_version.rsv010064400007650000024000000002071343520353600201260ustar0000000000000000// WARNING: Auto-generated by the `unic-gen` crate. // WARNING: DO NOT EDIT MANUALLY! UnicodeVersion { major: 10, minor: 0, micro: 0 } unic-ucd-hangul-0.9.0/tests/basic_tests.rs010064400007650000024000000037001343520353600167410ustar0000000000000000// Copyright 2017 The UNIC Project Developers. // // See the COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. use unic_ucd_hangul::{compose_syllable, decompose_syllable, is_syllable}; #[test] fn test_is_syllable() { assert!(is_syllable('갏')); assert!(!is_syllable('A')); assert!(!is_syllable('韩')); } #[test] fn test_decompose() { let mut decomposed_lv = vec![]; { let mut collect_decomposed = |chr| { decomposed_lv.push(chr); }; decompose_syllable('쮀', &mut collect_decomposed); } assert_eq!(decomposed_lv, ['ᄍ', 'ᅰ']); let mut decomposed_lvt = vec![]; { let mut collect_decomposed = |chr| { decomposed_lvt.push(chr); }; decompose_syllable('퓛', &mut collect_decomposed); } assert_eq!(decomposed_lvt, ['ᄑ', 'ᅱ', 'ᆶ']); } #[test] fn test_compose() { let l = 'ᄑ'; let v = 'ᅱ'; let t = 'ᆶ'; let lv = compose_syllable(l, v).unwrap(); assert_eq!(lv, '퓌'); let lvt = compose_syllable(lv, t).unwrap(); assert_eq!(lvt, '퓛'); } #[test] fn test_decompose_then_compose() { let sample_hangul = '갓'; assert!(is_syllable(sample_hangul)); let mut decomposed = vec![]; { let mut collect_decomposed = |chr| { decomposed.push(chr); }; decompose_syllable(sample_hangul, &mut collect_decomposed); } assert_eq!(decomposed, ['ᄀ', 'ᅡ', 'ᆺ']); let composed_lv = compose_syllable(decomposed[0], decomposed[1]).unwrap(); let composed = compose_syllable(composed_lv, decomposed[2]).unwrap(); assert_eq!(composed, sample_hangul); } unic-ucd-hangul-0.9.0/tests/unicode_version_tests.rs010064400007650000024000000011751343520353600210570ustar0000000000000000// Copyright 2017 The UNIC Project Developers. // // See the COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. use unic_ucd_hangul; use unic_ucd_version; #[test] fn test_unicode_version_against_ucd_version() { assert_eq!( unic_ucd_hangul::UNICODE_VERSION, unic_ucd_version::UNICODE_VERSION ) } unic-ucd-hangul-0.9.0/.cargo_vcs_info.json0000644000000001120000000000000140560ustar00{ "git": { "sha1": "8a6ce83063d90b91ae2ce59eddb803edd393fca9" } }