unicode-display_width-1.1.3/0000755000004100000410000000000013040704400016024 5ustar www-datawww-dataunicode-display_width-1.1.3/Rakefile0000644000004100000410000000247213040704400017476 0ustar www-datawww-data# # # # Get gemspec info gemspec_file = Dir['*.gemspec'].first gemspec = eval File.read(gemspec_file), binding, gemspec_file info = "#{gemspec.name} | #{gemspec.version} | " \ "#{gemspec.runtime_dependencies.size} dependencies | " \ "#{gemspec.files.size} files" # # # # Gem build and install task desc info task :gem do puts info + "\n\n" print " "; sh "gem build #{gemspec_file}" FileUtils.mkdir_p 'pkg' FileUtils.mv "#{gemspec.name}-#{gemspec.version}.gem", 'pkg' puts; sh %{gem install --no-document pkg/#{gemspec.name}-#{gemspec.version}.gem} end # # # # Start an IRB session with the gem loaded desc "#{gemspec.name} | IRB" task :irb do sh "irb -I ./lib -r #{gemspec.name.gsub '-','/'}" end # # # # Run all specs desc "#{gemspec.name} | Test" task :test do sh "rspec spec" end task :default => :test # # # # Update index table namespace :update do desc "#{gemspec.name} | Update index" task :index do require File.dirname(__FILE__) + '/lib/unicode/display_width/index_builder' Unicode::DisplayWidth::IndexBuilder.build! end end # # # # Update data file namespace :update do desc "#{gemspec.name} | Update unicode data" task :data do require File.dirname(__FILE__) + '/lib/unicode/display_width/index_builder' Unicode::DisplayWidth::IndexBuilder.fetch! end end unicode-display_width-1.1.3/data/0000755000004100000410000000000013040704400016735 5ustar www-datawww-dataunicode-display_width-1.1.3/data/display_width.marshal.gz0000644000004100000410000000273513040704400023600 0ustar www-datawww-dataZpiWYIv9 erGJີs.T>lURH?o}_Y%OO^o]ۗ|Zu]9>7d?%m_oFKA!>B bjA`#U^dmfCkP\1 Jm>#Ty tCcA1^ >}/Zrq`xNL )<2Ch!aKU'mܐ鋓,L<wtB>}fqy~$,I`{idP"(wM~l- khؘ31;&$w~pd~=DO*RfxޖNфGIJDҋTS܆M |MnV~yL;1C:'`Rn)5{M7[On;bTds@ 4!РzF2@*1\DsA&gX`SD׮ʵ^8/ nF/@["ddA,/>5;TI0ˤ~x#NMf[k߭WW&OZDd@ͅ4Kj.y`S"wͧ@K|kw.EAȱe~8.n.̞pΙ\oUYC  VFI.6 }?)1R ̿JK8 O61I$] iHvY_~eVz(C Yǖ?nU茔Px6}/鿹.̤O\] lwa\^;`/b_duP]ܞK٫u^9':~l{V$j.4W `¹^Sg£K}kY:YԨk_~h!QCdN$unicode-display_width-1.1.3/spec/0000755000004100000410000000000013040704400016756 5ustar www-datawww-dataunicode-display_width-1.1.3/spec/display_width_spec.rb0000644000004100000410000000555613040704400023174 0ustar www-datawww-data# coding: utf-8 require 'unicode/display_width' describe 'Unicode::DisplayWidth.of' do describe '[east asian width]' do it 'returns 2 for F' do expect( '!'.display_width ).to eq 2 end it 'returns 2 for W' do expect( '一'.display_width ).to eq 2 end it 'returns 1 for N' do expect( 'À'.display_width ).to eq 1 end it 'returns 1 for Na' do expect( 'A'.display_width ).to eq 1 end it 'returns 1 for H' do expect( '。'.display_width ).to eq 1 end it 'returns first argument of display_width for A' do expect( '·'.display_width(1) ).to eq 1 end it 'returns first argument of display_width for A' do expect( '·'.display_width(2) ).to eq 2 end it 'returns 1 for A if no argument given' do expect( '·'.display_width ).to eq 1 end end describe '[zero width]' do it 'returns 0 for Mn chars' do expect( 'ֿ'.display_width ).to eq 0 end it 'returns 0 for Me chars' do expect( '҈'.display_width ).to eq 0 end it 'returns 0 for Cf chars' do expect( '​'.display_width ).to eq 0 end it 'returns 0 for HANGUL JUNGSEONG chars' do expect( 'ᅠ'.display_width ).to eq 0 end end describe '[special characters]' do it 'returns 0 for ␀' do expect( "\0".display_width ).to eq 0 end it 'returns 0 for ␅' do expect( "\x05".display_width ).to eq 0 end it 'returns 0 for ␇' do expect( "\a".display_width ).to eq 0 end it 'returns -1 for ␈' do expect( "aaaa\b".display_width ).to eq 3 end it 'returns -1 for ␈, but at least 0' do expect( "\b".display_width ).to eq 0 end it 'returns 0 for ␊' do expect( "\n".display_width ).to eq 0 end it 'returns 0 for ␋' do expect( "\v".display_width ).to eq 0 end it 'returns 0 for ␌' do expect( "\f".display_width ).to eq 0 end it 'returns 0 for ␍' do expect( "\r".display_width ).to eq 0 end it 'returns 0 for ␎' do expect( "\x0E".display_width ).to eq 0 end it 'returns 0 for ␏' do expect( "\x0F".display_width ).to eq 0 end it 'returns 1 for other C0 characters' do expect( "\x10".display_width ).to eq 1 end it 'returns 1 for SOFT HYPHEN' do expect( "­".display_width ).to eq 1 end it 'returns 2 for THREE-EM DASH' do expect( "⸺".display_width ).to eq 2 end it 'returns 3 for THREE-EM DASH' do expect( "⸻".display_width ).to eq 3 end end describe '[overwrite]' do it 'can be passed a 3rd parameter with overwrites' do expect( "\t".display_width(1, 0x09 => 12) ).to eq 12 end end describe '[encoding]' do it 'works with non-utf8 Unicode encodings' do expect( 'À'.encode("UTF-16LE").display_width ).to eq 1 end end end unicode-display_width-1.1.3/lib/0000755000004100000410000000000013040704400016572 5ustar www-datawww-dataunicode-display_width-1.1.3/lib/unicode/0000755000004100000410000000000013040704400020220 5ustar www-datawww-dataunicode-display_width-1.1.3/lib/unicode/display_width.rb0000644000004100000410000000222413040704400023411 0ustar www-datawww-datarequire_relative 'display_width/constants' require_relative 'display_width/index' module Unicode module DisplayWidth DEPTHS = [0x10000, 0x1000, 0x100, 0x10].freeze def self.of(string, ambiguous = 1, overwrite = {}) res = string.codepoints.inject(0){ |total_width, codepoint| index_or_value = INDEX codepoint_depth_offset = codepoint DEPTHS.each{ |depth| index_or_value = index_or_value[codepoint_depth_offset / depth] codepoint_depth_offset = codepoint_depth_offset % depth break unless index_or_value.is_a? Array } width = index_or_value.is_a?(Array) ? index_or_value[codepoint_depth_offset] : index_or_value width = ambiguous if width == :A total_width + (overwrite[codepoint] || width || 1) } res < 0 ? 0 : res end end end # Allows you to opt-out of the default string extension. Will eventually be removed, # so you must opt-in for the core extension by requiring 'display_width/string_ext' unless defined?(Unicode::DisplayWidth::NO_STRING_EXT) && Unicode::DisplayWidth::NO_STRING_EXT require_relative 'display_width/string_ext' end unicode-display_width-1.1.3/lib/unicode/display_width/0000755000004100000410000000000013040704400023064 5ustar www-datawww-dataunicode-display_width-1.1.3/lib/unicode/display_width/string_ext.rb0000644000004100000410000000101413040704400025573 0ustar www-datawww-datarequire_relative '../display_width' unless defined? Unicode::DisplayWidth class String def display_width(ambiguous = 1, overwrite = {}) Unicode::DisplayWidth.of(self, ambiguous, overwrite) end def display_size(*args) warn "Deprecation warning: Please use `String#display_width` instead of `String#display_size`" display_width(*args) end def display_length(*args) warn "Deprecation warning: Please use `String#display_width` instead of `String#display_length`" display_width(*args) end end unicode-display_width-1.1.3/lib/unicode/display_width/index.rb0000644000004100000410000000022013040704400024512 0ustar www-datawww-datarequire_relative 'constants' module Unicode module DisplayWidth INDEX = Marshal.load(Gem.gunzip(File.binread(INDEX_FILENAME))) end end unicode-display_width-1.1.3/lib/unicode/display_width/no_string_ext.rb0000644000004100000410000000015513040704400026274 0ustar www-datawww-datamodule Unicode module DisplayWidth NO_STRING_EXT = true end end require_relative '../display_width' unicode-display_width-1.1.3/lib/unicode/display_width/constants.rb0000644000004100000410000000041613040704400025426 0ustar www-datawww-datamodule Unicode module DisplayWidth VERSION = '1.1.3' UNICODE_VERSION = "9.0.0".freeze DATA_DIRECTORY = File.expand_path(File.dirname(__FILE__) + '/../../../data/').freeze INDEX_FILENAME = (DATA_DIRECTORY + '/display_width.marshal.gz').freeze end end unicode-display_width-1.1.3/MIT-LICENSE.txt0000644000004100000410000000207113040704400020276 0ustar www-datawww-dataThe MIT LICENSE Copyright (c) 2011, 2015-2017 Jan Lelis Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. unicode-display_width-1.1.3/CHANGELOG.txt0000644000004100000410000000331513040704400020056 0ustar www-datawww-data# CHANGELOG ## 1.1.3 - Fix that non-UTF-8 encodings do not throw errors, patch by @windwiny ## 1.1.2 - Reduce memory consumption and increase performance, patch by @rrosenblum ## 1.1.1 * Always load index into memory, fixes #9 ## 1.1.0 * Support Unicode 9.0 ## 1.0.5 * Actually include new index from 1.0.4 ## 1.0.4 * New index format (much smaller) and internal API changes * Move index generation to a builder plugin for the unicoder gem * No public API changes ## 1.0.3 * Avoid circular dependency warning ## 1.0.2 * Fix error that gemspec might be invalid under some circumstances (see gh#6) ## 1.0.1 * Inofficially allow Ruby 1.9 ## 1.0.0 * Faster than 0.3.1 * Advanced determination of character width * This includes: Treat width of most chars of general categories (Mn, Me, Cf) as 0 * This includes: Introduce list of characters with special widths * Allow custom overrides for specific codepoints * Set required Ruby version to 2.0 * Add NO_STRING_EXT mode to disable monkey patching * Internal API & index format changed drastically * Remove require 'unicode/display_size' (use 'unicode/display_width' instead) ## 0.3.1 * Faster than 0.3.0 * Deprecate usage of aliases: String#display_size and String#display_length * Eliminate Ruby warnings (@amatsuda) ## 0.3.0 * Update EastAsianWidth from 7.0 to 8.0 * Add rake task to update EastAsianWidth.txt * Move code to generate index from library to Rakefile * Update project's meta files * Deprecate requiring 'unicode-display_size' ## 0.2.0 * Update EastAsianWidth from 6.0 to 7.0 * Don't build index table automatically when not available * Don't include EastAsianWidth.txt in gem (only index) ## 0.1.0 * Fix github issue #1 ## 0.1.0 * Initial release unicode-display_width-1.1.3/unicode-display_width.gemspec0000644000004100000410000000173413040704400023666 0ustar www-datawww-data# -*- encoding: utf-8 -*- require File.dirname(__FILE__) + "/lib/unicode/display_width" Gem::Specification.new do |s| s.name = "unicode-display_width" s.version = Unicode::DisplayWidth::VERSION s.authors = ["Jan Lelis"] s.email = "mail@janlelis.de" s.homepage = "http://github.com/janlelis/unicode-display_width" s.summary = "Determines the monospace display width of a string in Ruby." s.description = "[Unicode #{Unicode::DisplayWidth::UNICODE_VERSION}] Determines the monospace display width of a string using EastAsianWidth.txt, Unicode general category, and other data." s.files = Dir.glob(%w[{lib,spec}/**/*.rb [A-Z]*.{txt,rdoc} data/display_width.marshal.gz]) + %w{Rakefile unicode-display_width.gemspec} s.extra_rdoc_files = ["README.md", "MIT-LICENSE.txt", "CHANGELOG.txt"] s.license = 'MIT' s.required_ruby_version = '>= 1.9.3' s.add_development_dependency 'rspec', '~> 3.4' s.add_development_dependency 'rake', '~> 10.4' end unicode-display_width-1.1.3/README.md0000644000004100000410000001003513040704400017302 0ustar www-datawww-data## Unicode::DisplayWidth [![[version]](https://badge.fury.io/rb/unicode-display_width.svg)](http://badge.fury.io/rb/unicode-display_width) [](https://travis-ci.org/janlelis/unicode-display_width) Determines the monospace display width of a string in Ruby. Implementation based on [EastAsianWidth.txt](http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt) and other data, 100% in Ruby. Other than [wcwidth()](https://github.com/janlelis/wcswidth-ruby), which fulfills a similar purpose, it does not rely on the OS vendor to provide an up-to-date method for measuring string width. Unicode version: **9.0.0** ## Introduction to Character Widths Guesing the correct space a character will consume on terminals is not easy. There is no single standard. Most implementations combine data from [East Asian Width](http://www.unicode.org/reports/tr11/), some [General Categories](https://en.wikipedia.org/wiki/Unicode_character_property#General_Category), and hand-picked adjustments. ### How this Library Handles Widths Further at the top means higher precedence. Please expect changes to this algorithm with every MINOR version update (the X in 1.X.0)! Width | Characters | Comment -------|------------------------------|-------------------------------------------------- X | (user defined) | Overwrites any other values -1 | `"\b"` | Backspace (total width never below 0) 0 | `"\0"`, `"\x05"`, `"\a"`, `"\n"`, `"\v"`, `"\f"`, `"\r"`, `"\x0E"`, `"\x0F"` | [C0 control codes](https://en.wikipedia.org/wiki/C0_and_C1_control_codes#C0_.28ASCII_and_derivatives.29) that do not change horizontal width 1 | `"\u{00AD}"` | SOFT HYPHEN 2 | `"\u{2E3A}"` | TWO-EM DASH 3 | `"\u{2E3B}"` | THREE-EM DASH 0 | General Categories: Mn, Me, Cf (non-arabic) | Excludes ARABIC format characters 0 | `"\u{1160}".."\u{11FF}"` | HANGUL JUNGSEONG 2 | East Asian Width: F, W | Full-width characters 1 or 2 | East Asian Width: A | Ambiguous characters, user defined, default: 1 1 | All other codepoints | - ## Install Install the gem with: gem install unicode-display_width Or add to your Gemfile: gem 'unicode-display_width' ## Usage ```ruby require 'unicode/display_width' Unicode::DisplayWidth.of("⚀") # => 1 Unicode::DisplayWidth.of("一") # => 2 ``` ### Ambiguous Characters The second parameter defines the value returned by characterrs defined as ambiguous: ```ruby Unicode::DisplayWidth.of("·", 1) # => 1 Unicode::DisplayWidth.of("·", 2) # => 2 ``` ### Custom Overwrites You can overwrite how to handle specific code points by passing a hash (or even a proc) as third parameter: ```ruby Unicode::DisplayWidth.of("a\tb", 1, 0x09 => 10)) # => 12 ``` ### Usage with String Extension Activated by default. Will be deactivated in version 2.0: ```ruby require 'unicode/display_width/string_ext' "⚀".display_width #=> 1 '一'.display_width #=> 2 ``` You can actively opt-out from the string extension with: `require 'unicode/display_width/no_string_ext'` ### Usage From the CLI Use this one-liner to print out display widths for strings from the command-line: ``` $ gem install unicode-display_width $ ruby -r unicode/display_width -e 'puts Unicode::DisplayWidth.of $*[0]' -- "一" ``` Replace "一" with the actual string to measure ## Other Implementations & Discussion - Python: https://github.com/jquast/wcwidth - JavaScript: https://github.com/mycoboco/wcwidth.js - C: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c - C for Julia: https://github.com/JuliaLang/utf8proc/issues/2 See [unicode-x](https://github.com/janlelis/unicode-x) for more Unicode related micro libraries. ## Copyright & Info - Copyright (c) 2011, 2015-2017 Jan Lelis, http://janlelis.com, released under the MIT license - Early versions based on runpaint's unicode-data interface: Copyright (c) 2009 Run Paint Run Run - Unicode data: http://www.unicode.org/copyright.html#Exhibit1