regexp-property-values-0.3.4/0000755000175100017510000000000013372265773015234 5ustar pravipraviregexp-property-values-0.3.4/Rakefile0000644000175100017510000000127513372265773016706 0ustar pravipravirequire 'bundler/gem_tasks' require 'rubygems/package_task' require 'rspec/core/rake_task' RSpec::Core::RakeTask.new(:spec) task :default => :spec require 'rake/extensiontask' Rake::ExtensionTask.new('regexp_property_values') do |ext| ext.lib_dir = 'lib/regexp_property_values' end namespace :java do java_gemspec = eval File.read('./regexp_property_values.gemspec') java_gemspec.platform = 'java' java_gemspec.extensions = [] Gem::PackageTask.new(java_gemspec) do |pkg| pkg.need_zip = true pkg.need_tar = true pkg.package_dir = 'pkg' end end task package: 'java:gem' if RUBY_PLATFORM !~ /java/i # recompile before running specs task(:spec).enhance([:compile]) end regexp-property-values-0.3.4/bin/0000755000175100017510000000000013372265773016004 5ustar pravipraviregexp-property-values-0.3.4/bin/setup0000755000175100017510000000020313372265773017065 0ustar pravipravi#!/usr/bin/env bash set -euo pipefail IFS=$'\n\t' set -vx bundle install # Do any other automated setup that you need to do here regexp-property-values-0.3.4/bin/console0000755000175100017510000000060013372265773017370 0ustar pravipravi#!/usr/bin/env ruby require "bundler/setup" require "regexp_property_values" # You can add fixtures and/or initialization code here to make experimenting # with your gem easier. You can also use a different console, if you like. # (If you use this, don't forget to add pry to your Gemfile!) # require "pry" # Pry.start PV = RegexpPropertyValues require "irb" IRB.start(__FILE__) regexp-property-values-0.3.4/README.md0000644000175100017510000000352413372265773016517 0ustar pravipravi# RegexpPropertyValues [![Gem Version](https://badge.fury.io/rb/regexp_property_values.svg)](http://badge.fury.io/rb/regexp_property_values) [![Build Status](https://travis-ci.org/janosch-x/regexp_property_values.svg?branch=master)](https://travis-ci.org/janosch-x/regexp_property_values) This small library lets you see which property values are supported by the regular expression engine of the Ruby version you are running and directly reads out their codepoint ranges from there. That is, it determines all supported values for `\p{value}` expressions and what they match. ## Usage ##### Browse all property values (supported by any Ruby, ever) ```ruby require 'regexp_property_values' PV = RegexpPropertyValues PV.all # => ["Alpha", "Blank", "Cntrl", ...] PV.by_category # => {"POSIX brackets" => ["Alpha", ...], "Special" => ...} PV.short_and_long_names # => [["M", "Grek", ...], ["Mark", "Greek", ...]] ``` ##### Browse property values supported by the Ruby you are running ```ruby PV.all_for_current_ruby # => ["Alpha", "Blank", "Cntrl", ...] PV.by_category.map { |k, v| [k, v.select(&:supported_by_current_ruby?] } # etc. ``` ##### Inspect properties ```ruby PV['alpha'].supported_by_current_ruby? # => true PV['foobar'].supported_by_current_ruby? # => false PV['AHex'].matched_characters # => %w[0 1 2 3 4 5 6 7 8 9 A B C ...] PV['AHex'].matched_codepoints # => [48, 49, 50, ...] PV['AHex'].matched_ranges # => [48..57, 65..70, 97..102] ``` If [`character_set`](https://github.com/janosch-x/character_set) is installed, you can also do this: ```ruby PV['AHex'].character_set # => # ``` ##### Utility methods ```ruby # This one takes a few seconds (or minutes, without the C extension) PV.alias_hash # => {"M" => "Mark", "Grek" => "Greek", ...} # download the latest list of possible properties PV.update ``` regexp-property-values-0.3.4/.gitignore0000644000175100017510000000054413372265773017227 0ustar pravipravi*.bundle *.gem *.iml *.stTheme.cache *.sublime-project *.sublime-workspace *.swp *.tmlanguage.cache *.tmPreferences.cache *~ .byebug_history .DS_Store .idea/ .ruby-gemset .ruby-version .tags .tags1 bbin/ binstubs/* bundler_stubs/*/.yardoc Gemfile.lock /.bundle/ /_yardoc/ /coverage/ /doc/ /pkg/ /spec/reports/ /tmp/ # rspec failure tracking .rspec_status regexp-property-values-0.3.4/.travis.yml0000644000175100017510000000022213372265773017341 0ustar pravipravisudo: false language: ruby rvm: - 2.1 - 2.4 - 2.5 - 2.6 - jruby-9.1.9.0 before_install: - gem update --system - gem install bundler regexp-property-values-0.3.4/lib/0000755000175100017510000000000013372265773016002 5ustar pravipraviregexp-property-values-0.3.4/lib/UnicodeProps.txt0000644000175100017510000003127313372265773021163 0ustar pravipraviOnigmo (Oniguruma-mod) Unicode Properties Version 6.2.0 2017/07/17 * POSIX brackets Alpha Blank Cntrl Digit Graph Lower Print Punct Space Upper XDigit Word Alnum ASCII XPosixPunct * Special Any Assigned * Major and General Categories C Cc Cf Cn Co Cs L LC Ll Lm Lo Lt Lu M Mc Me Mn N Nd Nl No P Pc Pd Pe Pf Pi Po Ps S Sc Sk Sm So Z Zl Zp Zs * Scripts Adlam Ahom Anatolian_Hieroglyphs Arabic Armenian Avestan Balinese Bamum Bassa_Vah Batak Bengali Bhaiksuki Bopomofo Brahmi Braille Buginese Buhid Canadian_Aboriginal Carian Caucasian_Albanian Chakma Cham Cherokee Common Coptic Cuneiform Cypriot Cyrillic Deseret Devanagari Duployan Egyptian_Hieroglyphs Elbasan Ethiopic Georgian Glagolitic Gothic Grantha Greek Gujarati Gurmukhi Han Hangul Hanunoo Hatran Hebrew Hiragana Imperial_Aramaic Inherited Inscriptional_Pahlavi Inscriptional_Parthian Javanese Kaithi Kannada Katakana Kayah_Li Kharoshthi Khmer Khojki Khudawadi Lao Latin Lepcha Limbu Linear_A Linear_B Lisu Lycian Lydian Mahajani Malayalam Mandaic Manichaean Marchen Masaram_Gondi Meetei_Mayek Mende_Kikakui Meroitic_Cursive Meroitic_Hieroglyphs Miao Modi Mongolian Mro Multani Myanmar Nabataean New_Tai_Lue Newa Nko Nushu Ogham Ol_Chiki Old_Hungarian Old_Italic Old_North_Arabian Old_Permic Old_Persian Old_South_Arabian Old_Turkic Oriya Osage Osmanya Pahawh_Hmong Palmyrene Pau_Cin_Hau Phags_Pa Phoenician Psalter_Pahlavi Rejang Runic Samaritan Saurashtra Sharada Shavian Siddham SignWriting Sinhala Sora_Sompeng Soyombo Sundanese Syloti_Nagri Syriac Tagalog Tagbanwa Tai_Le Tai_Tham Tai_Viet Takri Tamil Tangut Telugu Thaana Thai Tibetan Tifinagh Tirhuta Ugaritic Unknown Vai Warang_Citi Yi Zanabazar_Square * DerivedCoreProperties Alphabetic Case_Ignorable Cased Changes_When_Casefolded Changes_When_Casemapped Changes_When_Lowercased Changes_When_Titlecased Changes_When_Uppercased Default_Ignorable_Code_Point Grapheme_Base Grapheme_Extend Grapheme_Link ID_Continue ID_Start Lowercase Math Uppercase XID_Continue XID_Start * PropList ASCII_Hex_Digit Bidi_Control Dash Deprecated Diacritic Extender Hex_Digit Hyphen IDS_Binary_Operator IDS_Trinary_Operator Ideographic Join_Control Logical_Order_Exception Noncharacter_Code_Point Other_Alphabetic Other_Default_Ignorable_Code_Point Other_Grapheme_Extend Other_ID_Continue Other_ID_Start Other_Lowercase Other_Math Other_Uppercase Pattern_Syntax Pattern_White_Space Prepended_Concatenation_Mark Quotation_Mark Radical Regional_Indicator Sentence_Terminal Soft_Dotted Terminal_Punctuation Unified_Ideograph Variation_Selector White_Space * Emoji Emoji Emoji_Component Emoji_Modifier Emoji_Modifier_Base Emoji_Presentation * PropertyAliases AHex Bidi_C CI CWCF CWCM CWL CWT CWU DI Dep Dia Ext Gr_Base Gr_Ext Gr_Link Hex IDC IDS IDSB IDST Ideo Join_C LOE NChar OAlpha ODI OGr_Ext OIDC OIDS OLower OMath OUpper PCM Pat_Syn Pat_WS QMark RI SD STerm Term UIdeo VS WSpace XIDC XIDS * PropertyValueAliases (General_Category) Other Control Format Unassigned Private_Use Surrogate Letter Cased_Letter Lowercase_Letter Modifier_Letter Other_Letter Titlecase_Letter Uppercase_Letter Mark Combining_Mark Spacing_Mark Enclosing_Mark Nonspacing_Mark Number Decimal_Number Letter_Number Other_Number Punctuation Connector_Punctuation Dash_Punctuation Close_Punctuation Final_Punctuation Initial_Punctuation Other_Punctuation Open_Punctuation Symbol Currency_Symbol Modifier_Symbol Math_Symbol Other_Symbol Separator Line_Separator Paragraph_Separator Space_Separator * PropertyValueAliases (Script) Adlm Aghb Arab Armi Armn Avst Bali Bamu Bass Batk Beng Bhks Bopo Brah Brai Bugi Buhd Cakm Cans Cari Cher Copt Qaac Cprt Cyrl Deva Dsrt Dupl Egyp Elba Ethi Geor Glag Gonm Goth Gran Grek Gujr Guru Hang Hani Hano Hatr Hebr Hira Hluw Hmng Hung Ital Java Kali Kana Khar Khmr Khoj Knda Kthi Lana Laoo Latn Lepc Limb Lina Linb Lyci Lydi Mahj Mand Mani Marc Mend Merc Mero Mlym Mong Mroo Mtei Mult Mymr Narb Nbat Nkoo Nshu Ogam Olck Orkh Orya Osge Osma Palm Pauc Perm Phag Phli Phlp Phnx Plrd Prti Rjng Runr Samr Sarb Saur Sgnw Shaw Shrd Sidd Sind Sinh Sora Soyo Sund Sylo Syrc Tagb Takr Tale Talu Taml Tang Tavt Telu Tfng Tglg Thaa Tibt Tirh Ugar Vaii Wara Xpeo Xsux Yiii Zanb Zinh Qaai Zyyy Zzzz * DerivedAges Age=1.1 Age=10.0 Age=2.0 Age=2.1 Age=3.0 Age=3.1 Age=3.2 Age=4.0 Age=4.1 Age=5.0 Age=5.1 Age=5.2 Age=6.0 Age=6.1 Age=6.2 Age=6.3 Age=7.0 Age=8.0 Age=9.0 * Blocks In_Basic_Latin In_Latin_1_Supplement In_Latin_Extended_A In_Latin_Extended_B In_IPA_Extensions In_Spacing_Modifier_Letters In_Combining_Diacritical_Marks In_Greek_and_Coptic In_Cyrillic In_Cyrillic_Supplement In_Armenian In_Hebrew In_Arabic In_Syriac In_Arabic_Supplement In_Thaana In_NKo In_Samaritan In_Mandaic In_Syriac_Supplement In_Arabic_Extended_A In_Devanagari In_Bengali In_Gurmukhi In_Gujarati In_Oriya In_Tamil In_Telugu In_Kannada In_Malayalam In_Sinhala In_Thai In_Lao In_Tibetan In_Myanmar In_Georgian In_Hangul_Jamo In_Ethiopic In_Ethiopic_Supplement In_Cherokee In_Unified_Canadian_Aboriginal_Syllabics In_Ogham In_Runic In_Tagalog In_Hanunoo In_Buhid In_Tagbanwa In_Khmer In_Mongolian In_Unified_Canadian_Aboriginal_Syllabics_Extended In_Limbu In_Tai_Le In_New_Tai_Lue In_Khmer_Symbols In_Buginese In_Tai_Tham In_Combining_Diacritical_Marks_Extended In_Balinese In_Sundanese In_Batak In_Lepcha In_Ol_Chiki In_Cyrillic_Extended_C In_Sundanese_Supplement In_Vedic_Extensions In_Phonetic_Extensions In_Phonetic_Extensions_Supplement In_Combining_Diacritical_Marks_Supplement In_Latin_Extended_Additional In_Greek_Extended In_General_Punctuation In_Superscripts_and_Subscripts In_Currency_Symbols In_Combining_Diacritical_Marks_for_Symbols In_Letterlike_Symbols In_Number_Forms In_Arrows In_Mathematical_Operators In_Miscellaneous_Technical In_Control_Pictures In_Optical_Character_Recognition In_Enclosed_Alphanumerics In_Box_Drawing In_Block_Elements In_Geometric_Shapes In_Miscellaneous_Symbols In_Dingbats In_Miscellaneous_Mathematical_Symbols_A In_Supplemental_Arrows_A In_Braille_Patterns In_Supplemental_Arrows_B In_Miscellaneous_Mathematical_Symbols_B In_Supplemental_Mathematical_Operators In_Miscellaneous_Symbols_and_Arrows In_Glagolitic In_Latin_Extended_C In_Coptic In_Georgian_Supplement In_Tifinagh In_Ethiopic_Extended In_Cyrillic_Extended_A In_Supplemental_Punctuation In_CJK_Radicals_Supplement In_Kangxi_Radicals In_Ideographic_Description_Characters In_CJK_Symbols_and_Punctuation In_Hiragana In_Katakana In_Bopomofo In_Hangul_Compatibility_Jamo In_Kanbun In_Bopomofo_Extended In_CJK_Strokes In_Katakana_Phonetic_Extensions In_Enclosed_CJK_Letters_and_Months In_CJK_Compatibility In_CJK_Unified_Ideographs_Extension_A In_Yijing_Hexagram_Symbols In_CJK_Unified_Ideographs In_Yi_Syllables In_Yi_Radicals In_Lisu In_Vai In_Cyrillic_Extended_B In_Bamum In_Modifier_Tone_Letters In_Latin_Extended_D In_Syloti_Nagri In_Common_Indic_Number_Forms In_Phags_pa In_Saurashtra In_Devanagari_Extended In_Kayah_Li In_Rejang In_Hangul_Jamo_Extended_A In_Javanese In_Myanmar_Extended_B In_Cham In_Myanmar_Extended_A In_Tai_Viet In_Meetei_Mayek_Extensions In_Ethiopic_Extended_A In_Latin_Extended_E In_Cherokee_Supplement In_Meetei_Mayek In_Hangul_Syllables In_Hangul_Jamo_Extended_B In_High_Surrogates In_High_Private_Use_Surrogates In_Low_Surrogates In_Private_Use_Area In_CJK_Compatibility_Ideographs In_Alphabetic_Presentation_Forms In_Arabic_Presentation_Forms_A In_Variation_Selectors In_Vertical_Forms In_Combining_Half_Marks In_CJK_Compatibility_Forms In_Small_Form_Variants In_Arabic_Presentation_Forms_B In_Halfwidth_and_Fullwidth_Forms In_Specials In_Linear_B_Syllabary In_Linear_B_Ideograms In_Aegean_Numbers In_Ancient_Greek_Numbers In_Ancient_Symbols In_Phaistos_Disc In_Lycian In_Carian In_Coptic_Epact_Numbers In_Old_Italic In_Gothic In_Old_Permic In_Ugaritic In_Old_Persian In_Deseret In_Shavian In_Osmanya In_Osage In_Elbasan In_Caucasian_Albanian In_Linear_A In_Cypriot_Syllabary In_Imperial_Aramaic In_Palmyrene In_Nabataean In_Hatran In_Phoenician In_Lydian In_Meroitic_Hieroglyphs In_Meroitic_Cursive In_Kharoshthi In_Old_South_Arabian In_Old_North_Arabian In_Manichaean In_Avestan In_Inscriptional_Parthian In_Inscriptional_Pahlavi In_Psalter_Pahlavi In_Old_Turkic In_Old_Hungarian In_Rumi_Numeral_Symbols In_Brahmi In_Kaithi In_Sora_Sompeng In_Chakma In_Mahajani In_Sharada In_Sinhala_Archaic_Numbers In_Khojki In_Multani In_Khudawadi In_Grantha In_Newa In_Tirhuta In_Siddham In_Modi In_Mongolian_Supplement In_Takri In_Ahom In_Warang_Citi In_Zanabazar_Square In_Soyombo In_Pau_Cin_Hau In_Bhaiksuki In_Marchen In_Masaram_Gondi In_Cuneiform In_Cuneiform_Numbers_and_Punctuation In_Early_Dynastic_Cuneiform In_Egyptian_Hieroglyphs In_Anatolian_Hieroglyphs In_Bamum_Supplement In_Mro In_Bassa_Vah In_Pahawh_Hmong In_Miao In_Ideographic_Symbols_and_Punctuation In_Tangut In_Tangut_Components In_Kana_Supplement In_Kana_Extended_A In_Nushu In_Duployan In_Shorthand_Format_Controls In_Byzantine_Musical_Symbols In_Musical_Symbols In_Ancient_Greek_Musical_Notation In_Tai_Xuan_Jing_Symbols In_Counting_Rod_Numerals In_Mathematical_Alphanumeric_Symbols In_Sutton_SignWriting In_Glagolitic_Supplement In_Mende_Kikakui In_Adlam In_Arabic_Mathematical_Alphabetic_Symbols In_Mahjong_Tiles In_Domino_Tiles In_Playing_Cards In_Enclosed_Alphanumeric_Supplement In_Enclosed_Ideographic_Supplement In_Miscellaneous_Symbols_and_Pictographs In_Emoticons In_Ornamental_Dingbats In_Transport_and_Map_Symbols In_Alchemical_Symbols In_Geometric_Shapes_Extended In_Supplemental_Arrows_C In_Supplemental_Symbols_and_Pictographs In_CJK_Unified_Ideographs_Extension_B In_CJK_Unified_Ideographs_Extension_C In_CJK_Unified_Ideographs_Extension_D In_CJK_Unified_Ideographs_Extension_E In_CJK_Unified_Ideographs_Extension_F In_CJK_Compatibility_Ideographs_Supplement In_Tags In_Variation_Selectors_Supplement In_Supplementary_Private_Use_Area_A In_Supplementary_Private_Use_Area_B In_No_Block regexp-property-values-0.3.4/lib/regexp_property_values/0000755000175100017510000000000013372265773022617 5ustar pravipraviregexp-property-values-0.3.4/lib/regexp_property_values/extension.rb0000644000175100017510000000313513372265773025162 0ustar pravipravimodule RegexpPropertyValues module Extension def supported_by_current_ruby? !!regexp rescue ArgumentError false end def regexp @regexp ||= /\p{#{self}}/u rescue RegexpError, SyntaxError raise ArgumentError, "Unknown property name #{self}" end if const_defined?(:OnigRegexpPropertyHelper) # C extension loaded def matched_codepoints matched_ranges.flat_map(&:to_a) end def matched_ranges OnigRegexpPropertyHelper.matched_ranges(self.encode('utf-8')) end def matched_characters matched_codepoints.map { |cp| cp.chr('utf-8') } end def character_set require 'character_set' CharacterSet.from_ranges(*matched_ranges) end else # Ruby fallback - this stuff is slow as hell, and it wont get much faster def matched_codepoints matched_characters.map(&:ord) end def matched_ranges require 'set' matched_codepoints .to_set(SortedSet) .divide { |i, j| (i - j).abs == 1 } .map { |s| a = s.to_a; a.first..a.last } end def matched_characters regexp.respond_to?(:match?) || regexp.define_singleton_method(:match?) { |str| !!match(str) } @@characters ||= ((0..0xD7FF).to_a + (0xE000..0x10FFFF).to_a) .map { |cp_number| [cp_number].pack('U') } @@characters.select { |char| regexp.match?(char) } end def character_set require 'character_set' CharacterSet.new(matched_codepoints) end end end end regexp-property-values-0.3.4/lib/regexp_property_values/version.rb0000644000175100017510000000006413372265773024631 0ustar pravipravimodule RegexpPropertyValues VERSION = '0.3.4' end regexp-property-values-0.3.4/lib/regexp_property_values.rb0000644000175100017510000000436113372265773023150 0ustar pravipravibegin require 'regexp_property_values/regexp_property_values' rescue LoadError warn 'regexp_property_values could not load C extension, using slower Ruby' end require 'regexp_property_values/extension' require 'regexp_property_values/version' module RegexpPropertyValues module_function LIST_URL = 'https://raw.githubusercontent.com/k-takata/Onigmo/master/doc/UnicodeProps.txt' def update puts "Downloading #{LIST_URL}" require 'open-uri' File.open(file_path, 'w') { |f| IO.copy_stream(open(LIST_URL), f) } puts 'Done!' end def file_path File.expand_path('../UnicodeProps.txt', __FILE__) end def all by_category.values.flatten end def all_for_current_ruby all.select(&:supported_by_current_ruby?) end def by_category result = File.foreach(file_path).each_with_object({}) do |line, hash| if /^\* (?\S.+)/ =~ line @current_category = category hash[@current_category] ||= [] elsif /^ {4}(?\S.*)/ =~ line hash[@current_category] << value_name.extend(Extension) end end add_oniguruma_properties(result) result end def add_oniguruma_properties(props_by_category) props_by_category['Special'] << 'Newline'.extend(Extension) end def alias_hash short_names, long_names = short_and_long_names return {} if short_names.empty? long_names -= by_category['POSIX brackets'] by_matched_codepoints.each_value.each_with_object({}) do |props, hash| next if props.count < 2 long_name = (props & long_names)[0] || fail("no long name for #{props}") (props & short_names).each { |short_name| hash[short_name] = long_name } end end def short_and_long_names short_name_categories = ['Major and General Categories', 'PropertyAliases', 'PropertyValueAliases (Script)'] by_category.each_with_object([[], []]) do |(cat_name, props), (short, long)| (short_name_categories.include?(cat_name) ? short : long).concat(props) end end def by_matched_codepoints puts 'Establishing property codepoints, this may take a bit ...' all_for_current_ruby.group_by(&:matched_codepoints) end def [](prop) prop.extend(Extension) end end regexp-property-values-0.3.4/regexp_property_values.gemspec0000644000175100017510000000236713372265773023426 0ustar pravipravilib = File.expand_path("../lib", __FILE__) $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) require 'regexp_property_values/version' Gem::Specification.new do |s| s.name = 'regexp_property_values' s.version = RegexpPropertyValues::VERSION s.authors = ['Janosch Müller'] s.email = ['janosch84@gmail.com'] s.summary = "Inspect property values supported by Ruby's regex engine" s.description = 'This small library lets you see which property values '\ 'are supported by the regular expression engine of the '\ 'Ruby version you are running, and what they match.' s.homepage = 'https://github.com/janosch-x/regexp_property_values' s.license = 'MIT' s.files = `git ls-files -z`.split("\x0").reject do |f| f.match(%r{^(test|spec|features)/}) end s.require_paths = ['lib'] s.extensions = %w[ext/regexp_property_values/extconf.rb] s.required_ruby_version = '>= 2.0.0' s.add_development_dependency 'bundler', '~> 1.16' s.add_development_dependency 'character_set', '~> 1.0.0' s.add_development_dependency 'rake', '~> 10.0' s.add_development_dependency 'rake-compiler', '~> 1.0' s.add_development_dependency 'rspec', '~> 3.0' end regexp-property-values-0.3.4/LICENSE.txt0000644000175100017510000000207413372265773017062 0ustar pravipraviThe MIT License (MIT) Copyright (c) 2018 Jannosch Müller Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. regexp-property-values-0.3.4/Gemfile0000644000175100017510000000026113372265773016526 0ustar pravipravisource "https://rubygems.org" git_source(:github) {|repo_name| "https://github.com/#{repo_name}" } # Specify your gem's dependencies in regexp_property_values.gemspec gemspec regexp-property-values-0.3.4/.rspec0000644000175100017510000000006513372265773016352 0ustar pravipravi--format documentation --color --require spec_helper regexp-property-values-0.3.4/ext/0000755000175100017510000000000013372265773016034 5ustar pravipraviregexp-property-values-0.3.4/ext/regexp_property_values/0000755000175100017510000000000013372265773022651 5ustar pravipraviregexp-property-values-0.3.4/ext/regexp_property_values/regexp_property_values.c0000644000175100017510000000305613372265773027636 0ustar pravipravi#include "ruby.h" #include "ruby/encoding.h" #include "ruby/oniguruma.h" // still in recent rubies f. backwards compatibility static int prop_name_to_ctype(char* name, rb_encoding *enc) { UChar *uname; int ctype; uname = (UChar*)name; ctype = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, uname, uname + strlen(name)); if (ctype < 0) rb_raise(rb_eArgError, "Unknown property name `%s`", name); return ctype; } VALUE onig_ranges_to_rb(const OnigCodePoint *onig_ranges) { unsigned int range_count, i; VALUE result, sub_range; range_count = onig_ranges[0]; result = rb_ary_new2(range_count); // rb_ary_new_capa not avail. in Ruby 2.0 for (i = 0; i < range_count; i++) { sub_range = rb_range_new(INT2FIX(onig_ranges[(i * 2) + 1]), INT2FIX(onig_ranges[(i * 2) + 2]), 0); rb_ary_store(result, i, sub_range); } return result; } VALUE rb_prop_ranges(char* name, rb_encoding *enc) { int ctype; const OnigCodePoint *onig_ranges; OnigCodePoint sb_out; ctype = prop_name_to_ctype(name, enc); ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &onig_ranges); return onig_ranges_to_rb(onig_ranges); } VALUE method_matched_ranges(VALUE self, VALUE arg) { char *prop_name; rb_encoding *enc; prop_name = StringValueCStr(arg); enc = rb_enc_get(arg); return rb_prop_ranges(prop_name, enc); } void Init_regexp_property_values() { VALUE module; module = rb_define_module("OnigRegexpPropertyHelper"); rb_define_singleton_method(module, "matched_ranges", method_matched_ranges, 1); } regexp-property-values-0.3.4/ext/regexp_property_values/extconf.rb0000644000175100017510000000012413372265773024641 0ustar pravipravirequire 'mkmf' name = 'regexp_property_values' create_makefile("#{name}/#{name}")