numerizer-0.2.0/0000755000004100000410000000000013342353677013576 5ustar www-datawww-datanumerizer-0.2.0/test/0000755000004100000410000000000013342353677014555 5ustar www-datawww-datanumerizer-0.2.0/test/test_numerizer.rb0000644000004100000410000000763413342353677020173 0ustar www-datawww-datarequire File.join(File.dirname(__FILE__), 'test_helper') class NumerizerTest < Test::Unit::TestCase def test_straight_parsing strings = { 1 => 'one', 5 => 'five', 10 => 'ten', 11 => 'eleven', 12 => 'twelve', 13 => 'thirteen', 14 => 'fourteen', 15 => 'fifteen', 16 => 'sixteen', 17 => 'seventeen', 18 => 'eighteen', 19 => 'nineteen', 20 => 'twenty', 27 => 'twenty seven', 31 => 'thirty-one', 37 => 'thirty-seven', 41 => 'forty one', 42 => 'fourty two', 59 => 'fifty nine', 100 => 'a hundred', 100 => 'one hundred', 150 => 'one hundred and fifty', # 150 => 'one fifty', 200 => 'two-hundred', 500 => '5 hundred', 999 => 'nine hundred and ninety nine', 1_000 => 'one thousand', 1_200 => 'twelve hundred', 1_200 => 'one thousand two hundred', 17_000 => 'seventeen thousand', 21_473 => 'twentyone-thousand-four-hundred-and-seventy-three', 74_002 => 'seventy four thousand and two', 99_999 => 'ninety nine thousand nine hundred ninety nine', 100_000 => '100 thousand', 250_000 => 'two hundred fifty thousand', 1_000_000 => 'one million', 1_250_007 => 'one million two hundred fifty thousand and seven', 1_000_000_000 => 'one billion', 1_000_000_001 => 'one billion and one' } strings.keys.sort.each do |key| assert_equal key, Numerizer.numerize(strings[key]).to_i end assert_equal "2.5", Numerizer.numerize("two and a half") assert_equal "1/2", Numerizer.numerize("one half") end def test_combined_double_digets assert_equal "21", Numerizer.numerize("twentyone") assert_equal "37", Numerizer.numerize("thirtyseven") end def test_fractions_in_words assert_equal "1/4", Numerizer.numerize("1 quarter") assert_equal "1/4", Numerizer.numerize("one quarter") assert_equal "1/4", Numerizer.numerize("a quarter") assert_equal "1/8", Numerizer.numerize("one eighth") assert_equal "3/4", Numerizer.numerize("three quarters") assert_equal "2/4", Numerizer.numerize("two fourths") assert_equal "3/8", Numerizer.numerize("three eighths") end def test_fractional_addition assert_equal "1.25", Numerizer.numerize("one and a quarter") assert_equal "2.375", Numerizer.numerize("two and three eighths") assert_equal "3.5 hours", Numerizer.numerize("three and a half hours") end def test_word_with_a_number assert_equal "pennyweight", Numerizer.numerize("pennyweight") end def test_edges assert_equal "27 Oct 2006 7:30am", Numerizer.numerize("27 Oct 2006 7:30am") end def test_multiple_slashes_should_not_be_evaluated assert_equal '11/02/2007', Numerizer.numerize('11/02/2007') end def test_compatability assert_equal '1/2', Numerizer.numerize('1/2') assert_equal '05/06', Numerizer.numerize('05/06') assert_equal "3.5 hours", Numerizer.numerize("three and a half hours") end def test_ordinal_strings { 'first' => '1st', 'second' => 'second', 'third' => '3rd', 'fifth' => '5th', 'seventh' => '7th', 'eighth' => '8th', 'tenth' => '10th', 'eleventh' => '11th', 'twelfth' => '12th', 'thirteenth' => '13th', 'sixteenth' => '16th', 'twentieth' => '20th', 'twenty-third' => '23rd', 'thirtieth' => '30th', 'thirty-first' => '31st', 'fourtieth' => '40th', 'fourty ninth' => '49th', 'fiftieth' => '50th', 'sixtieth' => '60th', 'seventieth' => '70th', 'eightieth' => '80th', 'ninetieth' => '90th', 'hundredth' => '100th', 'thousandth' => '1000th', 'millionth' => '1000000th', 'billionth' => '1000000000th', 'trillionth' => '1000000000000th', 'first day month two' => '1st day month 2' }.each do |key, val| assert_equal val, Numerizer.numerize(key) end end end numerizer-0.2.0/test/test_helper.rb0000644000004100000410000000031213342353677017414 0ustar www-datawww-datarequire 'rubygems' require 'test/unit' $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) $LOAD_PATH.unshift(File.dirname(__FILE__)) require 'numerizer' class Test::Unit::TestCase end numerizer-0.2.0/LICENSE0000644000004100000410000000204613342353677014605 0ustar www-datawww-dataCopyright (c) 2009 Tom Preston-Werner Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. numerizer-0.2.0/.document0000644000004100000410000000007413342353677015416 0ustar www-datawww-dataREADME.rdoc lib/**/*.rb bin/* features/**/*.feature LICENSE numerizer-0.2.0/numerizer.gemspec0000644000004100000410000000222013342353677017157 0ustar www-datawww-data# Generated by jeweler # DO NOT EDIT THIS FILE DIRECTLY # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec' # -*- encoding: utf-8 -*- # stub: numerizer 0.2.0 ruby lib Gem::Specification.new do |s| s.name = "numerizer" s.version = "0.2.0" s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version= s.require_paths = ["lib"] s.authors = ["John Duff"] s.date = "2014-04-23" s.description = "Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two). It was extracted from the awesome Chronic gem http://github.com/evaryont/chronic." s.email = "duff.john@gmail.com" s.extra_rdoc_files = [ "LICENSE", "README.rdoc" ] s.files = [ ".document", "LICENSE", "README.rdoc", "Rakefile", "VERSION", "lib/numerizer.rb", "numerizer.gemspec", "test/test_helper.rb", "test/test_numerizer.rb" ] s.homepage = "http://github.com/jduff/numerizer" s.licenses = ["MIT"] s.rubygems_version = "2.2.2" s.summary = "Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two)." end numerizer-0.2.0/Rakefile0000644000004100000410000000314613342353677015247 0ustar www-datawww-datarequire 'rubygems' require 'rake' begin require 'jeweler' Jeweler::Tasks.new do |gem| gem.name = "numerizer" gem.summary = "Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two)." gem.description = "Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two). It was extracted from the awesome Chronic gem http://github.com/evaryont/chronic." gem.email = "duff.john@gmail.com" gem.homepage = "http://github.com/jduff/numerizer" gem.license = 'MIT' gem.authors = ["John Duff"] # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings end Jeweler::GemcutterTasks.new rescue LoadError puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler" end require 'rake/testtask' Rake::TestTask.new(:test) do |t| t.libs << 'test' t.test_files = Dir['test/test_*.rb'] end begin require 'rcov/rcovtask' Rcov::RcovTask.new do |test| test.libs << 'test' test.pattern = 'test/**/test_*.rb' test.verbose = true end rescue LoadError task :rcov do abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov" end end # task :test => :check_dependencies task :default => :test # require 'rake/rdoctask' # Rake::RDocTask.new do |rdoc| # if File.exist?('VERSION') # version = File.read('VERSION') # else # version = "" # end # rdoc.rdoc_dir = 'rdoc' # rdoc.title = "numerizer #{version}" # rdoc.rdoc_files.include('README*') # rdoc.rdoc_files.include('lib/**/*.rb') # end numerizer-0.2.0/lib/0000755000004100000410000000000013342353677014344 5ustar www-datawww-datanumerizer-0.2.0/lib/numerizer.rb0000644000004100000410000001134613342353677016716 0ustar www-datawww-data# LICENSE: # # (The MIT License) # # Copyright © 2008 Tom Preston-Werner # # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. require 'strscan' class Numerizer DIRECT_NUMS = [ ['eleven', '11'], ['twelve', '12'], ['thirteen', '13'], ['fourteen', '14'], ['fifteen', '15'], ['sixteen', '16'], ['seventeen', '17'], ['eighteen', '18'], ['nineteen', '19'], ['ninteen', '19'], # Common mis-spelling ['zero', '0'], ['ten', '10'], ['\ba[\b^$]', '1'] # doesn't make sense for an 'a' at the end to be a 1 ] SINGLE_NUMS = [ ['one', 1], ['two', 2], ['three', 3], ['four', 4], ['five', 5], ['six', 6], ['seven', 7], ['eight', 8], ['nine', 9] ] TEN_PREFIXES = [ ['twenty', 20], ['thirty', 30], ['forty', 40], ['fourty', 40], # Common misspelling ['fifty', 50], ['sixty', 60], ['seventy', 70], ['eighty', 80], ['ninety', 90] ] BIG_PREFIXES = [ ['hundred', 100], ['thousand', 1000], ['million', 1_000_000], ['billion', 1_000_000_000], ['trillion', 1_000_000_000_000], ] FRACTIONS = [ ['half', 2], ['third(s)?', 3], ['fourth(s)?', 4], ['quarter(s)?', 4], ['fifth(s)?', 5], ['sixth(s)?', 6], ['seventh(s)?', 7], ['eighth(s)?', 8], ['nineth(s)?', 9], ] SINGLE_ORDINALS = [ ['first', 1], ['third', 3], ['fourth', 4], ['fifth', 5], ['sixth', 6], ['seventh', 7], ['eighth', 8], ['ninth', 9] ] DIRECT_ORDINALS = [ ['tenth', '10'], ['eleventh', '11'], ['twelfth', '12'], ['thirteenth', '13'], ['fourteenth', '14'], ['fifteenth', '15'], ['sixteenth', '16'], ['seventeenth', '17'], ['eighteenth', '18'], ['nineteenth', '19'], ['twentieth', '20'], ['thirtieth', '30'], ['fourtieth', '40'], ['fiftieth', '50'], ['sixtieth', '60'], ['seventieth', '70'], ['eightieth', '80'], ['ninetieth', '90'] ] def self.numerize(string) string = string.dup # preprocess string.gsub!(/ +|([^\d])-([^\d])/, '\1 \2') # will mutilate hyphenated-words # easy/direct replacements (DIRECT_NUMS + SINGLE_NUMS).each do |dn| string.gsub!(/(^|\W)#{dn[0]}(?=$|\W)/i, '\1' + dn[1].to_s) end # ten, twenty, etc. TEN_PREFIXES.each do |tp| SINGLE_NUMS.each do |dn| string.gsub!(/(^|\W)#{tp[0]}#{dn[0]}(?=$|\W)/i, '\1' + (tp[1] + dn[1]).to_s) end SINGLE_ORDINALS.each do |dn| string.gsub!(/(^|\W)#{tp[0]}(\s)?#{dn[0]}(?=$|\W)/i, '\1' + (tp[1] + dn[1]).to_s + dn[0][-2, 2]) end string.gsub!(/(^|\W)#{tp[0]}(?=$|\W)/i, '\1' + tp[1].to_s) end # handle fractions FRACTIONS.each do |tp| string.gsub!(/a #{tp[0]}(?=$|\W)/i, '1/' + tp[1].to_s) string.gsub!(/\s#{tp[0]}(?=$|\W)/i, '/' + tp[1].to_s) end (DIRECT_ORDINALS + SINGLE_ORDINALS).each do |on| string.gsub!(/(^|\W)#{on[0]}(?=$|\W)/i, '\1' + on[1].to_s + on[0][-2, 2]) end # evaluate fractions when preceded by another number string.gsub!(/(\d+)(?: | and |-)+(|\s)*(\d+)\s*\/\s*(\d+)/i) { ($1.to_f + ($3.to_f/$4.to_f)).to_s } # hundreds, thousands, millions, etc. BIG_PREFIXES.each do |bp| string.gsub!(/(?:)?(\d*) *#{bp[0]}/i) { $1.empty? ? bp[1] : '' + (bp[1] * $1.to_i).to_s } andition(string) end andition(string) string.gsub(//, '') end class << self private def andition(string) sc = StringScanner.new(string) while(sc.scan_until(/(\d+)( | and )(\d+)(?=[^\w]|$)/i)) if sc[2] =~ /and/ || sc[1].size > sc[3].size string[(sc.pos - sc.matched_size)..(sc.pos-1)] = '' + (sc[1].to_i + sc[3].to_i).to_s sc.reset end end end end end numerizer-0.2.0/README.rdoc0000644000004100000410000000115113342353677015402 0ustar www-datawww-data= Numerizer Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two). It was extracted from the awesome Chronic gem http://github.com/evaryont/chronic. == Installation $ sudo gem sources -a http://gemcutter.org $ sudo gem install numerizer == Usage >> require 'numerizer' => true >> Numerizer.numerize('forty two') => "42" >> Numerizer.numerize('two and a half') => "2.5" >> Numerizer.numerize('three quarters') => "3/4" >> Numerizer.numerize('two and three eighths') => "2.375" == Contributors Tom Preston-Werner, John Duffnumerizer-0.2.0/VERSION0000644000004100000410000000000513342353677014641 0ustar www-datawww-data0.2.0