numerizer-0.1.1/0000755000175600017570000000000012730750743012602 5ustar pravipravinumerizer-0.1.1/LICENSE0000644000175600017570000000204612730750743013611 0ustar pravipraviCopyright (c) 2009 Tom Preston-Werner Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. numerizer-0.1.1/VERSION0000644000175600017570000000000612730750743013646 0ustar pravipravi0.1.1 numerizer-0.1.1/README.rdoc0000644000175600017570000000115112730750743014406 0ustar pravipravi= Numerizer Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two). It was extracted from the awesome Chronic gem http://github.com/evaryont/chronic. == Installation $ sudo gem sources -a http://gemcutter.org $ sudo gem install numerizer == Usage >> require 'numerizer' => true >> Numerizer.numerize('forty two') => "42" >> Numerizer.numerize('two and a half') => "2.5" >> Numerizer.numerize('three quarters') => "3/4" >> Numerizer.numerize('two and three eighths') => "2.375" == Contributors Tom Preston-Werner, John Duffnumerizer-0.1.1/.document0000644000175600017570000000007412730750743014422 0ustar pravipraviREADME.rdoc lib/**/*.rb bin/* features/**/*.feature LICENSE numerizer-0.1.1/test/0000755000175600017570000000000012730750743013561 5ustar pravipravinumerizer-0.1.1/test/test_helper.rb0000644000175600017570000000031212730750743016420 0ustar pravipravirequire 'rubygems' require 'test/unit' $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) $LOAD_PATH.unshift(File.dirname(__FILE__)) require 'numerizer' class Test::Unit::TestCase end numerizer-0.1.1/test/test_numerizer.rb0000644000175600017570000000550112730750743017166 0ustar pravipravirequire File.join(File.dirname(__FILE__), 'test_helper') class NumerizerTest < Test::Unit::TestCase def test_straight_parsing strings = { 1 => 'one', 5 => 'five', 10 => 'ten', 11 => 'eleven', 12 => 'twelve', 13 => 'thirteen', 14 => 'fourteen', 15 => 'fifteen', 16 => 'sixteen', 17 => 'seventeen', 18 => 'eighteen', 19 => 'nineteen', 20 => 'twenty', 27 => 'twenty seven', 31 => 'thirty-one', 41 => 'forty one', 42 => 'fourty two', 59 => 'fifty nine', 100 => 'a hundred', 100 => 'one hundred', 150 => 'one hundred and fifty', # 150 => 'one fifty', 200 => 'two-hundred', 500 => '5 hundred', 999 => 'nine hundred and ninety nine', 1_000 => 'one thousand', 1_200 => 'twelve hundred', 1_200 => 'one thousand two hundred', 17_000 => 'seventeen thousand', 21_473 => 'twentyone-thousand-four-hundred-and-seventy-three', 74_002 => 'seventy four thousand and two', 99_999 => 'ninety nine thousand nine hundred ninety nine', 100_000 => '100 thousand', 250_000 => 'two hundred fifty thousand', 1_000_000 => 'one million', 1_250_007 => 'one million two hundred fifty thousand and seven', 1_000_000_000 => 'one billion', 1_000_000_001 => 'one billion and one' } strings.keys.sort.each do |key| assert_equal key, Numerizer.numerize(strings[key]).to_i end assert_equal "2.5", Numerizer.numerize("two and a half") assert_equal "1/2", Numerizer.numerize("one half") end def test_combined_double_digets assert_equal "21", Numerizer.numerize("twentyone") assert_equal "37", Numerizer.numerize("thirtyseven") end def test_fractions_in_words assert_equal "1/4", Numerizer.numerize("1 quarter") assert_equal "1/4", Numerizer.numerize("one quarter") assert_equal "1/4", Numerizer.numerize("a quarter") assert_equal "1/8", Numerizer.numerize("one eighth") assert_equal "3/4", Numerizer.numerize("three quarters") assert_equal "2/4", Numerizer.numerize("two fourths") assert_equal "3/8", Numerizer.numerize("three eighths") end def test_fractional_addition assert_equal "1.25", Numerizer.numerize("one and a quarter") assert_equal "2.375", Numerizer.numerize("two and three eighths") assert_equal "3.5 hours", Numerizer.numerize("three and a half hours") end def test_word_with_a_number assert_equal "pennyweight", Numerizer.numerize("pennyweight") end def test_edges assert_equal "27 Oct 2006 7:30am", Numerizer.numerize("27 Oct 2006 7:30am") end def test_multiple_slashes_should_not_be_evaluated assert_equal '11/02/2007', Numerizer.numerize('11/02/2007') end def test_compatability assert_equal '1/2', Numerizer.numerize('1/2') assert_equal '05/06', Numerizer.numerize('05/06') assert_equal "3.5 hours", Numerizer.numerize("three and a half hours") end end numerizer-0.1.1/numerizer.gemspec0000644000175600017570000000276612730750743016202 0ustar pravipravi# Generated by jeweler # DO NOT EDIT THIS FILE # Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec` # -*- encoding: utf-8 -*- Gem::Specification.new do |s| s.name = %q{numerizer} s.version = "0.1.1" s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version= s.authors = ["John Duff"] s.date = %q{2010-01-01} s.description = %q{Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two). It was extracted from the awesome Chronic gem http://github.com/evaryont/chronic.} s.email = %q{duff.john@gmail.com} s.extra_rdoc_files = [ "LICENSE", "README.rdoc" ] s.files = [ ".document", ".gitignore", "LICENSE", "README.rdoc", "Rakefile", "VERSION", "lib/numerizer.rb", "numerizer.gemspec", "test/test_helper.rb", "test/test_numerizer.rb" ] s.homepage = %q{http://github.com/jduff/numerizer} s.rdoc_options = ["--charset=UTF-8"] s.require_paths = ["lib"] s.rubygems_version = %q{1.3.5} s.summary = %q{Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two).} s.test_files = [ "test/test_helper.rb", "test/test_numerizer.rb" ] if s.respond_to? :specification_version then current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION s.specification_version = 3 if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then else end else end end numerizer-0.1.1/lib/0000755000175600017570000000000012730750743013350 5ustar pravipravinumerizer-0.1.1/lib/numerizer.rb0000644000175600017570000000763712730750743015732 0ustar pravipravi# LICENSE: # # (The MIT License) # # Copyright © 2008 Tom Preston-Werner # # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. require 'strscan' class Numerizer DIRECT_NUMS = [ ['eleven', '11'], ['twelve', '12'], ['thirteen', '13'], ['fourteen', '14'], ['fifteen', '15'], ['sixteen', '16'], ['seventeen', '17'], ['eighteen', '18'], ['nineteen', '19'], ['ninteen', '19'], # Common mis-spelling ['zero', '0'], ['ten', '10'], ['\ba[\b^$]', '1'] # doesn't make sense for an 'a' at the end to be a 1 ] SINGLE_NUMS = [ ['one', 1], ['two', 2], ['three', 3], #['four(\W|$)', '4\1'], # The weird regex is so that it matches four but not fourty ['four', 4], ['five', 5], ['six', 6], ['seven', 7], ['eight', 8], ['nine', 9] ] TEN_PREFIXES = [ ['twenty', 20], ['thirty', 30], ['forty', 40], ['fourty', 40], # Common misspelling ['fifty', 50], ['sixty', 60], ['seventy', 70], ['eighty', 80], ['ninety', 90] ] BIG_PREFIXES = [ ['hundred', 100], ['thousand', 1000], ['million', 1_000_000], ['billion', 1_000_000_000], ['trillion', 1_000_000_000_000], ] FRACTIONS = [ ['half', 2], ['third(s)?', 3], ['fourth(s)?', 4], ['quarter(s)?', 4], ['fifth(s)?', 5], ['sixth(s)?', 6], ['seventh(s)?', 7], ['eighth(s)?', 8], ['nineth(s)?', 9], ] def self.numerize(string) string = string.dup # preprocess string.gsub!(/ +|([^\d])-([^\d])/, '\1 \2') # will mutilate hyphenated-words # easy/direct replacements (DIRECT_NUMS + SINGLE_NUMS).each do |dn| # string.gsub!(/#{dn[0]}/i, '' + dn[1]) string.gsub!(/(^|\W+)#{dn[0]}($|\W+)/i) {"#{$1}" + dn[1].to_s + $2} end # ten, twenty, etc. # TEN_PREFIXES.each do |tp| # string.gsub!(/(?:#{tp[0]}) *(\d(?=[^\d]|$))*/i) {'' + (tp[1] + $1.to_i).to_s} # end TEN_PREFIXES.each do |tp| SINGLE_NUMS.each do |dn| string.gsub!(/(^|\W+)#{tp[0]}#{dn[0]}($|\W+)/i) { "#{$1}" + (tp[1] + dn[1]).to_s + $2 } end string.gsub!(/(^|\W+)#{tp[0]}($|\W+)/i) { "#{$1}" + tp[1].to_s + $2 } end # handle fractions FRACTIONS.each do |tp| string.gsub!(/a #{tp[0]}/i) { '1/' + tp[1].to_s } string.gsub!(/\s#{tp[0]}/i) { '/' + tp[1].to_s } end # evaluate fractions when preceded by another number string.gsub!(/(\d+)(?: | and |-)+(|\s)*(\d+)\s*\/\s*(\d+)/i) { ($1.to_f + ($3.to_f/$4.to_f)).to_s } # hundreds, thousands, millions, etc. BIG_PREFIXES.each do |bp| string.gsub!(/(?:)?(\d*) *#{bp[0]}/i) { '' + (bp[1] * $1.to_i).to_s} andition(string) end andition(string) string.gsub(//, '') end private def self.andition(string) sc = StringScanner.new(string) while(sc.scan_until(/(\d+)( | and )(\d+)(?=[^\w]|$)/i)) if sc[2] =~ /and/ || sc[1].size > sc[3].size string[(sc.pos - sc.matched_size)..(sc.pos-1)] = '' + (sc[1].to_i + sc[3].to_i).to_s sc.reset end end end end numerizer-0.1.1/Rakefile0000644000175600017570000000312712730750743014252 0ustar pravipravirequire 'rubygems' require 'rake' begin require 'jeweler' Jeweler::Tasks.new do |gem| gem.name = "numerizer" gem.summary = "Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two)." gem.description = "Numerizer is a gem to help with parsing numbers in natural language from strings (ex forty two). It was extracted from the awesome Chronic gem http://github.com/evaryont/chronic." gem.email = "duff.john@gmail.com" gem.homepage = "http://github.com/jduff/numerizer" gem.authors = ["John Duff"] # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings end Jeweler::GemcutterTasks.new rescue LoadError puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler" end require 'rake/testtask' Rake::TestTask.new(:test) do |test| test.libs << 'lib' << 'test' test.pattern = 'test/**/test_*.rb' test.verbose = true end begin require 'rcov/rcovtask' Rcov::RcovTask.new do |test| test.libs << 'test' test.pattern = 'test/**/test_*.rb' test.verbose = true end rescue LoadError task :rcov do abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov" end end task :test => :check_dependencies task :default => :test require 'rake/rdoctask' Rake::RDocTask.new do |rdoc| if File.exist?('VERSION') version = File.read('VERSION') else version = "" end rdoc.rdoc_dir = 'rdoc' rdoc.title = "numerizer #{version}" rdoc.rdoc_files.include('README*') rdoc.rdoc_files.include('lib/**/*.rb') end numerizer-0.1.1/.gitignore0000644000175600017570000000004212730750743014566 0ustar pravipravi*.sw? .DS_Store coverage rdoc pkg