words2nums/0000755000000000000000000000000012355064261010073 5ustar words2nums/README0000644000000000000000000000066712355064261010764 0ustar Lingua::EN::Words2Nums parses English representations of numbers like "two thousand and one". Some samples of the range of inputs it can handle can be found in the file "samples", which is used by the regression test suite. Web site is http://kitenet.net/~joey/code/words2nums/ Copyright 2001, 2003 Joey Hess This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. words2nums/TODO0000644000000000000000000000046712355064261010572 0ustar * spelled out numbers, ie, "nine one one" => 911, (currently, it returns 9 + 1 + 1) (getting this right is a pre-req for decimals..) * fractions and decimals * support strings that start with a number and end with non-numbers, returning the remaining part. HARD -- numbers are currently parsed backwards! words2nums/samples0000644000000000000000000000466612355064261011476 0ustar # Sample conversions. The result is on the left, and an input is on the # right. This file is used for regression testing. Note that this file # assumes that a billion is 10^9, but you can configure it otherwise when # you use the module. 2001 two thousand one 3424 three thousand four hundred twenty four 3424 3 thousand 4 hundred 24 798681 seven hundred ninety eight thousand six hundred eighty-one 798000 798 thousand 306172 three hundred six thousand, one hundred seventy two 306172 3 hundred and six thousand, one hundred and seventy-2 42524 fourty-two thousand five hundred twenty-four 0 zero 1 one 1 first 2 second 3 three 3 third 9 ninth 59 fifty-ninth 1000 thousand 1000 one thousand 16 sixteen 1000524 1000,524 999.3333333 999.3333333 30303.30303 30303.30303 65569565609 65569565609 -1 -1 -12211.1133 -12,211.1133 153 one hundred fifty three 88 eighty-eight 42 fourtytwo 1000000 millionth 3424 thirty-four hundred twenty-four 11059 eleven thousand and fifty-nine 9622000 nine million, six hundred and twenty-two thousand 5600000 fifty-six hundred thousand 167 one hundred and sixty-seventh 25300 two hundred and fifty three hundred 65065065065 sixty-five thousand sixty-five million sixty-five thousand and sixty-five 11011011011 eleven billion eleven million eleven thousand eleven 90 four score and ten 501000000 five hundred and one million 12 dozen 48 four dozen 13 baker's dozen 13 bakers dozen 13 baker dozen 39 three baker's dozens 4000 four thousands 1001 thousand one 4603 four thousand six hundred and three 4103 four thousand, hundred and three 288 two gross 288 two grosses 1000000 a million 1000000 million 1000000000 billion 1000000000000 trillion 1 1st 2 2nd 3 3rd 4 4th 4023 4023rd # Perl's numbers don't go this high. # Also, the return for infinity varies with platforms, so this is not a # good test. #inf googolplex #inf seven googolplexes # Maybe when we get Bignum support.. #10314424798490535546171949056 Ten octillion, three hundred fourteen septillion, four hundred twenty-four sextillion, seven hundred ninety-eight quintillion, four hundred ninety quadrillion, five hundred thirty-five trillion, five hundred forty-six billion, one hundred seventy-one million, nine hundred forty-nine thousand, and fifty six # Some things that should not parse to a number: undef and undef , undef undef now is the time for all good men to come to the aid of their country undef gazillion undef hexillion words2nums/testnum0000755000000000000000000000040712355064261011521 0ustar #!/usr/bin/perl use blib; # work on uninstalled package use Words2Nums; import Lingua::EN::Words2Nums; $Lingua::EN::Words2Nums::debug=1; if (@ARGV) { print "$ARGV[0] => ".words2nums(shift)."\n"; } else { while (<>) { print "$_ => ".words2nums($_)."\n"; } } words2nums/Makefile.PL0000755000000000000000000000101512355064261012045 0ustar #!/usr/bin/perl use ExtUtils::MakeMaker; WriteMakefile( NAME => 'Lingua::EN::Words2Nums', VERSION => getversion(), LICENSE => 'perl', $] < 5.005 ? () : ( AUTHOR => 'Joey Hess ', ABSTRACT_FROM => 'Words2Nums.pm', ) ); # The version is pulled from the debian changelog, to avoid having to keep # two copies synced. sub getversion { open(DCH, "debian/changelog") or die "debian/changelog: $!"; $_=; my ($vers) = m/\((.+)\)/; die "no version" unless length $vers; return $vers; } words2nums/debian/0000755000000000000000000000000012427274467011330 5ustar words2nums/debian/changelog0000644000000000000000000001054712427274455013206 0ustar liblingua-en-words2nums-perl (0.19) unstable; urgency=medium * Orphaning the Debian package. -- Joey Hess Fri, 07 Nov 2014 22:00:12 -0400 liblingua-en-words2nums-perl (0.18) unstable; urgency=low * Remove the PM_FILTER to support Windows. (rt.cpan.org #38101) -- Joey Hess Fri, 05 Feb 2010 16:42:32 -0500 liblingua-en-words2nums-perl (0.17) unstable; urgency=low * Use debhelper v7; rules file minimisation. -- Joey Hess Tue, 22 Jul 2008 00:29:12 -0400 liblingua-en-words2nums-perl (0.16) unstable; urgency=low * The repository has moved from svn to git. * Minor improvement to debian/rules clean. -- Joey Hess Fri, 19 Oct 2007 22:21:04 -0400 liblingua-en-words2nums-perl (0.15) unstable; urgency=low * Update url to the web site. * Current standards-version (no real changes). * Fix unicode error in man page. * Fix lintian warning about rules file. -- Joey Hess Mon, 04 Jun 2007 16:49:19 -0400 liblingua-en-words2nums-perl (0.14) unstable; urgency=low * Remove the tests that involve exponentents, as they may not on 64 bit machines, and will cause false test failures. Closes: #250610 -- Joey Hess Fri, 4 Jun 2004 15:09:13 -0300 liblingua-en-words2nums-perl (0.13) unstable; urgency=low * Remove quoting in Makefile.PL so it will build under 5.8.1 (this breaks building under earlier versions of perl though). Closes: #213928 -- Joey Hess Mon, 6 Oct 2003 19:49:19 -0400 liblingua-en-words2nums-perl (0.12) unstable; urgency=low * Move from build-depends-indep to build-depends to meet current policy. -- Joey Hess Wed, 3 Sep 2003 12:14:45 -0400 liblingua-en-words2nums-perl (0.11) unstable; urgency=low * Do not pass through things of the form "10-11", since they're note really numbers. * Fix testnum to work with library in same directory. -- Joey Hess Mon, 26 May 2003 15:48:37 -0400 liblingua-en-words2nums-perl (0.10) unstable; urgency=low * Add proper spelling of "forty", and alternate "nought" spelling. * Make regression test work on win32, with exponents with leading zeroes. -- Joey Hess Wed, 7 May 2003 01:34:11 -0400 liblingua-en-words2nums-perl (0.09) unstable; urgency=low * Corrected parsing of "fourth". Oops! * Moved pm file out of deep directory in source tarball, which was unnecessary for such a small package. * Added AUTHOR and ABSTRACT_FROM to Makefile.PL. * Accept douvigintillion, as well as dovigintillion; I don't know which is right. * Thanks to Erick Calder for his help. -- Joey Hess Mon, 3 Feb 2003 12:16:02 -0500 liblingua-en-words2nums-perl (0.08) unstable; urgency=low * Localize $_. -- Joey Hess Fri, 18 Oct 2002 16:09:59 -0400 liblingua-en-words2nums-perl (0.07) unstable; urgency=low * Use debhelper v4. -- Joey Hess Sat, 1 Jun 2002 18:15:26 -0400 liblingua-en-words2nums-perl (0.06) unstable; urgency=low * Don't try to test for inf, since it seems "Infinity" is the string on some platforms. -- Joey Hess Sat, 1 Jun 2002 10:01:09 -0400 liblingua-en-words2nums-perl (0.05) unstable; urgency=low * Added big numbers between undecillion and trigintillion. Also centillion, and billiard through quindecilliard. -- Joey Hess Tue, 26 Feb 2002 23:09:29 -0500 liblingua-en-words2nums-perl (0.04) unstable; urgency=low * Corrected parsing of otherwise plain numbers that have commas in them (123,456.789) -- Joey Hess Sat, 12 Jan 2002 17:33:22 -0500 liblingua-en-words2nums-perl (0.03) unstable; urgency=low * If the entire string is ignorables ("and", "a", punctuation), don't return 0, but undef. -- Joey Hess Sat, 12 Jan 2002 14:17:16 -0500 liblingua-en-words2nums-perl (0.02) unstable; urgency=low * Added support for trillion through googolplex, and added localization code for the different billions and other numbers. * Lots of bugfixes, including getting the ordinals right (I hope). -- Joey Hess Wed, 19 Dec 2001 23:08:03 -0500 liblingua-en-words2nums-perl (0.01) unstable; urgency=low * First release. -- Joey Hess Wed, 19 Dec 2001 14:23:03 -0500 words2nums/debian/examples0000644000000000000000000000001012355064261013045 0ustar samples words2nums/debian/compat0000644000000000000000000000000212355064261012513 0ustar 7 words2nums/debian/control0000644000000000000000000000122112427274371012721 0ustar Source: liblingua-en-words2nums-perl Section: perl Priority: optional Build-Depends: debhelper (>= 7), perl5, dpkg-dev (>= 1.9.0) Maintainer: Debian QA Group Standards-Version: 3.8.4 Vcs-Git: git://git.kitenet.net/words2nums Homepage: http://kitenet.net/~joey/code/words2nums/ Package: liblingua-en-words2nums-perl Architecture: all Depends: ${perl:Depends}, ${misc:Depends} Description: convert English text to numbers A perl module that can parse a wide variety of English text and deduce the number it represents. For example, it can convert "five million, one thousand and sixteen" to 5001016, and "twenty-seventh" to 27. words2nums/debian/rules0000755000000000000000000000022012355064261012367 0ustar #!/usr/bin/make -f %: dh $@ # Not intended for use by anyone except the author. announcedir: @echo ${HOME}/src/joeywiki/code/words2nums/news words2nums/debian/copyright0000644000000000000000000000072012355064261013247 0ustar Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Upstream-Name: Lingua::EN::Words2Nums Source: native package Files: * Copyright: 2001-2010 Joey Hess License: perl This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. . That means it's dual licensed under the GPL (/usr/share/common-licenses/GPL) and Artistic (/usr/share/common-licenses/Artistic) licenses. words2nums/debian/docs0000644000000000000000000000001412355064261012163 0ustar README TODO words2nums/Words2Nums.pm0000644000000000000000000001773712355064261012473 0ustar #!/usr/bin/perl =head1 NAME Lingua::EN::Words2Nums - convert English text to numbers =cut package Lingua::EN::Words2Nums; use warnings; use strict; require Exporter; our @ISA=qw(Exporter); our @EXPORT=qw(&words2nums); =head1 SYNOPSIS use Lingua::EN::Words2Nums; $num=words2nums("two thousand and one"); $num=words2nums("twenty-second"); $num=words2nums("15 billion, 6 million, and ninteen"); =head1 DESCRIPTION This module converts English text into numbers. It supports both ordinal and cardinal numbers, negative numbers, and very large numbers. The main subroutine, which is exported by default, is words2nums(). This subroutine, when fed a string, will attempt to convert it into a number. If it succeeds, the number will be returned. If it fails, it returns undef. =head1 VARIABLES There are a number of variables that can be used to tweak the behavior of this module. For example, debugging can be be enabled by setting $Lingua::EN::Words2Nums::debug=1 =over 4 =cut # Public global variables. our $debug = 0; our $billion = 10 ** 9; =item $Lingua::EN::Words2Nums::debug Default: 0. If set to a true value, outputs on standard error some useful messages if parsing fails for some reason. =item $Lingua::EN::Words2Nums::billion Default: 10 ** 9. This is the number that will be returned for "one billion". It defaults to the American version; the English will want to set it to 10 ** 12. Setting this number automatically changes all the larger numbers (trillion, quadrillion, etc) to match. =back =head1 NOTES It does not understand decimals or fractions, yet. Scores are supported, eg: "four score and ten". So are dozens. So is a baker's dozen. And a gross. Various mispellings of numbers are understood. While it handles googol correctly, googolplex is too large to fit in perl's standard scalar type, and "inf" will be returned. =cut our %nametosub = ( naught => [ \&num, 0 ], # Cardinal numbers, leaving out the a nought => [ \&num, 0 ], zero => [ \&num, 0 ], # ones that just add "th". one => [ \&num, 1 ], first => [ \&num, 1 ], two => [ \&num, 2 ], second => [ \&num, 2 ], three => [ \&num, 3 ], third => [ \&num, 3 ], four => [ \&num, 4 ], fourth => [ \&num, 4 ], five => [ \&num, 5 ], fifth => [ \&num, 5 ], six => [ \&num, 6 ], seven => [ \&num, 7 ], seven => [ \&num, 7 ], eight => [ \&num, 8 ], eighth => [ \&num, 8 ], nine => [ \&num, 9 ], ninth => [ \&num, 9 ], ten => [ \&num, 10 ], eleven => [ \&num, 11 ], twelve => [ \&num, 12 ], twelfth => [ \&num, 12 ], thirteen => [ \&num, 13 ], fifteen => [ \&num, 15 ], eighteen => [ \&num, 18 ], ninteen => [ \&num, 19 ], # common(?) mispelling teen => [ \&suffix, 10 ], # takes care of the regular teens twenty => [ \&num, 20 ], twentieth => [ \&num, 20 ], thirty => [ \&num, 30 ], thirtieth => [ \&num, 30 ], forty => [ \&num, 40 ], fortieth => [ \&num, 40 ], fourty => [ \&num, 40 ], fourtieth => [ \&num, 40 ], # at least I mispell it like this fifty => [ \&num, 50 ], fiftieth => [ \&num, 50 ], sixty => [ \&num, 60 ], sixtieth => [ \&num, 60 ], seventy => [ \&num, 70 ], seventieth => [ \&num, 70 ], eighty => [ \&num, 80 ], eightieth => [ \&num, 80 ], ninety => [ \&num, 90 ], ninetieth => [ \&num, 90 ], ninty => [ \&num, 90 ], # common mispelling hundred => [ \&prefix, 100 ], thousand => [ \&prefix, 1000 ], million => [ \&prefix, 10 ** 6 ], milion => [ \&prefix, 10 ** 6 ], # common(?) mispelling milliard => [ \&prefix, 10 ** 9 ], billion => [ \&powprefix, 2 ], # These vary depending on country. billiard => [ \&prefix, 10 ** 15 ], trillion => [ \&powprefix, 3 ], trilliard => [ \&prefix, 10 ** 21 ], quadrillion => [ \&powprefix, 4 ], quadrilliard => [ \&prefix, 10 ** 27 ], quintillion => [ \&powprefix, 5 ], quintilliard => [ \&prefix, 10 ** 33 ], sextillion => [ \&powprefix, 6 ], sextilliard => [ \&prefix, 10 ** 39 ], septillion => [ \&powprefix, 7 ], septilliard => [ \&prefix, 10 ** 45 ], octillion => [ \&powprefix, 8 ], octilliard => [ \&prefix, 10 ** 51 ], nonillion => [ \&powprefix, 9 ], nonilliard => [ \&prefix, 10 ** 57 ], decillion => [ \&powprefix, 10 ], decilliard => [ \&prefix, 10 ** 63 ], undecillion => [ \&powprefix, 11 ], undecilliard => [ \&prefix, 10 ** 69 ], duodecillion => [ \&powprefix, 12 ], duodecilliard => [ \&prefix, 10 ** 75 ], tredecillion => [ \&powprefix, 13 ], tredecilliard => [ \&prefix, 10 ** 81 ], quattuordecillion => [ \&powprefix, 14 ], quattuordecilliard => [ \&prefix, 10 ** 87 ], quindecillion => [ \&powprefix, 15 ], quindecilliard => [ \&prefix, 10 ** 93 ], sexdecillion => [ \&powprefix, 16 ], septendecillion => [ \&powprefix, 17 ], octodecillion => [ \&powprefix, 18 ], novemdecillion => [ \&powprefix, 19 ], vigintillion => [ \&powprefix, 20 ], unvigintillion => [ \&powprefix, 21 ], duovigintillion => [ \&powprefix, 22 ], duvigintillion => [ \&powprefix, 22 ], # some use this spelling trevigintillion => [ \&powprefix, 23 ], quattuorvigintillion => [ \&powprefix, 24 ], quinvigintillion => [ \&powprefix, 25 ], sexvigintillion => [ \&powprefix, 26 ], septenvigintillion => [ \&powprefix, 27 ], octovigintillion => [ \&powprefix, 28 ], novemvigintillion => [ \&powprefix, 29 ], trigintillion => [ \&powprefix, 30 ], # This process can be continued indefinitely, but one has to stop # somewhere. -- A Dictionary of Units of Measurement centillion => [ \&powprefix, 100 ], googol => [ \&googol ], googolplex => [ \&googolplex ], negative => [ \&invert ], minus => [ \&invert ], score => [ \&prefix, 20 ], gross => [ \&prefix, 12 * 12 ], dozen => [ \&prefix, 12 ], bakersdozen => [ \&prefix, 13 ], bakerdozen => [ \&prefix, 13 ], eleventyone => [ \&num, 111 ], # This nprogram written on the day eleventyfirst =>[ \&num, 111 ], # FOTR released. s => [ sub {} ], # ignore 's', at the end of a word, # easy pluralization of dozens, etc. es => [ sub {} ], # same for 'es'; for googolplexes, etc. th => [ sub {} ], # ignore 'th', for cardinal nums ); # Note the ordering, so that eg, ninety has a chance to match before nine. my $numregexp = join("|", reverse sort keys %nametosub); $numregexp=qr/($numregexp)/; my ($total, $mult, $oldpre, $newmult, $suffix, $val); sub num ($) { $val = shift; if ($suffix) { $val += $suffix; $suffix = 0; } $total += $val * $mult; $newmult = 0; } sub prefix ($) { my $pre = shift; if ($pre > $oldpre) { # end of a prefix chain $total += $mult if $newmult; # special case for lone "thousand", etc. $mult = 1; } $mult *= $pre; $oldpre = $pre; $newmult = 1; } sub powprefix { my $power = shift; if ($billion == 10 ** 9) { # EN prefix(10 ** (($power + 1) * 3)); } elsif ($billion == 10 ** 12) { # GB prefix(10 ** ($power * 6)); } else { failure("\$billion is set to odd value: $billion"); } } sub suffix ($) { $suffix = shift; } sub invert () { $total *= -1; } sub googol () { prefix(10 ** 100); } sub googolplex () { prefix(10 ** (10 ** 100)); } sub failure ($) { print STDERR shift()."\n" if $debug; return; # undef on failure } sub words2nums ($) { local $_=lc(shift); chomp $_; s/,//; # ignore comma, even if it's in a plain number return $_ if /^[-+]?[.0-9\s]+$/; # short circuit for plain number if (/^[-+0-9.]+$/) { return failure("+ or - not at beginning") if length $_; } s/\b(and|a|of)\b//g; # ignore some common words s/[^A-Za-z0-9.]//g; # ignore spaces and punctuation, except period. return failure("not a number") unless length $_; $total=$oldpre=$suffix=$newmult=0; $mult=1; # Work backwards up the string. while (length $_) { $nametosub{$1}[0]->($nametosub{$1}[1]) while s/$numregexp$//; if (length $_) { if (s/(\d+)(?:st|nd|rd|th)?$//) { num($1); } else { last; } } } return failure("error at $_") if length $_; $total += $mult if $newmult; # special case for lone "thousand", etc. return $total; } =head1 AUTHOR Copyright 2001-2003 Joey Hess This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut 1 words2nums/t/0000755000000000000000000000000012355064261010336 5ustar words2nums/t/samples.t0000644000000000000000000000102012355064261012160 0ustar #!/usr/bin/perl use strict; use Test; our @samples; BEGIN { open(SAMPLES, "samples") || die "samples: $!"; @samples=grep { ! /^#/ } ; plan tests => (scalar @samples); } use Lingua::EN::Words2Nums; foreach (@samples) { chomp $_; my ($num, $text)=split(' ', $_, 2); if ($num eq 'undef') { ok(! defined words2nums($text)); } else { my $w2n = words2nums($text); # On win32 platform, exponents semm to have leading zero. # This makes it work either way. $w2n =~ s/e+0(\d+)/e+$1/; ok($w2n, $num); } } words2nums/MANIFEST0000644000000000000000000000021112355064261011216 0ustar MANIFEST Makefile.PL README TODO debian/changelog debian/control debian/copyright debian/rules samples t/samples.t testnum Words2Nums.pm words2nums/.gitattributes0000644000000000000000000000005412355064261012765 0ustar debian/changelog merge=dpkg-mergechangelogs