Lingua-EN-FindNumber-1.2/0000755000175200017520000000000010314107710013664 5ustar tonytonyLingua-EN-FindNumber-1.2/t/0000755000175200017520000000000010314107710014127 5ustar tonytonyLingua-EN-FindNumber-1.2/t/1.t0000644000175200017520000000062707764651777014521 0ustar tonytonyuse Test::More tests => 4; use_ok("Lingua::EN::FindNumber"); ok($number_re, "Exported the regular expression"); my $text = "Fourscore and seven years ago, our four fathers..."; is(numify($text), "87 years ago, our 4 fathers...", "numify"); @numbers = extract_numbers($text); is_deeply(\@numbers, ["Fourscore and seven", "four"], "extract_numbers"); my $x = $number_re; #quiet warnings $number_re = $x; Lingua-EN-FindNumber-1.2/t/pod.t0000644000175200017520000000020110314107500015064 0ustar tonytonyuse Test::More; eval "use Test::Pod 1.00"; plan skip_all => "Test::Pod 1.00 required for testing POD" if $@; all_pod_files_ok(); Lingua-EN-FindNumber-1.2/t/pod-coverage.t0000644000175200017520000000024110314107500016661 0ustar tonytonyuse Test::More; eval "use Test::Pod::Coverage 1.00"; plan skip_all => "Test::Pod::Coverage 1.00 required for testing POD coverage" if $@; all_pod_coverage_ok(); Lingua-EN-FindNumber-1.2/lib/0000755000175200017520000000000010314107710014432 5ustar tonytonyLingua-EN-FindNumber-1.2/lib/Lingua/0000755000175200017520000000000010314107710015651 5ustar tonytonyLingua-EN-FindNumber-1.2/lib/Lingua/EN/0000755000175200017520000000000010314107710016153 5ustar tonytonyLingua-EN-FindNumber-1.2/lib/Lingua/EN/FindNumber.pm0000644000175200017520000000651110314107657020557 0ustar tonytonypackage Lingua::EN::FindNumber; use 5.006; our $VERSION = '1.2'; use strict; use warnings; use base 'Exporter'; our @EXPORT = qw( extract_numbers $number_re numify ); use Lingua::EN::Words2Nums; # This is from Lingua::EN::Words2Nums, after being thrown through # Regex::PreSuf my $numbers = qr/((?:b(?:akers?dozen|illi(?:ard|on))|centillion|d(?:ecilli(?:ard|on)|ozen|u(?:o(?:decilli(?:ard|on)|vigintillion)|vigintillion))|e(?:ight(?:een|ieth|[yh])?|leven(?:ty(?:first|one))?|s)|f(?:i(?:ft(?:een|ieth|[yh])|rst|ve)|o(?:rt(?:ieth|y)|ur(?:t(?:ieth|[yh]))?))|g(?:oogol(?:plex)?|ross)|hundred|mi(?:l(?:ion|li(?:ard|on))|nus)|n(?:aught|egative|in(?:et(?:ieth|y)|t(?:een|[yh])|e)|o(?:nilli(?:ard|on)|ught|vem(?:dec|vigint)illion))|o(?:ct(?:illi(?:ard|on)|o(?:dec|vigint)illion)|ne)|qu(?:a(?:drilli(?:ard|on)|ttuor(?:decilli(?:ard|on)|vigintillion))|in(?:decilli(?:ard|on)|tilli(?:ard|on)|vigintillion))|s(?:core|e(?:cond|pt(?:en(?:dec|vigint)illion|illi(?:ard|on))|ven(?:t(?:ieth|y))?|x(?:decillion|tilli(?:ard|on)|vigintillion))|ix(?:t(?:ieth|y))?)|t(?:ee?n|h(?:ir(?:t(?:een|ieth|y)|d)|ousand|ree)|r(?:e(?:decilli(?:ard|on)|vigintillion)|i(?:gintillion|lli(?:ard|on)))|w(?:e(?:l(?:fth|ve)|nt(?:ieth|y))|o)|h)|un(?:decilli(?:ard|on)|vigintillion)|vigintillion|zero|s))/i; my $ok_words = qr/\b(and|a|of)\b/; my $ok_things = qr/[^A-Za-z0-9.]/; our $number_re = qr/\b(($numbers($ok_words|$ok_things)*)+)\b/i; sub extract_numbers { my $text = shift; my @numbers; push @numbers, $1 while $text =~ /$number_re/g; s/\s+$// for @numbers; return @numbers; } sub numify { my $text = shift; $text =~ s/$number_re/words2nums($1). ($1 =~ m{(\s+)$} ? $1 :"")/eg; return $text; } 1; __END__ =head1 NAME Lingua::EN::FindNumber - Locate (written) numbers in English text =head1 SYNOPSIS use Lingua::EN::FindNumber; my $text = "Fourscore and seven years ago, our four fathers..."; numify($text); # "87 years ago, our 4 fathers..." @numbers = extract_numbers($text); # "Fourscore and seven", "four" while ($text =~ /$number_re/g) { # Build your own iterator =head1 DESCRIPTION This module provides a regular expression for finding numbers in English text. It also provides functions for extracting and manipulating such numbers. =head1 EXPORTED METHODS =head2 extract_numbers / numify / $number_re numify($text); # "87 years ago, our 4 fathers..." @numbers = extract_numbers($text); # "Fourscore and seven", "four" while ($text =~ /$number_re/g) { # Build your own iterator =head1 SEE ALSO This module was written for the Natural Languages chapter of the second edition of Advanced Perl Programming. If you liked the module, why not buy the book? http://www.amazon.co.uk/exec/obidos/ASIN/0596004567/tmtm-20 This module works rather well in conjunction with L, which is a very cool module anyway. (And I stole some of this module's code from it. Thanks, Joey!) It may also be involved with L in the future, so check that one out too. =head1 AUTHOR Current maintainer: Tony Bowden Original author: Simon Cozens =head1 BUGS and QUERIES Please direct all correspondence regarding this module to: bug-Lingua-EN-Number@rt.cpan.org =head1 COPYRIGHT AND LICENSE Copyright 2003-2005 by Simon Cozens This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut Lingua-EN-FindNumber-1.2/README0000644000175200017520000000331110314107662014550 0ustar tonytonyNAME Lingua::EN::FindNumber - Locate (written) numbers in English text SYNOPSIS use Lingua::EN::FindNumber; my $text = "Fourscore and seven years ago, our four fathers..."; numify($text); # "87 years ago, our 4 fathers..." @numbers = extract_numbers($text); # "Fourscore and seven", "four" while ($text =~ /$number_re/g) { # Build your own iterator DESCRIPTION This module provides a regular expression for finding numbers in English text. It also provides functions for extracting and manipulating such numbers. EXPORTED METHODS extract_numbers / numify / $number_re numify($text); # "87 years ago, our 4 fathers..." @numbers = extract_numbers($text); # "Fourscore and seven", "four" while ($text =~ /$number_re/g) { # Build your own iterator SEE ALSO This module was written for the Natural Languages chapter of the second edition of Advanced Perl Programming. If you liked the module, why not buy the book? http://www.amazon.co.uk/exec/obidos/ASIN/0596004567/tmtm-20 This module works rather well in conjunction with Lingua::EN::Words2Nums, which is a very cool module anyway. (And I stole some of this module's code from it. Thanks, Joey!) It may also be involved with Lingua::EN::NamedEntity in the future, so check that one out too. AUTHOR Current maintainer: Tony Bowden Original author: Simon Cozens BUGS and QUERIES Please direct all correspondence regarding this module to: bug-Lingua-EN-Number@rt.cpan.org COPYRIGHT AND LICENSE Copyright 2003-2005 by Simon Cozens This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. Lingua-EN-FindNumber-1.2/Changes0000644000175200017520000000054510314107525015167 0ustar tonytonyRevision history for Perl extension Lingua::EN::FindNumber. 1.2 Tue Sep 20 22:35:07 UTC 2005 - Add link to Advanced Perl Programming now that it's published 1.1 Sun Oct 24 13:44:30 UTC 2004 - Tony Bowden is now maintainer 1.00 Sun Dec 7 16:01:06 2003 - original version; created by h2xs 1.22 with options -b 5.6.0 -AX -n Lingua::EN::FindNumber Lingua-EN-FindNumber-1.2/Makefile.PL0000644000175200017520000000043310136731444015647 0ustar tonytonyuse 5.006; use ExtUtils::MakeMaker; WriteMakefile( NAME => 'Lingua::EN::FindNumber', VERSION_FROM => 'lib/Lingua/EN/FindNumber.pm', ABSTRACT_FROM => 'lib/Lingua/EN/FindNumber.pm', AUTHOR => 'Tony Bowden', PREREQ_PM => { 'Lingua::EN::Words2Nums' => 0 }, ); Lingua-EN-FindNumber-1.2/META.yml0000644000175200017520000000055010314107707015143 0ustar tonytony# http://module-build.sourceforge.net/META-spec.html #XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX# name: Lingua-EN-FindNumber version: 1.2 version_from: lib/Lingua/EN/FindNumber.pm installdirs: site requires: Lingua::EN::Words2Nums: 0 distribution_type: module generated_by: ExtUtils::MakeMaker version 6.17 Lingua-EN-FindNumber-1.2/MANIFEST0000644000175200017520000000022010314107674015020 0ustar tonytonyChanges lib/Lingua/EN/FindNumber.pm Makefile.PL MANIFEST META.yml Module meta-data (added by MakeMaker) README t/1.t t/pod-coverage.t t/pod.t