Text-Levenshtein-0.06_01/0000755000000000007650000000000011012446074014220 5ustar root00000000000000Text-Levenshtein-0.06_01/Changes0000644000076500007650000000123411012445641015641 0ustar admin00000000000000Change file for Text::Levenshtein Dree Mistrut Josh Goldberg Version 0.06: 2008/05/13 Bugfixes, especially 26134 Version 0.05: 2004/06/29 Rename distfile for consistency Version 0.04: 2004/03/06 Added several modifications to increase speed Added fastdistance routine when array form can be sacrificed for increased speed Version 0.03 : 2002/07/28 Changed docs to point to Text::WagnerFischer (Thanks to S. Rodighiero and D. Frankowski to point me this) Better initialization of the matrix Version 0.02 : 2002/05/21 Added array support Version 0.01 : 2002/05/20 No changes -- initial release Text-Levenshtein-0.06_01/Levenshtein.pm0000644000076500007650000000571611012445356017204 0ustar admin00000000000000package Text::Levenshtein; use strict; use Exporter; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); $VERSION = '0.06_01'; @ISA = qw(Exporter); @EXPORT = (); @EXPORT_OK = qw(&distance &fastdistance); %EXPORT_TAGS = (); sub _min { return $_[0] < $_[1] ? $_[0] < $_[2] ? $_[0] : $_[2] : $_[1] < $_[2] ? $_[1] : $_[2]; } sub distance { my ($s,@t)=@_; my $n=length($s); my @result; foreach my $t (@t) { if ($s eq $t) { push @result, 0; next; } my @d; my $cost=0; my $m=length($t); push @result,$m and next unless $n; push @result,$n and next unless $m; $d[0][0]=0; foreach my $i (1 .. $n) { $d[$i][0]=$i; } foreach my $j (1 .. $m) { $d[0][$j]=$j; } for my $i (1 .. $n) { my $s_i=substr($s,$i-1,1); for my $j (1 .. $m) { $d[$i][$j]=&_min($d[$i-1][$j]+1, $d[$i][$j-1]+1, $d[$i-1][$j-1]+($s_i eq substr($t,$j-1,1) ? 0 : 1) ) } } push @result,$d[$n][$m]; } if (wantarray) {return @result} else {return $result[0]} } sub fastdistance { my $word1 = shift; my $word2 = shift; return 0 if $word1 eq $word2; my @d; my $len1 = length $word1; my $len2 = length $word2; $d[0][0] = 0; for (1 .. $len1) { $d[$_][0] = $_; return $_ if $_!=$len1 && substr($word1,$_) eq substr($word2,$_); } for (1 .. $len2) { $d[0][$_] = $_; return $_ if $_!=$len2 && substr($word1,$_) eq substr($word2,$_); } for my $i (1 .. $len1) { my $w1 = substr($word1,$i-1,1); for (1 .. $len2) { $d[$i][$_] = _min($d[$i-1][$_]+1, $d[$i][$_-1]+1, $d[$i-1][$_-1]+($w1 eq substr($word2,$_-1,1) ? 0 : 1)); } } return $d[$len1][$len2]; } 1; __END__ =head1 NAME Text::Levenshtein - An implementation of the Levenshtein edit distance =head1 SYNOPSIS use Text::Levenshtein qw(distance); print distance("foo","four"); # prints "2" print fastdistance("foo","four"); # prints "2" faster my @words=("four","foo","bar"); my @distances=distance("foo",@words); print "@distances"; # prints "2 0 3" =head1 DESCRIPTION This module implements the Levenshtein edit distance. The Levenshtein edit distance is a measure of the degree of proximity between two strings. This distance is the number of substitutions, deletions or insertions ("edits") needed to transform one string into the other one (and vice versa). When two strings have distance 0, they are the same. A good point to start is: &fastdistance can be called with two scalars and is faster in most cases. See also Text::LevenshteinXS on CPAN if you do not require a perl-only implementation. It is extremely faster in nearly all cases. See also Text::WagnerFischer on CPAN for a configurable edit distance, i.e. for configurable costs (weights) for the edits. =head1 AUTHOR Copyright 2002 Dree Mistrut > This package is free software and is provided "as is" without express or implied warranty. You can redistribute it and/or modify it under the same terms as Perl itself. =cut Text-Levenshtein-0.06_01/Makefile.PL0000644000076500007650000000045311012446060016316 0ustar admin00000000000000use ExtUtils::MakeMaker; # See lib/ExtUtils/MakeMaker.pm for details of how to influence # the contents of the Makefile that is written. WriteMakefile( NAME => 'Text::Levenshtein', VERSION => '0.06_01', LINKTYPE => '$(INST_PM)', PREREQ_PM => {'Test::More' => 0} ); Text-Levenshtein-0.06_01/MANIFEST0000644000076500007650000000021210022427453015473 0ustar admin00000000000000Changes Makefile.PL README Levenshtein.pm test.pl MANIFEST META.yml Module meta-data (added by MakeMaker) Text-Levenshtein-0.06_01/META.yml0000644000076500007650000000051511012446074015621 0ustar admin00000000000000# http://module-build.sourceforge.net/META-spec.html #XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX# name: Text-Levenshtein version: 0.06_01 version_from: installdirs: site requires: Test::More: 0 distribution_type: module generated_by: ExtUtils::MakeMaker version 6.17 Text-Levenshtein-0.06_01/README0000644000076500007650000000242510021760757015240 0ustar admin00000000000000 Text::Levenshtein is an implementation of the Levenshtein edit distance in Perl. A good point to start is: See also Text::WagnerFischer on CPAN for a configurable edit distance. PREREQUISITES This suite requires Perl 5; I tested it only under Perl 5.6. Text::Levenshtein does not use any nonstandard modules. INSTALLATION You install Text::Levenshtein by running these commands in the *nix environment: perl Makefile.PL make make test (optional) make install To install Text::Levenshtein in the Win32 environment, use nmake instead of make. nmake is available for free (in a self extracting executable): After download and inflate, put nmake.exe and nmake.err in c:\windows\command . DOCUMENTATION POD format documentation is included in Levenshtein.pm. POD is readable with the command: perldoc Text::Levenshtein AVAILABILITY The latest version of Text::Levenshtein is available from the CPAN COPYRIGHT Copyright 2002 Dree Mistrut This package is free software and is provided "as is" without express or implied warranty. You can redistribute it and/or modify it under the same terms as Perl itself. Text-Levenshtein-0.06_01/test.pl0000644000076500007650000000146011012444666015671 0ustar admin00000000000000use strict; use Test::More qw (no_plan); use lib 'blib/lib'; use Text::Levenshtein qw(distance fastdistance); is_deeply(distance("foo","four"),2,"Correct distance foo four"); is_deeply(distance("foo","foo"),0,"Correct distance foo foo"); is_deeply(distance("cow","cat"),2,"Correct distance cow cat"); is_deeply(distance("cat","moocow"),5,"Correct distance cat moocow"); is_deeply(distance("cat","cowmoo"),5,"Correct distance cat cowmoo"); is_deeply(distance("sebastian","sebastien"),1,"Correct distance sebastian sebastien"); is_deeply(distance("more","cowbell"),5,"Correct distance more cowbell"); my @foo = distance("foo","four","foo","bar"); my @bar = (2,0,3); is_deeply(\@foo,\@bar,"Array test: Correct distances foo four foo bar"); is_deeply(fastdistance("foo","boo"),1,"Fast test: Correct distance foo boo");