Regexp-Wildcards-1.05/000755 000765 000024 00000000000 12206211220 015036 5ustar00vincestaff000000 000000 Regexp-Wildcards-1.05/Changes000644 000765 000024 00000005724 12206211057 016351 0ustar00vincestaff000000 000000 Revision history for Regexp-Wildcards 1.05 2013-08-24 20:15 UTC This is a maintenance release. The code contains no functional change. Satisfied users of version 1.04 can skip this update. + Doc : POD headings are now properly linkable. + Tst : Author tests are no longer bundled with this distribution. They are only made available to authors in the git repository. + Upd : Package metadata overhaul. 1.04 2011-08-25 12:50 UTC + Chg : Minor code cleanups. + Fix : Use Scalar::Util::blessed() to check objects classes. Scalar::Util is required. + Fix : Work around Kwalitee test misfailures. + Upd : The distribution metadata was updated to modern standards. 1.03 2009-02-26 15:35 UTC + Add : Translating both 'jokers' and 'sql' at the same time. + Doc : Cleanups. + Fix : The ->type forgot how to really accept $^O since the rewrite. Reported by Bruce McKenzie in RT #43643. + Upd : META.yml spec updated to 1.4. 1.02 2008-08-23 09:15 UTC + Add : The 'anchor' metacharacter class. 1.01 2008-08-19 15:20 UTC + Fix : Now we can do both SQL and brackets. + Tst : Add tests for embedded newlines. 1.00 2008-08-18 17:20 UTC + Chg : Rewritten the module in an OO way. It's now easier to specify what you want to translate. + Tst : 100% coverage reached. 0.08 2008-03-09 15:55 UTC + Add : ':funcs' export tag. + Doc : Copyright update. + Fix : Correct dependencies listing in META.yml. + Tst : Author tests overhaul. + Tst : t/95-portability-files.t. 0.07 2007-08-28 12:35 UTC + Fix : Tests are now strict. + Fix : Complete dependencies. 0.06 2007-06-26 12:40 UTC + Add : SQL '%' and '_' wildcards (with corresponding pod & tests). + Fix : Typos in pod (looks like this will never end...). 0.05 2007-06-22 14:40 UTC + Add : Windows strange behaviours caveat. + Chg : Simplified bracket prefix. + Fix : Typos in pod. 0.04 2007-06-20 19:00 UTC + Add : You can supply $^O as the type for wc2re, which will wrap to wc2re_win32 for 'dos', 'os2', 'MSWin32', 'cygwin', and to wc2re_unix in all the other cases. + Add : Generated regexps can now capture the interesting bits of the wildcard expression via the configuration variables $CaptureSingle, $CaptureAny and $CaptureBrackets (see pod) + Add : Corresponding pod & tests 0.03 2007-06-17 14:45 UTC + Fix : Missing PREREQ_PM in Makefile.PL + Fix : Typos in pod. 0.02 2007-06-16 09:15 UTC + Fix : wc2re_unix should escape top-level commas. + Fix : added missing samples/wc2re.pl + Add : tests descriptions in t/12-brackets.t 0.01 2007-06-14 First version, released on an unsuspecting world. Regexp-Wildcards-1.05/lib/000755 000765 000024 00000000000 12206211220 015604 5ustar00vincestaff000000 000000 Regexp-Wildcards-1.05/Makefile.PL000644 000765 000024 00000002352 12205473710 017027 0ustar00vincestaff000000 000000 use strict; use warnings; use ExtUtils::MakeMaker; my $dist = 'Regexp-Wildcards'; (my $name = $dist) =~ s{-}{::}g; (my $file = $dist) =~ s{-}{/}g; $file = "lib/$file.pm"; my %PREREQ_PM = ( 'Carp' => 0, 'Scalar::Util' => 0, 'Text::Balanced' => 0, ); my %META = ( configure_requires => { 'ExtUtils::MakeMaker' => 0, }, build_requires => { 'ExtUtils::MakeMaker' => 0, 'Test::More' => 0, %PREREQ_PM, }, dynamic_config => 0, resources => { bugtracker => "http://rt.cpan.org/Dist/Display.html?Name=$dist", homepage => "http://search.cpan.org/dist/$dist/", license => 'http://dev.perl.org/licenses/', repository => "http://git.profvince.com/?p=perl%2Fmodules%2F$dist.git", }, ); WriteMakefile( NAME => $name, AUTHOR => 'Vincent Pit ', LICENSE => 'perl', VERSION_FROM => $file, ABSTRACT_FROM => $file, PL_FILES => {}, PREREQ_PM => \%PREREQ_PM, MIN_PERL_VERSION => '5.006', META_MERGE => \%META, dist => { PREOP => "pod2text -u $file > \$(DISTVNAME)/README", COMPRESS => 'gzip -9f', SUFFIX => 'gz' }, clean => { FILES => "$dist-* *.gcov *.gcda *.gcno cover_db Debian_CPANTS.txt" }, ); Regexp-Wildcards-1.05/MANIFEST000644 000765 000024 00000000327 12204755602 016210 0ustar00vincestaff000000 000000 Changes MANIFEST META.json META.yml Makefile.PL README lib/Regexp/Wildcards.pm samples/wc2re.pl t/00-load.t t/02-can.t t/10-obj.t t/11-opts.t t/20-jokers.t t/21-commas.t t/22-brackets.t t/23-groups.t t/24-anchors.t Regexp-Wildcards-1.05/META.json000644 000765 000024 00000002761 12206211220 016465 0ustar00vincestaff000000 000000 { "abstract" : "Converts wildcard expressions to Perl regular expressions.", "author" : [ "Vincent Pit " ], "dynamic_config" : 0, "generated_by" : "ExtUtils::MakeMaker version 6.72, CPAN::Meta::Converter version 2.132140", "license" : [ "perl_5" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : "2" }, "name" : "Regexp-Wildcards", "no_index" : { "directory" : [ "t", "inc" ] }, "prereqs" : { "build" : { "requires" : { "Carp" : "0", "ExtUtils::MakeMaker" : "0", "Scalar::Util" : "0", "Test::More" : "0", "Text::Balanced" : "0" } }, "configure" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "runtime" : { "requires" : { "Carp" : "0", "Scalar::Util" : "0", "Text::Balanced" : "0", "perl" : "5.006" } } }, "release_status" : "stable", "resources" : { "bugtracker" : { "web" : "http://rt.cpan.org/Dist/Display.html?Name=Regexp-Wildcards" }, "homepage" : "http://search.cpan.org/dist/Regexp-Wildcards/", "license" : [ "http://dev.perl.org/licenses/" ], "repository" : { "url" : "http://git.profvince.com/?p=perl%2Fmodules%2FRegexp-Wildcards.git" } }, "version" : "1.05" } Regexp-Wildcards-1.05/META.yml000644 000765 000024 00000001577 12206211220 016321 0ustar00vincestaff000000 000000 --- abstract: 'Converts wildcard expressions to Perl regular expressions.' author: - 'Vincent Pit ' build_requires: Carp: 0 ExtUtils::MakeMaker: 0 Scalar::Util: 0 Test::More: 0 Text::Balanced: 0 configure_requires: ExtUtils::MakeMaker: 0 dynamic_config: 0 generated_by: 'ExtUtils::MakeMaker version 6.72, CPAN::Meta::Converter version 2.132140' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: 1.4 name: Regexp-Wildcards no_index: directory: - t - inc requires: Carp: 0 Scalar::Util: 0 Text::Balanced: 0 perl: 5.006 resources: bugtracker: http://rt.cpan.org/Dist/Display.html?Name=Regexp-Wildcards homepage: http://search.cpan.org/dist/Regexp-Wildcards/ license: http://dev.perl.org/licenses/ repository: http://git.profvince.com/?p=perl%2Fmodules%2FRegexp-Wildcards.git version: 1.05 Regexp-Wildcards-1.05/README000644 000765 000024 00000023105 12206211220 015717 0ustar00vincestaff000000 000000 NAME Regexp::Wildcards - Converts wildcard expressions to Perl regular expressions. VERSION Version 1.05 SYNOPSIS use Regexp::Wildcards; my $rw = Regexp::Wildcards->new(type => 'unix'); my $re; $re = $rw->convert('a{b?,c}*'); # Do it Unix shell style. $re = $rw->convert('a?,b*', 'win32'); # Do it Windows shell style. $re = $rw->convert('*{x,y}?', 'jokers'); # Process the jokers and # escape the rest. $re = $rw->convert('%a_c%', 'sql'); # Turn SQL wildcards into # regexps. $rw = Regexp::Wildcards->new( do => [ qw ], # Do jokers and brackets. capture => [ qw ], # Capture *'s greedily. ); $rw->do(add => 'groups'); # Don't escape groups. $rw->capture(rem => [ qw ]); # Actually we want non-greedy # matches. $re = $rw->convert('*a{,(b)?}?c*'); # '(.*?)a(?:|(b).).c(.*?)' $rw->capture(); # No more captures. DESCRIPTION In many situations, users may want to specify patterns to match but don't need the full power of regexps. Wildcards make one of those sets of simplified rules. This module converts wildcard expressions to Perl regular expressions, so that you can use them for matching. It handles the "*" and "?" jokers, as well as Unix bracketed alternatives "{,}", but also "%" and "_" SQL wildcards. If required, it can also keep original "(...)" groups or "^" and "$" anchors. Backspace ("\") is used as an escape character. Typesets that mimic the behaviour of Windows and Unix shells are also provided. METHODS "new" my $rw = Regexp::Wildcards->new(do => $what, capture => $capture); my $rw = Regexp::Wildcards->new(type => $type, capture => $capture); Constructs a new Regexp::Wildcard object. "do" lists all features that should be enabled when converting wildcards to regexps. Refer to "do" for details on what can be passed in $what. The "type" specifies a predefined set of "do" features to use. See "type" for details on which types are valid. The "do" option overrides "type". "capture" lists which atoms should be capturing. Refer to "capture" for more details. "do" $rw->do($what); $rw->do(set => $c1); $rw->do(add => $c2); $rw->do(rem => $c3); Specifies the list of metacharacters to convert or to prevent for escaping. They fit into six classes : * 'jokers' Converts "?" to "." and "*" to ".*". 'a**\\*b??\\?c' ==> 'a.*\\*b..\\?c' * 'sql' Converts "_" to "." and "%" to ".*". 'a%%\\%b__\\_c' ==> 'a.*\\%b..\\_c' * 'commas' Converts all "," to "|" and puts the complete resulting regular expression inside "(?: ... )". 'a,b{c,d},e' ==> '(?:a|b\\{c|d\\}|e)' * 'brackets' Converts all matching "{ ... , ... }" brackets to "(?: ... | ... )" alternations. If some brackets are unbalanced, it tries to substitute as many of them as possible, and then escape the remaining unmatched "{" and "}". Commas outside of any bracket-delimited block are also escaped. 'a,b{c,d},e' ==> 'a\\,b(?:c|d)\\,e' '{a\\{b,c}d,e}' ==> '(?:a\\{b|c)d\\,e\\}' '{a{b,c\\}d,e}' ==> '\\{a\\{b\\,c\\}d\\,e\\}' * 'groups' Keeps the parenthesis "( ... )" of the original string without escaping them. Currently, no check is done to ensure that the parenthesis are matching. 'a(b(c))d\\(\\)' ==> (no change) * 'anchors' Prevents the *beginning-of-line* "^" and *end-of-line* "$" anchors to be escaped. Since "[...]" character class are currently escaped, a "^" will always be interpreted as *beginning-of-line*. 'a^b$c' ==> (no change) Each $c can be any of : * A hash reference, with wanted metacharacter group names (described above) as keys and booleans as values ; * An array reference containing the list of wanted metacharacter classes ; * A plain scalar, when only one group is required. When "set" is present, the classes given as its value replace the current object options. Then the "add" classes are added, and the "rem" classes removed. Passing a sole scalar $what is equivalent as passing "set => $what". No argument means "set => [ ]". $rw->do(set => 'jokers'); # Only translate jokers. $rw->do('jokers'); # Same. $rw->do(add => [ qw ]); # Translate also SQL and commas. $rw->do(rem => 'jokers'); # Specifying both 'sql' and # 'jokers' is useless. $rw->do(); # Translate nothing. The "do" method returns the Regexp::Wildcards object. "type" $rw->type($type); Notifies to convert the metacharacters that corresponds to the predefined type $type. $type can be any of : * 'jokers', 'sql', 'commas', 'brackets' Singleton types that enable the corresponding "do" classes. * 'unix' Covers typical Unix shell globbing features (effectively 'jokers' and 'brackets'). * $^O values for common Unix systems Wrap to 'unix' (see perlport for the list). * "undef" Defaults to 'unix'. * 'win32' Covers typical Windows shell globbing features (effectively 'jokers' and 'commas'). * 'dos', 'os2', 'MSWin32', 'cygwin' Wrap to 'win32'. In particular, you can usually pass $^O as the $type and get the corresponding shell behaviour. $rw->type('win32'); # Set type to win32. $rw->type($^O); # Set type to unix on Unices and win32 on Windows $rw->type(); # Set type to unix. The "type" method returns the Regexp::Wildcards object. "capture" $rw->capture($captures); $rw->capture(set => $c1); $rw->capture(add => $c2); $rw->capture(rem => $c3); Specifies the list of atoms to capture. This method works like "do", except that the classes are different : * 'single' Captures all unescaped *"exactly one"* metacharacters, i.e. "?" for wildcards or "_" for SQL. 'a???b\\??' ==> 'a(.)(.)(.)b\\?(.)' 'a___b\\__' ==> 'a(.)(.)(.)b\\_(.)' * 'any' Captures all unescaped *"any"* metacharacters, i.e. "*" for wildcards or "%" for SQL. 'a***b\\**' ==> 'a(.*)b\\*(.*)' 'a%%%b\\%%' ==> 'a(.*)b\\%(.*)' * 'greedy' When used in conjunction with 'any', it makes the 'any' captures greedy (by default they are not). 'a***b\\**' ==> 'a(.*?)b\\*(.*?)' 'a%%%b\\%%' ==> 'a(.*?)b\\%(.*?)' * 'brackets' Capture matching "{ ... , ... }" alternations. 'a{b\\},\\{c}' ==> 'a(b\\}|\\{c)' $rw->capture(set => 'single'); # Only capture "exactly one" # metacharacters. $rw->capture('single'); # Same. $rw->capture(add => [ qw ]); # Also greedily capture # "any" metacharacters. $rw->capture(rem => 'greedy'); # No more greed please. $rw->capture(); # Capture nothing. The "capture" method returns the Regexp::Wildcards object. "convert" my $rx = $rw->convert($wc); my $rx = $rw->convert($wc, $type); Converts the wildcard expression $wc into a regular expression according to the options stored into the Regexp::Wildcards object, or to $type if it's supplied. It successively escapes all unprotected regexp special characters that doesn't hold any meaning for wildcards, then replace 'jokers', 'sql' and 'commas' or 'brackets' (depending on the "do" or "type" options), all of this by applying the 'capture' rules specified in the constructor or by "capture". EXPORT An object module shouldn't export any function, and so does this one. DEPENDENCIES Carp (core module since perl 5), Scalar::Util, Text::Balanced (since 5.7.3). CAVEATS This module does not implement the strange behaviours of Windows shell that result from the special handling of the three last characters (for the file extension). For example, Windows XP shell matches *a like ".*a", "*a?" like ".*a.?", "*a??" like ".*a.{0,2}" and so on. SEE ALSO Text::Glob. AUTHOR Vincent Pit, "", . You can contact me by mail or on "irc.perl.org" (vincent). BUGS Please report any bugs or feature requests to "bug-regexp-wildcards at rt.cpan.org", or through the web interface at . I will be notified, and then you'll automatically be notified of progress on your bug as I make changes. SUPPORT You can find documentation for this module with the perldoc command. perldoc Regexp::Wildcards Tests code coverage report is available at . COPYRIGHT & LICENSE Copyright 2007,2008,2009,2013 Vincent Pit, all rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. Regexp-Wildcards-1.05/samples/000755 000765 000024 00000000000 12206211220 016502 5ustar00vincestaff000000 000000 Regexp-Wildcards-1.05/t/000755 000765 000024 00000000000 12206211220 015301 5ustar00vincestaff000000 000000 Regexp-Wildcards-1.05/t/00-load.t000644 000765 000024 00000000274 12205730401 016633 0ustar00vincestaff000000 000000 #!perl -T use strict; use warnings; use Test::More tests => 1; BEGIN { use_ok( 'Regexp::Wildcards' ); } diag( "Testing Regexp::Wildcards $Regexp::Wildcards::VERSION, Perl $], $^X" ); Regexp-Wildcards-1.05/t/02-can.t000644 000765 000024 00000000272 12200060106 016446 0ustar00vincestaff000000 000000 #!perl -T use strict; use warnings; use Test::More tests => 5; require Regexp::Wildcards; for (qw) { ok(Regexp::Wildcards->can($_), 'RW can ' . $_); } Regexp-Wildcards-1.05/t/10-obj.t000644 000765 000024 00000003314 12205726407 016500 0ustar00vincestaff000000 000000 #!perl -T use strict; use warnings; use Test::More tests => 24; use Regexp::Wildcards; my $rw = Regexp::Wildcards->new; ok(defined $rw, 'RW object is defined'); is(ref $rw, 'Regexp::Wildcards', 'RW object is valid'); my $rw2 = $rw->new; ok(defined $rw2, 'RW::new called as an object method works' ); is(ref $rw2, 'Regexp::Wildcards', 'RW::new called as an object method works is valid'); $rw2 = Regexp::Wildcards::new(); ok(defined $rw2, 'RW::new called without a class works'); is(ref $rw2, 'Regexp::Wildcards', 'RW::new called without a class is valid'); eval { $rw2 = Regexp::Wildcards->new(qw) }; like($@, qr/Optional\s+arguments/, 'RW::new gets parameters as key => value pairs'); my $fake = { }; bless $fake, 'Regexp::Wildcards::Hlagh'; for (qw) { eval "Regexp::Wildcards::$_('Regexp::Wildcards')"; like($@, qr/^First\s+argument/, "RW::$_ isn't a class method"); eval "Regexp::Wildcards::$_(\$fake)"; like($@, qr/^First\s+argument/, "RW::$_ only applies to RW objects"); } for (qw) { eval { $rw->$_(sub { 'dongs' }) }; like($@, qr/Wrong\s+option\s+set/, "RW::$_ don't want code references"); eval { $rw->$_(\*STDERR) }; like($@, qr/Wrong\s+option\s+set/, "RW::$_ don't want globs"); eval { $rw->$_(qw) }; like($@, qr/Arguments\s+must\s+be\s+passed.*unique\s+scalar.*key\s+=>\s+value\s+pairs/, "RW::$_ gets parameters after the first as key => value pairs"); } eval { $rw->type('monkey!') }; like($@, qr/Wrong\s+type/, 'RW::type wants a type it knows'); eval { $rw->convert(undef, 'again monkey!') }; like($@, qr/Wrong\s+type/, 'RW::convert wants a type it knows'); for (qw) { ok(!defined $rw->$_(undef), "RW::$_ returns undef when passed undef"); } Regexp-Wildcards-1.05/t/11-opts.t000644 000765 000024 00000002247 12200060106 016676 0ustar00vincestaff000000 000000 #!perl -T use strict; use warnings; use Test::More tests => 10; use Regexp::Wildcards; my $rw = Regexp::Wildcards->new(); my $wc = 'a,b{c,d}e*f?(g)'; my $none = quotemeta $wc; my $unix = 'a\\,b(?:c|d)e.*f.\\(g\\)'; my $win32 = '(?:a|b\{c|d\}e.*f.\\(g\\))'; my $jokers = 'a\\,b\\{c\\,d\\}e.*f.\\(g\\)'; my $groups = 'a\\,b\\{c\\,d\\}e\\*f\\?(g)'; my $jok_gr = 'a\\,b\\{c\\,d\\}e.*f.(g)'; is($rw->convert($wc), $unix, 'nothing defaults to unix'); $rw->type('win32'); is($rw->convert($wc), $win32, 'set to win32'); $rw->type('darwin'); is($rw->convert($wc), $unix, 'set to darwin'); $rw->type('MSWin32'); is($rw->convert($wc), $win32, 'reset to win32'); $rw->type(); is($rw->convert($wc), $unix, 'reset to unix'); $rw = Regexp::Wildcards->new(do => [ qw ], type => 'win32'); is($rw->convert($wc), $jokers, 'do overrides type in new'); $rw->do(add => 'groups'); is($rw->convert($wc), $jok_gr, 'added groups to jokers'); $rw->do(add => 'jokers'); is($rw->convert($wc), $jok_gr, 'added jokers but it already exists'); $rw->do(rem => 'jokers'); is($rw->convert($wc), $groups, 'removed jokers, only groups remains'); $rw->do(); is($rw->convert($wc), $none, 'reset do'); Regexp-Wildcards-1.05/t/20-jokers.t000644 000765 000024 00000007054 12200060106 017207 0ustar00vincestaff000000 000000 #!perl -T use strict; use warnings; use Test::More tests => 3 * (4 + 2 + 7 + 8 + 6 + 2) * 3; use Regexp::Wildcards; sub try { my ($rw, $s, $x, $y) = @_; $y = $x unless defined $y; my $d = $rw->{do}; $d = join ' ', keys %$d if ref($d) eq 'HASH'; is($rw->convert('ab' . $x), 'ab' . $y, $s . " (begin) [$d]"); is($rw->convert('a' . $x . 'b'), 'a' . $y . 'b', $s . " (middle) [$d]"); is($rw->convert($x . 'ab'), $y . 'ab', $s . " (end) [$d]"); } sub alltests { my ($d, $one, $any) = @_; my $rw = Regexp::Wildcards->new; $rw->do(set => $d); $d = join ' ', keys %$d if ref($d) eq 'HASH'; # Simple try $rw, "simple $any", $any, '.*'; try $rw, "simple $one", $one, '.'; is($rw->convert($one.$any.'ab'), '..*ab', "simple $one and $any (begin) [$d]"); is($rw->convert($one.'a'.$any.'b'), '.a.*b', "simple $one and $any (middle) [$d]"); is($rw->convert($one.'ab'.$any), '.ab.*', "simple $one and $any (end) [$d]"); is($rw->convert($any.'ab'.$one), '.*ab.', "simple $any and $one (begin) [$d]"); is($rw->convert('a'.$any.'b'.$one), 'a.*b.', "simple $any and $one (middle) [$d]"); is($rw->convert('ab'.$any.$one), 'ab.*.', "simple $any and $one (end) [$d]"); # Multiple try $rw, "multiple $any", $any x 2, '.*'; try $rw, "multiple $one", $one x 2, '..'; # Captures $rw->capture('single'); try $rw, "multiple capturing $one", $one.$one.'\\'.$one.$one, '(.)(.)\\'.$one.'(.)'; $rw->capture(add => [ qw ]); try $rw, "multiple capturing $any (greedy)", $any.$any.'\\'.$any.$any, '(.*)\\'.$any.'(.*)'; my $wc = $any.$any.$one.$one.'\\'.$one.$one.'\\'.$any.$any; try $rw, "multiple capturing $any (greedy) and capturing $one", $wc, '(.*)(.)(.)\\'.$one.'(.)\\'.$any.'(.*)'; $rw->capture(set => [ qw ]); try $rw, "multiple capturing $any (greedy) and non-capturing $one", $wc, '(.*)..\\'.$one.'.\\'.$any.'(.*)'; $rw->capture(rem => 'greedy'); try $rw, "multiple capturing $any (non-greedy)", $any.$any.'\\'.$any.$any, '(.*?)\\'.$any.'(.*?)'; try $rw, "multiple capturing $any (non-greedy) and non-capturing $one", $wc, '(.*?)..\\'.$one.'.\\'.$any.'(.*?)'; $rw->capture({ single => 1, any => 1 }); try $rw, "multiple capturing $any (non-greedy) and capturing $one", $wc, '(.*?)(.)(.)\\'.$one.'(.)\\'.$any.'(.*?)'; $rw->capture(); # Escaping try $rw, "escaping $any", '\\'.$any; try $rw, "escaping $any before intermediate newline", '\\'.$any ."\n\\".$any; try $rw, "escaping $one", '\\'.$one; try $rw, "escaping $one before intermediate newline", '\\'.$one ."\n\\".$one; try $rw, "escaping \\\\\\$any", '\\\\\\'.$any; try $rw, "escaping \\\\\\$one", '\\\\\\'.$one; try $rw, "not escaping \\\\$any", '\\\\'.$any, '\\\\.*'; try $rw, "not escaping \\\\$one", '\\\\'.$one, '\\\\.'; # Escaping escapes try $rw, 'escaping \\', '\\', '\\\\'; try $rw, 'not escaping \\', '\\\\', '\\\\'; try $rw, 'escaping \\ before intermediate newline', "\\\n\\", "\\\\\n\\\\"; try $rw, 'not escaping \\ before intermediate newline', "\\\\\n\\\\", "\\\\\n\\\\"; try $rw, 'escaping regex characters', '[]', '\\[\\]'; try $rw, 'not escaping escaped regex characters', '\\\\\\[\\]'; # Mixed try $rw, "mixed $any and \\$any", $any.'\\'.$any.$any, '.*\\'.$any.'.*'; try $rw, "mixed $one and \\$one", $one.'\\'.$one.$one, '.\\'.$one.'.'; } alltests 'jokers', '?', '*'; alltests 'sql', '_', '%'; alltests [ qw ], '_', '*'; Regexp-Wildcards-1.05/t/21-commas.t000644 000765 000024 00000001535 12200060106 017170 0ustar00vincestaff000000 000000 #!perl -T use strict; use warnings; use Test::More tests => 8; use Regexp::Wildcards; my $rw = Regexp::Wildcards->new(); # unix is($rw->convert('a,b,c'), 'a\\,b\\,c', 'unix: commas outside of brackets 1'); is($rw->convert('a\\,b\\\\\\,c'), 'a\\,b\\\\\\,c', 'unix: commas outside of brackets 2'); is($rw->convert(',a,b,c\\\\,'), '\\,a\\,b\\,c\\\\\\,', 'unix: commas outside of brackets at begin/end'); $rw = Regexp::Wildcards->new(type => 'commas'); is($rw->convert('a,b\\\\,c'), '(?:a|b\\\\|c)', 'win32: commas'); is($rw->convert('a\\,b\\\\,c'), '(?:a\\,b\\\\|c)', 'win32: escaped commas 1'); is($rw->convert('a\\,b\\\\\\,c'), 'a\\,b\\\\\\,c', 'win32: escaped commas 2'); is($rw->convert(',a,b\\\\,'), '(?:|a|b\\\\|)', 'win32: commas at begin/end'); is($rw->convert('\\,a,b\\\\\\,'), '(?:\\,a|b\\\\\\,)', 'win32: escaped commas at begin/end'); Regexp-Wildcards-1.05/t/22-brackets.t000644 000765 000024 00000004556 12200060106 017516 0ustar00vincestaff000000 000000 #!perl -T use strict; use warnings; use Test::More tests => 27; use Regexp::Wildcards; my $rw = Regexp::Wildcards->new(qw); is($rw->convert('a{b\\\\,c\\\\}d', 'jokers'), 'a\\{b\\\\\\,c\\\\\\}d','jokers'); is($rw->convert('a{b\\\\,c\\\\}d', 'sql'), 'a\\{b\\\\\\,c\\\\\\}d', 'sql'); is($rw->convert('a{b\\\\,c\\\\}d', 'win32'), '(?:a\\{b\\\\|c\\\\\\}d)','win32'); is($rw->convert('{}'), '(?:)', 'empty brackets'); is($rw->convert('{a}'), '(?:a)', 'brackets 1'); is($rw->convert('{a,b}'), '(?:a|b)', 'brackets 2'); is($rw->convert('{a,b,c}'), '(?:a|b|c)', 'brackets 3'); is($rw->convert('a{b,c}d'), 'a(?:b|c)d', '1 bracketed block'); is($rw->convert('a{b,c}d{e,,f}'), 'a(?:b|c)d(?:e||f)', '2 bracketed blocks'); is($rw->convert('a{b,c}d{e,,f}{g,h,}'), 'a(?:b|c)d(?:e||f)(?:g|h|)', '3 bracketed blocks'); is($rw->convert('{a{b}}'), '(?:a(?:b))', '2 nested bracketed blocks 1'); is($rw->convert('{a,{b},c}'), '(?:a|(?:b)|c)', '2 nested bracketed blocks 2'); is($rw->convert('{a,{b{d}e},c}'), '(?:a|(?:b(?:d)e)|c)', '3 nested bracketed blocks'); is($rw->convert('{a,{b{d{}}e,f,,},c}'), '(?:a|(?:b(?:d(?:))e|f||)|c)', '4 nested bracketed blocks'); is($rw->convert('{a,{b{d{}}e,f,,},c}{,g{{}h,i}}'), '(?:a|(?:b(?:d(?:))e|f||)|c)(?:|g(?:(?:)h|i))', '4+3 nested bracketed blocks'); is($rw->convert('\\{\\\\}'), '\\{\\\\\\}', 'escaping brackets'); is($rw->convert('\\{a,b,c\\\\\\}'), '\\{a\\,b\\,c\\\\\\}', 'escaping commas 1'); is($rw->convert('\\{a\\\\,b\\,c}'), '\\{a\\\\\\,b\\,c\\}', 'escaping commas 2'); is($rw->convert('\\{a\\\\,b\\,c\\}'), '\\{a\\\\\\,b\\,c\\}', 'escaping commas 3'); is($rw->convert('\\{a\\\\,b\\,c\\\\}'), '\\{a\\\\\\,b\\,c\\\\\\}', 'escaping brackets and commas'); is($rw->convert('{a\\},b\\{,c}'), '(?:a\\}|b\\{|c)', 'overlapping brackets'); is($rw->convert('{a\\{b,c}d,e}'), '(?:a\\{b|c)d\\,e\\}', 'partial unbalanced catching 1'); is($rw->convert('{a\\{\\\\}b,c\\\\}'), '(?:a\\{\\\\)b\\,c\\\\\\}', 'partial unbalanced catching 2'); is($rw->convert('{a{b,c\\}d,e}'), '\\{a\\{b\\,c\\}d\\,e\\}', 'no partial unbalanced catching'); is($rw->convert('{a,\\{,\\},b}'), '(?:a|\\{|\\}|b)', 'substituting commas 1'); is($rw->convert('{a,\\{d,e,,\\}b,c}'), '(?:a|\\{d|e||\\}b|c)', 'substituting commas 2'); is($rw->convert('{a,\\{d,e,,\\}b,c}\\\\{f,g,h,i}'), '(?:a|\\{d|e||\\}b|c)\\\\(?:f|g|h|i)', 'handling the rest'); Regexp-Wildcards-1.05/t/23-groups.t000644 000765 000024 00000001211 12200060106 017221 0ustar00vincestaff000000 000000 #!perl -T use strict; use warnings; use Test::More tests => 6; use Regexp::Wildcards; my $rw = Regexp::Wildcards->new(do => [ qw ]); is($rw->convert('a(?)b'), 'a(.)b', 'groups: single'); is($rw->convert('a(*)b'), 'a(.*)b', 'groups: any'); is($rw->convert('(a),(b)'), '(a)\\,(b)', 'groups: commas'); is($rw->convert('a({x,y})b'), 'a((?:x|y))b', 'groups: brackets'); is($rw->convert('a({x,(y?),{z,(t*u)}})b'), 'a((?:x|(y.)|(?:z|(t.*u))))b', 'groups: nested'); is($rw->convert('(a*\\(b?\\))'), '(a.*\\(b.\\))', 'groups: escape'); Regexp-Wildcards-1.05/t/24-anchors.t000644 000765 000024 00000002517 12200060106 017352 0ustar00vincestaff000000 000000 #!perl -T use strict; use warnings; use Test::More tests => 16; use Regexp::Wildcards; my $rw = Regexp::Wildcards->new(do => 'anchors'); is($rw->convert('\\^'), '\\^', 'anchor: escape ^ 1'); is($rw->convert('\\\\\\^'), '\\\\\\^', 'anchor: escape ^ 2'); is($rw->convert('\\$'), '\\$', 'anchor: escape $ 1'); is($rw->convert('\\\\\\$'), '\\\\\\$', 'anchor: escape $ 2'); is($rw->convert('^a?b*'), '^a\\?b\\*', 'anchor: ^'); is($rw->convert('a?b*$'), 'a\\?b\\*$', 'anchor: $'); is($rw->convert('^a?b*$'), '^a\\?b\\*$', 'anchor: ^$'); is($rw->convert('x^a?b*$y'), 'x^a\\?b\\*$y', 'anchor: intermediate ^$'); $rw->do(add => 'jokers'); is($rw->convert('^a?b*'), '^a.b.*', 'anchor: ^ with jokers'); is($rw->convert('a?b*$'), 'a.b.*$', 'anchor: $ with jokers'); is($rw->convert('^a?b*$'), '^a.b.*$', 'anchor: ^$ with jokers'); is($rw->convert('x^a?b*$y'), 'x^a.b.*$y','anchor: intermediate ^$ with jokers'); $rw->do(add => 'brackets'); is($rw->convert('{^,a}?b*'), '(?:^|a).b.*', 'anchor: ^ with brackets'); is($rw->convert('a?{b*,$}'), 'a.(?:b.*|$)', 'anchor: $ with brackets'); is($rw->convert('{^a,?}{b,*$}'),'(?:^a|.)(?:b|.*$)','anchor: ^$ with brackets'); is($rw->convert('x{^,a}?b{*,$}y'), 'x(?:^|a).b(?:.*|$)y', 'anchor: intermediate ^$ with brackets'); Regexp-Wildcards-1.05/samples/wc2re.pl000755 000765 000024 00000000522 12200060106 020062 0ustar00vincestaff000000 000000 #!/bin/env perl use strict; use warnings; use lib qw; use Regexp::Wildcards; use Data::Dumper; my $rw = Regexp::Wildcards->new( do => [ qw ], capture => [ qw ], ); $rw->do(add => [ qw ]); $rw->capture(add => [ qw ]); print $_, ' => ', $rw->convert($_), "\n" for @ARGV; Regexp-Wildcards-1.05/lib/Regexp/000755 000765 000024 00000000000 12206211220 017036 5ustar00vincestaff000000 000000 Regexp-Wildcards-1.05/lib/Regexp/Wildcards.pm000644 000765 000024 00000036543 12206210732 021332 0ustar00vincestaff000000 000000 package Regexp::Wildcards; use strict; use warnings; use Carp qw; use Scalar::Util qw; use Text::Balanced qw; =head1 NAME Regexp::Wildcards - Converts wildcard expressions to Perl regular expressions. =head1 VERSION Version 1.05 =cut use vars qw<$VERSION>; BEGIN { $VERSION = '1.05'; } =head1 SYNOPSIS use Regexp::Wildcards; my $rw = Regexp::Wildcards->new(type => 'unix'); my $re; $re = $rw->convert('a{b?,c}*'); # Do it Unix shell style. $re = $rw->convert('a?,b*', 'win32'); # Do it Windows shell style. $re = $rw->convert('*{x,y}?', 'jokers'); # Process the jokers and # escape the rest. $re = $rw->convert('%a_c%', 'sql'); # Turn SQL wildcards into # regexps. $rw = Regexp::Wildcards->new( do => [ qw ], # Do jokers and brackets. capture => [ qw ], # Capture *'s greedily. ); $rw->do(add => 'groups'); # Don't escape groups. $rw->capture(rem => [ qw ]); # Actually we want non-greedy # matches. $re = $rw->convert('*a{,(b)?}?c*'); # '(.*?)a(?:|(b).).c(.*?)' $rw->capture(); # No more captures. =head1 DESCRIPTION In many situations, users may want to specify patterns to match but don't need the full power of regexps. Wildcards make one of those sets of simplified rules. This module converts wildcard expressions to Perl regular expressions, so that you can use them for matching. It handles the C<*> and C jokers, as well as Unix bracketed alternatives C<{,}>, but also C<%> and C<_> SQL wildcards. If required, it can also keep original C<(...)> groups or C<^> and C<$> anchors. Backspace (C<\>) is used as an escape character. Typesets that mimic the behaviour of Windows and Unix shells are also provided. =head1 METHODS =cut sub _check_self { croak 'First argument isn\'t a valid ' . __PACKAGE__ . ' object' unless blessed $_[0] and $_[0]->isa(__PACKAGE__); } my %types = ( jokers => [ qw ], sql => [ qw ], commas => [ qw ], brackets => [ qw ], unix => [ qw ], win32 => [ qw ], ); $types{$_} = $types{win32} for qw; $types{$_} = $types{unix} for qw; my %escapes = ( jokers => '?*', sql => '_%', commas => ',', brackets => '{},', groups => '()', anchors => '^$', ); my %captures = ( single => sub { $_[1] ? '(.)' : '.' }, any => sub { $_[1] ? ($_[0]->{greedy} ? '(.*)' : '(.*?)') : '.*' }, brackets => sub { $_[1] ? '(' : '(?:'; }, greedy => undef, ); sub _validate { my $self = shift; _check_self $self; my $valid = shift; my $old = shift; $old = { } unless defined $old; my %opts; if (@_ <= 1) { $opts{set} = defined $_[0] ? $_[0] : { }; } elsif (@_ % 2) { croak 'Arguments must be passed as an unique scalar or as key => value pairs'; } else { %opts = @_; } my %checked; for (qw) { my $opt = $opts{$_}; next unless defined $opt; my $cb = { '' => sub { +{ ($_[0] => 1) x (exists $valid->{$_[0]}) } }, 'ARRAY' => sub { +{ map { ($_ => 1) x (exists $valid->{$_}) } @{$_[0]} } }, 'HASH' => sub { +{ map { ($_ => $_[0]->{$_}) x (exists $valid->{$_}) } keys %{$_[0]} } } }->{ ref $opt }; croak 'Wrong option set' unless $cb; $checked{$_} = $cb->($opt); } my $config = (exists $checked{set}) ? $checked{set} : $old; $config->{$_} = $checked{add}->{$_} for grep $checked{add}->{$_}, keys %{$checked{add} || {}}; delete $config->{$_} for grep $checked{rem}->{$_}, keys %{$checked{rem} || {}}; $config; } sub _do { my $self = shift; my $config; $config->{do} = $self->_validate(\%escapes, $self->{do}, @_); $config->{escape} = ''; $config->{escape} .= $escapes{$_} for keys %{$config->{do}}; $config->{escape} = quotemeta $config->{escape}; $config; } sub do { my $self = shift; _check_self $self; my $config = $self->_do(@_); $self->{$_} = $config->{$_} for keys %$config; $self; } sub _capture { my $self = shift; my $config; $config->{capture} = $self->_validate(\%captures, $self->{capture}, @_); $config->{greedy} = delete $config->{capture}->{greedy}; for (keys %captures) { $config->{'c_' . $_} = $captures{$_}->($config, $config->{capture}->{$_}) if $captures{$_}; # Skip 'greedy' } $config; } sub capture { my $self = shift; _check_self $self; my $config = $self->_capture(@_); $self->{$_} = $config->{$_} for keys %$config; $self; } sub _type { my ($self, $type) = @_; $type = 'unix' unless defined $type; croak 'Wrong type' unless exists $types{$type}; my $config = $self->_do($types{$type}); $config->{type} = $type; $config; } sub type { my $self = shift; _check_self $self; my $config = $self->_type(@_); $self->{$_} = $config->{$_} for keys %$config; $self; } sub new { my $class = shift; $class = blessed($class) || $class || __PACKAGE__; croak 'Optional arguments must be passed as key => value pairs' if @_ % 2; my %args = @_; my $self = bless { }, $class; if (defined $args{do}) { $self->do($args{do}); } else { $self->type($args{type}); } $self->capture($args{capture}); } =head2 C my $rw = Regexp::Wildcards->new(do => $what, capture => $capture); my $rw = Regexp::Wildcards->new(type => $type, capture => $capture); Constructs a new L object. C lists all features that should be enabled when converting wildcards to regexps. Refer to L for details on what can be passed in C<$what>. The C specifies a predefined set of C features to use. See L for details on which types are valid. The C option overrides C. C lists which atoms should be capturing. Refer to L for more details. =head2 C $rw->do($what); $rw->do(set => $c1); $rw->do(add => $c2); $rw->do(rem => $c3); Specifies the list of metacharacters to convert or to prevent for escaping. They fit into six classes : =over 4 =item * C<'jokers'> Converts C to C<.> and C<*> to C<.*>. 'a**\\*b??\\?c' ==> 'a.*\\*b..\\?c' =item * C<'sql'> Converts C<_> to C<.> and C<%> to C<.*>. 'a%%\\%b__\\_c' ==> 'a.*\\%b..\\_c' =item * C<'commas'> Converts all C<,> to C<|> and puts the complete resulting regular expression inside C<(?: ... )>. 'a,b{c,d},e' ==> '(?:a|b\\{c|d\\}|e)' =item * C<'brackets'> Converts all matching C<{ ... , ... }> brackets to C<(?: ... | ... )> alternations. If some brackets are unbalanced, it tries to substitute as many of them as possible, and then escape the remaining unmatched C<{> and C<}>. Commas outside of any bracket-delimited block are also escaped. 'a,b{c,d},e' ==> 'a\\,b(?:c|d)\\,e' '{a\\{b,c}d,e}' ==> '(?:a\\{b|c)d\\,e\\}' '{a{b,c\\}d,e}' ==> '\\{a\\{b\\,c\\}d\\,e\\}' =item * C<'groups'> Keeps the parenthesis C<( ... )> of the original string without escaping them. Currently, no check is done to ensure that the parenthesis are matching. 'a(b(c))d\\(\\)' ==> (no change) =item * C<'anchors'> Prevents the I C<^> and I C<$> anchors to be escaped. Since C<[...]> character class are currently escaped, a C<^> will always be interpreted as I. 'a^b$c' ==> (no change) =back Each C<$c> can be any of : =over 4 =item * A hash reference, with wanted metacharacter group names (described above) as keys and booleans as values ; =item * An array reference containing the list of wanted metacharacter classes ; =item * A plain scalar, when only one group is required. =back When C is present, the classes given as its value replace the current object options. Then the C classes are added, and the C classes removed. Passing a sole scalar C<$what> is equivalent as passing C<< set => $what >>. No argument means C<< set => [ ] >>. $rw->do(set => 'jokers'); # Only translate jokers. $rw->do('jokers'); # Same. $rw->do(add => [ qw ]); # Translate also SQL and commas. $rw->do(rem => 'jokers'); # Specifying both 'sql' and # 'jokers' is useless. $rw->do(); # Translate nothing. The C method returns the L object. =head2 C $rw->type($type); Notifies to convert the metacharacters that corresponds to the predefined type C<$type>. C<$type> can be any of : =over 4 =item * C<'jokers'>, C<'sql'>, C<'commas'>, C<'brackets'> Singleton types that enable the corresponding C classes. =item * C<'unix'> Covers typical Unix shell globbing features (effectively C<'jokers'> and C<'brackets'>). =item * C<$^O> values for common Unix systems Wrap to C<'unix'> (see L for the list). =item * C Defaults to C<'unix'>. =item * C<'win32'> Covers typical Windows shell globbing features (effectively C<'jokers'> and C<'commas'>). =item * C<'dos'>, C<'os2'>, C<'MSWin32'>, C<'cygwin'> Wrap to C<'win32'>. =back In particular, you can usually pass C<$^O> as the C<$type> and get the corresponding shell behaviour. $rw->type('win32'); # Set type to win32. $rw->type($^O); # Set type to unix on Unices and win32 on Windows $rw->type(); # Set type to unix. The C method returns the L object. =head2 C $rw->capture($captures); $rw->capture(set => $c1); $rw->capture(add => $c2); $rw->capture(rem => $c3); Specifies the list of atoms to capture. This method works like L, except that the classes are different : =over 4 =item * C<'single'> Captures all unescaped I<"exactly one"> metacharacters, i.e. C for wildcards or C<_> for SQL. 'a???b\\??' ==> 'a(.)(.)(.)b\\?(.)' 'a___b\\__' ==> 'a(.)(.)(.)b\\_(.)' =item * C<'any'> Captures all unescaped I<"any"> metacharacters, i.e. C<*> for wildcards or C<%> for SQL. 'a***b\\**' ==> 'a(.*)b\\*(.*)' 'a%%%b\\%%' ==> 'a(.*)b\\%(.*)' =item * C<'greedy'> When used in conjunction with C<'any'>, it makes the C<'any'> captures greedy (by default they are not). 'a***b\\**' ==> 'a(.*?)b\\*(.*?)' 'a%%%b\\%%' ==> 'a(.*?)b\\%(.*?)' =item * C<'brackets'> Capture matching C<{ ... , ... }> alternations. 'a{b\\},\\{c}' ==> 'a(b\\}|\\{c)' =back $rw->capture(set => 'single'); # Only capture "exactly one" # metacharacters. $rw->capture('single'); # Same. $rw->capture(add => [ qw ]); # Also greedily capture # "any" metacharacters. $rw->capture(rem => 'greedy'); # No more greed please. $rw->capture(); # Capture nothing. The C method returns the L object. =head2 C my $rx = $rw->convert($wc); my $rx = $rw->convert($wc, $type); Converts the wildcard expression C<$wc> into a regular expression according to the options stored into the L object, or to C<$type> if it's supplied. It successively escapes all unprotected regexp special characters that doesn't hold any meaning for wildcards, then replace C<'jokers'>, C<'sql'> and C<'commas'> or C<'brackets'> (depending on the L or L options), all of this by applying the C<'capture'> rules specified in the constructor or by L. =cut sub convert { my ($self, $wc, $type) = @_; _check_self $self; my $config = (defined $type) ? $self->_type($type) : $self; return unless defined $wc; my $e = $config->{escape}; # Escape : # - an even number of \ that doesn't protect a regexp/wildcard metachar # - an odd number of \ that doesn't protect a wildcard metachar $wc =~ s/ (?{do}; $wc = $self->_jokers($wc) if $do->{jokers}; $wc = $self->_sql($wc) if $do->{sql}; if ($do->{brackets}) { $wc = $self->_bracketed($wc); } elsif ($do->{commas} and $wc =~ /(?{'c_brackets'} . $self->_commas($wc) . ')'; } $wc } =head1 EXPORT An object module shouldn't export any function, and so does this one. =head1 DEPENDENCIES L (core module since perl 5), L, L (since 5.7.3). =head1 CAVEATS This module does not implement the strange behaviours of Windows shell that result from the special handling of the three last characters (for the file extension). For example, Windows XP shell matches C<*a> like C<.*a>, C<*a?> like C<.*a.?>, C<*a??> like C<.*a.{0,2}> and so on. =head1 SEE ALSO L. =head1 AUTHOR Vincent Pit, C<< >>, L. You can contact me by mail or on C (vincent). =head1 BUGS Please report any bugs or feature requests to C, or through the web interface at L. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes. =head1 SUPPORT You can find documentation for this module with the perldoc command. perldoc Regexp::Wildcards Tests code coverage report is available at L. =head1 COPYRIGHT & LICENSE Copyright 2007,2008,2009,2013 Vincent Pit, all rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut sub _extract ($) { extract_bracketed $_[0], '{', qr/.*?(?{c_single}; s/(?{c_any}; s/(?{c_single}; s/(?{c_any}; s/(?_commas($prefix) . $self->_brackets($bracket); } $re .= $self->_commas($rest); $self->{c_brackets} . $re . ')'; } sub _bracketed { my ($self, $rest) = @_; my ($re, $bracket, $prefix) = (''); while (do { ($bracket, $rest, $prefix) = _extract $rest; $bracket }) { $re .= $prefix . $self->_brackets($bracket); } $re .= $rest; $re =~ s/(?