Lucene-QueryParser-1.04/0040755000076500007650000000000007767360311014215 5ustar simonsimonLucene-QueryParser-1.04/Changes0100644000076500007650000000060107765330537015507 0ustar simonsimonRevision history for Perl extension Lucene::QueryParser. 1.3 Tue Dec 9 11:26:08 GMT 2003 - Add to_plucene 1.2 Mon Jun 2 15:27:09 BST 2003 - Better handling of AND/OR conjunctions. 1.1 Tue Jan 14 13:57:15 GMT 2003 - Add deparse() method 0.01 Tue Jan 7 10:13:44 2003 - original version; created by h2xs 1.22 with options -b 5.5.3 -AX -n Lucene::QueryParser Lucene-QueryParser-1.04/Makefile.PL0100644000076500007650000000077007765077651016202 0ustar simonsimonuse 5.00503; use ExtUtils::MakeMaker; # See lib/ExtUtils/MakeMaker.pm for details of how to influence # the contents of the Makefile that is written. WriteMakefile( 'NAME' => 'Lucene::QueryParser', 'VERSION_FROM' => 'QueryParser.pm', # finds $VERSION 'PREREQ_PM' => { Text::Balanced => 0 }, # e.g., Module::Name => 1.1 ($] >= 5.005 ? ## Add these new keywords supported since 5.005 ( AUTHOR => 'Simon Cozens ') : ()), ); Lucene-QueryParser-1.04/MANIFEST0100644000076500007650000000021507765330610015336 0ustar simonsimonChanges Makefile.PL MANIFEST QueryParser.pm README t/1.t t/2.t META.yml Module meta-data (added by MakeMaker) Lucene-QueryParser-1.04/META.yml0100644000076500007650000000044507767360311015466 0ustar simonsimon#XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX# name: Lucene-QueryParser version: 1.04 version_from: QueryParser.pm installdirs: site requires: Text::Balanced: 0 distribution_type: module generated_by: ExtUtils::MakeMaker version 6.12 Lucene-QueryParser-1.04/QueryParser.pm0100644000076500007650000001656407767360137017054 0ustar simonsimonpackage Lucene::QueryParser; use 5.00503; use strict; use Carp; require Exporter; use Text::Balanced qw(extract_bracketed extract_delimited); use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); @ISA = qw(Exporter); @EXPORT_OK = qw( parse_query deparse_query ); @EXPORT = qw( parse_query deparse_query ); $VERSION = '1.04'; sub parse_query { local $_ = shift; my @rv; while ($_) { s/^\s+// and next; my $item; s/^(AND|OR|\|\|)\s+//; if ($1) { $item->{conj} = $1; } if (s/^\+//) { $item->{type} = "REQUIRED"; } elsif (s/^(-|!|NOT)\s*//i){ $item->{type} = "PROHIBITED"; } else { $item->{type} = "NORMAL"; } if (s/^([^\s(":]+)://) { $item->{field} = $1 } # Subquery if (/^\(/) { my ($extracted, $remainer) = extract_bracketed($_,"("); if (!$extracted) { croak "Unbalanced subquery" } $_ = $remainer; $extracted =~ s/^\(//; $extracted =~ s/\)$//; $item->{query} = "SUBQUERY"; $item->{subquery} = parse_query($extracted); } elsif (/^"/) { my ($extracted, $remainer) = extract_delimited($_, '"'); if (!$extracted) { croak "Unbalanced phrase" } $_ = $remainer; $extracted =~ s/^"//; $extracted =~ s/"$//; $item->{query} = "PHRASE"; $item->{term} = $extracted; } elsif (s/^(\S+)\*//) { $item->{query} = "PREFIX"; $item->{term} = $1; } else { s/([^\s\^]+)// or croak "Malformed query"; $item->{query} = "TERM"; $item->{term} = $1; } if (s/^\^(\d+(?:.\d+)?)//) { $item->{boost} = $1 } push @rv, bless $item, "Lucene::QueryParser::".ucfirst lc $item->{query}; } return bless \@rv, "Lucene::QueryParser::TopLevel"; } sub deparse_query { my $ds = shift; my @out; for my $elem (@$ds) { my $thing = ""; if ($elem->{conj}) { $thing .= "$elem->{conj} "; } if ($elem->{type} eq "REQUIRED") { $thing .= "+"; } elsif ($elem->{type} eq "PROHIBITED") { $thing .= "-"; } if (exists $elem->{field}) { $thing .= $elem->{field}.":" } if ($elem->{query} eq "TERM") { $thing .= $elem->{term}; } elsif ($elem->{query} eq "SUBQUERY") { $thing .= "(".deparse_query($elem->{subquery}).")"; } elsif ($elem->{query} eq "PHRASE") { $thing .= '"'.$elem->{term}.'"'; } if (exists $elem->{boost}) { $thing .= "^".$elem->{boost} } push @out, $thing; } return join " ", @out; } package Lucene::QueryParser::TopLevel; sub to_plucene { my ($self, $field) = @_; Carp::croak("You need to specify a default field for your query") unless $field; return $self->[0]->to_plucene($field) if @$self ==1 and $self->[0]->{type} eq "NORMAL"; my @clauses; $self->add_clause(\@clauses, $_, $field) for @$self; require Plucene::Search::BooleanQuery; my $query = new Plucene::Search::BooleanQuery; $query->add_clause($_) for @clauses; $query; } sub add_clause { my ($self, $clauses, $term, $field) = @_; my $q = $term->to_plucene($field); if (exists $term->{conj} and $term->{conj} eq "AND" and @$clauses) { # The previous term needs to become required $clauses->[-1]->required(1) unless $clauses->[-1]->prohibited; } return unless $q; # Shouldn't happen yet my $prohibited = $term->{type} eq "PROHIBITED"; my $required = $term->{type} eq "REQUIRED"; $required = 1 if exists $term->{conj} and $term->{conj} eq "AND" and !$prohibited; require Plucene::Search::BooleanClause; push @$clauses, Plucene::Search::BooleanClause->new({ prohibited => $prohibited, required => $required, query => $q }); } # Oh, I really like abstraction package Lucene::QueryParser::Term; sub to_plucene { require Plucene::Search::TermQuery; require Plucene::Index::Term; my ($self, $field) = @_; $self->{pl_term} = Plucene::Index::Term->new({ field => (exists $self->{field} ? $self->{field} : $field), text => $self->{term} }); my $q = Plucene::Search::TermQuery->new({ term => $self->{pl_term} }); $self->set_boost($q); return $q; } sub set_boost { my ($self, $q) = @_; $q->boost($self->{boost}) if exists $self->{boost}; } package Lucene::QueryParser::Phrase; our @ISA = qw(Lucene::QueryParser::Term); # This corresponds to the rules for "PHRASE" in the Plucene grammar sub to_plucene { require Plucene::Search::PhraseQuery; require Plucene::Index::Term; my ($self, $field) = @_; my @words = split /\s+/, $self->{term}; return $self->SUPER::to_plucene($field) if @words == 1; my $phrase = Plucene::Search::PhraseQuery->new; for my $word (@words) { my $term = Plucene::Index::Term->new({ field => (exists $self->{field} ? $self->{field} : $field), text => $word }); $phrase->add($term); } if (exists $self->{slop}) { # Future extension $phrase->slop($self->{slop}); } $self->set_boost($phrase); return $phrase; } package Lucene::QueryParser::Subquery; sub to_plucene { my ($self, $field) = @_; $self->{subquery}->to_plucene( exists $self->{field} ? $self->{field} : $field ) } 1; __END__ # Below is stub documentation for your module. You'd better edit it! =head1 NAME Lucene::QueryParser - Turn a Lucene query into a Perl data structure =head1 SYNOPSIS use Lucene::QueryParser; my $structure = parse_query("red and yellow and -(coat:pink and green)"); C<$structure> will be: [ { query => 'TERM', type => 'NORMAL', term => 'red' }, { query => 'TERM', type => 'NORMAL', term => 'yellow' }, { subquery => [ { query => 'TERM', type => 'NORMAL', term => 'pink', field => 'coat' }, { query => 'TERM', type => 'NORMAL', term => 'green' } ], query => 'SUBQUERY', type => 'PROHIBITED' } ] =head1 DESCRIPTION This module parses a Lucene query, as defined by http://lucene.sourceforge.net/cgi-bin/faq/faqmanager.cgi?file=chapter.search&toc=faq#q5 It deals with fields, types, phrases, subqueries, and so on; everything handled by the C class in Lucene. The data structure is similar to the one given above, and is pretty self-explanatory. The other function, C turns such a data structure back into a Lucene query string. This is useful if you've just been mucking about with the data. =head2 PLUCENE Note for people using Plucene: the big arrayref and the hashes in the output of C are actually objects. They're not C objects, because then everyone who wanted to do search queries would have to pull in Plucene, which is a bit unfair. However, they can be turned into Cs by calling C on them. The argument to C should be the default field to search if none is supplied. =head2 EXPORT Exports the C and C functions. =head1 AUTHOR Simon Cozens, Esimon@kasei.comE =head1 COPYRIGHT AND LICENSE Copyright 2003 by Kasei This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut Lucene-QueryParser-1.04/README0100644000076500007650000000210107765077651015076 0ustar simonsimonLucene/QueryParser version 0.01 =============================== The README is used to introduce the module and provide instructions on how to install the module, any machine dependencies it may have (for example C compilers and installed libraries) and any other information that should be provided before the module is installed. A README file is required for CPAN modules since CPAN extracts the README file from a module distribution so that people browsing the archive can use it get an idea of the modules uses. It is usually a good idea to provide version information here so that people can decide whether fixes for the module are worth downloading. INSTALLATION To install this module type the following: perl Makefile.PL make make test make install DEPENDENCIES This module requires these other modules and libraries: blah blah blah COPYRIGHT AND LICENCE Put the correct copyright and licence information here. Copyright (C) 2003 Simon Cozens This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. Lucene-QueryParser-1.04/t/0040755000076500007650000000000007767360311014460 5ustar simonsimonLucene-QueryParser-1.04/t/1.t0100644000076500007650000000532507765327213015011 0ustar simonsimon# Before `make install' is performed this script should be runnable with # `make test'. After `make install' it should work as `perl 1.t' ######################### # change 'tests => 1' to 'tests => last_test_to_print'; use Test::More tests => 20; use_ok("Lucene::QueryParser"); Lucene::QueryParser->import; # Devel::Cover seems to screw this over. use Data::Dumper; sub test_query { my ($query, $expected, $message) = @_; my $parsed = parse_query($query); is_deeply($parsed, $expected, $message); $query =~ s/ (and|or)//g; $query =~ s/not /-/g; is(deparse_query($parsed), $query, " ... and back again"); } test_query("foo", [ { query => 'TERM', type => 'NORMAL', term => 'foo' } ], "Simple one-word query parses fine"); test_query("foo bar", [ { query => 'TERM', type => 'NORMAL', term => 'foo' }, { query => 'TERM', type => 'NORMAL', term => 'bar' }, ], "Simple two-word query parses fine"); test_query("foo +bar", [ { query => 'TERM', type => 'NORMAL', term => 'foo' }, { query => 'TERM', type => 'NORMAL', term => 'bar', type => "REQUIRED" }, ], "+ operator works"); test_query("foo -bar", [ { query => 'TERM', type => 'NORMAL', term => 'foo' }, { query => 'TERM', type => 'NORMAL', term => 'bar', type => "PROHIBITED" }, ], "- operator works"); test_query("foo not bar", [ { query => 'TERM', type => 'NORMAL', term => 'foo' }, { query => 'TERM', type => 'NORMAL', term => 'bar', type => "PROHIBITED" }, ], "not operator works"); is_deeply(parse_query('"foo bar" baz'), [ { query => 'PHRASE', type => 'NORMAL', term => 'foo bar' }, { query => 'TERM', type => 'NORMAL', term => 'baz',}, ], "Quoted phrase matches work"); test_query("foo AND bar", [ { query => 'TERM', type => 'NORMAL', term => 'foo' }, { query => 'TERM', type => 'NORMAL', term => 'bar' , conj => "AND" }, ], "conjunctions work"); test_query("foo AND baz:bar", [ { query => 'TERM', type => 'NORMAL', term => 'foo' }, { query => 'TERM', type => 'NORMAL', term => 'bar', field => 'baz' , conj => "AND" }, ], "fields work"); test_query("foo AND baz^2.0", [ { query => 'TERM', type => 'NORMAL', term => 'foo'}, { query => 'TERM', type => 'NORMAL', term => 'baz', boost => "2.0", conj => "AND" }, ], "boosting works"); # Grand finale! test_query("red AND yellow AND -(coat:pink AND green)", [ { query => 'TERM', type => 'NORMAL', term => 'red' }, { query => 'TERM', type => 'NORMAL', term => 'yellow', conj => "AND" }, { subquery => [ { query => 'TERM', type => 'NORMAL', term => 'pink', field => 'coat' }, { query => 'TERM', type => 'NORMAL', term => 'green', conj => "AND" } ], query => 'SUBQUERY', type => 'PROHIBITED', conj => "AND" } ], "A very complex query (with subquery)"); Lucene-QueryParser-1.04/t/2.t0100644000076500007650000000311607765330136015004 0ustar simonsimonuse Test::More; if (!eval { require Plucene::Search::Query }) { plan skip_all => "Plucene not installed"; } else { plan tests => 22; } use_ok("Lucene::QueryParser"); Lucene::QueryParser->import; # Devel::Cover seems to screw this over. sub pq { parse_query(shift)->to_plucene("text") }; my $query = pq( "hello" ); isa_ok($query, "Plucene::Search::TermQuery"); is($query->term->field, "text", "Field is correct"); is($query->term->text, "hello", "Text is correct"); $query = pq( "foo:hello^3" ); isa_ok($query, "Plucene::Search::TermQuery"); is($query->term->field, "foo", "Field is correct"); is($query->term->text, "hello", "Text is correct"); is($query->boost, 3, "Boost is set"); $query = pq( "-foo:hello" ); isa_ok($query, "Plucene::Search::BooleanQuery"); my @clauses = $query->clauses; is(@clauses, 1, "Boolean query with one clause"); ok($clauses[0]->prohibited, "Clause is prohibited"); ok(!$clauses[0]->required, "Clause is not required"); $query = pq('"hello"'); isa_ok($query, "Plucene::Search::TermQuery"); is($query->term->field, "text", "Field is correct"); is($query->term->text, "hello", "Text is correct"); $query = pq('nonsense:"hello world"'); isa_ok($query, "Plucene::Search::PhraseQuery"); my @terms = @{$query->terms}; is(@terms, 2, "With two terms"); is($terms[1]->field, "nonsense", "Correctly distributed"); is($terms[1]->text, "world", "Correctly partitioned"); $query = pq("foo AND bar"); isa_ok($query, "Plucene::Search::BooleanQuery"); @clauses = $query->clauses; is(@clauses, 2, "Boolean query with two clause"); ok($clauses[0]->required, "Forced to be required");