SWISH-API-Common-0.04/0000755000175000017500000000000011240212324012612 5ustar mikemikeSWISH-API-Common-0.04/t/0000755000175000017500000000000011240212324013055 5ustar mikemikeSWISH-API-Common-0.04/t/003Atime.t0000644000175000017500000000172311240210533014527 0ustar mikemike###################################################################### # Test suite for SWISH::API::Common # by Mike Schilli ###################################################################### use warnings; use strict; use Test::More qw(no_plan); use Sysadm::Install qw(:all); use Log::Log4perl qw(:easy); #Log::Log4perl->easy_init($DEBUG); BEGIN { use_ok('SWISH::API::Common') }; my $CANNED = "eg/canned"; $CANNED = "../eg/canned" unless -d $CANNED; use SWISH::API::Common; # Preserving atime my $sw = SWISH::API::Common->new(swish_adm_dir => "$CANNED/adm", atime_preserve => 1); my ($atime, $mtime) = (stat("$CANNED/data1/abc"))[8,9]; die "Cannot get atime" unless $atime; sleep(1); $sw->index("$CANNED/data1/abc"); my ($atime2, $mtime2) = (stat("$CANNED/data1/abc"))[8,9]; ok($atime <= $atime2, "atime unmodified by index"); ok($mtime <= $mtime2, "mtime unmodified by index"); END { rmf "$CANNED/adm"; } SWISH-API-Common-0.04/t/001Basic.t0000644000175000017500000000334011240210533014504 0ustar mikemike###################################################################### # Test suite for SWISH::API::Common # by Mike Schilli ###################################################################### use warnings; use strict; use Test::More qw(no_plan); use Sysadm::Install qw(:all); use Log::Log4perl qw(:easy); #Log::Log4perl->easy_init($DEBUG); BEGIN { use_ok('SWISH::API::Common') }; my $CANNED = "eg/canned"; $CANNED = "../eg/canned" unless -d $CANNED; use SWISH::API::Common; my $sw = SWISH::API::Common->new( swish_adm_dir => "$CANNED/adm", swish_fuzzy_indexing_mode => "NONE", ); $sw->index("$CANNED/data1"); my @found = $sw->search("mike"); my $found = join " ", map { $_->path } @found; like($found, qr(canned/data1/abc), "simple query"); @found = $sw->search("someone AND else"); $found = join " ", map { $_->path } @found; like($found, qr(canned/data1/def), "boolean query"); @found = $sw->search("someone AND else OR mike"); $found = join " ", map { $_->path } @found; like($found, qr(canned/data1/def), "and-or query"); like($found, qr(canned/data1/abc), "and-or query"); # Two directories $sw->index_remove(); $sw = SWISH::API::Common->new( swish_adm_dir => "$CANNED/adm", swish_fuzzy_indexing_mode => "NONE", ); $sw->index("$CANNED/data1", "$CANNED/data2"); @found = $sw->search("mike"); $found = join " ", map { $_->path } @found; like($found, qr(canned/data1/abc), "simple query (two dirs)"); @found = $sw->search("different"); $found = join " ", map { $_->path } @found; like($found, qr(canned/data2/ghi), "simple query (two dirs)"); @found = $sw->search("nowhere"); $found = join " ", map { $_->path } @found; is($found, "", "nothing found"); END { rmf "$CANNED/adm"; } SWISH-API-Common-0.04/t/002Merge.t0000644000175000017500000000245311240210533014527 0ustar mikemike###################################################################### # Test suite for SWISH::API::Common # by Mike Schilli ###################################################################### use warnings; use strict; use Test::More qw(no_plan); use Sysadm::Install qw(:all); use Log::Log4perl qw(:easy); #Log::Log4perl->easy_init($DEBUG); BEGIN { use_ok('SWISH::API::Common') }; my $CANNED = "eg/canned"; $CANNED = "../eg/canned" unless -d $CANNED; use SWISH::API::Common; my $sw = SWISH::API::Common->new(swish_adm_dir => "$CANNED/adm"); $sw->index("$CANNED/data1/abc"); # One my @found = $sw->search("mike"); my $found = join " ", map { $_->path } @found; like($found, qr(canned/data1/abc), "simple query"); # and not the other @found = $sw->search("someone AND else"); $found = join " ", map { $_->path } @found; unlike($found, qr(canned/data1/def), "boolean query"); # Now add 2nd file to index $sw->index_add("$CANNED/data1/def"); # Match one ... @found = $sw->search("someone AND else"); $found = join " ", map { $_->path } @found; like($found, qr(canned/data1/def), "boolean query"); # ... AND the other @found = $sw->search("mike"); $found = join " ", map { $_->path } @found; like($found, qr(canned/data1/abc), "simple query"); END { rmf "$CANNED/adm"; } SWISH-API-Common-0.04/Changes0000644000175000017500000000106711240211173014112 0ustar mikemike###################################################################### Revision history for Perl extension SWISH::API::Common ###################################################################### 0.04 2009/08/10 (ms) [RT 48597] Fixed regex typo in Makefile.PL, reported by Przemek Czerkas. 0.03 2005/07/01 (ms) Search swish-e in /usr/local/bin and other PATHs. 0.02 2005/07/01 (ms) index() now accepts one or more directories (ms) If $^X is relative, the absolute path with be determined 0.01 2005/06/28 (ms) Where it all began. SWISH-API-Common-0.04/README0000644000175000017500000000660611240212314013501 0ustar mikemike###################################################################### SWISH::API::Common 0.04 ###################################################################### NAME SWISH::API::Common - SWISH Document Indexing Made Easy SYNOPSIS use SWISH::API::Common; my $swish = SWISH::API::Common->new(); # Index all files in a directory and its subdirectories $swish->index("/usr/local/share/doc"); # After indexing once (it's persistent), fire up as many # queries as you like: # Search documents containing both "swish" and "install" for my $hit ($swish->search("swish AND install")) { print $hit->path(), "\n"; } DESCRIPTION "SWISH::API::Common" offers an easy interface to the Swish index engine. While SWISH::API offers a complete API, "SWISH::API::Common" focusses on ease of use. THIS MODULE IS CURRENTLY UNDER DEVELOPMENT. THE API MIGHT CHANGE AT ANY TIME. Currently, "SWISH::API::Common" just allows for indexing documents in a single directory and any of its subdirectories. Also, don't run index() and search() in parallel yet. INSTALLATION "SWISH::API::Common" requires "SWISH::API" and the swish engine to be installed. Please download the latest release from http://swish-e.org/distribution/swish-e-2.4.3.tar.gz and untar it, type ./configure make make install and then install SWISH::API which is contained in the distribution: cd perl perl Makefile.PL make make install METHODS $sw = SWISH::API::Common->new() Constructor. Takes many options, but the defaults are usually fine. Available options and their defaults: # Where SWISH::API::Common stores index files etc. swish_adm_dir "$ENV{HOME}/.swish-common" # The path to swish-e, relative is OK swish_exe "swish-e" # Swish index file swish_idx_file "$self->{swish_adm_dir}/default.idx" # Swish configuration file swish_cnf_file "$self->{swish_adm_dir}/default.cnf" # SWISH Stemming swish_fuzzy_indexing_mode => "Stemming_en" # Maximum amount of data (in bytes) extracted # from a single file file_len_max 100_000 # Preserve every indexed file's atime atime_preserve $sw->index($dir, ...) Generate a new index of all text documents under directory $dir. One or more directories can be specified. $sw->search("foo AND bar"); Searches the index, using the given search expression. Returns a list hits, which can be asked for their path: # Search documents containing # both "foo" and "bar" for my $hit ($swish->search("foo AND bar")) { print $hit->path(), "\n"; } index_remove Permanently delete the current index. TODO List * More than one index directory * Remove documents from index * Iterator for search hits LEGALESE Copyright 2005 by Mike Schilli, all rights reserved. This program is free software, you can redistribute it and/or modify it under the same terms as Perl itself. AUTHOR 2005, Mike Schilli SWISH-API-Common-0.04/Makefile.PL0000644000175000017500000000560511240211054014571 0ustar mikemike###################################################################### # Makefile.PL for SWISH::API::Common # 2005, Mike Schilli ###################################################################### use ExtUtils::MakeMaker; WriteMakefile( 'NAME' => 'SWISH::API::Common', 'VERSION_FROM' => 'lib/SWISH/API/Common.pm', # finds $VERSION 'PREREQ_PM' => { Sysadm::Install => 0, Log::Log4perl => 0, SWISH::API => 0, File::Path => 0, File::Find => 0, File::Basename => 0, File::Temp => 0, File::Copy => 0, LWP::Simple => 0, }, # e.g., Module::Name => 1.1 ($] >= 5.005 ? ## Add these new keywords supported since 5.005 (ABSTRACT_FROM => 'lib/SWISH/API/Common.pm', AUTHOR => 'Mike Schilli ') : ()), ); my $version = `swish-e -V`; if($version !~ /SWISH/) { print < 1); #print "*** Temporary directory: $dir\n"; chdir $dir or die "Cannot chdir to $dir"; my $distro = "swish-e-2.4.3.tar.gz"; (my $distrodir = $distro) =~ s/\.tar\.gz$//g; print "Downloading $distro ...\n"; #File::Copy::copy("/tmp/$distro", "$dir/$distro"); LWP::Simple::getstore( "http://swish-e.org/distribution/$distro", $distro); print "Done. Unpacking ...\n"; system("tar zxf $distro"); print "Done. Building ...\n"; system("cd $distrodir; ./configure; make; LD_LIBRARY_PATH=/usr/local/lib make install;"); system("cd $distrodir; cd perl; " . "LD_LIBRARY_PATH=/usr/local/lib LD_RUN_PATH=/usr/local/lib perl Makefile.PL; make install"); print "Done.\n"; # Step out of chdir "/"; } exit 0; } SWISH-API-Common-0.04/MANIFEST0000644000175000017500000000037411240212320013743 0ustar mikemikeChanges eg/canned/abc eg/canned/data1/abc eg/canned/data1/def eg/canned/data2/ghi eg/canned/def eg/idx_and_search.pl lib/SWISH/API/Common.pm Makefile.PL MANIFEST This list of files MANIFEST.SKIP META.yml README t/001Basic.t t/002Merge.t t/003Atime.t SWISH-API-Common-0.04/eg/0000755000175000017500000000000011240212324013205 5ustar mikemikeSWISH-API-Common-0.04/eg/canned/0000755000175000017500000000000011240212324014435 5ustar mikemikeSWISH-API-Common-0.04/eg/canned/data1/0000755000175000017500000000000011240212324015427 5ustar mikemikeSWISH-API-Common-0.04/eg/canned/data1/def0000644000175000017500000000001511240210533016104 0ustar mikemikesomeone else SWISH-API-Common-0.04/eg/canned/data1/abc0000644000175000017500000000000511240210533016072 0ustar mikemikemike SWISH-API-Common-0.04/eg/canned/data2/0000755000175000017500000000000011240212324015430 5ustar mikemikeSWISH-API-Common-0.04/eg/canned/data2/ghi0000644000175000017500000000001211240210533016113 0ustar mikemikedifferent SWISH-API-Common-0.04/eg/canned/def0000644000175000017500000000001511240210533015112 0ustar mikemikesomeone else SWISH-API-Common-0.04/eg/canned/abc0000644000175000017500000000000511240210533015100 0ustar mikemikemike SWISH-API-Common-0.04/eg/idx_and_search.pl0000755000175000017500000000113611240210533016501 0ustar mikemike#!/usr/bin/perl -w #################################################### # idx_and_search.pl - SWISH::API::Common test script # Mike Schilli, 2005 (m@perlmeister.com) #################################################### use strict; use Log::Log4perl qw(:easy); Log::Log4perl->easy_init($ERROR); use SWISH::API::Common; # Generate index my $sw = SWISH::API::Common->new(); $sw->index("/tmp"); # Search for "michael" my @results = $sw->search("michael"); # Print results if(@results) { for my $hit (@results) { print $hit->path(), "\n"; } } else { print "No results\n"; } SWISH-API-Common-0.04/META.yml0000644000175000017500000000121411240212325014062 0ustar mikemike# http://module-build.sourceforge.net/META-spec.html #XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX# name: SWISH-API-Common version: 0.04 version_from: lib/SWISH/API/Common.pm installdirs: site requires: File::Basename: 0 File::Copy: 0 File::Find: 0 File::Path: 0 File::Temp: 0 Log::Log4perl: 0 LWP::Simple: 0 SWISH::API: 0 Sysadm::Install: 0 distribution_type: module generated_by: ExtUtils::MakeMaker version 6.30_01 SWISH-API-Common-0.04/lib/0000755000175000017500000000000011240212324013360 5ustar mikemikeSWISH-API-Common-0.04/lib/SWISH/0000755000175000017500000000000011240212324014255 5ustar mikemikeSWISH-API-Common-0.04/lib/SWISH/API/0000755000175000017500000000000011240212324014666 5ustar mikemikeSWISH-API-Common-0.04/lib/SWISH/API/Common.pm0000644000175000017500000002524511240211205016461 0ustar mikemike########################################### # SWISH::API::Common ########################################### ########################################### package SWISH::API::Common; ########################################### use strict; use warnings; our $VERSION = "0.04"; our $SWISH_EXE = "swish-e"; our @SWISH_EXE_PATHS = qw(/usr/local/bin); use SWISH::API; use File::Path; use File::Find; use File::Spec; use File::Basename; use Log::Log4perl qw(:easy); use Sysadm::Install qw(:all); use File::Temp qw(tempfile); ########################################### sub new { ########################################### my($class, %options) = @_; my $self = { swish_adm_dir => "$ENV{HOME}/.swish-common", swish_exe => swish_find(), swish_fuzzy_indexing_mode => "Stemming_en", %options, }; my $defaults = { swish_idx_file => "$self->{swish_adm_dir}/default.idx", swish_cnf_file => "$self->{swish_adm_dir}/default.cnf", dirs_file => "$self->{swish_adm_dir}/default.dirs", streamer => "$self->{swish_adm_dir}/default.streamer", file_len_max => 100_000, atime_preserve => 0, }; for my $name (keys %$defaults) { if(! exists $self->{$name}) { $self->{$name} = $defaults->{$name}; } } LOGDIE "swish-e executable not found" unless -x $self->{swish_exe}; bless $self, $class; } ########################################### sub index_remove { ########################################### my($self) = @_; unlink $self->{swish_idx_file}; } ########################################### sub search { ########################################### my($self, $term) = @_; if(! -f $self->{swish_idx_file}) { ERROR "Index file $self->{swish_idx_file} not found"; return undef; } my $swish = SWISH::API->new($self->{swish_idx_file}); $swish->AbortLastError if $swish->Error; my $results = $swish->Query($term); $swish->AbortLastError if $swish->Error; # We might change this in the future to return an iterator # in scalar context my @results = (); while (my $r = $results->NextResult) { my $hit = SWISH::API::Common::Hit->new( path => $r->Property("swishdocpath") ); push @results, $hit; } return @results; } ########################################### sub files_stream { ########################################### my($self) = @_; my @dirs = split /,/, slurp $self->{dirs_file}; my @files = grep { -f } @dirs; @dirs = grep { ! -f } @dirs; for(@files) { $self->file_stream($_); } return unless @dirs; find(sub { return unless -f; return unless -T; my $full = $File::Find::name; DEBUG "Indexing $full"; $self->file_stream(File::Spec->rel2abs($_)); }, @dirs); } ############################################ sub file_stream { ############################################ my($self, $file) = @_; my @saved; if($self->{atime_preserve}) { @saved = (stat($file))[8,9]; } if(! open FILE, "<$file") { WARN "Cannot open $file ($!)"; return; } my $rc = sysread FILE, my $data, $self->{file_len_max}; unless(defined $rc) { WARN "Can't read $file $!"; return; } close FILE; if($self->{atime_preserve}) { utime(@saved, $file); } my $size = length $data; print "Path-Name: $file\n", "Document-Type: TXT*\n", "Content-Length: $size\n\n"; print $data; } ############################################ sub dir_prep { ############################################ my($file) = @_; my $dir = dirname($file); if(! -d $dir) { mkd($dir) unless -d $dir; } } ############################################ sub index_add { ############################################ my($self, $dir) = @_; # Index new doc in tmp idx file my $old_idx_name = $self->{swish_idx_file}; (my $dummy, my $old_idx) = tempfile(CLEANUP => 1); mv $old_idx_name, $old_idx; mv "$old_idx_name.prop", "$old_idx.prop"; ($dummy, $self->{swish_idx_file}) = tempfile(CLEANUP => 1); $self->index($dir); # Merge two indices my($stdout, $stderr, $rc) = tap($self->{swish_exe}, "-M", $old_idx, $self->{swish_idx_file}, $old_idx_name); if($rc != 0) { ERROR "Merging failed: $stdout $stderr"; return undef; } $self->{swish_idx_file} = $old_idx_name; } ############################################ sub index { ############################################ my($self, @dirs) = @_; # Make a new dirs file dir_prep($self->{dirs_file}); blurt join(',', @dirs), $self->{dirs_file}; # Make a new swish conf file dir_prep($self->{swish_cnf_file}); blurt <{swish_cnf_file}; IndexDir $self->{streamer} IndexFile $self->{swish_idx_file} FuzzyIndexingMode $self->{swish_fuzzy_indexing_mode} EOT # Make a new streamer dir_prep($self->{streamer}); my $perl = perl_find(); blurt <{streamer}; #!$perl use SWISH::API::Common; SWISH::API::Common->new( dirs_file => '$self->{dirs_file}', file_len_max => '$self->{file_len_max}', )->files_stream(); EOT chmod 0755, $self->{streamer} or LOGDIE "chmod of $self->{streamer} failed ($!)"; my($stdout, $stderr, $rc) = tap($self->{swish_exe}, "-c", $self->{swish_cnf_file}, "-e", "-S", "prog"); unless($stdout =~ /Indexing done!/) { ERROR "Indexing failed: $stdout $stderr"; return undef; } DEBUG "$stdout"; 1; } ########################################### sub perl_find { ########################################### if($^X =~ m#/#) { return $^X; } return exe_find($^X); } ########################################### sub swish_find { ########################################### for my $path (@SWISH_EXE_PATHS) { if(-f File::Spec->catfile($path, $SWISH_EXE)) { return File::Spec->catfile($path, $SWISH_EXE); } } return exe_find($SWISH_EXE); } ########################################### sub exe_find { ########################################### my($exe) = @_; for my $path (split /:/, $ENV{PATH}) { if(-f File::Spec->catfile($path, $exe)) { return File::Spec->catfile($path, $exe); } } return undef; } ########################################### package SWISH::API::Common::Hit; ########################################### make_accessor(__PACKAGE__, "path"); ########################################### sub new { ########################################### my($class, %options) = @_; my $self = { %options, }; bless $self, $class; } ################################################## # Poor man's Class::Struct ################################################## sub make_accessor { ################################################## my($package, $name) = @_; no strict qw(refs); my $code = <{$name} = \$value; } if(exists \$self->{$name}) { return (\$self->{$name}); } else { return ""; } } EOT if(! defined *{"$package\::$name"}) { eval $code or die "$@"; } } 1; __END__ =head1 NAME SWISH::API::Common - SWISH Document Indexing Made Easy =head1 SYNOPSIS use SWISH::API::Common; my $swish = SWISH::API::Common->new(); # Index all files in a directory and its subdirectories $swish->index("/usr/local/share/doc"); # After indexing once (it's persistent), fire up as many # queries as you like: # Search documents containing both "swish" and "install" for my $hit ($swish->search("swish AND install")) { print $hit->path(), "\n"; } =head1 DESCRIPTION C offers an easy interface to the Swish index engine. While SWISH::API offers a complete API, C focusses on ease of use. THIS MODULE IS CURRENTLY UNDER DEVELOPMENT. THE API MIGHT CHANGE AT ANY TIME. Currently, C just allows for indexing documents in a single directory and any of its subdirectories. Also, don't run index() and search() in parallel yet. =head1 INSTALLATION C requires C and the swish engine to be installed. Please download the latest release from http://swish-e.org/distribution/swish-e-2.4.3.tar.gz and untar it, type ./configure make make install and then install SWISH::API which is contained in the distribution: cd perl perl Makefile.PL make make install =head2 METHODS =over 4 =item $sw = SWISH::API::Common-Enew() Constructor. Takes many options, but the defaults are usually fine. Available options and their defaults: # Where SWISH::API::Common stores index files etc. swish_adm_dir "$ENV{HOME}/.swish-common" # The path to swish-e, relative is OK swish_exe "swish-e" # Swish index file swish_idx_file "$self->{swish_adm_dir}/default.idx" # Swish configuration file swish_cnf_file "$self->{swish_adm_dir}/default.cnf" # SWISH Stemming swish_fuzzy_indexing_mode => "Stemming_en" # Maximum amount of data (in bytes) extracted # from a single file file_len_max 100_000 # Preserve every indexed file's atime atime_preserve =item $sw-Eindex($dir, ...) Generate a new index of all text documents under directory C<$dir>. One or more directories can be specified. =item $sw-Esearch("foo AND bar"); Searches the index, using the given search expression. Returns a list hits, which can be asked for their path: # Search documents containing # both "foo" and "bar" for my $hit ($swish->search("foo AND bar")) { print $hit->path(), "\n"; } =item index_remove Permanently delete the current index. =back =head1 TODO List * More than one index directory * Remove documents from index * Iterator for search hits =head1 LEGALESE Copyright 2005 by Mike Schilli, all rights reserved. This program is free software, you can redistribute it and/or modify it under the same terms as Perl itself. =head1 AUTHOR 2005, Mike Schilli SWISH-API-Common-0.04/MANIFEST.SKIP0000644000175000017500000000012111240212066014505 0ustar mikemikeblib ^Makefile$ ^Makefile.old$ CVS .cvsignore docs MANIFEST.bak adm/release .git