Search-Estraier-0.09.orig/0002755000372100037210000000000010744676054014021 5ustar tachtachSearch-Estraier-0.09.orig/t/0002755000372100037210000000000010744676054014264 5ustar tachtachSearch-Estraier-0.09.orig/t/2_Condition.t0000755000372100037210000000322610523406127016607 0ustar tachtach#!/usr/bin/perl -w use strict; use blib; use Test::More tests => 34; use Test::Exception; #use Data::Dumper; BEGIN { use_ok('Search::Estraier') }; ok(my $cond = new Search::Estraier::Condition, 'new'); isa_ok($cond, 'Search::Estraier::Condition'); cmp_ok($cond->max, '==', -1, 'max'); cmp_ok($cond->options, '==', 0, 'options'); ok($cond->set_phrase('search'), 'set_phrase'); ok($cond->add_attr('@foo BAR baz'), 'set_attr'); ok($cond->set_order('@foo ASC'), 'set_order'); ok($cond->set_max(42), 'set_max, number'); throws_ok { $cond->set_max('foo') } qr/number/, 'set_max, NaN'; my $old_options = -1; my @all_options = qw/SURE USUAL FAST AGITO NOIDF SIMPLE/; my $all_opts = 0; foreach my $opt (@all_options) { ok(my $options = $cond->set_options( $opt ), 'set_option '.$opt); cmp_ok($options, '!=', $old_options, "options changed"); $old_options = $options; $all_opts += $options; } cmp_ok($cond->set_options(@all_options), '==', $all_opts, "set_option all!"); throws_ok { $cond->set_options('foo') } qr/foo/, "set_option invalid"; cmp_ok($cond->set_options( SURE => 1 ), '==', $cond->set_options('SURE'), "set_option backward compatibility"); ok($cond->set_mask(qw/0 1 2/), 'mask'); my $v; cmp_ok($v = $cond->phrase, 'eq', 'search', "phrase: $v"); cmp_ok($v = $cond->max, '==', 42, "max: $v"); cmp_ok($v = $cond->options, '!=', 0, "options: $v"); #diag "attrs: ",join(",",$cond->attrs); cmp_ok($cond->attrs, '==', 1, 'one attrs'); ok($cond->add_attr('@foo2 BAR2 baz2'), 'set_attr'); #diag "attrs: ",join(",",$cond->attrs); cmp_ok($cond->attrs, '==', 2, 'two attrs'); ok($cond->set_distinct('@foo'), 'set_distinct'); cmp_ok($cond->distinct, 'eq', '@foo', 'distinct'); Search-Estraier-0.09.orig/t/1_Document.t0000755000372100037210000000577610523404056016451 0ustar tachtach#!/usr/bin/perl -w use strict; use blib; use Test::More tests => 60; use Test::Exception; use Data::Dumper; BEGIN { use_ok('Search::Estraier') }; #print Search::Estraier::Document::_s('foo'); #cmp_ok(Search::Estraier::Document::_s(" this is a text "), 'eq', 'this is a text', '_s - strip spaces'); my $debug = shift @ARGV; my $attr_data = { '@uri' => 'http://localhost/Search-Estraier/', 'size' => 42, 'zero' => 0, 'foo' => 'bar', 'empty' => '', }; my @test_texts = ( 'This is a test', 'of pure-perl bindings', 'for HyperEstraier' ); my $vectors = { 'foo' => 42, 'bar' => 100, 'baz' => 0, }; ok(my $doc = new Search::Estraier::Document, 'new'); isa_ok($doc, 'Search::Estraier::Document'); cmp_ok($doc->id, '==', -1, 'id'); ok($doc->delete, "delete"); ok($doc = new Search::Estraier::Document, 'new'); foreach my $a (keys %{$attr_data}) { my $d = $attr_data->{$a}; ok($doc->add_attr($a, $d), "add_attr $a = $d"); #diag "draft:\n",$doc->dump_draft,Dumper($doc->{attrs}); cmp_ok($doc->attr($a), 'eq', $d, "attr $a = $d"); } foreach my $t (@test_texts) { ok($doc->add_text($t), "add_text: $t"); } ok($doc->add_hidden_text('This is hidden text'), 'add_hidden_text'); ok($doc->add_vectors( %{ $vectors } ), 'add_vectors'); diag "current doc: ", Dumper($doc) if ($debug); ok(my @texts = $doc->texts, 'texts'); ok(my $draft = $doc->dump_draft, 'dump_draft'); foreach my $a (keys %{$attr_data}) { my $regex = $a . '=' . $attr_data->{$a}; like($draft, qr/$regex/, "draft has $regex"); } diag "dump_draft:\n$draft" if ($debug); ok(my $doc2 = new Search::Estraier::Document($draft), 'new from draft'); diag "doc from draft: ", Dumper($doc2) if ($debug); cmp_ok($doc2->dump_draft, 'eq', $draft, 'drafts same'); cmp_ok($doc->id, '==', -1, 'id'); cmp_ok($doc2->id, '==', -1, 'id'); ok(my @attr = $doc->attr_names, 'attr_names'); diag "attr_names: ", join(',',@attr), "\n" if ($debug); cmp_ok(scalar @attr, '==', keys %{$attr_data}, 'attr_names'); ok(! $doc->attr('foobar'), "non-existant attr"); foreach my $a (keys %{$attr_data}) { cmp_ok($doc->attr($a), 'eq', $attr_data->{$a}, "attr $a = ".$attr_data->{$a}); ok($doc->add_attr($a, undef), "delete attribute"); } @attr = $doc->attr_names; diag "attr_names left: ", join(',',$doc->attr_names), "\n" if ($debug); cmp_ok(@attr, '==' , 0, "attributes removed"); diag "texts: ", join(',',@texts), "\n" if ($debug); ok(eq_array(\@test_texts, \@texts), 'texts'); ok(my $cat_text = $doc->cat_texts, 'cat_text'); diag "cat_texts: $cat_text" if ($debug); ok($doc = new Search::Estraier::Document, 'new empty'); ok(! $doc->texts, 'texts'); cmp_ok($doc->dump_draft, 'eq', "\n", 'dump_draft'); cmp_ok($doc->id, '==', -1, 'id'); ok(! $doc->attr_names, 'attr_names'); ok(! $doc->attr(undef), 'attr'); ok(! $doc->cat_texts, 'cat_texts'); ok($doc = new Search::Estraier::Document, 'new empty'); cmp_ok($doc->score, '==', -1, 'no score'); ok($doc->set_score(12345), 'set_score'); cmp_ok($doc->score, '==', 12345, 'score'); like($doc->dump_draft, qr/%SCORE\s+12345/, 'dump_draft has %SCORE'); Search-Estraier-0.09.orig/t/9_pod.t0000755000372100037210000000025610465422114015451 0ustar tachtach#!/usr/bin/perl use strict; use warnings; use Test::More; eval "use Test::Pod 1.00"; plan skip_all => "Test::Pod 1.00 required for testing POD" if $@; all_pod_files_ok(); Search-Estraier-0.09.orig/t/3_ResultDocument.t0000755000372100037210000000172710357226471017652 0ustar tachtach#!/usr/bin/perl -w use strict; use blib; use Test::More tests => 12; use Test::Exception; use Data::Dumper; BEGIN { use_ok('Search::Estraier') }; my $doc = { uri => 'file:///foo', attrs => { foo => 1, bar => 2, }, snippet => 'none at all', keywords => "foo\tbar\tbaz\tboo", }; dies_ok { new Search::Estraier::ResultDocument } "new without args"; ok(my $rdoc = new Search::Estraier::ResultDocument( %$doc ), 'new'); isa_ok($rdoc, 'Search::Estraier::ResultDocument'); cmp_ok($rdoc->uri, 'eq', $doc->{uri}, 'uri'); ok(my @attr_names = keys %{ $doc->{attrs} }, "attr_names from original"); ok(my @rdoc_attr_names = $rdoc->attr_names, "attr_names from rdoc"); ok(eq_set(\@rdoc_attr_names, \@attr_names), 'attr_names comparison'); foreach my $attr (keys %{ $doc->{attrs} }) { cmp_ok($rdoc->attr($attr), 'eq', $doc->{attrs}->{$attr}, "attr: $attr"); } cmp_ok($rdoc->snippet, 'eq', $doc->{snippet}, 'snippet'); cmp_ok($rdoc->keywords, 'eq', $doc->{keywords}, 'keywords'); Search-Estraier-0.09.orig/t/4_NodeResult.t0000755000372100037210000000117110357500264016746 0ustar tachtach#!/usr/bin/perl -w use strict; use blib; use Test::More tests => 10; use Test::Exception; use Data::Dumper; BEGIN { use_ok('Search::Estraier') }; my $data = { docs => [ qw/1 2 3 4 5/ ], hints => { VERSION => 0.42, NODE => 'none', HIT => 42, DOCNUM => 1234, WORDNUM => 4321, }, }; dies_ok { new Search::Estraier::NodeResult } "new without args"; ok(my $res = new Search::Estraier::NodeResult( %$data ), 'new'); isa_ok($res, 'Search::Estraier::NodeResult'); cmp_ok($res->doc_num, '==', $#{$data->{docs}} + 1, 'doc_num'); for (my $i = 0; $i < $res->doc_num; $i++) { ok(my $doc = $res->get_doc($i), "get_doc $i"); } Search-Estraier-0.09.orig/t/5_Node.t0000755000372100037210000002657210744675717015604 0ustar tachtach#!/usr/bin/perl -w use strict; use blib; use Test::More; use Test::Exception; use Data::Dumper; BEGIN { plan tests => 313; use_ok('Search::Estraier'); }; my $debug = shift @ARGV; # name of node for test my $test1_node = '_test1_' . $$; my $test2_node = '_test2_' . $$; my $estmaster_uri = $ENV{'ESTMASTER_URI'} || 'http://localhost:1978'; ok(my $node = new Search::Estraier::Node( debug => $debug ), 'new'); isa_ok($node, 'Search::Estraier::Node'); ok($node->set_url("$estmaster_uri/node/$test1_node"), "set_url $test1_node"); ok($node->set_proxy('', 8080), 'set_proxy'); throws_ok {$node->set_proxy('proxy.example.com', 'foo') } qr/port/, 'set_proxy port NaN'; ok($node->set_timeout(42), 'set_timeout'); throws_ok {$node->set_timeout('foo') } qr/timeout/, 'set_timeout NaN'; my ($user, $passwd) = ( $ENV{EST_USER} || 'admin', $ENV{EST_PASSWD} || 'admin' ); ok($node->set_auth($user, $passwd), 'set_auth'); cmp_ok($node->status, '==', -1, 'status'); # test master functionality SKIP: { skip "can't find estmaster at $estmaster_uri", ( 313 - 10 ) if (! eval { $node->master( action => 'nodelist' ) } ); diag "using $estmaster_uri"; diag("node->master shutdown not tested"); diag("node->master backup not tested"); ok(my @users = $node->master( action => 'userlist' ), 'userlist'); #diag "users: ", Dumper( \@users ); diag "found ", $#users + 1, " users"; my $user = { name => '_test_' . $$, flags => 'b', fname => 'Search::Estraier', misc => 'test user', }; my $msg; ok($msg = $node->master( action => 'useradd', %{ $user }, passwd => 'test1234', ), "useradd: $msg"); ok(my @users2 = $node->master( action => 'userlist' ), 'userlist'); cmp_ok($#users2, '==', $#users + 1, 'added user'); while (my $row = shift @users2) { next unless ($row->{name} eq $user); map { cmp_ok($user->{$_}, 'eq', $row->{$_}, "$_"); } keys %{ $user }; } ok($msg = $node->master( action => 'userdel', name => $user->{name}, ), "userdel: $msg"); ok(@users2 = $node->master( action => 'userlist' ), 'userlist'); cmp_ok($#users2, '==', $#users, 'removed user'); ok(my @nodes = $node->master( action => 'nodelist' ), 'nodelist' ); #diag "nodelist: ", Dumper( \@nodes ); diag "found ", $#nodes + 1, " nodes"; if ($#nodes > 42) { diag <<'_END_OF_WARNING_'; This tests create three nodes in your Hyper Estraier. Since you have more than 43 modes, and Hyper Estraier needs more than 1024 file descriptors for more than 46 nodes, expect tests to fail. If tests do fail, you can try to add ulimit -n 2048 before staring estmaster, which will increase number of available nodes to 96 before estmaster runs out of file descriptors. _END_OF_WARNING_ } my $temp_node = "_test_temp_$$"; foreach my $node_name ( $test1_node , $test2_node, $temp_node ) { ok($msg = $node->master( action => 'nodeadd', name => $node_name, label => "$node_name label", ), "nodeadd $node_name: $msg"); } ok($msg = $node->master( action => 'nodedel', name => $temp_node, ), "nodedel $temp_node: $msg"); #diag "not testing logrtt\n"; # test document creation my $draft = <<'_END_OF_DRAFT_'; @uri=data0 @title=Material Girl Living in a material world And I am a material girl You know that we are living in a material world And I am a material girl _END_OF_DRAFT_ #diag "draft:\n$draft"; ok(my $doc = new Search::Estraier::Document($draft), 'new doc from draft'); ok( $node->put_doc($doc), "put_doc data001"); for ( 1 .. 17 ) { $doc->add_attr('@uri', 'test' . $_); $doc->set_score( $_ * 10000 ); ok( $node->put_doc($doc), "put_doc test$_"); #diag $doc->dump_draft; cmp_ok( $node->doc_num, '==', ($_ + 1), "node->doc_num " . ($_ + 1)); } ok(! $node->uri_to_id( 'does-not-exists' ), "non-existant uri_to_id"); my $id; ok($id = $node->uri_to_id( 'data0' ), "uri_to_id(data0)"); throws_ok { $node->get_doc( 'foo') } qr/id must be number/, 'croak on non-number'; ok($doc = $node->get_doc( $id ), "get_doc($id) for edit"); $doc->add_attr('foo', 'bar'); #diag Dumper($doc); ok( $node->edit_doc( $doc ), 'edit_doc'); my $doc_num; ok( $doc_num = $node->doc_num, "node->doc_num $doc_num"); ok( $node->out_doc( $id ), "out_doc($id)"); cmp_ok( $node->doc_num, '==', --$doc_num, "node->doc_num " . $doc_num); ok( ! $node->edit_doc( $doc ), "edit_doc of removed doc"); my $cache; ok($cache = $node->cacheusage, "cacheusage: $cache"); my $delete_num = 5; for ( 1 .. $delete_num ) { ok( $node->out_doc_by_uri( 'test' . $_ ), "out_doc_by_uri test$_"); cmp_ok( $node->doc_num, '==', $doc_num - $_, "node->doc_num " . ($doc_num - $_)); } my $doc_num2 = $doc_num - $delete_num; cmp_ok($node->doc_num, '==', $doc_num2, "node->doc_num $doc_num2"); my $max = int($doc_num2 / 2); ok(my $cond = new Search::Estraier::Condition, 'new cond'); ok($cond->set_phrase('girl'), 'cond set_phrase'); ok($cond->set_max($max), "cond set_max($max)"); ok($cond->set_order('@uri ASCD'), 'cond set_order'); ok($cond->add_attr('@title STRINC Material'), 'cond add_attr'); ok($cond->set_mask(qw/1 2/), 'cond set_mask'); cmp_ok($node->cond_to_query( $cond ), 'eq' , 'phrase=girl&attr1=%40title%20STRINC%20Material&order=%40uri%20ASCD&max='.$max.'&wwidth=480&hwidth=96&awidth=96&mask=6', 'cond_to_query'); ok( my $nres = $node->search( $cond, 0 ), 'search'); isa_ok( $nres, 'Search::Estraier::NodeResult' ); cmp_ok($nres->doc_num, '==', $max, "nres->doc_num $max"); cmp_ok($nres->hits, '==', $doc_num2, "nres->hits $doc_num2"); # upper limit is $nres->hits and not $nres->doc_num because we # check all documents, not just results! for my $i ( 0 .. ($nres->hits - 1) ) { my $num = $i + $delete_num + 1; my $uri = 'test' . $num; if ($i < $nres->doc_num) { ok( my $rdoc = $nres->get_doc( $i ), "nres->get_doc $i"); cmp_ok( $rdoc->attr('@uri'), 'eq', $uri, "\@uri = $uri"); cmp_ok( $node->uri_to_id( $uri ), '==', $num + 1, "uri_to_id($uri)"); ok( my $k = $rdoc->keywords( $id ), "rdoc keywords"); } else { ok( ! $nres->get_doc( $i ), "nres->get_doc doesn't exist"); } ok( my $id = $node->uri_to_id( $uri ), "uri_to_id($uri)"); my $doc; my $score = $num * 10000; ok( $doc = $node->get_doc( $id ), "get_doc($id)"); cmp_ok( $doc->score, '==', $score, "score $score"); ok( $doc = $node->get_doc_by_uri( $uri ), "get_doc_by_uri($uri)"); cmp_ok( $doc->score, '==', $score, "score $score"); cmp_ok( $node->get_doc_attr( $id, '@uri' ), 'eq', $uri, "get_doc_attr $id"); cmp_ok( $node->get_doc_attr_by_uri( $uri, '@uri' ), 'eq', $uri, "get_doc_attr $id"); ok( my $k1 = $node->etch_doc( $id ), "etch_doc_by_uri $uri"); ok( my $k2 = $node->etch_doc_by_uri( $uri ), "etch_doc_by_uri $uri"); #diag Dumper($k, $k2); ok( eq_hash( $k1, $k2 ), "keywords"); } ok(my $hints = $nres->hints, 'hints'); diag Dumper($hints) if ($debug); foreach my $h (qw/TIME DOCNUM VERSION NODE HIT WORDNUM/) { ok(defined( $nres->hint($h) ), "have hint $h"); } ok($node->_set_info, "refresh _set_info"); my $v; ok($v = $node->name, "name: $v"); ok($v = $node->label, "label: $v"); ok($v = $node->doc_num, "doc_num: $v"); ok(defined($v = $node->word_num), "word_num: $v"); ok($v = $node->size, "size: $v"); ok($node->set_snippet_width( 100, 10, 10 ), "set_snippet_width"); # test skip my $skip = int($max / 2) || die "skip is zero, can't test"; ok($cond->set_skip( $skip ), "cond set_skip($skip)"); cmp_ok($cond->skip, '==', $skip, "skip is $skip"); like($node->cond_to_query( $cond ), qr/skip=$skip/, 'cond_to_query have skip'); ok( $nres = $node->search( $cond, 0 ), 'search'); isa_ok( $nres, 'Search::Estraier::NodeResult' ); cmp_ok($nres->doc_num, '==', $max, "nres->doc_num " . ($max - $skip)); cmp_ok($nres->hits, '==', $doc_num2, "nres->hits $doc_num2"); for my $i ( 0 .. ($nres->doc_num - 1) ) { my $uri = 'test' . ($i + $delete_num + $skip + 1); ok( my $rdoc = $nres->get_doc( $i ), "nres->get_doc $i"); if ($rdoc) { cmp_ok( $rdoc->attr('@uri'), 'eq', $uri, "\@uri = $uri"); } else { fail('no rdoc'); } } # test distinct ok($cond = new Search::Estraier::Condition, 'new cond'); ok($cond->set_phrase('girl'), 'cond set_phrase'); my $distinct = '@title'; ok($cond->set_distinct( $distinct ), "cond set_distinct($distinct)"); cmp_ok($cond->distinct, 'eq', $distinct, "distinct is $distinct"); like($node->cond_to_query( $cond ), qr/distinct=%40title/, 'cond_to_query have distinct'); ok( $nres = $node->search( $cond, 0 ), 'search with distinct'); cmp_ok($nres->doc_num, '==', 1, "nres->doc_num"); cmp_ok($nres->hits, '==', 1, "nres->hits"); diag "nres = ", Dumper( $nres ) if ($debug); # user doesn't exist ok($node->set_user('foobar', 1), 'set_user'); ok(my $node2 = new Search::Estraier::Node( "$estmaster_uri/node/$test2_node" ), "new $test2_node"); ok($node2->set_auth('admin','admin'), "set_auth $test2_node"); # croak_on_error ok($node = new Search::Estraier::Node( url => "$estmaster_uri/non-existant", croak_on_error => 1 ), "new non-existant"); throws_ok { $node->name } qr/404/, 'croak on error'; # croak_on_error ok($node = new Search::Estraier::Node( url => "$estmaster_uri/node/$test1_node", croak_on_error => 1, user => $user, passwd => $passwd, debug => $debug ), "new $test1_node"); ok(! $node->uri_to_id('foobar'), 'uri_to_id without croak'); # test users ok($node->admins, 'have admins'); ok(! $node->guests, 'no guests'); # test search without results ok($cond = new Search::Estraier::Condition, 'new cond'); ok($cond->set_phrase('this_is_phrase_which_does_not_exits'), 'cond set_phrase'); ok($nres = $node->search( $cond, 0 ), 'search'); # now, test links my $test2_label = "$test2_node label"; my $link_url = "$estmaster_uri/node/$test2_node"; ok($node->set_link( $link_url, $test2_label, 42), "set_link $test2_node ($test2_label) 42"); ok(my $links = $node->links, 'links'); cmp_ok($#{$links}, '==', 0, 'one link'); like($links->[0], qr/^$link_url/, 'link correct'); ok($node->set_link("$estmaster_uri/node/$test2_node", $test2_label, 0), "set_link $test2_node ($test2_label) delete"); ok($msg = $node->master( action => 'nodeclr', name => $node->name, ), "nodeclr " . $node->name . ": $msg"); cmp_ok($node->doc_num, '==', 0, 'no documents'); # cleanup test nodes foreach my $node_name ( $test1_node , $test2_node ) { ok($msg = $node->master( action => 'nodedel', name => $node_name, ), "nodedel $node_name: $msg"); } # test create my $node_name = '_test_create_' . $$; my $node_label = "test $$ label"; ok($node = new Search::Estraier::Node( url => "$estmaster_uri/node/$node_name", create => 1, label => $node_label, croak_on_error => 1 ), "new create+croak"); cmp_ok($node->name, 'eq', $node_name, "node $node_name exists"); cmp_ok($node->label, 'eq', $node_label, "node label: $node_label"); ok($node = new Search::Estraier::Node( url => "$estmaster_uri/node/$node_name", create => 1, label => $node_label, croak_on_error => 0 ), "new create existing"); ok($node = new Search::Estraier::Node( url => "$estmaster_uri/node/$node_name", create => 1, label => $node_label, croak_on_error => 1 ), "new create+croak existing"); # cleanup ok($msg = $node->master( action => 'nodedel', name => $node_name, ), "nodedel $node_name: $msg"); # and again, this time without node ok($node = new Search::Estraier::Node( url => "$estmaster_uri/node/$node_name", create => 1, label => $node_label, croak_on_error => 0 ), "new create non-existing"); cmp_ok($node->name, 'eq', $node_name, "node $node_name exists"); cmp_ok($node->label, 'eq', $node_label, "node label: $node_label"); # cleanup ok($msg = $node->master( action => 'nodedel', name => $node_name, ), "nodedel $node_name: $msg"); ok($msg = $node->master( action => 'sync' ), "sync: $msg"); } # SKIP diag "over"; Search-Estraier-0.09.orig/t/9_pod-coverage.t0000755000372100037210000000026610465422103017241 0ustar tachtach#!/usr/bin/perl -w use Test::More; eval "use Test::Pod::Coverage 1.00"; plan skip_all => "Test::Pod::Coverage 1.00 required for testing POD coverage" if $@; all_pod_coverage_ok(); Search-Estraier-0.09.orig/lib/0002755000372100037210000000000010744676054014567 5ustar tachtachSearch-Estraier-0.09.orig/lib/Search/0002755000372100037210000000000010744676054015774 5ustar tachtachSearch-Estraier-0.09.orig/lib/Search/Estraier.pm0000644000372100037210000011224610744675774020124 0ustar tachtachpackage Search::Estraier; use 5.008; use strict; use warnings; our $VERSION = '0.09'; =head1 NAME Search::Estraier - pure perl module to use Hyper Estraier search engine =head1 SYNOPSIS =head2 Simple indexer use Search::Estraier; # create and configure node my $node = new Search::Estraier::Node( url => 'http://localhost:1978/node/test', user => 'admin', passwd => 'admin', create => 1, label => 'Label for node', croak_on_error => 1, ); # create document my $doc = new Search::Estraier::Document; # add attributes $doc->add_attr('@uri', "http://estraier.gov/example.txt"); $doc->add_attr('@title', "Over the Rainbow"); # add body text to document $doc->add_text("Somewhere over the rainbow. Way up high."); $doc->add_text("There's a land that I heard of once in a lullaby."); die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) }); =head2 Simple searcher use Search::Estraier; # create and configure node my $node = new Search::Estraier::Node( url => 'http://localhost:1978/node/test', user => 'admin', passwd => 'admin', croak_on_error => 1, ); # create condition my $cond = new Search::Estraier::Condition; # set search phrase $cond->set_phrase("rainbow AND lullaby"); my $nres = $node->search($cond, 0); if (defined($nres)) { print "Got ", $nres->hits, " results\n"; # for each document in results for my $i ( 0 ... $nres->doc_num - 1 ) { # get result document my $rdoc = $nres->get_doc($i); # display attribte print "URI: ", $rdoc->attr('@uri'),"\n"; print "Title: ", $rdoc->attr('@title'),"\n"; print $rdoc->snippet,"\n"; } } else { die "error: ", $node->status,"\n"; } =head1 DESCRIPTION This module is implementation of node API of Hyper Estraier. Since it's perl-only module with dependencies only on standard perl modules, it will run on all platforms on which perl runs. It doesn't require compilation or Hyper Estraier development files on target machine. It is implemented as multiple packages which closly resamble Ruby implementation. It also includes methods to manage nodes. There are few examples in C directory of this distribution. =cut =head1 Inheritable common methods This methods should really move somewhere else. =head2 _s Remove multiple whitespaces from string, as well as whitespaces at beginning or end my $text = $self->_s(" this is a text "); $text = 'this is a text'; =cut sub _s { my $text = $_[1]; return unless defined($text); $text =~ s/\s\s+/ /gs; $text =~ s/^\s+//; $text =~ s/\s+$//; return $text; } package Search::Estraier::Document; use Carp qw/croak confess/; use Search::Estraier; our @ISA = qw/Search::Estraier/; =head1 Search::Estraier::Document This class implements Document which is single item in Hyper Estraier. It's is collection of: =over 4 =item attributes C<< 'key' => 'value' >> pairs which can later be used for filtering of results You can add common filters to C in estmaster's C<_conf> file for better performance. See C in L. =item vectors also C<< 'key' => 'value' >> pairs =item display text Text which will be used to create searchable corpus of your index and included in snippet output. =item hidden text Text which will be searchable, but will not be included in snippet. =back =head2 new Create new document, empty or from draft. my $doc = new Search::HyperEstraier::Document; my $doc2 = new Search::HyperEstraier::Document( $draft ); =cut sub new { my $class = shift; my $self = {}; bless($self, $class); $self->{id} = -1; my $draft = shift; if ($draft) { my $in_text = 0; foreach my $line (split(/\n/, $draft)) { if ($in_text) { if ($line =~ /^\t/) { push @{ $self->{htexts} }, substr($line, 1); } else { push @{ $self->{dtexts} }, $line; } next; } if ($line =~ m/^%VECTOR\t(.+)$/) { my @fields = split(/\t/, $1); if ($#fields % 2 == 1) { $self->{kwords} = { @fields }; } else { warn "can't decode $line\n"; } next; } elsif ($line =~ m/^%SCORE\t(.+)$/) { $self->{score} = $1; next; } elsif ($line =~ m/^%/) { # What is this? comment? #warn "$line\n"; next; } elsif ($line =~ m/^$/) { $in_text = 1; next; } elsif ($line =~ m/^(.+)=(.*)$/) { $self->{attrs}->{ $1 } = $2; next; } warn "draft ignored: '$line'\n"; } } $self ? return $self : return undef; } =head2 add_attr Add an attribute. $doc->add_attr( name => 'value' ); Delete attribute using $doc->add_attr( name => undef ); =cut sub add_attr { my $self = shift; my $attrs = {@_}; while (my ($name, $value) = each %{ $attrs }) { if (! defined($value)) { delete( $self->{attrs}->{ $self->_s($name) } ); } else { $self->{attrs}->{ $self->_s($name) } = $self->_s($value); } } return 1; } =head2 add_text Add a sentence of text. $doc->add_text('this is example text to display'); =cut sub add_text { my $self = shift; my $text = shift; return unless defined($text); push @{ $self->{dtexts} }, $self->_s($text); } =head2 add_hidden_text Add a hidden sentence. $doc->add_hidden_text('this is example text just for search'); =cut sub add_hidden_text { my $self = shift; my $text = shift; return unless defined($text); push @{ $self->{htexts} }, $self->_s($text); } =head2 add_vectors Add a vectors $doc->add_vector( 'vector_name' => 42, 'another' => 12345, ); =cut sub add_vectors { my $self = shift; return unless (@_); # this is ugly, but works die "add_vector needs HASH as argument" unless ($#_ % 2 == 1); $self->{kwords} = {@_}; } =head2 set_score Set the substitute score $doc->set_score(12345); =cut sub set_score { my $self = shift; my $score = shift; return unless (defined($score)); $self->{score} = $score; } =head2 score Get the substitute score =cut sub score { my $self = shift; return -1 unless (defined($self->{score})); return $self->{score}; } =head2 id Get the ID number of document. If the object has never been registred, C<-1> is returned. print $doc->id; =cut sub id { my $self = shift; return $self->{id}; } =head2 attr_names Returns array with attribute names from document object. my @attrs = $doc->attr_names; =cut sub attr_names { my $self = shift; return unless ($self->{attrs}); #croak "attr_names return array, not scalar" if (! wantarray); return sort keys %{ $self->{attrs} }; } =head2 attr Returns value of an attribute. my $value = $doc->attr( 'attribute' ); =cut sub attr { my $self = shift; my $name = shift; return unless (defined($name) && $self->{attrs}); return $self->{attrs}->{ $name }; } =head2 texts Returns array with text sentences. my @texts = $doc->texts; =cut sub texts { my $self = shift; #confess "texts return array, not scalar" if (! wantarray); return @{ $self->{dtexts} } if ($self->{dtexts}); } =head2 cat_texts Return whole text as single scalar. my $text = $doc->cat_texts; =cut sub cat_texts { my $self = shift; return join(' ',@{ $self->{dtexts} }) if ($self->{dtexts}); } =head2 dump_draft Dump draft data from document object. print $doc->dump_draft; =cut sub dump_draft { my $self = shift; my $draft; foreach my $attr_name (sort keys %{ $self->{attrs} }) { next unless defined(my $v = $self->{attrs}->{$attr_name}); $draft .= $attr_name . '=' . $v . "\n"; } if ($self->{kwords}) { $draft .= '%VECTOR'; while (my ($key, $value) = each %{ $self->{kwords} }) { $draft .= "\t$key\t$value"; } $draft .= "\n"; } if (defined($self->{score}) && $self->{score} >= 0) { $draft .= "%SCORE\t" . $self->{score} . "\n"; } $draft .= "\n"; $draft .= join("\n", @{ $self->{dtexts} }) . "\n" if ($self->{dtexts}); $draft .= "\t" . join("\n\t", @{ $self->{htexts} }) . "\n" if ($self->{htexts}); return $draft; } =head2 delete Empty document object $doc->delete; This function is addition to original Ruby API, and since it was included in C wrappers it's here as a convinience. Document objects which go out of scope will be destroyed automatically. =cut sub delete { my $self = shift; foreach my $data (qw/attrs dtexts stexts kwords/) { delete($self->{$data}); } $self->{id} = -1; return 1; } package Search::Estraier::Condition; use Carp qw/carp confess croak/; use Search::Estraier; our @ISA = qw/Search::Estraier/; =head1 Search::Estraier::Condition =head2 new my $cond = new Search::HyperEstraier::Condition; =cut sub new { my $class = shift; my $self = {}; bless($self, $class); $self->{max} = -1; $self->{options} = 0; $self ? return $self : return undef; } =head2 set_phrase $cond->set_phrase('search phrase'); =cut sub set_phrase { my $self = shift; $self->{phrase} = $self->_s( shift ); } =head2 add_attr $cond->add_attr('@URI STRINC /~dpavlin/'); =cut sub add_attr { my $self = shift; my $attr = shift || return; push @{ $self->{attrs} }, $self->_s( $attr ); } =head2 set_order $cond->set_order('@mdate NUMD'); =cut sub set_order { my $self = shift; $self->{order} = shift; } =head2 set_max $cond->set_max(42); =cut sub set_max { my $self = shift; my $max = shift; croak "set_max needs number, not '$max'" unless ($max =~ m/^\d+$/); $self->{max} = $max; } =head2 set_options $cond->set_options( 'SURE' ); $cond->set_options( qw/AGITO NOIDF SIMPLE/ ); Possible options are: =over 8 =item SURE check every N-gram =item USUAL check every second N-gram =item FAST check every third N-gram =item AGITO check every fourth N-gram =item NOIDF don't perform TF-IDF tuning =item SIMPLE use simplified query phrase =back Skipping N-grams will speed up search, but reduce accuracy. Every call to C will reset previous options; This option changed in version C<0.04> of this module. It's backwards compatibile. =cut my $options = { SURE => 1 << 0, USUAL => 1 << 1, FAST => 1 << 2, AGITO => 1 << 3, NOIDF => 1 << 4, SIMPLE => 1 << 10, }; sub set_options { my $self = shift; my $opt = 0; foreach my $option (@_) { my $mask; unless ($mask = $options->{$option}) { if ($option eq '1') { next; } else { croak "unknown option $option"; } } $opt += $mask; } $self->{options} = $opt; } =head2 phrase Return search phrase. print $cond->phrase; =cut sub phrase { my $self = shift; return $self->{phrase}; } =head2 order Return search result order. print $cond->order; =cut sub order { my $self = shift; return $self->{order}; } =head2 attrs Return search result attrs. my @cond_attrs = $cond->attrs; =cut sub attrs { my $self = shift; #croak "attrs return array, not scalar" if (! wantarray); return @{ $self->{attrs} } if ($self->{attrs}); } =head2 max Return maximum number of results. print $cond->max; C<-1> is returned for unitialized value, C<0> is unlimited. =cut sub max { my $self = shift; return $self->{max}; } =head2 options Return options for this condition. print $cond->options; Options are returned in numerical form. =cut sub options { my $self = shift; return $self->{options}; } =head2 set_skip Set number of skipped documents from beginning of results $cond->set_skip(42); Similar to C in RDBMS. =cut sub set_skip { my $self = shift; $self->{skip} = shift; } =head2 skip Return skip for this condition. print $cond->skip; =cut sub skip { my $self = shift; return $self->{skip}; } =head2 set_distinct $cond->set_distinct('@author'); =cut sub set_distinct { my $self = shift; $self->{distinct} = shift; } =head2 distinct Return distinct attribute print $cond->distinct; =cut sub distinct { my $self = shift; return $self->{distinct}; } =head2 set_mask Filter out some links when searching. Argument array of link numbers, starting with 0 (current node). $cond->set_mask(qw/0 1 4/); =cut sub set_mask { my $self = shift; return unless (@_); $self->{mask} = \@_; } package Search::Estraier::ResultDocument; use Carp qw/croak/; #use Search::Estraier; #our @ISA = qw/Search::Estraier/; =head1 Search::Estraier::ResultDocument =head2 new my $rdoc = new Search::HyperEstraier::ResultDocument( uri => 'http://localhost/document/uri/42', attrs => { foo => 1, bar => 2, }, snippet => 'this is a text of snippet' keywords => 'this\tare\tkeywords' ); =cut sub new { my $class = shift; my $self = {@_}; bless($self, $class); croak "missing uri for ResultDocument" unless defined($self->{uri}); $self ? return $self : return undef; } =head2 uri Return URI of result document print $rdoc->uri; =cut sub uri { my $self = shift; return $self->{uri}; } =head2 attr_names Returns array with attribute names from result document object. my @attrs = $rdoc->attr_names; =cut sub attr_names { my $self = shift; croak "attr_names return array, not scalar" if (! wantarray); return sort keys %{ $self->{attrs} }; } =head2 attr Returns value of an attribute. my $value = $rdoc->attr( 'attribute' ); =cut sub attr { my $self = shift; my $name = shift || return; return $self->{attrs}->{ $name }; } =head2 snippet Return snippet from result document print $rdoc->snippet; =cut sub snippet { my $self = shift; return $self->{snippet}; } =head2 keywords Return keywords from result document print $rdoc->keywords; =cut sub keywords { my $self = shift; return $self->{keywords}; } package Search::Estraier::NodeResult; use Carp qw/croak/; #use Search::Estraier; #our @ISA = qw/Search::Estraier/; =head1 Search::Estraier::NodeResult =head2 new my $res = new Search::HyperEstraier::NodeResult( docs => @array_of_rdocs, hits => %hash_with_hints, ); =cut sub new { my $class = shift; my $self = {@_}; bless($self, $class); foreach my $f (qw/docs hints/) { croak "missing $f for ResultDocument" unless defined($self->{$f}); } $self ? return $self : return undef; } =head2 doc_num Return number of documents print $res->doc_num; This will return real number of documents (limited by C). If you want to get total number of hits, see C. =cut sub doc_num { my $self = shift; return $#{$self->{docs}} + 1; } =head2 get_doc Return single document my $doc = $res->get_doc( 42 ); Returns undef if document doesn't exist. =cut sub get_doc { my $self = shift; my $num = shift; croak "expect number as argument, not '$num'" unless ($num =~ m/^\d+$/); return undef if ($num < 0 || $num > $self->{docs}); return $self->{docs}->[$num]; } =head2 hint Return specific hint from results. print $res->hint( 'VERSION' ); Possible hints are: C, C, C, C, C, C, C