RefDB-SRU-0.7000755 001750 001750 00000000000 10713335323 013540 5ustar00markusmarkus000000 000000 RefDB-SRU-0.7/SRU.pm000644 001750 001750 00000154746 10661435752 014660 0ustar00markusmarkus000000 000000 ## package RefDB::SRU ## RefDB SRU support module. Use this to implement SRU services for RefDB ## databases ## markus@mhoenicka.de 2007-02-07 ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 2 of the License, or ## (at your option) any later version. ## ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## You should have received a copy of the GNU General Public License ## along with this program; if not, write to the Free Software ## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ## Package main documentation =head1 NAME RefDB::SRU - Module for a SRU service for RefDB =head1 SYNOPSIS use RefDB::SRU; my $sru_query = new RefDB::SRU(\%params); print $sru_query->result(); =head1 DESCRIPTION Implements the backend code of a SRU service to query RefDB databases. Your own code must provide the networking interface which invokes the code in this module =head1 FEEDBACK Send bug reports, questions, and comments to the refdb-users mailing list at: refdb-users@lists.sourceforge.net For list information and archives, please visit: http://lists.sourceforge.net/lists/listinfo/refdb-users =head1 AUTHOR Markus Hoenicka, markus@mhoenicka.de =head1 SEE ALSO This module is part of the RefDB package, a reference manager and bibliography tool for markup languages. Please visit http://refdb.sourceforge.net for further information. =cut ###################################################################### ###################################################################### ## defines a class to deal with SRU diagnostics package Diagnostics; ###################################################################### ## new ###################################################################### =head2 new Title : new Usage : $diag = Diagnostics::new($number, $detail) Function: creates a new Diagnostics object Parameter: $number: the SRU diagnostic number Parameter: $detail: the SRU diagnostic detail =cut sub new() { my ($class, $error_number, $detail) = @_; my $self = {}; # store the error number and the detail $self->{number} = $error_number; $self->{detail} = $detail; # the message is not stored in any way, as it can be generated # from the above conveniently whenever needed bless $self, $class; return $self; } ###################################################################### ## diag_uri ###################################################################### =head2 diag_uri Title : diag_uri Usage : $uri = $diag->diag_uri() Function: returns the URI associated with the diagnostic =cut sub diag_uri () { my $self = shift; return "info:srw/diagnostic/1/" . $self->{number}; } ###################################################################### ## diag_detail ###################################################################### =head2 diag_detail Title : diag_detail Usage : $detail = $diag->diag_detail() Function: returns the detail associated with the diagnostic =cut sub diag_detail () { my $self = shift; return $self->{detail}; } ###################################################################### ## diag_message ###################################################################### =head2 diag_message Title : diag_message Usage : $message = $diag->diag_message() Function: returns the message associated with the diagnostic =cut sub diag_message () { my $self = shift; if ($self->{number} == 1) { "system error related to '" . $self->{detail}; } elsif ($self->{number} == 4) { return "operation '" . $self->{detail} . "' is not supported"; } elsif ($self->{number} == 7) { return "mandatory parameter '" . $self->{detail} . "' was not supplied"; } elsif ($self->{number} == 10) { "query syntax error"; } elsif ($self->{number} == 19) { return "relation '" . $self->{detail} . "' is not supported"; } elsif ($self->{number} == 20) { return "relation modifiers are not supported"; } elsif ($self->{number} == 39) { return "proximity queries are not supported"; } elsif ($self->{number} == 66) { return "schema '" . $self->{detail} . "' is not supported"; } elsif ($self->{number} == 71) { return "record packing '" . $self->{detail} . "' is not supported"; } elsif ($self->{number} == 72) { return "recordXPath retrieval not supported"; } } ###################################################################### ###################################################################### ## defines a class to walk the CQL parse tree package MyVisitor; use base qw( CQL::Visitor ); ###################################################################### ## visit: walks the CQL parse tree ###################################################################### =head2 visit Title : visit Usage : called by CQL parser at each node Function: processes each node =cut # we walk the parse tree and translate each subexpression into # an equivalent RefDB query string. Parentheses keep the logic intact # the query string is assembled in the refdb_query member of the MyVisitor # object sub visit { my ($self,$node) = @_; if ( $node->isa( 'CQL::BooleanNode' ) ) { $self->boolean($node); } elsif ( $node->isa( 'CQL::TermNode' ) ) { $self->term( $node ); } elsif ( $node->isa( 'CQL::ProxNode' ) ) { $self->{unsupported} = "t"; } elsif ( $node->isa( 'CQL::PrefixNode' ) ) { $self->{unsupported} = "t"; } } ###################################################################### ## boolean: called whenever visit() hits a boolean node ###################################################################### =head2 boolean Title : boolean Usage : called by visit Function: translates boolean notes into the RefDB syntax =cut sub boolean { my ($self,$node) = @_; my $diag; $self->{refdb_query} .= "("; $self->visit( $node->left() ); if ($node->op() eq "not") { $self->{refdb_query} .= " AND NOT "; } elsif ($node->op() eq "prox") { $diag = Diagnostics->new(39, "prox"); push (@{$self->{diagnostics}}, $diag); # pretend we support proximity queries $self->{refdb_query} .= " AND "; } else { $self->{refdb_query} .= " " . uc($node->op()) . " "; } $self->visit( $node->right() ); $self->{refdb_query} .= ")"; } ###################################################################### ## term: called by visit whenever a terminal node is hit ###################################################################### =head2 term Title : term Usage : called by visit() Function: translates qualifier and terms into RefDB syntax =cut # called whenever the parser hits a terminal node sub term { my ($self,$node) = @_; # transcribe qualifiers to RefDB syntax (bib context set) $node->{qualifier} =~ s/bib.title/:TX:/; $node->{qualifier} =~ s/bib.seriesTitle/:T3:/; $node->{qualifier} =~ s/bib.titleAbbrev/:JA:/; $node->{qualifier} =~ s/bib.name/:AX:/; $node->{qualifier} =~ s/bib.namePersonal/:AX:/; $node->{qualifier} =~ s/bib.nameCorporate/:AX:/; $node->{qualifier} =~ s/bib.subject/:KW:/; $node->{qualifier} =~ s/bib.dateIssued/:PY:/; $node->{qualifier} =~ s/bib.volume/:VL:/; $node->{qualifier} =~ s/bib.issue/:IS:/; $node->{qualifier} =~ s/bib.startPage/:SP:/; $node->{qualifier} =~ s/bib.endPage/:EP:/; # transcribe qualifiers to RefDB syntax (dc context set) $node->{qualifier} =~ s/dc.identifier/:CK:/; $node->{qualifier} =~ s/dc.title/:TX:/; $node->{qualifier} =~ s/dc.subject/:KW:/; $node->{qualifier} =~ s/dc.coverage/:KW:/; $node->{qualifier} =~ s/dc.creator/:AX:/; $node->{qualifier} =~ s/dc.publisher/:PB:/; $node->{qualifier} =~ s/dc.contributor/:AX:/; $node->{qualifier} =~ s/dc.date/:PY:/; # transcribe qualifiers to RefDB syntax (no context set) $node->{qualifier} =~ s/title/:TX:/; $node->{qualifier} =~ s/seriesTitle/:T3:/; $node->{qualifier} =~ s/titleAbbrev/:JA:/; $node->{qualifier} =~ s/name/:AX:/; $node->{qualifier} =~ s/namePersonal/:AX:/; $node->{qualifier} =~ s/nameCorporate/:AX:/; $node->{qualifier} =~ s/dateIssued/:PY:/; $node->{qualifier} =~ s/volume/:VL:/; $node->{qualifier} =~ s/issue/:IS:/; $node->{qualifier} =~ s/startPage/:SP:/; $node->{qualifier} =~ s/endPage/:EP:/; $node->{qualifier} =~ s/identifier/:CK:/; $node->{qualifier} =~ s/subject/:KW:/; $node->{qualifier} =~ s/coverage/:KW:/; $node->{qualifier} =~ s/creator/:AX:/; $node->{qualifier} =~ s/publisher/:PB:/; $node->{qualifier} =~ s/contributor/:AX:/; $node->{qualifier} =~ s/date/:PY:/; my $qualifier = $self->_maybeQuote($node->getQualifier()); my $term = $self->_maybeQuote( $node->getTerm() ); $term = $self->_translate_regex($term); my $relation = $node->getRelation(); my $base = $relation->getBase(); if ($base eq "=") { $base = "~"; } if ($relation->getModifiers() > 0) { # modifiers are currently unsupported $diag = Diagnostics->new(20, "modifiers"); push (@{$self->{diagnostics}}, $diag); } # expand "all" to an ANDed list, and "any" to an ORed list if ( $qualifier and $qualifier !~ /srw\.serverChoice/i and $qualifier !~ /srw\.anywhere/i) { if (uc($base) eq "ALL" || uc($base) eq "ANY") { # todo: make insensitive to whitespace if ($term =~ /^\".*\"$/) { $term =~ s/^\"(.*)\"$/$1/; } my @terms = split / /, $term; if (uc($base) eq "ALL") { $self->{refdb_query} .= "($qualifier~" . join(" AND $qualifier~", @terms) . ")"; } else { $self->{refdb_query} .= "($qualifier~" . join(" OR $qualifier~", @terms) . ")"; } } elsif (uc($base) eq "EXACT") { $self->{refdb_query} .= "$qualifier=$term"; } elsif (uc($base) eq "WITHIN") { # todo: move stripping quote into sub if ($term =~ /^\".*\"$/) { $term =~ s/^\"(.*)\"$/$1/; } my @terms = split / /, $term; my $lower = $terms[0]; my $upper = $terms[1]; $self->{refdb_query} .= "$qualifier>=$lower AND $qualifier<=$upper"; } elsif (uc($base) eq "ENCLOSES") { $diag = Diagnostics->new(20, "encloses"); push (@{$self->{diagnostics}}, $diag); } else { $self->{refdb_query} .= $qualifier . $base . $term; } } else { # if no or "anywhere" qualifier is given, we query titles, authors, # and keywords $self->{refdb_query} .= ":TX:~$term OR :AX:~$term OR :KW:~$term" ; } } ###################################################################### ## _translate_regex: translates a regular expression from CQL to db ###################################################################### =head2 _translate_regex Title : _translage_regex Usage : _translate_regex($string) Function: translates regular expressions from the CQL syntax to the database engine syntax =cut sub _translate_regex { my ($self, $str) = @_; return if ! defined $str; if ($self->{'db_engine'} eq "mysql" || $self->{'db_engine'} eq "pgsql") { # use Unix regexp # replace '*' with '.*' $str =~ s/([^\\]*)\*/$1.*/g; # replace '?' with '.' $str =~ s/([^\\]*)\?/$1./g; # replace a trailing '^' anchor with '$' $str =~ s/(\b.*\b)\^/$1\$/g; } else { # use SQL regexp # replace '*' with '%' $str =~ s/([^\\]*)\*/$1%/g; # replace '?' with '_' $str =~ s/([^\\]*)\?/$1_/g; # anchors are not supported $str =~ s/\^//g; } return $str; } ###################################################################### ## maybeQuote: quotes strings for inclusion in a CQL query ###################################################################### =head2 _maybeQuote Title : _maybeQuote Usage : _maybeQuote($string) Function: quotes particular characters =cut sub _maybeQuote { my ($self, $str) = @_; return if ! defined $str; if ( $str =~ m|[" \t=<>/()]| ) { $str =~ s/"/\\"/g; $str = qq("$str"); } return $str; } #" ###################################################################### ###################################################################### ## defines the main class package RefDB::SRU; ## need this for logging use RefDB::Log; ## the refdb client module use RefDB::Client; ## the parser for the query string use CQL::Parser; ## used to output all XML use XML::Writer; use strict; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); $VERSION = "0.7"; =head1 package functions =head2 new Title : new Usage : $my_query->new(\%params); Function: creates and initializes a new query object Arguments: reference to a hash containing the query parameter-value pairs Returns : new object =cut sub new($) { my ($class, $paramref) = @_; my $self = {}; # store the reference to the parameter-value hash $self->{params} = $paramref; # this string will receive the refdb data $self->{refdb_result} = undef; # this string will receive the response $self->{response} = undef; # this array will hold non-surrogate diagnostic objects @{$self->{diagnostics}} = (); # this is the counter for retrieved references. Initialize # to zero instead of leaving it undefined $self->{num_references} = 0; # same with total number of references $self->{total_num_references} = 0; # namespace declarations. We'll put them here instead of scattering # them in all functions that use namespaces my $srw = "http://www.loc.gov/zing/srw/"; my $diag = "http://www.loc.gov/zing/srw/diagnostic/"; my $xcql = "http://www.loc.gov/zing/cql/xcql/"; my $zeerex = "http://explain.z3950.org/dtd/2.1/"; my $risx = "http://refdb.sourceforge.net/ns/risx/"; my $mods = "http://www.loc.gov/mods/v3"; # namespace hashes; both prefix_map and rpm (reverse_prefix_map) # are hash references. The latter merely adds the convenience of # being able to address each URL with the shorter prefix. $self->{prefix_map} = { $srw => 'srw', $diag => 'diag', $xcql => 'xcql', $zeerex => 'zeerex', $risx => 'risx', $mods => 'mods'}; my %reverse_prefix_map = reverse %{$self->{prefix_map}}; $self->{rpm} = \%reverse_prefix_map; # record schemas my $risxdtd = "http://refdb.sourceforge.net/dtd/risx/risx.dtd"; my $modsschema = "info:srw/schema/1/mods-v3.0"; # schema hash (somewhat lame with two entries. more to come?) $self->{schema_map} = { $risxdtd => 'risx', $modsschema => 'mods'}; my %reverse_schema_map = reverse %{$self->{schema_map}}; $self->{rsm} = \%reverse_schema_map; # logging options $self->{params}->{'logfile'} = (defined($self->{params}->{'logfile'})) ? $self->{params}->{'logfile'} : "/var/log/refdbsru.log"; $self->{params}->{'loglevel'} = (defined($self->{params}->{'loglevel'})) ? $self->{params}->{'loglevel'} : 6; $self->{params}->{'logdest'} = (defined($self->{params}->{'logdest'})) ? $self->{params}->{'logdest'} : 2; ## 0 = stderr, 1 = syslog, 2 = file # post-process the logging parameters $self->{params}->{'logdest'} = RefDB::Log::num_logdest($self->{params}->{'logdest'}); $self->{params}->{'loglevel'} = RefDB::Log::num_loglevel($self->{params}->{'loglevel'}); # set up logging $self->{params}->{'log'} = RefDB::Log::->new($self->{params}->{'logdest'}, $self->{params}->{'loglevel'}, $self->{params}->{'logfile'}, "refdbsru"); bless $self, $class; return $self; } ###################################################################### ## print_vars: displays the parameter/value pairs for debugging ###################################################################### =head2 print_vars Title : print_vars Usage : $my_query->print_vars; Function: displays the parameter/value pairs for debugging purposes =cut sub print_vars() { my $self = shift; while ((my $key, my $value) = each %{$self->{params}}) { print $key . ": " . $value . "\n"; } } ###################################################################### ## run: analyzes the parameter values and runs an appropriate operation ###################################################################### =head2 run Title : run Usage : $my_query->run; Function: analyzes the parameter values and runs an appropriate operation =cut sub run() { my $self = shift; # see which operation we are supposed to perform # $self->print_vars(); # an explain operation is requested if there is either no # parameter-value pair, no operation parameter, or if the # explain operation is explicitly requested if (keys(%{$self->{params}}) == 0 || !defined($self->{params}->{'operation'}) || $self->{params}->{'operation'} eq "explain") { $self->_check_params("explain"); $self->_explain(); } elsif ($self->{params}->{'operation'} eq "searchRetrieve" || $self->{params}->{'operation'} eq "analyzeQuery") { # check for missing mandatory parameters $self->_check_params("searchRetrieve"); $self->_searchRetrieve($self->{params}->{'operation'}); } elsif ($self->{params}->{'operation'} eq "scan") { $self->_check_params("scan"); $self->_scan(); } else { # the SRU/SRW docs are silent about what should happen if an # invalid operation is requested. There is a diagnostic code for # this case, but I don't know what the XML output is supposed to # look like. Therefore, we just treat this like an explain request. $self->_explain(); } } ###################################################################### ## response: accessor for the query response string ###################################################################### =head2 response Title : response Usage : $query->response() Function: accessor for the query response string =cut sub response() { my $self = shift; $self->{response}; } ###################################################################### ## _check_params: check the SRU parameters ###################################################################### #=head2 _check_params #Title : _check_params #Usage : $self->_check_params($operation) #Function: checks the parameters of the SRU request #Parameter: $operation either "searchRetrieve" or "scan" #=cut sub _check_params() { my ($self, $operation) = @_; my $diag; # mandatory parameters for all operations if (!defined($self->{params}->{'version'}) && $operation ne "explain") { $diag = Diagnostics->new(7, "version"); push (@{$self->{diagnostics}}, $diag); } # not mandatory, but if given, only three operations are allowed # (no operation parameter counts as 'explain') if (defined($self->{params}->{'operation'}) && $self->{params}->{'operation'} ne "searchRetrieve" && $self->{params}->{'operation'} ne "scan" && $self->{params}->{'operation'} ne "explain") { $diag = Diagnostics->new(4, $operation); push (@{$self->{diagnostics}}, $diag); } # if there is no db_engine parameter, query the refdbd server and # see what it should be if (!defined($self->{params}->{'db_engine'}) && defined($self->{params}->{'operation'}) && ($self->{params}->{'operation'} eq "searchRetrieve" || $self->{params}->{'operation'} eq "scan")) { my $client = new RefDB::Client; $client->set_conninfo($self->{params}->{'server_ip'}, $self->{params}->{'port'}, $self->{params}->{'username'}, $self->{params}->{'password'}, $self->{params}->{'database'}, $self->{params}->{'pdfroot'}, $self->{params}->{'css_url'}, $self->{params}->{'timeout'}); # save error status and message in case we need it for diagnostics my $summary = $client->refdb_whichdb(); my $result = $client->get_data(); $result =~ s/.*Database server: ([^\n]+)\n.*/$1/s; $self->{params}->{'db_engine'} = $result; } # if no stylesheet is specified in the query, use our default, if any if (!defined($self->{params}->{'stylesheet'})) { if (defined($self->{params}->{'xsl_url'}) && length($self->{params}->{'xsl_url'}) > 0) { $self->{params}->{'stylesheet'} = $self->{params}->{'xsl_url'}; } } if ($operation eq "searchRetrieve") { # mandatory parameters if (!defined($self->{params}->{'query'})) { $diag = Diagnostics->new(7, "query"); push (@{$self->{diagnostics}}, $diag); } # check values of optional parameters if (defined($self->{params}->{'recordPacking'}) && $self->{params}->{'recordPacking'} ne "xml") { $diag = Diagnostics->new(71, $self->{params}->{'recordPacking'}); push (@{$self->{diagnostics}}, $diag); } else { $self->{params}->{'recordPacking'} = "xml"; } if (defined($self->{params}->{'recordSchema'})) { if (!defined(${$self->{schema_map}}{$self->{params}->{'recordSchema'}}) && !defined(${$self->{rsm}}{$self->{params}->{'recordSchema'}})) { $diag = Diagnostics->new(66, $self->{params}->{'recordSchema'}); push (@{$self->{diagnostics}}, $diag); } } else { # use MODS as default schema $self->{params}->{'recordSchema'} = "mods"; } if (defined($self->{params}->{'recordXPath'})) { $diag = Diagnostics->new(72, "XPath"); push (@{$self->{diagnostics}}, $diag); } } elsif ($operation eq "scan") { # mandatory parameters if (!defined($self->{params}->{'scanClause'})) { $diag = Diagnostics->new(7, "scanClause"); push (@{$self->{diagnostics}}, $diag); } } # return the number of diagnostics. Should be zero if all is fine return scalar @{$self->{diagnostics}}; } ###################################################################### ## _explain: performs the explain operation ###################################################################### #=head2 _explain #Title : _explain #Usage : $self->explain; #Function: performs the explain operation #=cut sub _explain() { my $self = shift; $self->{params}->{'log'}->log_print("debug", "explain"); if (@{$self->{diagnostics}} == 0) { $self->_run_refdb_info(); } } ###################################################################### ## _searchRetrieve: performs the searchRetrieve operation ###################################################################### #=head2 _searchRetrieve #Title : _searchRetrieve #Usage : $self->searchRetrieve(); #Argument: $operation: either "searchRetrieve" or "analyzeQuery" #Function: performs the searchRetrieve operation #=cut sub _searchRetrieve() { my ($self, $operation) = @_; my $refdb_query; if (@{$self->{diagnostics}} == 0) { $self->{params}->{'log'}->log_print("debug", "searchRetrieve"); my $parser = CQL::Parser->new(); # uncomment to receive debug output on stderr # $CQL::DEBUG = 1; # keep a copy of the original query my $cql = $self->{params}->{'query'}; $self->{params}->{'log'}->log_print("debug", $cql); my $root = $parser->parse($self->{params}->{'query'}); my $visitor = new MyVisitor; # pass the database engine to the visitor object to adapt the # regular expressions appropriately $visitor->{'db_engine'} = $self->{params}->{'db_engine'}; # parse and translate the query $visitor->visit($root); # transfer visitor diagnostics to query diagnostics if (defined(@{$visitor->{diagnostics}})) { push (@{$self->{diagnostics}}, @{$visitor->{diagnostics}}); } $refdb_query = $visitor->{'refdb_query'}; } # if ($operation eq "searchRetrieve") { $self->_run_refdb_query($refdb_query); # } # else { # todo: send back an XML-wrapped input and output query string # $self->_wrap_query_strings($cql, $refdb_query); # } } ###################################################################### ## _scan: performs the scan operation ###################################################################### #=head2 _scan #Title : _scan #Usage : $self->scan(); #Function: performs the scan operation #=cut sub _scan() { my ($self, $operation) = @_; my $refdb_query; if (@{$self->{diagnostics}} == 0) { $self->{params}->{'log'}->log_print("debug", "scan"); my $parser = CQL::Parser->new(); # uncomment to receive debug output on stderr # $CQL::DEBUG = 1; my $root = $parser->parse($self->{params}->{'scanClause'}); my $visitor = new MyVisitor; # pass the database engine to the visitor object to adapt the # regular expressions appropriately $visitor->{'db_engine'} = $self->{params}->{'db_engine'}; # parse and translate the query $visitor->visit($root); $refdb_query = $visitor->{'refdb_query'}; } $self->_run_refdb_scan($refdb_query); } ###################################################################### ## _run_refdb_info: retrieves configInfo from RefDB ###################################################################### #=head2 _run_refdb_info #Title : _run_refdb_info #Usage : $self->_run_refdb_info(); #Function: retrieves configInfo from RefDB #=cut sub _run_refdb_info() { my $self = shift; # query the database only if there were no query or parse errors # up to here if (@{$self->{diagnostics}} == 0) { my $client = new RefDB::Client; $client->set_conninfo($self->{params}->{'server_ip'}, $self->{params}->{'port'}, $self->{params}->{'username'}, $self->{params}->{'password'}, $self->{params}->{'database'}, $self->{params}->{'pdfroot'}, $self->{params}->{'css_url'}, $self->{params}->{'timeout'}); # save error status and message in case we need it for diagnostics my $summary = $client->refdb_whichdb(); $self->{refdb_error_status} = $client->get_status(); $self->{refdb_error_message} = $client->get_status_msg(); if ($self->_process_refdb_summary("scan", $summary)) { $self->{refdb_result} = $client->get_data(); } } $self->_wrap_explain_result(); } ###################################################################### ## _run_refdb_query: sends the translated query string to refdbd ###################################################################### #=head2 _run_refdb_query #Title : _run_refdb_query #Usage : $self->_run_refdb_query($query_string); #Function: sends the query string to refdbd #Parameter: $query_string: the query in the RefDB query language #=cut sub _run_refdb_query() { my ($self, $query) = @_; # query the database only if there were no query or parse errors # up to here if (@{$self->{diagnostics}} == 0) { my $client = new RefDB::Client; $client->set_conninfo($self->{params}->{'server_ip'}, $self->{params}->{'port'}, $self->{params}->{'username'}, $self->{params}->{'password'}, $self->{params}->{'database'}, $self->{params}->{'pdfroot'}, $self->{params}->{'css_url'}, $self->{params}->{'timeout'}); my $limit_string; if (defined($self->{params}->{'maximumRecords'})) { $limit_string = $self->{params}->{'maximumRecords'}; } if (defined($self->{params}->{'startRecord'}) && $self->{params}->{'startRecord'} > 1) { # startRecord is the 1-based number of the first reference to # retrieve. SQL OFFSET is the number of references to skip which # is one less than startRecord my $skip = $self->{params}->{'startRecord'} - 1; if (defined($limit_string)) { $limit_string .= ":" . $skip; } else { $limit_string = "999999999:" . $skip; } } # if we're to retrieve a subset, find out the total number of # references that match the query if (length($limit_string) > 0) { my $summary = $client->refdb_countref("", "", $query); $self->{params}->{'log'}->log_print("debug", $query); $self->{refdb_error_status} = $client->get_status(); $self->{refdb_error_message} = $client->get_status_msg(); # use scan mode as the countref command returns only a single count $self->_process_refdb_summary("scan", $summary); $self->{total_num_references} = $self->{num_references}; } # save error status and message in case we need it for diagnostics my $summary = $client->refdb_getref($self->{params}->{'recordSchema'}, "", undef, "", "UTF-8", $limit_string, $self->{params}->{'recordSchema'}, $query); $self->{params}->{'log'}->log_print("debug", $query); $self->{refdb_error_status} = $client->get_status(); $self->{refdb_error_message} = $client->get_status_msg(); if ($self->_process_refdb_summary("searchRetrieve", $summary)) { $self->{refdb_result} = $client->get_data(); if ($self->{total_num_references} == 0) { $self->{total_num_references} = $self->{num_references}; } } } $self->_wrap_searchRetrieve_result(); return $self->{num_references}; } ###################################################################### ## _run_refdb_scan: sends the translated scan string to refdbd ###################################################################### #=head2 _run_refdb_scan #Title : _run_refdb_scan #Usage : $self->_run_refdb_scan($scan_string); #Function: sends the scan string to refdbd #Parameter: $scan_string: the scan in the RefDB query language #=cut sub _run_refdb_scan() { my ($self, $scan) = @_; my $summary; my @scan_pairs; # query the database only if there were no query or parse errors # up to here if (@{$self->{diagnostics}} == 0) { my $client = new RefDB::Client; $client->set_conninfo($self->{params}->{'server_ip'}, $self->{params}->{'port'}, $self->{params}->{'username'}, $self->{params}->{'password'}, $self->{params}->{'database'}, $self->{params}->{'pdfroot'}, $self->{params}->{'css_url'}, $self->{params}->{'timeout'}); # first retrieve a full list of index terms if ($scan =~ /:KW:/) { $summary = $client->refdb_getkw("", "freq", ""); $self->{params}->{'log'}->log_print("debug", "getkw"); } elsif ($scan =~ /:AX:/) { $summary = $client->refdb_getax("", "freq", ""); $self->{params}->{'log'}->log_print("debug", "getax"); } elsif ($scan =~ /:JA:/) { $summary = $client->refdb_getjo("", "", "freq", ""); $self->{params}->{'log'}->log_print("debug", "getjo"); } my $term = $scan; $term =~ s/^:..:[<=>~!][>=~]?(.*)/$1/; # save error status and message in case we need it for diagnostics $self->{refdb_error_status} = $client->get_status(); $self->{refdb_error_message} = $client->get_status_msg(); if ($self->_process_refdb_summary("scan", $summary)) { $self->{refdb_result} = $client->get_data(); } # now we have sorted "lists" formatted as freq:item # split them into an array and locate the requested item my @pairs = split(/\n/, $self->{refdb_result}); my $counter = 0; foreach my $pair (@pairs) { my $index = index($pair, ":"); if ($index > -1) { my $value = substr($pair, $index+1); my $freq = substr($pair, 0, $index); if ($value ge $term) { last; } } $counter++; } # now $counter is an array index to $term. Compute the range # of items to return my $start_index; my $pos = $self->{params}->{'responsePosition'}; if (!defined($pos)) { $pos = 1; } my $max = $self->{params}->{'maximumRecords'}; if (!defined($max)) { # can't return more than we have $max = $counter; } if ($pos > $max) { $start_index = $counter+1-$max; } else { $start_index = $counter+1-$pos; } $self->{num_references} = 0; # create a new array containing only the requested items foreach ($start_index..$start_index+$max) { push (@scan_pairs, $pairs[$_]); $self->{num_references}++; } } $self->_wrap_scan_result(\@scan_pairs); return $self->{num_references}; } ###################################################################### ## _process_refdb_summary: processes the refdbd command summary ###################################################################### #=head2 _process_refdb_summary #Title : _process_refdb_summary #Usage : $self->_process_refdb_summary($command, $summary_string); #Function: determines whether the command was successful, and reads # the number of successful and failed retrievals #Parameter: $command either "searchRetrieve" or "scan" #Parameter: $summary_string: the command summary returned by refdbd #Returns: the number of successfully retrieved references #=cut sub _process_refdb_summary { my ($self, $command, $summary) = @_; my $diag; if ($command eq "searchRetrieve" && $summary =~ /^999:(\d+).*:(\d+).*/) { $self->{num_references} = $1; $self->{num_errors} = $2; } elsif ($command eq "scan" && $summary =~ /^999:(\d+)/) { $self->{num_references} = $1; $self->{num_errors} = 0; } else { $self->{num_references} = 0; $self->{num_errors} = 1; if ($self->{refdb_error_status} == 204 || $self->{refdb_error_status} == 208) { $diag = Diagnostics->new(1, "database"); push (@{$self->{diagnostics}}, $diag); } elsif ($self->{refdb_error_status} == 234) { $diag = Diagnostics->new(10, ""); push (@{$self->{diagnostics}}, $diag); } elsif ($self->{refdb_error_status} == 701) { $diag = Diagnostics->new(1, "character encoding"); push (@{$self->{diagnostics}}, $diag); } elsif ($self->{refdb_error_status} == 801) { $diag = Diagnostics->new(1, "memory"); push (@{$self->{diagnostics}}, $diag); } } $self->{num_references}; } ###################################################################### ## _wrap_explain_result: wraps the data in a explain response ###################################################################### #=head2 _wrap_explain_result #Title : _wrap_explain_result #Usage : $self->_wrap_explain_result(); #Function: wraps the data in a explain response #=cut sub _wrap_explain_result { my $self = shift; # massage whichdb response my $response = $self->{refdb_result}; my $numrefs = $response; $numrefs =~ s/.*Number of references: (\d+).*/$1/s; my $lastmodified = $response; $lastmodified =~ s/.*Last modified: ([\-0-9A-Z: ]+).*/$1/s; my $writer = new XML::Writer(OUTPUT => \$self->{response}, DATA_MODE => 1, NAMESPACES => 1, PREFIX_MAP => $self->{prefix_map}, FORCED_NS_DECLS => [${$self->{rpm}}{'zeerex'}], DATA_INDENT => 2); # simplify access to the namespaces my $srw = ${$self->{rpm}}{'srw'}; my $zeerex = ${$self->{rpm}}{'zeerex'}; my $stylesheet = $self->{params}->{'stylesheet'}; $writer->xmlDecl("UTF-8"); if (defined($stylesheet) && length($stylesheet) > 0) { my $stylespec = "href=\"" . $stylesheet . "\" type=\"text/xml\""; $writer->pi('xml-stylesheet', $stylespec); } $writer->startTag([$zeerex, "explain"], "authoritative" => "true"); ## serverInfo $writer->startTag([$zeerex, "serverInfo"], "protocol" => "SRU", "transport" => "http", "version" => "1.1"); $writer->dataElement([$zeerex, "host"], $self->{params}->{'zeerex_host'}); $writer->dataElement([$zeerex, "port"], $self->{params}->{'zeerex_port'}); $writer->dataElement([$zeerex, "database"], $self->{params}->{'zeerex_database'}, "numRecs" => $numrefs, "lastUpdate" => $lastmodified); # print only if user/pass configured if (length($self->{params}->{'username'}) > 0) { $writer->startTag([$zeerex, "authentication"]); $writer->dataElement([$zeerex, "user"], $self->{params}->{'username'}); $writer->dataElement([$zeerex, "password"], $self->{params}->{'password'}); $writer->endTag([$zeerex, "authentication"]); } $writer->endTag([$zeerex, "serverInfo"]); ## databaseInfo $writer->startTag([$zeerex, "databaseInfo"]); $writer->dataElement([$zeerex, "title"], $self->{params}->{'zeerex_databaseInfo_title'}, "lang" => "en", "primary" => "true"); $writer->dataElement([$zeerex, "description"], $self->{params}->{'zeerex_databaseInfo_description'}, "lang" => "en", "primary" => "true"); $writer->dataElement([$zeerex, "author"], $self->{params}->{'zeerex_databaseInfo_author'}); $writer->dataElement([$zeerex, "contact"], $self->{params}->{'zeerex_databaseInfo_contact'}); $writer->dataElement([$zeerex, "langUsage"], "Records are in English", "codes" => "en"); $writer->endTag([$zeerex, "databaseInfo"]); ## metaInfo $writer->startTag([$zeerex, "metaInfo"]); # dateModified is set to the current time (UTC) as the numRec and # lastUpdate fields are set at runtime my ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) = gmtime(); my $now = sprintf "%4d-%02d-%02dT%02d:%02d:%02dZ", 1900+$year, $mon+1, $mday, $hour, $min, $sec; $writer->dataElement([$zeerex, "dateModified"], "$now"); $writer->endTag([$zeerex, "metaInfo"]); ## indexInfo ## the bib index uses a subset of the context set proposed by loc, see ## http://www.loc.gov/standards/sru/cql-bibliographic-searching.html $writer->startTag([$zeerex, "indexInfo"]); $writer->emptyTag([$zeerex, "set"], "name" => "bib", "identifier" => "http://www.loc.gov/standards/sru/cql-bibliographic-searching.html"); $writer->emptyTag([$zeerex, "set"], "name" => "dc", "identifier" => "http://www.loc.gov/zing/cql/dc-indexes/v1.0/"); ## search by title ## bib.titleSub ## bib.titleTranslated ## bib.titleAlternative ## bib.titleUniform, dc.title $writer->startTag([$zeerex, "index"], "search" => "true", "scan" => "false"); $writer->dataElement([$zeerex, "title"], "Title", "lang" => "en"); $writer->startTag([$zeerex, "map"]); $writer->dataElement([$zeerex, "name"], "title", "set" => "bib"); $writer->endTag([$zeerex, "map"]); $writer->startTag([$zeerex, "map"]); $writer->dataElement([$zeerex, "name"], "title", "set" => "dc"); $writer->endTag([$zeerex, "map"]); $writer->endTag([$zeerex, "index"]); ## bib.titleSeries $writer->startTag([$zeerex, "index"], "search" => "true", "scan" => "false"); $writer->dataElement([$zeerex, "title"], "Series Title", "lang" => "en"); $writer->startTag([$zeerex, "map"]); $writer->dataElement([$zeerex, "name"], "seriesTitle", "set" => "bib"); $writer->endTag([$zeerex, "map"]); $writer->endTag([$zeerex, "index"]); ## bib.titleAbbreviated (abuse to find periodicals?) $writer->startTag([$zeerex, "index"], "search" => "true", "scan" => "true"); $writer->dataElement([$zeerex, "title"], "Title Abbreviated", "lang" => "en"); $writer->startTag([$zeerex, "map"]); $writer->dataElement([$zeerex, "name"], "titleAbbrev", "set" => "bib"); $writer->endTag([$zeerex, "map"]); $writer->endTag([$zeerex, "index"]); ## search by name ## bib.namePersonal ## bib.namePersonalFamily ## bib.namePersonalGiven ## bib.nameCorporate ## bib.nameConference ## bib.name, dc.creator, dc.contributor $writer->startTag([$zeerex, "index"], "search" => "true", "scan" => "true"); $writer->dataElement([$zeerex, "title"], "Name", "lang" => "en"); $writer->startTag([$zeerex, "map"]); $writer->dataElement([$zeerex, "name"], "name", "set" => "bib"); $writer->endTag([$zeerex, "map"]); $writer->startTag([$zeerex, "map"]); $writer->dataElement([$zeerex, "name"], "creator", "set" => "dc"); $writer->endTag([$zeerex, "map"]); $writer->startTag([$zeerex, "map"]); $writer->dataElement([$zeerex, "name"], "contributor", "set" => "dc"); $writer->endTag([$zeerex, "map"]); $writer->endTag([$zeerex, "index"]); ## bib.namePersonal (map to name) $writer->startTag([$zeerex, "index"], "search" => "true", "scan" => "true"); $writer->dataElement([$zeerex, "title"], "Personal Name", "lang" => "en"); $writer->startTag([$zeerex, "map"]); $writer->dataElement([$zeerex, "name"], "namePersonal", "set" => "bib"); $writer->endTag([$zeerex, "map"]); $writer->endTag([$zeerex, "index"]); ## bib.nameCorporate (map to name) $writer->startTag([$zeerex, "index"], "search" => "true", "scan" => "true"); $writer->dataElement([$zeerex, "title"], "Corporate Name", "lang" => "en"); $writer->startTag([$zeerex, "map"]); $writer->dataElement([$zeerex, "name"], "nameCorporate", "set" => "bib"); $writer->endTag([$zeerex, "map"]); $writer->endTag([$zeerex, "index"]); ## search by subject ## bib.subjectPlace ## bib.subjectTitle ## bib.subjectName ## bib.subjectOccupation ## bib.subject, dc.subject, dc.coverage $writer->startTag([$zeerex, "index"], "search" => "true", "scan" => "true"); $writer->dataElement([$zeerex, "title"], "Keyword", "lang" => "en"); $writer->startTag([$zeerex, "map"]); $writer->dataElement([$zeerex, "name"], "subject", "set" => "bib"); $writer->endTag([$zeerex, "map"]); $writer->startTag([$zeerex, "map"]); $writer->dataElement([$zeerex, "name"], "subject", "set" => "dc"); $writer->endTag([$zeerex, "map"]); $writer->startTag([$zeerex, "map"]); $writer->dataElement([$zeerex, "name"], "coverage", "set" => "dc"); $writer->endTag([$zeerex, "map"]); $writer->endTag([$zeerex, "index"]); ## searching by identifier ## dc.identifier $writer->startTag([$zeerex, "index"], "search" => "true", "scan" => "false"); $writer->dataElement([$zeerex, "title"], "Identifier", "lang" => "en"); $writer->startTag([$zeerex, "map"]); $writer->dataElement([$zeerex, "name"], "identifier", "set" => "dc"); $writer->endTag([$zeerex, "map"]); $writer->endTag([$zeerex, "index"]); ## searching by date ## bib.dateIssued, dc.date $writer->startTag([$zeerex, "index"], "search" => "true", "scan" => "false"); $writer->dataElement([$zeerex, "title"], "Publication Date", "lang" => "en"); $writer->startTag([$zeerex, "map"]); $writer->dataElement([$zeerex, "name"], "dateIssued", "set" => "bib"); $writer->endTag([$zeerex, "map"]); $writer->startTag([$zeerex, "map"]); $writer->dataElement([$zeerex, "name"], "date", "set" => "dc"); $writer->endTag([$zeerex, "map"]); $writer->endTag([$zeerex, "index"]); ## bib.dateCreated ## bib.dateValid ## bib.dateModified ## bib.dateCopyright ## searching by edition ## bib.edition ## searching by part ## bib.volume $writer->startTag([$zeerex, "index"], "search" => "true", "scan" => "false"); $writer->dataElement([$zeerex, "title"], "Volume", "lang" => "en"); $writer->startTag([$zeerex, "map"]); $writer->dataElement([$zeerex, "name"], "volume", "set" => "bib"); $writer->endTag([$zeerex, "map"]); $writer->endTag([$zeerex, "index"]); ## bib.issue $writer->startTag([$zeerex, "index"], "search" => "true", "scan" => "false"); $writer->dataElement([$zeerex, "title"], "Issue", "lang" => "en"); $writer->startTag([$zeerex, "map"]); $writer->dataElement([$zeerex, "name"], "issue", "set" => "bib"); $writer->endTag([$zeerex, "map"]); $writer->endTag([$zeerex, "index"]); ## bib.startPage $writer->startTag([$zeerex, "index"], "search" => "true", "scan" => "false"); $writer->dataElement([$zeerex, "title"], "Start Page", "lang" => "en"); $writer->startTag([$zeerex, "map"]); $writer->dataElement([$zeerex, "name"], "startPage", "set" => "bib"); $writer->endTag([$zeerex, "map"]); $writer->endTag([$zeerex, "index"]); ## bib.endPage $writer->startTag([$zeerex, "index"], "search" => "true", "scan" => "false"); $writer->dataElement([$zeerex, "title"], "End Page", "lang" => "en"); $writer->startTag([$zeerex, "map"]); $writer->dataElement([$zeerex, "name"], "endPage", "set" => "bib"); $writer->endTag([$zeerex, "map"]); $writer->endTag([$zeerex, "index"]); ## dc.publisher $writer->startTag([$zeerex, "index"], "search" => "true", "scan" => "false"); $writer->dataElement([$zeerex, "title"], "Publisher", "lang" => "en"); $writer->startTag([$zeerex, "map"]); $writer->dataElement([$zeerex, "name"], "publisher", "set" => "dc"); $writer->endTag([$zeerex, "map"]); $writer->endTag([$zeerex, "index"]); ## searching by issuance ## bib.issuance (translate to TY?) $writer->endTag([$zeerex, "indexInfo"]); ## schemaInfo $writer->startTag([$zeerex, "schemaInfo"]); $writer->startTag([$zeerex, "schema"], "identifier" => "info:srw/schema/1/mods-v3.0", "name" => "mods", "location" => "http://www.loc.gov/standards/mods/v3/mods-3-2.xsd", "sort" => "false", "retrieve" => "true"); $writer->dataElement([$zeerex, "title"], "mods", "lang" => "en"); $writer->endTag([$zeerex, "schema"]); $writer->startTag([$zeerex, "schema"], "identifier" => "-//Markus Hoenicka//DTD Ris V1.1.0//EN", "name" => "risx", "location" => "http://refdb.sourceforge.net/dtd/risx/risx.dtd", "sort" => "false", "retrieve" => "true"); $writer->dataElement([$zeerex, "title"], "risx", "lang" => "en"); $writer->endTag([$zeerex, "schema"]); $writer->endTag([$zeerex, "schemaInfo"]); ## configInfo $writer->startTag([$zeerex, "configInfo"]); $writer->dataElement([$zeerex, "default"], "bib", "type" => "contextSet"); $writer->dataElement([$zeerex, "default"], "mods", "type" => "retrieveSchema"); $writer->dataElement([$zeerex, "setting"], "xml", "type" => "recordPacking"); $writer->dataElement([$zeerex, "supports"], "all", "type" => "relation"); $writer->dataElement([$zeerex, "supports"], "any", "type" => "relation"); $writer->dataElement([$zeerex, "supports"], "exact", "type" => "relation"); $writer->dataElement([$zeerex, "supports"], "within", "type" => "relation"); $writer->dataElement([$zeerex, "supports"], "*", "type" => "maskingCharacter"); $writer->dataElement([$zeerex, "supports"], "?", "type" => "maskingCharacter"); if ($self->{params}->{'db_engine'} eq "mysql" || $self->{params}->{'db_engine'} eq "pgsql") { $writer->dataElement([$zeerex, "supports"], "^", "type" => "anchoring"); } $writer->emptyTag([$zeerex, "supports"], "type" => "scan"); $writer->endTag([$zeerex, "configInfo"]); $writer->endTag([$zeerex, "explain"]); $writer->end(); } ###################################################################### ## _wrap_searchRetrieve_result: wraps the refdbd result in a ## searchRetrieveResult ###################################################################### #=head2 _wrap_searchRetrieve_result #Title : _wrap_searchRetrieve_result #Usage : $self->_wrap_searchRetrieve_result(); #Function: wraps the refdbd result in a searchRetrieveResult and puts the # resulting string in $self->{response} #=cut sub _wrap_searchRetrieve_result { my $self = shift; # the counter will be incremented at the beginning of a loop. We set # it to zero or to one less than the requested start record my $counter = 0; if (defined($self->{params}->{'startRecord'})) { $counter = $self->{params}->{'startRecord'} - 1; } # massage the refdbd result. risx datasets are shipped in a wrapper # which we simply discard. The remainder is split at the start # and end tags # mods datasets arrive in a wrapper, and the entries # are called my $response = $self->{refdb_result}; my @records; if ($self->{params}->{'recordSchema'} eq "mods") { $response =~ s/.*/$1/s; @records = split /\s*\<\/mods:mods\>\s*/, $response; } else { $response =~ s/.*/$1/s; @records = split /\s*\<\/risx:entry\>\s*/, $response; } my $writer; if ($self->{params}->{'recordSchema'} eq "mods") { $writer = new XML::Writer(OUTPUT => \$self->{response}, DATA_MODE => 1, NAMESPACES => 1, PREFIX_MAP => $self->{prefix_map}, FORCED_NS_DECLS => [${$self->{rpm}}{'srw'}, ${$self->{rpm}}{'diag'}, ${$self->{rpm}}{'xcql'}, ${$self->{rpm}}{'mods'}], DATA_INDENT => 2); } else { $writer = new XML::Writer(OUTPUT => \$self->{response}, DATA_MODE => 1, NAMESPACES => 1, PREFIX_MAP => $self->{prefix_map}, FORCED_NS_DECLS => [${$self->{rpm}}{'srw'}, ${$self->{rpm}}{'diag'}, ${$self->{rpm}}{'xcql'}, ${$self->{rpm}}{'risx'}], DATA_INDENT => 2); } # simplify access to the namespaces my $srw = ${$self->{rpm}}{'srw'}; my $diag = ${$self->{rpm}}{'diag'}; my $xcql = ${$self->{rpm}}{'xcql'}; my $risx = ${$self->{rpm}}{'risx'}; my $mods = ${$self->{rpm}}{'mods'}; my $stylesheet = $self->{params}->{'stylesheet'}; $writer->xmlDecl("UTF-8"); if (defined($stylesheet) && length($stylesheet) > 0) { my $stylespec = "href=\"" . $stylesheet . "\" type=\"text/xml\""; $writer->pi('xml-stylesheet', $stylespec); } # if (defined($self->{params}->{'stylesheet'}) # && length($self->{params}->{'stylesheet'}) > 0) { # is it CSS or XSL? # if ($self->{params}->{'stylesheet'} =~ /.xsl$/) { # $self->{response} .= "{params}->{'stylesheet'} . "\"?>"; # } # else { # $self->{response} .= "{params}->{'stylesheet'} . "\"?>"; # } # } $writer->startTag([$srw, "searchRetrieveResponse"]); $writer->dataElement([$srw, "version"], "1.1"); $writer->dataElement([$srw, "numberOfRecords"], $self->{total_num_references}); if ($self->{num_references} > 0) { $writer->startTag([$srw, "records"]); # loop over all retrieved records foreach my $record (@records) { $counter++; $writer->startTag([$srw, "record"]); $writer->dataElement([$srw, "recordPacking"], "XML"); if ($self->{params}->{'recordSchema'} eq "mods") { $writer->dataElement([$srw, "recordSchema"], $mods); } else { $writer->dataElement([$srw, "recordSchema"], $risx); } $writer->startTag([$srw, "recordData"]); # strip whitespace at the beginning (the end should not have any # that needs attention), and add back the missing end tag $record =~ s/^\s(.*)/$1/; $record = " " . $record; if ($self->{params}->{'recordSchema'} eq "mods") { $record .= "\n \n"; } else { $record .= "\n \n"; } $self->{response} .= "\n" . $record; $writer->endTag([$srw, "recordData"]); $writer->dataElement([$srw, "recordPosition"], $counter); $writer->endTag([$srw, "record"]); } $writer->endTag([$srw, "records"]); } # nextRecordPosition should be provided only if there are more # datasets available if ($self->{total_num_references} > $counter) { $writer->dataElement([$srw, "nextRecordPosition"], $counter+1); } # echo the request $writer->startTag([$srw, "echoedSearchRetrieveRequest"]); $writer->dataElement([$srw, "version"], "1.1"); $writer->dataElement([$srw, "query"], $self->{params}->{'query'}); $writer->dataElement([$srw, "recordSchema"], $self->{params}->{'recordSchema'}); $writer->dataElement([$srw, "recordPacking"], $self->{params}->{'recordPacking'}); $writer->dataElement([$srw, "stylesheet"], $self->{params}->{'stylesheet'}); if (defined($self->{params}->{'startRecord'})) { $writer->dataElement([$srw, "startRecord"], $self->{params}->{'startRecord'}); } if (defined($self->{params}->{'maximumRecords'})) { $writer->dataElement([$srw, "maximumRecords"], $self->{params}->{'maximumRecords'}); } $writer->endTag([$srw, "echoedSearchRetrieveRequest"]); # extra response data. We provide the URI to call the script again in order # to request further datasets $writer->startTag([$srw, "extraResponseData"]); my $uri = "http://" . $self->{params}->{'zeerex_host'} . ":" . $self->{params}->{'zeerex_port'} . "/" . $self->{params}->{'zeerex_database'}; $writer->dataElement("databaseURI", $uri); $writer->endTag([$srw, "extraResponseData"]); # dump any diagnostics if (@{$self->{diagnostics}} > 0) { $writer->startTag([$srw, "diagnostics"]); foreach my $diagnostic (@{$self->{diagnostics}}) { $writer->startTag([$diag, "diagnostic"]); $writer->dataElement([$diag, "uri"], $diagnostic->diag_uri()); $writer->dataElement([$diag, "detail"], $diagnostic->diag_detail()); $writer->dataElement([$diag, "message"], $diagnostic->diag_message()); $writer->endTag([$diag, "diagnostic"]); } $writer->endTag([$srw, "diagnostics"]); } $writer->endTag([$srw, "searchRetrieveResponse"]); } ###################################################################### ## _wrap_scan_result: wraps the refdbd result in a ## scanResult ###################################################################### #=head2 _wrap_scan_result #Title : _wrap_scan_result #Usage : $self->_wrap_scan_result(); #Function: wraps the refdbd result in a scanResult and puts the # resulting string in $self->{response} # #Param: $itemref reference to an array containing freq:item pairs #=cut sub _wrap_scan_result { my ($self, $itemref) = @_; # massage the refdbd result. risx datasets are shipped in a wrapper # which we simply discard. The remainder is split at the start # and end tags my $writer = new XML::Writer(OUTPUT => \$self->{response}, DATA_MODE => 1, NAMESPACES => 1, PREFIX_MAP => $self->{prefix_map}, FORCED_NS_DECLS => [${$self->{rpm}}{'srw'}, ${$self->{rpm}}{'diag'}, ${$self->{rpm}}{'xcql'}, ${$self->{rpm}}{'risx'}], DATA_INDENT => 2); # simplify access to the namespaces my $srw = ${$self->{rpm}}{'srw'}; my $diag = ${$self->{rpm}}{'diag'}; my $xcql = ${$self->{rpm}}{'xcql'}; my $risx = ${$self->{rpm}}{'risx'}; my $stylesheet = $self->{params}->{'stylesheet'}; $writer->xmlDecl("UTF-8"); if (defined($stylesheet) && length($stylesheet) > 0) { my $stylespec = "href=\"" . $stylesheet . "\" type=\"text/xml\""; $writer->pi('xml-stylesheet', $stylespec); } $writer->startTag([$srw, "scanResponse"]); $writer->dataElement([$srw, "version"], "1.1"); if ($self->{num_references} > 0) { $writer->startTag([$srw, "terms"]); # loop over all retrieved records foreach my $pair (@{$itemref}) { my $index = index($pair, ":"); if ($index > -1) { my $value = substr($pair, $index+1); my $freq = substr($pair, 0, $index); # the frequency string has leading whitespace for padding $freq =~ s/^\s+//; $writer->startTag([$srw, "term"]); $writer->dataElement([$srw, "value"], $value); $writer->dataElement([$srw, "numberOfRecords"], $freq); $writer->endTag([$srw, "term"]); } } $writer->endTag([$srw, "terms"]); } if (@{$self->{diagnostics}} > 0) { $writer->startTag([$srw, "diagnostics"]); foreach my $diagnostic (@{$self->{diagnostics}}) { $writer->startTag([$diag, "diagnostic"]); $writer->dataElement([$diag, "uri"], $diagnostic->diag_uri()); $writer->dataElement([$diag, "detail"], $diagnostic->diag_detail()); $writer->dataElement([$diag, "message"], $diagnostic->diag_message()); $writer->endTag([$diag, "diagnostic"]); } $writer->endTag([$srw, "diagnostics"]); } $writer->endTag([$srw, "scanResponse"]); } 1; __END__ RefDB-SRU-0.7/MANIFEST000644 001750 001750 00000000222 10713335323 014744 0ustar00markusmarkus000000 000000 ChangeLog INSTALL SRU.pm SRUserver.pm Makefile.PL MANIFEST test.pl META.yml Module meta-data (added by MakeMaker) RefDB-SRU-0.7/ChangeLog000644 001750 001750 00000000001 10652215005 015354 0ustar00markusmarkus000000 000000 RefDB-SRU-0.7/test.pl000644 001750 001750 00000010203 10652215005 015123 0ustar00markusmarkus000000 000000 # Before `make install' is performed this script should be runnable with # `make test'. After `make install' it should work as `perl test.pl' ######################### We start with some black magic to print on failure. # Change 1..1 below to 1..last_test_to_print . # (It may become useful if the test is moved to ./t subdirectory.) BEGIN { $| = 1; print "1..10: "; } END {print "not ok\n" unless $loaded;} use RefDB::SRU; $loaded = 1; print "ok\n"; ######################### End of black magic. # Insert your test code below (better if it prints "ok 13" # (correspondingly "not ok 13") depending on the success of chunk 13 # of the test code): # set a few initial params my $cuser = ""; # must match your database server my $cpasswd = ""; # must match your database server my $server_ip = "127.0.0.1"; # where refdbd runs my $port = "9734"; # where refdbd listens my $dbname = "refs"; # some reference database # test counter my $numtest = 1; my $numok = 1; ## ask user for connection parameters my $input; print "This test script requires a running refdbd server that we can connect to. It also needs a RefDB database that contains a couple of references, and an user account that has read access to this database. Please provide the following connection parameters (hitting ENTER will use the defaults):\n\n"; print "username of a RefDB database user [$cuser]:"; chomp($input = ); if (length($input) > 0) { $cuser = $input; } print "password of a RefDB database user [$cpasswd]:"; chomp($input = ); if (length($input) > 0) { $cpasswd = $input; } print "IP address of refdbd server [$server_ip]:"; chomp($input = ); if (length($input) > 0) { $server_ip = $input; } print "port of refdbd server [$port]:"; chomp($input = ); if (length($input) > 0) { $port = $input; } print "name of a RefDB database [$dbname]:"; chomp($input = ); if (length($input) > 0) { $dbname = $input; } print "\n\n"; # set the parameters common for all tests my %params = ( "loglevel" => "7", "logdest" => "0", "server_ip" => $server_ip, "port" => $port, "timeout" => "180", "username" => $cuser, "password" => $cpasswd, "pdfroot" => "/home/foo/literature", "css_url" => "/usr/local/share/refdb/css/refdb.css", "database" => $dbname, "version" => "1.1", "maximumRecords" => "10", "zeerex_host" => "my.example.com", "zeerex_port" => "80", "zeerex_database" => "refs", "zeerex_databaseInfo_title" => "Reference Database", "zeerex_databaseInfo_description" => "Reference Database", "zeerex_databaseInfo_author" => "John Doe", "zeerex_databaseInfo_contact" => "John\@Doe.org", ); ###################################################################### ## Test module ###################################################################### # Test 2 print "2..4: explain operation"; $numtest++; $params{'operation'} = "explain"; my $refdbquery = new RefDB::SRU(\%params); $refdbquery->run(); my $result = $refdbquery->response(); if (length($result) > 0) { $numok++; } print "Content-type: text/xml\n\n"; print "$result\n"; ###################################################################### # Test 3 print "3..4: searchRetrieve operation"; $numtest++; $params{'operation'} = "searchRetrieve"; $params{'query'} = "bib.subject=A"; my $refdbquery = new RefDB::SRU(\%params); $refdbquery->run(); my $result = $refdbquery->response(); if (length($result) > 0) { $numok++; } print "Content-type: text/xml\n\n"; print "$result\n"; ###################################################################### # Test 4 print "4..4: scan operation"; $numtest++; $params{'operation'} = "scan"; $params{'scanClause'} = "bib.subject=A"; my $refdbquery = new RefDB::SRU(\%params); $refdbquery->run(); my $result = $refdbquery->response(); if (length($result) > 0) { $numok++; } print "Content-type: text/xml\n\n"; print "$result\n"; ###################################################################### ## Grand finale print "That is ... waitaminute ... $numok out of $numtest\n"; RefDB-SRU-0.7/SRUserver.pm000644 001750 001750 00000015424 10657320574 016074 0ustar00markusmarkus000000 000000 #!/usr/bin/perl ## SRUserver.pm: defines a class for a standalone SRU server for RefDB ## markus@mhoenicka.de 2007-08-10 ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 2 of the License, or ## (at your option) any later version. ## ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## You should have received a copy of the GNU General Public License ## along with this program; if not, write to the Free Software ## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA package RefDB::SRUserver; use base qw(HTTP::Server::Simple::CGI); ## some RefDB modules use RefDB::Prefs; use RefDB::SRU; use Sys::Syslog; =head1 NAME RefDB::SRUserver - Lightweight SRU server for RefDB =head1 SYNOPSIS use warnings; use strict; use RefDB::SRUserver; # change into the directory that contains the sru stylesheets chdir '/sru' or die "Can't cd to stylesheet directory: $!\n"; # create and start the web server my $server = RefDB::SRUserver->new(); $server->run(); =head1 DESCRIPTION This is a simple standalone SRU server for RefDB. By default, it doesn't thread, fork, scale well, run fast, or do anything else you'd need to serve out datasets to the world. It is intended to set up a no-frills SRU server without having to configure a web server like Apache. If you need performance, or want to allow remote access to your RefDB data, by all means use the RefDB SRU CGI module with a decent web server instead. The CGI module is available in the same package as this standalone server. To add some level of security, an application using this module must run from or change to the subdirectory which contains the stylesheets. All path info is stripped, and the stylesheets are then served from the current working directory. This should keep most users from sharing their /etc/passwd with everyone else. =head1 FEEDBACK Send bug reports, questions, and comments to the refdb-users mailing list at: refdb-users@lists.sourceforge.net For list information and archives, please visit: http://lists.sourceforge.net/lists/listinfo/refdb-users =head1 AUTHOR Markus Hoenicka, markus@mhoenicka.de =head1 SEE ALSO This module is part of the RefDB package, a reference manager and bibliography tool for markup languages. Please visit http://refdb.sourceforge.net for further information. =head2 print_banner This routine prints a banner before the server request-handling loop starts. =cut sub print_banner { my $self = shift; print( __PACKAGE__ . ": You can connect to your server at " . "http://localhost:" . $self->port . "/\n" . "To terminate the server, press Ctrl-c\n"); } =head2 handle_request CGI This routine is called whenever your server gets a request it can handle. It's called with a CGI object that's been pre-initialized. =cut sub handle_request { my ($self, $query) = @_; if ($query->path_info() =~ "styles") { # this is a stylesheet request. Strip the leading directory from the # path and load the file from the current directory. $stylepath = $query->path_info(); $stylepath =~ s/.*\/([^\/]+)/$1/; my $sh = eval { local *FH; open(FH, "< $stylepath") or die; *FH{IO}}; while (<$sh>) { print $_; } return; } # else: run the SRU stuff ## read config file settings my $confdir = $self->{"confdir"}; my $prefs = RefDB::Prefs::->new("$confdir/refdbsrurc", undef); # this hash receives the parameter-value pairs my %params = $query->Vars; # logging options $params{'logfile'} = (defined($prefs->{"logfile"})) ? $prefs->{"logfile"} : "/var/log/refdbsru.log"; $params{'loglevel'} = (defined($prefs->{"loglevel"})) ? $prefs->{"loglevel"} : 6; $params{'logdest'} = (defined($prefs->{"logdest"})) ? $prefs->{"logdest"} : 2; ## 0 = stderr, 1 = syslog, 2 = file # networking options $params{'server_ip'} = (defined($prefs->{"serverip"})) ? $prefs->{"serverip"} : "127.0.0.1"; $params{'port'} = (defined($prefs->{"port"})) ? $prefs->{"port"} : "9734"; $params{'timeout'} = (defined($prefs->{"timeout"})) ? $prefs->{"timeout"} : "180"; # user authentication $params{'username'} = (defined($prefs->{"username"})) ? $prefs->{"username"} : "anonymous"; $params{'password'} = (defined($prefs->{"passwd"})) ? $prefs->{"passwd"} : ""; $params{'pdfroot'} = (defined($prefs->{"pdfroot"})) ? $prefs->{"pdfroot"} : "/home/foo/literature"; $params{'xsl_url'} = (defined($prefs->{"xsl_url"})) ? $prefs->{"xsl_url"} : ""; $params{'db_engine'} = (defined($prefs->{"dbserver"})) ? $prefs->{"dbserver"} : undef; # zeerex parameters $params{'zeerex_host'} = (defined($prefs->{"zeerex_host"})) ? $prefs->{"zeerex_host"} : "www.change.me"; $params{'zeerex_port'} = (defined($prefs->{"zeerex_port"})) ? $prefs->{"zeerex_port"} : "80"; $params{'zeerex_database'} = (defined($prefs->{"zeerex_database"})) ? $prefs->{"zeerex_database"} : "refs"; $params{'zeerex_databaseInfo_title'} = (defined($prefs->{"zeerex_databaseInfo_title"})) ? $prefs->{"zeerex_databaseInfo_title"} : "Reference Database"; $params{'zeerex_databaseInfo_description'} = (defined($prefs->{"zeerex_databaseInfo_description"})) ? $prefs->{"zeerex_databaseInfo_description"} : "Reference Database"; $params{'zeerex_databaseInfo_author'} = (defined($prefs->{"zeerex_databaseInfo_author"})) ? $prefs->{"zeerex_databaseInfo_author"} : "John Doe"; $params{'zeerex_databaseInfo_contact'} = (defined($prefs->{"zeerex_databaseInfo_contact"})) ? $prefs->{"zeerex_databaseInfo_contact"} : "John\@Doe.org"; # create a new RefDB query object and pass a reference to the # parameter-value hash my $refdbquery = new RefDB::SRU(\%params); # try to figure out the database name. A default database should be # provided in the config file. Remote users can override this setting # by providing an additional path info in the URL. E.g. # http://myserver.com/cgi-bin/refdbsru/? will use the # default database, whereas # http://myserver.com/cgi-bin/refdbsru/foo? will use the # database "foo" instead if (defined($query->path_info()) && length($query->path_info()) > 1) { $params{'database'} = $query->path_info(); # strip leading slash $params{'database'} =~ s/^\///; } else { $params{'database'} = (defined($prefs->{"defaultdb"})) ? $prefs->{"defaultdb"} : "references"; } $refdbquery->run(); my $result = $refdbquery->response(); print "HTTP/1.0 200 OK\r\nContent-Type: text/xml\r\n\r\n"; print "$result\n"; } 1; __END__ RefDB-SRU-0.7/INSTALL000644 001750 001750 00000000743 10652215005 014650 0ustar00markusmarkus000000 000000 Installing the RefDBClient Perl module ====================================== 1. Create a system-specific Makefile: perl Makefile.PL 2. Build the module: make 3. Run a test. This step is optional and requires a running RefDB installation: make test 4. Install the module in the default location: make install See the test.pl script for an example of how to use this module. $Id: INSTALL,v 1.1 2004/02/17 22:41:45 mhoenicka Exp $ (c) Markus Hoenicka 2004 RefDB-SRU-0.7/Makefile.PL000644 001750 001750 00000001012 10652215005 015557 0ustar00markusmarkus000000 000000 use ExtUtils::MakeMaker; # See lib/ExtUtils/MakeMaker.pm for details of how to influence # the contents of the Makefile that is written. WriteMakefile( 'NAME' => 'RefDB::SRU', 'VERSION_FROM' => 'SRU.pm', # finds $VERSION 'AUTHOR' => 'Markus Hoenicka ', 'PREREQ_PM' => { 'RefDB::Client' => '1.17', 'RefDB::Log' => '1.2', 'CQL::Parser' => '1.0', 'XML::Writer' => '0.600', }, 'dist' => { COMPRESS => 'gzip -9f', SUFFIX => '.tar.gz',}, ); RefDB-SRU-0.7/META.yml000644 001750 001750 00000000702 10713335323 015067 0ustar00markusmarkus000000 000000 # http://module-build.sourceforge.net/META-spec.html #XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX# name: RefDB-SRU version: 0.7 version_from: SRU.pm installdirs: site requires: CQL::Parser: 1.0 RefDB::Client: 1.17 RefDB::Log: 1.2 XML::Writer: 0.600 distribution_type: module generated_by: ExtUtils::MakeMaker version 6.30