HTTP-OAI-4.03/0000755003026500005230000000000012320512116011446 5ustar tdbrodyjfHTTP-OAI-4.03/bin/0000755003026500005230000000000012320512116012216 5ustar tdbrodyjfHTTP-OAI-4.03/bin/oai_browser.pl0000755003026500005230000002126412317253770015114 0ustar tdbrodyjf#!/usr/bin/perl -w =head1 NAME oai_browser - Command line OAI repository browser =head1 DESCRIPTION The oai_browser utility provides a command-line tool to browse an OAI-compliant repository. =head1 SYNOPSIS oai_browser.pl B<[options]> I =head1 ARGUMENTS =over 4 =item I Specify baseURL to connect to. =back =head1 OPTIONS =over 8 =item B<--help> Show this page. =item B<--silent> Don't display data harvested from the repository - only shows a record count. =item B<--trace> Turn on trace debugging. =item B<--tracesax> Turn on trace debugging of SAX calls. =item B<--skip-identify> Don't perform an initial Identify to check the repository's baseURL. =cut BEGIN { unshift @INC, "."; } use vars qw($VERSION $h); use lib "../lib"; use lib "lib"; use HTTP::OAI; use Pod::Usage; $VERSION = $HTTP::OAI::VERSION; use vars qw( @ARCHIVES ); @ARCHIVES = qw( http://cogprints.soton.ac.uk/perl/oai2 http://citebase.eprints.org/cgi-bin/oai2 http://arXiv.org/oai2 http://www.biomedcentral.com/oai/2.0/ ); use strict; use warnings; #use sigtrap qw( die INT ); # This is just confusing ... #binmode(STDOUT,":encoding(iso-8859-1)"); # Causes Out of memory! 
errors :-( binmode(STDOUT,":utf8"); use Getopt::Long; eval "use Term::ReadLine"; if( $@ ) { die "Requires Term::ReadLine perl module\n"; } eval "use Term::ReadKey"; if( $@ ) { die "Requires Term::ReadKey perl module\n"; } use HTTP::OAI::Harvester; use HTTP::OAI::Metadata::OAI_DC; my ($opt_silent, $opt_help, $opt_trace, $opt_tracesax, $opt_skip_identify); $opt_silent = 0; GetOptions ( 'silent' => \$opt_silent, 'help' => \$opt_help, 'trace' => \$opt_trace, 'tracesax' => \$opt_tracesax, 'skip-identify' => \$opt_skip_identify, ); pod2usage(1) if $opt_help; if( $opt_trace ) { HTTP::OAI::Debug::level( '+trace' ); } if( $opt_tracesax ) { HTTP::OAI::Debug::level( '+sax' ); } print < Use CTRL+C to quit at any time --- EOF my $DEFAULTID = ''; use vars qw($TERM @SETS @PREFIXES); $TERM = Term::ReadLine->new($0); $TERM->addhistory(@ARCHIVES); while(1) { # my $burl = input('Enter the base URL to use [http://cogprints.soton.ac.uk/perl/oai2]: ') || 'http://cogprints.soton.ac.uk/perl/oai2'; my $burl = shift || $TERM->readline('OAI Base URL to query>','http://cogprints.soton.ac.uk/perl/oai2') || next; $h = new HTTP::OAI::Harvester(baseURL=>$burl); last if $opt_skip_identify; if( my $id = Identify() ) { last; } } &mainloop(); sub mainloop { while(1) { print "\nMenu\n----\n\n", "1. GetRecord\n2. Identify\n3. ListIdentifiers\n4. ListMetadataFormats\n5. ListRecords\n6. ListSets\nq. Quit\n\n>"; my $cmd; ReadMode(4); $cmd = ReadKey(); ReadMode(0); last unless defined($cmd); print $cmd . 
"\n"; if( $cmd eq 'q' ) { last; } elsif($cmd eq '1') { eval { GetRecord() }; } elsif($cmd eq '2') { eval { Identify() }; } elsif($cmd eq '3') { eval { ListIdentifiers() }; } elsif($cmd eq '4') { eval { ListMetadataFormats() }; } elsif($cmd eq '5') { eval { ListRecords() }; } elsif($cmd eq '6') { eval { ListSets() }; } if( $@ ) { warn "Internal error occurred: $@\n"; } } } sub GetRecord { printtitle("GetRecord"); my $id = $TERM->readline("Enter the identifier to request>",$DEFAULTID) || $DEFAULTID; $TERM->addhistory(@PREFIXES); my $mdp = $TERM->readline("Enter the metadataPrefix to use>",'oai_dc') || 'oai_dc'; my $r = $h->GetRecord( identifier=>$id, metadataPrefix=>$mdp, handlers=>{ metadata=>($mdp eq 'oai_dc' ? 'HTTP::OAI::Metadata::OAI_DC' : undef), }, ); if( defined(my $rec = $r->next) ) { printheader($r); print "identifier => ", $rec->identifier, ($rec->status ? " (".$rec->status.") " : ''), "\n", "datestamp => ", $rec->datestamp, "\n"; foreach($rec->header->setSpec) { print "setSpec => ", $_, "\n"; } print "\nHeader:\n", $rec->header->toString; print "\nMetadata:\n", $rec->metadata->toString if defined($rec->metadata); print "\nAbout data:\n", join("\n",map { $_->toString } $rec->about) if $rec->about; } iserror($r); } sub Identify { printtitle("Identify"); my $r = $h->Identify; return if iserror($r); print map({ "adminEmail => " . $_ . 
"\n" } $r->adminEmail), "baseURL => ", $r->baseURL, "\n", "protocolVersion => ", $r->protocolVersion, "\n", "repositoryName => ", $r->repositoryName, "\n"; foreach my $dom (grep { defined } map { $_->dom } $r->description) { foreach my $md ($dom->firstChild) { foreach my $elem ($md->getElementsByTagNameNS('http://www.openarchives.org/OAI/2.0/oai-identifier','sampleIdentifier')) { $DEFAULTID = $elem->getFirstChild->toString; print "sampleIdentifier => ", $DEFAULTID, "\n"; } } } $r; } sub ListIdentifiers { printtitle("ListIdentifiers"); my $resumptionToken = $TERM->readline("Enter an optional resumptionToken>"); my ($from, $until, $set, $mdp); if( !$resumptionToken ) { $from = $TERM->readline("Enter an optional from period (yyyy-mm-dd)>"); $until = $TERM->readline("Enter an optional until period (yyyy-mm-dd)>"); $TERM->addhistory(@SETS); $set = $TERM->readline("Enter an optional set ([A-Z0-9_]+)>"); $TERM->addhistory(@PREFIXES); $mdp = $TERM->readline("Enter the metadataPrefix to use>",'oai_dc') || 'oai_dc'; } my $c = 0; my $cb = $opt_silent ? sub { print STDERR $c++, "\r"; } : sub { my $rec = shift; $c++; print "identifier => ", $rec->identifier, (defined($rec->datestamp) ? " / " . $rec->datestamp : ''), ($rec->status ? 
" (".$rec->status.") " : ''), "\n"; }; #printheader($r); my $r = $h->ListIdentifiers( checkargs(resumptionToken=>$resumptionToken,from=>$from,until=>$until,set=>$set,metadataPrefix=>$mdp), onRecord => $cb, ); print "\nRead a total of $c records\n"; return if iserror($r); } sub ListMetadataFormats { printtitle("ListMetadataFormats"); my $id = $TERM->readline("Enter an optional identifier>"); my $r = $h->ListMetadataFormats(checkargs(identifier=>$id)); return if iserror($r); @PREFIXES = (); printheader($r); while( my $mdf = $r->next ) { push @PREFIXES, $mdf->metadataPrefix; print "metadataPrefix => ", $mdf->metadataPrefix, "\n", "schema => ", $mdf->schema, "\n", "metadataNamespace => ", ($mdf->metadataNamespace || ''), "\n"; } } sub ListRecords { printtitle("ListRecords"); my $resumptionToken = $TERM->readline("Enter an optional resumptionToken>"); my ($from, $until, $set, $mdp); if( !$resumptionToken ) { $from = $TERM->readline("Enter an optional from period (yyyy-mm-dd)>"); $until = $TERM->readline("Enter an optional until period (yyyy-mm-dd)>"); $TERM->addhistory(@SETS); $set = $TERM->readline("Enter an optional set ([A-Z0-9_]+)>"); $TERM->addhistory(@PREFIXES); $mdp = $TERM->readline("Enter the metadataPrefix to use>",'oai_dc') || 'oai_dc'; } my $c = 0; my $cb = $opt_silent ? sub { print STDERR $c++, "\r"; } : sub { my $rec = shift; $c++; print "\nidentifier => ", $rec->identifier, ($rec->status ? " (".$rec->status.") " : ''), "\n", "datestamp => ", $rec->datestamp, "\n"; foreach($rec->header->setSpec) { print "setSpec => ", $_, "\n"; } print "\nMetadata:\n", ($rec->metadata->toString||'(null)') if $rec->metadata; print "\nAbout data:\n", join("\n",map { ($_->toString||'(null)') } $rec->about) if $rec->about; }; #printheader($r); my $r = $h->ListRecords( checkargs(resumptionToken=>$resumptionToken,from=>$from,until=>$until,set=>$set,metadataPrefix=>$mdp), handlers=>{ metadata=>(($mdp and $mdp eq 'oai_dc') ? 
'HTTP::OAI::Metadata::OAI_DC' : undef), }, onRecord => $cb, ); print "\nRead a total of $c records\n"; return if iserror($r); } sub ListSets { printtitle("ListSets"); sub cb { my $rec = shift; push @SETS, $rec->setSpec; print "setSpec => ", $rec->setSpec, "\n", "setName => ", ($rec->setName||'(null)'), "\n"; }; my $r = $h->ListSets(onRecord=>\&cb); return if iserror($r); } sub input { my $q = shift; print $q; my $r = <>; return unless defined($r); chomp($r); return $r; } sub printtitle { my $t = shift; print "\n$t\n"; for( my $i = 0; $i < length($t); $i++ ) { print "-"; } print "\n"; } sub printheader { my $r = shift; print "verb => ", $r->verb, "\n", "responseDate => ", $r->responseDate, "\n", "requestURL => ", $r->requestURL, "\n"; } sub checkargs { my %args = @_; foreach my $key (keys %args) { delete $args{$key} if( !defined($args{$key}) || $args{$key} eq '' ); } %args; } sub iserror { my $r = shift; if( $r->is_success ) { return undef; } else { print "An error ", $r->code, " occurred while making the request", ($r->request ? " (" . $r->request->uri . ") " : ''), ":\n", $r->message, "\n"; return 1; } } HTTP-OAI-4.03/bin/oai_static_gateway.pl0000755003026500005230000000327312317253770016441 0ustar tdbrodyjf#!/usr/bin/perl -w # Change this to the location of your static repository # XML file my $STATIC_REPO = 'file:../examples/repository.xml'; use strict; use HTTP::OAI; use HTTP::OAI::Repository qw/:validate/; use XML::SAX::Writer; use CGI qw/:standard -oldstyle_urls/; use vars qw( $GZIP ); BEGIN { eval { require PerlIO::gzip }; $GZIP = $@ ? 
0 : 1; } # Create a new harvester object to read the xml file my $h = HTTP::OAI::Harvester->new(baseURL=>$STATIC_REPO); binmode(STDOUT,':utf8'); my @encodings = http('HTTP_ACCEPT_ENCODING'); if( $GZIP && grep { defined($_) && $_ eq 'gzip' } @encodings ) { print header( -type=>'text/xml; charset=utf-8', -charset=>'utf-8', '-Content-Encoding'=>'gzip', ); binmode(STDOUT, ":gzip"); } else { print header( -type=>'text/xml; charset=utf-8', -charset=>'utf-8', ); } # Check for grammatical errors in the request my @errs = validate_request(CGI::Vars()); my $mdp = param('metadataPrefix') || ''; my @mdfs = $h->ListMetadataFormats()->metadataFormat; if( $mdp && !grep { $_->metadataPrefix } @mdfs ) { push @errs, new HTTP::OAI::Error(code=>'cannotDisseminateFormat',message=>"Dissemination as '$mdp' is not supported"); } if( param('resumptionToken') ) { push @errs, new HTTP::OAI::Error(code=>'badArgument',message=>'This repository does not support flow-control'); } my $r; if( @errs ) { $r = HTTP::OAI::Response->new( requestURL=>self_url() ); $r->errors(@errs); } else { my %attr = CGI::Vars(); my $verb = delete($attr{'verb'}); $r = $h->$verb(%attr); $r->requestURL(self_url()); if( 'Identify' eq $verb && ref($r) eq 'HTTP::OAI::Identify' ) { $r->baseURL(url()); } } $r->set_handler(XML::SAX::Writer->new(Output=>\*STDOUT)); $r->generate; HTTP-OAI-4.03/bin/oai_pmh.pl0000755003026500005230000000516412317253770014216 0ustar tdbrodyjf#!/usr/bin/perl use encoding 'utf8'; use HTTP::OAI; use Getopt::Long; use Pod::Usage; use XML::LibXML; =head1 NAME oai_pmh.pl - pipe OAI-PMH to the command-line =head1 SYNOPSIS oai_pmh.pl [baseURL] =head1 OPTIONS =over 8 =item --help =item --man =item --verbose Be more verbose (repeatable). =item --force Force a non-conformant OAI request. =item --from =item --identifier OAI identifier to GetRecord or ListMetadataFormats. =item --metadataPrefix Specify format of metadata to retrieve. =item -X/--request Verb to request, defaults to ListRecords. 
=item --set Request only those records in a set. =item --until =back =head1 DESCRIPTION Retrieve data from OAI-PMH endpoints. The output format is: Where are in HTTP header format. Content will be the raw XML as exposed by the repository. Each record is separated by a FORMFEED character. For example: oai_pmh.pl -X GetRecord --metadataPrefix oai_dc \ --identifier oai:eprints.soton.ac.uk:20 http://eprints.soton.ac.uk/cgi/oai2 =cut my %opts = ( verbose => 1, ); GetOptions(\%opts, 'help', 'man', 'metadataPrefix=s', 'request|X=s', 'identifier=s', 'verbose+', 'force', 'from=s', 'until=s', ) or pod2usage(2); pod2usage(1) if $opts{help}; pod2usage({-verbose => 2}) if $opts{man}; my $noise = delete $opts{verbose}; if (!exists $opts{request}) { $opts{request} = 'ListRecords'; $opts{metadataPrefix} = 'oai_dc'; } my $base_url = pop @ARGV; pod2usage(1) if !$base_url; my $ha = HTTP::OAI::Harvester->new(baseURL => $base_url); my $f = delete $opts{request}; debug("Requesting $f", 2); my $r = $ha->$f( %opts, onRecord => \&output_record, ); if( $f eq "ListMetadataFormats" ) { foreach my $mdf ($r->metadataFormat) { print "metadataPrefix: " . $mdf->metadataPrefix . "\n"; print "schema: " . $mdf->schema . "\n"; print "metadataNamespace: " . $mdf->metadataNamespace . "\n"; print "\n"; print "\f"; } } if( !$r->is_success ) { die "Error in response: " . $r->message . "\n"; } sub debug { my( $msg, $level ) = @_; warn "$msg\n" if $noise >= $level; } sub output_record { my( $rec ) = @_; my $header = $rec->isa( 'HTTP::OAI::Header' ) ? $rec : $rec->header; print "identifier: " . $header->identifier . "\n"; print "datestamp: " . $header->datestamp . "\n"; print "status: " . $header->status . "\n"; foreach my $set ($header->setSpec) { print "setSpec: " . $set . 
"\n"; } print "\n"; if ($rec->can( "metadata" ) && defined(my $metadata = $rec->metadata)) { print $metadata->dom->toString( 1 ); } print "\f"; } HTTP-OAI-4.03/examples/0000755003026500005230000000000012320512116013264 5ustar tdbrodyjfHTTP-OAI-4.03/examples/mets.xml0000644003026500005230000002337512317253770015006 0ustar tdbrodyjf 2007-06-01T15:44:53Z http://celestial.eprints.org/oai/DSpace%20at%20MIT
oai:dspace.mit.edu:1721.1/8338 2006-10-23T15:52:20Z hdl_1721.1_7680 hdl_1721.1_7843
DSpace at MIT advisorDaniel J. Kleitman. authorYang, Xiaochun, 1971- otherMassachusetts Institute of Technology. Dept. of Mathematics. 2005-08-23T19:18:06Z 2005-08-23T19:18:06Z 2002 2002 http://hdl.handle.net/1721.1/8338 Thesis (Ph. D.)--Massachusetts Institute of Technology, Dept. of Mathematics, 2002. Includes bibliographical references (p. 89-91). Geometry is the synthetic tool we use to unify all existing analytical cone-beam reconstruction methods. These reconstructions are based on formulae derived by Tuy [Tuy, 1983], Smith [Smith, 1985] and Grangeat [Grangeat, 1991] which explicitly link the cone-beam data to some intermediate functions in the Radon transform domain. However, the essential step towards final reconstruction, that is, differential-backprojection, has not yet achieved desired efficiency. A new inversion formula is obtained directly from the 3D Radon inverse [Radon, 1917, Helgason, 1999]. It incorporates the cone-beam scanning geometry and allows the theoretical work mentioned above to be reduced to exact and frugal implementations. Extensions can be easily carried out to 2D fan-beam reconstruction as well as other scanning modalities such as parallel scans by allowing more abstract geometric description on the embedding subspace of the Radon manifold. The new approach provides a canonical inverse procedure for computerized tomography in general, with applications ranging from diagnostic medical imaging to industrial testing, such as X-ray CT, Emission CT, Ultrasound CT, etc. It also suggests a principled frame for approaching other 3D reconstruction problems related to the Radon transform. The idea is simple: as was spelled out by Helgason on the opening page of his book, The Radon Transform [Helgason, 1999] - a remarkable duality characterizes the Radon transform and its inverse. Our study shows that the dual space, the so-called Radon space, can be geometrically decomposed according to the specified scanning modality. (cont.) 
In cone-beam X-ray reconstruction, for example, each cone-beam projection is seen as a 2D projective subspace embedded in the Radon manifold. Besides the duality in the space relation, the symbiosis played between algebra and geometry, integration and differentiation is another striking feature in the tomographic reconstruction. Simply put, * Geometry and algebra: the two play fundamentally different roles during the inverse. Algebraic transforms carry cone-beam data into the Radon domain, whereas, the geometric decomposition of the dual space determines how the differential-backprojection operator should be systematically performed. The reason that different algorithms in cone-beam X-ray reconstruction share structural similarity is that the dual space decomposition is intrinsic to the specified scanning geometry. The differences in the algorithms lie in the appearance of algebra on the projection submanifold. The algebraic transforms initiate diverse reconstruction methods varying in terms of computational cost and stability. Equipped with this viewpoint, we are able to simplify mathematical analysis and develop algorithms that are easy to implement. Integration and differentiation: forward projection is the integral along straight lines (or planes) in the Euclidean space. During the reconstruction, differentiation is performed over the parallel planes in the projective Radon space, a manifold with clear differential structure. It is important to learn about this differential structure to ensure that correct differentiation can be carried out with respect to the parameters governing the scanning process during the reconstruction ... Made available in DSpace on 2005-08-23T19:18:06Z (GMT). No. of bitstreams: 2 50500372.pdf: 5942218 bytes, checksum: b1eba8f820c75fe04e1de950af6c3548 (MD5) 50500372-MIT.pdf: 5941980 bytes, checksum: ed337b0618cf18bd701502cc9e79e823 (MD5) Previous issue date: 2002 by Xiaochun Yang. 91 p. 
5942218 bytes 5941980 bytes application/pdf application/pdf eng Massachusetts Institute of Technology M.I.T. theses are protected by copyright. They may be viewed from this source for any purpose, but reproduction or distribution in any format is prohibited without written permission. See <a href="https://dspace.mit.edu/handle/1721.1/7582">https://dspace.mit.edu/handle/1721.1/7582</a> for inquiries about permission. Mathematics. Geometry of cone-beam reconstruction Thesis
http://dspace.mit.edu/dspace-oai/request oai:dspace.mit.edu:1721.1/8338 2006-10-23T15:52:20Z http://www.loc.gov/METS/ HTTP-OAI-4.03/examples/getrecord.xml0000644003026500005230000000402512317253770016003 0ustar tdbrodyjf 2005-02-25T16:37:50Zhttp://citebase.eprints.org/cgi-bin/oai2
oai:arXiv.org:hep-th/00010012004-06-22T19:46:16Z
http://arXiv.org/abs/hep-th/0001001Aspinwall, Paul S.1999-12-312000-01-17textCompactification, Geometry and Duality: N=2 These are notes based on lectures given at TASI99. We review the geometry of the moduli space of N=2 theories in four dimensions from the point of view of superstring compactification. The cases of a type IIA or type IIB string compactified on a Calabi-Yau threefold and the heterotic string compactified on K3xT2 are each considered in detail. We pay specific attention to the differences between N=2 theories and N>2 theories. The moduli spaces of vector multiplets and the moduli spaces of hypermultiplets are reviewed. In the case of hypermultiplets this review is limited by the poor state of our current understanding. Some peculiarities such as ``mixed instantons'' and the non-existence of a universal hypermultiplet are discussed. Comment: 82 pages, 8 figures, LaTeX2e, TASI99, refs added and some typos fixed
HTTP-OAI-4.03/examples/repository.xml0000644003026500005230000001321212317253770016242 0ustar tdbrodyjf Demo repository file:examples/repository.xml 2.0 jondoe@oai.org 2002-09-19 no YYYY-MM-DD oai_dc http://www.openarchives.org/OAI/2.0/oai_dc.xsd http://www.openarchives.org/OAI/2.0/oai_dc/ oai_rfc1807 http://www.openarchives.org/OAI/1.1/rfc1807.xsd http://info.internet.isi.edu:80/in-notes/rfc/files/rfc1807.txt oai:arXiv:cs/0112017 2001-12-14 Using Structural Metadata to Localize Experience of Digital Content Dushay, Naomi Digital Libraries With the increasing technical sophistication of both information consumers and providers, there is increasing demand for more meaningful experiences of digital information. We present a framework that separates digital object experience, or rendering, from digital object storage and manipulation, so the rendering can be tailored to particular communities of users. Comment: 23 pages including 2 appendices, 8 figures 2001-12-14 oai:perseus:Perseus:text:1999.02.0084 2002-05-01 Germany and its Tribes Tacitus text Complete Works of Tacitus. Tacitus. Alfred John Church. William Jackson Brodribb. Lisa Cerrato. edited for Perseus. New York: Random House, Inc. Random House, Inc. reprinted 1942. http://www.perseus.tufts.edu/cgi-bin/ptext? doc=Perseus:text:1999.02.0083 oai:arXiv:cs/0112017 2001-12-14 v2 cs/0112017 December 23, 2001 Using Structural Metadata to Localize Experience of Digital Content Naomi Dushay December 14, 2001 Los Alamos arXiv Metadata may be used without restrictions as long as the oai identifier remains attached to it. 
HTTP-OAI-4.03/examples/identify.xml0000644003026500005230000000374312317253770015646 0ustar tdbrodyjf 2005-02-25T16:36:30Zhttp://citebase.eprints.org/cgi-bin/oai2citebase.eprints.orghttp://citebase.eprints.org/cgi-bin/oai22.0mailto:tdb01r@ecs.soton.ac.uk0001-01-01transientYYYY-MM-DDoaicitebase.eprints.org:oai:arXiv.org:hep-th/0001001http://citebase.eprints.org/help/Identify/policy.phpNo commercial harvesting without prior permission.http://citebase.eprints.org/help/Identify/policy.phpNo commercial harvesting without prior permission.Repositories of scholarly literature, preferably peer-reviewed with journal reference. HTTP-OAI-4.03/examples/badbytes.xml0000644003026500005230000000405512317253770015625 0ustar tdbrodyjf 2005-02-25T16:37:50Zhttp://citebase.eprints.org/cgi-bin/oai2
oai:arXiv.org:hep-th/00010012004-06-22T19:46:16Z
http://arXiv.org/abs/hep-th/0001001Aspinwall, Paul S.1999-12-312000-01-17textCompactification, Geometry and Duality: N=2 These are notes based on lectures given at TASI99. We review the geometry of  Â the moduli space of N=2 theories in four dimensions from the point of view of  superstring compactification. The cases of a type IIA or type IIB string  compactified on a Calabi-Yau threefold and the heterotic string compactified on K3xT2 are each considered in detail. We pay specific attention to the differences between N=2 theories and N>2 theories. The moduli spaces of vector multiplets and the moduli spaces of hypermultiplets are reviewed. In the case of hypermultiplets this review is limited by the poor state of our current understanding. Some peculiarities such as ``mixed instantons'' and the non-existence of a universal hypermultiplet are discussed. Comment: 82 pages, 8 figures, LaTeX2e, TASI99, refs added and some typos fixed
HTTP-OAI-4.03/META.yml0000644003026500005230000000135412320512116012722 0ustar tdbrodyjf--- #YAML:1.0 name: HTTP-OAI version: 4.03 abstract: ~ author: [] license: unknown distribution_type: module configure_requires: ExtUtils::MakeMaker: 0 build_requires: ExtUtils::MakeMaker: 0 requires: CGI: 0 Encode: 2.12 HTTP::Request: 0 HTTP::Response: 0 LWP::UserAgent: 0 Test::More: 0 URI: 0 XML::LibXML: 1.6 XML::LibXML::SAX: 0 XML::SAX: 0 XML::SAX::Base: 1.04 no_index: directory: - t - inc generated_by: ExtUtils::MakeMaker version 6.55_02 meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: 1.4 HTTP-OAI-4.03/MANIFEST0000644003026500005230000000242212320512027012600 0ustar tdbrodyjfbin/oai_browser.pl bin/oai_pmh.pl bin/oai_static_gateway.pl CHANGES examples/badbytes.xml examples/getrecord.xml examples/identify.xml examples/mets.xml examples/repository.xml lib/HTTP/OAI.pm lib/HTTP/OAI/Debug.pm lib/HTTP/OAI/Encapsulation.pm lib/HTTP/OAI/Error.pm lib/HTTP/OAI/GetRecord.pm lib/HTTP/OAI/Harvester.pm lib/HTTP/OAI/Header.pm lib/HTTP/OAI/Identify.pm lib/HTTP/OAI/ListIdentifiers.pm lib/HTTP/OAI/ListMetadataFormats.pm lib/HTTP/OAI/ListRecords.pm lib/HTTP/OAI/ListSets.pm lib/HTTP/OAI/MemberMixin.pm lib/HTTP/OAI/Metadata.pm lib/HTTP/OAI/Metadata/METS.pm lib/HTTP/OAI/Metadata/OAI_DC.pm lib/HTTP/OAI/Metadata/OAI_Eprints.pm lib/HTTP/OAI/Metadata/OAI_Identifier.pm lib/HTTP/OAI/MetadataFormat.pm lib/HTTP/OAI/PartialList.pm lib/HTTP/OAI/Record.pm lib/HTTP/OAI/Repository.pm lib/HTTP/OAI/Response.pm lib/HTTP/OAI/ResumptionToken.pm lib/HTTP/OAI/SAX/Base.pm lib/HTTP/OAI/SAX/Driver.pm lib/HTTP/OAI/SAX/Text.pm lib/HTTP/OAI/SAX/Trace.pm lib/HTTP/OAI/SAXHandler.pm lib/HTTP/OAI/Set.pm lib/HTTP/OAI/UserAgent.pm lib/HTTP/OAI/Verb.pm LICENSE Makefile.PL MANIFEST MANIFEST.SKIP META.yml MYMETA.json MYMETA.yml README t/000xml_sax.t t/00static.t t/01parse.t t/02token.t t/03badbytes.t t/50mets.t t/80network.t t/error.t t/getrecord.t t/identify.t t/listidentifiers.t 
t/listmetadataformats.t HTTP-OAI-4.03/t/0000755003026500005230000000000012320512116011711 5ustar tdbrodyjfHTTP-OAI-4.03/t/02token.t0000644003026500005230000000035012317253770013374 0ustar tdbrodyjfuse Test::More tests => 3; use strict; use warnings; use HTTP::OAI; use_ok( 'HTTP::OAI::ResumptionToken' ); my $rt = HTTP::OAI::ResumptionToken->new; $rt->resumptionToken(''); ok(!$rt); $rt->resumptionToken('token'); ok($rt); HTTP-OAI-4.03/t/listidentifiers.t0000644003026500005230000000270112317253770015315 0ustar tdbrodyjfuse Test::More tests => 3; use strict; use HTTP::OAI; use URI; my $r = new HTTP::OAI::ListIdentifiers(); my $str = < 2004-10-08T17:11:44Zhttp://eprints.ecs.soton.ac.uk/perl/oai2
oai:eprints.ecs.soton.ac.uk:100092004-10-077374617475733D707562747970653D696E70726F63656564696E677366756C6C746578743D46414C5345
oai:eprints.ecs.soton.ac.uk:100102004-10-087374617475733D707562747970653D61727469636C6566756C6C746578743D46414C5345
EOF chomp($str); $r->parse_string($str); ok(1); my $ha = HTTP::OAI::Harvester->new(baseURL=>'http://domain.invalid/'); $r = $ha->ListRecords(metadataPrefix=>'oai_dc', from=>'2005-01-01'); my $uri = URI->new($r->request->uri); my %args = $uri->query_form; ok($args{metadataPrefix} eq 'oai_dc' && $args{'from'} eq '2005-01-01','Request arguments'); ok(1); HTTP-OAI-4.03/t/03badbytes.t0000644003026500005230000000067712317253770014066 0ustar tdbrodyjfuse Test::More tests => 3; use strict; use warnings; use HTTP::OAI; my $ha = HTTP::OAI::Harvester->new( baseURL => 'file:///' ); ok(defined $ha); my $r = "HTTP::OAI::GetRecord"->new( harvestAgent => $ha, resume => $ha->resume, ); $HTTP::OAI::UserAgent::SILENT_BAD_CHARS = 1; $r = $ha->request( HTTP::Request->new( GET => 'file:examples/badbytes.xml' ), undef, # arg undef, # size undef, # previous $r ); ok($r->is_success); ok(1); HTTP-OAI-4.03/t/listmetadataformats.t0000644003026500005230000000051112317253770016161 0ustar tdbrodyjfprint "1..1\n"; use strict; use HTTP::OAI; my $r = new HTTP::OAI::ListMetadataFormats(); my $mf = new HTTP::OAI::MetadataFormat( metadataPrefix=>'oai_dc', schema=>'http://www.openarchives.org/OAI/2.0/oai_dc.xsd', metadataNamespace=>'http://www.openarchives.org/OAI/2.0/oai_dc/', ); $r->metadataFormat($mf); print "ok 1\n"; HTTP-OAI-4.03/t/identify.t0000644003026500005230000000142112317253770013725 0ustar tdbrodyjfuse Test; BEGIN { plan tests => 8; } use warnings; use strict; use HTTP::OAI; ok(1); my $r = new HTTP::OAI::Identify( baseURL=>'http://citebase.eprints.org/cgi-bin/oai2', adminEmail=>'tdb01r@ecs.soton.ac.uk', repositoryName=>'oai:citebase.eprints.org', granularity=>'YYYY-MM-DD', deletedRecord=>'transient', ); ok($r->baseURL,'http://citebase.eprints.org/cgi-bin/oai2'); ok($r->adminEmail,'tdb01r@ecs.soton.ac.uk'); ok($r->repositoryName,'oai:citebase.eprints.org'); ok($r->granularity,'YYYY-MM-DD'); ok($r->deletedRecord,'transient'); $r = HTTP::OAI::Identify->new(); open my $fh, "; close $fh; 
$r->parse_string($xml); ok($r->adminEmail,'mailto:tdb01r@ecs.soton.ac.uk'); my $xml_out = $r->toDOM->toString; ok($xml_out); HTTP-OAI-4.03/t/50mets.t0000644003026500005230000000101212317253770013223 0ustar tdbrodyjfuse Test::More tests => 3; use IO::File; use HTTP::OAI; use HTTP::OAI::Metadata::METS; ok(1); my $fh; my $r = HTTP::OAI::GetRecord->new(handlers=>{ metadata=>'HTTP::OAI::Metadata::METS' }); $fh = IO::File->new('examples/mets.xml','r') or die "Unable to open examples/mets.xml: $!"; $r->parse_file($fh); $fh->close(); my $rec = $r->record; my @files = $rec->metadata->files; is(scalar(@files), 4, 'file_count'); is($files[1]->{ url }, "http://dspace.mit.edu/bitstream/1721.1/8338/2/50500372-MIT.pdf", 'file_url'); HTTP-OAI-4.03/t/000xml_sax.t0000644003026500005230000000326712317253770014017 0ustar tdbrodyjfuse Test::More tests => 12; BEGIN { use_ok( "XML::SAX" ) } BEGIN { use_ok( "XML::SAX::ParserFactory" ) } BEGIN { use_ok( "XML::SAX::Base" ) } use Data::Dumper; use strict; my %EXPECTED = ( root_name => 0, root_ns => 0, element_name => 0, element_ns => 0, element_local_name => 0, element_prefix => 0, ns_name => 0, ns_prefix => 0, ); { package MyHandler; our @ISA = qw( XML::SAX::Base ); sub start_element { my( $self, $hash ) = @_; # print STDERR Data::Dumper::Dumper( $self, $hash ); if( $hash->{Name} eq "root" ) { $EXPECTED{"root_name"} = 1; if( $hash->{NamespaceURI} eq "NAMESPACE1" ) { $EXPECTED{"root_ns"} = 1; } } if( $hash->{Name} eq "x:element" ) { $EXPECTED{"element_name"} = 1; if( $hash->{LocalName} eq "element" ) { $EXPECTED{"element_local_name"} = 1; } if( $hash->{NamespaceURI} eq "NAMESPACE2" ) { $EXPECTED{"element_ns"} = 1; } if( $hash->{Prefix} eq "x" ) { $EXPECTED{"element_prefix"} = 1; } my $namespace_attr = $hash->{"Attributes"}->{"{http://www.w3.org/2000/xmlns/}x"}; if( defined $namespace_attr ) { if( $namespace_attr->{Name} eq "xmlns:x" ) { $EXPECTED{"ns_name"} = 1; } if( $namespace_attr->{Prefix} eq "xmlns" ) { $EXPECTED{"ns_prefix"} = 1; } if( 
$namespace_attr->{Value} eq "NAMESPACE2" ) { $EXPECTED{"ns_value"} = 1; } } } } } my $handler = MyHandler->new; my $parser = XML::SAX::ParserFactory->parser( Handler => $handler ); $parser->parse_string( join "", ); foreach my $test (sort keys %EXPECTED) { ok($EXPECTED{$test}, "parsed $test"); } __DATA__ content HTTP-OAI-4.03/t/error.t0000644003026500005230000000310412317253770013243 0ustar tdbrodyjfuse Test::More tests => 6; use strict; use warnings; use_ok( 'HTTP::OAI' ); my $expected = < 0000-00-00T00:00:00Zhttp://localhost/path/scriptYou didn't supply a verb argument EOF my $r = HTTP::OAI::Response->new( requestURL=>'http://localhost/path/script?', responseDate=>'0000-00-00T00:00:00Z', ); $r->errors(HTTP::OAI::Error->new(code=>'badVerb',message=>'You didn\'t supply a verb argument')); is($r->toDOM->toString, $expected, 'badVerb'); $r = HTTP::OAI::Response->new; $r->parse_string("\n"); ok($r->is_error, 'Junk XML is_error'); is($r->code, 600, 'Chunk xml'); $r = HTTP::OAI::Response->new; $r->parse_string($expected); ok($r->is_error, 'Parse_string'); my $err_noid = < 0000-00-00T00:00:00Zhttp://localhost/path/script?Requested identifier does not exist EOF $r = HTTP::OAI::Response->new; $r->parse_string($err_noid); ok($r->is_error); HTTP-OAI-4.03/t/getrecord.t0000644003026500005230000001135312317253770014075 0ustar tdbrodyjfuse Test::More tests => 8; use_ok( 'HTTP::OAI' ); use_ok( 'HTTP::OAI::Metadata::OAI_DC' ); use XML::LibXML; my $expected = < 0000-00-00T00:00:00Zhttp://localhost/path/script
oai:arXiv.org:acc-phys/94110012004-06-22T17:51:18Za:aa:b
Symplectic Computation of Lyapunov Exponents Habib, Salman Ryne, Robert D. Accelerator Physics A recently developed method for the calculation of Lyapunov exponents of dynamical systems is described. The method is applicable whenever the linearized dynamics is Hamiltonian. By utilizing the exponential representation of symplectic matrices, this approach avoids the renormalization and reorthogonalization procedures necessary in usual techniques. It is also easily extendible to damped systems. The method is illustrated by considering two examples of physical interest: a model system that describes the beam halo in charged particle beams and the driven van der Pol oscillator. Comment: 12 pages, uuencoded PostScript (figures included) 1994-10-31 text http://arXiv.org/abs/acc-phys/9411001
EOF my $r = new HTTP::OAI::GetRecord( requestURL=>'http://localhost/path/script', responseDate=>'0000-00-00T00:00:00Z' ); my $rec = new HTTP::OAI::Record(); my $str_header = <
oai:arXiv.org:acc-phys/9411001 2004-06-22T17:51:18Z a:a a:b
EOF $rec->header->dom(XML::LibXML->new()->parse_string($str_header)); ok($rec->identifier eq 'oai:arXiv.org:acc-phys/9411001', 'header/identifier'); ok($rec->datestamp eq '2004-06-22T17:51:18Z', 'header/datestamp'); ok($rec->status eq 'deleted', 'header/status'); my @sets = $rec->header->setSpec; ok($sets[0] eq 'a:a', 'header/setSpec'); my $str = < Symplectic Computation of Lyapunov Exponents Habib, Salman Ryne, Robert D. Accelerator Physics A recently developed method for the calculation of Lyapunov exponents of dynamical systems is described. The method is applicable whenever the linearized dynamics is Hamiltonian. By utilizing the exponential representation of symplectic matrices, this approach avoids the renormalization and reorthogonalization procedures necessary in usual techniques. It is also easily extendible to damped systems. The method is illustrated by considering two examples of physical interest: a model system that describes the beam halo in charged particle beams and the driven van der Pol oscillator. 
Comment: 12 pages, uuencoded PostScript (figures included) 1994-10-31 text http://arXiv.org/abs/acc-phys/9411001 EOF $rec->metadata(new HTTP::OAI::Metadata()); $rec->metadata->parse_string($str); $r->record($rec); { # hopefully if we can re-parse our own output we're ok, because we can't # compare against the ever changing XML output my $str = $r->toDOM->toString; my $_r = HTTP::OAI::GetRecord->new(handlers=>{ metadata=>'HTTP::OAI::Metadata::OAI_DC' }); $_r->parse_string($str); is($_r->record->metadata->dc->{creator}->[1], 'Ryne, Robert D.', 'toDOM'); } SKIP: { eval { require XML::SAX::Writer }; skip "XML::SAX::Writer not installed", 1 if $@; my $output; my $w = XML::SAX::Writer->new(Output=>\$output); $r->set_handler($w); $r->generate; # SAX::Writer behaves differently :-( # ok($output eq $expected, 'XML::SAX::Writer'); ok(1); } HTTP-OAI-4.03/t/00static.t0000644003026500005230000000435012317253770013545 0ustar tdbrodyjfuse Test::More tests => 21; use strict; use HTTP::OAI; ok(1); # This test harness checks that the library correctly supports # transparent gateway to static repositories my $fn = "file:".$ENV{PWD}."/examples/repository.xml"; my $repo = HTTP::OAI::Harvester->new(baseURL=>$fn); ok($repo, "Harvester"); # Identify my $id = $repo->Identify; if( !$id->is_success ) { BAIL_OUT( "Error parsing static repository: " . 
$id->message ); } ok($id->is_success, "Identify is_success"); ok($id->repositoryName && $id->repositoryName eq 'Demo repository'); ok($repo->Identify->version eq '2.0s'); # Removed this test, as paths screw up too much #ok($repo->Identify->baseURL && $repo->Identify->baseURL eq 'file:///examples/repository.xml'); # ListMetadataFormats my $lmdf = $repo->ListMetadataFormats; ok($lmdf->is_success); ok(my $mdf = $lmdf->next); ok($mdf && $mdf->metadataPrefix && $mdf->metadataPrefix eq 'oai_dc'); # ListRecords my $lr = $repo->ListRecords(metadataPrefix=>'oai_rfc1807'); ok($lr->is_success); my $rec = $lr->next; is(ref($rec), 'HTTP::OAI::Record', 'ListRecords::next returns Record'); ok($rec && $rec->identifier && $rec->identifier eq 'oai:arXiv:cs/0112017'); # ListIdentifiers my $li = $repo->ListIdentifiers(metadataPrefix=>'oai_dc'); ok($li->is_success, 'ListIdentifiers: '.$li->message); my @recs = $li->identifier; ok(@recs && $recs[-1]->identifier eq 'oai:perseus:Perseus:text:1999.02.0084'); # ListSets my $ls = $repo->ListSets(); ok($ls->is_success, 'ListSets'); my @errs = $ls->errors; ok(@errs && $errs[-1]->code eq 'noSetHierarchy'); # GetRecord my $gr = $repo->GetRecord(metadataPrefix=>'oai_dc',identifier=>'oai:perseus:Perseus:text:1999.02.0084'); ok($gr->is_success, 'GetRecord '.$gr->code." 
".$gr->message); $rec = $gr->next; ok($rec && $rec->identifier eq 'oai:perseus:Perseus:text:1999.02.0084'); # Errors $gr = $repo->GetRecord(metadataPrefix=>'oai_dc',identifier=>'invalid',force=>1); ok($gr->is_error, 'GetRecord bad id'); @errs = $gr->errors; is(eval { $errs[0]->code }, 'idDoesNotExist', 'idDoesNotExist'); $lr = $repo->ListRecords(metadataPrefix=>'invalid'); ok($lr->is_error, "invalid metadataPrefix is_error"); @errs = $lr->errors; ok(@errs && $errs[0]->code eq 'cannotDisseminateFormat', "is_error is cannotDisseminateFormat"); HTTP-OAI-4.03/t/80network.t0000644003026500005230000000246212317253770013761 0ustar tdbrodyjf#!/usr/bin/perl -w use Test::More tests => 6; use strict; use warnings; use HTTP::OAI; my @repos = qw( http://eprints.ecs.soton.ac.uk/cgi/oai2 http://www.citebase.org/oai2 http://memory.loc.gov/cgi-bin/oai2_0 ); @repos = qw( http://eprints.ecs.soton.ac.uk/cgi/oai2 ); my $h = HTTP::OAI::Harvester->new(baseURL=>$repos[int(rand(@repos))]); my $r; my $dotest = defined($ENV{"HTTP_OAI_NETTESTS"}); SKIP : { skip "Skipping flakey net tests (set HTTP_OAI_NETTESTS env. 
variable to enable)", 6 unless $dotest; #$r = $h->GetRecord(identifier=>'oai:eprints.ecs.soton.ac.uk:23',metadataPrefix=>'oai_dc'); #ok($r->is_success()); $r = $h->Identify(); ok($r->is_success(), "Identify: ".$r->message); $r = $h->ListIdentifiers(metadataPrefix=>'oai_dc'); ok($r->is_success(), "ListIdentifiers: ".$r->message); $r = $h->ListMetadataFormats(); ok($r->is_success(), "ListMetadataFormats: ".$r->message); $r = $h->ListRecords(metadataPrefix=>'oai_dc'); ok($r->is_success(), "ListRecords: ".$r->message); $r = $h->ListSets(); ok($r->is_success(), "ListSets: ".$r->message); $r = $h->ListIdentifiers(metadataPrefix => 'oai_dc'); my $ok = 0; while(1) { last if $r->is_error; my $uri = $r->request->uri; my $rec = $r->next; $ok = 1, last if $uri ne $r->request->uri; } ok($ok, "Auto-resumption RT #69337"); } HTTP-OAI-4.03/t/01parse.t0000644003026500005230000000146312317253770013373 0ustar tdbrodyjfuse Test::More tests => 5; use IO::File; use HTTP::OAI; use HTTP::OAI::Metadata::OAI_DC; ok(1); my $fh; my $r = HTTP::OAI::GetRecord->new(handlers=>{ metadata=>'HTTP::OAI::Metadata::OAI_DC' }); $fh = IO::File->new('examples/getrecord.xml','r') or BAIL_OUT( "Failed to open examples/getrecord.xml: $!" ); $r->parse_file($fh); $fh->close(); my $rec = $r->next; ok($rec); ok($rec->metadata->dc->{creator}->[0] eq 'Aspinwall, Paul S.'); my $dom = $rec->metadata->dom; my $md = HTTP::OAI::Metadata::OAI_DC->new; $md->metadata( $dom ); ok($md->dc->{creator}->[0] eq 'Aspinwall, Paul S.'); $r = HTTP::OAI::Identify->new(); $fh = IO::File->new('examples/identify.xml','r') or BAIL_OUT( "Failed to open examples/identify.xml: $!" 
); $r->parse_file($fh); $fh->close(); ok($r->repositoryName eq 'citebase.eprints.org'); HTTP-OAI-4.03/lib/0000755003026500005230000000000012320512116012214 5ustar tdbrodyjfHTTP-OAI-4.03/lib/HTTP/0000755003026500005230000000000012320512116012773 5ustar tdbrodyjfHTTP-OAI-4.03/lib/HTTP/OAI/0000755003026500005230000000000012320512116013403 5ustar tdbrodyjfHTTP-OAI-4.03/lib/HTTP/OAI/Metadata.pm0000644003026500005230000000311012317253770015472 0ustar tdbrodyjfpackage HTTP::OAI::Metadata; @ISA = qw( HTTP::OAI::MemberMixin HTTP::OAI::SAX::Base ); use strict; sub new { my( $class, %self ) = @_; $self{doc} = XML::LibXML::Document->new( '1.0', 'UTF-8' ); $self{dom} = $self{current} = $self{doc}->createDocumentFragment; return bless \%self, $class; } sub metadata { shift->dom( @_ ) } sub dom { shift->_elem( "dom", @_ ) } sub generate { my( $self, $driver ) = @_; $driver->generate( $self->dom ); } sub start_element { my( $self, $hash ) = @_; my $node = $self->{doc}->createElementNS( $hash->{NamespaceURI}, $hash->{Name}, ); foreach my $attr (values %{$hash->{Attributes}}) { Carp::confess "Can't setAttribute without attribute name" if !defined $attr->{Name}; $node->setAttribute( $attr->{Name}, $attr->{Value} ); } $self->{current} = $self->{current}->appendChild( $node ); } sub end_element { my( $self, $hash ) = @_; $self->{current} = $self->{current}->parentNode; } sub characters { my( $self, $hash ) = @_; $self->{current}->appendText( $hash->{Data} ); } 1; __END__ =head1 NAME HTTP::OAI::Metadata - Base class for data objects that contain DOM trees =head1 SYNOPSIS use HTTP::OAI::Metadata; $xml = XML::LibXML::Document->new(); $xml = XML::LibXML->new->parse( ... ); $md = new HTTP::OAI::Metadata(dom=>$xml); print $md->dom->toString; my $dom = $md->dom(); # Return internal DOM tree =head1 METHODS =over 4 =item $md->dom( [$dom] ) Return and optionally set the XML DOM object that contains the actual metadata. If you intend to use the generate() method $dom must be a XML_DOCUMENT_NODE. 
=back HTTP-OAI-4.03/lib/HTTP/OAI/SAXHandler.pm0000644003026500005230000001307512317253770015716 0ustar tdbrodyjfpackage HTTP::OAI::SAXHandler; use strict; use warnings; use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); use Data::Dumper; # debugging for here @ISA = qw( Exporter XML::SAX::Base ); @EXPORT_OK = qw( g_start_document g_start_element g_end_element g_data_element ); %EXPORT_TAGS = (SAX=>[qw( g_start_document g_start_element g_end_element g_data_element )]); =pod =head1 NAME HTTP::OAI::SAXHandler - SAX2 utility filter =head1 DESCRIPTION This module provides utility methods for SAX2, including collapsing multiple "characters" events into a single event. This module exports methods for generating SAX2 events with Namespace support. This *isn't* a fully-fledged SAX2 generator! =over 4 =item $h = HTTP::OAI::SAXHandler->new() Class constructor. =cut sub new { my ($class,%args) = @_; $class = ref($class) || $class; my $self = $class->SUPER::new(%args); $self->{Depth} = 0; $self; } sub g_start_document { my ($handler) = @_; $handler->start_document(); $handler->start_prefix_mapping({ 'Prefix'=>'xsi', 'NamespaceURI'=>'http://www.w3.org/2001/XMLSchema-instance' }); $handler->start_prefix_mapping({ 'Prefix'=>'', 'NamespaceURI'=>'http://www.openarchives.org/OAI/2.0/' }); } sub g_data_element { my ($handler,$uri,$qName,$attr,$value) = @_; g_start_element($handler,$uri,$qName,$attr); if( ref($value) ) { $value->set_handler($handler); $value->generate; } else { $handler->characters({'Data'=>$value}); } g_end_element($handler,$uri,$qName); } sub g_start_element { my ($handler,$uri,$qName,$attr) = @_; $attr ||= {}; my ($prefix,$localName) = split /:/, $qName; unless(defined($localName)) { $localName = $prefix; $prefix = ''; } $handler->start_element({ 'NamespaceURI'=>$uri, 'Name'=>$qName, 'Prefix'=>$prefix, 'LocalName'=>$localName, 'Attributes'=>$attr }); } sub g_end_element { my ($handler,$uri,$qName) = @_; my ($prefix,$localName) = split /:/, $qName; 
unless(defined($localName)) { $localName = $prefix; $prefix = ''; } $handler->end_element({ 'NamespaceURI'=>$uri, 'Name'=>$qName, 'Prefix'=>$prefix, 'LocalName'=>$localName, }); } sub current_state { my $self = shift; return $self->{State}->[$#{$self->{State}}]; } sub current_element { my $self = shift; return $self->{Elem}->[$#{$self->{Elem}}]; } sub start_document { HTTP::OAI::Debug::sax( Dumper($_[1]) ); $_[0]->SUPER::start_document(); } sub end_document { $_[0]->SUPER::end_document(); HTTP::OAI::Debug::sax( Dumper($_[1]) ); } # Char data is rolled together by this module sub characters { my ($self,$hash) = @_; $self->{Text} .= $hash->{Data}; # characters are traced in {start,end}_element #HTTP::OAI::Debug::sax( "'" . substr($hash->{Data},0,40) . "'" ); } sub start_element { my ($self,$hash) = @_; push @{$self->{Attributes}}, $hash->{Attributes}; # Call characters with the joined character data if( defined($self->{Text}) ) { HTTP::OAI::Debug::sax( "'".substr($self->{Text},0,40) . "'" ); $self->SUPER::characters({Data=>$self->{Text}}); $self->{Text} = undef; } $hash->{State} = $self; $hash->{Depth} = ++$self->{Depth}; HTTP::OAI::Debug::sax( (" " x $hash->{Depth}) . '<'.$hash->{Name}.'>' ); $self->SUPER::start_element($hash); } sub end_element { my ($self,$hash) = @_; # Call characters with the joined character data $hash->{Text} = $self->{Text}; if( defined($self->{Text}) ) { # Trailing whitespace causes problems if( $self->{Text} =~ /\S/ ) { HTTP::OAI::Debug::sax( "'".substr($self->{Text},0,40) . "'" ); $self->SUPER::characters({Data=>$self->{Text}}); } $self->{Text} = undef; } $hash->{Attributes} = pop @{$self->{Attributes}} || {}; $hash->{State} = $self; $hash->{Depth} = $self->{Depth}--; HTTP::OAI::Debug::sax( (" " x $hash->{Depth}) . 
' <'.$hash->{Name}.'>' ); $self->SUPER::end_element($hash); } sub entity_reference { my ($self,$hash) = @_; HTTP::OAI::Debug::sax( $hash->{Name} ); } sub start_cdata { HTTP::OAI::Debug::sax(); } sub end_cdata { HTTP::OAI::Debug::sax(); } sub comment { HTTP::OAI::Debug::sax( $_[1]->{Data} ); } sub doctype_decl { # {SystemId,PublicId,Internal} HTTP::OAI::Debug::sax( $_[1]->{Name} ); } sub attlist_decl { # {ElementName,AttributeName,Type,Default,Fixed} HTTP::OAI::Debug::sax( $_[1]->{ElementName} ); } sub xml_decl { # {Version,Encoding,Standalone} HTTP::OAI::Debug::sax( join ", ", map { defined($_) ? $_ : "null" } @{$_[1]}{qw( Version Encoding Standalone )} ); } sub entity_decl { # {Value,SystemId,PublicId,Notation} HTTP::OAI::Debug::sax( $_[1]->{Name} ); } sub unparsed_decl { HTTP::OAI::Debug::sax(); } sub element_decl { # {Model} HTTP::OAI::Debug::sax( $_[1]->{Name} ); } sub notation_decl { # {Name,Base,SystemId,PublicId} HTTP::OAI::Debug::sax( $_[1]->{Name} ); } sub processing_instruction { # {Target,Data} HTTP::OAI::Debug::sax( $_[1]->{Target} . " => " . 
$_[1]->{Data} ); } package HTTP::OAI::FilterDOMFragment; use vars qw( @ISA ); @ISA = qw( XML::SAX::Base ); # Trap things that don't apply to a balanced fragment sub start_document {} sub end_document {} sub xml_decl {} package XML::SAX::Debug; use Data::Dumper; use vars qw( @ISA $AUTOLOAD ); @ISA = qw( XML::SAX::Base ); sub DEBUG { my ($event,$self,$hash) = @_; warn "$event(".Dumper($hash).")\n"; my $superior = "SUPER::$event"; $self->$superior($hash); } sub start_document { DEBUG('start_document',@_) } sub end_document { DEBUG('end_document',@_) } sub start_element { DEBUG('start_element',@_) } sub end_element { DEBUG('end_element',@_) } sub characters { DEBUG('characters',@_) } sub xml_decl { DEBUG('xml_decl',@_) } 1; __END__ =back =head1 AUTHOR Tim Brody HTTP-OAI-4.03/lib/HTTP/OAI/ListRecords.pm0000644003026500005230000000365612317253770016226 0ustar tdbrodyjfpackage HTTP::OAI::ListRecords; @ISA = qw( HTTP::OAI::PartialList ); use strict; sub record { shift->item(@_) } sub start_element { my ($self,$hash, $r) = @_; if( $hash->{Depth} == 3 && $hash->{LocalName} eq "record" ) { $self->set_handler(HTTP::OAI::Record->new); } $self->SUPER::start_element($hash, $r); } sub end_element { my ($self,$hash, $r) = @_; $self->SUPER::end_element($hash, $r); if( $hash->{Depth} == 3 && $hash->{LocalName} eq "record" ) { HTTP::OAI::Debug::trace( "record: " . 
$self->get_handler->identifier ); $r->callback( $self->get_handler, $self ); $self->set_handler( undef ); } } 1; __END__ =head1 NAME HTTP::OAI::ListRecords - Provide access to an OAI ListRecords response =head1 SYNOPSIS my $r = $h->ListRecords( metadataPrefix=>'oai_dc', ); while( my $rec = $r->next ) { print "Identifier => ", $rec->identifier, "\n"; } die $r->message if $r->is_error; # Using callback method sub callback { my $rec = shift; print "Identifier => ", $rec->identifier, "\n"; }; my $r = $h->ListRecords( metadataPrefix=>'oai_dc', onRecord=>\&callback ); die $r->message if $r->is_error; =head1 METHODS =over 4 =item $lr = new HTTP::OAI::ListRecords This constructor method returns a new HTTP::OAI::ListRecords object. =item $rec = $lr->next Returns either an L object, or undef, if no more record are available. Use $rec->is_error to test whether there was an error getting the next record. =item @recl = $lr->record([$rec]) Returns the record list and optionally adds a new record or resumptionToken, $rec. Returns an array ref of Ls, including an optional resumptionToken string. =item $token = $lr->resumptionToken([$token]) Returns and optionally sets the L. =item $dom = $lr->toDOM Returns a XML::DOM object representing the ListRecords response. 
=back HTTP-OAI-4.03/lib/HTTP/OAI/ListIdentifiers.pm0000644003026500005230000000377612317253770017075 0ustar tdbrodyjfpackage HTTP::OAI::ListIdentifiers; @ISA = qw( HTTP::OAI::PartialList ); use strict; sub identifier { shift->item(@_) } sub start_element { my ($self,$hash, $r) = @_; if( $hash->{Depth} == 3 && $hash->{LocalName} eq "header" ) { $self->set_handler(HTTP::OAI::Header->new); } $self->SUPER::start_element($hash, $r); } sub end_element { my ($self,$hash, $r) = @_; $self->SUPER::end_element($hash); # OAI 1.x if( $hash->{Depth} == 3 && $hash->{LocalName} eq "identifier" ) { $r->callback(HTTP::OAI::Header->new( identifier=>$hash->{Text}, datestamp=>'0000-00-00', )); } elsif( $hash->{Depth} == 3 && $hash->{LocalName} eq "header" ) { $r->callback( $self->get_handler, $self ); $self->set_handler( undef ); } } 1; __END__ =head1 NAME HTTP::OAI::ListIdentifiers - Provide access to an OAI ListIdentifiers response =head1 SYNOPSIS my $r = $h->ListIdentifiers; while(my $rec = $r->next) { print "identifier => ", $rec->identifier, "\n", print "datestamp => ", $rec->datestamp, "\n" if $rec->datestamp; print "status => ", ($rec->status || 'undef'), "\n"; } die $r->message if $r->is_error; =head1 METHODS =over 4 =item $li = new OAI::ListIdentifiers This constructor method returns a new OAI::ListIdentifiers object. =item $rec = $li->next Returns either an L object, or undef, if there are no more records. Use $rec->is_error to test whether there was an error getting the next record (otherwise things will break). If -resume was set to false in the Harvest Agent, next may return a string (the resumptionToken). =item @il = $li->identifier([$idobj]) Returns the identifier list and optionally adds an identifier or resumptionToken, $idobj. Returns an array ref of Ls. =item $dom = $li->toDOM Returns a XML::DOM object representing the ListIdentifiers response. =item $token = $li->resumptionToken([$token]) Returns and optionally sets the L. 
=back HTTP-OAI-4.03/lib/HTTP/OAI/UserAgent.pm0000644003026500005230000002013612317253770015656 0ustar tdbrodyjfpackage HTTP::OAI::UserAgent; use strict; use warnings; use vars qw(@ISA $ACCEPT); # Do not use eval() our $USE_EVAL = 1; # Ignore bad utf8 characters our $IGNORE_BAD_CHARS = 1; # Silence bad utf8 warnings our $SILENT_BAD_CHARS = 0; use constant MAX_UTF8_BYTES => 4; require LWP::UserAgent; @ISA = qw(LWP::UserAgent); unless( $@ ) { $ACCEPT = "gzip"; } sub delay { shift->_elem( "delay", @_ ) } sub last_request_completed { shift->_elem( "last_request_completed", @_ ) } sub redirect_ok { 1 } sub _oai { my( $self, @args ) = @_; my $cb = ref($args[0]) eq "CODE" ? shift @args : undef; my %args = @args; $cb = delete $args{onRecord} || $cb || $self->{onRecord}; my $handlers = delete $args{handlers} || {}; if( !$args{force} && (my @errors = HTTP::OAI::Repository::validate_request(%args)) ) { return new HTTP::OAI::Response( code=>503, message=>'Invalid Request (use \'force\' to force a non-conformant request): ' . $errors[0]->toString, errors=>\@errors ); } # Get rid of any empty arguments for( keys %args ) { delete $args{$_} if !defined($args{$_}) || !length($args{$_}); } my $request = HTTP::Request->new( GET => $self->_buildurl(%args) ); delete $args{force}; my $response = HTTP::OAI::Response->new( %args, handlers => $handlers, onRecord => $cb, ); $response->request( $request ); my $parser = XML::LibXML->new( Handler => HTTP::OAI::SAX::Trace->new( Handler => HTTP::OAI::SAX::Text->new( Handler => $response ) ) ); $parser->{content_length} = 0; $parser->{content_buffer} = Encode::encode('UTF-8',''); HTTP::OAI::Debug::trace( $args{verb} . " " . ref($parser) . 
"->parse_chunk()" ); my $r; { local $SIG{__DIE__}; $r = $self->SUPER::request($request,sub { $self->lwp_callback( $parser, @_ ) }); if( $r->is_success && !defined $r->headers->header( 'Client-Aborted' ) ) { eval { $self->lwp_endparse( $parser ) }; if( $@ ) { $r->headers->header( 'Client-Aborted', 'die' ); $r->headers->header( 'X-Died', $@ ); } } } if( defined($r->headers->header( 'Client-Aborted' )) && $r->headers->header( 'Client-Aborted' ) eq 'die' ) { my $err = $r->headers->header( 'X-Died' ); if( $err eq "done" ) { $r->code(200); $r->message("OK"); } else { $r->code(500); $r->message( 'An error occurred while parsing: ' . $err ); } } my $cnt_len = $parser->{content_length}; undef $parser; # OAI retry-after if( defined($r) && $r->code == 503 && defined(my $timeout = $r->headers->header('Retry-After')) ) { if( $self->{recursion}++ > 10 ) { $r->code(500); $r->message("Server did not give a response after 10 retries"); return $r; } if( !$timeout or $timeout =~ /\D/ or $timeout < 0 or $timeout > 86400 ) { $r->code(500); $r->message("Server specified an unsupported duration to wait (\"".($timeout||'null')."\""); return $r; } HTTP::OAI::Debug::trace( "Waiting $timeout seconds" ); sleep($timeout+10); # We wait an extra 10 secs for safety return $self->_oai(@args); # Got an empty response } elsif( defined($r) && $r->is_success && $cnt_len == 0 ) { if( $self->{recursion}++ > 10 ) { $r->code(500); $r->message("No content in server response"); return $r; } HTTP::OAI::Debug::trace( "Retrying on empty response" ); sleep(5); return $self->_oai(@args); # An HTTP error occurred } elsif( $r->is_error ) { return $r; # An error occurred during parsing } elsif( $@ ) { $r->code(my $code = $@ =~ /read timeout/ ? 
504 : 600); $r->message($@); return $r; } # access the original response via previous $response->previous($r); return $response; } sub request { my( $self, @args ) = @_; my $delay = $self->delay; if( defined $delay ) { if( ref($delay) eq "CODE" ) { $delay = &$delay( $self->last_request_completed ); } select(undef,undef,undef,$delay) if $delay > 0; } my $r = $self->SUPER::request( @args ); $self->last_request_completed( time ); return $r; } sub lwp_badchar { my $codepoint = sprintf('U+%04x', ord($_[2])); unless( $SILENT_BAD_CHARS ) { warn "Bad Unicode character $codepoint at byte offset ".$_[1]->{content_length}." from ".$_[1]->{request}->uri."\n"; } return $codepoint; } sub lwp_endparse { my( $self, $parser ) = @_; my $utf8 = $parser->{content_buffer}; # Replace bad chars with '?' if( $IGNORE_BAD_CHARS and length($utf8) ) { $utf8 = Encode::decode('UTF-8', $utf8, sub { $self->lwp_badchar($parser, @_) }); } if( length($utf8) > 0 ) { _ccchars($utf8); # Fix control chars $parser->{content_length} += length($utf8); $parser->parse_chunk($utf8); } delete($parser->{content_buffer}); $parser->parse_chunk('', 1); } sub lwp_callback { my( $self, $parser ) = @_; use bytes; # fixing utf-8 will need byte semantics $parser->{content_buffer} .= $_[2]; do { # FB_QUIET won't split multi-byte chars on input my $utf8 = Encode::decode('UTF-8', $parser->{content_buffer}, Encode::FB_QUIET); if( length($utf8) > 0 ) { use utf8; _ccchars($utf8); # Fix control chars $parser->{content_length} += length($utf8); $parser->parse_chunk($utf8); } if( length($parser->{content_buffer}) > MAX_UTF8_BYTES ) { $parser->{content_buffer} =~ s/^([\x80-\xff]{1,4})//s; my $badbytes = $1; if( length($badbytes) == 0 ) { Carp::confess "Internal error - bad bytes but not in 0x80-0xff range???"; } if( $IGNORE_BAD_CHARS ) { $badbytes = join('', map { $self->lwp_badchar($parser, $_) } split //, $badbytes); } $parser->parse_chunk( $badbytes ); } } while( length($parser->{content_buffer}) > MAX_UTF8_BYTES ); } sub 
_ccchars { $_[0] =~ s/([\x00-\x08\x0b-\x0c\x0e-\x1f])/sprintf("\\%04d",ord($1))/seg; } sub _buildurl { my( $self, %args ) = @_; Carp::confess "Requires verb parameter" unless $args{'verb'}; my $uri = URI->new( $self->baseURL ); return $uri->as_string if $uri->scheme eq "file"; if( defined($args{resumptionToken}) && !$args{force} ) { $uri->query_form(verb=>$args{'verb'},resumptionToken=>$args{'resumptionToken'}); } else { delete $args{force}; # http://www.cshc.ubc.ca/oai/ breaks if verb isn't first, doh $uri->query_form(verb=>delete($args{'verb'}),%args); } return $uri->as_string; } sub decompress { my ($response) = @_; my $type = $response->headers->header("Content-Encoding"); return $response->{_content_filename} unless defined($type); if( $type eq 'gzip' ) { my $filename = File::Temp->new( UNLINK => 1 ); my $gz = Compress::Zlib::gzopen($response->{_content_filename}, "r") or die $!; my ($buffer,$c); my $fh = IO::File->new($filename,"w"); binmode($fh,":utf8"); while( ($c = $gz->gzread($buffer)) > 0 ) { print $fh $buffer; } $fh->close(); $gz->gzclose(); die "Error decompressing gziped response: " . $gz->gzerror() if -1 == $c; return $response->{_content_filename} = $filename; } else { die "Unsupported compression returned: $type\n"; } } 1; __END__ =head1 NAME HTTP::OAI::UserAgent - Extension of the LWP::UserAgent for OAI HTTP requests =head1 DESCRIPTION This module provides a simplified mechanism for making requests to an OAI repository, using the existing LWP::UserAgent module. =head1 SYNOPSIS require HTTP::OAI::UserAgent; my $ua = new HTTP::OAI::UserAgent; my $response = $ua->request( baseURL=>'http://arXiv.org/oai1', verb=>'ListRecords', from=>'2001-08-01', until=>'2001-08-31' ); print $response->content; =head1 METHODS =over 4 =item $ua = new HTTP::OAI::UserAgent(proxy=>'www-cache',...) This constructor method returns a new instance of a HTTP::OAI::UserAgent module. All arguments are passed to the L constructor. 
=item $r = $ua->request($req) Requests the HTTP response defined by $req, which is a L object. =item $r = $ua->request(baseURL=>$baseref, verb=>$verb, %opts) Makes an HTTP request to the given OAI server (baseURL) with OAI arguments. Returns an L object. OAI-PMH related options: from => $from until => $until resumptionToken => $token metadataPrefix => $mdp set => $set =item $time_d = $ua->delay( $time_d ) Return and optionally set a time (in seconds) to wait between requests. $time_d may be a CODEREF. =back HTTP-OAI-4.03/lib/HTTP/OAI/Set.pm0000644003026500005230000000302712317253770014514 0ustar tdbrodyjfpackage HTTP::OAI::Set; @ISA = qw( HTTP::OAI::MemberMixin XML::SAX::Base ); use strict; sub setSpec { shift->_elem('setSpec',@_) } sub setName { shift->_elem('setName',@_) } sub setDescription { shift->_multi('setDescription',@_) } sub generate { my( $self, $driver ) = @_; $driver->start_element( 'set' ); $driver->data_element( 'setSpec', $self->setSpec ); $driver->data_element( 'setName', $self->setName ); for( $self->setDescription ) { $_->generate; } $driver->end_element( 'set' ); } sub start_element { my ($self,$hash,$r) = @_; my $elem = lc($hash->{Name}); if( $elem eq 'setdescription' ) { $self->setDescription(my $desc = HTTP::OAI::Metadata->new); $self->set_handler($desc); $self->{in_desc} = $hash->{Depth}; } $self->SUPER::start_element($hash,$r); } sub end_element { my ($self,$hash,$r) = @_; $self->SUPER::end_element($hash,$r); if( $self->{in_desc} ) { if( $self->{in_desc} == $hash->{Depth} ) { $self->set_handler( undef ); } } else { my $elem = $hash->{Name}; if( $elem =~ /^setSpec|setName$/ ) { $self->$elem( $hash->{Text} ); } } } 1; __END__ =head1 NAME HTTP::OAI::Set - Encapsulates OAI set XML data =head1 METHODS =over 4 =item $spec = $s->setSpec([$spec]) =item $name = $s->setName([$name]) These methods return respectively, the setSpec and setName of the OAI Set. 
=item @descs = $s->setDescription([$desc]) Returns and optionally adds the list of set descriptions. Returns a reference to an array of L objects. =back HTTP-OAI-4.03/lib/HTTP/OAI/MemberMixin.pm0000644003026500005230000000141712317253770016176 0ustar tdbrodyjfpackage HTTP::OAI::MemberMixin; @ISA = qw( LWP::MemberMixin ); sub new { my( $class, %self ) = @_; return bless \%self, $class; } sub harvester { shift->_elem("harvester",@_) } sub _multi { my( $self, $elem ) = splice(@_, 0, 2); if( ref($_[0]) eq "ARRAY" ) { $self->{$elem} = $_[0]; } elsif( @_ ) { push @{$self->{$elem}}, @_; } return @{$self->{$elem} || []}; } 1; =head1 NAME HTTP::OAI::MemberMixin =head1 DESCRIPTION Subclasses L to provide attribute utility methods. =head1 METHODS =over 4 =item $obj->_elem( FIELD [, VALUE ] ) See L. =item $obj->_multi( FIELD [, VALUE ] ) Same as L but if you pass a non-ARRAY reference appends the given value(s). In list context returns a list of all the items. HTTP-OAI-4.03/lib/HTTP/OAI/ResumptionToken.pm0000644003026500005230000000371612317253770017134 0ustar tdbrodyjfpackage HTTP::OAI::ResumptionToken; @ISA = qw( HTTP::OAI::MemberMixin XML::SAX::Base ); use strict; use overload "bool" => \¬_empty; sub resumptionToken { shift->_elem('resumptionToken',@_) } sub expirationDate { shift->_elem('expirationDate',@_) } sub completeListSize { shift->_elem('completeListSize',@_) } sub cursor { shift->_elem('cursor',@_) } sub not_empty { defined($_[0]->resumptionToken) and length($_[0]->resumptionToken) > 0 } sub is_empty { !not_empty(@_) } sub generate { my( $self, $driver ) = @_; $driver->data_element( 'resumptionToken', $self->resumptionToken, expirationDate => scalar($self->expirationDate), completeListSize => scalar($self->completeListSize), cursor => scalar($self->cursor), ); } sub end_element { my ($self,$hash) = @_; $self->SUPER::end_element($hash); if( lc($hash->{LocalName}) eq 'resumptiontoken' ) { $self->resumptionToken($hash->{Text}); my $attr = $hash->{Attributes}; 
$self->expirationDate($attr->{'{}expirationDate'}->{'Value'}); $self->completeListSize($attr->{'{}completeListSize'}->{'Value'}); $self->cursor($attr->{'{}cursor'}->{'Value'}); } } 1; __END__ =head1 NAME HTTP::OAI::ResumptionToken - Encapsulates an OAI resumption token =head1 METHODS =over 4 =item $rt = new HTTP::OAI::ResumptionToken This constructor method returns a new HTTP::OAI::ResumptionToken object. =item $token = $rt->resumptionToken([$token]) Returns and optionally sets the resumption token string. =item $ed = $rt->expirationDate([$rt]) Returns and optionally sets the expiration date of the resumption token. =item $cls = $rt->completeListSize([$cls]) Returns and optionally sets the cardinality of the result set. =item $cur = $rt->cursor([$cur]) Returns and optionally sets the index of the first record (of the current page) in the result set. =back =head1 NOTE - Completing incomplete list The final page of a record list which has been split using resumption tokens must contain an empty resumption token. HTTP-OAI-4.03/lib/HTTP/OAI/ListSets.pm0000644003026500005230000000315612317253770015536 0ustar tdbrodyjfpackage HTTP::OAI::ListSets; @ISA = qw( HTTP::OAI::PartialList ); use strict; sub set { shift->item(@_) } sub start_element { my ($self,$hash, $r) = @_; if( $hash->{Depth} == 3 && $hash->{LocalName} eq "set" ) { $self->set_handler(HTTP::OAI::Set->new); } $self->SUPER::start_element($hash, $r); } sub end_element { my ($self,$hash, $r) = @_; $self->SUPER::end_element($hash, $r); if( $hash->{Depth} == 3 && $hash->{LocalName} eq "set" ) { $r->callback( $self->get_handler, $self ); $self->set_handler( undef ); } } 1; __END__ =head1 NAME HTTP::OAI::ListSets - Provide access to an OAI ListSets response =head1 SYNOPSIS my $r = $h->ListSets(); while( my $rec = $r->next ) { print $rec->setSpec, "\n"; } die $r->message if $r->is_error; =head1 METHODS =over 4 =item $ls = new HTTP::OAI::ListSets This constructor method returns a new OAI::ListSets object. 
=item $set = $ls->next Returns either an L object, or undef, if no more records are available. Use $set->is_error to test whether there was an error getting the next record. If -resume was set to false in the Harvest Agent, next may return a string (the resumptionToken). =item @setl = $ls->set([$set]) Returns the set list and optionally adds a new set or resumptionToken, $set. Returns an array ref of Ls, with an optional resumptionToken string. =item $token = $ls->resumptionToken([$token]) Returns and optionally sets the L. =item $dom = $ls->toDOM Returns a XML::DOM object representing the ListSets response. =back HTTP-OAI-4.03/lib/HTTP/OAI/Harvester.pm0000644003026500005230000002467512317253770015740 0ustar tdbrodyjfpackage HTTP::OAI::Harvester; use base HTTP::OAI::UserAgent; use strict; sub new { my ($class,%args) = @_; my %ARGS = %args; delete @ARGS{qw(baseURL resume repository handlers onRecord)}; my $self = $class->SUPER::new(%ARGS); $self->{doc} = XML::LibXML::Document->new( '1.0', 'UTF-8' ); $self->{'resume'} = exists($args{resume}) ? $args{resume} : 1; $self->agent('OAI-PERL/'.$HTTP::OAI::VERSION); # Record the base URL this harvester instance is associated with $self->{repository} = $args{repository} || HTTP::OAI::Identify->new(baseURL=>$args{baseURL}); Carp::croak "Requires repository or baseURL" unless $self->repository and $self->repository->baseURL; # Canonicalise $self->baseURL($self->baseURL); return $self; } sub resume { shift->_elem('resume',@_) } sub repository { shift->_elem('repository',@_) } sub baseURL { my $self = shift; return @_ ? 
$self->repository->baseURL(URI->new(shift)->canonical) : $self->repository->baseURL(); } sub version { shift->repository->protocolVersion(@_); } sub ListIdentifiers { shift->_list( @_, verb => "ListIdentifiers" ); } sub ListRecords { shift->_list( @_, verb => "ListRecords" ); } sub ListSets { shift->_list( @_, verb => "ListSets" ); } sub _list { my $self = shift; local $self->{recursion}; my $r = $self->_oai( @_ ); # resume the partial list? # note: noRecordsMatch is a "success" but won't have a resumptionToken RESUME: while($self->resume && $r->is_success && !$r->error && defined(my $token = $r->resumptionToken)) { last RESUME if !$token->resumptionToken; local $self->{recursion}; $r = $self->_oai( $r->{onRecord}, handlers => $r->handlers, verb => $r->verb, resumptionToken => $token->resumptionToken, ); } $self->version( $r->version ) if $r->is_success; return $r; } # build the methods for each OAI verb foreach my $verb (qw( GetRecord Identify ListMetadataFormats )) { no strict "refs"; *$verb = sub { my $self = shift; local $self->{recursion}; my $r = $self->_oai( @_, verb => $verb ); $self->version( $r->version ) if $r->is_success; return $r; }; } 1; __END__ =head1 NAME HTTP::OAI::Harvester - Agent for harvesting from Open Archives version 1.0, 1.1, 2.0 and static ('2.0s') compatible repositories =head1 DESCRIPTION C is the harvesting front-end in the OAI-PERL library. To harvest from an OAI-PMH compliant repository create an C object using the baseURL option and then call OAI-PMH methods to request data from the repository. To handle version 1.0/1.1 repositories automatically you B request C first. It is recommended that you request an Identify from the Repository and use the C method to update the Identify object used by the harvester. When making OAI requests the underlying L module will take care of automatic redirection (http code 302) and retry-after (http code 503). OAI-PMH flow control (i.e. resumption tokens) is handled transparently by C. 
=head2 Static Repository Support Static repositories are automatically and transparently supported within the existing API. To harvest a static repository, specify the repository XML file using the baseURL argument to HTTP::OAI::Harvester. An initial request is made that determines whether the base URL specifies a static repository or a normal OAI 1.x/2.0 CGI repository. To prevent this initial request, state the OAI version using an HTTP::OAI::Identify object, e.g. $h = HTTP::OAI::Harvester->new( repository=>HTTP::OAI::Identify->new( baseURL => 'http://arXiv.org/oai2', version => '2.0', )); If a static repository is found the response is cached, and further requests are served by that cache. Static repositories do not support sets, and will result in a noSetHierarchy error if you try to use sets. You can determine whether the repository is static by checking the version ($h->repository->version), which will be "2.0s" for static repositories. =head1 FURTHER READING You should refer to the Open Archives Protocol version 2.0 and other OAI documentation, available from http://www.openarchives.org/. Note OAI-PMH 1.0 and 1.1 are deprecated. =head1 BEFORE USING EXAMPLES In the examples I use arXiv.org's and cogprints' OAI interfaces. To avoid causing annoyance to their server administrators please contact them before performing testing or large downloads (or use other, less loaded, servers for testing). =head1 SYNOPSIS use HTTP::OAI; my $h = new HTTP::OAI::Harvester(baseURL=>'http://arXiv.org/oai2'); my $response = $h->repository($h->Identify); if( $response->is_error ) { print "Error requesting Identify:\n", $response->code . " " .
$response->message, "\n"; exit; } # Note: protocolVersion will always be 2.0, $response->version returns # the actual version the repository is running print "Repository supports protocol version ", $response->version, "\n"; # Version 1.x repositories don't support metadataPrefix, # but OAI-PERL will drop the prefix automatically # if an Identify was requested first (as above) $response = $h->ListIdentifiers( metadataPrefix=>'oai_dc', from=>'2001-02-03', until=>'2001-04-10' ); if( $response->is_error ) { die("Error harvesting: " . $response->message . "\n"); } print "responseDate => ", $response->responseDate, "\n", "requestURL => ", $response->requestURL, "\n"; while( my $id = $response->next ) { print "identifier => ", $id->identifier; # Only available from OAI 2.0 repositories print " (", $id->datestamp, ")" if $id->datestamp; print " (", $id->status, ")" if $id->status; print "\n"; # Only available from OAI 2.0 repositories for( $id->setSpec ) { print "\t", $_, "\n"; } } # Using a handler $response = $h->ListRecords( metadataPrefix=>'oai_dc', handlers=>{metadata=>'HTTP::OAI::Metadata::OAI_DC'}, ); while( my $rec = $response->next ) { print $rec->identifier, "\t", $rec->datestamp, "\n", $rec->metadata, "\n"; print join(',', @{$rec->metadata->dc->{'title'}}), "\n"; } if( $response->is_error ) { die $response->message; } # Offline parsing $I = HTTP::OAI::Identify->new(); $I->parse_string($content); $I->parse_file($fh); =head1 METHODS =over 4 =item HTTP::OAI::Harvester->new( %params ) This constructor method returns a new instance of C<HTTP::OAI::Harvester>. Requires either an L<HTTP::OAI::Identify> object, which in turn must contain a baseURL, or a baseURL from which to construct an Identify object. Any other parameters are passed to the L<HTTP::OAI::UserAgent> module, and from there to the L<LWP::UserAgent> module.
$h = HTTP::OAI::Harvester->new( baseURL => 'http://arXiv.org/oai2', resume=>0, # Suppress automatic resumption ); $id = $h->repository(); $h->repository($h->Identify); $h = HTTP::OAI::Harvester->new( repository => HTTP::OAI::Identify->new( baseURL => 'http://arXiv.org/oai2', )); =item $h->repository() Returns and optionally sets the L<HTTP::OAI::Identify> object used by the Harvester agent. =item $h->resume( [1] ) If set to true (default) resumption tokens will automatically be handled by requesting the next partial list during C<ListIdentifiers>, C<ListRecords> and C<ListSets> calls. =back =head1 OAI-PMH Verbs The 6 OAI-PMH Verbs are the requests supported by an OAI-PMH interface. =head2 Error Messages Use C<is_error> or C<is_success> on the returned object to determine whether an error occurred (see L<HTTP::OAI::Response>). C<code> and C<message> return the error code (200 is success) and a human-readable message respectively. L<Errors|HTTP::OAI::Error> returned by the repository can be retrieved using the C<errors> method: foreach my $error ($r->errors) { print $error->code, "\t", $error->message, "\n"; } Note: C<is_success> is true for the OAI Error Code C<noRecordsMatch> (i.e. empty set), although C<errors> will still contain the OAI error. =head2 Flow Control If the response contained a L<resumption token|HTTP::OAI::ResumptionToken> this can be retrieved using the $r->resumptionToken method. =head2 Methods These methods return an object subclassed from L<HTTP::OAI::Response> (where the class corresponds to the verb requested, e.g. C<GetRecord> requests return an C<HTTP::OAI::GetRecord> object). =over 4 =item $r = $h->GetRecord( %params ) Get a single record from the repository identified by identifier, in format metadataPrefix. $gr = $h->GetRecord( identifier => 'oai:arXiv:hep-th/0001001', # Required metadataPrefix => 'oai_dc' # Required ); $rec = $gr->next; die $rec->message if $rec->is_error; printf("%s (%s)\n", $rec->identifier, $rec->datestamp); $dom = $rec->metadata->dom; =item $r = $h->Identify() Get information about the repository.
$id = $h->Identify(); print join ',', $id->adminEmail; =item $r = $h->ListIdentifiers( %params ) Retrieve the identifiers, datestamps, sets and deleted status for all records within the specified date range (from/until) and set spec (set). 1.x repositories will only return the identifier. Or, resume an existing harvest by specifying resumptionToken. $lr = $h->ListIdentifiers( metadataPrefix => 'oai_dc', # Required from => '2001-10-01', until => '2001-10-31', set=>'physics:hep-th', ); while($rec = $lr->next) { { ... do something with $rec ... } } die $lr->message if $lr->is_error; =item $r = $h->ListMetadataFormats( %params ) List available metadata formats. Given an identifier the repository should only return those metadata formats for which that item can be disseminated. $lmdf = $h->ListMetadataFormats( identifier => 'oai:arXiv.org:hep-th/0001001' ); for($lmdf->metadataFormat) { print $_->metadataPrefix, "\n"; } die $lmdf->message if $lmdf->is_error; =item $r = $h->ListRecords( %params ) Return full records within the specified date range (from/until), set and metadata format. Or, specify a resumption token to resume a previous partial harvest. $lr = $h->ListRecords( metadataPrefix=>'oai_dc', # Required from => '2001-10-01', until => '2001-10-01', set => 'physics:hep-th', ); while($rec = $lr->next) { { ... do something with $rec ... } } die $lr->message if $lr->is_error; =item $r = $h->ListSets( %params ) Return a list of sets provided by the repository. The scope of sets is undefined by OAI-PMH, and may therefore represent any subset of a collection. Optionally provide a resumption token to resume a previous partial request. $ls = $h->ListSets(); while($set = $ls->next) { print $set->setSpec, "\n"; } die $ls->message if $ls->is_error; =back =head1 AUTHOR These modules have been written by Tim Brody E<lt>tdb01r@ecs.soton.ac.ukE<gt>.
HTTP-OAI-4.03/lib/HTTP/OAI/Response.pm0000644003026500005230000001725012317253770015562 0ustar tdbrodyjf
package HTTP::OAI::Response;

require POSIX;

@ISA = qw( HTTP::Response HTTP::OAI::MemberMixin HTTP::OAI::SAX::Base );

use strict;

# Backwards compatibility, pass any unknown methods to content
our $AUTOLOAD;

sub DESTROY {}

# Forward any method this class does not define to the most recent content
# object; returns undef when there is no content yet.
sub AUTOLOAD
{
    my $self = shift;
    $AUTOLOAD =~ s/^.*:://;
    # don't call a $self method here, because that might call AUTOLOAD again!
    my $content = $self->{content}->[-1];
    return defined $content ? $content->$AUTOLOAD( @_ ) : undef;
}

# Construct a response. handlers, onRecord, code and message are consumed
# here; every remaining key/value pair is stored as a header. responseDate
# defaults to the current UTC time and requestURL to CGI::self_url() when
# that function has been loaded.
sub new
{
    my( $class, %self ) = @_;

    my $handlers = delete $self{handlers};
    my $cb = delete $self{onRecord};

    $self{responseDate} ||= POSIX::strftime("%Y-%m-%dT%H:%M:%S",gmtime).'Z';
    $self{requestURL} ||= CGI::self_url() if defined &CGI::self_url;

    my $self = $class->SUPER::new(
        delete($self{code}) || 200,
        delete($self{message}) || "OK",
        HTTP::Headers->new( %self )
    );

    $self->{Depth} = 0;
    $self->{handlers} = $handlers || {};
    $self->{onRecord} = $cb;
    $self->{doc} = XML::LibXML::Document->new( '1.0', 'UTF-8' );
    $self->{content} = [];

    return $self;
}

# Back compatibility
sub errors { shift->error(@_) }
sub toDOM { shift->dom }

# data that belong to this class
sub content { shift->_multi('content',@_) }
sub doc { shift->_elem('doc',@_) }
sub handlers { shift->_elem('handlers',@_) }

# data that belong to this class's headers
sub version { shift->headers->header('version',@_) }
sub verb { shift->headers->header('verb',@_) }
sub error { shift->headers->header('error',@_) }
sub xslt { shift->headers->header('xslt',@_) }
sub responseDate { shift->headers->header('responseDate',@_) }
sub requestURL { shift->headers->header('requestURL',@_) }

# Deliver one parsed item: to the onRecord callback when one was supplied,
# otherwise onto $list (which is then required).
sub callback
{
    my( $self, $item, $list ) = @_;

    if( defined $self->{onRecord} )
    {
        $self->{onRecord}->( $item, $self );
    }
    else
    {
        Carp::confess( "Requires list parameter" ) if !defined $list;
        $list->item( $item );
    }
}

# error on 600 as well
sub is_error { my $code = shift->code; $code != 0 && $code != 200 }
sub is_success { !shift->is_error }

# Parse an OAI response held in a string. A genuine parse failure sets code
# 600 and stores the exception text as the message.
sub parse_string
{
    my( $self, $string ) = @_;

    eval { $self->SUPER::parse_string( $string ) };
    # "done\n" is this class's own early-exit signal (see start_element and
    # end_element): the result has already been recorded, so it must not be
    # reported as a parse failure.
    if( $@ && $@ ne "done\n" )
    {
        $self->code( 600 );
        $self->message( $@ );
    }
}

# Parse an OAI response from a file handle; same error handling as
# parse_string.
sub parse_file
{
    my( $self, $fh ) = @_;

    eval { $self->SUPER::parse_file( $fh ) };
    # see parse_string for why the "done\n" sentinel is let through
    if( $@ && $@ ne "done\n" )
    {
        $self->code( 600 );
        $self->message( $@ );
    }
}

# Emit this response as SAX events on $driver: an OAI-PMH envelope when the
# version is undefined or "2.0", a static:Repository document for "2.0s".
sub generate
{
    my( $self, $driver ) = @_;

    if( !defined $self->version || $self->version eq "2.0" )
    {
        $driver->start_element( 'OAI-PMH',
            'xsi:schemaLocation' => 'http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd',
        );
        $driver->data_element( 'responseDate', $self->responseDate );

        my $url = URI->new( $self->requestURL );
        if( $self->error )
        {
            # errors echo the bare request URL without its query arguments
            $url->query( undef );
            $driver->data_element( 'request', $url );
            for($self->error)
            {
                $_->generate( $driver );
            }
        }
        elsif( $self->content )
        {
            # the query arguments become attributes of <request>
            my %attr = $url->query_form;
            $url->query( undef );
            $driver->data_element( 'request', $url, %attr );
            my $content = ($self->content)[-1];
            $driver->start_element( $content->verb );
            $content->generate_body( $driver );
            $driver->end_element( $content->verb );
        }
        $driver->end_element( 'OAI-PMH' );
    }
    elsif( $self->version eq "2.0s" )
    {
        $driver->start_prefix_mapping({
            Prefix => 'static',
            NamespaceURI => 'http://www.openarchives.org/OAI/2.0/static-repository',
        });
        $driver->start_element( 'static:Repository',
            'xsi:schemaLocation' => 'http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd http://www.openarchives.org/OAI/2.0/static-repository http://www.openarchives.org/OAI/2.0/static-repository.xsd',
        );
        for($self->content)
        {
            $driver->start_element( 'static:' . $_->verb );
            $_->generate_body( $driver );
            $driver->end_element( 'static:' . $_->verb );
        }
        $driver->end_element( 'static:Repository' );
    }
}

# SAX start_element: depth 1 identifies the protocol version from the root
# namespace, depth 2 installs a handler for <error> or for the verb element.
sub start_element
{
    my( $self, $hash ) = @_;

    $hash->{Depth} = ++$self->{Depth};

    if( $self->{Depth} == 1 )
    {
        $self->version( $HTTP::OAI::VERSIONS{lc($hash->{NamespaceURI})} );
        if( !defined $self->version )
        {
            die "Unrecognised namespace for OAI response: {$hash->{NamespaceURI}}$hash->{Name}";
        }
        # static repositories don't contain ListIdentifiers or GetRecord, so
        # instead we'll perform a complete ListRecords then extract the
        # relevant data
        if( $self->version eq "2.0s" )
        {
            if( $self->verb eq "ListIdentifiers" || $self->verb eq "GetRecord" )
            {
                $self->{_verb} = $self->verb;
                $self->verb( "ListRecords" );
            }
            elsif( $self->verb eq 'ListSets' )
            {
                $self->content( HTTP::OAI::ListSets->new );
                $self->error(HTTP::OAI::Error->new( code => 'noSetHierarchy' ));
                die "done\n";
            }
        }
    }
    elsif( $self->{Depth} == 2 )
    {
        my $elem = $hash->{LocalName};
        if( $elem eq "error" )
        {
            $self->set_handler( my $error = HTTP::OAI::Error->new );
            $self->error( $error );
        }
        # The alternation is grouped so that ^ and $ apply to every verb
        # name; ungrouped, the anchors bound only to the first and last
        # alternatives and any element merely containing e.g. "Identify"
        # was accepted.
        elsif (
            $elem =~ /^(?:GetRecord|Identify|ListIdentifiers|ListMetadataFormats|ListRecords|ListSets)$/ &&
            (!defined $self->verb || $elem eq $self->verb)
          )
        {
            if( $self->version eq "2.0s" && $self->verb eq "ListRecords" )
            {
                # only the ListRecords section in the requested format is used
                my $metadataPrefix = $hash->{Attributes}{'{}metadataPrefix'}{Value};
                if( $metadataPrefix eq $self->headers->header( 'metadataPrefix' ) )
                {
                    $self->set_handler( my $content = "HTTP::OAI::$elem"->new );
                    $self->content( [ $content ] );
                }
            }
            else
            {
                $self->set_handler( my $content = "HTTP::OAI::$elem"->new );
                $self->content( [ $content ] );
            }
        }
    }

    $self->SUPER::start_element( $hash, $self );
}

# SAX end_element: depth 2 records the response headers and error status and
# finishes static-repository handling; depth 1 guarantees a content object.
sub end_element
{
    my( $self, $hash ) = @_;

    $hash->{Depth} = $self->{Depth};

    $self->SUPER::end_element( $hash, $self );

    if( $self->{Depth} == 2 )
    {
        my $elem = $hash->{LocalName};
        if( $elem eq "responseDate" || $elem eq "requestURL" )
        {
            $self->headers->header( $elem, $hash->{Text} );
        }
        elsif( $elem eq "request" )
        {
            $self->headers->header("request",$hash->{Text});
            # rebuild the full request URL from the <request> attributes
            my $uri = URI->new($hash->{Text});
            $uri->query_form(map { ($_->{LocalName},$_->{Value}) } values %{$hash->{Attributes}});
            $self->headers->header("requestURL",$uri);
        }
        elsif( $elem eq "error" )
        {
            my $error = $self->get_handler;
            # noRecordsMatch and noSetHierarchy are not treated as failures.
            # Grouped so both anchors apply to both codes.
            if( $error->code !~ /^(?:noRecordsMatch|noSetHierarchy)$/ )
            {
                $self->code( 500 );
                $self->message( $error->code . ": " . $error->message );
            }
        }
        # extract ListIdentifiers and GetRecord from a static ListRecords
        elsif( defined($self->get_handler) && $self->version eq "2.0s" )
        {
            # fake ListIdentifiers/GetRecord
            if( defined(my $verb = $self->{_verb}) )
            {
                if( $verb eq "ListIdentifiers" )
                {
                    my $content = HTTP::OAI::ListIdentifiers->new;
                    $content->item( map { $_->header } ($self->content)[-1]->item );
                    $self->content( [ $content ] );
                }
                elsif( $verb eq "GetRecord" )
                {
                    my $content = HTTP::OAI::GetRecord->new;
                    $content->item( [grep { $_->identifier eq $self->headers->header('identifier') } ($self->content)[-1]->item] );
                    $self->content( [ $content ] );
                    if( !defined( ($content->item)[0] ) )
                    {
                        $self->content( [] );
                        $self->error(my $error = HTTP::OAI::Error->new( code => 'idDoesNotExist' ));
                        $self->code( 500 );
                        $self->message( $error->code . ": " . $error->message );
                    }
                }
            }
            # nothing after the matching section is needed
            die "done\n";
        }
        $self->set_handler( undef );
    }

    if( $self->{Depth} == 1 )
    {
        if( $self->version eq "2.0s" && !$self->error && !$self->content )
        {
            $self->error(my $error = HTTP::OAI::Error->new( code => 'cannotDisseminateFormat' ));
            $self->code( 500 );
            $self->message( $error->code . ": " . $error->message );
        }
        # allow callers to do $r->next to check whether anything came back
        if( !$self->content && defined(my $verb = $self->verb) )
        {
            $self->content( "HTTP::OAI::$verb"->new );
        }
    }

    $self->{Depth}--;
}

1;
HTTP-OAI-4.03/lib/HTTP/OAI/Identify.pm0000644003026500005230000000736512317253770015545 0ustar tdbrodyjf
package HTTP::OAI::Identify;

@ISA = qw( HTTP::OAI::Verb );

use strict;

use HTTP::OAI::SAXHandler qw( :SAX );

# NOTE(review): the POD below describes adminEmail as multi-valued, yet it is
# stored through _elem like the single-value fields - confirm against
# HTTP::OAI::MemberMixin before relying on more than one address.
sub adminEmail { shift->_elem('adminEmail',@_) }
sub baseURL { shift->_elem('baseURL',@_) }
sub compression { shift->_multi('compression',@_) }
sub deletedRecord { shift->_elem('deletedRecord',@_) }
sub description { shift->_multi('description',@_) }
sub earliestDatestamp { shift->_elem('earliestDatestamp',@_) }
sub granularity { shift->_elem('granularity',@_) }
sub protocolVersion { shift->_elem('protocolVersion',@_) }
sub repositoryName { shift->_elem('repositoryName',@_) }

# Remove and return the next description, or undef when none are left.
sub next
{
    my $self = shift;
    return shift @{$self->{description}};
}

# Emit the Identify fields, then every description, as SAX events.
sub generate_body
{
    my( $self, $driver ) = @_;

    for(qw( repositoryName baseURL protocolVersion adminEmail earliestDatestamp deletedRecord granularity compression ))
    {
        foreach my $value ($self->$_)
        {
            $driver->data_element( $_, $value );
        }
    }

    for($self->description)
    {
        $_->generate( $driver );
    }
}

# On the outermost <description>, install a metadata handler that receives
# the nested events until the matching end tag.
sub start_element
{
    my ($self,$hash,$r) = @_;

    my $elem = lc($hash->{LocalName});

    if( $elem eq 'description' && !$self->{"in_$elem"} )
    {
        $self->set_handler(my $desc = HTTP::OAI::Metadata->new);
        $self->description([$self->description, $desc]);
        # remember the depth so nested <description> elements are ignored
        $self->{"in_$elem"} = $hash->{Depth};
    }

    $self->SUPER::start_element($hash,$r);
}

# Store the trimmed text of each Identify field as its element closes.
sub end_element
{
    my ($self,$hash,$r) = @_;

    my $elem = $hash->{LocalName};
    my $text = $hash->{Text};
    if( defined $text )
    {
        $text =~ s/^\s+//;
        $text =~ s/\s+$//;
    }

    $self->SUPER::end_element($hash,$r);

    if( defined($self->get_handler) )
    {
        if( $elem eq 'description' && $self->{"in_$elem"} == $hash->{Depth} )
        {
            $self->set_handler( undef );
            $self->{"in_$elem"} = 0;
        }
    }
    elsif( $elem eq 'adminEmail' )
    {
        $self->adminEmail($text);
    }
    elsif( $elem eq 'compression' )
    {
        $self->compression($text);
    }
    elsif( $elem eq 'baseURL' )
    {
        $self->baseURL($text);
    }
    elsif( $elem eq 'protocolVersion' )
    {
        # /\D/ also matches the "." of a dotted version, so any dotted value
        # is replaced by '2.0' (the POD states protocolVersion is always '2.0')
        $text = '2.0' if $text =~ /\D/ or $text < 2.0;
        $self->protocolVersion($text);
    }
    elsif( defined($text) && length($text) )
    {
        $self->_elem($elem,$text);
    }
}

1;

__END__

=head1 NAME

HTTP::OAI::Identify - Provide access to an OAI Identify response

=head1 SYNOPSIS

	use HTTP::OAI::Identify;

	my $i = new HTTP::OAI::Identify(
		adminEmail=>'billg@microsoft.com',
		baseURL=>'http://www.myarchives.org/oai',
		repositoryName=>'www.myarchives.org'
	);

	for( $i->adminEmail ) {
		print $_, "\n";
	}

=head1 METHODS

=over 4

=item $i = new HTTP::OAI::Identify(-baseURL=>'http://arXiv.org/oai1'[, adminEmail=>$email, protocolVersion=>'2.0', repositoryName=>'myarchive'])

This constructor method returns a new instance of the OAI::Identify module.

=item $i->version

Return the original version of the OAI response, according to the given XML namespace.

=item $i->headers

Returns an HTTP::Headers object. Use $headers->header('headername') to retrieve field values.

=item $burl = $i->baseURL([$burl])

=item $eds = $i->earliestDatestamp([$eds])

=item $gran = $i->granularity([$gran])

=item $version = $i->protocolVersion($version)

=item $name = $i->repositoryName($name)

Returns and optionally sets the relevant header. NOTE: protocolVersion will always be '2.0'. Use $i->version to find out the protocol version used by the repository.

=item @addys = $i->adminEmail([$email])

=item @cmps = $i->compression([$cmp])

Returns and optionally adds to the multi-value headers.

=item @dl = $i->description([$d])

Returns the description list and optionally appends a new description $d. Returns an array ref of descriptions, or an empty ref if there are no descriptions.

=item $d = $i->next

Returns the next description or undef if no more descriptions are left.

=item $dom = $i->toDOM

Returns a XML::DOM object representing the Identify response.
=back

HTTP-OAI-4.03/lib/HTTP/OAI/SAX/0000755003026500005230000000000012320512116014036 5ustar tdbrodyjfHTTP-OAI-4.03/lib/HTTP/OAI/SAX/Text.pm0000644003026500005230000000155412317253770015343 0ustar tdbrodyjf
package HTTP::OAI::SAX::Text;

@ISA = qw( XML::SAX::Base );

use strict;

# Begin a new element: reset the text buffer and remember the element's
# attributes so end_element can hand them back.
sub start_element {
    my ( $self, $hash, @rest ) = @_;

    $self->{Data} = "";
    push @{ $self->{Attributes} }, $hash->{Attributes};

    $self->SUPER::start_element( $hash, @rest );
}

# Buffer character data instead of forwarding it straight away.
sub characters {
    my ( $self, $event ) = @_;
    $self->{Data} .= $event->{Data};
}

# Close an element: attach the whitespace-trimmed text and the saved
# attributes to the event, flush the buffered text downstream as a single
# characters event, then forward the end_element itself.
sub end_element {
    my ( $self, $hash, @rest ) = @_;

    # strip surrounding whitespace in leaf nodes
    my $text = $self->{Data};
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;
    $hash->{Text} = $text;
    $hash->{Attributes} = pop @{ $self->{Attributes} || [] };

    # downstream receives the untrimmed text
    $self->SUPER::characters( { Data => $self->{Data} }, @rest );
    $self->{Data} = "";

    $self->SUPER::end_element( $hash, @rest );
}

1;

=head1 NAME

HTTP::OAI::SAX::Text

=head1 DESCRIPTION

This module adds Text and Attributes to the end_element call. This is only useful for leaf nodes (elements that don't contain any child elements).

HTTP-OAI-4.03/lib/HTTP/OAI/SAX/Driver.pm0000644003026500005230000000765012317253770015655 0ustar tdbrodyjf
package HTTP::OAI::SAX::Driver;

use XML::LibXML;

use base XML::SAX::Base;

use XML::NamespaceSupport;

use strict;

=pod

=head1 NAME

HTTP::OAI::SAX::Driver - SAX2 utility filter

=head1 DESCRIPTION

This module provides utility methods for SAX2, including collapsing multiple "characters" events into a single event.

This module exports methods for generating SAX2 events with Namespace support. This *isn't* a fully-fledged SAX2 generator!

=over 4

=item $h = HTTP::OAI::SAX::Driver->new()

Class constructor.
=cut

# Construct a driver carrying its own XML::NamespaceSupport tracker.
sub new {
    my ( $class, %opts ) = @_;

    $opts{ns} = XML::NamespaceSupport->new;

    my $driver = $class->SUPER::new(%opts);

    return $driver;
}

# Walk a DOM node and replay it as SAX events on this driver. Document,
# fragment, element and text nodes are handled; other node types are skipped.
sub generate {
    my ( $self, $node ) = @_;

    my $type = $node->nodeType;

    if ( $type == XML_DOCUMENT_NODE ) {
        $self->generate( $node->documentElement );
    }
    elsif ( $type == XML_DOCUMENT_FRAG_NODE ) {
        for my $child ( $node->childNodes ) {
            $self->generate($child);
        }
    }
    elsif ( $type == XML_ELEMENT_NODE ) {
        $self->start_element(
            $node->nodeName,
            map { $_->nodeName => $_->nodeValue } $node->attributes
        );
        for my $child ( $node->childNodes ) {
            $self->generate($child);
        }
        $self->end_element( $node->nodeName );
    }
    elsif ( $type == XML_TEXT_NODE ) {
        $self->characters( { Data => $node->nodeValue } );
    }
}

# Open an OAI-PMH document: XML declaration plus the default (OAI) and xsi
# namespace mappings.
sub start_oai_pmh {
    my ($self) = @_;

    $self->start_document;
    $self->xml_decl( { 'Version' => '1.0', 'Encoding' => 'UTF-8' } );
    $self->characters( { 'Data' => "\n" } );
    $self->start_prefix_mapping(
        {
            Prefix       => "",
            NamespaceURI => HTTP::OAI::OAI_NS(),
        }
    );
    $self->start_prefix_mapping(
        {
            Prefix       => "xsi",
            NamespaceURI => "http://www.w3.org/2001/XMLSchema-instance",
        }
    );
}

# Close the mappings opened by start_oai_pmh and end the document.
sub end_oai_pmh {
    my ($self) = @_;

    $self->end_prefix_mapping(
        {
            Prefix       => "",
            NamespaceURI => HTTP::OAI::OAI_NS(),
        }
    );
    $self->end_prefix_mapping(
        {
            Prefix       => "xsi",
            NamespaceURI => "http://www.w3.org/2001/XMLSchema-instance",
        }
    );
    $self->end_document;
}

# Emit <Name attrs>value</Name> in one call.
sub data_element {
    my ( $self, $Name, $value, @attr ) = @_;

    $self->start_element( $Name, @attr );
    $self->characters( { Data => $value } );
    $self->end_element($Name);
}

# Record the prefix in the namespace tracker before forwarding the event.
sub start_prefix_mapping {
    my ( $self, $hash ) = @_;

    $self->{ns}->declare_prefix( $hash->{Prefix}, $hash->{NamespaceURI} );

    $self->SUPER::start_prefix_mapping($hash);
}

# Emit a start_element event from a qualified name and a flat key/value
# attribute list. Attributes with an undefined value are skipped; xmlns
# attributes additionally declare a prefix mapping.
sub start_element {
    my ( $self, $Name, @attr ) = @_;

    $self->{ns}->push_context;

    my %sax_attr;
    while ( my ( $key, $value ) = splice( @attr, 0, 2 ) ) {
        next if !defined $value;

        my ( $attr_uri, $attr_prefix, $attr_local );
        if ( $key =~ /^xmlns:(.+)$/ ) {
            $self->start_prefix_mapping( { Prefix => $1, NamespaceURI => $value } );
            $attr_uri    = "http://www.w3.org/2000/xmlns/";
            $attr_prefix = "xmlns";
            $attr_local  = $1;
        }
        elsif ( $key eq "xmlns" ) {
            $self->start_prefix_mapping( { Prefix => '', NamespaceURI => $value } );
            $attr_uri    = '';
            $attr_prefix = '';
            $attr_local  = $key;
        }
        elsif ( $key =~ /^(.+):(.+)$/ ) {
            $attr_uri    = $self->{ns}->get_uri($1);
            $attr_prefix = $1;
            $attr_local  = $2;
        }
        else {
            $attr_uri    = '';
            $attr_prefix = '';
            $attr_local  = $key;
        }

        # attributes are keyed in "{namespace}localname" form
        $sax_attr{"{$attr_uri}$attr_local"} = {
            NamespaceURI => $attr_uri,
            Prefix       => $attr_prefix,
            LocalName    => $attr_local,
            Name         => $key,
            Value        => $value,
        };
    }

    # an unprefixed element name has an empty prefix
    my ( $Prefix, $LocalName ) = split /:/, $Name;
    if ( !defined($LocalName) ) {
        $LocalName = $Prefix;
        $Prefix    = '';
    }

    my $NamespaceURI = $self->{ns}->get_uri($Prefix);

    $self->SUPER::start_element(
        {
            'NamespaceURI' => $NamespaceURI,
            'Name'         => $Name,
            'Prefix'       => $Prefix,
            'LocalName'    => $LocalName,
            'Attributes'   => \%sax_attr
        }
    );
}

# Emit the matching end_element event and leave the namespace context.
sub end_element {
    my ( $self, $Name ) = @_;

    my ( $Prefix, $LocalName ) = split /:/, $Name;
    if ( !defined($LocalName) ) {
        $LocalName = $Prefix;
        $Prefix    = '';
    }

    my $NamespaceURI = $self->{ns}->get_uri($Prefix);

    $self->SUPER::end_element(
        {
            'NamespaceURI' => $NamespaceURI,
            'Name'         => $Name,
            'Prefix'       => $Prefix,
            'LocalName'    => $LocalName,
        }
    );

    $self->{ns}->pop_context;
}

1;

__END__

=back

=head1 AUTHOR

Tim Brody

HTTP-OAI-4.03/lib/HTTP/OAI/SAX/Trace.pm0000644003026500005230000000047412317253770015455 0ustar tdbrodyjf
package HTTP::OAI::SAX::Trace;

#use base XML::SAX::Base;

our $AUTOLOAD;

# Wrap the given options (including Handler) in a blessed hash.
sub new {
    my ( $class, %self ) = @_;
    bless \%self, $class;
}

sub DESTROY {}

# Log every method call with its arguments, then forward it to the wrapped
# Handler.
sub AUTOLOAD {
    my ( $self, @event_args ) = @_;

    $AUTOLOAD =~ s/^.*:://;

    HTTP::OAI::Debug::sax( $AUTOLOAD . ": " . Data::Dumper::Dumper(@event_args) );

    $self->{Handler}->$AUTOLOAD(@event_args);
}

1;
HTTP-OAI-4.03/lib/HTTP/OAI/SAX/Base.pm0000644003026500005230000000201412317253770015261 0ustar tdbrodyjf
package HTTP::OAI::SAX::Base;

@ISA = qw( XML::SAX::Base );

use strict;

# Serialise the object through its DOM and return the decoded string.
sub toString {
    my $self = shift;

    my $xml = $self->dom->toString(1);
    utf8::decode($xml);

    return $xml;
}

# Parse XML from a string, routing events through HTTP::OAI::SAX::Text so
# end_element events carry the element text.
sub parse_string {
    my ( $self, $string ) = @_;

    my $text_filter = HTTP::OAI::SAX::Text->new( Handler => $self, );
    my $parser = XML::LibXML::SAX->new( Handler => $text_filter );

    $parser->parse_string($string);
}

# Same as parse_string, reading from a file handle.
sub parse_file {
    my ( $self, $fh ) = @_;

    my $text_filter = HTTP::OAI::SAX::Text->new( Handler => $self, );
    my $parser = XML::LibXML::SAX->new( Handler => $text_filter );

    $parser->parse_file($fh);
}

sub generate {
    my ( $self, $driver ) = @_;

    # override this
}

# With a DOM argument, replay that DOM into this object as SAX events;
# without one, build and return a DOM from this object's generate().
sub dom {
    my ( $self, $dom ) = @_;

    if ($dom) {
        my $driver = XML::LibXML::SAX::Parser->new(
            Handler => HTTP::OAI::SAXHandler->new( Handler => $self ) );
        $driver->generate($dom);
    }
    else {
        my $builder = XML::LibXML::SAX::Builder->new();
        my $driver  = HTTP::OAI::SAX::Driver->new( Handler => $builder );
        $driver->start_oai_pmh();
        $self->generate($driver);
        $driver->end_oai_pmh();

        return $builder->result;
    }
}

1;
HTTP-OAI-4.03/lib/HTTP/OAI/Encapsulation.pm0000644003026500005230000000457612317253770016600 0ustar tdbrodyjf
package HTTP::OAI::Encapsulation;

use strict;
use warnings;

use HTTP::OAI::SAXHandler qw( :SAX );

use vars qw(@ISA);
@ISA = qw(XML::SAX::Base);

# Accepts either key/value arguments (version, dom) or a single DOM.
sub new {
    my $class = shift;

    my %args = @_ > 1 ? @_ : ( dom => shift );

    my $self = bless {}, ref($class) || $class;
    $self->version( $args{version} );
    $self->dom( $args{dom} );

    $self;
}

sub dom {
    my $self = shift;
    return $self->_elem( 'dom', @_ );
}

# Pseudo HTTP::Response
sub code { 200 }
sub message { 'OK' }

sub is_info { 0 }
sub is_success { 1 }
sub is_redirect { 0 }
sub is_error { 0 }

sub version {
    my $self = shift;
    return $self->_elem( 'version', @_ );
}

# Get the named element, or set it when a value is supplied.
sub _elem {
    my ( $self, $name, @value ) = @_;

    return $self->{_elem}->{$name} unless @value;
    return $self->{_elem}->{$name} = $value[0];
}

# Attribute accessor: no name returns the attribute hash, a name alone
# fetches, a defined value stores, an explicit undef deletes.
sub _attr {
    my $self = shift;

    my $name = shift or return $self->{_attr};
    return $self->{_attr}->{$name} unless @_;

    my $value = shift;
    if ( defined $value ) {
        return $self->{_attr}->{$name} = $value;
    }
    else {
        delete $self->{_attr}->{$name};
        return undef;
    }
}

package HTTP::OAI::Encapsulation::DOM;

use strict;
use warnings;

use XML::LibXML qw( :all );

use vars qw(@ISA);
@ISA = qw(HTTP::OAI::Encapsulation);

sub toString { defined( $_[0]->dom ) ? $_[0]->dom->toString : undef }

# Replay the stored document to the current handler as SAX events. Dies
# (with a stack trace) unless a document-type DOM is set; does nothing when
# no handler is installed.
sub generate {
    my $self = shift;

    if ( !$self->dom ) {
        Carp::confess("Can't generate() without a dom.");
    }
    if ( !( $self->dom->nodeType == XML_DOCUMENT_NODE ) ) {
        Carp::confess( "Can only generate() from a DOM of type XML_DOCUMENT_NODE" );
    }
    return unless defined( $self->get_handler );

    my $filter = HTTP::OAI::FilterDOMFragment->new( Handler => $self->get_handler );
    my $driver = XML::LibXML::SAX::Parser->new( Handler => $filter );
    $driver->generate( $self->dom );
}

# Swap in a DOM builder as the handler for the duration of the document.
sub start_document {
    my ($self) = @_;

    HTTP::OAI::Debug::sax( ref($self) );

    my $builder = XML::LibXML::SAX::Builder->new()
        or die "Unable to create XML::LibXML::SAX::Builder: $!";

    $self->{OLDHandler} = $self->get_handler();
    $self->set_handler($builder);

    $self->SUPER::start_document();
    $self->SUPER::xml_decl( { 'Version' => '1.0', 'Encoding' => 'UTF-8' } );
}

# Keep the built DOM and restore the handler saved by start_document.
sub end_document {
    my ($self) = @_;

    $self->SUPER::end_document();
    $self->dom( $self->get_handler->result() );
    $self->set_handler( $self->{OLDHandler} );

    HTTP::OAI::Debug::sax( ref($self) . " <" . $self->dom->documentElement->nodeName . " />" );
}

1;

__END__

=head1 NAME

HTTP::OAI::Encapsulation - Base class for data objects that contain DOM trees

=head1 DESCRIPTION

This class shouldn't be used directly, use L.
=cut

HTTP-OAI-4.03/lib/HTTP/OAI/Header.pm0000644003026500005230000000514312317253770015152 0ustar tdbrodyjf
package HTTP::OAI::Header;

@ISA = qw( HTTP::OAI::MemberMixin HTTP::OAI::SAX::Base );

use strict;

use POSIX qw/strftime/;

sub identifier {
    my $self = shift;
    return $self->_elem( 'identifier', @_ );
}

# Get or set the datestamp. Compact YYYYMMDD and YYYYMMDDhhmmss values are
# rewritten to the dashed forms; a false value clears the datestamp.
sub datestamp {
    my $self = shift;

    return $self->_elem('datestamp') unless @_;

    my $stamp = shift;
    return $self->_elem( 'datestamp', undef ) unless $stamp;

    if ( $stamp =~ /^(\d{4})(\d{2})(\d{2})$/ ) {
        $stamp = "$1-$2-$3";
    }
    elsif ( $stamp =~ /^(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})$/ ) {
        $stamp = "$1-$2-$3T$4:$5:$6Z";
    }

    return $self->_elem( 'datestamp', $stamp );
}

sub status {
    my $self = shift;
    return $self->_elem( 'status', @_ );
}

sub setSpec {
    my $self = shift;
    return $self->_multi( 'setSpec', @_ );
}

# Current UTC time in the YYYY-MM-DDThh:mm:ssZ form.
sub now { return strftime( "%Y-%m-%dT%H:%M:%SZ", gmtime() ) }

# True when the status is exactly 'deleted'.
sub is_deleted {
    my $status = shift->status();
    return defined($status) && $status eq 'deleted';
}

# Emit the <header> element; the current time stands in for a missing
# datestamp.
sub generate {
    my ( $self, $driver ) = @_;

    if ( defined( $self->status ) ) {
        $driver->start_element( 'header', status => $self->status );
    }
    else {
        $driver->start_element('header');
    }

    $driver->data_element( 'identifier', $self->identifier );
    $driver->data_element( 'datestamp', ( $self->datestamp || $self->now ) );
    for my $spec ( $self->setSpec ) {
        $driver->data_element( 'setSpec', $spec );
    }

    $driver->end_element('header');
}

# Store each header field as its element closes; the status attribute is
# read when </header> itself closes.
sub end_element {
    my ( $self, $hash ) = @_;

    my $name = lc( $hash->{LocalName} );
    my $text = $hash->{Text};
    if ( defined $text ) {
        $text =~ s/^\s+//;
        $text =~ s/\s+$//;
    }

    if ( $name eq 'identifier' ) {
        $self->identifier($text);
    }
    elsif ( $name eq 'datestamp' ) {
        $self->datestamp($text);
    }
    elsif ( $name eq 'setspec' ) {
        $self->setSpec($text);
    }
    elsif ( $name eq 'header' ) {
        $self->status( $hash->{Attributes}->{'{}status'}->{Value} );
    }
}

1;

__END__

=head1 NAME

HTTP::OAI::Header - Encapsulates an OAI header structure

=head1 SYNOPSIS

	use HTTP::OAI::Header;

	my $h = new HTTP::OAI::Header(
		identifier=>'oai:myarchive.org:2233-add',
		datestamp=>'2002-04-12T20:31:00Z',
	);

	$h->setSpec('all:novels');

=head1 METHODS

=over 4

=item $h = new HTTP::OAI::Header

This
constructor method returns a new C<HTTP::OAI::Header>.

=item $h->identifier([$identifier])

Get and optionally set the record OAI identifier.

=item $h->datestamp([$datestamp])

Get and optionally set the record datestamp (OAI 2.0+).

=item $h->status([$status])

Get and optionally set the record status (valid values are 'deleted' or undef).

=item $h->is_deleted()

Returns whether this record's status is deleted.

=item @sets = $h->setSpec([$setSpec])

Returns the list of setSpecs and optionally appends a new setSpec C<$setSpec> (OAI 2.0+).

=item $dom_fragment = $id->generate()

Act as a SAX driver (use C<< $h->set_handler() >> to specify the filter to pass events to).

=back

HTTP-OAI-4.03/lib/HTTP/OAI/Error.pm0000644003026500005230000000567512317253770015065 0ustar tdbrodyjf
package HTTP::OAI::Error;

# Exporter is named in @ISA, so make sure it is actually loaded.
require Exporter;

@ISA = qw( HTTP::OAI::SAX::Base HTTP::OAI::MemberMixin Exporter );

use strict;

use vars qw(@EXPORT @EXPORT_OK %EXPORT_TAG);

@EXPORT = qw();
@EXPORT_OK = qw(%OAI_ERRORS);
%EXPORT_TAG = ();

# Default message for each OAI-PMH error code.
#
# This has to be a package variable: @EXPORT_OK advertises %OAI_ERRORS, and
# Exporter can only alias package variables. Declared with "my", importers
# received an empty, unrelated hash.
our %OAI_ERRORS = (
    badArgument => 'The request includes illegal arguments, is missing required arguments, includes a repeated argument, or values for arguments have an illegal syntax.',
#   badGranularity => 'The values of the from and until arguments are illegal or specify a finer granularity than is supported by the repository.',
    badResumptionToken => 'The value of the resumptionToken argument is invalid or expired.',
    badVerb => 'Value of the verb argument is not a legal OAI-PMH verb, the verb argument is missing, or the verb argument is repeated.',
    cannotDisseminateFormat => 'The metadata format identified by the value given for the metadataPrefix argument is not supported by the item or by the repository',
    idDoesNotExist => 'The value of the identifier argument is unknown or illegal in this repository.',
    noRecordsMatch => 'The combination of the values of the from, until, set, and metadataPrefix arguments results in an empty list.',
    noMetadataFormats => 'There are no metadata formats available for the specified item.',
    noSetHierarchy => 'The repository does not support sets.'
);

# Construct an error. When a code is given without a message, the default
# message for that code (if any) is filled in.
sub new
{
    my( $class, %self ) = @_;

    $self{message} ||= $OAI_ERRORS{$self{code}} if $self{code};

    return $class->SUPER::new(%self);
}

sub code { shift->_elem('code',@_) }
sub message { shift->_elem('message',@_) }

# Emit <error code="...">message</error>, falling back to the default
# message for the code, then to an empty string.
sub generate
{
    my( $self, $driver ) = @_;

    $driver->data_element( 'error', ($self->message || $OAI_ERRORS{$self->code} || ''),
        code => $self->code,
    );
}

# The error code arrives as an attribute of the <error> element.
sub start_element
{
    my( $self, $hash ) = @_;

    $self->code( $hash->{Attributes}->{'{}code'}->{Value} );
}

# The element text is the human-readable message.
sub characters
{
    my( $self, $hash ) = @_;

    $self->message( $hash->{Data} );
}

1;

__END__

=head1 NAME

HTTP::OAI::Error - Encapsulates OAI error codes

=head1 METHODS

=over 4

=item $err = new HTTP::OAI::Error(code=>'badArgument',[message=>'An incorrect argument was supplied'])

This constructor method returns a new HTTP::OAI::Error object.

If no message is specified, and the code is a valid OAI error code, the appropriate message from the OAI protocol document is the default message.

=item $code = $err->code([$code])

Returns and optionally sets the error name.

=item $msg = $err->message([$msg])

Returns and optionally sets the error message.

=back

=head1 NOTE - noRecordsMatch

noRecordsMatch, without additional errors, is not treated as an error code. If noRecordsMatch was returned by a repository the HTTP::OAI::Response object will have a verb 'error' and will contain the noRecordsMatch error, however is_success will return true.

e.g.

	my $r = $ha->ListIdentifiers(metadataPrefix=>'oai_dc',from=>'3000-02-02');

	if( $r->is_success ) {
		print "Successful\n";
	} else {
		print "Failed\n";
	}

	print $r->verb, "\n";

Will print "Successful" followed by "error".
HTTP-OAI-4.03/lib/HTTP/OAI/Verb.pm0000644003026500005230000000206612317253770014661 0ustar tdbrodyjf
package HTTP::OAI::Verb;

@ISA = qw( HTTP::OAI::MemberMixin HTTP::OAI::SAX::Base );

use strict;

# back compatibility
sub toDOM
{
	my $self = shift;
	return $self->dom;
}

# Get the list of errors, optionally appending the given ones.
sub errors
{
	my $self = shift;
	return $self->_multi('error', @_);
}

# Install parse_string() and parse_file(). Each one parses its input through
# a temporary HTTP::OAI::Response and then either dies on a parse error,
# records the OAI errors on this object, or takes over the parsed content.
foreach my $method (qw( parse_string parse_file ))
{
	no strict 'refs';
	*{$method} = sub {
		my( $self, $io ) = @_;

		my $response = HTTP::OAI::Response->new(
			verb => $self->verb,
			handlers => $self->{handlers},
		);
		$response->$method( $io );

		if( $response->is_error )
		{
			die "Error parsing: ".$response->code." ".$response->message;
		}
		elsif( $response->error )
		{
			$self->errors( $response->error );
		}
		else
		{
			my $parsed = ($response->content)[-1];
			# HACK HACK HACK
			%$self = %$parsed;
		}
	};
}

# The verb name is the last component of the object's class name.
sub verb
{
	my( $self ) = @_;

	(my $name = ref($self)) =~ s/^.*:://;

	return $name;
}

# Emit the complete OAI-PMH document: envelope, verb element and body.
sub generate
{
	my( $self, $driver ) = @_;

	my $verb = $self->verb;

	$driver->start_element( 'OAI-PMH',
		'xsi:schemaLocation' => 'http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd',
	);
	$driver->start_element( $verb );
	$self->generate_body( $driver );
	$driver->end_element( $verb );
	$driver->end_element( 'OAI-PMH' );
}

1;
HTTP-OAI-4.03/lib/HTTP/OAI/PartialList.pm0000644003026500005230000000160512317253770016211 0ustar tdbrodyjf
package HTTP::OAI::PartialList;

@ISA = qw( HTTP::OAI::Verb );

use strict;

# Get/set the resumption token object.
sub resumptionToken
{
	my $self = shift;
	return $self->_elem('resumptionToken', @_);
}

# Get the list of items, optionally appending the given ones.
sub item
{
	my $self = shift;
	return $self->_multi('item', @_);
}

# Remove and return the first queued item.
sub next
{
	my $self = shift;
	return shift @{ $self->{item} };
}

# Emit every item followed by the resumption token, if there is one.
sub generate_body
{
	my( $self, $driver ) = @_;

	foreach my $item ($self->item)
	{
		$item->generate( $driver );
	}

	my $token = $self->resumptionToken;
	$token->generate( $driver ) if $token;
}

# True when the SAX event is for a depth-3 <resumptionToken> element.
sub _is_token_event
{
	my( $hash ) = @_;

	return $hash->{Depth} == 3 && $hash->{LocalName} eq "resumptionToken";
}

# SAX handler: route a resumptionToken element to a fresh token handler,
# then pass the event on.
sub start_element
{
	my( $self, $hash, $r ) = @_;

	$self->set_handler( HTTP::OAI::ResumptionToken->new ) if _is_token_event( $hash );

	$self->SUPER::start_element( $hash, $r );
}

# SAX handler: once the resumptionToken element has closed, keep the
# handler that parsed it as this list's token and detach it.
sub end_element
{
	my( $self, $hash, $r ) = @_;

	$self->SUPER::end_element( $hash, $r );

	if( _is_token_event( $hash ) )
	{
		$self->resumptionToken( $self->get_handler );
		$self->set_handler( undef );
	}
}

1;
HTTP-OAI-4.03/lib/HTTP/OAI/ListMetadataFormats.pm0000644003026500005230000000326612317253770017676 0ustar tdbrodyjf
package HTTP::OAI::ListMetadataFormats;

@ISA = qw( HTTP::OAI::PartialList );

use strict;

# Alias for item(): get the list of formats, optionally appending one.
sub metadataFormat
{
	my $self = shift;
	return $self->item(@_);
}

# SAX handler: when a metadataFormat element opens (and we are not already
# inside one) create a MetadataFormat object, queue it and remember the depth.
sub start_element
{
	my( $self, $hash, $r ) = @_;

	if( !$self->{'in_mdf'} && lc($hash->{LocalName}) eq 'metadataformat' )
	{
		my $mdf = HTTP::OAI::MetadataFormat->new;
		$self->set_handler( $mdf );
		$self->metadataFormat( $mdf );
		$self->{'in_mdf'} = $hash->{Depth};
	}

	$self->SUPER::start_element( $hash, $r );
}

# SAX handler: when the metadataFormat element we are tracking closes,
# trace its prefix and detach the handler.
sub end_element
{
	my( $self, $hash, $r ) = @_;

	$self->SUPER::end_element( $hash, $r );

	if( $self->{'in_mdf'} == $hash->{Depth} && lc($hash->{LocalName}) eq 'metadataformat' )
	{
		HTTP::OAI::Debug::trace( "metadataFormat: " . $self->get_handler->metadataPrefix );
		$self->set_handler( undef );
		$self->{'in_mdf'} = 0;
	}
}

1;

__END__

=head1 NAME

HTTP::OAI::ListMetadataFormats - Provide access to an OAI ListMetadataFormats response

=head1 SYNOPSIS

	my $r = $h->ListMetadataFormats;

	# ListMetadataFormats doesn't use flow control
	while( my $rec = $r->next ) {
		print $rec->metadataPrefix, "\n";
	}

	die $r->message if $r->is_error;

=head1 METHODS

=over 4

=item $lmdf = new HTTP::OAI::ListMetadataFormats

This constructor method returns a new HTTP::OAI::ListMetadataFormats object.

=item $mdf = $lmdf->next

Returns either an L<HTTP::OAI::MetadataFormat> object, or undef, if no more records are available.

=item @mdfl = $lmdf->metadataFormat([$mdf])

Returns the metadataFormat list and optionally adds a new metadataFormat, $mdf. Returns an array ref of L<HTTP::OAI::MetadataFormat>s.

=item $dom = $lmdf->toDOM

Returns a XML::DOM object representing the ListMetadataFormats response.
=back

HTTP-OAI-4.03/lib/HTTP/OAI/MetadataFormat.pm0000644003026500005230000000321712317253770016653 0ustar tdbrodyjf
package HTTP::OAI::MetadataFormat;

@ISA = qw( HTTP::OAI::MemberMixin XML::SAX::Base );

use strict;

# Get/set the metadataPrefix.
sub metadataPrefix
{
	my $self = shift;
	return $self->_elem('metadataPrefix', @_);
}

# Get/set the schema.
sub schema
{
	my $self = shift;
	return $self->_elem('schema', @_);
}

# Get/set the metadataNamespace.
sub metadataNamespace
{
	my $self = shift;
	return $self->_elem('metadataNamespace', @_);
}

# Emit this format as a <metadataFormat> element; the namespace child is
# only written when it is defined.
sub generate
{
	my( $self, $driver ) = @_;

	$driver->start_element('metadataFormat');
	foreach my $field (qw( metadataPrefix schema ))
	{
		$driver->data_element($field, $self->$field);
	}
	if( defined($self->metadataNamespace) )
	{
		$driver->data_element('metadataNamespace', $self->metadataNamespace);
	}
	$driver->end_element('metadataFormat');
}

# SAX handler: trim the element text in place and store it in the field
# named by the (case-insensitive) element name.
sub end_element
{
	my( $self, $hash ) = @_;

	$self->SUPER::end_element($hash);

	my $elem = lc($hash->{LocalName});

	if( defined $hash->{Text} )
	{
		s/^\s+//, s/\s+$// for $hash->{Text};
	}

	if( $elem eq 'metadataprefix' )
	{
		$self->metadataPrefix($hash->{Text});
	}
	elsif( $elem eq 'schema' )
	{
		$self->schema($hash->{Text});
	}
	elsif( $elem eq 'metadatanamespace' )
	{
		$self->metadataNamespace($hash->{Text});
	}
}

1;

__END__

=head1 NAME

HTTP::OAI::MetadataFormat - Encapsulates OAI metadataFormat XML data

=head1 METHODS

=over 4

=item $mdf = new HTTP::OAI::MetadataFormat

This constructor method returns a new HTTP::OAI::MetadataFormat object.

=item $mdp = $mdf->metadataPrefix([$mdp])

=item $schema = $mdf->schema([$schema])

=item $ns = $mdf->metadataNamespace([$ns])

These methods respectively return and optionally set the metadataPrefix, schema and metadataNamespace for the metadataFormat record.

metadataNamespace is optional in OAI 1.x and therefore may be undef when harvesting pre OAI 2 repositories.

=back

HTTP-OAI-4.03/lib/HTTP/OAI/Debug.pm0000644003026500005230000000264612317253770015015 0ustar tdbrodyjf
package HTTP::OAI::Debug;

=pod

=head1 NAME

B<HTTP::OAI::Debug> - debug the HTTP::OAI libraries

=head1 DESCRIPTION

This package is a copy of L<LWP::Debug> and exposes the same API.
In addition to "trace", "debug" and "conns" this exposes a "sax" level for debugging SAX events.

=cut

require Exporter;
@ISA = qw(Exporter);
# "sax" is a logging function just like the other three, so allow it to be
# imported as well.
@EXPORT_OK = qw(level trace debug conns sax);

use Carp ();

# Every level that '+' (all on) switches on.
my @levels = qw(trace debug conns sax);
# Level name => true when that level is currently enabled.
%current_level = ();

# Arguments starting with '+' or '-' are level switches handed to level();
# everything else is a symbol to export into the caller.
sub import
{
	my $pack = shift;
	my $callpkg = caller(0);
	my @symbols = ();
	my @levels = ();
	for (@_) {
		if (/^[-+]/) {
			push(@levels, $_);
		}
		else {
			push(@symbols, $_);
		}
	}
	Exporter::export($pack, $callpkg, @symbols);
	level(@levels);
}

# Switch levels on or off: '+' all on, '-' all off, '+name' / '-name' for a
# single level. Croaks on anything else.
sub level
{
	for (@_) {
		if ($_ eq '+') {		# all on
			# switch on all levels
			%current_level = map { $_ => 1 } @levels;
		}
		elsif ($_ eq '-') {		# all off
			%current_level = ();
		}
		elsif (/^([-+])(\w+)$/) {
			$current_level{$2} = $1 eq '+';
		}
		else {
			Carp::croak("Illegal level format $_");
		}
	}
}

# One logging function per level; each is a no-op unless its level is on.
sub trace { _log(@_) if $current_level{'trace'}; }
sub debug { _log(@_) if $current_level{'debug'}; }
sub conns { _log(@_) if $current_level{'conns'}; }
sub sax { _log(@_) if $current_level{'sax'}; }

# Print a single-line message to STDERR, prefixed with the name of the
# subroutine two frames up the call stack.
sub _log
{
	my $msg = shift;
	$msg =~ s/\n$//;
	$msg =~ s/\n/\\n/g;

	my($package,$filename,$line,$sub) = caller(2);
	print STDERR "$sub: $msg\n";
}

1;
HTTP-OAI-4.03/lib/HTTP/OAI/Repository.pm0000644003026500005230000001671612317253770016151 0ustar tdbrodyjf
package HTTP::OAI::Repository;

use strict;
use warnings;

use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

require Exporter;
@ISA = qw(Exporter);

@EXPORT = qw();
# NOTE(review): validate_request_1_1 is exportable but no such sub is
# defined in the code below - confirm whether it lives elsewhere.
@EXPORT_OK = qw( &validate_request &validate_request_1_1 &validate_date &validate_metadataPrefix &validate_responseDate &validate_setSpec );
%EXPORT_TAGS = (validate=>[qw(&validate_request &validate_date &validate_metadataPrefix &validate_responseDate &validate_setSpec)]);

use HTTP::OAI::Error qw(%OAI_ERRORS);

# Copied from Simeon Warner's tutorial at
# http://library.cern.ch/HEPLW/4/papers/3/OAIServer.pm
# (note: corrected grammer for ListSets)
# 0 = optional, 1 = required, 2 = exclusive
# Each argument maps to [ type, validator ]; a validator returns 0 when the
# value is acceptable and an error string otherwise.
my %grammer = (
	'GetRecord' => {
		'identifier' => [1, \&validate_identifier],
		'metadataPrefix' => [1, \&validate_metadataPrefix]
	},
	'Identify' => {},
	'ListIdentifiers' => {
		'from' => [0, \&validate_date],
		'until' => [0, \&validate_date],
		'set' => [0, \&validate_setSpec_2_0],
		'metadataPrefix' => [1, \&validate_metadataPrefix],
		'resumptionToken' => [2, sub { 0 }]
	},
	'ListMetadataFormats' => {
		'identifier' => [0, \&validate_identifier]
	},
	'ListRecords' => {
		'from' => [0, \&validate_date],
		'until' => [0, \&validate_date],
		'set' => [0, \&validate_setSpec_2_0],
		'metadataPrefix' => [1, \&validate_metadataPrefix],
		'resumptionToken' => [2, sub { 0 }]
	},
	'ListSets' => {
		'resumptionToken' => [2, sub { 0 }]
	}
);

# Constructor; the object carries no state.
sub new {
	my ($class,%args) = @_;
	my $self = bless {}, $class;
	return $self;
}

# Validate a request against the current protocol version (2.0).
sub validate_request { validate_request_2_0(@_); }

# Validate an OAI-PMH 2.0 request given as a flat key/value list (including
# 'verb'). Returns a list of HTTP::OAI::Error objects, which is empty when
# the request is well-formed.
sub validate_request_2_0 {
	my %params = @_;
	my $verb = delete $params{'verb'};
	my @errors;

	return (HTTP::OAI::Error->new(code=>'badVerb',message=>'No verb supplied'))
		unless defined $verb;

	my $grm = $grammer{$verb}
		or return (HTTP::OAI::Error->new(code=>'badVerb',message=>"Unknown verb '$verb'"));

	if( defined $params{'from'} && defined $params{'until'} ) {
		if( granularity($params{'from'}) ne granularity($params{'until'}) ) {
			return (HTTP::OAI::Error->new(
				code=>'badArgument',
				message=>'Granularity used in from and until must be the same'
			));
		}
	}

	# Check exclusivity
	foreach my $arg (keys %$grm) {
		my ($type, $valid_func) = @{$grm->{$arg}};
		next unless ($type == 2 && defined($params{$arg}));

		if( my $err = $valid_func->($params{$arg}) ) {
			return (HTTP::OAI::Error->new(
				code=>'badArgument',
				message=>("Bad argument ($arg): " . $err)
			));
		}

		delete $params{$arg};
		if( %params ) {
			for(keys %params) {
				push @errors, HTTP::OAI::Error->new(
					code=>'badArgument',
					message=>"'$_' can not be used in conjunction with $arg"
				);
			}
			return @errors;
		} else {
			return ();
		}
	}

	# Check required/optional
	foreach my $arg (keys %$grm) {
		my ($type, $valid_func) = @{$grm->{$arg}};
		my $value = $params{$arg};

		# Test for "supplied" rather than for truth, so that a value such
		# as '0' still goes through its validator instead of slipping past.
		my $supplied = defined($value) && $value ne '';

		if( $supplied ) {
			if( my $err = $valid_func->($value) ) {
				return (HTTP::OAI::Error->new(code=>'badArgument',message=>"Bad argument ($arg): " . $err));
			}
		}
		if( $type == 1 && !$supplied ) {
			return (HTTP::OAI::Error->new(code=>'badArgument',message=>"Required argument '$arg' was undefined"));
		}
		delete $params{$arg};
	}

	# Whatever is left was not in the grammar for this verb
	if( %params ) {
		for(keys %params) {
			push @errors, HTTP::OAI::Error->new(
				code=>'badArgument',
				message=>"'$_' is not a recognised argument for $verb"
			);
		}
		return @errors;
	} else {
		return ();
	}
}

# Classify a datestamp by its shape. The labels are only ever compared with
# each other: 'year' is what a yyyy-mm-dd date gets (label kept as-is for
# compatibility), 'seconds' a full timestamp, and a false value anything else.
sub granularity {
	my $date = shift;
	return 'year' if $date =~ /^\d{4}-\d{2}-\d{2}$/;
	return 'seconds' if $date =~ /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$/;
	return '';
}

# Returns 0 if $date is an OAI datestamp with a plausible month and day,
# otherwise a string describing the problem.
sub validate_date {
	my $date = shift;
	return "Date not in OAI format (yyyy-mm-dd or yyyy-mm-ddThh:mm:ssZ)"
		unless $date =~ /^(\d{4})-(\d{2})-(\d{2})(T\d{2}:\d{2}:\d{2}Z)?$/;
	my( $m, $d ) = ($2, $3);
	return "Month in date is not in range 1-12" if ($m < 1 || $m > 12);
	return "Day in date is not in range 1-31" if ($d < 1 || $d > 31);
	0;
}

# Returns 0 if $date is a timestamp with a numeric UTC offset, otherwise an
# error string.
sub validate_responseDate {
	my $date = shift;
	return $date =~ /^(\d{4})\-([01][0-9])\-([0-3][0-9])T([0-2][0-9]):([0-5][0-9]):([0-5][0-9])[\+\-]([0-2][0-9]):([0-5][0-9])$/ ?
		0 :
		"responseDate not in OAI format (yyyy-mm-ddThh:mm:ss[+-]hh:mm)";
}

# Returns 0 if $set is a colon-separated list of alphanumeric parts,
# otherwise an error string.
sub validate_setSpec {
	my $set = shift;
	return $set =~ /^([A-Za-z0-9])+(:[A-Za-z0-9]+)*$/ ?
		0 :
		"Set spec not in OAI format, must match ^([A-Za-z0-9])+(:[A-Za-z0-9]+)*\$";
}

# As validate_setSpec, but with the wider OAI 2.0 character set.
sub validate_setSpec_2_0 {
	my $set = shift;
	# The message is single-quoted (q{}) so that nothing in the pattern text
	# - in particular "$\" - can be interpolated as a variable.
	return $set =~ /^([A-Za-z0-9_!'\$\(\)\+\-\.\*])+(:[A-Za-z0-9_!'\$\(\)\+\-\.\*]+)*$/ ?
		0 :
		q{Set spec not in OAI format, must match ([A-Za-z0-9_!'\$\(\)\+\-\.\*])+(:[A-Za-z0-9_!'\$\(\)\+\-\.\*]+)*};
}

# Returns 0 if $mdp consists only of word characters, otherwise an error string.
sub validate_metadataPrefix {
	my $mdp = shift;
	return $mdp =~ /^[\w]+$/ ?
		0 :
		"Metadata prefix not in OAI format, must match regexp ^[\\w]+\$";
}

# OAI 2 requires identifiers by valid URIs
# This doesn't check for invalid chars, merely the scheme:rest shape
sub validate_identifier {
	my $identifier = shift;
	return $identifier =~ /^[[:alpha:]][[:alnum:]\+\-\.]*:.+/ ?
		0 :
		"Identifier not in OAI format, must match regexp ^[[:alpha:]][[:alnum:]\\+\\-\\.]*:.+";
}

1;

__END__

=head1 NAME

HTTP::OAI::Repository - Documentation for building an OAI compliant repository using OAI-PERL

=head1 DESCRIPTION

Using the OAI-PERL library in a repository context requires the user to build the OAI responses to be sent to OAI harvesters.

=head1 SYNOPSIS 1

	use HTTP::OAI::Harvester;
	use HTTP::OAI::Metadata::OAI_DC;
	use XML::SAX::Writer;
	use XML::LibXML;

	# (all of these options _must_ be supplied to comply with the OAI protocol)
	# (protocolVersion and responseDate both have sensible defaults)
	my $r = new HTTP::OAI::Identify(
		baseURL=>'http://yourhost/cgi/oai',
		adminEmail=>'youremail@yourhost',
		repositoryName=>'agoodname',
		requestURL=>self_url()
	);

	# Include a description (an XML::LibXML Dom object)
	$r->description(new HTTP::OAI::Metadata(dom=>$dom));

	my $r = HTTP::OAI::Record->new(
		header=>HTTP::OAI::Header->new(
			identifier=>'oai:myrepo:10',
			datestamp=>'2004-10-01'
			),
		metadata=>HTTP::OAI::Metadata::OAI_DC->new(
			dc=>{title=>['Hello, World!'],description=>['My Record']}
			)
	);
	$r->about(HTTP::OAI::Metadata->new(dom=>$dom));

	my $writer = XML::SAX::Writer->new();
	$r->set_handler($writer);
	$r->generate;

=head1 Building an OAI compliant repository

The validation scripts included in this module provide the repository admin with a number of tools for helping with being OAI compliant, however they can not be exhaustive in themselves.
=head1 METHODS =over 4 =item @errors = HTTP::OAI::Repository::validate_request(%paramlist) =item @errors = HTTP::OAI::Repository::validate_request_2_0(%paramlist) These functions of the Repository module validate an OAI request against the protocol requirements (validate_request can be imported on request). Returns a list of L<HTTP::OAI::Error> objects, one for each problem found, each with its code and message set; the list is empty if the request is well-formed. e.g: my @errors = validate_request(%paramlist); print $_->code, ': ', $_->message, "\n" for @errors; Note that validate_request attempts to be as strict to the Protocol as possible. =item $err = HTTP::OAI::Repository::validate_date($date) =item $err = HTTP::OAI::Repository::validate_metadataPrefix($mdp) =item $err = HTTP::OAI::Repository::validate_responseDate($date) =item $err = HTTP::OAI::Repository::validate_setSpec($set) These functions, exported on request by the Repository module, validate the given type of OAI data. Returns 0 (false) if the given value is valid, otherwise a string describing why it is not. =back =head1 EXAMPLE See the bin/gateway.pl for an example implementation (it's actually for creating a static repository gateway, but you get the idea!).
HTTP-OAI-4.03/lib/HTTP/OAI/Record.pm0000644003026500005230000000630512317253770015201 0ustar tdbrodyjf
package HTTP::OAI::Record;

@ISA = qw( HTTP::OAI::MemberMixin HTTP::OAI::SAX::Base );

use strict;

# Constructor. Unless a header object is supplied, one is built from the
# same arguments so that the header accessors below always have a target.
sub new
{
	my ($class,%args) = @_;

	$args{header} ||= HTTP::OAI::Header->new(%args);

	return $class->SUPER::new(%args);
}

# Get/set the header object.
sub header { shift->_elem('header',@_) }

# Get/set the metadata object.
sub metadata { shift->_elem('metadata',@_) }

# Get the list of about objects, optionally appending one.
sub about { shift->_multi('about',@_) }

# Shortcuts that delegate to the header object.
sub identifier { shift->header->identifier(@_) }
sub datestamp { shift->header->datestamp(@_) }
sub status { shift->header->status(@_) }
sub is_deleted { shift->header->is_deleted(@_) }

# Emit this record as a <record> element: the header, the metadata (when
# there is any) and then every about section.
sub generate
{
	my( $self, $driver ) = @_;

	$driver->start_element('record');
	$self->header->generate( $driver );
	$self->metadata->generate( $driver ) if defined $self->metadata;
	# Generate each about section in turn (the loop variable, not the
	# accessor, is the object to generate).
	$_->generate( $driver ) for $self->about;
	$driver->end_element('record');
}

# SAX handler: outside of a sub-section, hand header / metadata / about
# elements over to a dedicated handler object and remember the depth at
# which that section started.
sub start_element
{
	my ($self,$hash, $r) = @_;

	if( !$self->{in_record} )
	{
		my $elem = lc($hash->{LocalName});
		if( $elem eq 'record' && $hash->{Attributes}->{'{}status'}->{Value} )
		{
			$self->status($hash->{Attributes}->{'{}status'}->{Value});
		}
		elsif( $elem eq "header" )
		{
			$self->set_handler(my $handler = HTTP::OAI::Header->new);
			$self->header( $handler );
			$self->{in_record} = $hash->{Depth};
		}
		# The alternation is grouped so that both anchors apply to both
		# names: only exactly "metadata" or "about" may be used as a
		# method name below.
		elsif( $elem =~ /^(?:metadata|about)$/ )
		{
			my $class = $r->handlers->{$elem} || "HTTP::OAI::Metadata";
			$self->set_handler(my $handler = $class->new);
			$self->$elem($handler);
			$self->{in_record} = $hash->{Depth};
		}
	}

	$self->SUPER::start_element($hash, $r);
}

# SAX handler: detach the sub-section handler once the element that
# started the section closes.
sub end_element
{
	my ($self,$hash, $r) = @_;

	$self->SUPER::end_element($hash, $r);

	if( $self->{in_record} == $hash->{Depth} )
	{
		$self->set_handler( undef );
		$self->{in_record} = 0;
	}
}

1;

__END__

=head1 NAME

HTTP::OAI::Record - Encapsulates an OAI record

=head1 SYNOPSIS

	use HTTP::OAI::Record;

	# Create a new HTTP::OAI Record
	my $r = new HTTP::OAI::Record();

	$r->header->identifier('oai:myarchive.org:oid-233');
	$r->header->datestamp('2002-04-01');
$r->header->setSpec('all:novels'); $r->header->setSpec('all:books'); $r->metadata(new HTTP::OAI::Metadata(dom=>$md)); $r->about(new HTTP::OAI::Metadata(dom=>$ab)); =head1 METHODS =over 4 =item $r = new HTTP::OAI::Record( %opts ) This constructor method returns a new L<HTTP::OAI::Record> object. Options (see methods below): header => $header metadata => $metadata about => [$about] =item $r->header([HTTP::OAI::Header]) Returns and optionally sets the record header (an L<HTTP::OAI::Header> object). =item $r->metadata([HTTP::OAI::Metadata]) Returns and optionally sets the record metadata (an L<HTTP::OAI::Metadata> object). =item $r->about([HTTP::OAI::Metadata]) Optionally adds a new About record (an L<HTTP::OAI::Metadata> object) and returns an array of objects (may be empty). =back =head2 Header Accessor Methods These methods are equivalent to C<< $rec->header->$method([$value]) >>. =over 4 =item $r->identifier([$identifier]) Get and optionally set the record OAI identifier. =item $r->datestamp([$datestamp]) Get and optionally set the record datestamp. =item $r->status([$status]) Get and optionally set the record status (valid values are 'deleted' or undef). =item $r->is_deleted() Returns whether this record's status is deleted.
=back

HTTP-OAI-4.03/lib/HTTP/OAI/Metadata/0000755003026500005230000000000012320512116015123 5ustar tdbrodyjf
HTTP-OAI-4.03/lib/HTTP/OAI/Metadata/OAI_DC.pm0000644003026500005230000000416212317253770016460 0ustar tdbrodyjf
package HTTP::OAI::Metadata::OAI_DC;

@ISA = qw( HTTP::OAI::MemberMixin HTTP::OAI::SAX::Base );

use strict;

# Namespace URIs written by generate().
our $OAI_DC_SCHEMA = 'http://www.openarchives.org/OAI/2.0/oai_dc/';
our $DC_SCHEMA = 'http://purl.org/dc/elements/1.1/';
# The Dublin Core element names, in the order they are written out.
our @DC_TERMS = qw( contributor coverage creator date description format identifier language publisher relation rights source subject title type );
# Lookup table: term name => 1 for every entry of @DC_TERMS.
our %VALID_TERM = map { $_ => 1 } @DC_TERMS;

# Alias for dom().
sub metadata { shift->dom(@_) }

# Get/set the hash of term => [ values ].
sub dc { shift->_elem('dc',@_) }

# Emit the record as <metadata><oai_dc:dc>...</oai_dc:dc></metadata>, with
# one dc:TERM element per stored value, in @DC_TERMS order.
sub generate
{
	my( $self, $driver ) = @_;

	$driver->start_element( 'metadata' );
	$driver->start_element( 'oai_dc:dc',
		'xmlns:oai_dc' => $OAI_DC_SCHEMA,
		'xmlns:dc' => $DC_SCHEMA,
		'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
		'xsi:schemaLocation' => "$OAI_DC_SCHEMA http://www.openarchives.org/OAI/2.0/oai_dc.xsd",
	);
	foreach my $term (@DC_TERMS)
	{
		foreach my $value (@{$self->{dc}{$term} || []})
		{
			$driver->data_element( "dc:$term", $value );
		}
	}
	$driver->end_element( 'oai_dc:dc' );
	$driver->end_element( 'metadata' );
}

# Plain-text dump: a heading line followed by one "term:<TAB>value" line
# per stored value.
sub _toString {
	my $self = shift;
	my $str = "Open Archives Initiative Dublin Core (".ref($self).")\n";
	foreach my $term ( @DC_TERMS ) {
		# "|| []" avoids creating empty entries for unused terms
		foreach my $value (@{$self->{dc}->{$term} || []}) {
			# test definedness, not truth, so that a value of "0" is shown
			$str .= sprintf("%s:\t%s\n", $term, defined($value) ? $value : '');
		}
	}
	$str;
}

# SAX handler: collect the text of every element whose (lower-cased) name
# is a Dublin Core term.
sub end_element {
	my ($self,$hash) = @_;
	my $elem = lc($hash->{LocalName});
	if( $VALID_TERM{$elem} )
	{
		push @{$self->{dc}->{$elem}}, $hash->{Text};
	}
}

1;

__END__

=head1 NAME

HTTP::OAI::Metadata::OAI_DC - Easy access to OAI Dublin Core

=head1 DESCRIPTION

HTTP::OAI::Metadata::OAI_DC provides a simple interface to parsing and generating OAI Dublin Core ("oai_dc").
=head1 SYNOPSIS

	use HTTP::OAI::Metadata::OAI_DC;

	my $md = HTTP::OAI::Metadata::OAI_DC->new(
		dc=>{title=>['Hello, World!','Hi, World!']},
	);

	# Prints "Hello, World!"
	print $md->dc->{title}->[0], "\n";

	my $xml = $md->metadata();

	$md->metadata($xml);

=head1 NOTE

HTTP::OAI::Metadata::OAI_DC will automatically (and silently) convert OAI version 1.x oai_dc records into OAI version 2.0 oai_dc records.
HTTP-OAI-4.03/lib/HTTP/OAI/Metadata/OAI_Identifier.pm0000644003026500005230000000176012317253770020255 0ustar tdbrodyjf
package HTTP::OAI::Metadata::OAI_Identifier;

use strict;
use warnings;

use Carp;
use XML::LibXML;
use HTTP::OAI::Metadata;
use vars qw( @ISA );
@ISA = qw( HTTP::OAI::Metadata );

# Constructor. Builds an <oai-identifier> description document from the
# four required arguments (scheme, repositoryIdentifier, delimiter and
# sampleIdentifier) and stores it as this object's dom. Croaks if any of
# them is undefined.
sub new {
	my $self = shift->SUPER::new(@_);
	my %args = @_;

	my $dom = XML::LibXML->createDocument();
	$dom->setDocumentElement(my $root = $dom->createElementNS('http://www.openarchives.org/OAI/2.0/oai-identifier','oai-identifier'));
#	$root->setAttribute('xmlns','http://www.openarchives.org/OAI/2.0/oai-identifier');
	$root->setAttribute('xmlns:xsi','http://www.w3.org/2001/XMLSchema-instance');
	$root->setAttribute('xsi:schemaLocation','http://www.openarchives.org/OAI/2.0/oai-identifier http://www.openarchives.org/OAI/2.0/oai-identifier.xsd');

	foreach my $field (qw( scheme repositoryIdentifier delimiter sampleIdentifier )) {
		Carp::croak "Required argument $field is undefined" unless defined($args{$field});
		$root->appendChild($dom->createElement($field))->appendChild($dom->createTextNode($args{$field}));
	}

	$self->dom($dom);
	$self;
}

1;
HTTP-OAI-4.03/lib/HTTP/OAI/Metadata/OAI_Eprints.pm0000644003026500005230000000273612317253770017623 0ustar tdbrodyjf
package HTTP::OAI::Metadata::OAI_Eprints;

use strict;
use warnings;

use Carp;
use XML::LibXML;
use HTTP::OAI::Metadata;
use vars qw( @ISA );
@ISA = qw( HTTP::OAI::Metadata );

# Constructor. Builds an <eprints> description document and stores it as
# this object's dom. Each of content, metadataPolicy, dataPolicy and
# submissionPolicy is a hash ref with optional 'URL' and 'text' array refs;
# metadataPolicy and dataPolicy are required (croaks otherwise). 'comment'
# is an optional array ref of strings.
sub new {
	my $self = shift->SUPER::new(@_);
	my %args = @_;

	my $dom = XML::LibXML->createDocument();
	$dom->setDocumentElement(my $root = $dom->createElementNS('http://www.openarchives.org/OAI/1.1/eprints','eprints'));
#	$root->setAttribute('xmlns','http://www.openarchives.org/OAI/2.0/oai-identifier');
	$root->setAttribute('xmlns:xsi','http://www.w3.org/2001/XMLSchema-instance');
	$root->setAttribute('xsi:schemaLocation','http://www.openarchives.org/OAI/1.1/eprints http://www.openarchives.org/OAI/1.1/eprints.xsd');

	foreach my $name (qw( content metadataPolicy dataPolicy submissionPolicy )) {
		my $policy = $args{$name};
		Carp::croak "Required argument $name undefined" if !defined($policy) && $name =~ /metadataPolicy|dataPolicy/;
		next unless defined($policy);
		my $node = $root->appendChild($dom->createElement($name));
		# Default to empty lists locally instead of writing the defaults
		# back into the hash the caller passed in.
		foreach my $value (@{ $policy->{'URL'} || [] }) {
			$node->appendChild($dom->createElement('URL'))->appendChild($dom->createTextNode($value));
		}
		foreach my $value (@{ $policy->{'text'} || [] }) {
			$node->appendChild($dom->createElement('text'))->appendChild($dom->createTextNode($value));
		}
	}
	foreach my $comment (@{ $args{'comment'} || [] }) {
		$root->appendChild($dom->createElement('comment'))->appendChild($dom->createTextNode($comment));
	}

	$self->dom($dom);
	$self;
}

1;
HTTP-OAI-4.03/lib/HTTP/OAI/Metadata/METS.pm0000644003026500005230000000200412317253770016243 0ustar tdbrodyjf
package HTTP::OAI::Metadata::METS;

use XML::LibXML;
use XML::LibXML::XPathContext;

@ISA = qw( HTTP::OAI::Metadata );

use strict;

# Constructor; adds nothing to the parent's.
sub new
{
	my $class = shift;
	my $self = $class->SUPER::new(@_);
	$self;
}

# Build an XPath context on the given node with the oai_dc, mets and xlink
# prefixes registered.
sub _xc
{
	my $xc = XML::LibXML::XPathContext->new( @_ );
	$xc->registerNs( 'oai_dc', HTTP::OAI::OAI_NS );
	$xc->registerNs( 'mets', 'http://www.loc.gov/METS/' );
	$xc->registerNs( 'xlink', 'http://www.w3.org/1999/xlink' );
	return $xc;
}

# Returns one hash ref per mets:file element, holding the element's
# attributes by node name plus 'url', the xlink:href of its mets:FLocat
# child.
sub files
{
	my $self = shift;
	my $dom = $self->dom;

	my $xc = _xc($dom);

	my @files;
	foreach my $file ($xc->findnodes( '*//mets:file' ))
	{
		my $f = {};
		foreach my $attr ($file->attributes)
		{
			$f->{ $attr->nodeName } = $attr->nodeValue;
		}
		my $file_xc = _xc($file);
		foreach my $locat ($file_xc->findnodes( 'mets:FLocat' ))
		{
			# Look the attribute up by namespace, so it is found whatever
			# prefix the document binds to the XLink namespace.
			$f->{ url } = $locat->getAttributeNS( 'http://www.w3.org/1999/xlink', 'href' );
		}
		push @files, $f;
	}

	return @files;
}

1;

__END__

=head1 NAME

HTTP::OAI::Metadata::METS - METS accessor utility

=head1 DESCRIPTION

=head1 SYNOPSIS

=head1 NOTE

HTTP-OAI-4.03/lib/HTTP/OAI/GetRecord.pm0000644003026500005230000000253112317253770015636 0ustar tdbrodyjf
package HTTP::OAI::GetRecord;

require HTTP::OAI::ListRecords;
@ISA = qw( HTTP::OAI::ListRecords );

use strict;

# Get the stored record. When called with arguments they replace the stored
# list; only the first entry is ever returned.
sub record
{
	my $self = shift;

	$self->{item} = [@_] if @_;
	return $self->{item}->[0];
}

# Emit the stored record, if there is one.
sub generate_body
{
	my ($self, $driver) = @_;

	my $record = $self->record;
	# An empty response has no record to generate.
	$record->generate( $driver ) if defined $record;
}

1;

__END__

=head1 NAME

HTTP::OAI::GetRecord - An OAI GetRecord response

=head1 DESCRIPTION

HTTP::OAI::GetRecord is derived from L<HTTP::OAI::ListRecords> and provides access to the data contained in an OAI GetRecord response in addition to the header information provided by OAI::Response.

=head1 SYNOPSIS

	use HTTP::OAI::GetRecord();

	$res = new HTTP::OAI::GetRecord();
	$res->record($rec);

=head1 METHODS

=over 4

=item $gr = new HTTP::OAI::GetRecord

This constructor method returns a new HTTP::OAI::GetRecord object.

=item $rec = $gr->next

Returns the next record stored in the response, or undef if no more records are available. The record is returned as an L<HTTP::OAI::Record>.

=item $rec = $gr->record([$rec])

Returns the stored record, or undef if there is none. GetRecord will only store one record at a time, so this method will replace any existing record if called with argument(s).

=item $dom = $gr->toDOM()

Returns an XML::DOM object representing the GetRecord response.
=back

HTTP-OAI-4.03/lib/HTTP/OAI.pm0000644003026500005230000000560212320512034013743 0ustar tdbrodyjf
package HTTP::OAI;

# Umbrella module: loads every HTTP::OAI class and its prerequisites.
# The order of the use lines below is the load order - keep it.

use strict;

our $VERSION = '4.03';

# Namespace URI of OAI-PMH 2.0.
use constant OAI_NS => 'http://www.openarchives.org/OAI/2.0/';

# perlcore
use Carp;
use Encode;

# http related stuff
use URI;
use HTTP::Headers;
use HTTP::Request;
use HTTP::Response;
require LWP::UserAgent;
require LWP::MemberMixin;

# xml related stuff
use XML::SAX;
use XML::SAX::ParserFactory;
use XML::LibXML;
use XML::LibXML::SAX;
use XML::LibXML::SAX::Parser;
use XML::LibXML::SAX::Builder;

use HTTP::OAI::SAX::Driver;
use HTTP::OAI::SAX::Text;

# debug
use HTTP::OAI::Debug;
use HTTP::OAI::SAX::Trace;

# generic superclasses
use HTTP::OAI::SAX::Base;
use HTTP::OAI::MemberMixin;
use HTTP::OAI::Verb;
use HTTP::OAI::PartialList;

# utility classes
use HTTP::OAI::Response;

# oai data objects
use HTTP::OAI::Metadata; # Super class of all data objects
use HTTP::OAI::Error;
use HTTP::OAI::Header;
use HTTP::OAI::MetadataFormat;
use HTTP::OAI::Record;
use HTTP::OAI::ResumptionToken;
use HTTP::OAI::Set;

# oai verbs
use HTTP::OAI::GetRecord;
use HTTP::OAI::Identify;
use HTTP::OAI::ListIdentifiers;
use HTTP::OAI::ListMetadataFormats;
use HTTP::OAI::ListRecords;
use HTTP::OAI::ListSets;

# oai agents
use HTTP::OAI::UserAgent;
use HTTP::OAI::Harvester;
use HTTP::OAI::Repository;

# The harvester reports the same version number as this module.
$HTTP::OAI::Harvester::VERSION = $VERSION;

# Debug output can be switched on from the environment.
if( $ENV{HTTP_OAI_TRACE} )
{
	HTTP::OAI::Debug::level( '+trace' );
}
if( $ENV{HTTP_OAI_SAX_TRACE} )
{
	HTTP::OAI::Debug::level( '+sax' );
}

# Lower-cased XML namespace URI => protocol version string ('2.0s' marks
# the static repository namespace).
# NOTE(review): presumably used to detect the protocol version of a parsed
# response - the lookup itself is not in this module, confirm against callers.
our %VERSIONS = (
	'http://www.openarchives.org/oai/1.0/oai_getrecord' => '1.0',
	'http://www.openarchives.org/oai/1.0/oai_identify' => '1.0',
	'http://www.openarchives.org/oai/1.0/oai_listidentifiers' => '1.0',
	'http://www.openarchives.org/oai/1.0/oai_listmetadataformats' => '1.0',
	'http://www.openarchives.org/oai/1.0/oai_listrecords' => '1.0',
	'http://www.openarchives.org/oai/1.0/oai_listsets' => '1.0',
	'http://www.openarchives.org/oai/1.1/oai_getrecord' => '1.1',
	'http://www.openarchives.org/oai/1.1/oai_identify' => '1.1',
	'http://www.openarchives.org/oai/1.1/oai_listidentifiers' => '1.1',
	'http://www.openarchives.org/oai/1.1/oai_listmetadataformats' => '1.1',
	'http://www.openarchives.org/oai/1.1/oai_listrecords' => '1.1',
	'http://www.openarchives.org/oai/1.1/oai_listsets' => '1.1',
	'http://www.openarchives.org/oai/2.0/' => '2.0',
	'http://www.openarchives.org/oai/2.0/static-repository' => '2.0s',
);

1;

__END__

=head1 NAME

HTTP::OAI - API for the OAI-PMH

=head1 DESCRIPTION

This is a stub module, you probably want to look at L<HTTP::OAI::Harvester> or L<HTTP::OAI::Repository>.

=head1 SEE ALSO

You can find links to this and other OAI tools (perl, C++, java) at: http://www.openarchives.org/tools/tools.html.

Ed Summers' L<Net::OAI::Harvester> module.

=head1 AUTHOR

Copyright 2004-2010 Tim Brody, University of Southampton.

This module is free software and is released under the BSD License (see LICENSE).
HTTP-OAI-4.03/LICENSE0000644003026500005230000000276512317253770012503 0ustar tdbrodyjf
Copyright (c) 2010, Timothy D Brody All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of University of Southampton nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
HTTP-OAI-4.03/MANIFEST.SKIP0000644003026500005230000000010312317253770013354 0ustar tdbrodyjf
.svn Makefile$ Makefile.old MANIFEST.bak ^blib .tar.gz$ pm_to_blib
HTTP-OAI-4.03/Makefile.PL0000644003026500005230000000125612317253770013442 0ustar tdbrodyjf
require 5.005;
use ExtUtils::MakeMaker;

# Build script: describes the distribution to ExtUtils::MakeMaker, which
# writes the Makefile.

# Command-line scripts installed with the distribution.
my @scripts = qw( bin/oai_browser.pl bin/oai_pmh.pl );

# n.b. I put 0.1 as version requirements as I have
# very recent versions, whereas these modules will
# most likely work with older versions of the req.
# modules. TDB
# Module name => minimum version (0 means any version).
my %prereqs = (
	'Encode' => 2.12,
	'XML::LibXML' => 1.60,
	'XML::LibXML::SAX' => 0,
	'XML::SAX::Base' => 1.04,
	'XML::SAX' => 0,
	'URI' => 0,
	'HTTP::Request' => 0,
	'HTTP::Response' => 0,
	'LWP::UserAgent' => 0,
	'Test::More' => 0,
	'CGI' => 0,
);

# Settings for building the release tarball.
my %dist = (
	COMPRESS => 'gzip -9f',
	SUFFIX => 'gz',
);

WriteMakefile(
	NAME => 'HTTP-OAI',
	VERSION_FROM => 'lib/HTTP/OAI.pm',
	EXE_FILES => \@scripts,
	PREREQ_PM => \%prereqs,
	dist => \%dist,
);
HTTP-OAI-4.03/README0000644003026500005230000000237012317253770012346 0ustar tdbrodyjf
Description ----------- OAI-PERL are a set of Perl modules that provide an API to the Open Archives Initiative Protocol for Metadata Harvesting (OAI-PMH). OAI-PMH is a XML-over-HTTP protocol for transferring metadata between a repository (the HTTP server) and service provider (the HTTP client).
Synopsis -------- use HTTP::OAI::Harvester; my $h = HTTP::OAI::Harvester->new( baseURL=>'http://eprints.ecs.soton.ac.uk/perl/oai2', debug=>1, ); my $r = $h->ListIdentifiers( metadataPrefix=>'oai_dc' ); die unless $r->is_success(); while(my $rec = $r->next) { # Check we didn't fail on a resumption token die unless $rec->is_success(); print $rec->identifier(); } Installation ------------ perl Makefile.PL make make test make install To disable the network tests use: export SKIP_HTTP_OAI_NETTESTS=1 Use the following to view the root documentation: man HTTP::OAI::Harvester man HTTP::OAI::Repository Utility Scripts --------------- bin/oai_browser.pl - Interactively browse an OAI repository Related Modules --------------- Net::OAI::Harvester by Ed Summers. Development ----------- git clone git://github.com/timbrody/perl-oai-lib.git Author ------ Copyright 2012 © Tim Brody This module is released under the same terms as Perl. HTTP-OAI-4.03/CHANGES0000644003026500005230000001212012320512064012437 0ustar tdbrodyjf4.03 - Fixed MANIFEST [phochste] 4.02 - Fixed namespace issue with ResumptionToken.pm [sebastfr] 4.01 - Reworked API to use DocumentFragments - Various speed improvements - Added bin/oai_pmh.pl CLI tool 3.28 - Fixed parsing elements that contain multiple child nodes 3.26 - Added 'delay' option to delay between requests - Added --skip-identify option to oai_browser.pl - Fixed POD for RT #64077 - Hopefully fixes #69337 (no test case given) - Bumped XML::SAX::Base to 1.04 which should fix #68907 3.25 - Added unit test for OAI_DC metadata() parsing - Metadata::OAI_DC now parses dc when passed as a DOM to ->metadata 3.24 - Fixed CPAN bugs #60760 and #60856 3.23 - Changed license to BSD - Added dependency for XML::SAX rt #43287 - Fixed some POD issues rt #51853 3.22 - use XML::SAX::Parser rather than ParserFactory - test the namespace on NamespaceURI rather than the XMLNS attribute, which is reported differently between XML::LibXML::SAX and XML::LibXML::SAX::Parser - added
debug class HTTP::OAI::Debug - added check in 01parse test case for file open - added 000xml_sax.pl test case for basic XML::SAX function - added "trace" and "tracesax" options to oai_browser.pl 3.21 - added test case for bad characters - fix bad chars as they arrive, to avoid buffering an entire response if it contains bad chars - some code cleanup 3.20 - use strict UTF-8 (suggested by Dennis Vierkant) 3.19 3.18 - broke apart Metadata from Encapsulation - now uses XML::SAX in Response, rt.cpan.org ticket #27191 - shifted all use's into HTTP::OAI - added use strict(), use warnings() to every module - shifted $VERSION in HTTP::OAI - href attribute lookup in METS should be namespaced too - added mets test case - fixed test cases for changed XML output 3.17 - Removed next() call from oai_browser for List* - Fixed missing HTTP:: in ListIdentifiers documentation - Fixed missing namespace initialization in SAX - Fixed bug in request where '?' wasn't being removed 3.16 - Fixed recursion bug in onRecord - No longer need to call next() for onRecord (Harvester will do it for you) - Added PartialList module (ListIdentifiers, ListRecords, ListSets) 3.15 - Several bug fixes, particularly catching and throwing parsing errors - Added onRecord argument to ListRecords, ListIdentifiers, ListSets - INCOMPATIBLE: Changed error handling, so now only need to check after the next() loop for both initial errors and flow errors - Added is_deleted method to Record and Header 3.14 - By default oai-lib now fixes bad UTF-8 characters (replacing with '?') To disable this set $HTTP::OAI::UserAgent::IGNORE_BAD_CHARS to 0 - Fixed bug where a partially downloaded utf-8 character could cause a harvest to fail 3.13 3.12 - BUG: Static repository was re-fetching the source for every request - API change: when parsing a static repository records are now pre-parsed to allow for quick GetRecord access, this means you *must* supply the handlers argument to the Harvester constructor (otherwise the first
request's handlers are always used) - Added resumptionToken option to ListIdentifiers in the oai_browser.pl tool 2005-09-05 - Added -oldstyle_urls to gateway & doc examples [thanks to Patrick Hochstenbach] 2005-04-13 - Improved display of errors in the event of XML parsing failure (bad chars etc.) - In the event of bad XML resumption token will not sleep(60) and retry, but simply fail 2005-04-08 - Fixed bug where Header wasn't getting end_element on the header element, resulting in deleted items in ListRecords never being flagged as such 2005-04-06 - UserAgent now uses a callback with LWP to parse XML responses, resulting in a much reduced memory footprint - Compression support is removed for the moment 2005-03-30 - CGI has an annoying habit of returning the URL with semi-colons, ::Response now checks for this and acts appropriately - Header was blanking setSpecs when given a dom (corrected POD for setSpec) - Fixed Header missing the record status - Tests added to getrecord.t for parsing Header - oai_static_gateway.pl now throws an error if given a resumption token (which should never occur because it doesn't use Flow-Control) 2005-03-07 - Fixed undef warning when trying to set the repository to an Identify w/o a base URL - Changed back to XML::LibXML::SAX::Parser due to unreliability in XML::LibXML::SAX :-( 2005-03-05 - 80network.t && added Prereqs (jaybonci@debian.org RT #11508 & #11509) 2005-02-25 - Added new script 'gateway.pl' that acts as a static repository broker - Library now uses temporary files to harvest (due to memory leak in LibXML's parse_string), provides for a smaller memory footprint - Numerous bug fixes 2005-02-23 - Added support for harvesting from Static repositories - Fixed bug where the error message for an unsupported namespace wasn't getting displayed 2004-10-08 - Moved to namespace HTTP::OAI - Changed all arguments from -style to 'argument' - Now uses (in a round-about way) XML::SAX, dropping the requirement for XML::Parser and XML::SAX::PerlSAX - Fixed some
bugs with the DOM construction code (which also affected toString)