IO-Compress-2.063/0000755000175000017500000000000012235214476012316 5ustar paulpaulIO-Compress-2.063/lib/0000755000175000017500000000000012235214476013064 5ustar paulpaulIO-Compress-2.063/lib/File/0000755000175000017500000000000012235214476013743 5ustar paulpaulIO-Compress-2.063/lib/File/GlobMapper.pm0000644000175000017500000003656411511600745016341 0ustar paulpaulpackage File::GlobMapper; use strict; use warnings; use Carp; our ($CSH_GLOB); BEGIN { if ($] < 5.006) { require File::BSDGlob; import File::BSDGlob qw(:glob) ; $CSH_GLOB = File::BSDGlob::GLOB_CSH() ; *globber = \&File::BSDGlob::csh_glob; } else { require File::Glob; import File::Glob qw(:glob) ; $CSH_GLOB = File::Glob::GLOB_CSH() ; #*globber = \&File::Glob::bsd_glob; *globber = \&File::Glob::csh_glob; } } our ($Error); our ($VERSION, @EXPORT_OK); $VERSION = '1.000'; @EXPORT_OK = qw( globmap ); our ($noPreBS, $metachars, $matchMetaRE, %mapping, %wildCount); $noPreBS = '(? '([^/]*)', '?' => '([^/])', '.' => '\.', '[' => '([', '(' => '(', ')' => ')', ); %wildCount = map { $_ => 1 } qw/ * ? . { ( [ /; sub globmap ($$;) { my $inputGlob = shift ; my $outputGlob = shift ; my $obj = new File::GlobMapper($inputGlob, $outputGlob, @_) or croak "globmap: $Error" ; return $obj->getFileMap(); } sub new { my $class = shift ; my $inputGlob = shift ; my $outputGlob = shift ; # TODO -- flags needs to default to whatever File::Glob does my $flags = shift || $CSH_GLOB ; #my $flags = shift ; $inputGlob =~ s/^\s*\<\s*//; $inputGlob =~ s/\s*\>\s*$//; $outputGlob =~ s/^\s*\<\s*//; $outputGlob =~ s/\s*\>\s*$//; my %object = ( InputGlob => $inputGlob, OutputGlob => $outputGlob, GlobFlags => $flags, Braces => 0, WildCount => 0, Pairs => [], Sigil => '#', ); my $self = bless \%object, ref($class) || $class ; $self->_parseInputGlob() or return undef ; $self->_parseOutputGlob() or return undef ; my @inputFiles = globber($self->{InputGlob}, $flags) ; if (GLOB_ERROR) { $Error = $!; return undef ; } #if (whatever) { my $missing = grep { ! -e $_ } @inputFiles ; if ($missing) { $Error = "$missing input files do not exist"; return undef ; } } $self->{InputFiles} = \@inputFiles ; $self->_getFiles() or return undef ; return $self; } sub _retError { my $string = shift ; $Error = "$string in input fileglob" ; return undef ; } sub _unmatched { my $delimeter = shift ; _retError("Unmatched $delimeter"); return undef ; } sub _parseBit { my $self = shift ; my $string = shift ; my $out = ''; my $depth = 0 ; while ($string =~ s/(.*?)$noPreBS(,|$matchMetaRE)//) { $out .= quotemeta($1) ; $out .= $mapping{$2} if defined $mapping{$2}; ++ $self->{WildCount} if $wildCount{$2} ; if ($2 eq ',') { return _unmatched "(" if $depth ; $out .= '|'; } elsif ($2 eq '(') { ++ $depth ; } elsif ($2 eq ')') { return _unmatched ")" if ! $depth ; -- $depth ; } elsif ($2 eq '[') { # TODO -- quotemeta & check no '/' # TODO -- check for \] & other \ within the [] $string =~ s#(.*?\])## or return _unmatched "[" ; $out .= "$1)" ; } elsif ($2 eq ']') { return _unmatched "]" ; } elsif ($2 eq '{' || $2 eq '}') { return _retError "Nested {} not allowed" ; } } $out .= quotemeta $string; return _unmatched "(" if $depth ; return $out ; } sub _parseInputGlob { my $self = shift ; my $string = $self->{InputGlob} ; my $inGlob = ''; # Multiple concatenated *'s don't make sense #$string =~ s#\*\*+#*# ; # TODO -- Allow space to delimit patterns? #my @strings = split /\s+/, $string ; #for my $str (@strings) my $out = ''; my $depth = 0 ; while ($string =~ s/(.*?)$noPreBS($matchMetaRE)//) { $out .= quotemeta($1) ; $out .= $mapping{$2} if defined $mapping{$2}; ++ $self->{WildCount} if $wildCount{$2} ; if ($2 eq '(') { ++ $depth ; } elsif ($2 eq ')') { return _unmatched ")" if ! $depth ; -- $depth ; } elsif ($2 eq '[') { # TODO -- quotemeta & check no '/' or '(' or ')' # TODO -- check for \] & other \ within the [] $string =~ s#(.*?\])## or return _unmatched "["; $out .= "$1)" ; } elsif ($2 eq ']') { return _unmatched "]" ; } elsif ($2 eq '}') { return _unmatched "}" ; } elsif ($2 eq '{') { # TODO -- check no '/' within the {} # TODO -- check for \} & other \ within the {} my $tmp ; unless ( $string =~ s/(.*?)$noPreBS\}//) { return _unmatched "{"; } #$string =~ s#(.*?)\}##; #my $alt = join '|', # map { quotemeta $_ } # split "$noPreBS,", $1 ; my $alt = $self->_parseBit($1); defined $alt or return 0 ; $out .= "($alt)" ; ++ $self->{Braces} ; } } return _unmatched "(" if $depth ; $out .= quotemeta $string ; $self->{InputGlob} =~ s/$noPreBS[\(\)]//g; $self->{InputPattern} = $out ; #print "# INPUT '$self->{InputGlob}' => '$out'\n"; return 1 ; } sub _parseOutputGlob { my $self = shift ; my $string = $self->{OutputGlob} ; my $maxwild = $self->{WildCount}; if ($self->{GlobFlags} & GLOB_TILDE) #if (1) { $string =~ s{ ^ ~ # find a leading tilde ( # save this in $1 [^/] # a non-slash character * # repeated 0 or more times (0 means me) ) }{ $1 ? (getpwnam($1))[7] : ( $ENV{HOME} || $ENV{LOGDIR} ) }ex; } # max #1 must be == to max no of '*' in input while ( $string =~ m/#(\d)/g ) { croak "Max wild is #$maxwild, you tried #$1" if $1 > $maxwild ; } my $noPreBS = '(?{OutputGlob}' => '$string'\n"; $self->{OutputPattern} = $string ; return 1 ; } sub _getFiles { my $self = shift ; my %outInMapping = (); my %inFiles = () ; foreach my $inFile (@{ $self->{InputFiles} }) { next if $inFiles{$inFile} ++ ; my $outFile = $inFile ; if ( $inFile =~ m/$self->{InputPattern}/ ) { no warnings 'uninitialized'; eval "\$outFile = $self->{OutputPattern};" ; if (defined $outInMapping{$outFile}) { $Error = "multiple input files map to one output file"; return undef ; } $outInMapping{$outFile} = $inFile; push @{ $self->{Pairs} }, [$inFile, $outFile]; } } return 1 ; } sub getFileMap { my $self = shift ; return $self->{Pairs} ; } sub getHash { my $self = shift ; return { map { $_->[0] => $_->[1] } @{ $self->{Pairs} } } ; } 1; __END__ =head1 NAME File::GlobMapper - Extend File Glob to Allow Input and Output Files =head1 SYNOPSIS use File::GlobMapper qw( globmap ); my $aref = globmap $input => $output or die $File::GlobMapper::Error ; my $gm = new File::GlobMapper $input => $output or die $File::GlobMapper::Error ; =head1 DESCRIPTION This module needs Perl5.005 or better. This module takes the existing C module as a starting point and extends it to allow new filenames to be derived from the files matched by C. This can be useful when carrying out batch operations on multiple files that have both an input filename and output filename and the output file can be derived from the input filename. Examples of operations where this can be useful include, file renaming, file copying and file compression. =head2 Behind The Scenes To help explain what C does, consider what code you would write if you wanted to rename all files in the current directory that ended in C<.tar.gz> to C<.tgz>. So say these files are in the current directory alpha.tar.gz beta.tar.gz gamma.tar.gz and they need renamed to this alpha.tgz beta.tgz gamma.tgz Below is a possible implementation of a script to carry out the rename (error cases have been omitted) foreach my $old ( glob "*.tar.gz" ) { my $new = $old; $new =~ s#(.*)\.tar\.gz$#$1.tgz# ; rename $old => $new or die "Cannot rename '$old' to '$new': $!\n; } Notice that a file glob pattern C<*.tar.gz> was used to match the C<.tar.gz> files, then a fairly similar regular expression was used in the substitute to allow the new filename to be created. Given that the file glob is just a cut-down regular expression and that it has already done a lot of the hard work in pattern matching the filenames, wouldn't it be handy to be able to use the patterns in the fileglob to drive the new filename? Well, that's I what C does. Here is same snippet of code rewritten using C for my $pair (globmap '<*.tar.gz>' => '<#1.tgz>' ) { my ($from, $to) = @$pair; rename $from => $to or die "Cannot rename '$old' to '$new': $!\n; } So how does it work? Behind the scenes the C function does a combination of a file glob to match existing filenames followed by a substitute to create the new filenames. Notice how both parameters to C are strings that are delimited by <>. This is done to make them look more like file globs - it is just syntactic sugar, but it can be handy when you want the strings to be visually distinctive. The enclosing <> are optional, so you don't have to use them - in fact the first thing globmap will do is remove these delimiters if they are present. The first parameter to C, C<*.tar.gz>, is an I. Once the enclosing "< ... >" is removed, this is passed (more or less) unchanged to C to carry out a file match. Next the fileglob C<*.tar.gz> is transformed behind the scenes into a full Perl regular expression, with the additional step of wrapping each transformed wildcard metacharacter sequence in parenthesis. In this case the input fileglob C<*.tar.gz> will be transformed into this Perl regular expression ([^/]*)\.tar\.gz Wrapping with parenthesis allows the wildcard parts of the Input File Glob to be referenced by the second parameter to C, C<#1.tgz>, the I. This parameter operates just like the replacement part of a substitute command. The difference is that the C<#1> syntax is used to reference sub-patterns matched in the input fileglob, rather than the C<$1> syntax that is used with perl regular expressions. In this case C<#1> is used to refer to the text matched by the C<*> in the Input File Glob. This makes it easier to use this module where the parameters to C are typed at the command line. The final step involves passing each filename matched by the C<*.tar.gz> file glob through the derived Perl regular expression in turn and expanding the output fileglob using it. The end result of all this is a list of pairs of filenames. By default that is what is returned by C. In this example the data structure returned will look like this ( ['alpha.tar.gz' => 'alpha.tgz'], ['beta.tar.gz' => 'beta.tgz' ], ['gamma.tar.gz' => 'gamma.tgz'] ) Each pair is an array reference with two elements - namely the I filename, that C has matched, and a I filename that is derived from the I filename. =head2 Limitations C has been kept simple deliberately, so it isn't intended to solve all filename mapping operations. Under the hood C (or for older versions of Perl, C) is used to match the files, so you will never have the flexibility of full Perl regular expression. =head2 Input File Glob The syntax for an Input FileGlob is identical to C, except for the following =over 5 =item 1. No nested {} =item 2. Whitespace does not delimit fileglobs. =item 3. The use of parenthesis can be used to capture parts of the input filename. =item 4. If an Input glob matches the same file more than once, only the first will be used. =back The syntax =over 5 =item B<~> =item B<~user> =item B<.> Matches a literal '.'. Equivalent to the Perl regular expression \. =item B<*> Matches zero or more characters, except '/'. Equivalent to the Perl regular expression [^/]* =item B Matches zero or one character, except '/'. Equivalent to the Perl regular expression [^/]? =item B<\> Backslash is used, as usual, to escape the next character. =item B<[]> Character class. =item B<{,}> Alternation =item B<()> Capturing parenthesis that work just like perl =back Any other character it taken literally. =head2 Output File Glob The Output File Glob is a normal string, with 2 glob-like features. The first is the '*' metacharacter. This will be replaced by the complete filename matched by the input file glob. So *.c *.Z The second is Output FileGlobs take the =over 5 =item "*" The "*" character will be replaced with the complete input filename. =item #1 Patterns of the form /#\d/ will be replaced with the =back =head2 Returned Data =head1 EXAMPLES =head2 A Rename script Below is a simple "rename" script that uses C to determine the source and destination filenames. use File::GlobMapper qw(globmap) ; use File::Copy; die "rename: Usage rename 'from' 'to'\n" unless @ARGV == 2 ; my $fromGlob = shift @ARGV; my $toGlob = shift @ARGV; my $pairs = globmap($fromGlob, $toGlob) or die $File::GlobMapper::Error; for my $pair (@$pairs) { my ($from, $to) = @$pair; move $from => $to ; } Here is an example that renames all c files to cpp. $ rename '*.c' '#1.cpp' =head2 A few example globmaps Below are a few examples of globmaps To copy all your .c file to a backup directory '' '' If you want to compress all '' '<*.gz>' To uncompress '' '' =head1 SEE ALSO L =head1 AUTHOR The I module was written by Paul Marquess, F. =head1 COPYRIGHT AND LICENSE Copyright (c) 2005 Paul Marquess. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. IO-Compress-2.063/lib/Compress/0000755000175000017500000000000012235214476014657 5ustar paulpaulIO-Compress-2.063/lib/Compress/Zlib.pm0000644000175000017500000012552712211042535016116 0ustar paulpaul package Compress::Zlib; require 5.006 ; require Exporter; use Carp ; use IO::Handle ; use Scalar::Util qw(dualvar); use IO::Compress::Base::Common 2.063 ; use Compress::Raw::Zlib 2.063 ; use IO::Compress::Gzip 2.063 ; use IO::Uncompress::Gunzip 2.063 ; use strict ; use warnings ; use bytes ; our ($VERSION, $XS_VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS); $VERSION = '2.063'; $XS_VERSION = $VERSION; $VERSION = eval $VERSION; @ISA = qw(Exporter); # Items to export into callers namespace by default. Note: do not export # names by default without a very good reason. Use EXPORT_OK instead. # Do not simply export all your public functions/methods/constants. @EXPORT = qw( deflateInit inflateInit compress uncompress gzopen $gzerrno ); push @EXPORT, @Compress::Raw::Zlib::EXPORT ; @EXPORT_OK = qw(memGunzip memGzip zlib_version); %EXPORT_TAGS = ( ALL => \@EXPORT ); BEGIN { *zlib_version = \&Compress::Raw::Zlib::zlib_version; } use constant FLAG_APPEND => 1 ; use constant FLAG_CRC => 2 ; use constant FLAG_ADLER => 4 ; use constant FLAG_CONSUME_INPUT => 8 ; our (@my_z_errmsg); @my_z_errmsg = ( "need dictionary", # Z_NEED_DICT 2 "stream end", # Z_STREAM_END 1 "", # Z_OK 0 "file error", # Z_ERRNO (-1) "stream error", # Z_STREAM_ERROR (-2) "data error", # Z_DATA_ERROR (-3) "insufficient memory", # Z_MEM_ERROR (-4) "buffer error", # Z_BUF_ERROR (-5) "incompatible version",# Z_VERSION_ERROR(-6) ); sub _set_gzerr { my $value = shift ; if ($value == 0) { $Compress::Zlib::gzerrno = 0 ; } elsif ($value == Z_ERRNO() || $value > 2) { $Compress::Zlib::gzerrno = $! ; } else { $Compress::Zlib::gzerrno = dualvar($value+0, $my_z_errmsg[2 - $value]); } return $value ; } sub _set_gzerr_undef { _set_gzerr(@_); return undef; } sub _save_gzerr { my $gz = shift ; my $test_eof = shift ; my $value = $gz->errorNo() || 0 ; my $eof = $gz->eof() ; if ($test_eof) { # gzread uses Z_STREAM_END to denote a successful end $value = Z_STREAM_END() if $gz->eof() && $value == 0 ; } _set_gzerr($value) ; } sub gzopen($$) { my ($file, $mode) = @_ ; my $gz ; my %defOpts = (Level => Z_DEFAULT_COMPRESSION(), Strategy => Z_DEFAULT_STRATEGY(), ); my $writing ; $writing = ! ($mode =~ /r/i) ; $writing = ($mode =~ /[wa]/i) ; $defOpts{Level} = $1 if $mode =~ /(\d)/; $defOpts{Strategy} = Z_FILTERED() if $mode =~ /f/i; $defOpts{Strategy} = Z_HUFFMAN_ONLY() if $mode =~ /h/i; $defOpts{Append} = 1 if $mode =~ /a/i; my $infDef = $writing ? 'deflate' : 'inflate'; my @params = () ; croak "gzopen: file parameter is not a filehandle or filename" unless isaFilehandle $file || isaFilename $file || (ref $file && ref $file eq 'SCALAR'); return undef unless $mode =~ /[rwa]/i ; _set_gzerr(0) ; if ($writing) { $gz = new IO::Compress::Gzip($file, Minimal => 1, AutoClose => 1, %defOpts) or $Compress::Zlib::gzerrno = $IO::Compress::Gzip::GzipError; } else { $gz = new IO::Uncompress::Gunzip($file, Transparent => 1, Append => 0, AutoClose => 1, MultiStream => 1, Strict => 0) or $Compress::Zlib::gzerrno = $IO::Uncompress::Gunzip::GunzipError; } return undef if ! defined $gz ; bless [$gz, $infDef], 'Compress::Zlib::gzFile'; } sub Compress::Zlib::gzFile::gzread { my $self = shift ; return _set_gzerr(Z_STREAM_ERROR()) if $self->[1] ne 'inflate'; my $len = defined $_[1] ? $_[1] : 4096 ; my $gz = $self->[0] ; if ($self->gzeof() || $len == 0) { # Zap the output buffer to match ver 1 behaviour. $_[0] = "" ; _save_gzerr($gz, 1); return 0 ; } my $status = $gz->read($_[0], $len) ; _save_gzerr($gz, 1); return $status ; } sub Compress::Zlib::gzFile::gzreadline { my $self = shift ; my $gz = $self->[0] ; { # Maintain backward compatibility with 1.x behaviour # It didn't support $/, so this can't either. local $/ = "\n" ; $_[0] = $gz->getline() ; } _save_gzerr($gz, 1); return defined $_[0] ? length $_[0] : 0 ; } sub Compress::Zlib::gzFile::gzwrite { my $self = shift ; my $gz = $self->[0] ; return _set_gzerr(Z_STREAM_ERROR()) if $self->[1] ne 'deflate'; $] >= 5.008 and (utf8::downgrade($_[0], 1) or croak "Wide character in gzwrite"); my $status = $gz->write($_[0]) ; _save_gzerr($gz); return $status ; } sub Compress::Zlib::gzFile::gztell { my $self = shift ; my $gz = $self->[0] ; my $status = $gz->tell() ; _save_gzerr($gz); return $status ; } sub Compress::Zlib::gzFile::gzseek { my $self = shift ; my $offset = shift ; my $whence = shift ; my $gz = $self->[0] ; my $status ; eval { $status = $gz->seek($offset, $whence) ; }; if ($@) { my $error = $@; $error =~ s/^.*: /gzseek: /; $error =~ s/ at .* line \d+\s*$//; croak $error; } _save_gzerr($gz); return $status ; } sub Compress::Zlib::gzFile::gzflush { my $self = shift ; my $f = shift ; my $gz = $self->[0] ; my $status = $gz->flush($f) ; my $err = _save_gzerr($gz); return $status ? 0 : $err; } sub Compress::Zlib::gzFile::gzclose { my $self = shift ; my $gz = $self->[0] ; my $status = $gz->close() ; my $err = _save_gzerr($gz); return $status ? 0 : $err; } sub Compress::Zlib::gzFile::gzeof { my $self = shift ; my $gz = $self->[0] ; return 0 if $self->[1] ne 'inflate'; my $status = $gz->eof() ; _save_gzerr($gz); return $status ; } sub Compress::Zlib::gzFile::gzsetparams { my $self = shift ; croak "Usage: Compress::Zlib::gzFile::gzsetparams(file, level, strategy)" unless @_ eq 2 ; my $gz = $self->[0] ; my $level = shift ; my $strategy = shift; return _set_gzerr(Z_STREAM_ERROR()) if $self->[1] ne 'deflate'; my $status = *$gz->{Compress}->deflateParams(-Level => $level, -Strategy => $strategy); _save_gzerr($gz); return $status ; } sub Compress::Zlib::gzFile::gzerror { my $self = shift ; my $gz = $self->[0] ; return $Compress::Zlib::gzerrno ; } sub compress($;$) { my ($x, $output, $err, $in) =('', '', '', '') ; if (ref $_[0] ) { $in = $_[0] ; croak "not a scalar reference" unless ref $in eq 'SCALAR' ; } else { $in = \$_[0] ; } $] >= 5.008 and (utf8::downgrade($$in, 1) or croak "Wide character in compress"); my $level = (@_ == 2 ? $_[1] : Z_DEFAULT_COMPRESSION() ); $x = Compress::Raw::Zlib::_deflateInit(FLAG_APPEND, $level, Z_DEFLATED, MAX_WBITS, MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY, 4096, '') or return undef ; $err = $x->deflate($in, $output) ; return undef unless $err == Z_OK() ; $err = $x->flush($output) ; return undef unless $err == Z_OK() ; return $output ; } sub uncompress($) { my ($output, $in) =('', '') ; if (ref $_[0] ) { $in = $_[0] ; croak "not a scalar reference" unless ref $in eq 'SCALAR' ; } else { $in = \$_[0] ; } $] >= 5.008 and (utf8::downgrade($$in, 1) or croak "Wide character in uncompress"); my ($obj, $status) = Compress::Raw::Zlib::_inflateInit(0, MAX_WBITS, 4096, "") ; $status == Z_OK or return undef; $obj->inflate($in, $output) == Z_STREAM_END or return undef; return $output; } sub deflateInit(@) { my ($got) = ParseParameters(0, { 'bufsize' => [IO::Compress::Base::Common::Parse_unsigned, 4096], 'level' => [IO::Compress::Base::Common::Parse_signed, Z_DEFAULT_COMPRESSION()], 'method' => [IO::Compress::Base::Common::Parse_unsigned, Z_DEFLATED()], 'windowbits' => [IO::Compress::Base::Common::Parse_signed, MAX_WBITS()], 'memlevel' => [IO::Compress::Base::Common::Parse_unsigned, MAX_MEM_LEVEL()], 'strategy' => [IO::Compress::Base::Common::Parse_unsigned, Z_DEFAULT_STRATEGY()], 'dictionary' => [IO::Compress::Base::Common::Parse_any, ""], }, @_ ) ; croak "Compress::Zlib::deflateInit: Bufsize must be >= 1, you specified " . $got->getValue('bufsize') unless $got->getValue('bufsize') >= 1; my $obj ; my $status = 0 ; ($obj, $status) = Compress::Raw::Zlib::_deflateInit(0, $got->getValue('level'), $got->getValue('method'), $got->getValue('windowbits'), $got->getValue('memlevel'), $got->getValue('strategy'), $got->getValue('bufsize'), $got->getValue('dictionary')) ; my $x = ($status == Z_OK() ? bless $obj, "Zlib::OldDeflate" : undef) ; return wantarray ? ($x, $status) : $x ; } sub inflateInit(@) { my ($got) = ParseParameters(0, { 'bufsize' => [IO::Compress::Base::Common::Parse_unsigned, 4096], 'windowbits' => [IO::Compress::Base::Common::Parse_signed, MAX_WBITS()], 'dictionary' => [IO::Compress::Base::Common::Parse_any, ""], }, @_) ; croak "Compress::Zlib::inflateInit: Bufsize must be >= 1, you specified " . $got->getValue('bufsize') unless $got->getValue('bufsize') >= 1; my $status = 0 ; my $obj ; ($obj, $status) = Compress::Raw::Zlib::_inflateInit(FLAG_CONSUME_INPUT, $got->getValue('windowbits'), $got->getValue('bufsize'), $got->getValue('dictionary')) ; my $x = ($status == Z_OK() ? bless $obj, "Zlib::OldInflate" : undef) ; wantarray ? ($x, $status) : $x ; } package Zlib::OldDeflate ; our (@ISA); @ISA = qw(Compress::Raw::Zlib::deflateStream); sub deflate { my $self = shift ; my $output ; my $status = $self->SUPER::deflate($_[0], $output) ; wantarray ? ($output, $status) : $output ; } sub flush { my $self = shift ; my $output ; my $flag = shift || Compress::Zlib::Z_FINISH(); my $status = $self->SUPER::flush($output, $flag) ; wantarray ? ($output, $status) : $output ; } package Zlib::OldInflate ; our (@ISA); @ISA = qw(Compress::Raw::Zlib::inflateStream); sub inflate { my $self = shift ; my $output ; my $status = $self->SUPER::inflate($_[0], $output) ; wantarray ? ($output, $status) : $output ; } package Compress::Zlib ; use IO::Compress::Gzip::Constants 2.063 ; sub memGzip($) { _set_gzerr(0); my $x = Compress::Raw::Zlib::_deflateInit(FLAG_APPEND|FLAG_CRC, Z_BEST_COMPRESSION, Z_DEFLATED, -MAX_WBITS(), MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY, 4096, '') or return undef ; # if the deflation buffer isn't a reference, make it one my $string = (ref $_[0] ? $_[0] : \$_[0]) ; $] >= 5.008 and (utf8::downgrade($$string, 1) or croak "Wide character in memGzip"); my $out; my $status ; $x->deflate($string, $out) == Z_OK or return undef ; $x->flush($out) == Z_OK or return undef ; return IO::Compress::Gzip::Constants::GZIP_MINIMUM_HEADER . $out . pack("V V", $x->crc32(), $x->total_in()); } sub _removeGzipHeader($) { my $string = shift ; return Z_DATA_ERROR() if length($$string) < GZIP_MIN_HEADER_SIZE ; my ($magic1, $magic2, $method, $flags, $time, $xflags, $oscode) = unpack ('CCCCVCC', $$string); return Z_DATA_ERROR() unless $magic1 == GZIP_ID1 and $magic2 == GZIP_ID2 and $method == Z_DEFLATED() and !($flags & GZIP_FLG_RESERVED) ; substr($$string, 0, GZIP_MIN_HEADER_SIZE) = '' ; # skip extra field if ($flags & GZIP_FLG_FEXTRA) { return Z_DATA_ERROR() if length($$string) < GZIP_FEXTRA_HEADER_SIZE ; my ($extra_len) = unpack ('v', $$string); $extra_len += GZIP_FEXTRA_HEADER_SIZE; return Z_DATA_ERROR() if length($$string) < $extra_len ; substr($$string, 0, $extra_len) = ''; } # skip orig name if ($flags & GZIP_FLG_FNAME) { my $name_end = index ($$string, GZIP_NULL_BYTE); return Z_DATA_ERROR() if $name_end == -1 ; substr($$string, 0, $name_end + 1) = ''; } # skip comment if ($flags & GZIP_FLG_FCOMMENT) { my $comment_end = index ($$string, GZIP_NULL_BYTE); return Z_DATA_ERROR() if $comment_end == -1 ; substr($$string, 0, $comment_end + 1) = ''; } # skip header crc if ($flags & GZIP_FLG_FHCRC) { return Z_DATA_ERROR() if length ($$string) < GZIP_FHCRC_SIZE ; substr($$string, 0, GZIP_FHCRC_SIZE) = ''; } return Z_OK(); } sub _ret_gun_error { $Compress::Zlib::gzerrno = $IO::Uncompress::Gunzip::GunzipError; return undef; } sub memGunzip($) { # if the buffer isn't a reference, make it one my $string = (ref $_[0] ? $_[0] : \$_[0]); $] >= 5.008 and (utf8::downgrade($$string, 1) or croak "Wide character in memGunzip"); _set_gzerr(0); my $status = _removeGzipHeader($string) ; $status == Z_OK() or return _set_gzerr_undef($status); my $bufsize = length $$string > 4096 ? length $$string : 4096 ; my $x = Compress::Raw::Zlib::_inflateInit(FLAG_CRC | FLAG_CONSUME_INPUT, -MAX_WBITS(), $bufsize, '') or return _ret_gun_error(); my $output = '' ; $status = $x->inflate($string, $output); if ( $status == Z_OK() ) { _set_gzerr(Z_DATA_ERROR()); return undef; } return _ret_gun_error() if ($status != Z_STREAM_END()); if (length $$string >= 8) { my ($crc, $len) = unpack ("VV", substr($$string, 0, 8)); substr($$string, 0, 8) = ''; return _set_gzerr_undef(Z_DATA_ERROR()) unless $len == length($output) and $crc == Compress::Raw::Zlib::crc32($output); } else { $$string = ''; } return $output; } # Autoload methods go after __END__, and are processed by the autosplit program. 1; __END__ =head1 NAME Compress::Zlib - Interface to zlib compression library =head1 SYNOPSIS use Compress::Zlib ; ($d, $status) = deflateInit( [OPT] ) ; $status = $d->deflate($input, $output) ; $status = $d->flush([$flush_type]) ; $d->deflateParams(OPTS) ; $d->deflateTune(OPTS) ; $d->dict_adler() ; $d->crc32() ; $d->adler32() ; $d->total_in() ; $d->total_out() ; $d->msg() ; $d->get_Strategy(); $d->get_Level(); $d->get_BufSize(); ($i, $status) = inflateInit( [OPT] ) ; $status = $i->inflate($input, $output [, $eof]) ; $status = $i->inflateSync($input) ; $i->dict_adler() ; $d->crc32() ; $d->adler32() ; $i->total_in() ; $i->total_out() ; $i->msg() ; $d->get_BufSize(); $dest = compress($source) ; $dest = uncompress($source) ; $gz = gzopen($filename or filehandle, $mode) ; $bytesread = $gz->gzread($buffer [,$size]) ; $bytesread = $gz->gzreadline($line) ; $byteswritten = $gz->gzwrite($buffer) ; $status = $gz->gzflush($flush) ; $offset = $gz->gztell() ; $status = $gz->gzseek($offset, $whence) ; $status = $gz->gzclose() ; $status = $gz->gzeof() ; $status = $gz->gzsetparams($level, $strategy) ; $errstring = $gz->gzerror() ; $gzerrno $dest = Compress::Zlib::memGzip($buffer) ; $dest = Compress::Zlib::memGunzip($buffer) ; $crc = adler32($buffer [,$crc]) ; $crc = crc32($buffer [,$crc]) ; $crc = adler32_combine($crc1, $crc2, $len2)l $crc = crc32_combine($adler1, $adler2, $len2) my $version = Compress::Raw::Zlib::zlib_version(); =head1 DESCRIPTION The I module provides a Perl interface to the I compression library (see L for details about where to get I). The C module can be split into two general areas of functionality, namely a simple read/write interface to I files and a low-level in-memory compression/decompression interface. Each of these areas will be discussed in the following sections. =head2 Notes for users of Compress::Zlib version 1 The main change in C version 2.x is that it does not now interface directly to the zlib library. Instead it uses the C and C modules for reading/writing gzip files, and the C module for some low-level zlib access. The interface provided by version 2 of this module should be 100% backward compatible with version 1. If you find a difference in the expected behaviour please contact the author (See L). See L With the creation of the C and C modules no new features are planned for C - the new modules do everything that C does and then some. Development on C will be limited to bug fixes only. If you are writing new code, your first port of call should be one of the new C or C modules. =head1 GZIP INTERFACE A number of functions are supplied in I for reading and writing I files that conform to RFC 1952. This module provides an interface to most of them. If you have previously used C 1.x, the following enhancements/changes have been made to the C interface: =over 5 =item 1 If you want to open either STDIN or STDOUT with C, you can now optionally use the special filename "C<->" as a synonym for C<\*STDIN> and C<\*STDOUT>. =item 2 In C version 1.x, C used the zlib library to open the underlying file. This made things especially tricky when a Perl filehandle was passed to C. Behind the scenes the numeric C file descriptor had to be extracted from the Perl filehandle and this passed to the zlib library. Apart from being non-portable to some operating systems, this made it difficult to use C in situations where you wanted to extract/create a gzip data stream that is embedded in a larger file, without having to resort to opening and closing the file multiple times. It also made it impossible to pass a perl filehandle that wasn't associated with a real filesystem file, like, say, an C. In C version 2.x, the C interface has been completely rewritten to use the L for writing gzip files and L for reading gzip files. None of the limitations mentioned above apply. =item 3 Addition of C to provide a restricted C interface. =item 4. Added C. =back A more complete and flexible interface for reading/writing gzip files/buffers is included with the module C. See L and L for more details. =over 5 =item B<$gz = gzopen($filename, $mode)> =item B<$gz = gzopen($filehandle, $mode)> This function opens either the I file C<$filename> for reading or writing or attaches to the opened filehandle, C<$filehandle>. It returns an object on success and C on failure. When writing a gzip file this interface will I create the smallest possible gzip header (exactly 10 bytes). If you want greater control over what gets stored in the gzip header (like the original filename or a comment) use L instead. Similarly if you want to read the contents of the gzip header use L. The second parameter, C<$mode>, is used to specify whether the file is opened for reading or writing and to optionally specify a compression level and compression strategy when writing. The format of the C<$mode> parameter is similar to the mode parameter to the 'C' function C, so "rb" is used to open for reading, "wb" for writing and "ab" for appending (writing at the end of the file). To specify a compression level when writing, append a digit between 0 and 9 to the mode string -- 0 means no compression and 9 means maximum compression. If no compression level is specified Z_DEFAULT_COMPRESSION is used. To specify the compression strategy when writing, append 'f' for filtered data, 'h' for Huffman only compression, or 'R' for run-length encoding. If no strategy is specified Z_DEFAULT_STRATEGY is used. So, for example, "wb9" means open for writing with the maximum compression using the default strategy and "wb4R" means open for writing with compression level 4 and run-length encoding. Refer to the I documentation for the exact format of the C<$mode> parameter. =item B<$bytesread = $gz-Egzread($buffer [, $size]) ;> Reads C<$size> bytes from the compressed file into C<$buffer>. If C<$size> is not specified, it will default to 4096. If the scalar C<$buffer> is not large enough, it will be extended automatically. Returns the number of bytes actually read. On EOF it returns 0 and in the case of an error, -1. =item B<$bytesread = $gz-Egzreadline($line) ;> Reads the next line from the compressed file into C<$line>. Returns the number of bytes actually read. On EOF it returns 0 and in the case of an error, -1. It is legal to intermix calls to C and C. To maintain backward compatibility with version 1.x of this module C ignores the C<$/> variable - it I uses the string C<"\n"> as the line delimiter. If you want to read a gzip file a line at a time and have it respect the C<$/> variable (or C<$INPUT_RECORD_SEPARATOR>, or C<$RS> when C is in use) see L. =item B<$byteswritten = $gz-Egzwrite($buffer) ;> Writes the contents of C<$buffer> to the compressed file. Returns the number of bytes actually written, or 0 on error. =item B<$status = $gz-Egzflush($flush_type) ;> Flushes all pending output into the compressed file. This method takes an optional parameter, C<$flush_type>, that controls how the flushing will be carried out. By default the C<$flush_type> used is C. Other valid values for C<$flush_type> are C, C, C and C. It is strongly recommended that you only set the C parameter if you fully understand the implications of what it does - overuse of C can seriously degrade the level of compression achieved. See the C documentation for details. Returns 0 on success. =item B<$offset = $gz-Egztell() ;> Returns the uncompressed file offset. =item B<$status = $gz-Egzseek($offset, $whence) ;> Provides a sub-set of the C functionality, with the restriction that it is only legal to seek forward in the compressed file. It is a fatal error to attempt to seek backward. When opened for writing, empty parts of the file will have NULL (0x00) bytes written to them. The C<$whence> parameter should be one of SEEK_SET, SEEK_CUR or SEEK_END. Returns 1 on success, 0 on failure. =item B<$gz-Egzclose> Closes the compressed file. Any pending data is flushed to the file before it is closed. Returns 0 on success. =item B<$gz-Egzsetparams($level, $strategy> Change settings for the deflate stream C<$gz>. The list of the valid options is shown below. Options not specified will remain unchanged. Note: This method is only available if you are running zlib 1.0.6 or better. =over 5 =item B<$level> Defines the compression level. Valid values are 0 through 9, C, C, C, and C. =item B<$strategy> Defines the strategy used to tune the compression. The valid values are C, C and C. =back =item B<$gz-Egzerror> Returns the I error message or number for the last operation associated with C<$gz>. The return value will be the I error number when used in a numeric context and the I error message when used in a string context. The I error number constants, shown below, are available for use. Z_OK Z_STREAM_END Z_ERRNO Z_STREAM_ERROR Z_DATA_ERROR Z_MEM_ERROR Z_BUF_ERROR =item B<$gzerrno> The C<$gzerrno> scalar holds the error code associated with the most recent I routine. Note that unlike C, the error is I associated with a particular file. As with C it returns an error number in numeric context and an error message in string context. Unlike C though, the error message will correspond to the I message when the error is associated with I itself, or the UNIX error message when it is not (i.e. I returned C). As there is an overlap between the error numbers used by I and UNIX, C<$gzerrno> should only be used to check for the presence of I error in numeric context. Use C to check for specific I errors. The I example below shows how the variable can be used safely. =back =head2 Examples Here is an example script which uses the interface. It implements a I function. use strict ; use warnings ; use Compress::Zlib ; # use stdin if no files supplied @ARGV = '-' unless @ARGV ; foreach my $file (@ARGV) { my $buffer ; my $gz = gzopen($file, "rb") or die "Cannot open $file: $gzerrno\n" ; print $buffer while $gz->gzread($buffer) > 0 ; die "Error reading from $file: $gzerrno" . ($gzerrno+0) . "\n" if $gzerrno != Z_STREAM_END ; $gz->gzclose() ; } Below is a script which makes use of C. It implements a very simple I like script. use strict ; use warnings ; use Compress::Zlib ; die "Usage: gzgrep pattern [file...]\n" unless @ARGV >= 1; my $pattern = shift ; # use stdin if no files supplied @ARGV = '-' unless @ARGV ; foreach my $file (@ARGV) { my $gz = gzopen($file, "rb") or die "Cannot open $file: $gzerrno\n" ; while ($gz->gzreadline($_) > 0) { print if /$pattern/ ; } die "Error reading from $file: $gzerrno\n" if $gzerrno != Z_STREAM_END ; $gz->gzclose() ; } This script, I, does the opposite of the I script above. It reads from standard input and writes a gzip data stream to standard output. use strict ; use warnings ; use Compress::Zlib ; binmode STDOUT; # gzopen only sets it on the fd my $gz = gzopen(\*STDOUT, "wb") or die "Cannot open stdout: $gzerrno\n" ; while (<>) { $gz->gzwrite($_) or die "error writing: $gzerrno\n" ; } $gz->gzclose ; =head2 Compress::Zlib::memGzip This function is used to create an in-memory gzip file with the minimum possible gzip header (exactly 10 bytes). $dest = Compress::Zlib::memGzip($buffer) or die "Cannot compress: $gzerrno\n"; If successful, it returns the in-memory gzip file. Otherwise it returns C and the C<$gzerrno> variable will store the zlib error code. The C<$buffer> parameter can either be a scalar or a scalar reference. See L for an alternative way to carry out in-memory gzip compression. =head2 Compress::Zlib::memGunzip This function is used to uncompress an in-memory gzip file. $dest = Compress::Zlib::memGunzip($buffer) or die "Cannot uncompress: $gzerrno\n"; If successful, it returns the uncompressed gzip file. Otherwise it returns C and the C<$gzerrno> variable will store the zlib error code. The C<$buffer> parameter can either be a scalar or a scalar reference. The contents of the C<$buffer> parameter are destroyed after calling this function. If C<$buffer> consists of multiple concatenated gzip data streams only the first will be uncompressed. Use C with the C option in the C module if you need to deal with concatenated data streams. See L for an alternative way to carry out in-memory gzip uncompression. =head1 COMPRESS/UNCOMPRESS Two functions are provided to perform in-memory compression/uncompression of RFC 1950 data streams. They are called C and C. =over 5 =item B<$dest = compress($source [, $level] ) ;> Compresses C<$source>. If successful it returns the compressed data. Otherwise it returns I. The source buffer, C<$source>, can either be a scalar or a scalar reference. The C<$level> parameter defines the compression level. Valid values are 0 through 9, C, C, C, and C. If C<$level> is not specified C will be used. =item B<$dest = uncompress($source) ;> Uncompresses C<$source>. If successful it returns the uncompressed data. Otherwise it returns I. The source buffer can either be a scalar or a scalar reference. =back Please note: the two functions defined above are I compatible with the Unix commands of the same name. See L and L included with this distribution for an alternative interface for reading/writing RFC 1950 files/buffers. =head1 Deflate Interface This section defines an interface that allows in-memory compression using the I interface provided by zlib. Here is a definition of the interface available: =head2 B<($d, $status) = deflateInit( [OPT] )> Initialises a deflation stream. It combines the features of the I functions C, C and C. If successful, it will return the initialised deflation stream, C<$d> and C<$status> of C in a list context. In scalar context it returns the deflation stream, C<$d>, only. If not successful, the returned deflation stream (C<$d>) will be I and C<$status> will hold the exact I error code. The function optionally takes a number of named options specified as C<< -Name=>value >> pairs. This allows individual options to be tailored without having to specify them all in the parameter list. For backward compatibility, it is also possible to pass the parameters as a reference to a hash containing the name=>value pairs. The function takes one optional parameter, a reference to a hash. The contents of the hash allow the deflation interface to be tailored. Here is a list of the valid options: =over 5 =item B<-Level> Defines the compression level. Valid values are 0 through 9, C, C, C, and C. The default is Z_DEFAULT_COMPRESSION. =item B<-Method> Defines the compression method. The only valid value at present (and the default) is Z_DEFLATED. =item B<-WindowBits> To create an RFC 1950 data stream, set C to a positive number. To create an RFC 1951 data stream, set C to C<-MAX_WBITS>. For a full definition of the meaning and valid values for C refer to the I documentation for I. Defaults to MAX_WBITS. =item B<-MemLevel> For a definition of the meaning and valid values for C refer to the I documentation for I. Defaults to MAX_MEM_LEVEL. =item B<-Strategy> Defines the strategy used to tune the compression. The valid values are C, C and C. The default is Z_DEFAULT_STRATEGY. =item B<-Dictionary> When a dictionary is specified I will automatically call C directly after calling C. The Adler32 value for the dictionary can be obtained by calling the method C<$d->dict_adler()>. The default is no dictionary. =item B<-Bufsize> Sets the initial size for the deflation buffer. If the buffer has to be reallocated to increase the size, it will grow in increments of C. The default is 4096. =back Here is an example of using the C optional parameter list to override the default buffer size and compression level. All other options will take their default values. deflateInit( -Bufsize => 300, -Level => Z_BEST_SPEED ) ; =head2 B<($out, $status) = $d-Edeflate($buffer)> Deflates the contents of C<$buffer>. The buffer can either be a scalar or a scalar reference. When finished, C<$buffer> will be completely processed (assuming there were no errors). If the deflation was successful it returns the deflated output, C<$out>, and a status value, C<$status>, of C. On error, C<$out> will be I and C<$status> will contain the I error code. In a scalar context C will return C<$out> only. As with the I function in I, it is not necessarily the case that any output will be produced by this method. So don't rely on the fact that C<$out> is empty for an error test. =head2 B<($out, $status) = $d-Eflush()> =head2 B<($out, $status) = $d-Eflush($flush_type)> Typically used to finish the deflation. Any pending output will be returned via C<$out>. C<$status> will have a value C if successful. In a scalar context C will return C<$out> only. Note that flushing can seriously degrade the compression ratio, so it should only be used to terminate a decompression (using C) or when you want to create a I (using C). By default the C used is C. Other valid values for C are C, C, C and C. It is strongly recommended that you only set the C parameter if you fully understand the implications of what it does. See the C documentation for details. =head2 B<$status = $d-EdeflateParams([OPT])> Change settings for the deflate stream C<$d>. The list of the valid options is shown below. Options not specified will remain unchanged. =over 5 =item B<-Level> Defines the compression level. Valid values are 0 through 9, C, C, C, and C. =item B<-Strategy> Defines the strategy used to tune the compression. The valid values are C, C and C. =back =head2 B<$d-Edict_adler()> Returns the adler32 value for the dictionary. =head2 B<$d-Emsg()> Returns the last error message generated by zlib. =head2 B<$d-Etotal_in()> Returns the total number of bytes uncompressed bytes input to deflate. =head2 B<$d-Etotal_out()> Returns the total number of compressed bytes output from deflate. =head2 Example Here is a trivial example of using C. It simply reads standard input, deflates it and writes it to standard output. use strict ; use warnings ; use Compress::Zlib ; binmode STDIN; binmode STDOUT; my $x = deflateInit() or die "Cannot create a deflation stream\n" ; my ($output, $status) ; while (<>) { ($output, $status) = $x->deflate($_) ; $status == Z_OK or die "deflation failed\n" ; print $output ; } ($output, $status) = $x->flush() ; $status == Z_OK or die "deflation failed\n" ; print $output ; =head1 Inflate Interface This section defines the interface available that allows in-memory uncompression using the I interface provided by zlib. Here is a definition of the interface: =head2 B<($i, $status) = inflateInit()> Initialises an inflation stream. In a list context it returns the inflation stream, C<$i>, and the I status code in C<$status>. In a scalar context it returns the inflation stream only. If successful, C<$i> will hold the inflation stream and C<$status> will be C. If not successful, C<$i> will be I and C<$status> will hold the I error code. The function optionally takes a number of named options specified as C<< -Name=>value >> pairs. This allows individual options to be tailored without having to specify them all in the parameter list. For backward compatibility, it is also possible to pass the parameters as a reference to a hash containing the name=>value pairs. The function takes one optional parameter, a reference to a hash. The contents of the hash allow the deflation interface to be tailored. Here is a list of the valid options: =over 5 =item B<-WindowBits> To uncompress an RFC 1950 data stream, set C to a positive number. To uncompress an RFC 1951 data stream, set C to C<-MAX_WBITS>. For a full definition of the meaning and valid values for C refer to the I documentation for I. Defaults to MAX_WBITS. =item B<-Bufsize> Sets the initial size for the inflation buffer. If the buffer has to be reallocated to increase the size, it will grow in increments of C. Default is 4096. =item B<-Dictionary> The default is no dictionary. =back Here is an example of using the C optional parameter to override the default buffer size. inflateInit( -Bufsize => 300 ) ; =head2 B<($out, $status) = $i-Einflate($buffer)> Inflates the complete contents of C<$buffer>. The buffer can either be a scalar or a scalar reference. Returns C if successful and C if the end of the compressed data has been successfully reached. If not successful, C<$out> will be I and C<$status> will hold the I error code. The C<$buffer> parameter is modified by C. On completion it will contain what remains of the input buffer after inflation. This means that C<$buffer> will be an empty string when the return status is C. When the return status is C the C<$buffer> parameter will contains what (if anything) was stored in the input buffer after the deflated data stream. This feature is useful when processing a file format that encapsulates a compressed data stream (e.g. gzip, zip). =head2 B<$status = $i-EinflateSync($buffer)> Scans C<$buffer> until it reaches either a I or the end of the buffer. If a I is found, C is returned and C<$buffer> will be have all data up to the flush point removed. This can then be passed to the C method. Any other return code means that a flush point was not found. If more data is available, C can be called repeatedly with more compressed data until the flush point is found. =head2 B<$i-Edict_adler()> Returns the adler32 value for the dictionary. =head2 B<$i-Emsg()> Returns the last error message generated by zlib. =head2 B<$i-Etotal_in()> Returns the total number of bytes compressed bytes input to inflate. =head2 B<$i-Etotal_out()> Returns the total number of uncompressed bytes output from inflate. =head2 Example Here is an example of using C. use strict ; use warnings ; use Compress::Zlib ; my $x = inflateInit() or die "Cannot create a inflation stream\n" ; my $input = '' ; binmode STDIN; binmode STDOUT; my ($output, $status) ; while (read(STDIN, $input, 4096)) { ($output, $status) = $x->inflate(\$input) ; print $output if $status == Z_OK or $status == Z_STREAM_END ; last if $status != Z_OK ; } die "inflation failed\n" unless $status == Z_STREAM_END ; =head1 CHECKSUM FUNCTIONS Two functions are provided by I to calculate checksums. For the Perl interface, the order of the two parameters in both functions has been reversed. This allows both running checksums and one off calculations to be done. $crc = adler32($buffer [,$crc]) ; $crc = crc32($buffer [,$crc]) ; The buffer parameters can either be a scalar or a scalar reference. If the $crc parameters is C, the crc value will be reset. If you have built this module with zlib 1.2.3 or better, two more CRC-related functions are available. $crc = adler32_combine($crc1, $crc2, $len2)l $crc = crc32_combine($adler1, $adler2, $len2) These functions allow checksums to be merged. =head1 Misc =head2 my $version = Compress::Zlib::zlib_version(); Returns the version of the zlib library. =head1 CONSTANTS All the I constants are automatically imported when you make use of I. =head1 SEE ALSO L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L L L, L, L, L For RFC 1950, 1951 and 1952 see F, F and F The I compression library was written by Jean-loup Gailly F and Mark Adler F. The primary site for the I compression library is F. The primary site for gzip is F. =head1 AUTHOR This module was written by Paul Marquess, F. =head1 MODIFICATION HISTORY See the Changes file. =head1 COPYRIGHT AND LICENSE Copyright (c) 1995-2013 Paul Marquess. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. IO-Compress-2.063/lib/IO/0000755000175000017500000000000012235214476013373 5ustar paulpaulIO-Compress-2.063/lib/IO/Uncompress/0000755000175000017500000000000012235214476015531 5ustar paulpaulIO-Compress-2.063/lib/IO/Uncompress/RawInflate.pm0000755000175000017500000007556412211042537020136 0ustar paulpaulpackage IO::Uncompress::RawInflate ; # for RFC1951 use strict ; use warnings; #use bytes; use Compress::Raw::Zlib 2.063 ; use IO::Compress::Base::Common 2.063 qw(:Status ); use IO::Uncompress::Base 2.063 ; use IO::Uncompress::Adapter::Inflate 2.063 ; require Exporter ; our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, %DEFLATE_CONSTANTS, $RawInflateError); $VERSION = '2.063'; $RawInflateError = ''; @ISA = qw( Exporter IO::Uncompress::Base ); @EXPORT_OK = qw( $RawInflateError rawinflate ) ; %DEFLATE_CONSTANTS = (); %EXPORT_TAGS = %IO::Uncompress::Base::EXPORT_TAGS ; push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ; Exporter::export_ok_tags('all'); #{ # # Execute at runtime # my %bad; # for my $module (qw(Compress::Raw::Zlib IO::Compress::Base::Common IO::Uncompress::Base IO::Uncompress::Adapter::Inflate)) # { # my $ver = ${ $module . "::VERSION"} ; # # $bad{$module} = $ver # if $ver ne $VERSION; # } # # if (keys %bad) # { # my $string = join "\n", map { "$_ $bad{$_}" } keys %bad; # die caller(0)[0] . "needs version $VERSION mismatch\n$string\n"; # } #} sub new { my $class = shift ; my $obj = IO::Compress::Base::Common::createSelfTiedObject($class, \$RawInflateError); $obj->_create(undef, 0, @_); } sub rawinflate { my $obj = IO::Compress::Base::Common::createSelfTiedObject(undef, \$RawInflateError); return $obj->_inf(@_); } sub getExtraParams { return (); } sub ckParams { my $self = shift ; my $got = shift ; return 1; } sub mkUncomp { my $self = shift ; my $got = shift ; my ($obj, $errstr, $errno) = IO::Uncompress::Adapter::Inflate::mkUncompObject( $got->getValue('crc32'), $got->getValue('adler32'), $got->getValue('scan'), ); return $self->saveErrorString(undef, $errstr, $errno) if ! defined $obj; *$self->{Uncomp} = $obj; my $magic = $self->ckMagic() or return 0; *$self->{Info} = $self->readHeader($magic) or return undef ; return 1; } sub ckMagic { my $self = shift; return $self->_isRaw() ; } sub readHeader { my $self = shift; my $magic = shift ; return { 'Type' => 'rfc1951', 'FingerprintLength' => 0, 'HeaderLength' => 0, 'TrailerLength' => 0, 'Header' => '' }; } sub chkTrailer { return STATUS_OK ; } sub _isRaw { my $self = shift ; my $got = $self->_isRawx(@_); if ($got) { *$self->{Pending} = *$self->{HeaderPending} ; } else { $self->pushBack(*$self->{HeaderPending}); *$self->{Uncomp}->reset(); } *$self->{HeaderPending} = ''; return $got ; } sub _isRawx { my $self = shift ; my $magic = shift ; $magic = '' unless defined $magic ; my $buffer = ''; $self->smartRead(\$buffer, *$self->{BlockSize}) >= 0 or return $self->saveErrorString(undef, "No data to read"); my $temp_buf = $magic . $buffer ; *$self->{HeaderPending} = $temp_buf ; $buffer = ''; my $status = *$self->{Uncomp}->uncompr(\$temp_buf, \$buffer, $self->smartEof()) ; return $self->saveErrorString(undef, *$self->{Uncomp}{Error}, STATUS_ERROR) if $status == STATUS_ERROR; $self->pushBack($temp_buf) ; return $self->saveErrorString(undef, "unexpected end of file", STATUS_ERROR) if $self->smartEof() && $status != STATUS_ENDSTREAM; #my $buf_len = *$self->{Uncomp}->uncompressedBytes(); my $buf_len = length $buffer; if ($status == STATUS_ENDSTREAM) { if (*$self->{MultiStream} && (length $temp_buf || ! $self->smartEof())){ *$self->{NewStream} = 1 ; *$self->{EndStream} = 0 ; } else { *$self->{EndStream} = 1 ; } } *$self->{HeaderPending} = $buffer ; *$self->{InflatedBytesRead} = $buf_len ; *$self->{TotalInflatedBytesRead} += $buf_len ; *$self->{Type} = 'rfc1951'; $self->saveStatus(STATUS_OK); return { 'Type' => 'rfc1951', 'HeaderLength' => 0, 'TrailerLength' => 0, 'Header' => '' }; } sub inflateSync { my $self = shift ; # inflateSync is a no-op in Plain mode return 1 if *$self->{Plain} ; return 0 if *$self->{Closed} ; #return G_EOF if !length *$self->{Pending} && *$self->{EndStream} ; return 0 if ! length *$self->{Pending} && *$self->{EndStream} ; # Disable CRC check *$self->{Strict} = 0 ; my $status ; while (1) { my $temp_buf ; if (length *$self->{Pending} ) { $temp_buf = *$self->{Pending} ; *$self->{Pending} = ''; } else { $status = $self->smartRead(\$temp_buf, *$self->{BlockSize}) ; return $self->saveErrorString(0, "Error Reading Data") if $status < 0 ; if ($status == 0 ) { *$self->{EndStream} = 1 ; return $self->saveErrorString(0, "unexpected end of file", STATUS_ERROR); } } $status = *$self->{Uncomp}->sync($temp_buf) ; if ($status == STATUS_OK) { *$self->{Pending} .= $temp_buf ; return 1 ; } last unless $status == STATUS_ERROR ; } return 0; } #sub performScan #{ # my $self = shift ; # # my $status ; # my $end_offset = 0; # # $status = $self->scan() # #or return $self->saveErrorString(undef, "Error Scanning: $$error_ref", $self->errorNo) ; # or return $self->saveErrorString(G_ERR, "Error Scanning: $status") # # $status = $self->zap($end_offset) # or return $self->saveErrorString(G_ERR, "Error Zapping: $status"); # #or return $self->saveErrorString(undef, "Error Zapping: $$error_ref", $self->errorNo) ; # # #(*$obj->{Deflate}, $status) = $inf->createDeflate(); # ## *$obj->{Header} = *$inf->{Info}{Header}; ## *$obj->{UnCompSize_32bit} = ## *$obj->{BytesWritten} = *$inf->{UnCompSize_32bit} ; ## *$obj->{CompSize_32bit} = *$inf->{CompSize_32bit} ; # # ## if ( $outType eq 'buffer') ## { substr( ${ *$self->{Buffer} }, $end_offset) = '' } ## elsif ($outType eq 'handle' || $outType eq 'filename') { ## *$self->{FH} = *$inf->{FH} ; ## delete *$inf->{FH}; ## *$obj->{FH}->flush() ; ## *$obj->{Handle} = 1 if $outType eq 'handle'; ## ## #seek(*$obj->{FH}, $end_offset, SEEK_SET) ## *$obj->{FH}->seek($end_offset, SEEK_SET) ## or return $obj->saveErrorString(undef, $!, $!) ; ## } # #} sub scan { my $self = shift ; return 1 if *$self->{Closed} ; return 1 if !length *$self->{Pending} && *$self->{EndStream} ; my $buffer = '' ; my $len = 0; $len = $self->_raw_read(\$buffer, 1) while ! *$self->{EndStream} && $len >= 0 ; #return $len if $len < 0 ? $len : 0 ; return $len < 0 ? 0 : 1 ; } sub zap { my $self = shift ; my $headerLength = *$self->{Info}{HeaderLength}; my $block_offset = $headerLength + *$self->{Uncomp}->getLastBlockOffset(); $_[0] = $headerLength + *$self->{Uncomp}->getEndOffset(); #printf "# End $_[0], headerlen $headerLength \n";; #printf "# block_offset $block_offset %x\n", $block_offset; my $byte ; ( $self->smartSeek($block_offset) && $self->smartRead(\$byte, 1) ) or return $self->saveErrorString(0, $!, $!); #printf "#byte is %x\n", unpack('C*',$byte); *$self->{Uncomp}->resetLastBlockByte($byte); #printf "#to byte is %x\n", unpack('C*',$byte); ( $self->smartSeek($block_offset) && $self->smartWrite($byte) ) or return $self->saveErrorString(0, $!, $!); #$self->smartSeek($end_offset, 1); return 1 ; } sub createDeflate { my $self = shift ; my ($def, $status) = *$self->{Uncomp}->createDeflateStream( -AppendOutput => 1, -WindowBits => - MAX_WBITS, -CRC32 => *$self->{Params}->getValue('crc32'), -ADLER32 => *$self->{Params}->getValue('adler32'), ); return wantarray ? ($status, $def) : $def ; } 1; __END__ =head1 NAME IO::Uncompress::RawInflate - Read RFC 1951 files/buffers =head1 SYNOPSIS use IO::Uncompress::RawInflate qw(rawinflate $RawInflateError) ; my $status = rawinflate $input => $output [,OPTS] or die "rawinflate failed: $RawInflateError\n"; my $z = new IO::Uncompress::RawInflate $input [OPTS] or die "rawinflate failed: $RawInflateError\n"; $status = $z->read($buffer) $status = $z->read($buffer, $length) $status = $z->read($buffer, $length, $offset) $line = $z->getline() $char = $z->getc() $char = $z->ungetc() $char = $z->opened() $status = $z->inflateSync() $data = $z->trailingData() $status = $z->nextStream() $data = $z->getHeaderInfo() $z->tell() $z->seek($position, $whence) $z->binmode() $z->fileno() $z->eof() $z->close() $RawInflateError ; # IO::File mode <$z> read($z, $buffer); read($z, $buffer, $length); read($z, $buffer, $length, $offset); tell($z) seek($z, $position, $whence) binmode($z) fileno($z) eof($z) close($z) =head1 DESCRIPTION This module provides a Perl interface that allows the reading of files/buffers that conform to RFC 1951. For writing RFC 1951 files/buffers, see the companion module IO::Compress::RawDeflate. =head1 Functional Interface A top-level function, C, is provided to carry out "one-shot" uncompression between buffers and/or files. For finer control over the uncompression process, see the L section. use IO::Uncompress::RawInflate qw(rawinflate $RawInflateError) ; rawinflate $input_filename_or_reference => $output_filename_or_reference [,OPTS] or die "rawinflate failed: $RawInflateError\n"; The functional interface needs Perl5.005 or better. =head2 rawinflate $input_filename_or_reference => $output_filename_or_reference [, OPTS] C expects at least two parameters, C<$input_filename_or_reference> and C<$output_filename_or_reference>. =head3 The C<$input_filename_or_reference> parameter The parameter, C<$input_filename_or_reference>, is used to define the source of the compressed data. It can take one of the following forms: =over 5 =item A filename If the <$input_filename_or_reference> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for reading and the input data will be read from it. =item A filehandle If the C<$input_filename_or_reference> parameter is a filehandle, the input data will be read from it. The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input_filename_or_reference> is a scalar reference, the input data will be read from C<$$input_filename_or_reference>. =item An array reference If C<$input_filename_or_reference> is an array reference, each element in the array must be a filename. The input data will be read from each file in turn. The complete array will be walked to ensure that it only contains valid filenames before any data is uncompressed. =item An Input FileGlob string If C<$input_filename_or_reference> is a string that is delimited by the characters "<" and ">" C will assume that it is an I. The input is the list of files that match the fileglob. See L for more details. =back If the C<$input_filename_or_reference> parameter is any other type, C will be returned. =head3 The C<$output_filename_or_reference> parameter The parameter C<$output_filename_or_reference> is used to control the destination of the uncompressed data. This parameter can take one of these forms. =over 5 =item A filename If the C<$output_filename_or_reference> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for writing and the uncompressed data will be written to it. =item A filehandle If the C<$output_filename_or_reference> parameter is a filehandle, the uncompressed data will be written to it. The string '-' can be used as an alias for standard output. =item A scalar reference If C<$output_filename_or_reference> is a scalar reference, the uncompressed data will be stored in C<$$output_filename_or_reference>. =item An Array Reference If C<$output_filename_or_reference> is an array reference, the uncompressed data will be pushed onto the array. =item An Output FileGlob If C<$output_filename_or_reference> is a string that is delimited by the characters "<" and ">" C will assume that it is an I. The output is the list of files that match the fileglob. When C<$output_filename_or_reference> is an fileglob string, C<$input_filename_or_reference> must also be a fileglob string. Anything else is an error. See L for more details. =back If the C<$output_filename_or_reference> parameter is any other type, C will be returned. =head2 Notes When C<$input_filename_or_reference> maps to multiple compressed files/buffers and C<$output_filename_or_reference> is a single file/buffer, after uncompression C<$output_filename_or_reference> will contain a concatenation of all the uncompressed data from each of the input files/buffers. =head2 Optional Parameters Unless specified below, the optional parameters for C, C, are the same as those used with the OO interface defined in the L section below. =over 5 =item C<< AutoClose => 0|1 >> This option applies to any input or output data streams to C that are filehandles. If C is specified, and the value is true, it will result in all input and/or output filehandles being closed once C has completed. This parameter defaults to 0. =item C<< BinModeOut => 0|1 >> When writing to a file or filehandle, set C before writing to the file. Defaults to 0. =item C<< Append => 0|1 >> The behaviour of this option is dependent on the type of output data stream. =over 5 =item * A Buffer If C is enabled, all uncompressed data will be append to the end of the output buffer. Otherwise the output buffer will be cleared before any uncompressed data is written to it. =item * A Filename If C is enabled, the file will be opened in append mode. Otherwise the contents of the file, if any, will be truncated before any uncompressed data is written to it. =item * A Filehandle If C is enabled, the filehandle will be positioned to the end of the file via a call to C before any uncompressed data is written to it. Otherwise the file pointer will not be moved. =back When C is specified, and set to true, it will I all uncompressed data to the output data stream. So when the output is a filehandle it will carry out a seek to the eof before writing any uncompressed data. If the output is a filename, it will be opened for appending. If the output is a buffer, all uncompressed data will be appended to the existing buffer. Conversely when C is not specified, or it is present and is set to false, it will operate as follows. When the output is a filename, it will truncate the contents of the file before writing any uncompressed data. If the output is a filehandle its position will not be changed. If the output is a buffer, it will be wiped before any uncompressed data is output. Defaults to 0. =item C<< MultiStream => 0|1 >> This option is a no-op. =item C<< TrailingData => $scalar >> Returns the data, if any, that is present immediately after the compressed data stream once uncompression is complete. This option can be used when there is useful information immediately following the compressed data stream, and you don't know the length of the compressed data stream. If the input is a buffer, C will return everything from the end of the compressed data stream to the end of the buffer. If the input is a filehandle, C will return the data that is left in the filehandle input buffer once the end of the compressed data stream has been reached. You can then use the filehandle to read the rest of the input file. Don't bother using C if the input is a filename. If you know the length of the compressed data stream before you start uncompressing, you can avoid having to use C by setting the C option. =back =head2 Examples To read the contents of the file C and write the uncompressed data to the file C. use strict ; use warnings ; use IO::Uncompress::RawInflate qw(rawinflate $RawInflateError) ; my $input = "file1.txt.1951"; my $output = "file1.txt"; rawinflate $input => $output or die "rawinflate failed: $RawInflateError\n"; To read from an existing Perl filehandle, C<$input>, and write the uncompressed data to a buffer, C<$buffer>. use strict ; use warnings ; use IO::Uncompress::RawInflate qw(rawinflate $RawInflateError) ; use IO::File ; my $input = new IO::File " \$buffer or die "rawinflate failed: $RawInflateError\n"; To uncompress all files in the directory "/my/home" that match "*.txt.1951" and store the compressed data in the same directory use strict ; use warnings ; use IO::Uncompress::RawInflate qw(rawinflate $RawInflateError) ; rawinflate '' => '' or die "rawinflate failed: $RawInflateError\n"; and if you want to compress each file one at a time, this will do the trick use strict ; use warnings ; use IO::Uncompress::RawInflate qw(rawinflate $RawInflateError) ; for my $input ( glob "/my/home/*.txt.1951" ) { my $output = $input; $output =~ s/.1951// ; rawinflate $input => $output or die "Error compressing '$input': $RawInflateError\n"; } =head1 OO Interface =head2 Constructor The format of the constructor for IO::Uncompress::RawInflate is shown below my $z = new IO::Uncompress::RawInflate $input [OPTS] or die "IO::Uncompress::RawInflate failed: $RawInflateError\n"; Returns an C object on success and undef on failure. The variable C<$RawInflateError> will contain an error message on failure. If you are running Perl 5.005 or better the object, C<$z>, returned from IO::Uncompress::RawInflate can be used exactly like an L filehandle. This means that all normal input file operations can be carried out with C<$z>. For example, to read a line from a compressed file/buffer you can use either of these forms $line = $z->getline(); $line = <$z>; The mandatory parameter C<$input> is used to determine the source of the compressed data. This parameter can take one of three forms. =over 5 =item A filename If the C<$input> parameter is a scalar, it is assumed to be a filename. This file will be opened for reading and the compressed data will be read from it. =item A filehandle If the C<$input> parameter is a filehandle, the compressed data will be read from it. The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input> is a scalar reference, the compressed data will be read from C<$$input>. =back =head2 Constructor Options The option names defined below are case insensitive and can be optionally prefixed by a '-'. So all of the following are valid -AutoClose -autoclose AUTOCLOSE autoclose OPTS is a combination of the following options: =over 5 =item C<< AutoClose => 0|1 >> This option is only valid when the C<$input> parameter is a filehandle. If specified, and the value is true, it will result in the file being closed once either the C method is called or the IO::Uncompress::RawInflate object is destroyed. This parameter defaults to 0. =item C<< MultiStream => 0|1 >> Allows multiple concatenated compressed streams to be treated as a single compressed stream. Decompression will stop once either the end of the file/buffer is reached, an error is encountered (premature eof, corrupt compressed data) or the end of a stream is not immediately followed by the start of another stream. This parameter defaults to 0. =item C<< Prime => $string >> This option will uncompress the contents of C<$string> before processing the input file/buffer. This option can be useful when the compressed data is embedded in another file/data structure and it is not possible to work out where the compressed data begins without having to read the first few bytes. If this is the case, the uncompression can be I with these bytes using this option. =item C<< Transparent => 0|1 >> If this option is set and the input file/buffer is not compressed data, the module will allow reading of it anyway. In addition, if the input file/buffer does contain compressed data and there is non-compressed data immediately following it, setting this option will make this module treat the whole file/buffer as a single data stream. This option defaults to 1. =item C<< BlockSize => $num >> When reading the compressed input data, IO::Uncompress::RawInflate will read it in blocks of C<$num> bytes. This option defaults to 4096. =item C<< InputLength => $size >> When present this option will limit the number of compressed bytes read from the input file/buffer to C<$size>. This option can be used in the situation where there is useful data directly after the compressed data stream and you know beforehand the exact length of the compressed data stream. This option is mostly used when reading from a filehandle, in which case the file pointer will be left pointing to the first byte directly after the compressed data stream. This option defaults to off. =item C<< Append => 0|1 >> This option controls what the C method does with uncompressed data. If set to 1, all uncompressed data will be appended to the output parameter of the C method. If set to 0, the contents of the output parameter of the C method will be overwritten by the uncompressed data. Defaults to 0. =item C<< Strict => 0|1 >> This option is a no-op. =back =head2 Examples TODO =head1 Methods =head2 read Usage is $status = $z->read($buffer) Reads a block of compressed data (the size of the compressed block is determined by the C option in the constructor), uncompresses it and writes any uncompressed data into C<$buffer>. If the C parameter is set in the constructor, the uncompressed data will be appended to the C<$buffer> parameter. Otherwise C<$buffer> will be overwritten. Returns the number of uncompressed bytes written to C<$buffer>, zero if eof or a negative number on error. =head2 read Usage is $status = $z->read($buffer, $length) $status = $z->read($buffer, $length, $offset) $status = read($z, $buffer, $length) $status = read($z, $buffer, $length, $offset) Attempt to read C<$length> bytes of uncompressed data into C<$buffer>. The main difference between this form of the C method and the previous one, is that this one will attempt to return I C<$length> bytes. The only circumstances that this function will not is if end-of-file or an IO error is encountered. Returns the number of uncompressed bytes written to C<$buffer>, zero if eof or a negative number on error. =head2 getline Usage is $line = $z->getline() $line = <$z> Reads a single line. This method fully supports the use of the variable C<$/> (or C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C is in use) to determine what constitutes an end of line. Paragraph mode, record mode and file slurp mode are all supported. =head2 getc Usage is $char = $z->getc() Read a single character. =head2 ungetc Usage is $char = $z->ungetc($string) =head2 inflateSync Usage is $status = $z->inflateSync() TODO =head2 getHeaderInfo Usage is $hdr = $z->getHeaderInfo(); @hdrs = $z->getHeaderInfo(); This method returns either a hash reference (in scalar context) or a list or hash references (in array context) that contains information about each of the header fields in the compressed data stream(s). =head2 tell Usage is $z->tell() tell $z Returns the uncompressed file offset. =head2 eof Usage is $z->eof(); eof($z); Returns true if the end of the compressed input stream has been reached. =head2 seek $z->seek($position, $whence); seek($z, $position, $whence); Provides a sub-set of the C functionality, with the restriction that it is only legal to seek forward in the input file/buffer. It is a fatal error to attempt to seek backward. Note that the implementation of C in this module does not provide true random access to a compressed file/buffer. It works by uncompressing data from the current offset in the file/buffer until it reaches the uncompressed offset specified in the parameters to C. For very small files this may be acceptable behaviour. For large files it may cause an unacceptable delay. The C<$whence> parameter takes one the usual values, namely SEEK_SET, SEEK_CUR or SEEK_END. Returns 1 on success, 0 on failure. =head2 binmode Usage is $z->binmode binmode $z ; This is a noop provided for completeness. =head2 opened $z->opened() Returns true if the object currently refers to a opened file/buffer. =head2 autoflush my $prev = $z->autoflush() my $prev = $z->autoflush(EXPR) If the C<$z> object is associated with a file or a filehandle, this method returns the current autoflush setting for the underlying filehandle. If C is present, and is non-zero, it will enable flushing after every write/print operation. If C<$z> is associated with a buffer, this method has no effect and always returns C. B that the special variable C<$|> B be used to set or retrieve the autoflush setting. =head2 input_line_number $z->input_line_number() $z->input_line_number(EXPR) Returns the current uncompressed line number. If C is present it has the effect of setting the line number. Note that setting the line number does not change the current position within the file/buffer being read. The contents of C<$/> are used to determine what constitutes a line terminator. =head2 fileno $z->fileno() fileno($z) If the C<$z> object is associated with a file or a filehandle, C will return the underlying file descriptor. Once the C method is called C will return C. If the C<$z> object is associated with a buffer, this method will return C. =head2 close $z->close() ; close $z ; Closes the output file/buffer. For most versions of Perl this method will be automatically invoked if the IO::Uncompress::RawInflate object is destroyed (either explicitly or by the variable with the reference to the object going out of scope). The exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In these cases, the C method will be called automatically, but not until global destruction of all live objects when the program is terminating. Therefore, if you want your scripts to be able to run on all versions of Perl, you should call C explicitly and not rely on automatic closing. Returns true on success, otherwise 0. If the C option has been enabled when the IO::Uncompress::RawInflate object was created, and the object is associated with a file, the underlying file will also be closed. =head2 nextStream Usage is my $status = $z->nextStream(); Skips to the next compressed data stream in the input file/buffer. If a new compressed data stream is found, the eof marker will be cleared and C<$.> will be reset to 0. Returns 1 if a new stream was found, 0 if none was found, and -1 if an error was encountered. =head2 trailingData Usage is my $data = $z->trailingData(); Returns the data, if any, that is present immediately after the compressed data stream once uncompression is complete. It only makes sense to call this method once the end of the compressed data stream has been encountered. This option can be used when there is useful information immediately following the compressed data stream, and you don't know the length of the compressed data stream. If the input is a buffer, C will return everything from the end of the compressed data stream to the end of the buffer. If the input is a filehandle, C will return the data that is left in the filehandle input buffer once the end of the compressed data stream has been reached. You can then use the filehandle to read the rest of the input file. Don't bother using C if the input is a filename. If you know the length of the compressed data stream before you start uncompressing, you can avoid having to use C by setting the C option in the constructor. =head1 Importing No symbolic constants are required by this IO::Uncompress::RawInflate at present. =over 5 =item :all Imports C and C<$RawInflateError>. Same as doing this use IO::Uncompress::RawInflate qw(rawinflate $RawInflateError) ; =back =head1 EXAMPLES =head2 Working with Net::FTP See L =head1 SEE ALSO L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L L L, L, L, L For RFC 1950, 1951 and 1952 see F, F and F The I compression library was written by Jean-loup Gailly F and Mark Adler F. The primary site for the I compression library is F. The primary site for gzip is F. =head1 AUTHOR This module was written by Paul Marquess, F. =head1 MODIFICATION HISTORY See the Changes file. =head1 COPYRIGHT AND LICENSE Copyright (c) 2005-2013 Paul Marquess. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. IO-Compress-2.063/lib/IO/Uncompress/Gunzip.pm0000644000175000017500000007623612211042537017350 0ustar paulpaul package IO::Uncompress::Gunzip ; require 5.006 ; # for RFC1952 use strict ; use warnings; use bytes; use IO::Uncompress::RawInflate 2.063 ; use Compress::Raw::Zlib 2.063 () ; use IO::Compress::Base::Common 2.063 qw(:Status ); use IO::Compress::Gzip::Constants 2.063 ; use IO::Compress::Zlib::Extra 2.063 ; require Exporter ; our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $GunzipError); @ISA = qw( Exporter IO::Uncompress::RawInflate ); @EXPORT_OK = qw( $GunzipError gunzip ); %EXPORT_TAGS = %IO::Uncompress::RawInflate::DEFLATE_CONSTANTS ; push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ; Exporter::export_ok_tags('all'); $GunzipError = ''; $VERSION = '2.063'; sub new { my $class = shift ; $GunzipError = ''; my $obj = IO::Compress::Base::Common::createSelfTiedObject($class, \$GunzipError); $obj->_create(undef, 0, @_); } sub gunzip { my $obj = IO::Compress::Base::Common::createSelfTiedObject(undef, \$GunzipError); return $obj->_inf(@_) ; } sub getExtraParams { return ( 'parseextra' => [IO::Compress::Base::Common::Parse_boolean, 0] ) ; } sub ckParams { my $self = shift ; my $got = shift ; # gunzip always needs crc32 $got->setValue('crc32' => 1); return 1; } sub ckMagic { my $self = shift; my $magic ; $self->smartReadExact(\$magic, GZIP_ID_SIZE); *$self->{HeaderPending} = $magic ; return $self->HeaderError("Minimum header size is " . GZIP_MIN_HEADER_SIZE . " bytes") if length $magic != GZIP_ID_SIZE ; return $self->HeaderError("Bad Magic") if ! isGzipMagic($magic) ; *$self->{Type} = 'rfc1952'; return $magic ; } sub readHeader { my $self = shift; my $magic = shift; return $self->_readGzipHeader($magic); } sub chkTrailer { my $self = shift; my $trailer = shift; # Check CRC & ISIZE my ($CRC32, $ISIZE) = unpack("V V", $trailer) ; *$self->{Info}{CRC32} = $CRC32; *$self->{Info}{ISIZE} = $ISIZE; if (*$self->{Strict}) { return $self->TrailerError("CRC mismatch") if $CRC32 != *$self->{Uncomp}->crc32() ; my $exp_isize = *$self->{UnCompSize}->get32bit(); return $self->TrailerError("ISIZE mismatch. Got $ISIZE" . ", expected $exp_isize") if $ISIZE != $exp_isize ; } return STATUS_OK; } sub isGzipMagic { my $buffer = shift ; return 0 if length $buffer < GZIP_ID_SIZE ; my ($id1, $id2) = unpack("C C", $buffer) ; return $id1 == GZIP_ID1 && $id2 == GZIP_ID2 ; } sub _readFullGzipHeader($) { my ($self) = @_ ; my $magic = '' ; $self->smartReadExact(\$magic, GZIP_ID_SIZE); *$self->{HeaderPending} = $magic ; return $self->HeaderError("Minimum header size is " . GZIP_MIN_HEADER_SIZE . " bytes") if length $magic != GZIP_ID_SIZE ; return $self->HeaderError("Bad Magic") if ! isGzipMagic($magic) ; my $status = $self->_readGzipHeader($magic); delete *$self->{Transparent} if ! defined $status ; return $status ; } sub _readGzipHeader($) { my ($self, $magic) = @_ ; my ($HeaderCRC) ; my ($buffer) = '' ; $self->smartReadExact(\$buffer, GZIP_MIN_HEADER_SIZE - GZIP_ID_SIZE) or return $self->HeaderError("Minimum header size is " . GZIP_MIN_HEADER_SIZE . " bytes") ; my $keep = $magic . $buffer ; *$self->{HeaderPending} = $keep ; # now split out the various parts my ($cm, $flag, $mtime, $xfl, $os) = unpack("C C V C C", $buffer) ; $cm == GZIP_CM_DEFLATED or return $self->HeaderError("Not Deflate (CM is $cm)") ; # check for use of reserved bits return $self->HeaderError("Use of Reserved Bits in FLG field.") if $flag & GZIP_FLG_RESERVED ; my $EXTRA ; my @EXTRA = () ; if ($flag & GZIP_FLG_FEXTRA) { $EXTRA = "" ; $self->smartReadExact(\$buffer, GZIP_FEXTRA_HEADER_SIZE) or return $self->TruncatedHeader("FEXTRA Length") ; my ($XLEN) = unpack("v", $buffer) ; $self->smartReadExact(\$EXTRA, $XLEN) or return $self->TruncatedHeader("FEXTRA Body"); $keep .= $buffer . $EXTRA ; if ($XLEN && *$self->{'ParseExtra'}) { my $bad = IO::Compress::Zlib::Extra::parseRawExtra($EXTRA, \@EXTRA, 1, 1); return $self->HeaderError($bad) if defined $bad; } } my $origname ; if ($flag & GZIP_FLG_FNAME) { $origname = "" ; while (1) { $self->smartReadExact(\$buffer, 1) or return $self->TruncatedHeader("FNAME"); last if $buffer eq GZIP_NULL_BYTE ; $origname .= $buffer } $keep .= $origname . GZIP_NULL_BYTE ; return $self->HeaderError("Non ISO 8859-1 Character found in Name") if *$self->{Strict} && $origname =~ /$GZIP_FNAME_INVALID_CHAR_RE/o ; } my $comment ; if ($flag & GZIP_FLG_FCOMMENT) { $comment = ""; while (1) { $self->smartReadExact(\$buffer, 1) or return $self->TruncatedHeader("FCOMMENT"); last if $buffer eq GZIP_NULL_BYTE ; $comment .= $buffer } $keep .= $comment . GZIP_NULL_BYTE ; return $self->HeaderError("Non ISO 8859-1 Character found in Comment") if *$self->{Strict} && $comment =~ /$GZIP_FCOMMENT_INVALID_CHAR_RE/o ; } if ($flag & GZIP_FLG_FHCRC) { $self->smartReadExact(\$buffer, GZIP_FHCRC_SIZE) or return $self->TruncatedHeader("FHCRC"); $HeaderCRC = unpack("v", $buffer) ; my $crc16 = Compress::Raw::Zlib::crc32($keep) & 0xFF ; return $self->HeaderError("CRC16 mismatch.") if *$self->{Strict} && $crc16 != $HeaderCRC; $keep .= $buffer ; } # Assume compression method is deflated for xfl tests #if ($xfl) { #} *$self->{Type} = 'rfc1952'; return { 'Type' => 'rfc1952', 'FingerprintLength' => 2, 'HeaderLength' => length $keep, 'TrailerLength' => GZIP_TRAILER_SIZE, 'Header' => $keep, 'isMinimalHeader' => $keep eq GZIP_MINIMUM_HEADER ? 1 : 0, 'MethodID' => $cm, 'MethodName' => $cm == GZIP_CM_DEFLATED ? "Deflated" : "Unknown" , 'TextFlag' => $flag & GZIP_FLG_FTEXT ? 1 : 0, 'HeaderCRCFlag' => $flag & GZIP_FLG_FHCRC ? 1 : 0, 'NameFlag' => $flag & GZIP_FLG_FNAME ? 1 : 0, 'CommentFlag' => $flag & GZIP_FLG_FCOMMENT ? 1 : 0, 'ExtraFlag' => $flag & GZIP_FLG_FEXTRA ? 1 : 0, 'Name' => $origname, 'Comment' => $comment, 'Time' => $mtime, 'OsID' => $os, 'OsName' => defined $GZIP_OS_Names{$os} ? $GZIP_OS_Names{$os} : "Unknown", 'HeaderCRC' => $HeaderCRC, 'Flags' => $flag, 'ExtraFlags' => $xfl, 'ExtraFieldRaw' => $EXTRA, 'ExtraField' => [ @EXTRA ], #'CompSize'=> $compsize, #'CRC32'=> $CRC32, #'OrigSize'=> $ISIZE, } } 1; __END__ =head1 NAME IO::Uncompress::Gunzip - Read RFC 1952 files/buffers =head1 SYNOPSIS use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ; my $status = gunzip $input => $output [,OPTS] or die "gunzip failed: $GunzipError\n"; my $z = new IO::Uncompress::Gunzip $input [OPTS] or die "gunzip failed: $GunzipError\n"; $status = $z->read($buffer) $status = $z->read($buffer, $length) $status = $z->read($buffer, $length, $offset) $line = $z->getline() $char = $z->getc() $char = $z->ungetc() $char = $z->opened() $status = $z->inflateSync() $data = $z->trailingData() $status = $z->nextStream() $data = $z->getHeaderInfo() $z->tell() $z->seek($position, $whence) $z->binmode() $z->fileno() $z->eof() $z->close() $GunzipError ; # IO::File mode <$z> read($z, $buffer); read($z, $buffer, $length); read($z, $buffer, $length, $offset); tell($z) seek($z, $position, $whence) binmode($z) fileno($z) eof($z) close($z) =head1 DESCRIPTION This module provides a Perl interface that allows the reading of files/buffers that conform to RFC 1952. For writing RFC 1952 files/buffers, see the companion module IO::Compress::Gzip. =head1 Functional Interface A top-level function, C, is provided to carry out "one-shot" uncompression between buffers and/or files. For finer control over the uncompression process, see the L section. use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ; gunzip $input_filename_or_reference => $output_filename_or_reference [,OPTS] or die "gunzip failed: $GunzipError\n"; The functional interface needs Perl5.005 or better. =head2 gunzip $input_filename_or_reference => $output_filename_or_reference [, OPTS] C expects at least two parameters, C<$input_filename_or_reference> and C<$output_filename_or_reference>. =head3 The C<$input_filename_or_reference> parameter The parameter, C<$input_filename_or_reference>, is used to define the source of the compressed data. It can take one of the following forms: =over 5 =item A filename If the <$input_filename_or_reference> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for reading and the input data will be read from it. =item A filehandle If the C<$input_filename_or_reference> parameter is a filehandle, the input data will be read from it. The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input_filename_or_reference> is a scalar reference, the input data will be read from C<$$input_filename_or_reference>. =item An array reference If C<$input_filename_or_reference> is an array reference, each element in the array must be a filename. The input data will be read from each file in turn. The complete array will be walked to ensure that it only contains valid filenames before any data is uncompressed. =item An Input FileGlob string If C<$input_filename_or_reference> is a string that is delimited by the characters "<" and ">" C will assume that it is an I. The input is the list of files that match the fileglob. See L for more details. =back If the C<$input_filename_or_reference> parameter is any other type, C will be returned. =head3 The C<$output_filename_or_reference> parameter The parameter C<$output_filename_or_reference> is used to control the destination of the uncompressed data. This parameter can take one of these forms. =over 5 =item A filename If the C<$output_filename_or_reference> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for writing and the uncompressed data will be written to it. =item A filehandle If the C<$output_filename_or_reference> parameter is a filehandle, the uncompressed data will be written to it. The string '-' can be used as an alias for standard output. =item A scalar reference If C<$output_filename_or_reference> is a scalar reference, the uncompressed data will be stored in C<$$output_filename_or_reference>. =item An Array Reference If C<$output_filename_or_reference> is an array reference, the uncompressed data will be pushed onto the array. =item An Output FileGlob If C<$output_filename_or_reference> is a string that is delimited by the characters "<" and ">" C will assume that it is an I. The output is the list of files that match the fileglob. When C<$output_filename_or_reference> is an fileglob string, C<$input_filename_or_reference> must also be a fileglob string. Anything else is an error. See L for more details. =back If the C<$output_filename_or_reference> parameter is any other type, C will be returned. =head2 Notes When C<$input_filename_or_reference> maps to multiple compressed files/buffers and C<$output_filename_or_reference> is a single file/buffer, after uncompression C<$output_filename_or_reference> will contain a concatenation of all the uncompressed data from each of the input files/buffers. =head2 Optional Parameters Unless specified below, the optional parameters for C, C, are the same as those used with the OO interface defined in the L section below. =over 5 =item C<< AutoClose => 0|1 >> This option applies to any input or output data streams to C that are filehandles. If C is specified, and the value is true, it will result in all input and/or output filehandles being closed once C has completed. This parameter defaults to 0. =item C<< BinModeOut => 0|1 >> When writing to a file or filehandle, set C before writing to the file. Defaults to 0. =item C<< Append => 0|1 >> The behaviour of this option is dependent on the type of output data stream. =over 5 =item * A Buffer If C is enabled, all uncompressed data will be append to the end of the output buffer. Otherwise the output buffer will be cleared before any uncompressed data is written to it. =item * A Filename If C is enabled, the file will be opened in append mode. Otherwise the contents of the file, if any, will be truncated before any uncompressed data is written to it. =item * A Filehandle If C is enabled, the filehandle will be positioned to the end of the file via a call to C before any uncompressed data is written to it. Otherwise the file pointer will not be moved. =back When C is specified, and set to true, it will I all uncompressed data to the output data stream. So when the output is a filehandle it will carry out a seek to the eof before writing any uncompressed data. If the output is a filename, it will be opened for appending. If the output is a buffer, all uncompressed data will be appended to the existing buffer. Conversely when C is not specified, or it is present and is set to false, it will operate as follows. When the output is a filename, it will truncate the contents of the file before writing any uncompressed data. If the output is a filehandle its position will not be changed. If the output is a buffer, it will be wiped before any uncompressed data is output. Defaults to 0. =item C<< MultiStream => 0|1 >> If the input file/buffer contains multiple compressed data streams, this option will uncompress the whole lot as a single data stream. Defaults to 0. =item C<< TrailingData => $scalar >> Returns the data, if any, that is present immediately after the compressed data stream once uncompression is complete. This option can be used when there is useful information immediately following the compressed data stream, and you don't know the length of the compressed data stream. If the input is a buffer, C will return everything from the end of the compressed data stream to the end of the buffer. If the input is a filehandle, C will return the data that is left in the filehandle input buffer once the end of the compressed data stream has been reached. You can then use the filehandle to read the rest of the input file. Don't bother using C if the input is a filename. If you know the length of the compressed data stream before you start uncompressing, you can avoid having to use C by setting the C option. =back =head2 Examples To read the contents of the file C and write the uncompressed data to the file C. use strict ; use warnings ; use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ; my $input = "file1.txt.gz"; my $output = "file1.txt"; gunzip $input => $output or die "gunzip failed: $GunzipError\n"; To read from an existing Perl filehandle, C<$input>, and write the uncompressed data to a buffer, C<$buffer>. use strict ; use warnings ; use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ; use IO::File ; my $input = new IO::File " \$buffer or die "gunzip failed: $GunzipError\n"; To uncompress all files in the directory "/my/home" that match "*.txt.gz" and store the compressed data in the same directory use strict ; use warnings ; use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ; gunzip '' => '' or die "gunzip failed: $GunzipError\n"; and if you want to compress each file one at a time, this will do the trick use strict ; use warnings ; use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ; for my $input ( glob "/my/home/*.txt.gz" ) { my $output = $input; $output =~ s/.gz// ; gunzip $input => $output or die "Error compressing '$input': $GunzipError\n"; } =head1 OO Interface =head2 Constructor The format of the constructor for IO::Uncompress::Gunzip is shown below my $z = new IO::Uncompress::Gunzip $input [OPTS] or die "IO::Uncompress::Gunzip failed: $GunzipError\n"; Returns an C object on success and undef on failure. The variable C<$GunzipError> will contain an error message on failure. If you are running Perl 5.005 or better the object, C<$z>, returned from IO::Uncompress::Gunzip can be used exactly like an L filehandle. This means that all normal input file operations can be carried out with C<$z>. For example, to read a line from a compressed file/buffer you can use either of these forms $line = $z->getline(); $line = <$z>; The mandatory parameter C<$input> is used to determine the source of the compressed data. This parameter can take one of three forms. =over 5 =item A filename If the C<$input> parameter is a scalar, it is assumed to be a filename. This file will be opened for reading and the compressed data will be read from it. =item A filehandle If the C<$input> parameter is a filehandle, the compressed data will be read from it. The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input> is a scalar reference, the compressed data will be read from C<$$input>. =back =head2 Constructor Options The option names defined below are case insensitive and can be optionally prefixed by a '-'. So all of the following are valid -AutoClose -autoclose AUTOCLOSE autoclose OPTS is a combination of the following options: =over 5 =item C<< AutoClose => 0|1 >> This option is only valid when the C<$input> parameter is a filehandle. If specified, and the value is true, it will result in the file being closed once either the C method is called or the IO::Uncompress::Gunzip object is destroyed. This parameter defaults to 0. =item C<< MultiStream => 0|1 >> Allows multiple concatenated compressed streams to be treated as a single compressed stream. Decompression will stop once either the end of the file/buffer is reached, an error is encountered (premature eof, corrupt compressed data) or the end of a stream is not immediately followed by the start of another stream. This parameter defaults to 0. =item C<< Prime => $string >> This option will uncompress the contents of C<$string> before processing the input file/buffer. This option can be useful when the compressed data is embedded in another file/data structure and it is not possible to work out where the compressed data begins without having to read the first few bytes. If this is the case, the uncompression can be I with these bytes using this option. =item C<< Transparent => 0|1 >> If this option is set and the input file/buffer is not compressed data, the module will allow reading of it anyway. In addition, if the input file/buffer does contain compressed data and there is non-compressed data immediately following it, setting this option will make this module treat the whole file/buffer as a single data stream. This option defaults to 1. =item C<< BlockSize => $num >> When reading the compressed input data, IO::Uncompress::Gunzip will read it in blocks of C<$num> bytes. This option defaults to 4096. =item C<< InputLength => $size >> When present this option will limit the number of compressed bytes read from the input file/buffer to C<$size>. This option can be used in the situation where there is useful data directly after the compressed data stream and you know beforehand the exact length of the compressed data stream. This option is mostly used when reading from a filehandle, in which case the file pointer will be left pointing to the first byte directly after the compressed data stream. This option defaults to off. =item C<< Append => 0|1 >> This option controls what the C method does with uncompressed data. If set to 1, all uncompressed data will be appended to the output parameter of the C method. If set to 0, the contents of the output parameter of the C method will be overwritten by the uncompressed data. Defaults to 0. =item C<< Strict => 0|1 >> This option controls whether the extra checks defined below are used when carrying out the decompression. When Strict is on, the extra tests are carried out, when Strict is off they are not. The default for this option is off. =over 5 =item 1 If the FHCRC bit is set in the gzip FLG header byte, the CRC16 bytes in the header must match the crc16 value of the gzip header actually read. =item 2 If the gzip header contains a name field (FNAME) it consists solely of ISO 8859-1 characters. =item 3 If the gzip header contains a comment field (FCOMMENT) it consists solely of ISO 8859-1 characters plus line-feed. =item 4 If the gzip FEXTRA header field is present it must conform to the sub-field structure as defined in RFC 1952. =item 5 The CRC32 and ISIZE trailer fields must be present. =item 6 The value of the CRC32 field read must match the crc32 value of the uncompressed data actually contained in the gzip file. =item 7 The value of the ISIZE fields read must match the length of the uncompressed data actually read from the file. =back =item C<< ParseExtra => 0|1 >> If the gzip FEXTRA header field is present and this option is set, it will force the module to check that it conforms to the sub-field structure as defined in RFC 1952. If the C is on it will automatically enable this option. Defaults to 0. =back =head2 Examples TODO =head1 Methods =head2 read Usage is $status = $z->read($buffer) Reads a block of compressed data (the size of the compressed block is determined by the C option in the constructor), uncompresses it and writes any uncompressed data into C<$buffer>. If the C parameter is set in the constructor, the uncompressed data will be appended to the C<$buffer> parameter. Otherwise C<$buffer> will be overwritten. Returns the number of uncompressed bytes written to C<$buffer>, zero if eof or a negative number on error. =head2 read Usage is $status = $z->read($buffer, $length) $status = $z->read($buffer, $length, $offset) $status = read($z, $buffer, $length) $status = read($z, $buffer, $length, $offset) Attempt to read C<$length> bytes of uncompressed data into C<$buffer>. The main difference between this form of the C method and the previous one, is that this one will attempt to return I C<$length> bytes. The only circumstances that this function will not is if end-of-file or an IO error is encountered. Returns the number of uncompressed bytes written to C<$buffer>, zero if eof or a negative number on error. =head2 getline Usage is $line = $z->getline() $line = <$z> Reads a single line. This method fully supports the use of the variable C<$/> (or C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C is in use) to determine what constitutes an end of line. Paragraph mode, record mode and file slurp mode are all supported. =head2 getc Usage is $char = $z->getc() Read a single character. =head2 ungetc Usage is $char = $z->ungetc($string) =head2 inflateSync Usage is $status = $z->inflateSync() TODO =head2 getHeaderInfo Usage is $hdr = $z->getHeaderInfo(); @hdrs = $z->getHeaderInfo(); This method returns either a hash reference (in scalar context) or a list or hash references (in array context) that contains information about each of the header fields in the compressed data stream(s). =over 5 =item Name The contents of the Name header field, if present. If no name is present, the value will be undef. Note this is different from a zero length name, which will return an empty string. =item Comment The contents of the Comment header field, if present. If no comment is present, the value will be undef. Note this is different from a zero length comment, which will return an empty string. =back =head2 tell Usage is $z->tell() tell $z Returns the uncompressed file offset. =head2 eof Usage is $z->eof(); eof($z); Returns true if the end of the compressed input stream has been reached. =head2 seek $z->seek($position, $whence); seek($z, $position, $whence); Provides a sub-set of the C functionality, with the restriction that it is only legal to seek forward in the input file/buffer. It is a fatal error to attempt to seek backward. Note that the implementation of C in this module does not provide true random access to a compressed file/buffer. It works by uncompressing data from the current offset in the file/buffer until it reaches the uncompressed offset specified in the parameters to C. For very small files this may be acceptable behaviour. For large files it may cause an unacceptable delay. The C<$whence> parameter takes one the usual values, namely SEEK_SET, SEEK_CUR or SEEK_END. Returns 1 on success, 0 on failure. =head2 binmode Usage is $z->binmode binmode $z ; This is a noop provided for completeness. =head2 opened $z->opened() Returns true if the object currently refers to a opened file/buffer. =head2 autoflush my $prev = $z->autoflush() my $prev = $z->autoflush(EXPR) If the C<$z> object is associated with a file or a filehandle, this method returns the current autoflush setting for the underlying filehandle. If C is present, and is non-zero, it will enable flushing after every write/print operation. If C<$z> is associated with a buffer, this method has no effect and always returns C. B that the special variable C<$|> B be used to set or retrieve the autoflush setting. =head2 input_line_number $z->input_line_number() $z->input_line_number(EXPR) Returns the current uncompressed line number. If C is present it has the effect of setting the line number. Note that setting the line number does not change the current position within the file/buffer being read. The contents of C<$/> are used to determine what constitutes a line terminator. =head2 fileno $z->fileno() fileno($z) If the C<$z> object is associated with a file or a filehandle, C will return the underlying file descriptor. Once the C method is called C will return C. If the C<$z> object is associated with a buffer, this method will return C. =head2 close $z->close() ; close $z ; Closes the output file/buffer. For most versions of Perl this method will be automatically invoked if the IO::Uncompress::Gunzip object is destroyed (either explicitly or by the variable with the reference to the object going out of scope). The exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In these cases, the C method will be called automatically, but not until global destruction of all live objects when the program is terminating. Therefore, if you want your scripts to be able to run on all versions of Perl, you should call C explicitly and not rely on automatic closing. Returns true on success, otherwise 0. If the C option has been enabled when the IO::Uncompress::Gunzip object was created, and the object is associated with a file, the underlying file will also be closed. =head2 nextStream Usage is my $status = $z->nextStream(); Skips to the next compressed data stream in the input file/buffer. If a new compressed data stream is found, the eof marker will be cleared and C<$.> will be reset to 0. Returns 1 if a new stream was found, 0 if none was found, and -1 if an error was encountered. =head2 trailingData Usage is my $data = $z->trailingData(); Returns the data, if any, that is present immediately after the compressed data stream once uncompression is complete. It only makes sense to call this method once the end of the compressed data stream has been encountered. This option can be used when there is useful information immediately following the compressed data stream, and you don't know the length of the compressed data stream. If the input is a buffer, C will return everything from the end of the compressed data stream to the end of the buffer. If the input is a filehandle, C will return the data that is left in the filehandle input buffer once the end of the compressed data stream has been reached. You can then use the filehandle to read the rest of the input file. Don't bother using C if the input is a filename. If you know the length of the compressed data stream before you start uncompressing, you can avoid having to use C by setting the C option in the constructor. =head1 Importing No symbolic constants are required by this IO::Uncompress::Gunzip at present. =over 5 =item :all Imports C and C<$GunzipError>. Same as doing this use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ; =back =head1 EXAMPLES =head2 Working with Net::FTP See L =head1 SEE ALSO L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L L L, L, L, L For RFC 1950, 1951 and 1952 see F, F and F The I compression library was written by Jean-loup Gailly F and Mark Adler F. The primary site for the I compression library is F. The primary site for gzip is F. =head1 AUTHOR This module was written by Paul Marquess, F. =head1 MODIFICATION HISTORY See the Changes file. =head1 COPYRIGHT AND LICENSE Copyright (c) 2005-2013 Paul Marquess. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. IO-Compress-2.063/lib/IO/Uncompress/Unzip.pm0000644000175000017500000015025412211042537017172 0ustar paulpaulpackage IO::Uncompress::Unzip; require 5.006 ; # for RFC1952 use strict ; use warnings; #use bytes; use IO::File; use IO::Uncompress::RawInflate 2.063 ; use IO::Compress::Base::Common 2.063 qw(:Status ); use IO::Uncompress::Adapter::Inflate 2.063 ; use IO::Uncompress::Adapter::Identity 2.063 ; use IO::Compress::Zlib::Extra 2.063 ; use IO::Compress::Zip::Constants 2.063 ; use Compress::Raw::Zlib 2.063 () ; BEGIN { eval{ require IO::Uncompress::Adapter::Bunzip2 ; import IO::Uncompress::Adapter::Bunzip2 } ; eval{ require IO::Uncompress::Adapter::UnLzma ; import IO::Uncompress::Adapter::UnLzma } ; } require Exporter ; our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $UnzipError, %headerLookup); $VERSION = '2.063'; $UnzipError = ''; @ISA = qw(Exporter IO::Uncompress::RawInflate); @EXPORT_OK = qw( $UnzipError unzip ); %EXPORT_TAGS = %IO::Uncompress::RawInflate::EXPORT_TAGS ; push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ; Exporter::export_ok_tags('all'); %headerLookup = ( ZIP_CENTRAL_HDR_SIG, \&skipCentralDirectory, ZIP_END_CENTRAL_HDR_SIG, \&skipEndCentralDirectory, ZIP64_END_CENTRAL_REC_HDR_SIG, \&skipCentralDirectory64Rec, ZIP64_END_CENTRAL_LOC_HDR_SIG, \&skipCentralDirectory64Loc, ZIP64_ARCHIVE_EXTRA_SIG, \&skipArchiveExtra, ZIP64_DIGITAL_SIGNATURE_SIG, \&skipDigitalSignature, ); sub new { my $class = shift ; my $obj = IO::Compress::Base::Common::createSelfTiedObject($class, \$UnzipError); $obj->_create(undef, 0, @_); } sub unzip { my $obj = IO::Compress::Base::Common::createSelfTiedObject(undef, \$UnzipError); return $obj->_inf(@_) ; } sub getExtraParams { return ( # # Zip header fields 'name' => [IO::Compress::Base::Common::Parse_any, undef], 'stream' => [IO::Compress::Base::Common::Parse_boolean, 0], # TODO - This means reading the central directory to get # 1. the local header offsets # 2. The compressed data length ); } sub ckParams { my $self = shift ; my $got = shift ; # unzip always needs crc32 $got->setValue('crc32' => 1); *$self->{UnzipData}{Name} = $got->getValue('name'); return 1; } sub mkUncomp { my $self = shift ; my $got = shift ; my $magic = $self->ckMagic() or return 0; *$self->{Info} = $self->readHeader($magic) or return undef ; return 1; } sub ckMagic { my $self = shift; my $magic ; $self->smartReadExact(\$magic, 4); *$self->{HeaderPending} = $magic ; return $self->HeaderError("Minimum header size is " . 4 . " bytes") if length $magic != 4 ; return $self->HeaderError("Bad Magic") if ! _isZipMagic($magic) ; *$self->{Type} = 'zip'; return $magic ; } sub fastForward { my $self = shift; my $offset = shift; # TODO - if Stream isn't enabled & reading from file, use seek my $buffer = ''; my $c = 1024 * 16; while ($offset > 0) { $c = length $offset if length $offset < $c ; $offset -= $c; $self->smartReadExact(\$buffer, $c) or return 0; } return 1; } sub readHeader { my $self = shift; my $magic = shift ; my $name = *$self->{UnzipData}{Name} ; my $hdr = $self->_readZipHeader($magic) ; while (defined $hdr) { if (! defined $name || $hdr->{Name} eq $name) { return $hdr ; } # skip the data # TODO - when Stream is off, use seek my $buffer; if (*$self->{ZipData}{Streaming}) { while (1) { my $b; my $status = $self->smartRead(\$b, 1024 * 16); return undef if $status <= 0 ; my $temp_buf; my $out; $status = *$self->{Uncomp}->uncompr(\$b, \$temp_buf, 0, $out); return $self->saveErrorString(undef, *$self->{Uncomp}{Error}, *$self->{Uncomp}{ErrorNo}) if $self->saveStatus($status) == STATUS_ERROR; if ($status == STATUS_ENDSTREAM) { *$self->{Uncomp}->reset(); $self->pushBack($b) ; last; } } # skip the trailer $self->smartReadExact(\$buffer, $hdr->{TrailerLength}) or return $self->saveErrorString(undef, "Truncated file"); } else { my $c = $hdr->{CompressedLength}->get64bit(); $self->fastForward($c) or return $self->saveErrorString(undef, "Truncated file"); $buffer = ''; } $self->chkTrailer($buffer) == STATUS_OK or return $self->saveErrorString(undef, "Truncated file"); $hdr = $self->_readFullZipHeader(); return $self->saveErrorString(undef, "Cannot find '$name'") if $self->smartEof(); } return undef; } sub chkTrailer { my $self = shift; my $trailer = shift; my ($sig, $CRC32, $cSize, $uSize) ; my ($cSizeHi, $uSizeHi) = (0, 0); if (*$self->{ZipData}{Streaming}) { $sig = unpack ("V", substr($trailer, 0, 4)); $CRC32 = unpack ("V", substr($trailer, 4, 4)); if (*$self->{ZipData}{Zip64} ) { $cSize = U64::newUnpack_V64 substr($trailer, 8, 8); $uSize = U64::newUnpack_V64 substr($trailer, 16, 8); } else { $cSize = U64::newUnpack_V32 substr($trailer, 8, 4); $uSize = U64::newUnpack_V32 substr($trailer, 12, 4); } return $self->TrailerError("Data Descriptor signature, got $sig") if $sig != ZIP_DATA_HDR_SIG; } else { ($CRC32, $cSize, $uSize) = (*$self->{ZipData}{Crc32}, *$self->{ZipData}{CompressedLen}, *$self->{ZipData}{UnCompressedLen}); } *$self->{Info}{CRC32} = *$self->{ZipData}{CRC32} ; *$self->{Info}{CompressedLength} = $cSize->get64bit(); *$self->{Info}{UncompressedLength} = $uSize->get64bit(); if (*$self->{Strict}) { return $self->TrailerError("CRC mismatch") if $CRC32 != *$self->{ZipData}{CRC32} ; return $self->TrailerError("CSIZE mismatch.") if ! $cSize->equal(*$self->{CompSize}); return $self->TrailerError("USIZE mismatch.") if ! $uSize->equal(*$self->{UnCompSize}); } my $reachedEnd = STATUS_ERROR ; # check for central directory or end of central directory while (1) { my $magic ; my $got = $self->smartRead(\$magic, 4); return $self->saveErrorString(STATUS_ERROR, "Truncated file") if $got != 4 && *$self->{Strict}; if ($got == 0) { return STATUS_EOF ; } elsif ($got < 0) { return STATUS_ERROR ; } elsif ($got < 4) { $self->pushBack($magic) ; return STATUS_OK ; } my $sig = unpack("V", $magic) ; my $hdr; if ($hdr = $headerLookup{$sig}) { if (&$hdr($self, $magic) != STATUS_OK ) { if (*$self->{Strict}) { return STATUS_ERROR ; } else { $self->clearError(); return STATUS_OK ; } } if ($sig == ZIP_END_CENTRAL_HDR_SIG) { return STATUS_OK ; last; } } elsif ($sig == ZIP_LOCAL_HDR_SIG) { $self->pushBack($magic) ; return STATUS_OK ; } else { # put the data back $self->pushBack($magic) ; last; } } return $reachedEnd ; } sub skipCentralDirectory { my $self = shift; my $magic = shift ; my $buffer; $self->smartReadExact(\$buffer, 46 - 4) or return $self->TrailerError("Minimum header size is " . 46 . " bytes") ; my $keep = $magic . $buffer ; *$self->{HeaderPending} = $keep ; #my $versionMadeBy = unpack ("v", substr($buffer, 4-4, 2)); #my $extractVersion = unpack ("v", substr($buffer, 6-4, 2)); #my $gpFlag = unpack ("v", substr($buffer, 8-4, 2)); #my $compressedMethod = unpack ("v", substr($buffer, 10-4, 2)); #my $lastModTime = unpack ("V", substr($buffer, 12-4, 4)); #my $crc32 = unpack ("V", substr($buffer, 16-4, 4)); my $compressedLength = unpack ("V", substr($buffer, 20-4, 4)); my $uncompressedLength = unpack ("V", substr($buffer, 24-4, 4)); my $filename_length = unpack ("v", substr($buffer, 28-4, 2)); my $extra_length = unpack ("v", substr($buffer, 30-4, 2)); my $comment_length = unpack ("v", substr($buffer, 32-4, 2)); #my $disk_start = unpack ("v", substr($buffer, 34-4, 2)); #my $int_file_attrib = unpack ("v", substr($buffer, 36-4, 2)); #my $ext_file_attrib = unpack ("V", substr($buffer, 38-4, 2)); #my $lcl_hdr_offset = unpack ("V", substr($buffer, 42-4, 2)); my $filename; my $extraField; my $comment ; if ($filename_length) { $self->smartReadExact(\$filename, $filename_length) or return $self->TruncatedTrailer("filename"); $keep .= $filename ; } if ($extra_length) { $self->smartReadExact(\$extraField, $extra_length) or return $self->TruncatedTrailer("extra"); $keep .= $extraField ; } if ($comment_length) { $self->smartReadExact(\$comment, $comment_length) or return $self->TruncatedTrailer("comment"); $keep .= $comment ; } return STATUS_OK ; } sub skipArchiveExtra { my $self = shift; my $magic = shift ; my $buffer; $self->smartReadExact(\$buffer, 4) or return $self->TrailerError("Minimum header size is " . 4 . " bytes") ; my $keep = $magic . $buffer ; my $size = unpack ("V", $buffer); $self->smartReadExact(\$buffer, $size) or return $self->TrailerError("Minimum header size is " . $size . " bytes") ; $keep .= $buffer ; *$self->{HeaderPending} = $keep ; return STATUS_OK ; } sub skipCentralDirectory64Rec { my $self = shift; my $magic = shift ; my $buffer; $self->smartReadExact(\$buffer, 8) or return $self->TrailerError("Minimum header size is " . 8 . " bytes") ; my $keep = $magic . $buffer ; my ($sizeLo, $sizeHi) = unpack ("V V", $buffer); my $size = $sizeHi * U64::MAX32 + $sizeLo; $self->fastForward($size) or return $self->TrailerError("Minimum header size is " . $size . " bytes") ; #$keep .= $buffer ; #*$self->{HeaderPending} = $keep ; #my $versionMadeBy = unpack ("v", substr($buffer, 0, 2)); #my $extractVersion = unpack ("v", substr($buffer, 2, 2)); #my $diskNumber = unpack ("V", substr($buffer, 4, 4)); #my $cntrlDirDiskNo = unpack ("V", substr($buffer, 8, 4)); #my $entriesInThisCD = unpack ("V V", substr($buffer, 12, 8)); #my $entriesInCD = unpack ("V V", substr($buffer, 20, 8)); #my $sizeOfCD = unpack ("V V", substr($buffer, 28, 8)); #my $offsetToCD = unpack ("V V", substr($buffer, 36, 8)); return STATUS_OK ; } sub skipCentralDirectory64Loc { my $self = shift; my $magic = shift ; my $buffer; $self->smartReadExact(\$buffer, 20 - 4) or return $self->TrailerError("Minimum header size is " . 20 . " bytes") ; my $keep = $magic . $buffer ; *$self->{HeaderPending} = $keep ; #my $startCdDisk = unpack ("V", substr($buffer, 4-4, 4)); #my $offsetToCD = unpack ("V V", substr($buffer, 8-4, 8)); #my $diskCount = unpack ("V", substr($buffer, 16-4, 4)); return STATUS_OK ; } sub skipEndCentralDirectory { my $self = shift; my $magic = shift ; my $buffer; $self->smartReadExact(\$buffer, 22 - 4) or return $self->TrailerError("Minimum header size is " . 22 . " bytes") ; my $keep = $magic . $buffer ; *$self->{HeaderPending} = $keep ; #my $diskNumber = unpack ("v", substr($buffer, 4-4, 2)); #my $cntrlDirDiskNo = unpack ("v", substr($buffer, 6-4, 2)); #my $entriesInThisCD = unpack ("v", substr($buffer, 8-4, 2)); #my $entriesInCD = unpack ("v", substr($buffer, 10-4, 2)); #my $sizeOfCD = unpack ("V", substr($buffer, 12-4, 4)); #my $offsetToCD = unpack ("V", substr($buffer, 16-4, 4)); my $comment_length = unpack ("v", substr($buffer, 20-4, 2)); my $comment ; if ($comment_length) { $self->smartReadExact(\$comment, $comment_length) or return $self->TruncatedTrailer("comment"); $keep .= $comment ; } return STATUS_OK ; } sub _isZipMagic { my $buffer = shift ; return 0 if length $buffer < 4 ; my $sig = unpack("V", $buffer) ; return $sig == ZIP_LOCAL_HDR_SIG ; } sub _readFullZipHeader($) { my ($self) = @_ ; my $magic = '' ; $self->smartReadExact(\$magic, 4); *$self->{HeaderPending} = $magic ; return $self->HeaderError("Minimum header size is " . 30 . " bytes") if length $magic != 4 ; return $self->HeaderError("Bad Magic") if ! _isZipMagic($magic) ; my $status = $self->_readZipHeader($magic); delete *$self->{Transparent} if ! defined $status ; return $status ; } sub _readZipHeader($) { my ($self, $magic) = @_ ; my ($HeaderCRC) ; my ($buffer) = '' ; $self->smartReadExact(\$buffer, 30 - 4) or return $self->HeaderError("Minimum header size is " . 30 . " bytes") ; my $keep = $magic . $buffer ; *$self->{HeaderPending} = $keep ; my $extractVersion = unpack ("v", substr($buffer, 4-4, 2)); my $gpFlag = unpack ("v", substr($buffer, 6-4, 2)); my $compressedMethod = unpack ("v", substr($buffer, 8-4, 2)); my $lastModTime = unpack ("V", substr($buffer, 10-4, 4)); my $crc32 = unpack ("V", substr($buffer, 14-4, 4)); my $compressedLength = U64::newUnpack_V32 substr($buffer, 18-4, 4); my $uncompressedLength = U64::newUnpack_V32 substr($buffer, 22-4, 4); my $filename_length = unpack ("v", substr($buffer, 26-4, 2)); my $extra_length = unpack ("v", substr($buffer, 28-4, 2)); my $filename; my $extraField; my @EXTRA = (); my $streamingMode = ($gpFlag & ZIP_GP_FLAG_STREAMING_MASK) ? 1 : 0 ; return $self->HeaderError("Encrypted content not supported") if $gpFlag & (ZIP_GP_FLAG_ENCRYPTED_MASK|ZIP_GP_FLAG_STRONG_ENCRYPTED_MASK); return $self->HeaderError("Patch content not supported") if $gpFlag & ZIP_GP_FLAG_PATCHED_MASK; *$self->{ZipData}{Streaming} = $streamingMode; if ($filename_length) { $self->smartReadExact(\$filename, $filename_length) or return $self->TruncatedHeader("Filename"); $keep .= $filename ; } my $zip64 = 0 ; if ($extra_length) { $self->smartReadExact(\$extraField, $extra_length) or return $self->TruncatedHeader("Extra Field"); my $bad = IO::Compress::Zlib::Extra::parseRawExtra($extraField, \@EXTRA, 1, 0); return $self->HeaderError($bad) if defined $bad; $keep .= $extraField ; my %Extra ; for (@EXTRA) { $Extra{$_->[0]} = \$_->[1]; } if (defined $Extra{ZIP_EXTRA_ID_ZIP64()}) { $zip64 = 1 ; my $buff = ${ $Extra{ZIP_EXTRA_ID_ZIP64()} }; # This code assumes that all the fields in the Zip64 # extra field aren't necessarily present. The spec says that # they only exist if the equivalent local headers are -1. if (! $streamingMode) { my $offset = 0 ; if (U64::full32 $uncompressedLength->get32bit() ) { $uncompressedLength = U64::newUnpack_V64 substr($buff, 0, 8); $offset += 8 ; } if (U64::full32 $compressedLength->get32bit() ) { $compressedLength = U64::newUnpack_V64 substr($buff, $offset, 8); $offset += 8 ; } } } } *$self->{ZipData}{Zip64} = $zip64; if (! $streamingMode) { *$self->{ZipData}{Streaming} = 0; *$self->{ZipData}{Crc32} = $crc32; *$self->{ZipData}{CompressedLen} = $compressedLength; *$self->{ZipData}{UnCompressedLen} = $uncompressedLength; *$self->{CompressedInputLengthRemaining} = *$self->{CompressedInputLength} = $compressedLength->get64bit(); } *$self->{ZipData}{CRC32} = Compress::Raw::Zlib::crc32(undef); *$self->{ZipData}{Method} = $compressedMethod; if ($compressedMethod == ZIP_CM_DEFLATE) { *$self->{Type} = 'zip-deflate'; my $obj = IO::Uncompress::Adapter::Inflate::mkUncompObject(1,0,0); *$self->{Uncomp} = $obj; } elsif ($compressedMethod == ZIP_CM_BZIP2) { return $self->HeaderError("Unsupported Compression format $compressedMethod") if ! defined $IO::Uncompress::Adapter::Bunzip2::VERSION ; *$self->{Type} = 'zip-bzip2'; my $obj = IO::Uncompress::Adapter::Bunzip2::mkUncompObject(); *$self->{Uncomp} = $obj; } elsif ($compressedMethod == ZIP_CM_LZMA) { return $self->HeaderError("Unsupported Compression format $compressedMethod") if ! defined $IO::Uncompress::Adapter::UnLzma::VERSION ; *$self->{Type} = 'zip-lzma'; my $LzmaHeader; $self->smartReadExact(\$LzmaHeader, 4) or return $self->saveErrorString(undef, "Truncated file"); my ($verHi, $verLo) = unpack ("CC", substr($LzmaHeader, 0, 2)); my $LzmaPropertiesSize = unpack ("v", substr($LzmaHeader, 2, 2)); my $LzmaPropertyData; $self->smartReadExact(\$LzmaPropertyData, $LzmaPropertiesSize) or return $self->saveErrorString(undef, "Truncated file"); if (! $streamingMode) { *$self->{ZipData}{CompressedLen}->subtract(4 + $LzmaPropertiesSize) ; *$self->{CompressedInputLengthRemaining} = *$self->{CompressedInputLength} = *$self->{ZipData}{CompressedLen}->get64bit(); } my $obj = IO::Uncompress::Adapter::UnLzma::mkUncompZipObject($LzmaPropertyData); *$self->{Uncomp} = $obj; } elsif ($compressedMethod == ZIP_CM_STORE) { *$self->{Type} = 'zip-stored'; my $obj = IO::Uncompress::Adapter::Identity::mkUncompObject($streamingMode, $zip64); *$self->{Uncomp} = $obj; } else { return $self->HeaderError("Unsupported Compression format $compressedMethod"); } return { 'Type' => 'zip', 'FingerprintLength' => 4, #'HeaderLength' => $compressedMethod == 8 ? length $keep : 0, 'HeaderLength' => length $keep, 'Zip64' => $zip64, 'TrailerLength' => ! $streamingMode ? 0 : $zip64 ? 24 : 16, 'Header' => $keep, 'CompressedLength' => $compressedLength , 'UncompressedLength' => $uncompressedLength , 'CRC32' => $crc32 , 'Name' => $filename, 'Time' => _dosToUnixTime($lastModTime), 'Stream' => $streamingMode, 'MethodID' => $compressedMethod, 'MethodName' => $compressedMethod == ZIP_CM_DEFLATE ? "Deflated" : $compressedMethod == ZIP_CM_BZIP2 ? "Bzip2" : $compressedMethod == ZIP_CM_LZMA ? "Lzma" : $compressedMethod == ZIP_CM_STORE ? "Stored" : "Unknown" , # 'TextFlag' => $flag & GZIP_FLG_FTEXT ? 1 : 0, # 'HeaderCRCFlag' => $flag & GZIP_FLG_FHCRC ? 1 : 0, # 'NameFlag' => $flag & GZIP_FLG_FNAME ? 1 : 0, # 'CommentFlag' => $flag & GZIP_FLG_FCOMMENT ? 1 : 0, # 'ExtraFlag' => $flag & GZIP_FLG_FEXTRA ? 1 : 0, # 'Comment' => $comment, # 'OsID' => $os, # 'OsName' => defined $GZIP_OS_Names{$os} # ? $GZIP_OS_Names{$os} : "Unknown", # 'HeaderCRC' => $HeaderCRC, # 'Flags' => $flag, # 'ExtraFlags' => $xfl, 'ExtraFieldRaw' => $extraField, 'ExtraField' => [ @EXTRA ], } } sub filterUncompressed { my $self = shift ; if (*$self->{ZipData}{Method} == ZIP_CM_DEFLATE) { *$self->{ZipData}{CRC32} = *$self->{Uncomp}->crc32() ; } else { *$self->{ZipData}{CRC32} = Compress::Raw::Zlib::crc32(${$_[0]}, *$self->{ZipData}{CRC32}, $_[1]); } } # from Archive::Zip & info-zip sub _dosToUnixTime { my $dt = shift; my $year = ( ( $dt >> 25 ) & 0x7f ) + 80; my $mon = ( ( $dt >> 21 ) & 0x0f ) - 1; my $mday = ( ( $dt >> 16 ) & 0x1f ); my $hour = ( ( $dt >> 11 ) & 0x1f ); my $min = ( ( $dt >> 5 ) & 0x3f ); my $sec = ( ( $dt << 1 ) & 0x3e ); use POSIX 'mktime'; my $time_t = mktime( $sec, $min, $hour, $mday, $mon, $year, 0, 0, -1 ); return 0 if ! defined $time_t; return $time_t; } #sub scanCentralDirectory #{ # # Use cases # # 1 32-bit CD # # 2 64-bit CD # # my $self = shift ; # # my @CD = (); # my $offset = $self->findCentralDirectoryOffset(); # # return 0 # if ! defined $offset; # # $self->smarkSeek($offset, 0, SEEK_SET) ; # # # Now walk the Central Directory Records # my $buffer ; # while ($self->smartReadExact(\$buffer, 46) && # unpack("V", $buffer) == ZIP_CENTRAL_HDR_SIG) { # # my $compressedLength = unpack ("V", substr($buffer, 20, 4)); # my $filename_length = unpack ("v", substr($buffer, 28, 2)); # my $extra_length = unpack ("v", substr($buffer, 30, 2)); # my $comment_length = unpack ("v", substr($buffer, 32, 2)); # # $self->smarkSeek($filename_length + $extra_length + $comment_length, 0, SEEK_CUR) # if $extra_length || $comment_length || $filename_length; # push @CD, $compressedLength ; # } # #} # #sub findCentralDirectoryOffset #{ # my $self = shift ; # # # Most common use-case is where there is no comment, so # # know exactly where the end of central directory record # # should be. # # $self->smarkSeek(-22, 0, SEEK_END) ; # # my $buffer; # $self->smartReadExact(\$buffer, 22) ; # # my $zip64 = 0; # my $centralDirOffset ; # if ( unpack("V", $buffer) == ZIP_END_CENTRAL_HDR_SIG ) { # $centralDirOffset = unpack ("V", substr($buffer, 16, 2)); # } # else { # die "xxxx"; # } # # return $centralDirOffset ; #} # #sub is84BitCD #{ # # TODO # my $self = shift ; #} sub skip { my $self = shift; my $size = shift; use Fcntl qw(SEEK_CUR); if (ref $size eq 'U64') { $self->smartSeek($size->get64bit(), SEEK_CUR); } else { $self->smartSeek($size, SEEK_CUR); } } sub scanCentralDirectory { my $self = shift; my $here = $self->tell(); # Use cases # 1 32-bit CD # 2 64-bit CD my @CD = (); my $offset = $self->findCentralDirectoryOffset(); return () if ! defined $offset; $self->smarkSeek($offset, 0, SEEK_SET) ; # Now walk the Central Directory Records my $buffer ; while ($self->smartReadExact(\$buffer, 46) && unpack("V", $buffer) == ZIP_CENTRAL_HDR_SIG) { my $compressedLength = unpack("V", substr($buffer, 20, 4)); my $uncompressedLength = unpack("V", substr($buffer, 24, 4)); my $filename_length = unpack("v", substr($buffer, 28, 2)); my $extra_length = unpack("v", substr($buffer, 30, 2)); my $comment_length = unpack("v", substr($buffer, 32, 2)); $self->skip($filename_length ) ; my $v64 = new U64 $compressedLength ; if (U64::full32 $compressedLength ) { $self->smartReadExact(\$buffer, $extra_length) ; die "xxx $offset $comment_length $filename_length $extra_length" . length($buffer) if length($buffer) != $extra_length; my $got = $self->get64Extra($buffer, U64::full32 $uncompressedLength); # If not Zip64 extra field, assume size is 0xFFFFFFFF $v64 = $got if defined $got; } else { $self->skip($extra_length) ; } $self->skip($comment_length ) ; push @CD, $v64 ; } $self->smartSeek($here, 0, SEEK_SET) ; return @CD; } sub get64Extra { my $self = shift ; my $buffer = shift; my $is_uncomp = shift ; my $extra = IO::Compress::Zlib::Extra::findID(0x0001, $buffer); if (! defined $extra) { return undef; } else { my $u64 = U64::newUnpack_V64(substr($extra, $is_uncomp ? 8 : 0)) ; return $u64; } } sub offsetFromZip64 { my $self = shift ; my $here = shift; $self->smartSeek($here - 20, 0, SEEK_SET) or die "xx $!" ; my $buffer; my $got = 0; $self->smartReadExact(\$buffer, 20) or die "xxx $here $got $!" ; if ( unpack("V", $buffer) == ZIP64_END_CENTRAL_LOC_HDR_SIG ) { my $cd64 = U64::Value_VV64 substr($buffer, 8, 8); $self->smartSeek($cd64, 0, SEEK_SET) ; $self->smartReadExact(\$buffer, 4) or die "xxx" ; if ( unpack("V", $buffer) == ZIP64_END_CENTRAL_REC_HDR_SIG ) { $self->smartReadExact(\$buffer, 8) or die "xxx" ; my $size = U64::Value_VV64($buffer); $self->smartReadExact(\$buffer, $size) or die "xxx" ; my $cd64 = U64::Value_VV64 substr($buffer, 36, 8); return $cd64 ; } die "zzz"; } die "zzz"; } use constant Pack_ZIP_END_CENTRAL_HDR_SIG => pack("V", ZIP_END_CENTRAL_HDR_SIG); sub findCentralDirectoryOffset { my $self = shift ; # Most common use-case is where there is no comment, so # know exactly where the end of central directory record # should be. $self->smartSeek(-22, 0, SEEK_END) ; my $here = $self->tell(); my $buffer; $self->smartReadExact(\$buffer, 22) or die "xxx" ; my $zip64 = 0; my $centralDirOffset ; if ( unpack("V", $buffer) == ZIP_END_CENTRAL_HDR_SIG ) { $centralDirOffset = unpack("V", substr($buffer, 16, 4)); } else { $self->smartSeek(0, 0, SEEK_END) ; my $fileLen = $self->tell(); my $want = 0 ; while(1) { $want += 1024; my $seekTo = $fileLen - $want; if ($seekTo < 0 ) { $seekTo = 0; $want = $fileLen ; } $self->smartSeek( $seekTo, 0, SEEK_SET) or die "xxx $!" ; my $got; $self->smartReadExact($buffer, $want) or die "xxx " ; my $pos = rindex( $buffer, Pack_ZIP_END_CENTRAL_HDR_SIG); if ($pos >= 0) { #$here = $self->tell(); $here = $seekTo + $pos ; $centralDirOffset = unpack("V", substr($buffer, $pos + 16, 4)); last ; } return undef if $want == $fileLen; } } $centralDirOffset = $self->offsetFromZip64($here) if U64::full32 $centralDirOffset ; return $centralDirOffset ; } 1; __END__ =head1 NAME IO::Uncompress::Unzip - Read zip files/buffers =head1 SYNOPSIS use IO::Uncompress::Unzip qw(unzip $UnzipError) ; my $status = unzip $input => $output [,OPTS] or die "unzip failed: $UnzipError\n"; my $z = new IO::Uncompress::Unzip $input [OPTS] or die "unzip failed: $UnzipError\n"; $status = $z->read($buffer) $status = $z->read($buffer, $length) $status = $z->read($buffer, $length, $offset) $line = $z->getline() $char = $z->getc() $char = $z->ungetc() $char = $z->opened() $status = $z->inflateSync() $data = $z->trailingData() $status = $z->nextStream() $data = $z->getHeaderInfo() $z->tell() $z->seek($position, $whence) $z->binmode() $z->fileno() $z->eof() $z->close() $UnzipError ; # IO::File mode <$z> read($z, $buffer); read($z, $buffer, $length); read($z, $buffer, $length, $offset); tell($z) seek($z, $position, $whence) binmode($z) fileno($z) eof($z) close($z) =head1 DESCRIPTION This module provides a Perl interface that allows the reading of zlib files/buffers. For writing zip files/buffers, see the companion module IO::Compress::Zip. =head1 Functional Interface A top-level function, C, is provided to carry out "one-shot" uncompression between buffers and/or files. For finer control over the uncompression process, see the L section. use IO::Uncompress::Unzip qw(unzip $UnzipError) ; unzip $input_filename_or_reference => $output_filename_or_reference [,OPTS] or die "unzip failed: $UnzipError\n"; The functional interface needs Perl5.005 or better. =head2 unzip $input_filename_or_reference => $output_filename_or_reference [, OPTS] C expects at least two parameters, C<$input_filename_or_reference> and C<$output_filename_or_reference>. =head3 The C<$input_filename_or_reference> parameter The parameter, C<$input_filename_or_reference>, is used to define the source of the compressed data. It can take one of the following forms: =over 5 =item A filename If the <$input_filename_or_reference> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for reading and the input data will be read from it. =item A filehandle If the C<$input_filename_or_reference> parameter is a filehandle, the input data will be read from it. The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input_filename_or_reference> is a scalar reference, the input data will be read from C<$$input_filename_or_reference>. =item An array reference If C<$input_filename_or_reference> is an array reference, each element in the array must be a filename. The input data will be read from each file in turn. The complete array will be walked to ensure that it only contains valid filenames before any data is uncompressed. =item An Input FileGlob string If C<$input_filename_or_reference> is a string that is delimited by the characters "<" and ">" C will assume that it is an I. The input is the list of files that match the fileglob. See L for more details. =back If the C<$input_filename_or_reference> parameter is any other type, C will be returned. =head3 The C<$output_filename_or_reference> parameter The parameter C<$output_filename_or_reference> is used to control the destination of the uncompressed data. This parameter can take one of these forms. =over 5 =item A filename If the C<$output_filename_or_reference> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for writing and the uncompressed data will be written to it. =item A filehandle If the C<$output_filename_or_reference> parameter is a filehandle, the uncompressed data will be written to it. The string '-' can be used as an alias for standard output. =item A scalar reference If C<$output_filename_or_reference> is a scalar reference, the uncompressed data will be stored in C<$$output_filename_or_reference>. =item An Array Reference If C<$output_filename_or_reference> is an array reference, the uncompressed data will be pushed onto the array. =item An Output FileGlob If C<$output_filename_or_reference> is a string that is delimited by the characters "<" and ">" C will assume that it is an I. The output is the list of files that match the fileglob. When C<$output_filename_or_reference> is an fileglob string, C<$input_filename_or_reference> must also be a fileglob string. Anything else is an error. See L for more details. =back If the C<$output_filename_or_reference> parameter is any other type, C will be returned. =head2 Notes When C<$input_filename_or_reference> maps to multiple compressed files/buffers and C<$output_filename_or_reference> is a single file/buffer, after uncompression C<$output_filename_or_reference> will contain a concatenation of all the uncompressed data from each of the input files/buffers. =head2 Optional Parameters Unless specified below, the optional parameters for C, C, are the same as those used with the OO interface defined in the L section below. =over 5 =item C<< AutoClose => 0|1 >> This option applies to any input or output data streams to C that are filehandles. If C is specified, and the value is true, it will result in all input and/or output filehandles being closed once C has completed. This parameter defaults to 0. =item C<< BinModeOut => 0|1 >> When writing to a file or filehandle, set C before writing to the file. Defaults to 0. =item C<< Append => 0|1 >> The behaviour of this option is dependent on the type of output data stream. =over 5 =item * A Buffer If C is enabled, all uncompressed data will be append to the end of the output buffer. Otherwise the output buffer will be cleared before any uncompressed data is written to it. =item * A Filename If C is enabled, the file will be opened in append mode. Otherwise the contents of the file, if any, will be truncated before any uncompressed data is written to it. =item * A Filehandle If C is enabled, the filehandle will be positioned to the end of the file via a call to C before any uncompressed data is written to it. Otherwise the file pointer will not be moved. =back When C is specified, and set to true, it will I all uncompressed data to the output data stream. So when the output is a filehandle it will carry out a seek to the eof before writing any uncompressed data. If the output is a filename, it will be opened for appending. If the output is a buffer, all uncompressed data will be appended to the existing buffer. Conversely when C is not specified, or it is present and is set to false, it will operate as follows. When the output is a filename, it will truncate the contents of the file before writing any uncompressed data. If the output is a filehandle its position will not be changed. If the output is a buffer, it will be wiped before any uncompressed data is output. Defaults to 0. =item C<< MultiStream => 0|1 >> If the input file/buffer contains multiple compressed data streams, this option will uncompress the whole lot as a single data stream. Defaults to 0. =item C<< TrailingData => $scalar >> Returns the data, if any, that is present immediately after the compressed data stream once uncompression is complete. This option can be used when there is useful information immediately following the compressed data stream, and you don't know the length of the compressed data stream. If the input is a buffer, C will return everything from the end of the compressed data stream to the end of the buffer. If the input is a filehandle, C will return the data that is left in the filehandle input buffer once the end of the compressed data stream has been reached. You can then use the filehandle to read the rest of the input file. Don't bother using C if the input is a filename. If you know the length of the compressed data stream before you start uncompressing, you can avoid having to use C by setting the C option. =back =head2 Examples Say you have a zip file, C, that only contains a single member, you can read it and write the uncompressed data to the file C like this. use strict ; use warnings ; use IO::Uncompress::Unzip qw(unzip $UnzipError) ; my $input = "file1.zip"; my $output = "file1.txt"; unzip $input => $output or die "unzip failed: $UnzipError\n"; If you have a zip file that contains multiple members and want to read a specific member from the file, say C<"data1">, use the C option use strict ; use warnings ; use IO::Uncompress::Unzip qw(unzip $UnzipError) ; my $input = "file1.zip"; my $output = "file1.txt"; unzip $input => $output, Name => "data1" or die "unzip failed: $UnzipError\n"; Alternatively, if you want to read the C<"data1"> member into memory, use a scalar reference for the C parameter. use strict ; use warnings ; use IO::Uncompress::Unzip qw(unzip $UnzipError) ; my $input = "file1.zip"; my $output ; unzip $input => \$output, Name => "data1" or die "unzip failed: $UnzipError\n"; # $output now contains the uncompressed data To read from an existing Perl filehandle, C<$input>, and write the uncompressed data to a buffer, C<$buffer>. use strict ; use warnings ; use IO::Uncompress::Unzip qw(unzip $UnzipError) ; use IO::File ; my $input = new IO::File " \$buffer or die "unzip failed: $UnzipError\n"; =head1 OO Interface =head2 Constructor The format of the constructor for IO::Uncompress::Unzip is shown below my $z = new IO::Uncompress::Unzip $input [OPTS] or die "IO::Uncompress::Unzip failed: $UnzipError\n"; Returns an C object on success and undef on failure. The variable C<$UnzipError> will contain an error message on failure. If you are running Perl 5.005 or better the object, C<$z>, returned from IO::Uncompress::Unzip can be used exactly like an L filehandle. This means that all normal input file operations can be carried out with C<$z>. For example, to read a line from a compressed file/buffer you can use either of these forms $line = $z->getline(); $line = <$z>; The mandatory parameter C<$input> is used to determine the source of the compressed data. This parameter can take one of three forms. =over 5 =item A filename If the C<$input> parameter is a scalar, it is assumed to be a filename. This file will be opened for reading and the compressed data will be read from it. =item A filehandle If the C<$input> parameter is a filehandle, the compressed data will be read from it. The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input> is a scalar reference, the compressed data will be read from C<$$input>. =back =head2 Constructor Options The option names defined below are case insensitive and can be optionally prefixed by a '-'. So all of the following are valid -AutoClose -autoclose AUTOCLOSE autoclose OPTS is a combination of the following options: =over 5 =item C<< Name => "membername" >> Open "membername" from the zip file for reading. =item C<< AutoClose => 0|1 >> This option is only valid when the C<$input> parameter is a filehandle. If specified, and the value is true, it will result in the file being closed once either the C method is called or the IO::Uncompress::Unzip object is destroyed. This parameter defaults to 0. =item C<< MultiStream => 0|1 >> Treats the complete zip file/buffer as a single compressed data stream. When reading in multi-stream mode each member of the zip file/buffer will be uncompressed in turn until the end of the file/buffer is encountered. This parameter defaults to 0. =item C<< Prime => $string >> This option will uncompress the contents of C<$string> before processing the input file/buffer. This option can be useful when the compressed data is embedded in another file/data structure and it is not possible to work out where the compressed data begins without having to read the first few bytes. If this is the case, the uncompression can be I with these bytes using this option. =item C<< Transparent => 0|1 >> If this option is set and the input file/buffer is not compressed data, the module will allow reading of it anyway. In addition, if the input file/buffer does contain compressed data and there is non-compressed data immediately following it, setting this option will make this module treat the whole file/buffer as a single data stream. This option defaults to 1. =item C<< BlockSize => $num >> When reading the compressed input data, IO::Uncompress::Unzip will read it in blocks of C<$num> bytes. This option defaults to 4096. =item C<< InputLength => $size >> When present this option will limit the number of compressed bytes read from the input file/buffer to C<$size>. This option can be used in the situation where there is useful data directly after the compressed data stream and you know beforehand the exact length of the compressed data stream. This option is mostly used when reading from a filehandle, in which case the file pointer will be left pointing to the first byte directly after the compressed data stream. This option defaults to off. =item C<< Append => 0|1 >> This option controls what the C method does with uncompressed data. If set to 1, all uncompressed data will be appended to the output parameter of the C method. If set to 0, the contents of the output parameter of the C method will be overwritten by the uncompressed data. Defaults to 0. =item C<< Strict => 0|1 >> This option controls whether the extra checks defined below are used when carrying out the decompression. When Strict is on, the extra tests are carried out, when Strict is off they are not. The default for this option is off. =back =head2 Examples TODO =head1 Methods =head2 read Usage is $status = $z->read($buffer) Reads a block of compressed data (the size of the compressed block is determined by the C option in the constructor), uncompresses it and writes any uncompressed data into C<$buffer>. If the C parameter is set in the constructor, the uncompressed data will be appended to the C<$buffer> parameter. Otherwise C<$buffer> will be overwritten. Returns the number of uncompressed bytes written to C<$buffer>, zero if eof or a negative number on error. =head2 read Usage is $status = $z->read($buffer, $length) $status = $z->read($buffer, $length, $offset) $status = read($z, $buffer, $length) $status = read($z, $buffer, $length, $offset) Attempt to read C<$length> bytes of uncompressed data into C<$buffer>. The main difference between this form of the C method and the previous one, is that this one will attempt to return I C<$length> bytes. The only circumstances that this function will not is if end-of-file or an IO error is encountered. Returns the number of uncompressed bytes written to C<$buffer>, zero if eof or a negative number on error. =head2 getline Usage is $line = $z->getline() $line = <$z> Reads a single line. This method fully supports the use of the variable C<$/> (or C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C is in use) to determine what constitutes an end of line. Paragraph mode, record mode and file slurp mode are all supported. =head2 getc Usage is $char = $z->getc() Read a single character. =head2 ungetc Usage is $char = $z->ungetc($string) =head2 inflateSync Usage is $status = $z->inflateSync() TODO =head2 getHeaderInfo Usage is $hdr = $z->getHeaderInfo(); @hdrs = $z->getHeaderInfo(); This method returns either a hash reference (in scalar context) or a list or hash references (in array context) that contains information about each of the header fields in the compressed data stream(s). =head2 tell Usage is $z->tell() tell $z Returns the uncompressed file offset. =head2 eof Usage is $z->eof(); eof($z); Returns true if the end of the compressed input stream has been reached. =head2 seek $z->seek($position, $whence); seek($z, $position, $whence); Provides a sub-set of the C functionality, with the restriction that it is only legal to seek forward in the input file/buffer. It is a fatal error to attempt to seek backward. Note that the implementation of C in this module does not provide true random access to a compressed file/buffer. It works by uncompressing data from the current offset in the file/buffer until it reaches the uncompressed offset specified in the parameters to C. For very small files this may be acceptable behaviour. For large files it may cause an unacceptable delay. The C<$whence> parameter takes one the usual values, namely SEEK_SET, SEEK_CUR or SEEK_END. Returns 1 on success, 0 on failure. =head2 binmode Usage is $z->binmode binmode $z ; This is a noop provided for completeness. =head2 opened $z->opened() Returns true if the object currently refers to a opened file/buffer. =head2 autoflush my $prev = $z->autoflush() my $prev = $z->autoflush(EXPR) If the C<$z> object is associated with a file or a filehandle, this method returns the current autoflush setting for the underlying filehandle. If C is present, and is non-zero, it will enable flushing after every write/print operation. If C<$z> is associated with a buffer, this method has no effect and always returns C. B that the special variable C<$|> B be used to set or retrieve the autoflush setting. =head2 input_line_number $z->input_line_number() $z->input_line_number(EXPR) Returns the current uncompressed line number. If C is present it has the effect of setting the line number. Note that setting the line number does not change the current position within the file/buffer being read. The contents of C<$/> are used to determine what constitutes a line terminator. =head2 fileno $z->fileno() fileno($z) If the C<$z> object is associated with a file or a filehandle, C will return the underlying file descriptor. Once the C method is called C will return C. If the C<$z> object is associated with a buffer, this method will return C. =head2 close $z->close() ; close $z ; Closes the output file/buffer. For most versions of Perl this method will be automatically invoked if the IO::Uncompress::Unzip object is destroyed (either explicitly or by the variable with the reference to the object going out of scope). The exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In these cases, the C method will be called automatically, but not until global destruction of all live objects when the program is terminating. Therefore, if you want your scripts to be able to run on all versions of Perl, you should call C explicitly and not rely on automatic closing. Returns true on success, otherwise 0. If the C option has been enabled when the IO::Uncompress::Unzip object was created, and the object is associated with a file, the underlying file will also be closed. =head2 nextStream Usage is my $status = $z->nextStream(); Skips to the next compressed data stream in the input file/buffer. If a new compressed data stream is found, the eof marker will be cleared and C<$.> will be reset to 0. Returns 1 if a new stream was found, 0 if none was found, and -1 if an error was encountered. =head2 trailingData Usage is my $data = $z->trailingData(); Returns the data, if any, that is present immediately after the compressed data stream once uncompression is complete. It only makes sense to call this method once the end of the compressed data stream has been encountered. This option can be used when there is useful information immediately following the compressed data stream, and you don't know the length of the compressed data stream. If the input is a buffer, C will return everything from the end of the compressed data stream to the end of the buffer. If the input is a filehandle, C will return the data that is left in the filehandle input buffer once the end of the compressed data stream has been reached. You can then use the filehandle to read the rest of the input file. Don't bother using C if the input is a filename. If you know the length of the compressed data stream before you start uncompressing, you can avoid having to use C by setting the C option in the constructor. =head1 Importing No symbolic constants are required by this IO::Uncompress::Unzip at present. =over 5 =item :all Imports C and C<$UnzipError>. Same as doing this use IO::Uncompress::Unzip qw(unzip $UnzipError) ; =back =head1 EXAMPLES =head2 Working with Net::FTP See L =head2 Walking through a zip file The code below can be used to traverse a zip file, one compressed data stream at a time. use IO::Uncompress::Unzip qw($UnzipError); my $zipfile = "somefile.zip"; my $u = new IO::Uncompress::Unzip $zipfile or die "Cannot open $zipfile: $UnzipError"; my $status; for ($status = 1; $status > 0; $status = $u->nextStream()) { my $name = $u->getHeaderInfo()->{Name}; warn "Processing member $name\n" ; my $buff; while (($status = $u->read($buff)) > 0) { # Do something here } last if $status < 0; } die "Error processing $zipfile: $!\n" if $status < 0 ; Each individual compressed data stream is read until the logical end-of-file is reached. Then C is called. This will skip to the start of the next compressed data stream and clear the end-of-file flag. It is also worth noting that C can be called at any time -- you don't have to wait until you have exhausted a compressed data stream before skipping to the next one. =head2 Unzipping a complete zip file to disk Daniel S. Sterling has written a script that uses C to read a zip file and unzip its contents to disk. The script is available from L =head1 SEE ALSO L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L L L, L, L, L For RFC 1950, 1951 and 1952 see F, F and F The I compression library was written by Jean-loup Gailly F and Mark Adler F. The primary site for the I compression library is F. The primary site for gzip is F. =head1 AUTHOR This module was written by Paul Marquess, F. =head1 MODIFICATION HISTORY See the Changes file. =head1 COPYRIGHT AND LICENSE Copyright (c) 2005-2013 Paul Marquess. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. IO-Compress-2.063/lib/IO/Uncompress/Base.pm0000644000175000017500000011226612211042535016736 0ustar paulpaul package IO::Uncompress::Base ; use strict ; use warnings; #use bytes; our (@ISA, $VERSION, @EXPORT_OK, %EXPORT_TAGS); @ISA = qw(Exporter IO::File); $VERSION = '2.063'; use constant G_EOF => 0 ; use constant G_ERR => -1 ; use IO::Compress::Base::Common 2.063 ; use IO::File ; use Symbol; use Scalar::Util (); use List::Util (); use Carp ; %EXPORT_TAGS = ( ); push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ; sub smartRead { my $self = $_[0]; my $out = $_[1]; my $size = $_[2]; $$out = "" ; my $offset = 0 ; my $status = 1; if (defined *$self->{InputLength}) { return 0 if *$self->{InputLengthRemaining} <= 0 ; $size = List::Util::min($size, *$self->{InputLengthRemaining}); } if ( length *$self->{Prime} ) { $$out = substr(*$self->{Prime}, 0, $size) ; substr(*$self->{Prime}, 0, $size) = '' ; if (length $$out == $size) { *$self->{InputLengthRemaining} -= length $$out if defined *$self->{InputLength}; return length $$out ; } $offset = length $$out ; } my $get_size = $size - $offset ; if (defined *$self->{FH}) { if ($offset) { # Not using this # # *$self->{FH}->read($$out, $get_size, $offset); # # because the filehandle may not support the offset parameter # An example is Net::FTP my $tmp = ''; $status = *$self->{FH}->read($tmp, $get_size) ; substr($$out, $offset) = $tmp if defined $status && $status > 0 ; } else { $status = *$self->{FH}->read($$out, $get_size) } } elsif (defined *$self->{InputEvent}) { my $got = 1 ; while (length $$out < $size) { last if ($got = *$self->{InputEvent}->($$out, $get_size)) <= 0; } if (length $$out > $size ) { *$self->{Prime} = substr($$out, $size, length($$out)); substr($$out, $size, length($$out)) = ''; } *$self->{EventEof} = 1 if $got <= 0 ; } else { no warnings 'uninitialized'; my $buf = *$self->{Buffer} ; $$buf = '' unless defined $$buf ; substr($$out, $offset) = substr($$buf, *$self->{BufferOffset}, $get_size); if (*$self->{ConsumeInput}) { substr($$buf, 0, $get_size) = '' } else { *$self->{BufferOffset} += length($$out) - $offset } } *$self->{InputLengthRemaining} -= length($$out) #- $offset if defined *$self->{InputLength}; if (! defined $status) { $self->saveStatus($!) ; return STATUS_ERROR; } $self->saveStatus(length $$out < 0 ? STATUS_ERROR : STATUS_OK) ; return length $$out; } sub pushBack { my $self = shift ; return if ! defined $_[0] || length $_[0] == 0 ; if (defined *$self->{FH} || defined *$self->{InputEvent} ) { *$self->{Prime} = $_[0] . *$self->{Prime} ; *$self->{InputLengthRemaining} += length($_[0]); } else { my $len = length $_[0]; if($len > *$self->{BufferOffset}) { *$self->{Prime} = substr($_[0], 0, $len - *$self->{BufferOffset}) . *$self->{Prime} ; *$self->{InputLengthRemaining} = *$self->{InputLength}; *$self->{BufferOffset} = 0 } else { *$self->{InputLengthRemaining} += length($_[0]); *$self->{BufferOffset} -= length($_[0]) ; } } } sub smartSeek { my $self = shift ; my $offset = shift ; my $truncate = shift; my $position = shift || SEEK_SET; # TODO -- need to take prime into account if (defined *$self->{FH}) { *$self->{FH}->seek($offset, $position) } else { if ($position == SEEK_END) { *$self->{BufferOffset} = length ${ *$self->{Buffer} } + $offset ; } elsif ($position == SEEK_CUR) { *$self->{BufferOffset} += $offset ; } else { *$self->{BufferOffset} = $offset ; } substr(${ *$self->{Buffer} }, *$self->{BufferOffset}) = '' if $truncate; return 1; } } sub smartTell { my $self = shift ; if (defined *$self->{FH}) { return *$self->{FH}->tell() } else { return *$self->{BufferOffset} } } sub smartWrite { my $self = shift ; my $out_data = shift ; if (defined *$self->{FH}) { # flush needed for 5.8.0 defined *$self->{FH}->write($out_data, length $out_data) && defined *$self->{FH}->flush() ; } else { my $buf = *$self->{Buffer} ; substr($$buf, *$self->{BufferOffset}, length $out_data) = $out_data ; *$self->{BufferOffset} += length($out_data) ; return 1; } } sub smartReadExact { return $_[0]->smartRead($_[1], $_[2]) == $_[2]; } sub smartEof { my ($self) = $_[0]; local $.; return 0 if length *$self->{Prime} || *$self->{PushMode}; if (defined *$self->{FH}) { # Could use # # *$self->{FH}->eof() # # here, but this can cause trouble if # the filehandle is itself a tied handle, but it uses sysread. # Then we get into mixing buffered & non-buffered IO, # which will cause trouble my $info = $self->getErrInfo(); my $buffer = ''; my $status = $self->smartRead(\$buffer, 1); $self->pushBack($buffer) if length $buffer; $self->setErrInfo($info); return $status == 0 ; } elsif (defined *$self->{InputEvent}) { *$self->{EventEof} } else { *$self->{BufferOffset} >= length(${ *$self->{Buffer} }) } } sub clearError { my $self = shift ; *$self->{ErrorNo} = 0 ; ${ *$self->{Error} } = '' ; } sub getErrInfo { my $self = shift ; return [ *$self->{ErrorNo}, ${ *$self->{Error} } ] ; } sub setErrInfo { my $self = shift ; my $ref = shift; *$self->{ErrorNo} = $ref->[0] ; ${ *$self->{Error} } = $ref->[1] ; } sub saveStatus { my $self = shift ; my $errno = shift() + 0 ; *$self->{ErrorNo} = $errno; ${ *$self->{Error} } = '' ; return *$self->{ErrorNo} ; } sub saveErrorString { my $self = shift ; my $retval = shift ; ${ *$self->{Error} } = shift ; *$self->{ErrorNo} = @_ ? shift() + 0 : STATUS_ERROR ; return $retval; } sub croakError { my $self = shift ; $self->saveErrorString(0, $_[0]); croak $_[0]; } sub closeError { my $self = shift ; my $retval = shift ; my $errno = *$self->{ErrorNo}; my $error = ${ *$self->{Error} }; $self->close(); *$self->{ErrorNo} = $errno ; ${ *$self->{Error} } = $error ; return $retval; } sub error { my $self = shift ; return ${ *$self->{Error} } ; } sub errorNo { my $self = shift ; return *$self->{ErrorNo}; } sub HeaderError { my ($self) = shift; return $self->saveErrorString(undef, "Header Error: $_[0]", STATUS_ERROR); } sub TrailerError { my ($self) = shift; return $self->saveErrorString(G_ERR, "Trailer Error: $_[0]", STATUS_ERROR); } sub TruncatedHeader { my ($self) = shift; return $self->HeaderError("Truncated in $_[0] Section"); } sub TruncatedTrailer { my ($self) = shift; return $self->TrailerError("Truncated in $_[0] Section"); } sub postCheckParams { return 1; } sub checkParams { my $self = shift ; my $class = shift ; my $got = shift || IO::Compress::Base::Parameters::new(); my $Valid = { 'blocksize' => [IO::Compress::Base::Common::Parse_unsigned, 16 * 1024], 'autoclose' => [IO::Compress::Base::Common::Parse_boolean, 0], 'strict' => [IO::Compress::Base::Common::Parse_boolean, 0], 'append' => [IO::Compress::Base::Common::Parse_boolean, 0], 'prime' => [IO::Compress::Base::Common::Parse_any, undef], 'multistream' => [IO::Compress::Base::Common::Parse_boolean, 0], 'transparent' => [IO::Compress::Base::Common::Parse_any, 1], 'scan' => [IO::Compress::Base::Common::Parse_boolean, 0], 'inputlength' => [IO::Compress::Base::Common::Parse_unsigned, undef], 'binmodeout' => [IO::Compress::Base::Common::Parse_boolean, 0], #'decode' => [IO::Compress::Base::Common::Parse_any, undef], #'consumeinput' => [IO::Compress::Base::Common::Parse_boolean, 0], $self->getExtraParams(), #'Todo - Revert to ordinary file on end Z_STREAM_END'=> 0, # ContinueAfterEof } ; $Valid->{trailingdata} = [IO::Compress::Base::Common::Parse_writable_scalar, undef] if *$self->{OneShot} ; $got->parse($Valid, @_ ) or $self->croakError("${class}: " . $got->getError()) ; $self->postCheckParams($got) or $self->croakError("${class}: " . $self->error()) ; return $got; } sub _create { my $obj = shift; my $got = shift; my $append_mode = shift ; my $class = ref $obj; $obj->croakError("$class: Missing Input parameter") if ! @_ && ! $got ; my $inValue = shift ; *$obj->{OneShot} = 0 ; if (! $got) { $got = $obj->checkParams($class, undef, @_) or return undef ; } my $inType = whatIsInput($inValue, 1); $obj->ckInputParam($class, $inValue, 1) or return undef ; *$obj->{InNew} = 1; $obj->ckParams($got) or $obj->croakError("${class}: " . *$obj->{Error}); if ($inType eq 'buffer' || $inType eq 'code') { *$obj->{Buffer} = $inValue ; *$obj->{InputEvent} = $inValue if $inType eq 'code' ; } else { if ($inType eq 'handle') { *$obj->{FH} = $inValue ; *$obj->{Handle} = 1 ; # Need to rewind for Scan *$obj->{FH}->seek(0, SEEK_SET) if $got->getValue('scan'); } else { no warnings ; my $mode = '<'; $mode = '+<' if $got->getValue('scan'); *$obj->{StdIO} = ($inValue eq '-'); *$obj->{FH} = new IO::File "$mode $inValue" or return $obj->saveErrorString(undef, "cannot open file '$inValue': $!", $!) ; } *$obj->{LineNo} = $. = 0; setBinModeInput(*$obj->{FH}) ; my $buff = "" ; *$obj->{Buffer} = \$buff ; } # if ($got->getValue('decode')) { # my $want_encoding = $got->getValue('decode'); # *$obj->{Encoding} = IO::Compress::Base::Common::getEncoding($obj, $class, $want_encoding); # } # else { # *$obj->{Encoding} = undef; # } *$obj->{InputLength} = $got->parsed('inputlength') ? $got->getValue('inputlength') : undef ; *$obj->{InputLengthRemaining} = $got->getValue('inputlength'); *$obj->{BufferOffset} = 0 ; *$obj->{AutoClose} = $got->getValue('autoclose'); *$obj->{Strict} = $got->getValue('strict'); *$obj->{BlockSize} = $got->getValue('blocksize'); *$obj->{Append} = $got->getValue('append'); *$obj->{AppendOutput} = $append_mode || $got->getValue('append'); *$obj->{ConsumeInput} = $got->getValue('consumeinput'); *$obj->{Transparent} = $got->getValue('transparent'); *$obj->{MultiStream} = $got->getValue('multistream'); # TODO - move these two into RawDeflate *$obj->{Scan} = $got->getValue('scan'); *$obj->{ParseExtra} = $got->getValue('parseextra') || $got->getValue('strict') ; *$obj->{Type} = ''; *$obj->{Prime} = $got->getValue('prime') || '' ; *$obj->{Pending} = ''; *$obj->{Plain} = 0; *$obj->{PlainBytesRead} = 0; *$obj->{InflatedBytesRead} = 0; *$obj->{UnCompSize} = new U64; *$obj->{CompSize} = new U64; *$obj->{TotalInflatedBytesRead} = 0; *$obj->{NewStream} = 0 ; *$obj->{EventEof} = 0 ; *$obj->{ClassName} = $class ; *$obj->{Params} = $got ; if (*$obj->{ConsumeInput}) { *$obj->{InNew} = 0; *$obj->{Closed} = 0; return $obj } my $status = $obj->mkUncomp($got); return undef unless defined $status; *$obj->{InNew} = 0; *$obj->{Closed} = 0; if ($status) { # Need to try uncompressing to catch the case # where the compressed file uncompresses to an # empty string - so eof is set immediately. my $out_buffer = ''; $status = $obj->read(\$out_buffer); if ($status < 0) { *$obj->{ReadStatus} = [ $status, $obj->error(), $obj->errorNo() ]; } $obj->ungetc($out_buffer) if length $out_buffer; } else { return undef unless *$obj->{Transparent}; $obj->clearError(); *$obj->{Type} = 'plain'; *$obj->{Plain} = 1; $obj->pushBack(*$obj->{HeaderPending}) ; } push @{ *$obj->{InfoList} }, *$obj->{Info} ; $obj->saveStatus(STATUS_OK) ; *$obj->{InNew} = 0; *$obj->{Closed} = 0; return $obj; } sub ckInputParam { my $self = shift ; my $from = shift ; my $inType = whatIsInput($_[0], $_[1]); $self->croakError("$from: input parameter not a filename, filehandle, array ref or scalar ref") if ! $inType ; # if ($inType eq 'filename' ) # { # return $self->saveErrorString(1, "$from: input filename is undef or null string", STATUS_ERROR) # if ! defined $_[0] || $_[0] eq '' ; # # if ($_[0] ne '-' && ! -e $_[0] ) # { # return $self->saveErrorString(1, # "input file '$_[0]' does not exist", STATUS_ERROR); # } # } return 1; } sub _inf { my $obj = shift ; my $class = (caller)[0] ; my $name = (caller(1))[3] ; $obj->croakError("$name: expected at least 1 parameters\n") unless @_ >= 1 ; my $input = shift ; my $haveOut = @_ ; my $output = shift ; my $x = new IO::Compress::Base::Validator($class, *$obj->{Error}, $name, $input, $output) or return undef ; push @_, $output if $haveOut && $x->{Hash}; *$obj->{OneShot} = 1 ; my $got = $obj->checkParams($name, undef, @_) or return undef ; if ($got->parsed('trailingdata')) { # my $value = $got->valueRef('TrailingData'); # warn "TD $value "; # #$value = $$value; ## warn "TD $value $$value "; # # return retErr($obj, "Parameter 'TrailingData' not writable") # if readonly $$value ; # # if (ref $$value) # { # return retErr($obj,"Parameter 'TrailingData' not a scalar reference") # if ref $$value ne 'SCALAR' ; # # *$obj->{TrailingData} = $$value ; # } # else # { # return retErr($obj,"Parameter 'TrailingData' not a scalar") # if ref $value ne 'SCALAR' ; # # *$obj->{TrailingData} = $value ; # } *$obj->{TrailingData} = $got->getValue('trailingdata'); } *$obj->{MultiStream} = $got->getValue('multistream'); $got->setValue('multistream', 0); $x->{Got} = $got ; # if ($x->{Hash}) # { # while (my($k, $v) = each %$input) # { # $v = \$input->{$k} # unless defined $v ; # # $obj->_singleTarget($x, $k, $v, @_) # or return undef ; # } # # return keys %$input ; # } if ($x->{GlobMap}) { $x->{oneInput} = 1 ; foreach my $pair (@{ $x->{Pairs} }) { my ($from, $to) = @$pair ; $obj->_singleTarget($x, $from, $to, @_) or return undef ; } return scalar @{ $x->{Pairs} } ; } if (! $x->{oneOutput} ) { my $inFile = ($x->{inType} eq 'filenames' || $x->{inType} eq 'filename'); $x->{inType} = $inFile ? 'filename' : 'buffer'; foreach my $in ($x->{oneInput} ? $input : @$input) { my $out ; $x->{oneInput} = 1 ; $obj->_singleTarget($x, $in, $output, @_) or return undef ; } return 1 ; } # finally the 1 to 1 and n to 1 return $obj->_singleTarget($x, $input, $output, @_); croak "should not be here" ; } sub retErr { my $x = shift ; my $string = shift ; ${ $x->{Error} } = $string ; return undef ; } sub _singleTarget { my $self = shift ; my $x = shift ; my $input = shift; my $output = shift; my $buff = ''; $x->{buff} = \$buff ; my $fh ; if ($x->{outType} eq 'filename') { my $mode = '>' ; $mode = '>>' if $x->{Got}->getValue('append') ; $x->{fh} = new IO::File "$mode $output" or return retErr($x, "cannot open file '$output': $!") ; binmode $x->{fh} if $x->{Got}->valueOrDefault('binmodeout'); } elsif ($x->{outType} eq 'handle') { $x->{fh} = $output; binmode $x->{fh} if $x->{Got}->valueOrDefault('binmodeout'); if ($x->{Got}->getValue('append')) { seek($x->{fh}, 0, SEEK_END) or return retErr($x, "Cannot seek to end of output filehandle: $!") ; } } elsif ($x->{outType} eq 'buffer' ) { $$output = '' unless $x->{Got}->getValue('append'); $x->{buff} = $output ; } if ($x->{oneInput}) { defined $self->_rd2($x, $input, $output) or return undef; } else { for my $element ( ($x->{inType} eq 'hash') ? keys %$input : @$input) { defined $self->_rd2($x, $element, $output) or return undef ; } } if ( ($x->{outType} eq 'filename' && $output ne '-') || ($x->{outType} eq 'handle' && $x->{Got}->getValue('autoclose'))) { $x->{fh}->close() or return retErr($x, $!); delete $x->{fh}; } return 1 ; } sub _rd2 { my $self = shift ; my $x = shift ; my $input = shift; my $output = shift; my $z = IO::Compress::Base::Common::createSelfTiedObject($x->{Class}, *$self->{Error}); $z->_create($x->{Got}, 1, $input, @_) or return undef ; my $status ; my $fh = $x->{fh}; while (1) { while (($status = $z->read($x->{buff})) > 0) { if ($fh) { syswrite $fh, ${ $x->{buff} } or return $z->saveErrorString(undef, "Error writing to output file: $!", $!); ${ $x->{buff} } = '' ; } } if (! $x->{oneOutput} ) { my $ot = $x->{outType} ; if ($ot eq 'array') { push @$output, $x->{buff} } elsif ($ot eq 'hash') { $output->{$input} = $x->{buff} } my $buff = ''; $x->{buff} = \$buff; } last if $status < 0 || $z->smartEof(); last unless *$self->{MultiStream}; $status = $z->nextStream(); last unless $status == 1 ; } return $z->closeError(undef) if $status < 0 ; ${ *$self->{TrailingData} } = $z->trailingData() if defined *$self->{TrailingData} ; $z->close() or return undef ; return 1 ; } sub TIEHANDLE { return $_[0] if ref($_[0]); die "OOPS\n" ; } sub UNTIE { my $self = shift ; } sub getHeaderInfo { my $self = shift ; wantarray ? @{ *$self->{InfoList} } : *$self->{Info}; } sub readBlock { my $self = shift ; my $buff = shift ; my $size = shift ; if (defined *$self->{CompressedInputLength}) { if (*$self->{CompressedInputLengthRemaining} == 0) { delete *$self->{CompressedInputLength}; *$self->{CompressedInputLengthDone} = 1; return STATUS_OK ; } $size = List::Util::min($size, *$self->{CompressedInputLengthRemaining} ); *$self->{CompressedInputLengthRemaining} -= $size ; } my $status = $self->smartRead($buff, $size) ; return $self->saveErrorString(STATUS_ERROR, "Error Reading Data: $!", $!) if $status == STATUS_ERROR ; if ($status == 0 ) { *$self->{Closed} = 1 ; *$self->{EndStream} = 1 ; return $self->saveErrorString(STATUS_ERROR, "unexpected end of file", STATUS_ERROR); } return STATUS_OK; } sub postBlockChk { return STATUS_OK; } sub _raw_read { # return codes # >0 - ok, number of bytes read # =0 - ok, eof # <0 - not ok my $self = shift ; return G_EOF if *$self->{Closed} ; return G_EOF if *$self->{EndStream} ; my $buffer = shift ; my $scan_mode = shift ; if (*$self->{Plain}) { my $tmp_buff ; my $len = $self->smartRead(\$tmp_buff, *$self->{BlockSize}) ; return $self->saveErrorString(G_ERR, "Error reading data: $!", $!) if $len == STATUS_ERROR ; if ($len == 0 ) { *$self->{EndStream} = 1 ; } else { *$self->{PlainBytesRead} += $len ; $$buffer .= $tmp_buff; } return $len ; } if (*$self->{NewStream}) { $self->gotoNextStream() > 0 or return G_ERR; # For the headers that actually uncompressed data, put the # uncompressed data into the output buffer. $$buffer .= *$self->{Pending} ; my $len = length *$self->{Pending} ; *$self->{Pending} = ''; return $len; } my $temp_buf = ''; my $outSize = 0; my $status = $self->readBlock(\$temp_buf, *$self->{BlockSize}, $outSize) ; return G_ERR if $status == STATUS_ERROR ; my $buf_len = 0; if ($status == STATUS_OK) { my $beforeC_len = length $temp_buf; my $before_len = defined $$buffer ? length $$buffer : 0 ; $status = *$self->{Uncomp}->uncompr(\$temp_buf, $buffer, defined *$self->{CompressedInputLengthDone} || $self->smartEof(), $outSize); # Remember the input buffer if it wasn't consumed completely $self->pushBack($temp_buf) if *$self->{Uncomp}{ConsumesInput}; return $self->saveErrorString(G_ERR, *$self->{Uncomp}{Error}, *$self->{Uncomp}{ErrorNo}) if $self->saveStatus($status) == STATUS_ERROR; $self->postBlockChk($buffer, $before_len) == STATUS_OK or return G_ERR; $buf_len = defined $$buffer ? length($$buffer) - $before_len : 0; *$self->{CompSize}->add($beforeC_len - length $temp_buf) ; *$self->{InflatedBytesRead} += $buf_len ; *$self->{TotalInflatedBytesRead} += $buf_len ; *$self->{UnCompSize}->add($buf_len) ; $self->filterUncompressed($buffer, $before_len); # if (*$self->{Encoding}) { # use Encode ; # *$self->{PendingDecode} .= substr($$buffer, $before_len) ; # my $got = *$self->{Encoding}->decode(*$self->{PendingDecode}, Encode::FB_QUIET) ; # substr($$buffer, $before_len) = $got; # } } if ($status == STATUS_ENDSTREAM) { *$self->{EndStream} = 1 ; my $trailer; my $trailer_size = *$self->{Info}{TrailerLength} ; my $got = 0; if (*$self->{Info}{TrailerLength}) { $got = $self->smartRead(\$trailer, $trailer_size) ; } if ($got == $trailer_size) { $self->chkTrailer($trailer) == STATUS_OK or return G_ERR; } else { return $self->TrailerError("trailer truncated. Expected " . "$trailer_size bytes, got $got") if *$self->{Strict}; $self->pushBack($trailer) ; } # TODO - if want file pointer, do it here if (! $self->smartEof()) { *$self->{NewStream} = 1 ; if (*$self->{MultiStream}) { *$self->{EndStream} = 0 ; return $buf_len ; } } } # return the number of uncompressed bytes read return $buf_len ; } sub reset { my $self = shift ; return *$self->{Uncomp}->reset(); } sub filterUncompressed { } #sub isEndStream #{ # my $self = shift ; # return *$self->{NewStream} || # *$self->{EndStream} ; #} sub nextStream { my $self = shift ; my $status = $self->gotoNextStream(); $status == 1 or return $status ; *$self->{TotalInflatedBytesRead} = 0 ; *$self->{LineNo} = $. = 0; return 1; } sub gotoNextStream { my $self = shift ; if (! *$self->{NewStream}) { my $status = 1; my $buffer ; # TODO - make this more efficient if know the offset for the end of # the stream and seekable $status = $self->read($buffer) while $status > 0 ; return $status if $status < 0; } *$self->{NewStream} = 0 ; *$self->{EndStream} = 0 ; *$self->{CompressedInputLengthDone} = undef ; *$self->{CompressedInputLength} = undef ; $self->reset(); *$self->{UnCompSize}->reset(); *$self->{CompSize}->reset(); my $magic = $self->ckMagic(); if ( ! defined $magic) { if (! *$self->{Transparent} || $self->eof()) { *$self->{EndStream} = 1 ; return 0; } $self->clearError(); *$self->{Type} = 'plain'; *$self->{Plain} = 1; $self->pushBack(*$self->{HeaderPending}) ; } else { *$self->{Info} = $self->readHeader($magic); if ( ! defined *$self->{Info} ) { *$self->{EndStream} = 1 ; return -1; } } push @{ *$self->{InfoList} }, *$self->{Info} ; return 1; } sub streamCount { my $self = shift ; return 1 if ! defined *$self->{InfoList}; return scalar @{ *$self->{InfoList} } ; } #sub read #{ # my $status = myRead(@_); # return undef if $status < 0; # return $status; #} sub read { # return codes # >0 - ok, number of bytes read # =0 - ok, eof # <0 - not ok my $self = shift ; if (defined *$self->{ReadStatus} ) { my $status = *$self->{ReadStatus}[0]; $self->saveErrorString( @{ *$self->{ReadStatus} } ); delete *$self->{ReadStatus} ; return $status ; } return G_EOF if *$self->{Closed} ; my $buffer ; if (ref $_[0] ) { $self->croakError(*$self->{ClassName} . "::read: buffer parameter is read-only") if Scalar::Util::readonly(${ $_[0] }); $self->croakError(*$self->{ClassName} . "::read: not a scalar reference $_[0]" ) unless ref $_[0] eq 'SCALAR' ; $buffer = $_[0] ; } else { $self->croakError(*$self->{ClassName} . "::read: buffer parameter is read-only") if Scalar::Util::readonly($_[0]); $buffer = \$_[0] ; } my $length = $_[1] ; my $offset = $_[2] || 0; if (! *$self->{AppendOutput}) { if (! $offset) { $$buffer = '' ; } else { if ($offset > length($$buffer)) { $$buffer .= "\x00" x ($offset - length($$buffer)); } else { substr($$buffer, $offset) = ''; } } } elsif (! defined $$buffer) { $$buffer = '' ; } return G_EOF if !length *$self->{Pending} && *$self->{EndStream} ; # the core read will return 0 if asked for 0 bytes return 0 if defined $length && $length == 0 ; $length = $length || 0; $self->croakError(*$self->{ClassName} . "::read: length parameter is negative") if $length < 0 ; # Short-circuit if this is a simple read, with no length # or offset specified. unless ( $length || $offset) { if (length *$self->{Pending}) { $$buffer .= *$self->{Pending} ; my $len = length *$self->{Pending}; *$self->{Pending} = '' ; return $len ; } else { my $len = 0; $len = $self->_raw_read($buffer) while ! *$self->{EndStream} && $len == 0 ; return $len ; } } # Need to jump through more hoops - either length or offset # or both are specified. my $out_buffer = *$self->{Pending} ; *$self->{Pending} = ''; while (! *$self->{EndStream} && length($out_buffer) < $length) { my $buf_len = $self->_raw_read(\$out_buffer); return $buf_len if $buf_len < 0 ; } $length = length $out_buffer if length($out_buffer) < $length ; return 0 if $length == 0 ; $$buffer = '' if ! defined $$buffer; $offset = length $$buffer if *$self->{AppendOutput} ; *$self->{Pending} = $out_buffer; $out_buffer = \*$self->{Pending} ; substr($$buffer, $offset) = substr($$out_buffer, 0, $length) ; substr($$out_buffer, 0, $length) = '' ; return $length ; } sub _getline { my $self = shift ; my $status = 0 ; # Slurp Mode if ( ! defined $/ ) { my $data ; 1 while ($status = $self->read($data)) > 0 ; return ($status, \$data); } # Record Mode if ( ref $/ eq 'SCALAR' && ${$/} =~ /^\d+$/ && ${$/} > 0) { my $reclen = ${$/} ; my $data ; $status = $self->read($data, $reclen) ; return ($status, \$data); } # Paragraph Mode if ( ! length $/ ) { my $paragraph ; while (($status = $self->read($paragraph)) > 0 ) { if ($paragraph =~ s/^(.*?\n\n+)//s) { *$self->{Pending} = $paragraph ; my $par = $1 ; return (1, \$par); } } return ($status, \$paragraph); } # $/ isn't empty, or a reference, so it's Line Mode. { my $line ; my $p = \*$self->{Pending} ; while (($status = $self->read($line)) > 0 ) { my $offset = index($line, $/); if ($offset >= 0) { my $l = substr($line, 0, $offset + length $/ ); substr($line, 0, $offset + length $/) = ''; $$p = $line; return (1, \$l); } } return ($status, \$line); } } sub getline { my $self = shift; if (defined *$self->{ReadStatus} ) { $self->saveErrorString( @{ *$self->{ReadStatus} } ); delete *$self->{ReadStatus} ; return undef; } return undef if *$self->{Closed} || (!length *$self->{Pending} && *$self->{EndStream}) ; my $current_append = *$self->{AppendOutput} ; *$self->{AppendOutput} = 1; my ($status, $lineref) = $self->_getline(); *$self->{AppendOutput} = $current_append; return undef if $status < 0 || length $$lineref == 0 ; $. = ++ *$self->{LineNo} ; return $$lineref ; } sub getlines { my $self = shift; $self->croakError(*$self->{ClassName} . "::getlines: called in scalar context\n") unless wantarray; my($line, @lines); push(@lines, $line) while defined($line = $self->getline); return @lines; } sub READLINE { goto &getlines if wantarray; goto &getline; } sub getc { my $self = shift; my $buf; return $buf if $self->read($buf, 1); return undef; } sub ungetc { my $self = shift; *$self->{Pending} = "" unless defined *$self->{Pending} ; *$self->{Pending} = $_[0] . *$self->{Pending} ; } sub trailingData { my $self = shift ; if (defined *$self->{FH} || defined *$self->{InputEvent} ) { return *$self->{Prime} ; } else { my $buf = *$self->{Buffer} ; my $offset = *$self->{BufferOffset} ; return substr($$buf, $offset) ; } } sub eof { my $self = shift ; return (*$self->{Closed} || (!length *$self->{Pending} && ( $self->smartEof() || *$self->{EndStream}))) ; } sub tell { my $self = shift ; my $in ; if (*$self->{Plain}) { $in = *$self->{PlainBytesRead} ; } else { $in = *$self->{TotalInflatedBytesRead} ; } my $pending = length *$self->{Pending} ; return 0 if $pending > $in ; return $in - $pending ; } sub close { # todo - what to do if close is called before the end of the gzip file # do we remember any trailing data? my $self = shift ; return 1 if *$self->{Closed} ; untie *$self if $] >= 5.008 ; my $status = 1 ; if (defined *$self->{FH}) { if ((! *$self->{Handle} || *$self->{AutoClose}) && ! *$self->{StdIO}) { local $.; $! = 0 ; $status = *$self->{FH}->close(); return $self->saveErrorString(0, $!, $!) if !*$self->{InNew} && $self->saveStatus($!) != 0 ; } delete *$self->{FH} ; $! = 0 ; } *$self->{Closed} = 1 ; return 1; } sub DESTROY { my $self = shift ; local ($., $@, $!, $^E, $?); $self->close() ; } sub seek { my $self = shift ; my $position = shift; my $whence = shift ; my $here = $self->tell() ; my $target = 0 ; if ($whence == SEEK_SET) { $target = $position ; } elsif ($whence == SEEK_CUR) { $target = $here + $position ; } elsif ($whence == SEEK_END) { $target = $position ; $self->croakError(*$self->{ClassName} . "::seek: SEEK_END not allowed") ; } else { $self->croakError(*$self->{ClassName} ."::seek: unknown value, $whence, for whence parameter"); } # short circuit if seeking to current offset if ($target == $here) { # On ordinary filehandles, seeking to the current # position also clears the EOF condition, so we # emulate this behavior locally while simultaneously # cascading it to the underlying filehandle if (*$self->{Plain}) { *$self->{EndStream} = 0; seek(*$self->{FH},0,1) if *$self->{FH}; } return 1; } # Outlaw any attempt to seek backwards $self->croakError( *$self->{ClassName} ."::seek: cannot seek backwards") if $target < $here ; # Walk the file to the new offset my $offset = $target - $here ; my $got; while (($got = $self->read(my $buffer, List::Util::min($offset, *$self->{BlockSize})) ) > 0) { $offset -= $got; last if $offset == 0 ; } $here = $self->tell() ; return $offset == 0 ? 1 : 0 ; } sub fileno { my $self = shift ; return defined *$self->{FH} ? fileno *$self->{FH} : undef ; } sub binmode { 1; # my $self = shift ; # return defined *$self->{FH} # ? binmode *$self->{FH} # : 1 ; } sub opened { my $self = shift ; return ! *$self->{Closed} ; } sub autoflush { my $self = shift ; return defined *$self->{FH} ? *$self->{FH}->autoflush(@_) : undef ; } sub input_line_number { my $self = shift ; my $last = *$self->{LineNo}; $. = *$self->{LineNo} = $_[1] if @_ ; return $last; } *BINMODE = \&binmode; *SEEK = \&seek; *READ = \&read; *sysread = \&read; *TELL = \&tell; *EOF = \&eof; *FILENO = \&fileno; *CLOSE = \&close; sub _notAvailable { my $name = shift ; return sub { croak "$name Not Available: File opened only for intput" ; } ; } *print = _notAvailable('print'); *PRINT = _notAvailable('print'); *printf = _notAvailable('printf'); *PRINTF = _notAvailable('printf'); *write = _notAvailable('write'); *WRITE = _notAvailable('write'); #*sysread = \&read; #*syswrite = \&_notAvailable; package IO::Uncompress::Base ; 1 ; __END__ =head1 NAME IO::Uncompress::Base - Base Class for IO::Uncompress modules =head1 SYNOPSIS use IO::Uncompress::Base ; =head1 DESCRIPTION This module is not intended for direct use in application code. Its sole purpose is to be sub-classed by IO::Uncompress modules. =head1 SEE ALSO L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L L L, L, L, L =head1 AUTHOR This module was written by Paul Marquess, F. =head1 MODIFICATION HISTORY See the Changes file. =head1 COPYRIGHT AND LICENSE Copyright (c) 2005-2013 Paul Marquess. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. IO-Compress-2.063/lib/IO/Uncompress/Inflate.pm0000644000175000017500000006663412211042537017457 0ustar paulpaulpackage IO::Uncompress::Inflate ; # for RFC1950 use strict ; use warnings; use bytes; use IO::Compress::Base::Common 2.063 qw(:Status ); use IO::Compress::Zlib::Constants 2.063 ; use IO::Uncompress::RawInflate 2.063 ; require Exporter ; our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $InflateError); $VERSION = '2.063'; $InflateError = ''; @ISA = qw( Exporter IO::Uncompress::RawInflate ); @EXPORT_OK = qw( $InflateError inflate ) ; %EXPORT_TAGS = %IO::Uncompress::RawInflate::DEFLATE_CONSTANTS ; push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ; Exporter::export_ok_tags('all'); sub new { my $class = shift ; my $obj = IO::Compress::Base::Common::createSelfTiedObject($class, \$InflateError); $obj->_create(undef, 0, @_); } sub inflate { my $obj = IO::Compress::Base::Common::createSelfTiedObject(undef, \$InflateError); return $obj->_inf(@_); } sub getExtraParams { return (); } sub ckParams { my $self = shift ; my $got = shift ; # gunzip always needs adler32 $got->setValue('adler32' => 1); return 1; } sub ckMagic { my $self = shift; my $magic ; $self->smartReadExact(\$magic, ZLIB_HEADER_SIZE); *$self->{HeaderPending} = $magic ; return $self->HeaderError("Header size is " . ZLIB_HEADER_SIZE . " bytes") if length $magic != ZLIB_HEADER_SIZE; #return $self->HeaderError("CRC mismatch.") return undef if ! $self->isZlibMagic($magic) ; *$self->{Type} = 'rfc1950'; return $magic; } sub readHeader { my $self = shift; my $magic = shift ; return $self->_readDeflateHeader($magic) ; } sub chkTrailer { my $self = shift; my $trailer = shift; my $ADLER32 = unpack("N", $trailer) ; *$self->{Info}{ADLER32} = $ADLER32; return $self->TrailerError("CRC mismatch") if *$self->{Strict} && $ADLER32 != *$self->{Uncomp}->adler32() ; return STATUS_OK; } sub isZlibMagic { my $self = shift; my $buffer = shift ; return 0 if length $buffer < ZLIB_HEADER_SIZE ; my $hdr = unpack("n", $buffer) ; #return 0 if $hdr % 31 != 0 ; return $self->HeaderError("CRC mismatch.") if $hdr % 31 != 0 ; my ($CMF, $FLG) = unpack "C C", $buffer; my $cm = bits($CMF, ZLIB_CMF_CM_OFFSET, ZLIB_CMF_CM_BITS) ; # Only Deflate supported return $self->HeaderError("Not Deflate (CM is $cm)") if $cm != ZLIB_CMF_CM_DEFLATED ; # Max window value is 7 for Deflate. my $cinfo = bits($CMF, ZLIB_CMF_CINFO_OFFSET, ZLIB_CMF_CINFO_BITS) ; return $self->HeaderError("CINFO > " . ZLIB_CMF_CINFO_MAX . " (CINFO is $cinfo)") if $cinfo > ZLIB_CMF_CINFO_MAX ; return 1; } sub bits { my $data = shift ; my $offset = shift ; my $mask = shift ; ($data >> $offset ) & $mask & 0xFF ; } sub _readDeflateHeader { my ($self, $buffer) = @_ ; # if (! $buffer) { # $self->smartReadExact(\$buffer, ZLIB_HEADER_SIZE); # # *$self->{HeaderPending} = $buffer ; # # return $self->HeaderError("Header size is " . # ZLIB_HEADER_SIZE . " bytes") # if length $buffer != ZLIB_HEADER_SIZE; # # return $self->HeaderError("CRC mismatch.") # if ! isZlibMagic($buffer) ; # } my ($CMF, $FLG) = unpack "C C", $buffer; my $FDICT = bits($FLG, ZLIB_FLG_FDICT_OFFSET, ZLIB_FLG_FDICT_BITS ), my $cm = bits($CMF, ZLIB_CMF_CM_OFFSET, ZLIB_CMF_CM_BITS) ; $cm == ZLIB_CMF_CM_DEFLATED or return $self->HeaderError("Not Deflate (CM is $cm)") ; my $DICTID; if ($FDICT) { $self->smartReadExact(\$buffer, ZLIB_FDICT_SIZE) or return $self->TruncatedHeader("FDICT"); $DICTID = unpack("N", $buffer) ; } *$self->{Type} = 'rfc1950'; return { 'Type' => 'rfc1950', 'FingerprintLength' => ZLIB_HEADER_SIZE, 'HeaderLength' => ZLIB_HEADER_SIZE, 'TrailerLength' => ZLIB_TRAILER_SIZE, 'Header' => $buffer, CMF => $CMF , CM => bits($CMF, ZLIB_CMF_CM_OFFSET, ZLIB_CMF_CM_BITS ), CINFO => bits($CMF, ZLIB_CMF_CINFO_OFFSET, ZLIB_CMF_CINFO_BITS ), FLG => $FLG , FCHECK => bits($FLG, ZLIB_FLG_FCHECK_OFFSET, ZLIB_FLG_FCHECK_BITS), FDICT => bits($FLG, ZLIB_FLG_FDICT_OFFSET, ZLIB_FLG_FDICT_BITS ), FLEVEL => bits($FLG, ZLIB_FLG_LEVEL_OFFSET, ZLIB_FLG_LEVEL_BITS ), DICTID => $DICTID , }; } 1 ; __END__ =head1 NAME IO::Uncompress::Inflate - Read RFC 1950 files/buffers =head1 SYNOPSIS use IO::Uncompress::Inflate qw(inflate $InflateError) ; my $status = inflate $input => $output [,OPTS] or die "inflate failed: $InflateError\n"; my $z = new IO::Uncompress::Inflate $input [OPTS] or die "inflate failed: $InflateError\n"; $status = $z->read($buffer) $status = $z->read($buffer, $length) $status = $z->read($buffer, $length, $offset) $line = $z->getline() $char = $z->getc() $char = $z->ungetc() $char = $z->opened() $status = $z->inflateSync() $data = $z->trailingData() $status = $z->nextStream() $data = $z->getHeaderInfo() $z->tell() $z->seek($position, $whence) $z->binmode() $z->fileno() $z->eof() $z->close() $InflateError ; # IO::File mode <$z> read($z, $buffer); read($z, $buffer, $length); read($z, $buffer, $length, $offset); tell($z) seek($z, $position, $whence) binmode($z) fileno($z) eof($z) close($z) =head1 DESCRIPTION This module provides a Perl interface that allows the reading of files/buffers that conform to RFC 1950. For writing RFC 1950 files/buffers, see the companion module IO::Compress::Deflate. =head1 Functional Interface A top-level function, C, is provided to carry out "one-shot" uncompression between buffers and/or files. For finer control over the uncompression process, see the L section. use IO::Uncompress::Inflate qw(inflate $InflateError) ; inflate $input_filename_or_reference => $output_filename_or_reference [,OPTS] or die "inflate failed: $InflateError\n"; The functional interface needs Perl5.005 or better. =head2 inflate $input_filename_or_reference => $output_filename_or_reference [, OPTS] C expects at least two parameters, C<$input_filename_or_reference> and C<$output_filename_or_reference>. =head3 The C<$input_filename_or_reference> parameter The parameter, C<$input_filename_or_reference>, is used to define the source of the compressed data. It can take one of the following forms: =over 5 =item A filename If the <$input_filename_or_reference> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for reading and the input data will be read from it. =item A filehandle If the C<$input_filename_or_reference> parameter is a filehandle, the input data will be read from it. The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input_filename_or_reference> is a scalar reference, the input data will be read from C<$$input_filename_or_reference>. =item An array reference If C<$input_filename_or_reference> is an array reference, each element in the array must be a filename. The input data will be read from each file in turn. The complete array will be walked to ensure that it only contains valid filenames before any data is uncompressed. =item An Input FileGlob string If C<$input_filename_or_reference> is a string that is delimited by the characters "<" and ">" C will assume that it is an I. The input is the list of files that match the fileglob. See L for more details. =back If the C<$input_filename_or_reference> parameter is any other type, C will be returned. =head3 The C<$output_filename_or_reference> parameter The parameter C<$output_filename_or_reference> is used to control the destination of the uncompressed data. This parameter can take one of these forms. =over 5 =item A filename If the C<$output_filename_or_reference> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for writing and the uncompressed data will be written to it. =item A filehandle If the C<$output_filename_or_reference> parameter is a filehandle, the uncompressed data will be written to it. The string '-' can be used as an alias for standard output. =item A scalar reference If C<$output_filename_or_reference> is a scalar reference, the uncompressed data will be stored in C<$$output_filename_or_reference>. =item An Array Reference If C<$output_filename_or_reference> is an array reference, the uncompressed data will be pushed onto the array. =item An Output FileGlob If C<$output_filename_or_reference> is a string that is delimited by the characters "<" and ">" C will assume that it is an I. The output is the list of files that match the fileglob. When C<$output_filename_or_reference> is an fileglob string, C<$input_filename_or_reference> must also be a fileglob string. Anything else is an error. See L for more details. =back If the C<$output_filename_or_reference> parameter is any other type, C will be returned. =head2 Notes When C<$input_filename_or_reference> maps to multiple compressed files/buffers and C<$output_filename_or_reference> is a single file/buffer, after uncompression C<$output_filename_or_reference> will contain a concatenation of all the uncompressed data from each of the input files/buffers. =head2 Optional Parameters Unless specified below, the optional parameters for C, C, are the same as those used with the OO interface defined in the L section below. =over 5 =item C<< AutoClose => 0|1 >> This option applies to any input or output data streams to C that are filehandles. If C is specified, and the value is true, it will result in all input and/or output filehandles being closed once C has completed. This parameter defaults to 0. =item C<< BinModeOut => 0|1 >> When writing to a file or filehandle, set C before writing to the file. Defaults to 0. =item C<< Append => 0|1 >> The behaviour of this option is dependent on the type of output data stream. =over 5 =item * A Buffer If C is enabled, all uncompressed data will be append to the end of the output buffer. Otherwise the output buffer will be cleared before any uncompressed data is written to it. =item * A Filename If C is enabled, the file will be opened in append mode. Otherwise the contents of the file, if any, will be truncated before any uncompressed data is written to it. =item * A Filehandle If C is enabled, the filehandle will be positioned to the end of the file via a call to C before any uncompressed data is written to it. Otherwise the file pointer will not be moved. =back When C is specified, and set to true, it will I all uncompressed data to the output data stream. So when the output is a filehandle it will carry out a seek to the eof before writing any uncompressed data. If the output is a filename, it will be opened for appending. If the output is a buffer, all uncompressed data will be appended to the existing buffer. Conversely when C is not specified, or it is present and is set to false, it will operate as follows. When the output is a filename, it will truncate the contents of the file before writing any uncompressed data. If the output is a filehandle its position will not be changed. If the output is a buffer, it will be wiped before any uncompressed data is output. Defaults to 0. =item C<< MultiStream => 0|1 >> If the input file/buffer contains multiple compressed data streams, this option will uncompress the whole lot as a single data stream. Defaults to 0. =item C<< TrailingData => $scalar >> Returns the data, if any, that is present immediately after the compressed data stream once uncompression is complete. This option can be used when there is useful information immediately following the compressed data stream, and you don't know the length of the compressed data stream. If the input is a buffer, C will return everything from the end of the compressed data stream to the end of the buffer. If the input is a filehandle, C will return the data that is left in the filehandle input buffer once the end of the compressed data stream has been reached. You can then use the filehandle to read the rest of the input file. Don't bother using C if the input is a filename. If you know the length of the compressed data stream before you start uncompressing, you can avoid having to use C by setting the C option. =back =head2 Examples To read the contents of the file C and write the uncompressed data to the file C. use strict ; use warnings ; use IO::Uncompress::Inflate qw(inflate $InflateError) ; my $input = "file1.txt.1950"; my $output = "file1.txt"; inflate $input => $output or die "inflate failed: $InflateError\n"; To read from an existing Perl filehandle, C<$input>, and write the uncompressed data to a buffer, C<$buffer>. use strict ; use warnings ; use IO::Uncompress::Inflate qw(inflate $InflateError) ; use IO::File ; my $input = new IO::File " \$buffer or die "inflate failed: $InflateError\n"; To uncompress all files in the directory "/my/home" that match "*.txt.1950" and store the compressed data in the same directory use strict ; use warnings ; use IO::Uncompress::Inflate qw(inflate $InflateError) ; inflate '' => '' or die "inflate failed: $InflateError\n"; and if you want to compress each file one at a time, this will do the trick use strict ; use warnings ; use IO::Uncompress::Inflate qw(inflate $InflateError) ; for my $input ( glob "/my/home/*.txt.1950" ) { my $output = $input; $output =~ s/.1950// ; inflate $input => $output or die "Error compressing '$input': $InflateError\n"; } =head1 OO Interface =head2 Constructor The format of the constructor for IO::Uncompress::Inflate is shown below my $z = new IO::Uncompress::Inflate $input [OPTS] or die "IO::Uncompress::Inflate failed: $InflateError\n"; Returns an C object on success and undef on failure. The variable C<$InflateError> will contain an error message on failure. If you are running Perl 5.005 or better the object, C<$z>, returned from IO::Uncompress::Inflate can be used exactly like an L filehandle. This means that all normal input file operations can be carried out with C<$z>. For example, to read a line from a compressed file/buffer you can use either of these forms $line = $z->getline(); $line = <$z>; The mandatory parameter C<$input> is used to determine the source of the compressed data. This parameter can take one of three forms. =over 5 =item A filename If the C<$input> parameter is a scalar, it is assumed to be a filename. This file will be opened for reading and the compressed data will be read from it. =item A filehandle If the C<$input> parameter is a filehandle, the compressed data will be read from it. The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input> is a scalar reference, the compressed data will be read from C<$$input>. =back =head2 Constructor Options The option names defined below are case insensitive and can be optionally prefixed by a '-'. So all of the following are valid -AutoClose -autoclose AUTOCLOSE autoclose OPTS is a combination of the following options: =over 5 =item C<< AutoClose => 0|1 >> This option is only valid when the C<$input> parameter is a filehandle. If specified, and the value is true, it will result in the file being closed once either the C method is called or the IO::Uncompress::Inflate object is destroyed. This parameter defaults to 0. =item C<< MultiStream => 0|1 >> Allows multiple concatenated compressed streams to be treated as a single compressed stream. Decompression will stop once either the end of the file/buffer is reached, an error is encountered (premature eof, corrupt compressed data) or the end of a stream is not immediately followed by the start of another stream. This parameter defaults to 0. =item C<< Prime => $string >> This option will uncompress the contents of C<$string> before processing the input file/buffer. This option can be useful when the compressed data is embedded in another file/data structure and it is not possible to work out where the compressed data begins without having to read the first few bytes. If this is the case, the uncompression can be I with these bytes using this option. =item C<< Transparent => 0|1 >> If this option is set and the input file/buffer is not compressed data, the module will allow reading of it anyway. In addition, if the input file/buffer does contain compressed data and there is non-compressed data immediately following it, setting this option will make this module treat the whole file/buffer as a single data stream. This option defaults to 1. =item C<< BlockSize => $num >> When reading the compressed input data, IO::Uncompress::Inflate will read it in blocks of C<$num> bytes. This option defaults to 4096. =item C<< InputLength => $size >> When present this option will limit the number of compressed bytes read from the input file/buffer to C<$size>. This option can be used in the situation where there is useful data directly after the compressed data stream and you know beforehand the exact length of the compressed data stream. This option is mostly used when reading from a filehandle, in which case the file pointer will be left pointing to the first byte directly after the compressed data stream. This option defaults to off. =item C<< Append => 0|1 >> This option controls what the C method does with uncompressed data. If set to 1, all uncompressed data will be appended to the output parameter of the C method. If set to 0, the contents of the output parameter of the C method will be overwritten by the uncompressed data. Defaults to 0. =item C<< Strict => 0|1 >> This option controls whether the extra checks defined below are used when carrying out the decompression. When Strict is on, the extra tests are carried out, when Strict is off they are not. The default for this option is off. =over 5 =item 1 The ADLER32 checksum field must be present. =item 2 The value of the ADLER32 field read must match the adler32 value of the uncompressed data actually contained in the file. =back =back =head2 Examples TODO =head1 Methods =head2 read Usage is $status = $z->read($buffer) Reads a block of compressed data (the size of the compressed block is determined by the C option in the constructor), uncompresses it and writes any uncompressed data into C<$buffer>. If the C parameter is set in the constructor, the uncompressed data will be appended to the C<$buffer> parameter. Otherwise C<$buffer> will be overwritten. Returns the number of uncompressed bytes written to C<$buffer>, zero if eof or a negative number on error. =head2 read Usage is $status = $z->read($buffer, $length) $status = $z->read($buffer, $length, $offset) $status = read($z, $buffer, $length) $status = read($z, $buffer, $length, $offset) Attempt to read C<$length> bytes of uncompressed data into C<$buffer>. The main difference between this form of the C method and the previous one, is that this one will attempt to return I C<$length> bytes. The only circumstances that this function will not is if end-of-file or an IO error is encountered. Returns the number of uncompressed bytes written to C<$buffer>, zero if eof or a negative number on error. =head2 getline Usage is $line = $z->getline() $line = <$z> Reads a single line. This method fully supports the use of the variable C<$/> (or C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C is in use) to determine what constitutes an end of line. Paragraph mode, record mode and file slurp mode are all supported. =head2 getc Usage is $char = $z->getc() Read a single character. =head2 ungetc Usage is $char = $z->ungetc($string) =head2 inflateSync Usage is $status = $z->inflateSync() TODO =head2 getHeaderInfo Usage is $hdr = $z->getHeaderInfo(); @hdrs = $z->getHeaderInfo(); This method returns either a hash reference (in scalar context) or a list or hash references (in array context) that contains information about each of the header fields in the compressed data stream(s). =head2 tell Usage is $z->tell() tell $z Returns the uncompressed file offset. =head2 eof Usage is $z->eof(); eof($z); Returns true if the end of the compressed input stream has been reached. =head2 seek $z->seek($position, $whence); seek($z, $position, $whence); Provides a sub-set of the C functionality, with the restriction that it is only legal to seek forward in the input file/buffer. It is a fatal error to attempt to seek backward. Note that the implementation of C in this module does not provide true random access to a compressed file/buffer. It works by uncompressing data from the current offset in the file/buffer until it reaches the uncompressed offset specified in the parameters to C. For very small files this may be acceptable behaviour. For large files it may cause an unacceptable delay. The C<$whence> parameter takes one the usual values, namely SEEK_SET, SEEK_CUR or SEEK_END. Returns 1 on success, 0 on failure. =head2 binmode Usage is $z->binmode binmode $z ; This is a noop provided for completeness. =head2 opened $z->opened() Returns true if the object currently refers to a opened file/buffer. =head2 autoflush my $prev = $z->autoflush() my $prev = $z->autoflush(EXPR) If the C<$z> object is associated with a file or a filehandle, this method returns the current autoflush setting for the underlying filehandle. If C is present, and is non-zero, it will enable flushing after every write/print operation. If C<$z> is associated with a buffer, this method has no effect and always returns C. B that the special variable C<$|> B be used to set or retrieve the autoflush setting. =head2 input_line_number $z->input_line_number() $z->input_line_number(EXPR) Returns the current uncompressed line number. If C is present it has the effect of setting the line number. Note that setting the line number does not change the current position within the file/buffer being read. The contents of C<$/> are used to determine what constitutes a line terminator. =head2 fileno $z->fileno() fileno($z) If the C<$z> object is associated with a file or a filehandle, C will return the underlying file descriptor. Once the C method is called C will return C. If the C<$z> object is associated with a buffer, this method will return C. =head2 close $z->close() ; close $z ; Closes the output file/buffer. For most versions of Perl this method will be automatically invoked if the IO::Uncompress::Inflate object is destroyed (either explicitly or by the variable with the reference to the object going out of scope). The exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In these cases, the C method will be called automatically, but not until global destruction of all live objects when the program is terminating. Therefore, if you want your scripts to be able to run on all versions of Perl, you should call C explicitly and not rely on automatic closing. Returns true on success, otherwise 0. If the C option has been enabled when the IO::Uncompress::Inflate object was created, and the object is associated with a file, the underlying file will also be closed. =head2 nextStream Usage is my $status = $z->nextStream(); Skips to the next compressed data stream in the input file/buffer. If a new compressed data stream is found, the eof marker will be cleared and C<$.> will be reset to 0. Returns 1 if a new stream was found, 0 if none was found, and -1 if an error was encountered. =head2 trailingData Usage is my $data = $z->trailingData(); Returns the data, if any, that is present immediately after the compressed data stream once uncompression is complete. It only makes sense to call this method once the end of the compressed data stream has been encountered. This option can be used when there is useful information immediately following the compressed data stream, and you don't know the length of the compressed data stream. If the input is a buffer, C will return everything from the end of the compressed data stream to the end of the buffer. If the input is a filehandle, C will return the data that is left in the filehandle input buffer once the end of the compressed data stream has been reached. You can then use the filehandle to read the rest of the input file. Don't bother using C if the input is a filename. If you know the length of the compressed data stream before you start uncompressing, you can avoid having to use C by setting the C option in the constructor. =head1 Importing No symbolic constants are required by this IO::Uncompress::Inflate at present. =over 5 =item :all Imports C and C<$InflateError>. Same as doing this use IO::Uncompress::Inflate qw(inflate $InflateError) ; =back =head1 EXAMPLES =head2 Working with Net::FTP See L =head1 SEE ALSO L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L L L, L, L, L For RFC 1950, 1951 and 1952 see F, F and F The I compression library was written by Jean-loup Gailly F and Mark Adler F. The primary site for the I compression library is F. The primary site for gzip is F. =head1 AUTHOR This module was written by Paul Marquess, F. =head1 MODIFICATION HISTORY See the Changes file. =head1 COPYRIGHT AND LICENSE Copyright (c) 2005-2013 Paul Marquess. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. IO-Compress-2.063/lib/IO/Uncompress/Bunzip2.pm0000644000175000017500000006121312211042540017404 0ustar paulpaulpackage IO::Uncompress::Bunzip2 ; use strict ; use warnings; use bytes; use IO::Compress::Base::Common 2.063 qw(:Status ); use IO::Uncompress::Base 2.063 ; use IO::Uncompress::Adapter::Bunzip2 2.063 ; require Exporter ; our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $Bunzip2Error); $VERSION = '2.063'; $Bunzip2Error = ''; @ISA = qw( Exporter IO::Uncompress::Base ); @EXPORT_OK = qw( $Bunzip2Error bunzip2 ) ; #%EXPORT_TAGS = %IO::Uncompress::Base::EXPORT_TAGS ; push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ; #Exporter::export_ok_tags('all'); sub new { my $class = shift ; my $obj = IO::Compress::Base::Common::createSelfTiedObject($class, \$Bunzip2Error); $obj->_create(undef, 0, @_); } sub bunzip2 { my $obj = IO::Compress::Base::Common::createSelfTiedObject(undef, \$Bunzip2Error); return $obj->_inf(@_); } sub getExtraParams { return ( 'verbosity' => [IO::Compress::Base::Common::Parse_boolean, 0], 'small' => [IO::Compress::Base::Common::Parse_boolean, 0], ); } sub ckParams { my $self = shift ; my $got = shift ; return 1; } sub mkUncomp { my $self = shift ; my $got = shift ; my $magic = $self->ckMagic() or return 0; *$self->{Info} = $self->readHeader($magic) or return undef ; my $Small = $got->getValue('small'); my $Verbosity = $got->getValue('verbosity'); my ($obj, $errstr, $errno) = IO::Uncompress::Adapter::Bunzip2::mkUncompObject( $Small, $Verbosity); return $self->saveErrorString(undef, $errstr, $errno) if ! defined $obj; *$self->{Uncomp} = $obj; return 1; } sub ckMagic { my $self = shift; my $magic ; $self->smartReadExact(\$magic, 4); *$self->{HeaderPending} = $magic ; return $self->HeaderError("Header size is " . 4 . " bytes") if length $magic != 4; return $self->HeaderError("Bad Magic.") if ! isBzip2Magic($magic) ; *$self->{Type} = 'bzip2'; return $magic; } sub readHeader { my $self = shift; my $magic = shift ; $self->pushBack($magic); *$self->{HeaderPending} = ''; return { 'Type' => 'bzip2', 'FingerprintLength' => 4, 'HeaderLength' => 4, 'TrailerLength' => 0, 'Header' => '$magic' }; } sub chkTrailer { return STATUS_OK; } sub isBzip2Magic { my $buffer = shift ; return $buffer =~ /^BZh\d$/; } 1 ; __END__ =head1 NAME IO::Uncompress::Bunzip2 - Read bzip2 files/buffers =head1 SYNOPSIS use IO::Uncompress::Bunzip2 qw(bunzip2 $Bunzip2Error) ; my $status = bunzip2 $input => $output [,OPTS] or die "bunzip2 failed: $Bunzip2Error\n"; my $z = new IO::Uncompress::Bunzip2 $input [OPTS] or die "bunzip2 failed: $Bunzip2Error\n"; $status = $z->read($buffer) $status = $z->read($buffer, $length) $status = $z->read($buffer, $length, $offset) $line = $z->getline() $char = $z->getc() $char = $z->ungetc() $char = $z->opened() $data = $z->trailingData() $status = $z->nextStream() $data = $z->getHeaderInfo() $z->tell() $z->seek($position, $whence) $z->binmode() $z->fileno() $z->eof() $z->close() $Bunzip2Error ; # IO::File mode <$z> read($z, $buffer); read($z, $buffer, $length); read($z, $buffer, $length, $offset); tell($z) seek($z, $position, $whence) binmode($z) fileno($z) eof($z) close($z) =head1 DESCRIPTION This module provides a Perl interface that allows the reading of bzip2 files/buffers. For writing bzip2 files/buffers, see the companion module IO::Compress::Bzip2. =head1 Functional Interface A top-level function, C, is provided to carry out "one-shot" uncompression between buffers and/or files. For finer control over the uncompression process, see the L section. use IO::Uncompress::Bunzip2 qw(bunzip2 $Bunzip2Error) ; bunzip2 $input_filename_or_reference => $output_filename_or_reference [,OPTS] or die "bunzip2 failed: $Bunzip2Error\n"; The functional interface needs Perl5.005 or better. =head2 bunzip2 $input_filename_or_reference => $output_filename_or_reference [, OPTS] C expects at least two parameters, C<$input_filename_or_reference> and C<$output_filename_or_reference>. =head3 The C<$input_filename_or_reference> parameter The parameter, C<$input_filename_or_reference>, is used to define the source of the compressed data. It can take one of the following forms: =over 5 =item A filename If the <$input_filename_or_reference> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for reading and the input data will be read from it. =item A filehandle If the C<$input_filename_or_reference> parameter is a filehandle, the input data will be read from it. The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input_filename_or_reference> is a scalar reference, the input data will be read from C<$$input_filename_or_reference>. =item An array reference If C<$input_filename_or_reference> is an array reference, each element in the array must be a filename. The input data will be read from each file in turn. The complete array will be walked to ensure that it only contains valid filenames before any data is uncompressed. =item An Input FileGlob string If C<$input_filename_or_reference> is a string that is delimited by the characters "<" and ">" C will assume that it is an I. The input is the list of files that match the fileglob. See L for more details. =back If the C<$input_filename_or_reference> parameter is any other type, C will be returned. =head3 The C<$output_filename_or_reference> parameter The parameter C<$output_filename_or_reference> is used to control the destination of the uncompressed data. This parameter can take one of these forms. =over 5 =item A filename If the C<$output_filename_or_reference> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for writing and the uncompressed data will be written to it. =item A filehandle If the C<$output_filename_or_reference> parameter is a filehandle, the uncompressed data will be written to it. The string '-' can be used as an alias for standard output. =item A scalar reference If C<$output_filename_or_reference> is a scalar reference, the uncompressed data will be stored in C<$$output_filename_or_reference>. =item An Array Reference If C<$output_filename_or_reference> is an array reference, the uncompressed data will be pushed onto the array. =item An Output FileGlob If C<$output_filename_or_reference> is a string that is delimited by the characters "<" and ">" C will assume that it is an I. The output is the list of files that match the fileglob. When C<$output_filename_or_reference> is an fileglob string, C<$input_filename_or_reference> must also be a fileglob string. Anything else is an error. See L for more details. =back If the C<$output_filename_or_reference> parameter is any other type, C will be returned. =head2 Notes When C<$input_filename_or_reference> maps to multiple compressed files/buffers and C<$output_filename_or_reference> is a single file/buffer, after uncompression C<$output_filename_or_reference> will contain a concatenation of all the uncompressed data from each of the input files/buffers. =head2 Optional Parameters Unless specified below, the optional parameters for C, C, are the same as those used with the OO interface defined in the L section below. =over 5 =item C<< AutoClose => 0|1 >> This option applies to any input or output data streams to C that are filehandles. If C is specified, and the value is true, it will result in all input and/or output filehandles being closed once C has completed. This parameter defaults to 0. =item C<< BinModeOut => 0|1 >> When writing to a file or filehandle, set C before writing to the file. Defaults to 0. =item C<< Append => 0|1 >> The behaviour of this option is dependent on the type of output data stream. =over 5 =item * A Buffer If C is enabled, all uncompressed data will be append to the end of the output buffer. Otherwise the output buffer will be cleared before any uncompressed data is written to it. =item * A Filename If C is enabled, the file will be opened in append mode. Otherwise the contents of the file, if any, will be truncated before any uncompressed data is written to it. =item * A Filehandle If C is enabled, the filehandle will be positioned to the end of the file via a call to C before any uncompressed data is written to it. Otherwise the file pointer will not be moved. =back When C is specified, and set to true, it will I all uncompressed data to the output data stream. So when the output is a filehandle it will carry out a seek to the eof before writing any uncompressed data. If the output is a filename, it will be opened for appending. If the output is a buffer, all uncompressed data will be appended to the existing buffer. Conversely when C is not specified, or it is present and is set to false, it will operate as follows. When the output is a filename, it will truncate the contents of the file before writing any uncompressed data. If the output is a filehandle its position will not be changed. If the output is a buffer, it will be wiped before any uncompressed data is output. Defaults to 0. =item C<< MultiStream => 0|1 >> If the input file/buffer contains multiple compressed data streams, this option will uncompress the whole lot as a single data stream. Defaults to 0. =item C<< TrailingData => $scalar >> Returns the data, if any, that is present immediately after the compressed data stream once uncompression is complete. This option can be used when there is useful information immediately following the compressed data stream, and you don't know the length of the compressed data stream. If the input is a buffer, C will return everything from the end of the compressed data stream to the end of the buffer. If the input is a filehandle, C will return the data that is left in the filehandle input buffer once the end of the compressed data stream has been reached. You can then use the filehandle to read the rest of the input file. Don't bother using C if the input is a filename. If you know the length of the compressed data stream before you start uncompressing, you can avoid having to use C by setting the C option. =back =head2 Examples To read the contents of the file C and write the uncompressed data to the file C. use strict ; use warnings ; use IO::Uncompress::Bunzip2 qw(bunzip2 $Bunzip2Error) ; my $input = "file1.txt.bz2"; my $output = "file1.txt"; bunzip2 $input => $output or die "bunzip2 failed: $Bunzip2Error\n"; To read from an existing Perl filehandle, C<$input>, and write the uncompressed data to a buffer, C<$buffer>. use strict ; use warnings ; use IO::Uncompress::Bunzip2 qw(bunzip2 $Bunzip2Error) ; use IO::File ; my $input = new IO::File " \$buffer or die "bunzip2 failed: $Bunzip2Error\n"; To uncompress all files in the directory "/my/home" that match "*.txt.bz2" and store the compressed data in the same directory use strict ; use warnings ; use IO::Uncompress::Bunzip2 qw(bunzip2 $Bunzip2Error) ; bunzip2 '' => '' or die "bunzip2 failed: $Bunzip2Error\n"; and if you want to compress each file one at a time, this will do the trick use strict ; use warnings ; use IO::Uncompress::Bunzip2 qw(bunzip2 $Bunzip2Error) ; for my $input ( glob "/my/home/*.txt.bz2" ) { my $output = $input; $output =~ s/.bz2// ; bunzip2 $input => $output or die "Error compressing '$input': $Bunzip2Error\n"; } =head1 OO Interface =head2 Constructor The format of the constructor for IO::Uncompress::Bunzip2 is shown below my $z = new IO::Uncompress::Bunzip2 $input [OPTS] or die "IO::Uncompress::Bunzip2 failed: $Bunzip2Error\n"; Returns an C object on success and undef on failure. The variable C<$Bunzip2Error> will contain an error message on failure. If you are running Perl 5.005 or better the object, C<$z>, returned from IO::Uncompress::Bunzip2 can be used exactly like an L filehandle. This means that all normal input file operations can be carried out with C<$z>. For example, to read a line from a compressed file/buffer you can use either of these forms $line = $z->getline(); $line = <$z>; The mandatory parameter C<$input> is used to determine the source of the compressed data. This parameter can take one of three forms. =over 5 =item A filename If the C<$input> parameter is a scalar, it is assumed to be a filename. This file will be opened for reading and the compressed data will be read from it. =item A filehandle If the C<$input> parameter is a filehandle, the compressed data will be read from it. The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input> is a scalar reference, the compressed data will be read from C<$$input>. =back =head2 Constructor Options The option names defined below are case insensitive and can be optionally prefixed by a '-'. So all of the following are valid -AutoClose -autoclose AUTOCLOSE autoclose OPTS is a combination of the following options: =over 5 =item C<< AutoClose => 0|1 >> This option is only valid when the C<$input> parameter is a filehandle. If specified, and the value is true, it will result in the file being closed once either the C method is called or the IO::Uncompress::Bunzip2 object is destroyed. This parameter defaults to 0. =item C<< MultiStream => 0|1 >> Allows multiple concatenated compressed streams to be treated as a single compressed stream. Decompression will stop once either the end of the file/buffer is reached, an error is encountered (premature eof, corrupt compressed data) or the end of a stream is not immediately followed by the start of another stream. This parameter defaults to 0. =item C<< Prime => $string >> This option will uncompress the contents of C<$string> before processing the input file/buffer. This option can be useful when the compressed data is embedded in another file/data structure and it is not possible to work out where the compressed data begins without having to read the first few bytes. If this is the case, the uncompression can be I with these bytes using this option. =item C<< Transparent => 0|1 >> If this option is set and the input file/buffer is not compressed data, the module will allow reading of it anyway. In addition, if the input file/buffer does contain compressed data and there is non-compressed data immediately following it, setting this option will make this module treat the whole file/buffer as a single data stream. This option defaults to 1. =item C<< BlockSize => $num >> When reading the compressed input data, IO::Uncompress::Bunzip2 will read it in blocks of C<$num> bytes. This option defaults to 4096. =item C<< InputLength => $size >> When present this option will limit the number of compressed bytes read from the input file/buffer to C<$size>. This option can be used in the situation where there is useful data directly after the compressed data stream and you know beforehand the exact length of the compressed data stream. This option is mostly used when reading from a filehandle, in which case the file pointer will be left pointing to the first byte directly after the compressed data stream. This option defaults to off. =item C<< Append => 0|1 >> This option controls what the C method does with uncompressed data. If set to 1, all uncompressed data will be appended to the output parameter of the C method. If set to 0, the contents of the output parameter of the C method will be overwritten by the uncompressed data. Defaults to 0. =item C<< Strict => 0|1 >> This option is a no-op. =item C<< Small => 0|1 >> When non-zero this options will make bzip2 use a decompression algorithm that uses less memory at the expense of increasing the amount of time taken for decompression. Default is 0. =back =head2 Examples TODO =head1 Methods =head2 read Usage is $status = $z->read($buffer) Reads a block of compressed data (the size of the compressed block is determined by the C option in the constructor), uncompresses it and writes any uncompressed data into C<$buffer>. If the C parameter is set in the constructor, the uncompressed data will be appended to the C<$buffer> parameter. Otherwise C<$buffer> will be overwritten. Returns the number of uncompressed bytes written to C<$buffer>, zero if eof or a negative number on error. =head2 read Usage is $status = $z->read($buffer, $length) $status = $z->read($buffer, $length, $offset) $status = read($z, $buffer, $length) $status = read($z, $buffer, $length, $offset) Attempt to read C<$length> bytes of uncompressed data into C<$buffer>. The main difference between this form of the C method and the previous one, is that this one will attempt to return I C<$length> bytes. The only circumstances that this function will not is if end-of-file or an IO error is encountered. Returns the number of uncompressed bytes written to C<$buffer>, zero if eof or a negative number on error. =head2 getline Usage is $line = $z->getline() $line = <$z> Reads a single line. This method fully supports the use of the variable C<$/> (or C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C is in use) to determine what constitutes an end of line. Paragraph mode, record mode and file slurp mode are all supported. =head2 getc Usage is $char = $z->getc() Read a single character. =head2 ungetc Usage is $char = $z->ungetc($string) =head2 getHeaderInfo Usage is $hdr = $z->getHeaderInfo(); @hdrs = $z->getHeaderInfo(); This method returns either a hash reference (in scalar context) or a list or hash references (in array context) that contains information about each of the header fields in the compressed data stream(s). =head2 tell Usage is $z->tell() tell $z Returns the uncompressed file offset. =head2 eof Usage is $z->eof(); eof($z); Returns true if the end of the compressed input stream has been reached. =head2 seek $z->seek($position, $whence); seek($z, $position, $whence); Provides a sub-set of the C functionality, with the restriction that it is only legal to seek forward in the input file/buffer. It is a fatal error to attempt to seek backward. Note that the implementation of C in this module does not provide true random access to a compressed file/buffer. It works by uncompressing data from the current offset in the file/buffer until it reaches the uncompressed offset specified in the parameters to C. For very small files this may be acceptable behaviour. For large files it may cause an unacceptable delay. The C<$whence> parameter takes one the usual values, namely SEEK_SET, SEEK_CUR or SEEK_END. Returns 1 on success, 0 on failure. =head2 binmode Usage is $z->binmode binmode $z ; This is a noop provided for completeness. =head2 opened $z->opened() Returns true if the object currently refers to a opened file/buffer. =head2 autoflush my $prev = $z->autoflush() my $prev = $z->autoflush(EXPR) If the C<$z> object is associated with a file or a filehandle, this method returns the current autoflush setting for the underlying filehandle. If C is present, and is non-zero, it will enable flushing after every write/print operation. If C<$z> is associated with a buffer, this method has no effect and always returns C. B that the special variable C<$|> B be used to set or retrieve the autoflush setting. =head2 input_line_number $z->input_line_number() $z->input_line_number(EXPR) Returns the current uncompressed line number. If C is present it has the effect of setting the line number. Note that setting the line number does not change the current position within the file/buffer being read. The contents of C<$/> are used to determine what constitutes a line terminator. =head2 fileno $z->fileno() fileno($z) If the C<$z> object is associated with a file or a filehandle, C will return the underlying file descriptor. Once the C method is called C will return C. If the C<$z> object is associated with a buffer, this method will return C. =head2 close $z->close() ; close $z ; Closes the output file/buffer. For most versions of Perl this method will be automatically invoked if the IO::Uncompress::Bunzip2 object is destroyed (either explicitly or by the variable with the reference to the object going out of scope). The exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In these cases, the C method will be called automatically, but not until global destruction of all live objects when the program is terminating. Therefore, if you want your scripts to be able to run on all versions of Perl, you should call C explicitly and not rely on automatic closing. Returns true on success, otherwise 0. If the C option has been enabled when the IO::Uncompress::Bunzip2 object was created, and the object is associated with a file, the underlying file will also be closed. =head2 nextStream Usage is my $status = $z->nextStream(); Skips to the next compressed data stream in the input file/buffer. If a new compressed data stream is found, the eof marker will be cleared and C<$.> will be reset to 0. Returns 1 if a new stream was found, 0 if none was found, and -1 if an error was encountered. =head2 trailingData Usage is my $data = $z->trailingData(); Returns the data, if any, that is present immediately after the compressed data stream once uncompression is complete. It only makes sense to call this method once the end of the compressed data stream has been encountered. This option can be used when there is useful information immediately following the compressed data stream, and you don't know the length of the compressed data stream. If the input is a buffer, C will return everything from the end of the compressed data stream to the end of the buffer. If the input is a filehandle, C will return the data that is left in the filehandle input buffer once the end of the compressed data stream has been reached. You can then use the filehandle to read the rest of the input file. Don't bother using C if the input is a filename. If you know the length of the compressed data stream before you start uncompressing, you can avoid having to use C by setting the C option in the constructor. =head1 Importing No symbolic constants are required by this IO::Uncompress::Bunzip2 at present. =over 5 =item :all Imports C and C<$Bunzip2Error>. Same as doing this use IO::Uncompress::Bunzip2 qw(bunzip2 $Bunzip2Error) ; =back =head1 EXAMPLES =head2 Working with Net::FTP See L =head1 SEE ALSO L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L L L, L, L, L The primary site for the bzip2 program is F. See the module L =head1 AUTHOR This module was written by Paul Marquess, F. =head1 MODIFICATION HISTORY See the Changes file. =head1 COPYRIGHT AND LICENSE Copyright (c) 2005-2013 Paul Marquess. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. IO-Compress-2.063/lib/IO/Uncompress/Adapter/0000755000175000017500000000000012235214476017111 5ustar paulpaulIO-Compress-2.063/lib/IO/Uncompress/Adapter/Identity.pm0000755000175000017500000001074712211042540021235 0ustar paulpaulpackage IO::Uncompress::Adapter::Identity; use warnings; use strict; use bytes; use IO::Compress::Base::Common 2.063 qw(:Status); use IO::Compress::Zip::Constants ; our ($VERSION); $VERSION = '2.063'; use Compress::Raw::Zlib 2.063 (); sub mkUncompObject { my $streaming = shift; my $zip64 = shift; my $crc32 = 1; #shift ; my $adler32 = shift; bless { 'CompSize' => new U64 , # 0, 'UnCompSize' => 0, 'wantCRC32' => $crc32, 'CRC32' => Compress::Raw::Zlib::crc32(''), 'wantADLER32'=> $adler32, 'ADLER32' => Compress::Raw::Zlib::adler32(''), 'ConsumesInput' => 1, 'Streaming' => $streaming, 'Zip64' => $zip64, 'DataHdrSize' => $zip64 ? 24 : 16, 'Pending' => '', } ; } sub uncompr { my $self = shift; my $in = $_[0]; my $eof = $_[2]; my $len = length $$in; my $remainder = ''; if (defined $$in && $len) { if ($self->{Streaming}) { if (length $self->{Pending}) { $$in = $self->{Pending} . $$in ; $len = length $$in; $self->{Pending} = ''; } my $ind = index($$in, "\x50\x4b\x07\x08"); if ($ind < 0) { $len = length $$in; if ($len >= 3 && substr($$in, -3) eq "\x50\x4b\x07") { $ind = $len - 3 ; } elsif ($len >= 2 && substr($$in, -2) eq "\x50\x4b") { $ind = $len - 2 ; } elsif ($len >= 1 && substr($$in, -1) eq "\x50") { $ind = $len - 1 ; } } if ($ind >= 0) { $remainder = substr($$in, $ind) ; substr($$in, $ind) = '' ; } } if (length $remainder && length $remainder < $self->{DataHdrSize}) { $self->{Pending} = $remainder ; $remainder = ''; } elsif (length $remainder >= $self->{DataHdrSize}) { my $crc = unpack "V", substr($remainder, 4); if ($crc == Compress::Raw::Zlib::crc32($$in, $self->{CRC32})) { my ($l1, $l2) ; if ($self->{Zip64}) { $l1 = U64::newUnpack_V64(substr($remainder, 8)); $l2 = U64::newUnpack_V64(substr($remainder, 16)); } else { $l1 = U64::newUnpack_V32(substr($remainder, 8)); $l2 = U64::newUnpack_V32(substr($remainder, 12)); } my $newLen = $self->{CompSize}->clone(); $newLen->add(length $$in); if ($l1->equal($l2) && $l1->equal($newLen) ) { $eof = 1; } else { $$in .= substr($remainder, 0, 4) ; $remainder = substr($remainder, 4); #$self->{Pending} = substr($remainder, 4); #$remainder = ''; $eof = 0; } } else { $$in .= substr($remainder, 0, 4) ; $remainder = substr($remainder, 4); #$self->{Pending} = substr($remainder, 4); #$remainder = ''; $eof = 0; } } if (length $$in) { $self->{CompSize}->add(length $$in) ; $self->{CRC32} = Compress::Raw::Zlib::crc32($$in, $self->{CRC32}) if $self->{wantCRC32}; $self->{ADLER32} = Compress::Zlib::adler32($$in, $self->{ADLER32}) if $self->{wantADLER32}; } ${ $_[1] } .= $$in; $$in = $remainder; } return STATUS_ENDSTREAM if $eof; return STATUS_OK ; } sub reset { my $self = shift; $self->{CompSize} = 0; $self->{UnCompSize} = 0; $self->{CRC32} = Compress::Raw::Zlib::crc32(''); $self->{ADLER32} = Compress::Raw::Zlib::adler32(''); return STATUS_OK ; } #sub count #{ # my $self = shift ; # return $self->{UnCompSize} ; #} sub compressedBytes { my $self = shift ; return $self->{CompSize} ; } sub uncompressedBytes { my $self = shift ; return $self->{CompSize} ; } sub sync { return STATUS_OK ; } sub crc32 { my $self = shift ; return $self->{CRC32}; } sub adler32 { my $self = shift ; return $self->{ADLER32}; } 1; __END__ IO-Compress-2.063/lib/IO/Uncompress/Adapter/Inflate.pm0000644000175000017500000000632012211042540021013 0ustar paulpaulpackage IO::Uncompress::Adapter::Inflate; use strict; use warnings; #use bytes; use IO::Compress::Base::Common 2.063 qw(:Status); use Compress::Raw::Zlib 2.063 qw(Z_OK Z_BUF_ERROR Z_STREAM_END Z_FINISH MAX_WBITS); our ($VERSION); $VERSION = '2.063'; sub mkUncompObject { my $crc32 = shift || 1; my $adler32 = shift || 1; my $scan = shift || 0; my $inflate ; my $status ; if ($scan) { ($inflate, $status) = new Compress::Raw::Zlib::InflateScan #LimitOutput => 1, CRC32 => $crc32, ADLER32 => $adler32, WindowBits => - MAX_WBITS ; } else { ($inflate, $status) = new Compress::Raw::Zlib::Inflate AppendOutput => 1, LimitOutput => 1, CRC32 => $crc32, ADLER32 => $adler32, WindowBits => - MAX_WBITS ; } return (undef, "Could not create Inflation object: $status", $status) if $status != Z_OK ; return bless {'Inf' => $inflate, 'CompSize' => 0, 'UnCompSize' => 0, 'Error' => '', 'ConsumesInput' => 1, } ; } sub uncompr { my $self = shift ; my $from = shift ; my $to = shift ; my $eof = shift ; my $inf = $self->{Inf}; my $status = $inf->inflate($from, $to, $eof); $self->{ErrorNo} = $status; if ($status != Z_OK && $status != Z_STREAM_END && $status != Z_BUF_ERROR) { $self->{Error} = "Inflation Error: $status"; return STATUS_ERROR; } return STATUS_OK if $status == Z_BUF_ERROR ; # ??? return STATUS_OK if $status == Z_OK ; return STATUS_ENDSTREAM if $status == Z_STREAM_END ; return STATUS_ERROR ; } sub reset { my $self = shift ; $self->{Inf}->inflateReset(); return STATUS_OK ; } #sub count #{ # my $self = shift ; # $self->{Inf}->inflateCount(); #} sub crc32 { my $self = shift ; $self->{Inf}->crc32(); } sub compressedBytes { my $self = shift ; $self->{Inf}->compressedBytes(); } sub uncompressedBytes { my $self = shift ; $self->{Inf}->uncompressedBytes(); } sub adler32 { my $self = shift ; $self->{Inf}->adler32(); } sub sync { my $self = shift ; ( $self->{Inf}->inflateSync(@_) == Z_OK) ? STATUS_OK : STATUS_ERROR ; } sub getLastBlockOffset { my $self = shift ; $self->{Inf}->getLastBlockOffset(); } sub getEndOffset { my $self = shift ; $self->{Inf}->getEndOffset(); } sub resetLastBlockByte { my $self = shift ; $self->{Inf}->resetLastBlockByte(@_); } sub createDeflateStream { my $self = shift ; my $deflate = $self->{Inf}->createDeflateStream(@_); return bless {'Def' => $deflate, 'CompSize' => 0, 'UnCompSize' => 0, 'Error' => '', }, 'IO::Compress::Adapter::Deflate'; } 1; __END__ IO-Compress-2.063/lib/IO/Uncompress/Adapter/Bunzip2.pm0000644000175000017500000000377212211042540020772 0ustar paulpaulpackage IO::Uncompress::Adapter::Bunzip2; use strict; use warnings; use bytes; use IO::Compress::Base::Common 2.063 qw(:Status); use Compress::Raw::Bzip2 2.063 ; our ($VERSION, @ISA); $VERSION = '2.063'; sub mkUncompObject { my $small = shift || 0; my $verbosity = shift || 0; my ($inflate, $status) = new Compress::Raw::Bunzip2(1, 1, $small, $verbosity, 1); return (undef, "Could not create Inflation object: $status", $status) if $status != BZ_OK ; return bless {'Inf' => $inflate, 'CompSize' => 0, 'UnCompSize' => 0, 'Error' => '', 'ConsumesInput' => 1, } ; } sub uncompr { my $self = shift ; my $from = shift ; my $to = shift ; my $eof = shift ; my $inf = $self->{Inf}; my $status = $inf->bzinflate($from, $to); $self->{ErrorNo} = $status; if ($status != BZ_OK && $status != BZ_STREAM_END ) { $self->{Error} = "Inflation Error: $status"; return STATUS_ERROR; } return STATUS_OK if $status == BZ_OK ; return STATUS_ENDSTREAM if $status == BZ_STREAM_END ; return STATUS_ERROR ; } sub reset { my $self = shift ; my ($inf, $status) = new Compress::Raw::Bunzip2(); $self->{ErrorNo} = ($status == BZ_OK) ? 0 : $status ; if ($status != BZ_OK) { $self->{Error} = "Cannot create Inflate object: $status"; return STATUS_ERROR; } $self->{Inf} = $inf; return STATUS_OK ; } sub compressedBytes { my $self = shift ; $self->{Inf}->compressedBytes(); } sub uncompressedBytes { my $self = shift ; $self->{Inf}->uncompressedBytes(); } sub crc32 { my $self = shift ; #$self->{Inf}->crc32(); } sub adler32 { my $self = shift ; #$self->{Inf}->adler32(); } sub sync { my $self = shift ; #( $self->{Inf}->inflateSync(@_) == BZ_OK) # ? STATUS_OK # : STATUS_ERROR ; } 1; __END__ IO-Compress-2.063/lib/IO/Uncompress/AnyInflate.pm0000644000175000017500000006643012211042540020113 0ustar paulpaulpackage IO::Uncompress::AnyInflate ; # for RFC1950, RFC1951 or RFC1952 use strict; use warnings; use bytes; use IO::Compress::Base::Common 2.063 (); use IO::Uncompress::Adapter::Inflate 2.063 (); use IO::Uncompress::Base 2.063 ; use IO::Uncompress::Gunzip 2.063 ; use IO::Uncompress::Inflate 2.063 ; use IO::Uncompress::RawInflate 2.063 ; use IO::Uncompress::Unzip 2.063 ; require Exporter ; our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $AnyInflateError); $VERSION = '2.063'; $AnyInflateError = ''; @ISA = qw( Exporter IO::Uncompress::Base ); @EXPORT_OK = qw( $AnyInflateError anyinflate ) ; %EXPORT_TAGS = %IO::Uncompress::Base::DEFLATE_CONSTANTS ; push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ; Exporter::export_ok_tags('all'); # TODO - allow the user to pick a set of the three formats to allow # or just assume want to auto-detect any of the three formats. sub new { my $class = shift ; my $obj = IO::Compress::Base::Common::createSelfTiedObject($class, \$AnyInflateError); $obj->_create(undef, 0, @_); } sub anyinflate { my $obj = IO::Compress::Base::Common::createSelfTiedObject(undef, \$AnyInflateError); return $obj->_inf(@_) ; } sub getExtraParams { use IO::Compress::Base::Common 2.063 qw(:Parse); return ( 'rawinflate' => [Parse_boolean, 0] ) ; } sub ckParams { my $self = shift ; my $got = shift ; # any always needs both crc32 and adler32 $got->setValue('crc32' => 1); $got->setValue('adler32' => 1); return 1; } sub mkUncomp { my $self = shift ; my $got = shift ; my ($obj, $errstr, $errno) = IO::Uncompress::Adapter::Inflate::mkUncompObject(); return $self->saveErrorString(undef, $errstr, $errno) if ! defined $obj; *$self->{Uncomp} = $obj; my @possible = qw( Inflate Gunzip Unzip ); unshift @possible, 'RawInflate' if 1 || $got->getValue('rawinflate'); my $magic = $self->ckMagic( @possible ); if ($magic) { *$self->{Info} = $self->readHeader($magic) or return undef ; return 1; } return 0 ; } sub ckMagic { my $self = shift; my @names = @_ ; my $keep = ref $self ; for my $class ( map { "IO::Uncompress::$_" } @names) { bless $self => $class; my $magic = $self->ckMagic(); if ($magic) { #bless $self => $class; return $magic ; } $self->pushBack(*$self->{HeaderPending}) ; *$self->{HeaderPending} = '' ; } bless $self => $keep; return undef; } 1 ; __END__ =head1 NAME IO::Uncompress::AnyInflate - Uncompress zlib-based (zip, gzip) file/buffer =head1 SYNOPSIS use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ; my $status = anyinflate $input => $output [,OPTS] or die "anyinflate failed: $AnyInflateError\n"; my $z = new IO::Uncompress::AnyInflate $input [OPTS] or die "anyinflate failed: $AnyInflateError\n"; $status = $z->read($buffer) $status = $z->read($buffer, $length) $status = $z->read($buffer, $length, $offset) $line = $z->getline() $char = $z->getc() $char = $z->ungetc() $char = $z->opened() $status = $z->inflateSync() $data = $z->trailingData() $status = $z->nextStream() $data = $z->getHeaderInfo() $z->tell() $z->seek($position, $whence) $z->binmode() $z->fileno() $z->eof() $z->close() $AnyInflateError ; # IO::File mode <$z> read($z, $buffer); read($z, $buffer, $length); read($z, $buffer, $length, $offset); tell($z) seek($z, $position, $whence) binmode($z) fileno($z) eof($z) close($z) =head1 DESCRIPTION This module provides a Perl interface that allows the reading of files/buffers that have been compressed in a number of formats that use the zlib compression library. The formats supported are =over 5 =item RFC 1950 =item RFC 1951 (optionally) =item gzip (RFC 1952) =item zip =back The module will auto-detect which, if any, of the supported compression formats is being used. =head1 Functional Interface A top-level function, C, is provided to carry out "one-shot" uncompression between buffers and/or files. For finer control over the uncompression process, see the L section. use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ; anyinflate $input_filename_or_reference => $output_filename_or_reference [,OPTS] or die "anyinflate failed: $AnyInflateError\n"; The functional interface needs Perl5.005 or better. =head2 anyinflate $input_filename_or_reference => $output_filename_or_reference [, OPTS] C expects at least two parameters, C<$input_filename_or_reference> and C<$output_filename_or_reference>. =head3 The C<$input_filename_or_reference> parameter The parameter, C<$input_filename_or_reference>, is used to define the source of the compressed data. It can take one of the following forms: =over 5 =item A filename If the <$input_filename_or_reference> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for reading and the input data will be read from it. =item A filehandle If the C<$input_filename_or_reference> parameter is a filehandle, the input data will be read from it. The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input_filename_or_reference> is a scalar reference, the input data will be read from C<$$input_filename_or_reference>. =item An array reference If C<$input_filename_or_reference> is an array reference, each element in the array must be a filename. The input data will be read from each file in turn. The complete array will be walked to ensure that it only contains valid filenames before any data is uncompressed. =item An Input FileGlob string If C<$input_filename_or_reference> is a string that is delimited by the characters "<" and ">" C will assume that it is an I. The input is the list of files that match the fileglob. See L for more details. =back If the C<$input_filename_or_reference> parameter is any other type, C will be returned. =head3 The C<$output_filename_or_reference> parameter The parameter C<$output_filename_or_reference> is used to control the destination of the uncompressed data. This parameter can take one of these forms. =over 5 =item A filename If the C<$output_filename_or_reference> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for writing and the uncompressed data will be written to it. =item A filehandle If the C<$output_filename_or_reference> parameter is a filehandle, the uncompressed data will be written to it. The string '-' can be used as an alias for standard output. =item A scalar reference If C<$output_filename_or_reference> is a scalar reference, the uncompressed data will be stored in C<$$output_filename_or_reference>. =item An Array Reference If C<$output_filename_or_reference> is an array reference, the uncompressed data will be pushed onto the array. =item An Output FileGlob If C<$output_filename_or_reference> is a string that is delimited by the characters "<" and ">" C will assume that it is an I. The output is the list of files that match the fileglob. When C<$output_filename_or_reference> is an fileglob string, C<$input_filename_or_reference> must also be a fileglob string. Anything else is an error. See L for more details. =back If the C<$output_filename_or_reference> parameter is any other type, C will be returned. =head2 Notes When C<$input_filename_or_reference> maps to multiple compressed files/buffers and C<$output_filename_or_reference> is a single file/buffer, after uncompression C<$output_filename_or_reference> will contain a concatenation of all the uncompressed data from each of the input files/buffers. =head2 Optional Parameters Unless specified below, the optional parameters for C, C, are the same as those used with the OO interface defined in the L section below. =over 5 =item C<< AutoClose => 0|1 >> This option applies to any input or output data streams to C that are filehandles. If C is specified, and the value is true, it will result in all input and/or output filehandles being closed once C has completed. This parameter defaults to 0. =item C<< BinModeOut => 0|1 >> When writing to a file or filehandle, set C before writing to the file. Defaults to 0. =item C<< Append => 0|1 >> The behaviour of this option is dependent on the type of output data stream. =over 5 =item * A Buffer If C is enabled, all uncompressed data will be append to the end of the output buffer. Otherwise the output buffer will be cleared before any uncompressed data is written to it. =item * A Filename If C is enabled, the file will be opened in append mode. Otherwise the contents of the file, if any, will be truncated before any uncompressed data is written to it. =item * A Filehandle If C is enabled, the filehandle will be positioned to the end of the file via a call to C before any uncompressed data is written to it. Otherwise the file pointer will not be moved. =back When C is specified, and set to true, it will I all uncompressed data to the output data stream. So when the output is a filehandle it will carry out a seek to the eof before writing any uncompressed data. If the output is a filename, it will be opened for appending. If the output is a buffer, all uncompressed data will be appended to the existing buffer. Conversely when C is not specified, or it is present and is set to false, it will operate as follows. When the output is a filename, it will truncate the contents of the file before writing any uncompressed data. If the output is a filehandle its position will not be changed. If the output is a buffer, it will be wiped before any uncompressed data is output. Defaults to 0. =item C<< MultiStream => 0|1 >> If the input file/buffer contains multiple compressed data streams, this option will uncompress the whole lot as a single data stream. Defaults to 0. =item C<< TrailingData => $scalar >> Returns the data, if any, that is present immediately after the compressed data stream once uncompression is complete. This option can be used when there is useful information immediately following the compressed data stream, and you don't know the length of the compressed data stream. If the input is a buffer, C will return everything from the end of the compressed data stream to the end of the buffer. If the input is a filehandle, C will return the data that is left in the filehandle input buffer once the end of the compressed data stream has been reached. You can then use the filehandle to read the rest of the input file. Don't bother using C if the input is a filename. If you know the length of the compressed data stream before you start uncompressing, you can avoid having to use C by setting the C option. =back =head2 Examples To read the contents of the file C and write the uncompressed data to the file C. use strict ; use warnings ; use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ; my $input = "file1.txt.Compressed"; my $output = "file1.txt"; anyinflate $input => $output or die "anyinflate failed: $AnyInflateError\n"; To read from an existing Perl filehandle, C<$input>, and write the uncompressed data to a buffer, C<$buffer>. use strict ; use warnings ; use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ; use IO::File ; my $input = new IO::File " \$buffer or die "anyinflate failed: $AnyInflateError\n"; To uncompress all files in the directory "/my/home" that match "*.txt.Compressed" and store the compressed data in the same directory use strict ; use warnings ; use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ; anyinflate '' => '' or die "anyinflate failed: $AnyInflateError\n"; and if you want to compress each file one at a time, this will do the trick use strict ; use warnings ; use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ; for my $input ( glob "/my/home/*.txt.Compressed" ) { my $output = $input; $output =~ s/.Compressed// ; anyinflate $input => $output or die "Error compressing '$input': $AnyInflateError\n"; } =head1 OO Interface =head2 Constructor The format of the constructor for IO::Uncompress::AnyInflate is shown below my $z = new IO::Uncompress::AnyInflate $input [OPTS] or die "IO::Uncompress::AnyInflate failed: $AnyInflateError\n"; Returns an C object on success and undef on failure. The variable C<$AnyInflateError> will contain an error message on failure. If you are running Perl 5.005 or better the object, C<$z>, returned from IO::Uncompress::AnyInflate can be used exactly like an L filehandle. This means that all normal input file operations can be carried out with C<$z>. For example, to read a line from a compressed file/buffer you can use either of these forms $line = $z->getline(); $line = <$z>; The mandatory parameter C<$input> is used to determine the source of the compressed data. This parameter can take one of three forms. =over 5 =item A filename If the C<$input> parameter is a scalar, it is assumed to be a filename. This file will be opened for reading and the compressed data will be read from it. =item A filehandle If the C<$input> parameter is a filehandle, the compressed data will be read from it. The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input> is a scalar reference, the compressed data will be read from C<$$input>. =back =head2 Constructor Options The option names defined below are case insensitive and can be optionally prefixed by a '-'. So all of the following are valid -AutoClose -autoclose AUTOCLOSE autoclose OPTS is a combination of the following options: =over 5 =item C<< AutoClose => 0|1 >> This option is only valid when the C<$input> parameter is a filehandle. If specified, and the value is true, it will result in the file being closed once either the C method is called or the IO::Uncompress::AnyInflate object is destroyed. This parameter defaults to 0. =item C<< MultiStream => 0|1 >> Allows multiple concatenated compressed streams to be treated as a single compressed stream. Decompression will stop once either the end of the file/buffer is reached, an error is encountered (premature eof, corrupt compressed data) or the end of a stream is not immediately followed by the start of another stream. This parameter defaults to 0. =item C<< Prime => $string >> This option will uncompress the contents of C<$string> before processing the input file/buffer. This option can be useful when the compressed data is embedded in another file/data structure and it is not possible to work out where the compressed data begins without having to read the first few bytes. If this is the case, the uncompression can be I with these bytes using this option. =item C<< Transparent => 0|1 >> If this option is set and the input file/buffer is not compressed data, the module will allow reading of it anyway. In addition, if the input file/buffer does contain compressed data and there is non-compressed data immediately following it, setting this option will make this module treat the whole file/buffer as a single data stream. This option defaults to 1. =item C<< BlockSize => $num >> When reading the compressed input data, IO::Uncompress::AnyInflate will read it in blocks of C<$num> bytes. This option defaults to 4096. =item C<< InputLength => $size >> When present this option will limit the number of compressed bytes read from the input file/buffer to C<$size>. This option can be used in the situation where there is useful data directly after the compressed data stream and you know beforehand the exact length of the compressed data stream. This option is mostly used when reading from a filehandle, in which case the file pointer will be left pointing to the first byte directly after the compressed data stream. This option defaults to off. =item C<< Append => 0|1 >> This option controls what the C method does with uncompressed data. If set to 1, all uncompressed data will be appended to the output parameter of the C method. If set to 0, the contents of the output parameter of the C method will be overwritten by the uncompressed data. Defaults to 0. =item C<< Strict => 0|1 >> This option controls whether the extra checks defined below are used when carrying out the decompression. When Strict is on, the extra tests are carried out, when Strict is off they are not. The default for this option is off. If the input is an RFC 1950 data stream, the following will be checked: =over 5 =item 1 The ADLER32 checksum field must be present. =item 2 The value of the ADLER32 field read must match the adler32 value of the uncompressed data actually contained in the file. =back If the input is a gzip (RFC 1952) data stream, the following will be checked: =over 5 =item 1 If the FHCRC bit is set in the gzip FLG header byte, the CRC16 bytes in the header must match the crc16 value of the gzip header actually read. =item 2 If the gzip header contains a name field (FNAME) it consists solely of ISO 8859-1 characters. =item 3 If the gzip header contains a comment field (FCOMMENT) it consists solely of ISO 8859-1 characters plus line-feed. =item 4 If the gzip FEXTRA header field is present it must conform to the sub-field structure as defined in RFC 1952. =item 5 The CRC32 and ISIZE trailer fields must be present. =item 6 The value of the CRC32 field read must match the crc32 value of the uncompressed data actually contained in the gzip file. =item 7 The value of the ISIZE fields read must match the length of the uncompressed data actually read from the file. =back =item C<< RawInflate => 0|1 >> When auto-detecting the compressed format, try to test for raw-deflate (RFC 1951) content using the C module. The reason this is not default behaviour is because RFC 1951 content can only be detected by attempting to uncompress it. This process is error prone and can result is false positives. Defaults to 0. =item C<< ParseExtra => 0|1 >> If the gzip FEXTRA header field is present and this option is set, it will force the module to check that it conforms to the sub-field structure as defined in RFC 1952. If the C is on it will automatically enable this option. Defaults to 0. =back =head2 Examples TODO =head1 Methods =head2 read Usage is $status = $z->read($buffer) Reads a block of compressed data (the size of the compressed block is determined by the C option in the constructor), uncompresses it and writes any uncompressed data into C<$buffer>. If the C parameter is set in the constructor, the uncompressed data will be appended to the C<$buffer> parameter. Otherwise C<$buffer> will be overwritten. Returns the number of uncompressed bytes written to C<$buffer>, zero if eof or a negative number on error. =head2 read Usage is $status = $z->read($buffer, $length) $status = $z->read($buffer, $length, $offset) $status = read($z, $buffer, $length) $status = read($z, $buffer, $length, $offset) Attempt to read C<$length> bytes of uncompressed data into C<$buffer>. The main difference between this form of the C method and the previous one, is that this one will attempt to return I C<$length> bytes. The only circumstances that this function will not is if end-of-file or an IO error is encountered. Returns the number of uncompressed bytes written to C<$buffer>, zero if eof or a negative number on error. =head2 getline Usage is $line = $z->getline() $line = <$z> Reads a single line. This method fully supports the use of the variable C<$/> (or C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C is in use) to determine what constitutes an end of line. Paragraph mode, record mode and file slurp mode are all supported. =head2 getc Usage is $char = $z->getc() Read a single character. =head2 ungetc Usage is $char = $z->ungetc($string) =head2 inflateSync Usage is $status = $z->inflateSync() TODO =head2 getHeaderInfo Usage is $hdr = $z->getHeaderInfo(); @hdrs = $z->getHeaderInfo(); This method returns either a hash reference (in scalar context) or a list or hash references (in array context) that contains information about each of the header fields in the compressed data stream(s). =head2 tell Usage is $z->tell() tell $z Returns the uncompressed file offset. =head2 eof Usage is $z->eof(); eof($z); Returns true if the end of the compressed input stream has been reached. =head2 seek $z->seek($position, $whence); seek($z, $position, $whence); Provides a sub-set of the C functionality, with the restriction that it is only legal to seek forward in the input file/buffer. It is a fatal error to attempt to seek backward. Note that the implementation of C in this module does not provide true random access to a compressed file/buffer. It works by uncompressing data from the current offset in the file/buffer until it reaches the uncompressed offset specified in the parameters to C. For very small files this may be acceptable behaviour. For large files it may cause an unacceptable delay. The C<$whence> parameter takes one the usual values, namely SEEK_SET, SEEK_CUR or SEEK_END. Returns 1 on success, 0 on failure. =head2 binmode Usage is $z->binmode binmode $z ; This is a noop provided for completeness. =head2 opened $z->opened() Returns true if the object currently refers to a opened file/buffer. =head2 autoflush my $prev = $z->autoflush() my $prev = $z->autoflush(EXPR) If the C<$z> object is associated with a file or a filehandle, this method returns the current autoflush setting for the underlying filehandle. If C is present, and is non-zero, it will enable flushing after every write/print operation. If C<$z> is associated with a buffer, this method has no effect and always returns C. B that the special variable C<$|> B be used to set or retrieve the autoflush setting. =head2 input_line_number $z->input_line_number() $z->input_line_number(EXPR) Returns the current uncompressed line number. If C is present it has the effect of setting the line number. Note that setting the line number does not change the current position within the file/buffer being read. The contents of C<$/> are used to determine what constitutes a line terminator. =head2 fileno $z->fileno() fileno($z) If the C<$z> object is associated with a file or a filehandle, C will return the underlying file descriptor. Once the C method is called C will return C. If the C<$z> object is associated with a buffer, this method will return C. =head2 close $z->close() ; close $z ; Closes the output file/buffer. For most versions of Perl this method will be automatically invoked if the IO::Uncompress::AnyInflate object is destroyed (either explicitly or by the variable with the reference to the object going out of scope). The exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In these cases, the C method will be called automatically, but not until global destruction of all live objects when the program is terminating. Therefore, if you want your scripts to be able to run on all versions of Perl, you should call C explicitly and not rely on automatic closing. Returns true on success, otherwise 0. If the C option has been enabled when the IO::Uncompress::AnyInflate object was created, and the object is associated with a file, the underlying file will also be closed. =head2 nextStream Usage is my $status = $z->nextStream(); Skips to the next compressed data stream in the input file/buffer. If a new compressed data stream is found, the eof marker will be cleared and C<$.> will be reset to 0. Returns 1 if a new stream was found, 0 if none was found, and -1 if an error was encountered. =head2 trailingData Usage is my $data = $z->trailingData(); Returns the data, if any, that is present immediately after the compressed data stream once uncompression is complete. It only makes sense to call this method once the end of the compressed data stream has been encountered. This option can be used when there is useful information immediately following the compressed data stream, and you don't know the length of the compressed data stream. If the input is a buffer, C will return everything from the end of the compressed data stream to the end of the buffer. If the input is a filehandle, C will return the data that is left in the filehandle input buffer once the end of the compressed data stream has been reached. You can then use the filehandle to read the rest of the input file. Don't bother using C if the input is a filename. If you know the length of the compressed data stream before you start uncompressing, you can avoid having to use C by setting the C option in the constructor. =head1 Importing No symbolic constants are required by this IO::Uncompress::AnyInflate at present. =over 5 =item :all Imports C and C<$AnyInflateError>. Same as doing this use IO::Uncompress::AnyInflate qw(anyinflate $AnyInflateError) ; =back =head1 EXAMPLES =head2 Working with Net::FTP See L =head1 SEE ALSO L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L L L, L, L, L For RFC 1950, 1951 and 1952 see F, F and F The I compression library was written by Jean-loup Gailly F and Mark Adler F. The primary site for the I compression library is F. The primary site for gzip is F. =head1 AUTHOR This module was written by Paul Marquess, F. =head1 MODIFICATION HISTORY See the Changes file. =head1 COPYRIGHT AND LICENSE Copyright (c) 2005-2013 Paul Marquess. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. IO-Compress-2.063/lib/IO/Uncompress/AnyUncompress.pm0000644000175000017500000007136512211042540020672 0ustar paulpaulpackage IO::Uncompress::AnyUncompress ; use strict; use warnings; use bytes; use IO::Compress::Base::Common 2.063 (); use IO::Uncompress::Base 2.063 ; require Exporter ; our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $AnyUncompressError); $VERSION = '2.063'; $AnyUncompressError = ''; @ISA = qw( Exporter IO::Uncompress::Base ); @EXPORT_OK = qw( $AnyUncompressError anyuncompress ) ; %EXPORT_TAGS = %IO::Uncompress::Base::DEFLATE_CONSTANTS ; push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ; Exporter::export_ok_tags('all'); # TODO - allow the user to pick a set of the three formats to allow # or just assume want to auto-detect any of the three formats. BEGIN { eval ' use IO::Uncompress::Adapter::Inflate 2.063 ;'; eval ' use IO::Uncompress::Adapter::Bunzip2 2.063 ;'; eval ' use IO::Uncompress::Adapter::LZO 2.063 ;'; eval ' use IO::Uncompress::Adapter::Lzf 2.063 ;'; eval ' use IO::Uncompress::Adapter::UnLzma 2.063 ;'; eval ' use IO::Uncompress::Adapter::UnXz 2.063 ;'; eval ' use IO::Uncompress::Bunzip2 2.063 ;'; eval ' use IO::Uncompress::UnLzop 2.063 ;'; eval ' use IO::Uncompress::Gunzip 2.063 ;'; eval ' use IO::Uncompress::Inflate 2.063 ;'; eval ' use IO::Uncompress::RawInflate 2.063 ;'; eval ' use IO::Uncompress::Unzip 2.063 ;'; eval ' use IO::Uncompress::UnLzf 2.063 ;'; eval ' use IO::Uncompress::UnLzma 2.063 ;'; eval ' use IO::Uncompress::UnXz 2.063 ;'; } sub new { my $class = shift ; my $obj = IO::Compress::Base::Common::createSelfTiedObject($class, \$AnyUncompressError); $obj->_create(undef, 0, @_); } sub anyuncompress { my $obj = IO::Compress::Base::Common::createSelfTiedObject(undef, \$AnyUncompressError); return $obj->_inf(@_) ; } sub getExtraParams { return ( 'rawinflate' => [IO::Compress::Base::Common::Parse_boolean, 0] , 'unlzma' => [IO::Compress::Base::Common::Parse_boolean, 0] ) ; } sub ckParams { my $self = shift ; my $got = shift ; # any always needs both crc32 and adler32 $got->setValue('crc32' => 1); $got->setValue('adler32' => 1); return 1; } sub mkUncomp { my $self = shift ; my $got = shift ; my $magic ; # try zlib first if (defined $IO::Uncompress::RawInflate::VERSION ) { my ($obj, $errstr, $errno) = IO::Uncompress::Adapter::Inflate::mkUncompObject(); return $self->saveErrorString(undef, $errstr, $errno) if ! defined $obj; *$self->{Uncomp} = $obj; my @possible = qw( Inflate Gunzip Unzip ); unshift @possible, 'RawInflate' if $got->getValue('rawinflate'); $magic = $self->ckMagic( @possible ); if ($magic) { *$self->{Info} = $self->readHeader($magic) or return undef ; return 1; } } if (defined $IO::Uncompress::UnLzma::VERSION && $got->getValue('unlzma')) { my ($obj, $errstr, $errno) = IO::Uncompress::Adapter::UnLzma::mkUncompObject(); return $self->saveErrorString(undef, $errstr, $errno) if ! defined $obj; *$self->{Uncomp} = $obj; my @possible = qw( UnLzma ); #unshift @possible, 'RawInflate' # if $got->getValue('rawinflate'); if ( *$self->{Info} = $self->ckMagic( @possible )) { return 1; } } if (defined $IO::Uncompress::UnXz::VERSION and $magic = $self->ckMagic('UnXz')) { *$self->{Info} = $self->readHeader($magic) or return undef ; my ($obj, $errstr, $errno) = IO::Uncompress::Adapter::UnXz::mkUncompObject(); return $self->saveErrorString(undef, $errstr, $errno) if ! defined $obj; *$self->{Uncomp} = $obj; return 1; } if (defined $IO::Uncompress::Bunzip2::VERSION and $magic = $self->ckMagic('Bunzip2')) { *$self->{Info} = $self->readHeader($magic) or return undef ; my ($obj, $errstr, $errno) = IO::Uncompress::Adapter::Bunzip2::mkUncompObject(); return $self->saveErrorString(undef, $errstr, $errno) if ! defined $obj; *$self->{Uncomp} = $obj; return 1; } if (defined $IO::Uncompress::UnLzop::VERSION and $magic = $self->ckMagic('UnLzop')) { *$self->{Info} = $self->readHeader($magic) or return undef ; my ($obj, $errstr, $errno) = IO::Uncompress::Adapter::LZO::mkUncompObject(); return $self->saveErrorString(undef, $errstr, $errno) if ! defined $obj; *$self->{Uncomp} = $obj; return 1; } if (defined $IO::Uncompress::UnLzf::VERSION and $magic = $self->ckMagic('UnLzf')) { *$self->{Info} = $self->readHeader($magic) or return undef ; my ($obj, $errstr, $errno) = IO::Uncompress::Adapter::Lzf::mkUncompObject(); return $self->saveErrorString(undef, $errstr, $errno) if ! defined $obj; *$self->{Uncomp} = $obj; return 1; } return 0 ; } sub ckMagic { my $self = shift; my @names = @_ ; my $keep = ref $self ; for my $class ( map { "IO::Uncompress::$_" } @names) { bless $self => $class; my $magic = $self->ckMagic(); if ($magic) { #bless $self => $class; return $magic ; } $self->pushBack(*$self->{HeaderPending}) ; *$self->{HeaderPending} = '' ; } bless $self => $keep; return undef; } 1 ; __END__ =head1 NAME IO::Uncompress::AnyUncompress - Uncompress gzip, zip, bzip2 or lzop file/buffer =head1 SYNOPSIS use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ; my $status = anyuncompress $input => $output [,OPTS] or die "anyuncompress failed: $AnyUncompressError\n"; my $z = new IO::Uncompress::AnyUncompress $input [OPTS] or die "anyuncompress failed: $AnyUncompressError\n"; $status = $z->read($buffer) $status = $z->read($buffer, $length) $status = $z->read($buffer, $length, $offset) $line = $z->getline() $char = $z->getc() $char = $z->ungetc() $char = $z->opened() $data = $z->trailingData() $status = $z->nextStream() $data = $z->getHeaderInfo() $z->tell() $z->seek($position, $whence) $z->binmode() $z->fileno() $z->eof() $z->close() $AnyUncompressError ; # IO::File mode <$z> read($z, $buffer); read($z, $buffer, $length); read($z, $buffer, $length, $offset); tell($z) seek($z, $position, $whence) binmode($z) fileno($z) eof($z) close($z) =head1 DESCRIPTION This module provides a Perl interface that allows the reading of files/buffers that have been compressed with a variety of compression libraries. The formats supported are: =over 5 =item RFC 1950 =item RFC 1951 (optionally) =item gzip (RFC 1952) =item zip =item bzip2 =item lzop =item lzf =item lzma =item xz =back The module will auto-detect which, if any, of the supported compression formats is being used. =head1 Functional Interface A top-level function, C, is provided to carry out "one-shot" uncompression between buffers and/or files. For finer control over the uncompression process, see the L section. use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ; anyuncompress $input_filename_or_reference => $output_filename_or_reference [,OPTS] or die "anyuncompress failed: $AnyUncompressError\n"; The functional interface needs Perl5.005 or better. =head2 anyuncompress $input_filename_or_reference => $output_filename_or_reference [, OPTS] C expects at least two parameters, C<$input_filename_or_reference> and C<$output_filename_or_reference>. =head3 The C<$input_filename_or_reference> parameter The parameter, C<$input_filename_or_reference>, is used to define the source of the compressed data. It can take one of the following forms: =over 5 =item A filename If the <$input_filename_or_reference> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for reading and the input data will be read from it. =item A filehandle If the C<$input_filename_or_reference> parameter is a filehandle, the input data will be read from it. The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input_filename_or_reference> is a scalar reference, the input data will be read from C<$$input_filename_or_reference>. =item An array reference If C<$input_filename_or_reference> is an array reference, each element in the array must be a filename. The input data will be read from each file in turn. The complete array will be walked to ensure that it only contains valid filenames before any data is uncompressed. =item An Input FileGlob string If C<$input_filename_or_reference> is a string that is delimited by the characters "<" and ">" C will assume that it is an I. The input is the list of files that match the fileglob. See L for more details. =back If the C<$input_filename_or_reference> parameter is any other type, C will be returned. =head3 The C<$output_filename_or_reference> parameter The parameter C<$output_filename_or_reference> is used to control the destination of the uncompressed data. This parameter can take one of these forms. =over 5 =item A filename If the C<$output_filename_or_reference> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for writing and the uncompressed data will be written to it. =item A filehandle If the C<$output_filename_or_reference> parameter is a filehandle, the uncompressed data will be written to it. The string '-' can be used as an alias for standard output. =item A scalar reference If C<$output_filename_or_reference> is a scalar reference, the uncompressed data will be stored in C<$$output_filename_or_reference>. =item An Array Reference If C<$output_filename_or_reference> is an array reference, the uncompressed data will be pushed onto the array. =item An Output FileGlob If C<$output_filename_or_reference> is a string that is delimited by the characters "<" and ">" C will assume that it is an I. The output is the list of files that match the fileglob. When C<$output_filename_or_reference> is an fileglob string, C<$input_filename_or_reference> must also be a fileglob string. Anything else is an error. See L for more details. =back If the C<$output_filename_or_reference> parameter is any other type, C will be returned. =head2 Notes When C<$input_filename_or_reference> maps to multiple compressed files/buffers and C<$output_filename_or_reference> is a single file/buffer, after uncompression C<$output_filename_or_reference> will contain a concatenation of all the uncompressed data from each of the input files/buffers. =head2 Optional Parameters Unless specified below, the optional parameters for C, C, are the same as those used with the OO interface defined in the L section below. =over 5 =item C<< AutoClose => 0|1 >> This option applies to any input or output data streams to C that are filehandles. If C is specified, and the value is true, it will result in all input and/or output filehandles being closed once C has completed. This parameter defaults to 0. =item C<< BinModeOut => 0|1 >> When writing to a file or filehandle, set C before writing to the file. Defaults to 0. =item C<< Append => 0|1 >> The behaviour of this option is dependent on the type of output data stream. =over 5 =item * A Buffer If C is enabled, all uncompressed data will be append to the end of the output buffer. Otherwise the output buffer will be cleared before any uncompressed data is written to it. =item * A Filename If C is enabled, the file will be opened in append mode. Otherwise the contents of the file, if any, will be truncated before any uncompressed data is written to it. =item * A Filehandle If C is enabled, the filehandle will be positioned to the end of the file via a call to C before any uncompressed data is written to it. Otherwise the file pointer will not be moved. =back When C is specified, and set to true, it will I all uncompressed data to the output data stream. So when the output is a filehandle it will carry out a seek to the eof before writing any uncompressed data. If the output is a filename, it will be opened for appending. If the output is a buffer, all uncompressed data will be appended to the existing buffer. Conversely when C is not specified, or it is present and is set to false, it will operate as follows. When the output is a filename, it will truncate the contents of the file before writing any uncompressed data. If the output is a filehandle its position will not be changed. If the output is a buffer, it will be wiped before any uncompressed data is output. Defaults to 0. =item C<< MultiStream => 0|1 >> If the input file/buffer contains multiple compressed data streams, this option will uncompress the whole lot as a single data stream. Defaults to 0. =item C<< TrailingData => $scalar >> Returns the data, if any, that is present immediately after the compressed data stream once uncompression is complete. This option can be used when there is useful information immediately following the compressed data stream, and you don't know the length of the compressed data stream. If the input is a buffer, C will return everything from the end of the compressed data stream to the end of the buffer. If the input is a filehandle, C will return the data that is left in the filehandle input buffer once the end of the compressed data stream has been reached. You can then use the filehandle to read the rest of the input file. Don't bother using C if the input is a filename. If you know the length of the compressed data stream before you start uncompressing, you can avoid having to use C by setting the C option. =back =head2 Examples To read the contents of the file C and write the uncompressed data to the file C. use strict ; use warnings ; use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ; my $input = "file1.txt.Compressed"; my $output = "file1.txt"; anyuncompress $input => $output or die "anyuncompress failed: $AnyUncompressError\n"; To read from an existing Perl filehandle, C<$input>, and write the uncompressed data to a buffer, C<$buffer>. use strict ; use warnings ; use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ; use IO::File ; my $input = new IO::File " \$buffer or die "anyuncompress failed: $AnyUncompressError\n"; To uncompress all files in the directory "/my/home" that match "*.txt.Compressed" and store the compressed data in the same directory use strict ; use warnings ; use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ; anyuncompress '' => '' or die "anyuncompress failed: $AnyUncompressError\n"; and if you want to compress each file one at a time, this will do the trick use strict ; use warnings ; use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ; for my $input ( glob "/my/home/*.txt.Compressed" ) { my $output = $input; $output =~ s/.Compressed// ; anyuncompress $input => $output or die "Error compressing '$input': $AnyUncompressError\n"; } =head1 OO Interface =head2 Constructor The format of the constructor for IO::Uncompress::AnyUncompress is shown below my $z = new IO::Uncompress::AnyUncompress $input [OPTS] or die "IO::Uncompress::AnyUncompress failed: $AnyUncompressError\n"; Returns an C object on success and undef on failure. The variable C<$AnyUncompressError> will contain an error message on failure. If you are running Perl 5.005 or better the object, C<$z>, returned from IO::Uncompress::AnyUncompress can be used exactly like an L filehandle. This means that all normal input file operations can be carried out with C<$z>. For example, to read a line from a compressed file/buffer you can use either of these forms $line = $z->getline(); $line = <$z>; The mandatory parameter C<$input> is used to determine the source of the compressed data. This parameter can take one of three forms. =over 5 =item A filename If the C<$input> parameter is a scalar, it is assumed to be a filename. This file will be opened for reading and the compressed data will be read from it. =item A filehandle If the C<$input> parameter is a filehandle, the compressed data will be read from it. The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input> is a scalar reference, the compressed data will be read from C<$$input>. =back =head2 Constructor Options The option names defined below are case insensitive and can be optionally prefixed by a '-'. So all of the following are valid -AutoClose -autoclose AUTOCLOSE autoclose OPTS is a combination of the following options: =over 5 =item C<< AutoClose => 0|1 >> This option is only valid when the C<$input> parameter is a filehandle. If specified, and the value is true, it will result in the file being closed once either the C method is called or the IO::Uncompress::AnyUncompress object is destroyed. This parameter defaults to 0. =item C<< MultiStream => 0|1 >> Allows multiple concatenated compressed streams to be treated as a single compressed stream. Decompression will stop once either the end of the file/buffer is reached, an error is encountered (premature eof, corrupt compressed data) or the end of a stream is not immediately followed by the start of another stream. This parameter defaults to 0. =item C<< Prime => $string >> This option will uncompress the contents of C<$string> before processing the input file/buffer. This option can be useful when the compressed data is embedded in another file/data structure and it is not possible to work out where the compressed data begins without having to read the first few bytes. If this is the case, the uncompression can be I with these bytes using this option. =item C<< Transparent => 0|1 >> If this option is set and the input file/buffer is not compressed data, the module will allow reading of it anyway. In addition, if the input file/buffer does contain compressed data and there is non-compressed data immediately following it, setting this option will make this module treat the whole file/buffer as a single data stream. This option defaults to 1. =item C<< BlockSize => $num >> When reading the compressed input data, IO::Uncompress::AnyUncompress will read it in blocks of C<$num> bytes. This option defaults to 4096. =item C<< InputLength => $size >> When present this option will limit the number of compressed bytes read from the input file/buffer to C<$size>. This option can be used in the situation where there is useful data directly after the compressed data stream and you know beforehand the exact length of the compressed data stream. This option is mostly used when reading from a filehandle, in which case the file pointer will be left pointing to the first byte directly after the compressed data stream. This option defaults to off. =item C<< Append => 0|1 >> This option controls what the C method does with uncompressed data. If set to 1, all uncompressed data will be appended to the output parameter of the C method. If set to 0, the contents of the output parameter of the C method will be overwritten by the uncompressed data. Defaults to 0. =item C<< Strict => 0|1 >> This option controls whether the extra checks defined below are used when carrying out the decompression. When Strict is on, the extra tests are carried out, when Strict is off they are not. The default for this option is off. =item C<< RawInflate => 0|1 >> When auto-detecting the compressed format, try to test for raw-deflate (RFC 1951) content using the C module. The reason this is not default behaviour is because RFC 1951 content can only be detected by attempting to uncompress it. This process is error prone and can result is false positives. Defaults to 0. =item C<< UnLzma => 0|1 >> When auto-detecting the compressed format, try to test for lzma_alone content using the C module. The reason this is not default behaviour is because lzma_alone content can only be detected by attempting to uncompress it. This process is error prone and can result is false positives. Defaults to 0. =back =head2 Examples TODO =head1 Methods =head2 read Usage is $status = $z->read($buffer) Reads a block of compressed data (the size of the compressed block is determined by the C option in the constructor), uncompresses it and writes any uncompressed data into C<$buffer>. If the C parameter is set in the constructor, the uncompressed data will be appended to the C<$buffer> parameter. Otherwise C<$buffer> will be overwritten. Returns the number of uncompressed bytes written to C<$buffer>, zero if eof or a negative number on error. =head2 read Usage is $status = $z->read($buffer, $length) $status = $z->read($buffer, $length, $offset) $status = read($z, $buffer, $length) $status = read($z, $buffer, $length, $offset) Attempt to read C<$length> bytes of uncompressed data into C<$buffer>. The main difference between this form of the C method and the previous one, is that this one will attempt to return I C<$length> bytes. The only circumstances that this function will not is if end-of-file or an IO error is encountered. Returns the number of uncompressed bytes written to C<$buffer>, zero if eof or a negative number on error. =head2 getline Usage is $line = $z->getline() $line = <$z> Reads a single line. This method fully supports the use of the variable C<$/> (or C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C is in use) to determine what constitutes an end of line. Paragraph mode, record mode and file slurp mode are all supported. =head2 getc Usage is $char = $z->getc() Read a single character. =head2 ungetc Usage is $char = $z->ungetc($string) =head2 getHeaderInfo Usage is $hdr = $z->getHeaderInfo(); @hdrs = $z->getHeaderInfo(); This method returns either a hash reference (in scalar context) or a list or hash references (in array context) that contains information about each of the header fields in the compressed data stream(s). =head2 tell Usage is $z->tell() tell $z Returns the uncompressed file offset. =head2 eof Usage is $z->eof(); eof($z); Returns true if the end of the compressed input stream has been reached. =head2 seek $z->seek($position, $whence); seek($z, $position, $whence); Provides a sub-set of the C functionality, with the restriction that it is only legal to seek forward in the input file/buffer. It is a fatal error to attempt to seek backward. Note that the implementation of C in this module does not provide true random access to a compressed file/buffer. It works by uncompressing data from the current offset in the file/buffer until it reaches the uncompressed offset specified in the parameters to C. For very small files this may be acceptable behaviour. For large files it may cause an unacceptable delay. The C<$whence> parameter takes one the usual values, namely SEEK_SET, SEEK_CUR or SEEK_END. Returns 1 on success, 0 on failure. =head2 binmode Usage is $z->binmode binmode $z ; This is a noop provided for completeness. =head2 opened $z->opened() Returns true if the object currently refers to a opened file/buffer. =head2 autoflush my $prev = $z->autoflush() my $prev = $z->autoflush(EXPR) If the C<$z> object is associated with a file or a filehandle, this method returns the current autoflush setting for the underlying filehandle. If C is present, and is non-zero, it will enable flushing after every write/print operation. If C<$z> is associated with a buffer, this method has no effect and always returns C. B that the special variable C<$|> B be used to set or retrieve the autoflush setting. =head2 input_line_number $z->input_line_number() $z->input_line_number(EXPR) Returns the current uncompressed line number. If C is present it has the effect of setting the line number. Note that setting the line number does not change the current position within the file/buffer being read. The contents of C<$/> are used to determine what constitutes a line terminator. =head2 fileno $z->fileno() fileno($z) If the C<$z> object is associated with a file or a filehandle, C will return the underlying file descriptor. Once the C method is called C will return C. If the C<$z> object is associated with a buffer, this method will return C. =head2 close $z->close() ; close $z ; Closes the output file/buffer. For most versions of Perl this method will be automatically invoked if the IO::Uncompress::AnyUncompress object is destroyed (either explicitly or by the variable with the reference to the object going out of scope). The exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In these cases, the C method will be called automatically, but not until global destruction of all live objects when the program is terminating. Therefore, if you want your scripts to be able to run on all versions of Perl, you should call C explicitly and not rely on automatic closing. Returns true on success, otherwise 0. If the C option has been enabled when the IO::Uncompress::AnyUncompress object was created, and the object is associated with a file, the underlying file will also be closed. =head2 nextStream Usage is my $status = $z->nextStream(); Skips to the next compressed data stream in the input file/buffer. If a new compressed data stream is found, the eof marker will be cleared and C<$.> will be reset to 0. Returns 1 if a new stream was found, 0 if none was found, and -1 if an error was encountered. =head2 trailingData Usage is my $data = $z->trailingData(); Returns the data, if any, that is present immediately after the compressed data stream once uncompression is complete. It only makes sense to call this method once the end of the compressed data stream has been encountered. This option can be used when there is useful information immediately following the compressed data stream, and you don't know the length of the compressed data stream. If the input is a buffer, C will return everything from the end of the compressed data stream to the end of the buffer. If the input is a filehandle, C will return the data that is left in the filehandle input buffer once the end of the compressed data stream has been reached. You can then use the filehandle to read the rest of the input file. Don't bother using C if the input is a filename. If you know the length of the compressed data stream before you start uncompressing, you can avoid having to use C by setting the C option in the constructor. =head1 Importing No symbolic constants are required by this IO::Uncompress::AnyUncompress at present. =over 5 =item :all Imports C and C<$AnyUncompressError>. Same as doing this use IO::Uncompress::AnyUncompress qw(anyuncompress $AnyUncompressError) ; =back =head1 EXAMPLES =head1 SEE ALSO L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L L L, L, L, L =head1 AUTHOR This module was written by Paul Marquess, F. =head1 MODIFICATION HISTORY See the Changes file. =head1 COPYRIGHT AND LICENSE Copyright (c) 2005-2013 Paul Marquess. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. IO-Compress-2.063/lib/IO/Compress/0000755000175000017500000000000012235214476015166 5ustar paulpaulIO-Compress-2.063/lib/IO/Compress/Base.pm0000644000175000017500000005642012211042535016372 0ustar paulpaul package IO::Compress::Base ; require 5.006 ; use strict ; use warnings; use IO::Compress::Base::Common 2.063 ; use IO::File (); ; use Scalar::Util (); #use File::Glob; #require Exporter ; use Carp() ; use Symbol(); #use bytes; our (@ISA, $VERSION); @ISA = qw(Exporter IO::File); $VERSION = '2.063'; #Can't locate object method "SWASHNEW" via package "utf8" (perhaps you forgot to load "utf8"?) at .../ext/Compress-Zlib/Gzip/blib/lib/Compress/Zlib/Common.pm line 16. sub saveStatus { my $self = shift ; ${ *$self->{ErrorNo} } = shift() + 0 ; ${ *$self->{Error} } = '' ; return ${ *$self->{ErrorNo} } ; } sub saveErrorString { my $self = shift ; my $retval = shift ; ${ *$self->{Error} } = shift ; ${ *$self->{ErrorNo} } = shift() + 0 if @_ ; return $retval; } sub croakError { my $self = shift ; $self->saveErrorString(0, $_[0]); Carp::croak $_[0]; } sub closeError { my $self = shift ; my $retval = shift ; my $errno = *$self->{ErrorNo}; my $error = ${ *$self->{Error} }; $self->close(); *$self->{ErrorNo} = $errno ; ${ *$self->{Error} } = $error ; return $retval; } sub error { my $self = shift ; return ${ *$self->{Error} } ; } sub errorNo { my $self = shift ; return ${ *$self->{ErrorNo} } ; } sub writeAt { my $self = shift ; my $offset = shift; my $data = shift; if (defined *$self->{FH}) { my $here = tell(*$self->{FH}); return $self->saveErrorString(undef, "Cannot seek to end of output filehandle: $!", $!) if $here < 0 ; seek(*$self->{FH}, $offset, IO::Handle::SEEK_SET) or return $self->saveErrorString(undef, "Cannot seek to end of output filehandle: $!", $!) ; defined *$self->{FH}->write($data, length $data) or return $self->saveErrorString(undef, $!, $!) ; seek(*$self->{FH}, $here, IO::Handle::SEEK_SET) or return $self->saveErrorString(undef, "Cannot seek to end of output filehandle: $!", $!) ; } else { substr(${ *$self->{Buffer} }, $offset, length($data)) = $data ; } return 1; } sub outputPayload { my $self = shift ; return $self->output(@_); } sub output { my $self = shift ; my $data = shift ; my $last = shift ; return 1 if length $data == 0 && ! $last ; if ( *$self->{FilterContainer} ) { *_ = \$data; &{ *$self->{FilterContainer} }(); } if (length $data) { if ( defined *$self->{FH} ) { defined *$self->{FH}->write( $data, length $data ) or return $self->saveErrorString(0, $!, $!); } else { ${ *$self->{Buffer} } .= $data ; } } return 1; } sub getOneShotParams { return ( 'multistream' => [IO::Compress::Base::Common::Parse_boolean, 1], ); } our %PARAMS = ( # Generic Parameters 'autoclose' => [IO::Compress::Base::Common::Parse_boolean, 0], 'encode' => [IO::Compress::Base::Common::Parse_any, undef], 'strict' => [IO::Compress::Base::Common::Parse_boolean, 1], 'append' => [IO::Compress::Base::Common::Parse_boolean, 0], 'binmodein' => [IO::Compress::Base::Common::Parse_boolean, 0], 'filtercontainer' => [IO::Compress::Base::Common::Parse_code, undef], ); sub checkParams { my $self = shift ; my $class = shift ; my $got = shift || IO::Compress::Base::Parameters::new(); $got->parse( { %PARAMS, $self->getExtraParams(), *$self->{OneShot} ? $self->getOneShotParams() : (), }, @_) or $self->croakError("${class}: " . $got->getError()) ; return $got ; } sub _create { my $obj = shift; my $got = shift; *$obj->{Closed} = 1 ; my $class = ref $obj; $obj->croakError("$class: Missing Output parameter") if ! @_ && ! $got ; my $outValue = shift ; my $oneShot = 1 ; if (! $got) { $oneShot = 0 ; $got = $obj->checkParams($class, undef, @_) or return undef ; } my $lax = ! $got->getValue('strict') ; my $outType = IO::Compress::Base::Common::whatIsOutput($outValue); $obj->ckOutputParam($class, $outValue) or return undef ; if ($outType eq 'buffer') { *$obj->{Buffer} = $outValue; } else { my $buff = "" ; *$obj->{Buffer} = \$buff ; } # Merge implies Append my $merge = $got->getValue('merge') ; my $appendOutput = $got->getValue('append') || $merge ; *$obj->{Append} = $appendOutput; *$obj->{FilterContainer} = $got->getValue('filtercontainer') ; if ($merge) { # Switch off Merge mode if output file/buffer is empty/doesn't exist if (($outType eq 'buffer' && length $$outValue == 0 ) || ($outType ne 'buffer' && (! -e $outValue || (-w _ && -z _))) ) { $merge = 0 } } # If output is a file, check that it is writable #no warnings; #if ($outType eq 'filename' && -e $outValue && ! -w _) # { return $obj->saveErrorString(undef, "Output file '$outValue' is not writable" ) } $obj->ckParams($got) or $obj->croakError("${class}: " . $obj->error()); if ($got->getValue('encode')) { my $want_encoding = $got->getValue('encode'); *$obj->{Encoding} = IO::Compress::Base::Common::getEncoding($obj, $class, $want_encoding); my $x = *$obj->{Encoding}; } else { *$obj->{Encoding} = undef; } $obj->saveStatus(STATUS_OK) ; my $status ; if (! $merge) { *$obj->{Compress} = $obj->mkComp($got) or return undef; *$obj->{UnCompSize} = new U64 ; *$obj->{CompSize} = new U64 ; if ( $outType eq 'buffer') { ${ *$obj->{Buffer} } = '' unless $appendOutput ; } else { if ($outType eq 'handle') { *$obj->{FH} = $outValue ; setBinModeOutput(*$obj->{FH}) ; #$outValue->flush() ; *$obj->{Handle} = 1 ; if ($appendOutput) { seek(*$obj->{FH}, 0, IO::Handle::SEEK_END) or return $obj->saveErrorString(undef, "Cannot seek to end of output filehandle: $!", $!) ; } } elsif ($outType eq 'filename') { no warnings; my $mode = '>' ; $mode = '>>' if $appendOutput; *$obj->{FH} = new IO::File "$mode $outValue" or return $obj->saveErrorString(undef, "cannot open file '$outValue': $!", $!) ; *$obj->{StdIO} = ($outValue eq '-'); setBinModeOutput(*$obj->{FH}) ; } } *$obj->{Header} = $obj->mkHeader($got) ; $obj->output( *$obj->{Header} ) or return undef; $obj->beforePayload(); } else { *$obj->{Compress} = $obj->createMerge($outValue, $outType) or return undef; } *$obj->{Closed} = 0 ; *$obj->{AutoClose} = $got->getValue('autoclose') ; *$obj->{Output} = $outValue; *$obj->{ClassName} = $class; *$obj->{Got} = $got; *$obj->{OneShot} = 0 ; return $obj ; } sub ckOutputParam { my $self = shift ; my $from = shift ; my $outType = IO::Compress::Base::Common::whatIsOutput($_[0]); $self->croakError("$from: output parameter not a filename, filehandle or scalar ref") if ! $outType ; #$self->croakError("$from: output filename is undef or null string") #if $outType eq 'filename' && (! defined $_[0] || $_[0] eq '') ; $self->croakError("$from: output buffer is read-only") if $outType eq 'buffer' && Scalar::Util::readonly(${ $_[0] }); return 1; } sub _def { my $obj = shift ; my $class= (caller)[0] ; my $name = (caller(1))[3] ; $obj->croakError("$name: expected at least 1 parameters\n") unless @_ >= 1 ; my $input = shift ; my $haveOut = @_ ; my $output = shift ; my $x = new IO::Compress::Base::Validator($class, *$obj->{Error}, $name, $input, $output) or return undef ; push @_, $output if $haveOut && $x->{Hash}; *$obj->{OneShot} = 1 ; my $got = $obj->checkParams($name, undef, @_) or return undef ; $x->{Got} = $got ; # if ($x->{Hash}) # { # while (my($k, $v) = each %$input) # { # $v = \$input->{$k} # unless defined $v ; # # $obj->_singleTarget($x, 1, $k, $v, @_) # or return undef ; # } # # return keys %$input ; # } if ($x->{GlobMap}) { $x->{oneInput} = 1 ; foreach my $pair (@{ $x->{Pairs} }) { my ($from, $to) = @$pair ; $obj->_singleTarget($x, 1, $from, $to, @_) or return undef ; } return scalar @{ $x->{Pairs} } ; } if (! $x->{oneOutput} ) { my $inFile = ($x->{inType} eq 'filenames' || $x->{inType} eq 'filename'); $x->{inType} = $inFile ? 'filename' : 'buffer'; foreach my $in ($x->{oneInput} ? $input : @$input) { my $out ; $x->{oneInput} = 1 ; $obj->_singleTarget($x, $inFile, $in, \$out, @_) or return undef ; push @$output, \$out ; #if ($x->{outType} eq 'array') # { push @$output, \$out } #else # { $output->{$in} = \$out } } return 1 ; } # finally the 1 to 1 and n to 1 return $obj->_singleTarget($x, 1, $input, $output, @_); Carp::croak "should not be here" ; } sub _singleTarget { my $obj = shift ; my $x = shift ; my $inputIsFilename = shift; my $input = shift; if ($x->{oneInput}) { $obj->getFileInfo($x->{Got}, $input) if isaScalar($input) || (isaFilename($input) and $inputIsFilename) ; my $z = $obj->_create($x->{Got}, @_) or return undef ; defined $z->_wr2($input, $inputIsFilename) or return $z->closeError(undef) ; return $z->close() ; } else { my $afterFirst = 0 ; my $inputIsFilename = ($x->{inType} ne 'array'); my $keep = $x->{Got}->clone(); #for my $element ( ($x->{inType} eq 'hash') ? keys %$input : @$input) for my $element ( @$input) { my $isFilename = isaFilename($element); if ( $afterFirst ++ ) { defined addInterStream($obj, $element, $isFilename) or return $obj->closeError(undef) ; } else { $obj->getFileInfo($x->{Got}, $element) if isaScalar($element) || $isFilename; $obj->_create($x->{Got}, @_) or return undef ; } defined $obj->_wr2($element, $isFilename) or return $obj->closeError(undef) ; *$obj->{Got} = $keep->clone(); } return $obj->close() ; } } sub _wr2 { my $self = shift ; my $source = shift ; my $inputIsFilename = shift; my $input = $source ; if (! $inputIsFilename) { $input = \$source if ! ref $source; } if ( ref $input && ref $input eq 'SCALAR' ) { return $self->syswrite($input, @_) ; } if ( ! ref $input || isaFilehandle($input)) { my $isFilehandle = isaFilehandle($input) ; my $fh = $input ; if ( ! $isFilehandle ) { $fh = new IO::File "<$input" or return $self->saveErrorString(undef, "cannot open file '$input': $!", $!) ; } binmode $fh if *$self->{Got}->valueOrDefault('binmodein') ; my $status ; my $buff ; my $count = 0 ; while ($status = read($fh, $buff, 16 * 1024)) { $count += length $buff; defined $self->syswrite($buff, @_) or return undef ; } return $self->saveErrorString(undef, $!, $!) if ! defined $status ; if ( (!$isFilehandle || *$self->{AutoClose}) && $input ne '-') { $fh->close() or return undef ; } return $count ; } Carp::croak "Should not be here"; return undef; } sub addInterStream { my $self = shift ; my $input = shift ; my $inputIsFilename = shift ; if (*$self->{Got}->getValue('multistream')) { $self->getFileInfo(*$self->{Got}, $input) #if isaFilename($input) and $inputIsFilename ; if isaScalar($input) || isaFilename($input) ; # TODO -- newStream needs to allow gzip/zip header to be modified return $self->newStream(); } elsif (*$self->{Got}->getValue('autoflush')) { #return $self->flush(Z_FULL_FLUSH); } return 1 ; } sub getFileInfo { } sub TIEHANDLE { return $_[0] if ref($_[0]); die "OOPS\n" ; } sub UNTIE { my $self = shift ; } sub DESTROY { my $self = shift ; local ($., $@, $!, $^E, $?); $self->close() ; # TODO - memory leak with 5.8.0 - this isn't called until # global destruction # %{ *$self } = () ; undef $self ; } sub filterUncompressed { } sub syswrite { my $self = shift ; my $buffer ; if (ref $_[0] ) { $self->croakError( *$self->{ClassName} . "::write: not a scalar reference" ) unless ref $_[0] eq 'SCALAR' ; $buffer = $_[0] ; } else { $buffer = \$_[0] ; } if (@_ > 1) { my $slen = defined $$buffer ? length($$buffer) : 0; my $len = $slen; my $offset = 0; $len = $_[1] if $_[1] < $len; if (@_ > 2) { $offset = $_[2] || 0; $self->croakError(*$self->{ClassName} . "::write: offset outside string") if $offset > $slen; if ($offset < 0) { $offset += $slen; $self->croakError( *$self->{ClassName} . "::write: offset outside string") if $offset < 0; } my $rem = $slen - $offset; $len = $rem if $rem < $len; } $buffer = \substr($$buffer, $offset, $len) ; } return 0 if (! defined $$buffer || length $$buffer == 0) && ! *$self->{FlushPending}; # *$self->{Pending} .= $$buffer ; # # return length $$buffer # if (length *$self->{Pending} < 1024 * 16 && ! *$self->{FlushPending}) ; # # $$buffer = *$self->{Pending} ; # *$self->{Pending} = ''; if (*$self->{Encoding}) { $$buffer = *$self->{Encoding}->encode($$buffer); } else { $] >= 5.008 and ( utf8::downgrade($$buffer, 1) or Carp::croak "Wide character in " . *$self->{ClassName} . "::write:"); } $self->filterUncompressed($buffer); my $buffer_length = defined $$buffer ? length($$buffer) : 0 ; *$self->{UnCompSize}->add($buffer_length) ; my $outBuffer=''; my $status = *$self->{Compress}->compr($buffer, $outBuffer) ; return $self->saveErrorString(undef, *$self->{Compress}{Error}, *$self->{Compress}{ErrorNo}) if $status == STATUS_ERROR; *$self->{CompSize}->add(length $outBuffer) ; $self->outputPayload($outBuffer) or return undef; return $buffer_length; } sub print { my $self = shift; #if (ref $self) { # $self = *$self{GLOB} ; #} if (defined $\) { if (defined $,) { defined $self->syswrite(join($,, @_) . $\); } else { defined $self->syswrite(join("", @_) . $\); } } else { if (defined $,) { defined $self->syswrite(join($,, @_)); } else { defined $self->syswrite(join("", @_)); } } } sub printf { my $self = shift; my $fmt = shift; defined $self->syswrite(sprintf($fmt, @_)); } sub _flushCompressed { my $self = shift ; my $outBuffer=''; my $status = *$self->{Compress}->flush($outBuffer, @_) ; return $self->saveErrorString(0, *$self->{Compress}{Error}, *$self->{Compress}{ErrorNo}) if $status == STATUS_ERROR; if ( defined *$self->{FH} ) { *$self->{FH}->clearerr(); } *$self->{CompSize}->add(length $outBuffer) ; $self->outputPayload($outBuffer) or return 0; return 1; } sub flush { my $self = shift ; $self->_flushCompressed(@_) or return 0; if ( defined *$self->{FH} ) { defined *$self->{FH}->flush() or return $self->saveErrorString(0, $!, $!); } return 1; } sub beforePayload { } sub _newStream { my $self = shift ; my $got = shift; my $class = ref $self; $self->_writeTrailer() or return 0 ; $self->ckParams($got) or $self->croakError("newStream: $self->{Error}"); if ($got->getValue('encode')) { my $want_encoding = $got->getValue('encode'); *$self->{Encoding} = IO::Compress::Base::Common::getEncoding($self, $class, $want_encoding); } else { *$self->{Encoding} = undef; } *$self->{Compress} = $self->mkComp($got) or return 0; *$self->{Header} = $self->mkHeader($got) ; $self->output(*$self->{Header} ) or return 0; *$self->{UnCompSize}->reset(); *$self->{CompSize}->reset(); $self->beforePayload(); return 1 ; } sub newStream { my $self = shift ; my $got = $self->checkParams('newStream', *$self->{Got}, @_) or return 0 ; $self->_newStream($got); # *$self->{Compress} = $self->mkComp($got) # or return 0; # # *$self->{Header} = $self->mkHeader($got) ; # $self->output(*$self->{Header} ) # or return 0; # # *$self->{UnCompSize}->reset(); # *$self->{CompSize}->reset(); # # $self->beforePayload(); # # return 1 ; } sub reset { my $self = shift ; return *$self->{Compress}->reset() ; } sub _writeTrailer { my $self = shift ; my $trailer = ''; my $status = *$self->{Compress}->close($trailer) ; return $self->saveErrorString(0, *$self->{Compress}{Error}, *$self->{Compress}{ErrorNo}) if $status == STATUS_ERROR; *$self->{CompSize}->add(length $trailer) ; $trailer .= $self->mkTrailer(); defined $trailer or return 0; return $self->output($trailer); } sub _writeFinalTrailer { my $self = shift ; return $self->output($self->mkFinalTrailer()); } sub close { my $self = shift ; return 1 if *$self->{Closed} || ! *$self->{Compress} ; *$self->{Closed} = 1 ; untie *$self if $] >= 5.008 ; *$self->{FlushPending} = 1 ; $self->_writeTrailer() or return 0 ; $self->_writeFinalTrailer() or return 0 ; $self->output( "", 1 ) or return 0; if (defined *$self->{FH}) { if ((! *$self->{Handle} || *$self->{AutoClose}) && ! *$self->{StdIO}) { $! = 0 ; *$self->{FH}->close() or return $self->saveErrorString(0, $!, $!); } delete *$self->{FH} ; # This delete can set $! in older Perls, so reset the errno $! = 0 ; } return 1; } #sub total_in #sub total_out #sub msg # #sub crc #{ # my $self = shift ; # return *$self->{Compress}->crc32() ; #} # #sub msg #{ # my $self = shift ; # return *$self->{Compress}->msg() ; #} # #sub dict_adler #{ # my $self = shift ; # return *$self->{Compress}->dict_adler() ; #} # #sub get_Level #{ # my $self = shift ; # return *$self->{Compress}->get_Level() ; #} # #sub get_Strategy #{ # my $self = shift ; # return *$self->{Compress}->get_Strategy() ; #} sub tell { my $self = shift ; return *$self->{UnCompSize}->get32bit() ; } sub eof { my $self = shift ; return *$self->{Closed} ; } sub seek { my $self = shift ; my $position = shift; my $whence = shift ; my $here = $self->tell() ; my $target = 0 ; #use IO::Handle qw(SEEK_SET SEEK_CUR SEEK_END); use IO::Handle ; if ($whence == IO::Handle::SEEK_SET) { $target = $position ; } elsif ($whence == IO::Handle::SEEK_CUR || $whence == IO::Handle::SEEK_END) { $target = $here + $position ; } else { $self->croakError(*$self->{ClassName} . "::seek: unknown value, $whence, for whence parameter"); } # short circuit if seeking to current offset return 1 if $target == $here ; # Outlaw any attempt to seek backwards $self->croakError(*$self->{ClassName} . "::seek: cannot seek backwards") if $target < $here ; # Walk the file to the new offset my $offset = $target - $here ; my $buffer ; defined $self->syswrite("\x00" x $offset) or return 0; return 1 ; } sub binmode { 1; # my $self = shift ; # return defined *$self->{FH} # ? binmode *$self->{FH} # : 1 ; } sub fileno { my $self = shift ; return defined *$self->{FH} ? *$self->{FH}->fileno() : undef ; } sub opened { my $self = shift ; return ! *$self->{Closed} ; } sub autoflush { my $self = shift ; return defined *$self->{FH} ? *$self->{FH}->autoflush(@_) : undef ; } sub input_line_number { return undef ; } sub _notAvailable { my $name = shift ; return sub { Carp::croak "$name Not Available: File opened only for output" ; } ; } *read = _notAvailable('read'); *READ = _notAvailable('read'); *readline = _notAvailable('readline'); *READLINE = _notAvailable('readline'); *getc = _notAvailable('getc'); *GETC = _notAvailable('getc'); *FILENO = \&fileno; *PRINT = \&print; *PRINTF = \&printf; *WRITE = \&syswrite; *write = \&syswrite; *SEEK = \&seek; *TELL = \&tell; *EOF = \&eof; *CLOSE = \&close; *BINMODE = \&binmode; #*sysread = \&_notAvailable; #*syswrite = \&_write; 1; __END__ =head1 NAME IO::Compress::Base - Base Class for IO::Compress modules =head1 SYNOPSIS use IO::Compress::Base ; =head1 DESCRIPTION This module is not intended for direct use in application code. Its sole purpose is to be sub-classed by IO::Compress modules. =head1 SEE ALSO L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L L L, L, L, L =head1 AUTHOR This module was written by Paul Marquess, F. =head1 MODIFICATION HISTORY See the Changes file. =head1 COPYRIGHT AND LICENSE Copyright (c) 2005-2013 Paul Marquess. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. IO-Compress-2.063/lib/IO/Compress/FAQ.pod0000644000175000017500000004756212211042540016300 0ustar paulpaul =head1 NAME IO::Compress::FAQ -- Frequently Asked Questions about IO::Compress =head1 DESCRIPTION Common questions answered. =head1 GENERAL =head2 Compatibility with Unix compress/uncompress. Although C has a pair of functions called C and C, they are I related to the Unix programs of the same name. The C module is not compatible with Unix C. If you have the C program available, you can use this to read compressed files open F, "uncompress -c $filename |"; while () { ... Alternatively, if you have the C program available, you can use this to read compressed files open F, "gunzip -c $filename |"; while () { ... and this to write compress files, if you have the C program available open F, "| compress -c $filename "; print F "data"; ... close F ; =head2 Accessing .tar.Z files The C module can optionally use C (via the C module) to access tar files that have been compressed with C. Unfortunately tar files compressed with the Unix C utility cannot be read by C and so cannot be directly accessed by C. If the C or C programs are available, you can use one of these workarounds to read C<.tar.Z> files from C Firstly with C use strict; use warnings; use Archive::Tar; open F, "uncompress -c $filename |"; my $tar = Archive::Tar->new(*F); ... and this with C use strict; use warnings; use Archive::Tar; open F, "gunzip -c $filename |"; my $tar = Archive::Tar->new(*F); ... Similarly, if the C program is available, you can use this to write a C<.tar.Z> file use strict; use warnings; use Archive::Tar; use IO::File; my $fh = new IO::File "| compress -c >$filename"; my $tar = Archive::Tar->new(); ... $tar->write($fh); $fh->close ; =head2 How do I recompress using a different compression? This is easier that you might expect if you realise that all the C objects are derived from C and that all the C modules can read from an C filehandle. So, for example, say you have a file compressed with gzip that you want to recompress with bzip2. Here is all that is needed to carry out the recompression. use IO::Uncompress::Gunzip ':all'; use IO::Compress::Bzip2 ':all'; my $gzipFile = "somefile.gz"; my $bzipFile = "somefile.bz2"; my $gunzip = new IO::Uncompress::Gunzip $gzipFile or die "Cannot gunzip $gzipFile: $GunzipError\n" ; bzip2 $gunzip => $bzipFile or die "Cannot bzip2 to $bzipFile: $Bzip2Error\n" ; Note, there is a limitation of this technique. Some compression file formats store extra information along with the compressed data payload. For example, gzip can optionally store the original filename and Zip stores a lot of information about the original file. If the original compressed file contains any of this extra information, it will not be transferred to the new compressed file using the technique above. =head1 ZIP =head2 What Compression Types do IO::Compress::Zip & IO::Uncompress::Unzip support? The following compression formats are supported by C and C =over 5 =item * Store (method 0) No compression at all. =item * Deflate (method 8) This is the default compression used when creating a zip file with C. =item * Bzip2 (method 12) Only supported if the C module is installed. =item * Lzma (method 14) Only supported if the C module is installed. =back =head2 Can I Read/Write Zip files larger the 4 Gig? Yes, both the C and C modules support the zip feature called I. That allows them to read/write files/buffers larger than 4Gig. If you are creating a Zip file using the one-shot interface, and any of the input files is greater than 4Gig, a zip64 complaint zip file will be created. zip "really-large-file" => "my.zip"; Similarly with the one-shot interface, if the input is a buffer larger than 4 Gig, a zip64 complaint zip file will be created. zip \$really_large_buffer => "my.zip"; The one-shot interface allows you to force the creation of a zip64 zip file by including the C option. zip $filehandle => "my.zip", Zip64 => 1; If you want to create a zip64 zip file with the OO interface you must specify the C option. my $zip = new IO::Compress::Zip "whatever", Zip64 => 1; When uncompressing with C, it will automatically detect if the zip file is zip64. If you intend to manipulate the Zip64 zip files created with C using an external zip/unzip, make sure that it supports Zip64. In particular, if you are using Info-Zip you need to have zip version 3.x or better to update a Zip64 archive and unzip version 6.x to read a zip64 archive. =head2 Can I write more that 64K entries is a Zip files? Yes. Zip64 allows this. See previous question. =head2 Zip Resources The primary reference for zip files is the "appnote" document available at L An alternatively is the Info-Zip appnote. This is available from L =head1 GZIP =head2 Gzip Resources The primary reference for gzip files is RFC 1952 L The primary site for gzip is F. =head2 Dealing with Concatenated gzip files If the gunzip program encounters a file containing multiple gzip files concatenated together it will automatically uncompress them all. The example below illustrates this behaviour $ echo abc | gzip -c >x.gz $ echo def | gzip -c >>x.gz $ gunzip -c x.gz abc def By default C will I behave like the gunzip program. It will only uncompress the first gzip data stream in the file, as shown below $ perl -MIO::Uncompress::Gunzip=:all -e 'gunzip "x.gz" => \*STDOUT' abc To force C to uncompress all the gzip data streams, include the C option, as shown below $ perl -MIO::Uncompress::Gunzip=:all -e 'gunzip "x.gz" => \*STDOUT, MultiStream => 1' abc def =head1 ZLIB =head2 Zlib Resources The primary site for the I compression library is F. =head1 Bzip2 =head2 Bzip2 Resources The primary site for bzip2 is F. =head2 Dealing with Concatenated bzip2 files If the bunzip2 program encounters a file containing multiple bzip2 files concatenated together it will automatically uncompress them all. The example below illustrates this behaviour $ echo abc | bzip2 -c >x.bz2 $ echo def | bzip2 -c >>x.bz2 $ bunzip2 -c x.bz2 abc def By default C will I behave like the bunzip2 program. It will only uncompress the first bunzip2 data stream in the file, as shown below $ perl -MIO::Uncompress::Bunzip2=:all -e 'bunzip2 "x.bz2" => \*STDOUT' abc To force C to uncompress all the bzip2 data streams, include the C option, as shown below $ perl -MIO::Uncompress::Bunzip2=:all -e 'bunzip2 "x.bz2" => \*STDOUT, MultiStream => 1' abc def =head2 Interoperating with Pbzip2 Pbzip2 (L) is a parallel implementation of bzip2. The output from pbzip2 consists of a series of concatenated bzip2 data streams. By default C will only uncompress the first bzip2 data stream in a pbzip2 file. To uncompress the complete pbzip2 file you must include the C option, like this. bunzip2 $input => \$output, MultiStream => 1 or die "bunzip2 failed: $Bunzip2Error\n"; =head1 HTTP & NETWORK =head2 Apache::GZip Revisited Below is a mod_perl Apache compression module, called C, taken from F package Apache::GZip; #File: Apache::GZip.pm use strict vars; use Apache::Constants ':common'; use Compress::Zlib; use IO::File; use constant GZIP_MAGIC => 0x1f8b; use constant OS_MAGIC => 0x03; sub handler { my $r = shift; my ($fh,$gz); my $file = $r->filename; return DECLINED unless $fh=IO::File->new($file); $r->header_out('Content-Encoding'=>'gzip'); $r->send_http_header; return OK if $r->header_only; tie *STDOUT,'Apache::GZip',$r; print($_) while <$fh>; untie *STDOUT; return OK; } sub TIEHANDLE { my($class,$r) = @_; # initialize a deflation stream my $d = deflateInit(-WindowBits=>-MAX_WBITS()) || return undef; # gzip header -- don't ask how I found out $r->print(pack("nccVcc",GZIP_MAGIC,Z_DEFLATED,0,time(),0,OS_MAGIC)); return bless { r => $r, crc => crc32(undef), d => $d, l => 0 },$class; } sub PRINT { my $self = shift; foreach (@_) { # deflate the data my $data = $self->{d}->deflate($_); $self->{r}->print($data); # keep track of its length and crc $self->{l} += length($_); $self->{crc} = crc32($_,$self->{crc}); } } sub DESTROY { my $self = shift; # flush the output buffers my $data = $self->{d}->flush; $self->{r}->print($data); # print the CRC and the total length (uncompressed) $self->{r}->print(pack("LL",@{$self}{qw/crc l/})); } 1; Here's the Apache configuration entry you'll need to make use of it. Once set it will result in everything in the /compressed directory will be compressed automagically. SetHandler perl-script PerlHandler Apache::GZip Although at first sight there seems to be quite a lot going on in C, you could sum up what the code was doing as follows -- read the contents of the file in C<< $r->filename >>, compress it and write the compressed data to standard output. That's all. This code has to jump through a few hoops to achieve this because =over =item 1. The gzip support in C version 1.x can only work with a real filesystem filehandle. The filehandles used by Apache modules are not associated with the filesystem. =item 2. That means all the gzip support has to be done by hand - in this case by creating a tied filehandle to deal with creating the gzip header and trailer. =back C doesn't have that filehandle limitation (this was one of the reasons for writing it in the first place). So if C is used instead of C the whole tied filehandle code can be removed. Here is the rewritten code. package Apache::GZip; use strict vars; use Apache::Constants ':common'; use IO::Compress::Gzip; use IO::File; sub handler { my $r = shift; my ($fh,$gz); my $file = $r->filename; return DECLINED unless $fh=IO::File->new($file); $r->header_out('Content-Encoding'=>'gzip'); $r->send_http_header; return OK if $r->header_only; my $gz = new IO::Compress::Gzip '-', Minimal => 1 or return DECLINED ; print $gz $_ while <$fh>; return OK; } or even more succinctly, like this, using a one-shot gzip package Apache::GZip; use strict vars; use Apache::Constants ':common'; use IO::Compress::Gzip qw(gzip); sub handler { my $r = shift; $r->header_out('Content-Encoding'=>'gzip'); $r->send_http_header; return OK if $r->header_only; gzip $r->filename => '-', Minimal => 1 or return DECLINED ; return OK; } 1; The use of one-shot C above just reads from C<< $r->filename >> and writes the compressed data to standard output. Note the use of the C option in the code above. When using gzip for Content-Encoding you should I use this option. In the example above it will prevent the filename being included in the gzip header and make the size of the gzip data stream a slight bit smaller. =head2 Compressed files and Net::FTP The C module provides two low-level methods called C and C that both return filehandles. These filehandles can used with the C modules to compress or uncompress files read from or written to an FTP Server on the fly, without having to create a temporary file. Firstly, here is code that uses C to uncompressed a file as it is read from the FTP Server. use Net::FTP; use IO::Uncompress::Gunzip qw(:all); my $ftp = new Net::FTP ... my $retr_fh = $ftp->retr($compressed_filename); gunzip $retr_fh => $outFilename, AutoClose => 1 or die "Cannot uncompress '$compressed_file': $GunzipError\n"; and this to compress a file as it is written to the FTP Server use Net::FTP; use IO::Compress::Gzip qw(:all); my $stor_fh = $ftp->stor($filename); gzip "filename" => $stor_fh, AutoClose => 1 or die "Cannot compress '$filename': $GzipError\n"; =head1 MISC =head2 Using C to uncompress data embedded in a larger file/buffer. A fairly common use-case is where compressed data is embedded in a larger file/buffer and you want to read both. As an example consider the structure of a zip file. This is a well-defined file format that mixes both compressed and uncompressed sections of data in a single file. For the purposes of this discussion you can think of a zip file as sequence of compressed data streams, each of which is prefixed by an uncompressed local header. The local header contains information about the compressed data stream, including the name of the compressed file and, in particular, the length of the compressed data stream. To illustrate how to use C here is a script that walks a zip file and prints out how many lines are in each compressed file (if you intend write code to walking through a zip file for real see L ). Also, although this example uses the zlib-based compression, the technique can be used by the other C modules. use strict; use warnings; use IO::File; use IO::Uncompress::RawInflate qw(:all); use constant ZIP_LOCAL_HDR_SIG => 0x04034b50; use constant ZIP_LOCAL_HDR_LENGTH => 30; my $file = $ARGV[0] ; my $fh = new IO::File "<$file" or die "Cannot open '$file': $!\n"; while (1) { my $sig; my $buffer; my $x ; ($x = $fh->read($buffer, ZIP_LOCAL_HDR_LENGTH)) == ZIP_LOCAL_HDR_LENGTH or die "Truncated file: $!\n"; my $signature = unpack ("V", substr($buffer, 0, 4)); last unless $signature == ZIP_LOCAL_HDR_SIG; # Read Local Header my $gpFlag = unpack ("v", substr($buffer, 6, 2)); my $compressedMethod = unpack ("v", substr($buffer, 8, 2)); my $compressedLength = unpack ("V", substr($buffer, 18, 4)); my $uncompressedLength = unpack ("V", substr($buffer, 22, 4)); my $filename_length = unpack ("v", substr($buffer, 26, 2)); my $extra_length = unpack ("v", substr($buffer, 28, 2)); my $filename ; $fh->read($filename, $filename_length) == $filename_length or die "Truncated file\n"; $fh->read($buffer, $extra_length) == $extra_length or die "Truncated file\n"; if ($compressedMethod != 8 && $compressedMethod != 0) { warn "Skipping file '$filename' - not deflated $compressedMethod\n"; $fh->read($buffer, $compressedLength) == $compressedLength or die "Truncated file\n"; next; } if ($compressedMethod == 0 && $gpFlag & 8 == 8) { die "Streamed Stored not supported for '$filename'\n"; } next if $compressedLength == 0; # Done reading the Local Header my $inf = new IO::Uncompress::RawInflate $fh, Transparent => 1, InputLength => $compressedLength or die "Cannot uncompress $file [$filename]: $RawInflateError\n" ; my $line_count = 0; while (<$inf>) { ++ $line_count; } print "$filename: $line_count\n"; } The majority of the code above is concerned with reading the zip local header data. The code that I want to focus on is at the bottom. while (1) { # read local zip header data # get $filename # get $compressedLength my $inf = new IO::Uncompress::RawInflate $fh, Transparent => 1, InputLength => $compressedLength or die "Cannot uncompress $file [$filename]: $RawInflateError\n" ; my $line_count = 0; while (<$inf>) { ++ $line_count; } print "$filename: $line_count\n"; } The call to C creates a new filehandle C<$inf> that can be used to read from the parent filehandle C<$fh>, uncompressing it as it goes. The use of the C option will guarantee that I C<$compressedLength> bytes of compressed data will be read from the C<$fh> filehandle (The only exception is for an error case like a truncated file or a corrupt data stream). This means that once RawInflate is finished C<$fh> will be left at the byte directly after the compressed data stream. Now consider what the code looks like without C while (1) { # read local zip header data # get $filename # get $compressedLength # read all the compressed data into $data read($fh, $data, $compressedLength); my $inf = new IO::Uncompress::RawInflate \$data, Transparent => 1, or die "Cannot uncompress $file [$filename]: $RawInflateError\n" ; my $line_count = 0; while (<$inf>) { ++ $line_count; } print "$filename: $line_count\n"; } The difference here is the addition of the temporary variable C<$data>. This is used to store a copy of the compressed data while it is being uncompressed. If you know that C<$compressedLength> isn't that big then using temporary storage won't be a problem. But if C<$compressedLength> is very large or you are writing an application that other people will use, and so have no idea how big C<$compressedLength> will be, it could be an issue. Using C avoids the use of temporary storage and means the application can cope with large compressed data streams. One final point -- obviously C can only be used whenever you know the length of the compressed data beforehand, like here with a zip file. =head1 SEE ALSO L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L L L, L, L, L =head1 AUTHOR This module was written by Paul Marquess, F. =head1 MODIFICATION HISTORY See the Changes file. =head1 COPYRIGHT AND LICENSE Copyright (c) 2005-2013 Paul Marquess. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. IO-Compress-2.063/lib/IO/Compress/Zip.pm0000644000175000017500000016136412211042540016262 0ustar paulpaulpackage IO::Compress::Zip ; use strict ; use warnings; use bytes; use IO::Compress::Base::Common 2.063 qw(:Status ); use IO::Compress::RawDeflate 2.063 (); use IO::Compress::Adapter::Deflate 2.063 ; use IO::Compress::Adapter::Identity 2.063 ; use IO::Compress::Zlib::Extra 2.063 ; use IO::Compress::Zip::Constants 2.063 ; use File::Spec(); use Config; use Compress::Raw::Zlib 2.063 (); BEGIN { eval { require IO::Compress::Adapter::Bzip2 ; import IO::Compress::Adapter::Bzip2 2.063 ; require IO::Compress::Bzip2 ; import IO::Compress::Bzip2 2.063 ; } ; eval { require IO::Compress::Adapter::Lzma ; import IO::Compress::Adapter::Lzma 2.063 ; require IO::Compress::Lzma ; import IO::Compress::Lzma 2.063 ; } ; } require Exporter ; our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, %DEFLATE_CONSTANTS, $ZipError); $VERSION = '2.063'; $ZipError = ''; @ISA = qw(Exporter IO::Compress::RawDeflate); @EXPORT_OK = qw( $ZipError zip ) ; %EXPORT_TAGS = %IO::Compress::RawDeflate::DEFLATE_CONSTANTS ; push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ; $EXPORT_TAGS{zip_method} = [qw( ZIP_CM_STORE ZIP_CM_DEFLATE ZIP_CM_BZIP2 ZIP_CM_LZMA)]; push @{ $EXPORT_TAGS{all} }, @{ $EXPORT_TAGS{zip_method} }; Exporter::export_ok_tags('all'); sub new { my $class = shift ; my $obj = IO::Compress::Base::Common::createSelfTiedObject($class, \$ZipError); $obj->_create(undef, @_); } sub zip { my $obj = IO::Compress::Base::Common::createSelfTiedObject(undef, \$ZipError); return $obj->_def(@_); } sub isMethodAvailable { my $method = shift; # Store & Deflate are always available return 1 if $method == ZIP_CM_STORE || $method == ZIP_CM_DEFLATE ; return 1 if $method == ZIP_CM_BZIP2 and defined $IO::Compress::Adapter::Bzip2::VERSION; return 1 if $method == ZIP_CM_LZMA and defined $IO::Compress::Adapter::Lzma::VERSION; return 0; } sub beforePayload { my $self = shift ; if (*$self->{ZipData}{Sparse} ) { my $inc = 1024 * 100 ; my $NULLS = ("\x00" x $inc) ; my $sparse = *$self->{ZipData}{Sparse} ; *$self->{CompSize}->add( $sparse ); *$self->{UnCompSize}->add( $sparse ); *$self->{FH}->seek($sparse, IO::Handle::SEEK_CUR); *$self->{ZipData}{CRC32} = Compress::Raw::Zlib::crc32($NULLS, *$self->{ZipData}{CRC32}) for 1 .. int $sparse / $inc; *$self->{ZipData}{CRC32} = Compress::Raw::Zlib::crc32(substr($NULLS, 0, $sparse % $inc), *$self->{ZipData}{CRC32}) if $sparse % $inc; } } sub mkComp { my $self = shift ; my $got = shift ; my ($obj, $errstr, $errno) ; if (*$self->{ZipData}{Method} == ZIP_CM_STORE) { ($obj, $errstr, $errno) = IO::Compress::Adapter::Identity::mkCompObject( $got->getValue('level'), $got->getValue('strategy') ); *$self->{ZipData}{CRC32} = Compress::Raw::Zlib::crc32(undef); } elsif (*$self->{ZipData}{Method} == ZIP_CM_DEFLATE) { ($obj, $errstr, $errno) = IO::Compress::Adapter::Deflate::mkCompObject( $got->getValue('crc32'), $got->getValue('adler32'), $got->getValue('level'), $got->getValue('strategy') ); } elsif (*$self->{ZipData}{Method} == ZIP_CM_BZIP2) { ($obj, $errstr, $errno) = IO::Compress::Adapter::Bzip2::mkCompObject( $got->getValue('blocksize100k'), $got->getValue('workfactor'), $got->getValue('verbosity') ); *$self->{ZipData}{CRC32} = Compress::Raw::Zlib::crc32(undef); } elsif (*$self->{ZipData}{Method} == ZIP_CM_LZMA) { ($obj, $errstr, $errno) = IO::Compress::Adapter::Lzma::mkRawZipCompObject($got->getValue('preset'), $got->getValue('extreme'), ); *$self->{ZipData}{CRC32} = Compress::Raw::Zlib::crc32(undef); } return $self->saveErrorString(undef, $errstr, $errno) if ! defined $obj; if (! defined *$self->{ZipData}{SizesOffset}) { *$self->{ZipData}{SizesOffset} = 0; *$self->{ZipData}{Offset} = new U64 ; } *$self->{ZipData}{AnyZip64} = 0 if ! defined *$self->{ZipData}{AnyZip64} ; return $obj; } sub reset { my $self = shift ; *$self->{Compress}->reset(); *$self->{ZipData}{CRC32} = Compress::Raw::Zlib::crc32(''); return STATUS_OK; } sub filterUncompressed { my $self = shift ; if (*$self->{ZipData}{Method} == ZIP_CM_DEFLATE) { *$self->{ZipData}{CRC32} = *$self->{Compress}->crc32(); } else { *$self->{ZipData}{CRC32} = Compress::Raw::Zlib::crc32(${$_[0]}, *$self->{ZipData}{CRC32}); } } sub canonicalName { # This sub is derived from Archive::Zip::_asZipDirName # Return the normalized name as used in a zip file (path # separators become slashes, etc.). # Will translate internal slashes in path components (i.e. on Macs) to # underscores. Discards volume names. # When $forceDir is set, returns paths with trailing slashes # # input output # . '.' # ./a a # ./a/b a/b # ./a/b/ a/b # a/b/ a/b # /a/b/ a/b # c:\a\b\c.doc a/b/c.doc # on Windows # "i/o maps:whatever" i_o maps/whatever # on Macs my $name = shift; my $forceDir = shift ; my ( $volume, $directories, $file ) = File::Spec->splitpath( File::Spec->canonpath($name), $forceDir ); my @dirs = map { $_ =~ s{/}{_}g; $_ } File::Spec->splitdir($directories); if ( @dirs > 0 ) { pop (@dirs) if $dirs[-1] eq '' } # remove empty component push @dirs, defined($file) ? $file : '' ; my $normalised_path = join '/', @dirs; # Leading directory separators should not be stored in zip archives. # Example: # C:\a\b\c\ a/b/c # C:\a\b\c.txt a/b/c.txt # /a/b/c/ a/b/c # /a/b/c.txt a/b/c.txt $normalised_path =~ s{^/}{}; # remove leading separator return $normalised_path; } sub mkHeader { my $self = shift; my $param = shift ; *$self->{ZipData}{LocalHdrOffset} = U64::clone(*$self->{ZipData}{Offset}); my $comment = ''; $comment = $param->valueOrDefault('comment') ; my $filename = ''; $filename = $param->valueOrDefault('name') ; $filename = canonicalName($filename) if length $filename && $param->getValue('canonicalname') ; if (defined *$self->{ZipData}{FilterName} ) { local *_ = \$filename ; &{ *$self->{ZipData}{FilterName} }() ; } # if ( $param->getValue('utf8') ) { # require Encode ; # $filename = Encode::encode_utf8($filename) # if length $filename ; # $comment = Encode::encode_utf8($comment) # if length $comment ; # } my $hdr = ''; my $time = _unixToDosTime($param->getValue('time')); my $extra = ''; my $ctlExtra = ''; my $empty = 0; my $osCode = $param->getValue('os_code') ; my $extFileAttr = 0 ; # This code assumes Unix. # TODO - revisit this $extFileAttr = 0100644 << 16 if $osCode == ZIP_OS_CODE_UNIX ; if (*$self->{ZipData}{Zip64}) { $empty = IO::Compress::Base::Common::MAX32; my $x = ''; $x .= pack "V V", 0, 0 ; # uncompressedLength $x .= pack "V V", 0, 0 ; # compressedLength $extra .= IO::Compress::Zlib::Extra::mkSubField(ZIP_EXTRA_ID_ZIP64, $x); } if (! $param->getValue('minimal')) { if ($param->parsed('mtime')) { $extra .= mkExtendedTime($param->getValue('mtime'), $param->getValue('atime'), $param->getValue('ctime')); $ctlExtra .= mkExtendedTime($param->getValue('mtime')); } if ( $osCode == ZIP_OS_CODE_UNIX ) { if ( $param->getValue('want_exunixn') ) { my $ux3 = mkUnixNExtra( @{ $param->getValue('want_exunixn') }); $extra .= $ux3; $ctlExtra .= $ux3; } if ( $param->getValue('exunix2') ) { $extra .= mkUnix2Extra( @{ $param->getValue('exunix2') }); $ctlExtra .= mkUnix2Extra(); } } $extFileAttr = $param->getValue('extattr') if defined $param->getValue('extattr') ; $extra .= $param->getValue('extrafieldlocal') if defined $param->getValue('extrafieldlocal'); $ctlExtra .= $param->getValue('extrafieldcentral') if defined $param->getValue('extrafieldcentral'); } my $method = *$self->{ZipData}{Method} ; my $gpFlag = 0 ; $gpFlag |= ZIP_GP_FLAG_STREAMING_MASK if *$self->{ZipData}{Stream} ; $gpFlag |= ZIP_GP_FLAG_LZMA_EOS_PRESENT if $method == ZIP_CM_LZMA ; # $gpFlag |= ZIP_GP_FLAG_LANGUAGE_ENCODING # if $param->getValue('utf8') && (length($filename) || length($comment)); my $version = $ZIP_CM_MIN_VERSIONS{$method}; $version = ZIP64_MIN_VERSION if ZIP64_MIN_VERSION > $version && *$self->{ZipData}{Zip64}; my $madeBy = ($param->getValue('os_code') << 8) + $version; my $extract = $version; *$self->{ZipData}{Version} = $version; *$self->{ZipData}{MadeBy} = $madeBy; my $ifa = 0; $ifa |= ZIP_IFA_TEXT_MASK if $param->getValue('textflag'); $hdr .= pack "V", ZIP_LOCAL_HDR_SIG ; # signature $hdr .= pack 'v', $extract ; # extract Version & OS $hdr .= pack 'v', $gpFlag ; # general purpose flag (set streaming mode) $hdr .= pack 'v', $method ; # compression method (deflate) $hdr .= pack 'V', $time ; # last mod date/time $hdr .= pack 'V', 0 ; # crc32 - 0 when streaming $hdr .= pack 'V', $empty ; # compressed length - 0 when streaming $hdr .= pack 'V', $empty ; # uncompressed length - 0 when streaming $hdr .= pack 'v', length $filename ; # filename length $hdr .= pack 'v', length $extra ; # extra length $hdr .= $filename ; # Remember the offset for the compressed & uncompressed lengths in the # local header. if (*$self->{ZipData}{Zip64}) { *$self->{ZipData}{SizesOffset} = *$self->{ZipData}{Offset}->get64bit() + length($hdr) + 4 ; } else { *$self->{ZipData}{SizesOffset} = *$self->{ZipData}{Offset}->get64bit() + 18; } $hdr .= $extra ; my $ctl = ''; $ctl .= pack "V", ZIP_CENTRAL_HDR_SIG ; # signature $ctl .= pack 'v', $madeBy ; # version made by $ctl .= pack 'v', $extract ; # extract Version $ctl .= pack 'v', $gpFlag ; # general purpose flag (streaming mode) $ctl .= pack 'v', $method ; # compression method (deflate) $ctl .= pack 'V', $time ; # last mod date/time $ctl .= pack 'V', 0 ; # crc32 $ctl .= pack 'V', $empty ; # compressed length $ctl .= pack 'V', $empty ; # uncompressed length $ctl .= pack 'v', length $filename ; # filename length *$self->{ZipData}{ExtraOffset} = length $ctl; *$self->{ZipData}{ExtraSize} = length $ctlExtra ; $ctl .= pack 'v', length $ctlExtra ; # extra length $ctl .= pack 'v', length $comment ; # file comment length $ctl .= pack 'v', 0 ; # disk number start $ctl .= pack 'v', $ifa ; # internal file attributes $ctl .= pack 'V', $extFileAttr ; # external file attributes # offset to local hdr if (*$self->{ZipData}{LocalHdrOffset}->is64bit() ) { $ctl .= pack 'V', IO::Compress::Base::Common::MAX32 ; } else { $ctl .= *$self->{ZipData}{LocalHdrOffset}->getPacked_V32() ; } $ctl .= $filename ; $ctl .= $ctlExtra ; $ctl .= $comment ; *$self->{ZipData}{Offset}->add32(length $hdr) ; *$self->{ZipData}{CentralHeader} = $ctl; return $hdr; } sub mkTrailer { my $self = shift ; my $crc32 ; if (*$self->{ZipData}{Method} == ZIP_CM_DEFLATE) { $crc32 = pack "V", *$self->{Compress}->crc32(); } else { $crc32 = pack "V", *$self->{ZipData}{CRC32}; } my $ctl = *$self->{ZipData}{CentralHeader} ; my $sizes ; if (! *$self->{ZipData}{Zip64}) { $sizes .= *$self->{CompSize}->getPacked_V32() ; # Compressed size $sizes .= *$self->{UnCompSize}->getPacked_V32() ; # Uncompressed size } else { $sizes .= *$self->{CompSize}->getPacked_V64() ; # Compressed size $sizes .= *$self->{UnCompSize}->getPacked_V64() ; # Uncompressed size } my $data = $crc32 . $sizes ; my $xtrasize = *$self->{UnCompSize}->getPacked_V64() ; # Uncompressed size $xtrasize .= *$self->{CompSize}->getPacked_V64() ; # Compressed size my $hdr = ''; if (*$self->{ZipData}{Stream}) { $hdr = pack "V", ZIP_DATA_HDR_SIG ; # signature $hdr .= $data ; } else { $self->writeAt(*$self->{ZipData}{LocalHdrOffset}->get64bit() + 14, $crc32) or return undef; $self->writeAt(*$self->{ZipData}{SizesOffset}, *$self->{ZipData}{Zip64} ? $xtrasize : $sizes) or return undef; } # Central Header Record/Zip64 extended field substr($ctl, 16, length $crc32) = $crc32 ; my $x = ''; # uncompressed length if (*$self->{UnCompSize}->isAlmost64bit() || *$self->{ZipData}{Zip64} > 1) { $x .= *$self->{UnCompSize}->getPacked_V64() ; } else { substr($ctl, 24, 4) = *$self->{UnCompSize}->getPacked_V32() ; } # compressed length if (*$self->{CompSize}->isAlmost64bit() || *$self->{ZipData}{Zip64} > 1) { $x .= *$self->{CompSize}->getPacked_V64() ; } else { substr($ctl, 20, 4) = *$self->{CompSize}->getPacked_V32() ; } # Local Header offset $x .= *$self->{ZipData}{LocalHdrOffset}->getPacked_V64() if *$self->{ZipData}{LocalHdrOffset}->is64bit() ; # disk no - always zero, so don't need it #$x .= pack "V", 0 ; if (length $x) { my $xtra = IO::Compress::Zlib::Extra::mkSubField(ZIP_EXTRA_ID_ZIP64, $x); $ctl .= $xtra ; substr($ctl, *$self->{ZipData}{ExtraOffset}, 2) = pack 'v', *$self->{ZipData}{ExtraSize} + length $xtra; *$self->{ZipData}{AnyZip64} = 1; } *$self->{ZipData}{Offset}->add32(length($hdr)); *$self->{ZipData}{Offset}->add( *$self->{CompSize} ); push @{ *$self->{ZipData}{CentralDir} }, $ctl ; return $hdr; } sub mkFinalTrailer { my $self = shift ; my $comment = ''; $comment = *$self->{ZipData}{ZipComment} ; my $cd_offset = *$self->{ZipData}{Offset}->get32bit() ; # offset to start central dir my $entries = @{ *$self->{ZipData}{CentralDir} }; *$self->{ZipData}{AnyZip64} = 1 if *$self->{ZipData}{Offset}->is64bit || $entries >= 0xFFFF ; my $cd = join '', @{ *$self->{ZipData}{CentralDir} }; my $cd_len = length $cd ; my $z64e = ''; if ( *$self->{ZipData}{AnyZip64} ) { my $v = *$self->{ZipData}{Version} ; my $mb = *$self->{ZipData}{MadeBy} ; $z64e .= pack 'v', $mb ; # Version made by $z64e .= pack 'v', $v ; # Version to extract $z64e .= pack 'V', 0 ; # number of disk $z64e .= pack 'V', 0 ; # number of disk with central dir $z64e .= U64::pack_V64 $entries ; # entries in central dir on this disk $z64e .= U64::pack_V64 $entries ; # entries in central dir $z64e .= U64::pack_V64 $cd_len ; # size of central dir $z64e .= *$self->{ZipData}{Offset}->getPacked_V64() ; # offset to start central dir $z64e = pack("V", ZIP64_END_CENTRAL_REC_HDR_SIG) # signature . U64::pack_V64(length $z64e) . $z64e ; *$self->{ZipData}{Offset}->add32(length $cd) ; $z64e .= pack "V", ZIP64_END_CENTRAL_LOC_HDR_SIG; # signature $z64e .= pack 'V', 0 ; # number of disk with central dir $z64e .= *$self->{ZipData}{Offset}->getPacked_V64() ; # offset to end zip64 central dir $z64e .= pack 'V', 1 ; # Total number of disks $cd_offset = IO::Compress::Base::Common::MAX32 ; $cd_len = IO::Compress::Base::Common::MAX32 if IO::Compress::Base::Common::isGeMax32 $cd_len ; $entries = 0xFFFF if $entries >= 0xFFFF ; } my $ecd = ''; $ecd .= pack "V", ZIP_END_CENTRAL_HDR_SIG ; # signature $ecd .= pack 'v', 0 ; # number of disk $ecd .= pack 'v', 0 ; # number of disk with central dir $ecd .= pack 'v', $entries ; # entries in central dir on this disk $ecd .= pack 'v', $entries ; # entries in central dir $ecd .= pack 'V', $cd_len ; # size of central dir $ecd .= pack 'V', $cd_offset ; # offset to start central dir $ecd .= pack 'v', length $comment ; # zipfile comment length $ecd .= $comment; return $cd . $z64e . $ecd ; } sub ckParams { my $self = shift ; my $got = shift; $got->setValue('crc32' => 1); if (! $got->parsed('time') ) { # Modification time defaults to now. $got->setValue('time' => time) ; } if ($got->parsed('extime') ) { my $timeRef = $got->getValue('extime'); if ( defined $timeRef) { return $self->saveErrorString(undef, "exTime not a 3-element array ref") if ref $timeRef ne 'ARRAY' || @$timeRef != 3; } $got->setValue("mtime", $timeRef->[1]); $got->setValue("atime", $timeRef->[0]); $got->setValue("ctime", $timeRef->[2]); } # Unix2/3 Extended Attribute for my $name (qw(exunix2 exunixn)) { if ($got->parsed($name) ) { my $idRef = $got->getValue($name); if ( defined $idRef) { return $self->saveErrorString(undef, "$name not a 2-element array ref") if ref $idRef ne 'ARRAY' || @$idRef != 2; } $got->setValue("uid", $idRef->[0]); $got->setValue("gid", $idRef->[1]); $got->setValue("want_$name", $idRef); } } *$self->{ZipData}{AnyZip64} = 1 if $got->getValue('zip64'); *$self->{ZipData}{Zip64} = $got->getValue('zip64'); *$self->{ZipData}{Stream} = $got->getValue('stream'); my $method = $got->getValue('method'); return $self->saveErrorString(undef, "Unknown Method '$method'") if ! defined $ZIP_CM_MIN_VERSIONS{$method}; return $self->saveErrorString(undef, "Bzip2 not available") if $method == ZIP_CM_BZIP2 and ! defined $IO::Compress::Adapter::Bzip2::VERSION; return $self->saveErrorString(undef, "Lzma not available") if $method == ZIP_CM_LZMA and ! defined $IO::Compress::Adapter::Lzma::VERSION; *$self->{ZipData}{Method} = $method; *$self->{ZipData}{ZipComment} = $got->getValue('zipcomment') ; for my $name (qw( extrafieldlocal extrafieldcentral )) { my $data = $got->getValue($name) ; if (defined $data) { my $bad = IO::Compress::Zlib::Extra::parseExtraField($data, 1, 0) ; return $self->saveErrorString(undef, "Error with $name Parameter: $bad") if $bad ; $got->setValue($name, $data) ; } } return undef if defined $IO::Compress::Bzip2::VERSION and ! IO::Compress::Bzip2::ckParams($self, $got); if ($got->parsed('sparse') ) { *$self->{ZipData}{Sparse} = $got->getValue('sparse') ; *$self->{ZipData}{Method} = ZIP_CM_STORE; } if ($got->parsed('filtername')) { my $v = $got->getValue('filtername') ; *$self->{ZipData}{FilterName} = $v if ref $v eq 'CODE' ; } return 1 ; } sub outputPayload { my $self = shift ; return 1 if *$self->{ZipData}{Sparse} ; return $self->output(@_); } #sub newHeader #{ # my $self = shift ; # # return $self->mkHeader(*$self->{Got}); #} our %PARAMS = ( 'stream' => [IO::Compress::Base::Common::Parse_boolean, 1], #'store' => [IO::Compress::Base::Common::Parse_boolean, 0], 'method' => [IO::Compress::Base::Common::Parse_unsigned, ZIP_CM_DEFLATE], # # Zip header fields 'minimal' => [IO::Compress::Base::Common::Parse_boolean, 0], 'zip64' => [IO::Compress::Base::Common::Parse_boolean, 0], 'comment' => [IO::Compress::Base::Common::Parse_any, ''], 'zipcomment'=> [IO::Compress::Base::Common::Parse_any, ''], 'name' => [IO::Compress::Base::Common::Parse_any, ''], 'filtername'=> [IO::Compress::Base::Common::Parse_code, undef], 'canonicalname'=> [IO::Compress::Base::Common::Parse_boolean, 0], # 'utf8' => [IO::Compress::Base::Common::Parse_boolean, 0], 'time' => [IO::Compress::Base::Common::Parse_any, undef], 'extime' => [IO::Compress::Base::Common::Parse_any, undef], 'exunix2' => [IO::Compress::Base::Common::Parse_any, undef], 'exunixn' => [IO::Compress::Base::Common::Parse_any, undef], 'extattr' => [IO::Compress::Base::Common::Parse_any, $Compress::Raw::Zlib::gzip_os_code == 3 ? 0100644 << 16 : 0], 'os_code' => [IO::Compress::Base::Common::Parse_unsigned, $Compress::Raw::Zlib::gzip_os_code], 'textflag' => [IO::Compress::Base::Common::Parse_boolean, 0], 'extrafieldlocal' => [IO::Compress::Base::Common::Parse_any, undef], 'extrafieldcentral'=> [IO::Compress::Base::Common::Parse_any, undef], # Lzma 'preset' => [IO::Compress::Base::Common::Parse_unsigned, 6], 'extreme' => [IO::Compress::Base::Common::Parse_boolean, 0], # For internal use only 'sparse' => [IO::Compress::Base::Common::Parse_unsigned, 0], IO::Compress::RawDeflate::getZlibParams(), defined $IO::Compress::Bzip2::VERSION ? IO::Compress::Bzip2::getExtraParams() : () ); sub getExtraParams { return %PARAMS ; } sub getInverseClass { return ('IO::Uncompress::Unzip', \$IO::Uncompress::Unzip::UnzipError); } sub getFileInfo { my $self = shift ; my $params = shift; my $filename = shift ; if (IO::Compress::Base::Common::isaScalar($filename)) { $params->setValue(zip64 => 1) if IO::Compress::Base::Common::isGeMax32 length (${ $filename }) ; return ; } my ($mode, $uid, $gid, $size, $atime, $mtime, $ctime) ; if ( $params->parsed('storelinks') ) { ($mode, $uid, $gid, $size, $atime, $mtime, $ctime) = (lstat($filename))[2, 4,5,7, 8,9,10] ; } else { ($mode, $uid, $gid, $size, $atime, $mtime, $ctime) = (stat($filename))[2, 4,5,7, 8,9,10] ; } $params->setValue(textflag => -T $filename ) if ! $params->parsed('textflag'); $params->setValue(zip64 => 1) if IO::Compress::Base::Common::isGeMax32 $size ; $params->setValue('name' => $filename) if ! $params->parsed('name') ; $params->setValue('time' => $mtime) if ! $params->parsed('time') ; if ( ! $params->parsed('extime')) { $params->setValue('mtime' => $mtime) ; $params->setValue('atime' => $atime) ; $params->setValue('ctime' => undef) ; # No Creation time # TODO - see if can fillout creation time on non-Unix } # NOTE - Unix specific code alert if (! $params->parsed('extattr')) { use Fcntl qw(:mode) ; my $attr = $mode << 16; $attr |= ZIP_A_RONLY if ($mode & S_IWRITE) == 0 ; $attr |= ZIP_A_DIR if ($mode & S_IFMT ) == S_IFDIR ; $params->setValue('extattr' => $attr); } $params->setValue('want_exunixn', [$uid, $gid]); $params->setValue('uid' => $uid) ; $params->setValue('gid' => $gid) ; } sub mkExtendedTime { # order expected is m, a, c my $times = ''; my $bit = 1 ; my $flags = 0; for my $time (@_) { if (defined $time) { $flags |= $bit; $times .= pack("V", $time); } $bit <<= 1 ; } return IO::Compress::Zlib::Extra::mkSubField(ZIP_EXTRA_ID_EXT_TIMESTAMP, pack("C", $flags) . $times); } sub mkUnix2Extra { my $ids = ''; for my $id (@_) { $ids .= pack("v", $id); } return IO::Compress::Zlib::Extra::mkSubField(ZIP_EXTRA_ID_INFO_ZIP_UNIX2, $ids); } sub mkUnixNExtra { my $uid = shift; my $gid = shift; # Assumes UID/GID are 32-bit my $ids ; $ids .= pack "C", 1; # version $ids .= pack "C", $Config{uidsize}; $ids .= pack "V", $uid; $ids .= pack "C", $Config{gidsize}; $ids .= pack "V", $gid; return IO::Compress::Zlib::Extra::mkSubField(ZIP_EXTRA_ID_INFO_ZIP_UNIXN, $ids); } # from Archive::Zip sub _unixToDosTime # Archive::Zip::Member { my $time_t = shift; # TODO - add something to cope with unix time < 1980 my ( $sec, $min, $hour, $mday, $mon, $year ) = localtime($time_t); my $dt = 0; $dt += ( $sec >> 1 ); $dt += ( $min << 5 ); $dt += ( $hour << 11 ); $dt += ( $mday << 16 ); $dt += ( ( $mon + 1 ) << 21 ); $dt += ( ( $year - 80 ) << 25 ); return $dt; } 1; __END__ =head1 NAME IO::Compress::Zip - Write zip files/buffers =head1 SYNOPSIS use IO::Compress::Zip qw(zip $ZipError) ; my $status = zip $input => $output [,OPTS] or die "zip failed: $ZipError\n"; my $z = new IO::Compress::Zip $output [,OPTS] or die "zip failed: $ZipError\n"; $z->print($string); $z->printf($format, $string); $z->write($string); $z->syswrite($string [, $length, $offset]); $z->flush(); $z->tell(); $z->eof(); $z->seek($position, $whence); $z->binmode(); $z->fileno(); $z->opened(); $z->autoflush(); $z->input_line_number(); $z->newStream( [OPTS] ); $z->deflateParams(); $z->close() ; $ZipError ; # IO::File mode print $z $string; printf $z $format, $string; tell $z eof $z seek $z, $position, $whence binmode $z fileno $z close $z ; =head1 DESCRIPTION This module provides a Perl interface that allows writing zip compressed data to files or buffer. The primary purpose of this module is to provide streaming write access to zip files and buffers. It is not a general-purpose file archiver. If that is what you want, check out C. At present three compression methods are supported by IO::Compress::Zip, namely Store (no compression at all), Deflate, Bzip2 and LZMA. Note that to create Bzip2 content, the module C must be installed. Note that to create LZMA content, the module C must be installed. For reading zip files/buffers, see the companion module L. =head1 Functional Interface A top-level function, C, is provided to carry out "one-shot" compression between buffers and/or files. For finer control over the compression process, see the L section. use IO::Compress::Zip qw(zip $ZipError) ; zip $input_filename_or_reference => $output_filename_or_reference [,OPTS] or die "zip failed: $ZipError\n"; The functional interface needs Perl5.005 or better. =head2 zip $input_filename_or_reference => $output_filename_or_reference [, OPTS] C expects at least two parameters, C<$input_filename_or_reference> and C<$output_filename_or_reference>. =head3 The C<$input_filename_or_reference> parameter The parameter, C<$input_filename_or_reference>, is used to define the source of the uncompressed data. It can take one of the following forms: =over 5 =item A filename If the <$input_filename_or_reference> parameter is a simple scalar, it is assumed to be a filename. This file will be opened for reading and the input data will be read from it. =item A filehandle If the C<$input_filename_or_reference> parameter is a filehandle, the input data will be read from it. The string '-' can be used as an alias for standard input. =item A scalar reference If C<$input_filename_or_reference> is a scalar reference, the input data will be read from C<$$input_filename_or_reference>. =item An array reference If C<$input_filename_or_reference> is an array reference, each element in the array must be a filename. The input data will be read from each file in turn. The complete array will be walked to ensure that it only contains valid filenames before any data is compressed. =item An Input FileGlob string If C<$input_filename_or_reference> is a string that is delimited by the characters "<" and ">" C will assume that it is an I. The input is the list of files that match the fileglob. See L for more details. =back If the C<$input_filename_or_reference> parameter is any other type, C will be returned. In addition, if C<$input_filename_or_reference> is a simple filename, the default values for the C, C