Data-TableReader-0.011/0000755000175000017500000000000013505216123015650 5ustar silverdirksilverdirkData-TableReader-0.011/lib/0000755000175000017500000000000013505216123016416 5ustar silverdirksilverdirkData-TableReader-0.011/lib/Data/0000755000175000017500000000000013505216123017267 5ustar silverdirksilverdirkData-TableReader-0.011/lib/Data/TableReader/0000755000175000017500000000000013505216123021441 5ustar silverdirksilverdirkData-TableReader-0.011/lib/Data/TableReader/Decoder/0000755000175000017500000000000013505216123023006 5ustar silverdirksilverdirkData-TableReader-0.011/lib/Data/TableReader/Decoder/Spreadsheet.pm0000644000175000017500000001224313505216123025615 0ustar silverdirksilverdirkpackage Data::TableReader::Decoder::Spreadsheet; use Moo 2; use Carp 'croak'; use IO::Handle; extends 'Data::TableReader::Decoder'; # ABSTRACT: Base class for implementing spreadsheet decoders our $VERSION = '0.011'; # VERSION has workbook => ( is => 'lazy' ); has sheet => ( is => 'ro' ); has xls_formatter => ( is => 'rw' ); # Arrayref of all sheets we can search has _sheets => ( is => 'lazy' ); sub _build__sheets { my $self= shift; # If we have ->sheet and it is a worksheet object, then no need to do anything else if ($self->sheet && ref($self->sheet) && ref($self->sheet)->can('get_cell')) { return [ $self->sheet ]; } # Else we need to scan sheets from the excel file. 
Make sure we have the file my @sheets= $self->workbook->worksheets; @sheets or croak "No worksheets in file?"; if (defined $self->sheet) { if (ref($self->sheet) eq 'Regexp') { @sheets= grep { $_->get_name =~ $self->sheet } @sheets; } elsif (ref($self->sheet) eq 'CODE') { @sheets= grep { $self->sheet->($_) } @sheets; } elsif (!ref $self->sheet) { @sheets= grep { $_->get_name eq $self->sheet } @sheets; } else { croak "Unknown type of sheet specification: ".$self->sheet; } } return \@sheets; } sub _oo_rowmax_fix { # openoffice saves bogus rowmax, try and fix my ($s, $rowmax)= @_; my $final_row_max= ($s and ref $s->{Cells} eq "ARRAY" and $#{$s->{Cells}} < $rowmax) # ? $#{$s->{Cells}} : $rowmax; return $final_row_max; } sub iterator { my $self= shift; my $sheets= $self->_sheets; my $sheet= $sheets->[0]; my ($colmin, $colmax)= $sheet? $sheet->col_range() : (0,-1); my ($rowmin, $rowmax)= $sheet? $sheet->row_range() : (0,-1); $rowmax= _oo_rowmax_fix $sheet, $rowmax; my $row= $rowmin-1; Data::TableReader::Decoder::Spreadsheet::_Iter->new( sub { my $slice= shift; return undef unless $row < $rowmax; ++$row; my $x; if ($slice) { return [ map { $x= ($x= $sheet->get_cell($row, $_)) && $x->value; defined $x? $x : '' } @$slice ]; } else { return [ map { $x= ($x= $sheet->get_cell($row, $_)) && $x->value; defined $x? $x : '' } 0 .. $colmax ]; } }, { sheets => $sheets, sheet_idx => 0, sheet_ref => \$sheet, row_ref => \$row, colmax_ref => \$colmax, rowmax_ref => \$rowmax, origin => [ $sheet, $row ], } ); } # If you need to subclass this iterator, don't. Just implement your own. # i.e. I'm not declaring this implementation stable, yet. 
use Data::TableReader::Iterator; BEGIN { @Data::TableReader::Decoder::Spreadsheet::_Iter::ISA= ('Data::TableReader::Iterator'); } sub Data::TableReader::Decoder::Spreadsheet::_Iter::position { my $f= shift->_fields; 'row '.${ $f->{row_ref} }; } sub Data::TableReader::Decoder::Spreadsheet::_Iter::progress { my $f= shift->_fields; return ${ $f->{row_ref} } / (${ $f->{rowmax_ref} } || 1); } sub Data::TableReader::Decoder::Spreadsheet::_Iter::tell { my $f= shift->_fields; return [ $f->{sheet_idx}, ${$f->{row_ref}} ]; } sub Data::TableReader::Decoder::Spreadsheet::_Iter::seek { my ($self, $to)= @_; my $f= $self->_fields; $to ||= $f->{origin}; my ($sheet_idx, $row)= @$to; my $sheet= $f->{sheets}[$sheet_idx]; my ($colmin, $colmax)= $sheet? $sheet->col_range() : (0,-1); my ($rowmin, $rowmax)= $sheet? $sheet->row_range() : (0,-1); $rowmax= _oo_rowmax_fix $sheet, $rowmax; $row= $rowmin-1 unless defined $row; $f->{sheet_idx}= $sheet_idx; ${$f->{sheet_ref}}= $sheet; ${$f->{row_ref}}= $row; ${$f->{colmax_ref}}= $colmax; ${$f->{rowmax_ref}}= $rowmax; 1; } sub Data::TableReader::Decoder::Spreadsheet::_Iter::next_dataset { my $self= shift; my $f= $self->_fields; return defined $f->{sheets}[ $f->{sheet_idx}+1 ] && $self->seek([ $f->{sheet_idx}+1 ]); } 1; __END__ =pod =encoding UTF-8 =head1 NAME Data::TableReader::Decoder::Spreadsheet - Base class for implementing spreadsheet decoders =head1 VERSION version 0.011 =head1 DESCRIPTION This is a base class for any file format that exposes a spreadsheet API compatible with L. =head1 ATTRIBUTES See attributes from parent class: L. =head2 workbook This is an instance of L, L, or L (which all happen to have the same API). Subclasses can lazy-build this from the C. =head2 sheet This is either a sheet name, a regex for matching a sheet name, or a parser's worksheet object. It is also optional; if not set, all sheets will be iterated. =head2 xls_formatter An optional object that is passed to Excel parsers L and L. 
It governs how raw data in cells is formatted into values depending on the type of the cell. The parsers create one of their own if none is provided, usually L. Note that it does not work for Spreadsheet::XLSX, which hardcodes the formatter as Spreadsheet::XLSX::Fmt2007. =head1 AUTHOR Michael Conrad =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2019 by Michael Conrad. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. =cut Data-TableReader-0.011/lib/Data/TableReader/Decoder/IdiotCSV.pm0000644000175000017500000000314413505216123024772 0ustar silverdirksilverdirkpackage Data::TableReader::Decoder::IdiotCSV; use Moo 2; use Try::Tiny; use Carp; use Log::Any '$log'; extends 'Data::TableReader::Decoder::CSV'; # ABSTRACT: Access rows of a badly formatted comma-delimited text file our $VERSION = '0.011'; # VERSION sub _build_parser { my $args= shift->_parser_args || {}; Data::TableReader::Decoder::CSV->default_csv_module->new({ binary => 1, allow_loose_quotes => 1, allow_whitespace => 1, auto_diag => 1, escape_char => undef, %$args, }); } 1; __END__ =pod =encoding UTF-8 =head1 NAME Data::TableReader::Decoder::IdiotCSV - Access rows of a badly formatted comma-delimited text file =head1 VERSION version 0.011 =head1 DESCRIPTION This decoder deals with those special people who think that encoding CSV is as simple as print join(',', map { qq{"$_"} } @row)."\n"; regardless of their data containing quote characters or newlines, resulting in garbage like "First Name","Last Name","Nickname" "Joe","Smith",""SuperJoe, to the rescue"" This can actually be processed by (recent versions of) the L module with the following configuration: { binary => 1, allow_loose_quotes => 1, allow_whitespace => 1, escape_char => undef, } And so this module is simply a subclass of L which provides those defaults to the parser. 
=head1 AUTHOR Michael Conrad =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2019 by Michael Conrad. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. =cut Data-TableReader-0.011/lib/Data/TableReader/Decoder/XLSX.pm0000644000175000017500000000360413505216123024145 0ustar silverdirksilverdirkpackage Data::TableReader::Decoder::XLSX; use Moo 2; use Carp; use Try::Tiny; extends 'Data::TableReader::Decoder::Spreadsheet'; our @xlsx_probe_modules= qw( Spreadsheet::ParseXLSX Spreadsheet::XLSX ); our $default_xlsx_module; sub default_xlsx_module { $default_xlsx_module ||= Data::TableReader::Decoder::_first_sufficient_module('XLSX parser', \@xlsx_probe_modules); } # ABSTRACT: Access sheets/rows of a modern Microsoft Excel workbook our $VERSION = '0.011'; # VERSION sub _build_workbook { my $self= shift; my $wbook; my $f= $self->file_handle; if (ref $f and ref($f)->can('worksheets')) { $wbook= $f; } else { my $class= $self->default_xlsx_module; # Spreadsheet::XLSX has an incompatible constructor if ($class->isa('Spreadsheet::XLSX')) { $wbook= $class->new($f); } else { $wbook= $class->new->parse($f, $self->xls_formatter); } } defined $wbook or croak "Can't parse file '".$self->file_name."'"; return $wbook; } 1; __END__ =pod =encoding UTF-8 =head1 NAME Data::TableReader::Decoder::XLSX - Access sheets/rows of a modern Microsoft Excel workbook =head1 VERSION version 0.011 =head1 DESCRIPTION See L. This subclass simply parses the input using an instance of L. =head1 CLASS METHODS =head2 default_xlsx_module Initializes C<@Data::TableReader::Decoder::XLSX::default_xlsx_module> to the first available module in the list of C<@Data::TableReader::Decoder::XLSX::xlsx_probe_modules> and returns the cached value every time afterward. Those variables can be modified as needed, if you have other XLSX modules available. 
=head1 AUTHOR Michael Conrad =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2019 by Michael Conrad. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. =cut Data-TableReader-0.011/lib/Data/TableReader/Decoder/Mock.pm0000644000175000017500000000575113505216123024245 0ustar silverdirksilverdirkpackage Data::TableReader::Decoder::Mock; use Moo 2; use Carp 'croak'; use IO::Handle; extends 'Data::TableReader::Decoder'; # ABSTRACT: Mock decoder for test cases our $VERSION = '0.011'; # VERSION has data => ( is => 'rw' ); sub iterator { my $self= shift; my $data= $self->data; my $table= $data->[0]; my $colmax= $table? scalar(@{$table->[0]})-1 : -1; my $rowmax= $table? $#$table : -1; my $row= -1; Data::TableReader::Decoder::Mock::_Iter->new( sub { my $slice= shift; return undef unless $row < $rowmax; ++$row; my $datarow= $table->[$row]; return [ @{$datarow}[@$slice] ] if $slice; return $datarow; }, { data => $data, table_idx => 0, table_ref => \$table, row_ref => \$row, colmax_ref => \$colmax, rowmax_ref => \$rowmax, origin => [ $table, $row ], } ); } # If you need to subclass this iterator, don't. Just implement your own. # i.e. I'm not declaring this implementation stable, yet. use Data::TableReader::Iterator; BEGIN { @Data::TableReader::Decoder::Mock::_Iter::ISA= ('Data::TableReader::Iterator'); } sub Data::TableReader::Decoder::Mock::_Iter::position { my $f= shift->_fields; 'row '.${ $f->{row_ref} }; } sub Data::TableReader::Decoder::Mock::_Iter::progress { my $f= shift->_fields; return ${ $f->{row_ref} } / (${ $f->{rowmax_ref} } || 1); } sub Data::TableReader::Decoder::Mock::_Iter::tell { my $f= shift->_fields; return [ $f->{table_idx}, ${$f->{row_ref}} ]; } sub Data::TableReader::Decoder::Mock::_Iter::seek { my ($self, $to)= @_; my $f= $self->_fields; $to ||= $f->{origin}; my ($table_idx, $row)= @$to; my $table= $f->{data}[$table_idx]; my $colmax= $table? 
scalar(@{$table->[0]})-1 : -1; my $rowmax= $table? $#$table : -1; $row= -1 unless defined $row; $f->{table_idx}= $table_idx; ${$f->{table_ref}}= $table; ${$f->{row_ref}}= $row; ${$f->{colmax_ref}}= $colmax; ${$f->{rowmax_ref}}= $rowmax; 1; } sub Data::TableReader::Decoder::Mock::_Iter::next_dataset { my $self= shift; my $f= $self->_fields; return defined $f->{data}[ $f->{table_idx}+1 ] && $self->seek([ $f->{table_idx}+1 ]); } 1; __END__ =pod =encoding UTF-8 =head1 NAME Data::TableReader::Decoder::Mock - Mock decoder for test cases =head1 VERSION version 0.011 =head1 SYNOPSIS decoder => { CLASS => 'Mock', data => [ [ # Data Set 0 [ 1, 2, 3, 4, 5 ], ... ], [ # Data Set 1 [ 1, 2, 3, 4, 5 ], ... ], ] } This doesn't actually decode anything; it just returns verbatim rows of data from arrayrefs that you supply. =head1 ATTRIBUTES See attributes from parent class: L. =head2 data The verbatim data which will be returned by the iterator. =head1 AUTHOR Michael Conrad =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2019 by Michael Conrad. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. 
=cut Data-TableReader-0.011/lib/Data/TableReader/Decoder/XLS.pm0000644000175000017500000000325713505216123024021 0ustar silverdirksilverdirkpackage Data::TableReader::Decoder::XLS; use Moo 2; use Carp; extends 'Data::TableReader::Decoder::Spreadsheet'; our @xls_probe_modules= qw( Spreadsheet::ParseExcel ); our $default_xls_module; sub default_xls_module { $default_xls_module ||= Data::TableReader::Decoder::_first_sufficient_module('XLS parser', \@xls_probe_modules); } # ABSTRACT: Access sheets/rows of a Microsoft Excel '97 workbook our $VERSION = '0.011'; # VERSION sub _build_workbook { my $self= shift; my $wbook; my $f= $self->file_handle; if (ref $f and ref($f)->can('worksheets')) { $wbook= $f; } else { $wbook= $self->default_xls_module->new->parse($f, $self->xls_formatter); } defined $wbook or croak "Can't parse file '".$self->file_name."'"; return $wbook; } 1; __END__ =pod =encoding UTF-8 =head1 NAME Data::TableReader::Decoder::XLS - Access sheets/rows of a Microsoft Excel '97 workbook =head1 VERSION version 0.011 =head1 DESCRIPTION See L. This subclass simply parses the input using an instance of L. =head1 CLASS METHODS =head2 default_xls_module Initializes C<@Data::TableReader::Decoder::XLS::default_xls_module> to the first available module in the list of C<@Data::TableReader::Decoder::XLS::xls_probe_modules> and returns the cached value every time afterward. Those variables can be modified as needed, if you have other XLS modules available. =head1 AUTHOR Michael Conrad =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2019 by Michael Conrad. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. 
=cut Data-TableReader-0.011/lib/Data/TableReader/Decoder/TSV.pm0000644000175000017500000000223113505216123024016 0ustar silverdirksilverdirkpackage Data::TableReader::Decoder::TSV; use Moo 2; use Try::Tiny; use Carp; use Log::Any '$log'; # ABSTRACT: Access rows of a tab-delimited text file our $VERSION = '0.011'; # VERSION extends 'Data::TableReader::Decoder::CSV'; sub _build_parser { my $args= shift->_parser_args || {}; Data::TableReader::Decoder::CSV->default_csv_module->new({ binary => 1, allow_loose_quotes => 1, auto_diag => 2, sep_char => "\t", escape_char => undef, %$args, }); } 1; __END__ =pod =encoding UTF-8 =head1 NAME Data::TableReader::Decoder::TSV - Access rows of a tab-delimited text file =head1 VERSION version 0.011 =head1 DESCRIPTION This module is a simple subclass of L which supplies these defaults for the parser: parser => { binary => 1, allow_loose_quotes => 1, sep_char => "\t", escape_char => undef, auto_diag => 2, } =head1 AUTHOR Michael Conrad =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2019 by Michael Conrad. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. 
=cut Data-TableReader-0.011/lib/Data/TableReader/Decoder/CSV.pm0000644000175000017500000001743113505216123024005 0ustar silverdirksilverdirkpackage Data::TableReader::Decoder::CSV; use Moo 2; use Try::Tiny; use Carp; use IO::Handle; extends 'Data::TableReader::Decoder'; # ABSTRACT: Access rows of a comma-delimited text file our $VERSION = '0.011'; # VERSION our @csv_probe_modules= ( ['Text::CSV_XS' => 1.06], ['Text::CSV' => 1.91] ); our $default_csv_module; sub default_csv_module { $default_csv_module ||= Data::TableReader::Decoder::_first_sufficient_module('CSV parser', \@csv_probe_modules); } has _parser_args => ( is => 'ro', init_arg => 'parser' ); has parser => ( is => 'lazy', init_arg => undef ); sub _build_parser { my $self= shift; my $args= $self->_parser_args || {}; return $args if ref($args)->can('getline'); return $self->default_csv_module->new({ binary => 1, allow_loose_quotes => 1, auto_diag => 2, %$args }); } has autodetect_encoding => ( is => 'rw', default => sub { 1 } ); sub encoding { my ($self, $enc)= @_; my $fh= $self->file_handle; if (defined $enc) { binmode($fh, ":encoding($enc)"); return $enc; } my @layers= PerlIO::get_layers($fh); if (($enc)= grep { /^encoding|^utf/ } @layers) { # extract encoding name return 'UTF-8' if $enc eq 'utf8'; return uc($1) if $enc =~ /encoding\(([^)]+)\)/; return uc($enc); # could throw a parse error, but this is probably more useful behavior } # fh_start_pos will be set if we have already checked for BOM if ($self->autodetect_encoding && !defined $self->_fh_start_pos) { $self->_fh_start_pos(tell $fh or 0); if (($enc= $self->_autodetect_bom($fh))) { binmode($fh, ":encoding($enc)"); # re-mark the start after the BOM $self->_fh_start_pos(tell $fh or 0); return $enc; } } return ''; } has _fh_start_pos => ( is => 'rw' ); has _iterator => ( is => 'rw', weak_ref => 1 ); has _row_ref => ( is => 'rw' ); sub iterator { my $self= shift; croak "Multiple iterators on CSV stream not supported yet" if $self->_iterator; my $parser= 
$self->parser; my $fh= $self->file_handle; my $row_ref= $self->_row_ref; # Keeping this object is just an indication of whether an iterator has been used yet if (!$row_ref) { $self->_row_ref($row_ref= \(my $row= 0)); # trigger BOM detection if needed my $enc= $self->encoding; $self->_log->('debug', "encoding is ".($enc||'maybe utf8')); # ensure _fh_start_pos is set $self->_fh_start_pos(tell $fh or 0); } elsif ($$row_ref) { $self->_log->('debug', 'Seeking back to start of input'); seek($fh, $self->_fh_start_pos, 0) or die "Can't seek back to start of stream"; $$row_ref= 0; } my $i= Data::TableReader::Decoder::CSV::_Iter->new( sub { ++$$row_ref; my $r= $parser->getline($fh) or return undef; @$r= @{$r}[ @{$_[0]} ] if $_[0]; # optional slice argument return $r; }, { row => $row_ref, fh => $fh, origin => $self->_fh_start_pos, } ); $self->_iterator($i); return $i; } # This design is simplified from File::BOM in that it ignores UTF-32 # and in any "normal" case it can read from a pipe with only one # character to push back, avoiding the need to tie the file handle. # It also checks for whether layers have already been enabled. # It also avoids seeking to the start of the file handle, in case # the user deliberately seeked to a position. sub _autodetect_bom { my ($self, $fh)= @_; my $fpos= tell($fh); local $!; read($fh, my $buf, 1) || return; if ($buf eq "\xFF" || $buf eq "\xFE" || $buf eq "\xEF") { if (read($fh, $buf, 1, 1)) { if ($buf eq "\xFF\xFE") { return 'UTF-16LE'; } elsif ($buf eq "\xFE\xFF") { return 'UTF-16BE'; } elsif ($buf eq "\xEF\xBB" and read($fh, $buf, 1, 2) and $buf eq "\xEF\xBB\xBF") { return 'UTF-8'; } } } # It wasn't a BOM. Try to undo our read. 
$self->_log->('debug', 'No BOM in stream, seeking back to start'); if (length $buf == 1) { $fh->ungetc(ord $buf); } elsif (!seek($fh, $fpos, 0)) { # Can't seek if ($fh->can('ungets')) { # support for FileHandle::Unget $fh->ungets($buf); } else { croak "Can't seek input handle after BOM detection; You should set an encoding manually, buffer the entire input, or use FileHandle::Unget"; } } return; } # If you need to subclass this iterator, don't. Just implement your own. # i.e. I'm not declaring this implementation stable, yet. use Data::TableReader::Iterator; BEGIN { @Data::TableReader::Decoder::CSV::_Iter::ISA= ('Data::TableReader::Iterator'); } sub Data::TableReader::Decoder::CSV::_Iter::position { my $f= shift->_fields; 'row '.${ $f->{row} }; } sub Data::TableReader::Decoder::CSV::_Iter::progress { my $f= shift->_fields; # lazy-build the file size, using seek unless (exists $f->{file_size}) { my $pos= tell $f->{fh}; if (defined $pos and $pos >= 0 and seek($f->{fh}, 0, 2)) { $f->{file_size}= tell($f->{fh}); seek($f->{fh}, $pos, 0) or die "seek: $!"; } else { $f->{file_size}= undef; } } return $f->{file_size}? (tell $f->{fh})/$f->{file_size} : undef; } sub Data::TableReader::Decoder::CSV::_Iter::tell { my $f= shift->_fields; my $pos= tell($f->{fh}); return undef unless defined $pos && $pos >= 0; return [ $pos, ${$f->{row}} ]; } sub Data::TableReader::Decoder::CSV::_Iter::seek { my ($self, $to)= @_; my $f= $self->_fields; seek($f->{fh}, ($to? $to->[0] : $f->{origin}), 0) or croak("seek failed: $!"); ${ $f->{row} }= $to? $to->[1] : 0; 1; } 1; __END__ =pod =encoding UTF-8 =head1 NAME Data::TableReader::Decoder::CSV - Access rows of a comma-delimited text file =head1 VERSION version 0.011 =head1 DESCRIPTION This decoder wraps an instance of either L or L. You may pass your own options via the L attribute, which will override the defaults of this module on a per-field basis. 
This module defaults to: parser => { binary => 1, allow_loose_quotes => 1, auto_diag => 2, } This module makes an attempt at automatic unicode support: =over =item * If the stream has a PerlIO encoding() on it, no additional decoding is done. =item * If the stream has a BOM (byte-order mark) for UTF-8 or UTF-16, it adds that encoding with C. =item * Else, it lets the parser decide. The default Text::CSV parser will automatically upgrade UTF-8 sequences that it finds. (and, you can't disable this without also disabling unicode received from IO layers, which seems like a bug...) =back Because auto-detection might need to read multiple bytes, it is possible that for non-seekable streams (like pipes, stdin, etc) this may result in an exception. Only un-seekable streams beginning with C<"\xEF">, C<"\xFE">, or C<"\xFF"> will have this problem. You can solve this by supplying an encoding layer on the file handle (avoiding detection), setting L to false, buffering the entire input in a scalar and creating a file handle from that (making it seekable), or using a file handle that supports "ungets" like L. =head1 ATTRIBUTES =head2 parser An instance of L or L or compatible, or arguments to pass to the constructor. Constructor arguments are passed to CSV_XS if it is installed, else CSV. =head2 autodetect_encoding Whether to look for a byte-order mark on the input. =head2 encoding If autodetection is enabled, this will first check for a byte-order mark on the input. Else, or afterward, it will return whatever encoding PerlIO layer is configured on the file handle. Setting this attribute will change the PerlIO layer on the file handle, possibly skipping detection. =head2 iterator my $iterator= $decoder->iterator; Return an L which returns each row of the table as an arrayref. =head1 AUTHOR Michael Conrad =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2019 by Michael Conrad. 
This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. =cut Data-TableReader-0.011/lib/Data/TableReader/Iterator.pm0000644000175000017500000000563513505216123023601 0ustar silverdirksilverdirkpackage Data::TableReader::Iterator; use strict; use warnings; use Try::Tiny; use Carp; use Scalar::Util 'refaddr'; # ABSTRACT: Base class for iterators (blessed coderefs) our $VERSION = '0.011'; # VERSION our %_iterator_fields; sub new { my ($class, $sub, $fields)= @_; ref $sub eq 'CODE' and ref $fields eq 'HASH' or die "Expected new(CODEREF, HASHREF)"; $_iterator_fields{refaddr $sub}= $fields; return bless $sub, $class; } sub _fields { $_iterator_fields{refaddr shift}; } sub DESTROY { delete $_iterator_fields{refaddr shift}; } sub progress { undef; } sub tell { undef; } sub seek { undef; } sub next_dataset { undef; } 1; __END__ =pod =encoding UTF-8 =head1 NAME Data::TableReader::Iterator - Base class for iterators (blessed coderefs) =head1 VERSION version 0.011 =head1 SYNOPSIS my $iter= $record_reader->iterator; while (my $rec= $iter->()) { ... my $position= $iter->tell; print "Marking position $position"; # position stringifies to human-readable ... $iter->seek($position); } if ($iter->next_dataset) { # iterate some more while ($rec= $iter->()) { ... printf "Have processed %3d %% of the file", $iter->progress*100; } } =head1 DESCRIPTION This is the abstract base class for iterators used in Data::TableReader, which are blessed coderefs that return records on each call. The coderef should support a single argument of a "slice" to extract from the record, in case not all of the record is needed. =head1 ATTRIBUTES =head2 position Return a human-readable string describing the current location within the source file. This will be something like C<"$filename row $row"> or C<"$filename $worksheet:$cell_id">. =head2 progress An estimate of how much of the data has already been returned. 
If the stream is not seekable this may return undef. =head1 METHODS =head2 new $iter= Data::TableReader::Iterator->new( \&coderef, \%fields ); The iterator is a blessed coderef. The first argument is the coderef to be blessed, and the second argument is the magic hashref of fields to be made available as C<< $iter->_fields >>. =head2 tell If seeking is supported, this will return some value that can be passed to seek to come back to this point in the stream. This value will always be true. If seeking is not supported this will return undef. =head2 seek $iter->seek($pos); Seek to a point previously reported by L. If seeking is not supported this will die. If C<$pos> is any false value it means to seek to the start of the stream. =head2 next_dataset If a file format supports more than one tabular group of data, this method allows you to jump to the next. =head1 AUTHOR Michael Conrad =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2019 by Michael Conrad. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. =cut Data-TableReader-0.011/lib/Data/TableReader/Decoder.pm0000644000175000017500000000405013505216123023343 0ustar silverdirksilverdirkpackage Data::TableReader::Decoder; use Moo 2; # ABSTRACT: Base class for table decoders our $VERSION = '0.011'; # VERSION has file_name => ( is => 'ro', required => 1 ); has file_handle => ( is => 'ro', required => 1 ); has _log => ( is => 'ro', required => 1 ); *log= *_log; # back-compat, but deprecated since it doesn't match ->log on TableReader sub _first_sufficient_module { my ($name, $modules, $req_versions)= @_; require Module::Runtime; for my $mod (@$modules) { my ($pkg, $ver)= ref $mod eq 'ARRAY'? @$mod : ( $mod, 0 ); return $pkg if eval { Module::Runtime::use_module($pkg, $ver) }; } require Carp; Carp::croak "No $name available (or of sufficient version); install one of: " .join(', ', map +(ref $_ eq 'ARRAY'? 
"$_->[0] >= $_->[1]" : $_), @$modules); } 1; __END__ =pod =encoding UTF-8 =head1 NAME Data::TableReader::Decoder - Base class for table decoders =head1 VERSION version 0.011 =head1 DESCRIPTION This is an abstract base class describing the API for decoders. A decoder's job is to iterate table rows of a file containing tabular data. If a file provides multiple tables of data (such as worksheets, or tags) then the decode should also support the "next_dataset" method. =head1 ATTRIBUTES =head2 filename Set by TableReader. Useful for logging. =head2 file_handle Set by TableReader. This is what the iterator should parse. =head1 METHODS =head2 iterator This must be implemented by the subclass, to return an instance of L. The iterator should return an arrayref each time it is called, and accept one optional argument of a "slice" needed from the record. All decoder iterators return arrayrefs, so the slice should be an arrayref of column indicies equivalent to the perl syntax @row[@$slice] =head1 AUTHOR Michael Conrad =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2019 by Michael Conrad. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. =cut Data-TableReader-0.011/lib/Data/TableReader/Field.pm0000644000175000017500000001712113505216123023024 0ustar silverdirksilverdirkpackage Data::TableReader::Field; use Moo 2; use Carp (); # ABSTRACT: Field specification for Data::TableReader our $VERSION = '0.011'; # VERSION has name => ( is => 'ro', required => 1 ); has header => ( is => 'ro' ); has required => ( is => 'ro', default => sub { 1 } ); has trim => ( is => 'ro', default => sub { 1 } ); has blank => ( is => 'ro' ); # default is undef has type => ( is => 'ro', isa => sub { ref $_[0] eq 'CODE' or $_[0]->can('validate') } ); has array => ( is => 'ro' ); has follows => ( is => 'ro' ); sub follows_list { my $f= shift->follows; ref $f? @$f : defined $f? 
( $f ) : () } has header_regex => ( is => 'lazy' ); sub _build_header_regex { my $self= shift; my $h= $self->header; unless (defined $h) { $h= $self->name; $h =~ s/([[:lower:]])([[:upper:]])/$1 $2/g; # split words on camelCase $h =~ s/([[:alpha:]])([[:digit:]])/$1 $2/g; # or digit $h =~ s/([[:digit:]])([[:alpha:]])/$1 $2/g; $h =~ s/_/ /g; # then split on underscore } return $h if ref($h) eq 'Regexp'; my $pattern= join "[\\W_]*", map { $_ eq "\n"? '\n' : "\Q$_\E" } grep { defined && length } split /(\n)|\s+|(\W)/, $h; # capture newline or non-word, except for other whitespace return qr/^[\W_]*$pattern[\W_]*$/im; } has trim_coderef => ( is => 'lazy' ); sub _default_trim_coderef { $_ =~ s/\s+$//; $_ =~ s/^\s+//; } sub _build_trim_coderef { my $t= shift->trim; return undef unless $t; return \&_default_trim_coderef if !ref $t; return $t if ref $t eq 'CODE'; return sub { s/$t//g; } if ref $t eq 'Regexp'; Carp::croak("Can't convert ".ref($t)." to a coderef"); } 1; __END__ =pod =encoding UTF-8 =head1 NAME Data::TableReader::Field - Field specification for Data::TableReader =head1 VERSION version 0.011 =head1 DESCRIPTION This class describes aspects of one of the fields you want to find in your spreadsheet. =head1 ATTRIBUTES =head2 name Required. Used for the hashref key if you pull records as hashes, and used in diagnostic messages. =head2 header A string or regex describing the column header you want to find in the spreadsheet. If you specify a regex, it is used directly. If you specify a string, it becomes the regex matching any string with the same words (\w+) and non-whitespace (\S+) characters in the same order, case insensitive, surrounded by any amount of non-alphanumeric garbage (C<[\W_]*>). When no header is specified, the L is used as a string after first breaking it into words on underscore or camel-case or numeric boundaries. 
This deserves some examples: Name Implied Default Header "zipcode" "zipcode" "ZipCode" "Zip Code" "Zip_Code" "zip Code" "zip5" "zip 5" Header Regex Could Match... "ZipCode" /^[\W_]*ZipCode[\W_]*$/i "zipcode:" "zip_code" /^[\W_]*zip_code[\W_]*$/i "--ZIP_CODE--" "zip code" /^[\W_]*zip[\W_]*code[\W_]*$/i "ZIP\nCODE " "zip-code" /^[\W_]*zip[\W_]*-[\W_]*code[\W_]*$/i "ZIP-CODE:" qr/Zip.*Code/ /Zip.*Code/ "Post(Zip)Code" If this default matching doesn't meet your needs or paranoia level, then you should always specify your own header regexes. (If your data actually doesn't have any header at all and you want to brazenly assume the columns match the fields, see reader attribute L) =head2 required Whether or not this field must be found in order to detect a table. Defaults is B. Note this does B require the field of a row to contain data in order to read a record from the table; it just requires a column to exist. =head2 trim # remove leading/trailing whitespace trim => 1 # remove leading/trailing whitespace but also remove "N/A" and "NULL" trim => qr( ^ \s* N/A \s* $ | ^ \s* NULL \s* $ | ^ \s+ | \s+ $ )xi # custom search/replace in a coderef trim => sub { s/[\0-\1F\7F]+/ /g; s/^\s+//; s/\s+$//; }; If set to a non-reference, this is treated as a boolean of whether to remove leading and trailing whitespace. If set to a coderef, the coderef will be called for each value with C<$_> set to the current value; it should modify C<$_> as appropriate (return value is ignored). It can also be set to a regular expression of all the patterns to remove, as per C<< s/$regexp//g >>. Default is B<1>, which is equivalent to a regular expression of C<< qr/(^\s+)|(\s+$)/ >>. =head2 blank The value to extract when the spreadsheet cell is an empty string or undef. (after any processing done by L) Default is C. Another common value would be C<"">. =head2 type A L type (or any object or class with a C method) or a coderef which returns a validation error message (undef if it is valid). 
use Types::Standard; ... type => Maybe[Int] # or without Type::Tiny type => sub { $_[0] =~ /^\w+/? undef : "word-characters only" }, This is an optional feature and there is no default. The behavior of a validation failure depends on the options to TableReader. =head2 array Boolean of whether this field can be found multiple times in one table. Default is B. If true, the value of the field will always be an arrayref (even if only one column matched). =head2 follows Name (or arrayref of names) of a field which this field must follow, in a first-to-last ordering of the columns. This field must occur immediately after the named field(s), or after another field which also has a C restriction and follows the named field(s). The purpose of this attribute is to resolve ambiguous columns. Suppose you expect columns with the following headers: Father | | | | Mother | | | FirstName | LastName | Tel. | Email | FirstName | LastName | Tel. | Email You can use C to identify the first column, but after FirstName the rest are ambiguous. But, TableReader can figure it out if you say: { name => 'father_first', header => qr/Father\nFirstName/ }, { name => 'father_last', header => 'LastName', follows => 'father_first' }, { name => 'father_tel', header => 'Tel.', follows => 'father_first' }, { name => 'father_email', header => 'Email', follows => 'father_first' }, .. and so on. Note how C<'father_first'> is used for each as the C name; this way if any non-required fields (like maybe C) are completely removed from the file, TableReader will still be able to find C and C. 
You can also use this to accumulate an array of columns that lack headers: Scores | | | | | | | OtherData 12% | 35% | 42% | 18% | 65% | 99% | 55% | xyz { name => 'scores', array => 1, trim => 1 }, { name => 'scores', array => 1, trim => 1, header => '', follows => 'scores' }, The second field definition has an empty header, which would normally make it rather ambiguous and potentially capture blank-header columns that might not be part of the array. But, because it must follow a column named 'scores' there's no ambiguity; you get exactly any column starting from the header C<'Scores'> until a column of any other header. =head2 follows_list Convenience accessor for C<< @{ $self->follows } >>, useful because C<follows> might only be a scalar. =head2 header_regex L</header>, coerced to a regex if it wasn't already =head1 AUTHOR Michael Conrad =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2019 by Michael Conrad. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. 
=cut Data-TableReader-0.011/lib/Data/TableReader.pm0000644000175000017500000010331413505216123022001 0ustar silverdirksilverdirkpackage Data::TableReader; use Moo 2; use Try::Tiny; use Carp; use List::Util 'max'; use Module::Runtime 'require_module'; use Data::TableReader::Field; use Data::TableReader::Iterator; # ABSTRACT: Extract records from "dirty" tabular data sources our $VERSION = '0.011'; # VERSION has input => ( is => 'rw', required => 1 ); has _file_handle => ( is => 'lazy' ); has _decoder_arg => ( is => 'rw', init_arg => 'decoder' ); has decoder => ( is => 'lazy', init_arg => undef ); has fields => ( is => 'rw', required => 1, coerce => \&_coerce_field_list ); sub field_list { @{ shift->fields } } has field_by_name => ( is => 'lazy' ); has record_class => ( is => 'rw', required => 1, default => sub { 'HASH' } ); has static_field_order => ( is => 'rw' ); # force order of columns has header_row_at => ( is => 'rw', default => sub { [1,10] } ); # row of header, or range to scan has header_row_combine => ( is => 'rw', lazy => 1, builder => 1 ); has on_unknown_columns => ( is => 'rw', default => sub { 'use' } ); has on_blank_row => ( is => 'rw', default => sub { 'next' } ); has on_validation_fail => ( is => 'rw', default => sub { 'die' } ); has log => ( is => 'rw', trigger => sub { shift->_clear_log } ); # Open 'input' if it isn't already a file handle sub _build__file_handle { my $self= shift; my $i= $self->input; return undef if ref($i) && (ref($i) eq "Spreadsheet::ParseExcel::Worksheet"); return $i if ref($i) && (ref($i) eq 'GLOB' or ref($i)->can('read')); open(my $fh, '<', $i) or croak "open($i): $!"; binmode $fh; return $fh; } # Create ::Decoder instance either from user-supplied args, or by detecting input format sub _build_decoder { my $self= shift; my $decoder_arg= $self->_decoder_arg; my $decoder_ref= ref $decoder_arg; my ($class, @args); if (!$decoder_arg) { ($class, @args)= $self->detect_input_format; $self->_log->('trace', "Detected input format as 
%s", $class); } elsif (!$decoder_ref) { $class= $decoder_arg; } elsif ($decoder_ref eq "HASH" or $decoder_ref eq "ARRAY") { ($class, @args)= $decoder_ref eq "ARRAY"? @$decoder_arg : do { my %tmp= %$decoder_arg; (delete($tmp{CLASS}), %tmp); }; if(!$class) { my ($input_class, @input_args)= $self->detect_input_format; croak "decoder class not in arguments and unable to identify decoder class from input" if !$input_class; ($class, @args)= ($input_class, @input_args, @args); } } elsif ($decoder_ref->can('iterator')) { return $decoder_arg; } else { croak "Can't create decoder from $decoder_ref"; } $class= "Data::TableReader::Decoder::$class" unless $class =~ /::/; require_module($class) or croak "$class does not exist or is not installed"; $self->_log->('trace', 'Creating decoder %s on input %s', $class, $self->input); return $class->new( file_name => ($self->input eq ($self->_file_handle||"") ? '' : $self->input), file_handle => $self->_file_handle, _log => $self->_log, @args ); } # User supplies any old perl data, but this field should always be an arrayref of ::Field sub _coerce_field_list { my ($list)= @_; defined $list and ref $list eq 'ARRAY' or croak "'fields' must be a non-empty arrayref"; my @list= @$list; # clone it, to make sure we don't unexpectedly alter the caller's data for (@list) { if (!ref $_) { $_= Data::TableReader::Field->new({ name => $_ }); } elsif (ref $_ eq 'HASH') { my %args= %$_; # "isa" alias for the 'type' attribute $args{type}= delete $args{isa} if defined $args{isa} && !defined $args{type}; $_= Data::TableReader::Field->new(\%args) } else { croak "Can't coerce '$_' to a Field object" } } return \@list; } sub _build_field_by_name { my $self= shift; # reverse list so first field of a name takes precedence { map { $_->name => $_ } reverse @{ $self->fields } } } sub _build_header_row_combine { my $self= shift; # If headers contain "\n", we need to collect multiple cells per column max map { 1+(()= ($_->header_regex =~ /\\n|\n/g)) } 
$self->field_list; } # 'log' can be a variety of things, but '_log' will always be a coderef has _log => ( is => 'lazy', clearer => 1 ); sub _build__log { _log_fn(shift->log); } sub _log_fn { my $dest= shift; !$dest? sub { my ($level, $msg, @args)= @_; return unless $level eq 'warn' or $level eq 'error'; $msg= sprintf($msg, @args) if @args; warn $msg."\n"; } : ref $dest eq 'ARRAY'? sub { my ($level, $msg, @args)= @_; return unless $level eq 'warn' or $level eq 'error'; $msg= sprintf($msg, @args) if @args; push @$dest, [ $level, $msg ]; } : ref($dest)->can('info')? sub { my ($level, $msg, @args)= @_; $dest->$level( @args? sprintf($msg, @args) : $msg ) if $dest->can('is_'.$level)->($dest); } : croak "Don't know how to log to $dest"; } sub detect_input_format { my ($self, $filename, $magic)= @_; my $input= $self->input; # As convenience to spreadsheet users, let input be a parsed workbook/worksheet object. return ('XLSX', sheet => $input) if ref($input) && ref($input)->can('get_cell'); return ('XLSX', workbook => $input) if ref($input) && ref($input)->can('worksheets'); # Load first block of file, unless supplied my $fpos; if (!defined $magic) { my $fh= $self->_file_handle; # Need to be able to seek. if (seek($fh, 0, 1)) { $fpos= tell $fh; read($fh, $magic, 4096); seek($fh, $fpos, 0) or croak "seek: $!"; } elsif ($fh->can('ungets')) { $fpos= 0; # to indicate that we did try reading the file read($fh, $magic, 4096); $fh->ungets($magic); } else { $self->_log->('notice',"Can't fully detect input format because handle is not seekable." ." Consider fully buffering the file, or using FileHandle::Unget"); $magic= ''; } } # Excel is obvious so check it first. This handles cases where an excel file is # erroneously named ".csv" and sillyness like that. 
return ( 'XLSX' ) if $magic =~ /^PK(\x03\x04|\x05\x06|\x07\x08)/; return ( 'XLS' ) if $magic =~ /^\xD0\xCF\x11\xE0/; # Else trust the file extension, because TSV with commas can be very similar to CSV with # tabs in the data, and some crazy person might store an HTML document as the first element # of a CSV file. # Detect filename if not supplied if (!defined $filename) { $filename= ''; $filename= "$input" if defined $input and (!ref $input || ref($input) =~ /path|file/i); } if ($filename =~ /\.([^.]+)$/) { my $suffix= uc($1); return 'HTML' if $suffix eq 'HTM'; return $suffix; } # Else probe some more... $self->_log->('debug',"Probing file format because no filename suffix"); length $magic or croak "Can't probe format. No filename suffix, and " .(!defined $fpos? "unseekable file handle" : "no content"); # HTML is pretty obvious return 'HTML' if $magic =~ /^(\xEF\xBB\xBF|\xFF\xFE|\xFE\xFF)?<(!DOCTYPE )HTML/i; # Else guess between CSV and TSV my ($probably_csv, $probably_tsv)= (0,0); ++$probably_csv if $magic =~ /^(\xEF\xBB\xBF|\xFF\xFE|\xFE\xFF)?["']?[\w ]+["']?,/; ++$probably_tsv if $magic =~ /^(\xEF\xBB\xBF|\xFF\xFE|\xFE\xFF)?["']?[\w ]+["']?\t/; my $comma_count= () = ($magic =~ /,/g); my $tab_count= () = ($magic =~ /\t/g); my $eol_count= () = ($magic =~ /\n/g); ++$probably_csv if $comma_count > $eol_count and $comma_count > $tab_count; ++$probably_tsv if $tab_count > $eol_count and $tab_count > $comma_count; $self->_log->('debug', 'probe results: comma_count=%d tab_count=%d eol_count=%d probably_csv=%d probably_tsv=%d', $comma_count, $tab_count, $eol_count, $probably_csv, $probably_tsv); return 'CSV' if $probably_csv and $probably_csv > $probably_tsv; return 'TSV' if $probably_tsv and $probably_tsv > $probably_csv; croak "Can't determine file format"; } has _table_found => ( is => 'rw', lazy => 1, builder => 1, clearer => 1, predicate => 1 ); sub _build__table_found { my $self= shift; my %loc= ( croak_on_fail => 1 ); $self->_find_table($self->decoder->iterator, 
\%loc); \%loc; } sub find_table { my $self= shift; return 1 if $self->_has_table_found; my %loc; if ($self->_find_table($self->decoder->iterator, \%loc)) { $self->_table_found(\%loc); return 1; } return 0; } sub col_map { shift->_table_found->{col_map}; } sub field_map { shift->_table_found->{field_map}; } sub _find_table { my ($self, $data_iter, $stash)= @_; $stash ||= {}; while (!$self->_find_table_in_dataset($data_iter, $stash) && !defined $stash->{fatal} && $data_iter->next_dataset) {} if ($stash->{col_map}) { # Calculate field map from col map my $col_map= $stash->{col_map}; my %fmap; for my $i (0 .. $#$col_map) { next unless $col_map->[$i]; if ($col_map->[$i]->array) { push @{ $fmap{$col_map->[$i]->name} }, $i; } else { $fmap{$col_map->[$i]->name}= $i; } } $stash->{field_map}= \%fmap; # And record the stream position of the start of the table $stash->{first_record_pos}= $data_iter->tell; $stash->{data_iter}= $data_iter; return $stash; } else { my $err= $stash->{fatal} || "Can't locate valid header"; $self->_log->('error', $err); croak $err if $stash->{croak_on_fail}; return undef; } } sub _find_table_in_dataset { my ($self, $data_iter, $stash)= @_; # If header_row_at is undef, then there is no header. # Ensure static_field_order, then set up columns. my @fields= $self->field_list; my $header_at= $self->header_row_at; if (!defined $header_at) { unless ($self->static_field_order) { $stash->{fatal}= "You must enable 'static_field_order' if there is no header row"; return; } $stash->{col_map}= \@fields; return 1; } # If headers contain "\n", we need to collect multiple cells per column my $row_accum= $self->header_row_combine; my ($start, $end)= ref $header_at? 
@$header_at : ( $header_at, $header_at ); my @rows; # If header_row_at doesn't start at 1, seek forward if ($start > 1) { $self->_log->('trace', 'Skipping to row %s', $start); push @rows, $data_iter->() for 1..$start-1; } # Scan through the rows of the dataset up to the end of header_row_at, accumulating rows so that # multi-line regexes can match. for ($start .. $end) { my $vals= $data_iter->(); if (!$vals) { # if undef, we reached end of dataset $self->_log->('trace', 'EOF'); last; } if ($row_accum > 1) { push @rows, $vals; shift @rows while @rows > $row_accum; $vals= [ map { my $c= $_; join("\n", map $_->[$c], @rows) } 0 .. $#{$rows[-1]} ]; $stash->{context}= $row_accum.' rows ending at '.$data_iter->position; } else { $stash->{context}= $data_iter->position; } $self->_log->('trace', 'Checking for headers on %s', $stash->{context}); $stash->{context}.= ': '; $stash->{col_map}= $self->static_field_order? # If static field order, look for headers in sequence $self->_match_headers_static($vals, $stash) # else search for each header : $self->_match_headers_dynamic($vals, $stash); return 1 if $stash->{col_map}; return if $stash->{fatal}; $self->_log->('debug', '%sNo match', $stash->{context}); } $self->_log->('warn','No row in dataset matched full header requirements'); return; } sub _match_headers_static { my ($self, $header, $stash)= @_; my $fields= $self->fields; for my $i (0 .. $#$fields) { next if $header->[$i] =~ $fields->[$i]->header_regex; # Field header doesn't match. Start over on next row. $self->_log->('debug','%sMissing field %s', $stash->{context}||'', $fields->[$i]->name); return; } # found a match for every field! 
$self->_log->('debug','%sFound!', $stash->{context}||''); return $fields; } sub _match_headers_dynamic { my ($self, $header, $stash)= @_; my $context= $stash->{context} || ''; my %col_map; my $fields= $self->fields; my $free_fields= $stash->{free_fields} ||= [ # Sort required fields to front, to fail faster on non-matching rows sort { $a->required? -1 : $b->required? 1 : 0 } grep { !$_->follows_list } @$fields ]; my $follows_fields= $stash->{follows_fields} ||= [ grep { $_->follows_list } @$fields ]; for my $f (@$free_fields) { my $hr= $f->header_regex; $self->_log->('debug', 'looking for %s', $hr); my @found= grep { $header->[$_] =~ $hr } 0 .. $#$header; if (@found == 1) { if ($col_map{$found[0]}) { $self->_log->('debug', 'search status: '._colmap_progress_str(\%col_map, $header)); $self->_log->('info','%sField %s and %s both match column %s', $context, $f->name, $col_map{$found[0]}->name, $found[0]); return; } $col_map{$found[0]}= $f; } elsif (@found > 1) { if ($f->array) { # Array columns may be found more than once $col_map{$_}= $f for @found; } else { $self->_log->('debug', 'search status: '._colmap_progress_str(\%col_map, $header)); $self->_log->('info','%sField %s matches more than one column: %s', $context, $f->name, join(', ', @found)); return; } } elsif ($f->required) { $self->_log->('debug', 'search status: '._colmap_progress_str(\%col_map, $header)); $self->_log->('info','%sNo match for required field %s', $context, $f->name); return; } # else Not required, and not found } # Need to have found at least one column (even if none required) unless (keys %col_map) { $self->_log->('debug', 'search status: '._colmap_progress_str(\%col_map, $header)); $self->_log->('debug','%sNo field headers found', $context); return; } # Now, check for any of the 'follows' fields, some of which might also be 'required'. if (@$follows_fields) { my %following; my %found; for my $i (0 .. 
$#$header) { if ($col_map{$i}) { %following= ( $col_map{$i}->name => $col_map{$i} ); } else { my $val= $header->[$i]; my @match; for my $f (@$follows_fields) { next unless grep $following{$_}, $f->follows_list; push @match, $f if $val =~ $f->header_regex; } if (@match == 1) { if ($found{$match[0]} && !$match[0]->array) { $self->_log->('debug', 'search status: '._colmap_progress_str(\%col_map, $header)); $self->_log->('info','%sField %s matches multiple columns', $context, $match[0]->name); return; } $col_map{$i}= $match[0]; $found{$match[0]}= $i; $following{$match[0]->name}= $match[0]; } elsif (@match > 1) { $self->_log->('debug', 'search status: '._colmap_progress_str(\%col_map, $header)); $self->_log->('info','%sField %s and %s both match column %d', $context, $match[0]->name, $match[1]->name, $i+1); return; } else { %following= (); } } } # Check if any of the 'follows' fields were required if (my @unfound= grep { !$found{$_} && $_->required } @$follows_fields) { $self->_log->('debug', 'search status: '._colmap_progress_str(\%col_map, $header)); $self->_log->('info','%sNo match for required %s %s', $context, (@unfound > 1? ('fields', join(', ', map { $_->name } sort @unfound)) : ('field', $unfound[0]->name) )); return; } } # Now, if there are any un-claimed columns, handle per 'on_unknown_columns' setting. my @unclaimed= grep { !$col_map{$_} } 0 .. 
$#$header; if (@unclaimed) { my $act= $self->on_unknown_columns; my $unknown_list= join(', ', map $self->_fmt_header_text($header->[$_]), @unclaimed); $act= $act->($self, $header, \@unclaimed) if ref $act eq 'CODE'; if ($act eq 'use') { $self->_log->('warn','%sIgnoring unknown columns: %s', $context, $unknown_list); } elsif ($act eq 'next') { $self->_log->('warn','%sWould match except for unknown columns: %s', $context, $unknown_list); } elsif ($act eq 'die') { $stash->{fatal}= "${context}Header row includes unknown columns: $unknown_list"; } else { $stash->{fatal}= "Invalid action '$act' for 'on_unknown_columns'"; } $self->_log->('debug', 'search status: '._colmap_progress_str(\%col_map, $header)); return if $stash->{fatal}; } return [ map $col_map{$_}, 0 .. $#$header ]; } # Make header string readable for log messages sub _fmt_header_text { shift if ref $_[0]; my $x= shift; $x =~ s/ ( [^[:print:]] ) / sprintf("\\x%02X", ord $1 ) /gex; qq{"$x"}; } # format the colmap into a string sub _colmap_progress_str { my ($colmap, $headers)= @_; join(' ', map { $colmap->{$_}? $_.'='.$colmap->{$_}->name : $_.':'._fmt_header_text($headers->[$_]) } 0 .. $#$headers) } sub iterator { my $self= shift; my $fields= $self->fields; # Creating the record iterator consumes the data source's iterator. # The first time after detecting the table, we continue with the same iterator. # Every time after that we need to create a new data iterator and seek to the # first record under the header. 
my $data_iter= delete $self->_table_found->{data_iter}; unless ($data_iter) { $data_iter= $self->decoder->iterator; $data_iter->seek($self->_table_found->{first_record_pos}); } my $col_map= $self->_table_found->{col_map}; my $field_map= $self->_table_found->{field_map}; my @row_slice; # one column index per field, and possibly more for array_val_map my @arrayvals; # list of source index and destination index for building array values my @field_names; # ordered list of field names where row slice should be assigned my %trimmer; # list of trim functions and the array indicies they should be applied to my @blank_val; # blank value per each fetched column my @type_check;# list of my $class; # optional object class for the resulting rows # If result is array, the slice of the row must match the position of the fields in the # $self->fields array. If a field was not found it will get an undef for that slot. # It also results in an undef for secondary fields of the same name as the first. if ($self->record_class eq 'ARRAY') { my %remaining= %$field_map; @row_slice= map { my $src= delete $remaining{$_->name}; defined $src? $src : 0x7FFFFFFF } @$fields; } # If result is anything else, then only slice out the columns that are used for the fields # that we located. else { $class= $self->record_class unless 'HASH' eq $self->record_class; @field_names= keys %$field_map; @row_slice= values %$field_map; } # For any field whose value is an array of more that one source column, # encode those details in @arrayvals, and update @row_slice and @trim_idx accordingly for (0 .. 
$#row_slice) { if (!ref $row_slice[$_]) { my $field= $col_map->[$row_slice[$_]]; if (my $t= $field->trim_coderef) { $trimmer{$t} ||= [ $t, [] ]; push @{ $trimmer{$t}[1] }, $_; } push @blank_val, $field->blank; push @type_check, $self->_make_validation_callback($field, $_) if $field->type; } else { # This field is an array-value, so add the src columns to @row_slice # and list it in @arrayvals, and update @trim_idx if needed my $src= $row_slice[$_]; $row_slice[$_]= 0x7FFFFFFF; my $from= @row_slice; push @row_slice, @$src; push @arrayvals, [ $_, $from, scalar @$src ]; for ($from .. $#row_slice) { my $field= $col_map->[$row_slice[$_]]; if (my $t= $field->trim_coderef) { $trimmer{$t} ||= [ $t, [] ]; push @{ $trimmer{$t}[1] }, $_; } push @blank_val, $field->blank; push @type_check, $self->_make_validation_callback($field, $_) if $field->type; } } } my @trim= values %trimmer; @arrayvals= reverse @arrayvals; my ($n_blank, $first_blank, $eof); my $sub= sub { again: # Pull the specific slice of the next row that we need my $row= !$eof && $data_iter->(\@row_slice) or ++$eof && return undef; # Apply 'trim' to any column whose field requested it for my $t (@trim) { $t->[0]->() for grep defined, @{$row}[@{$t->[1]}]; } # Apply 'blank value' to every column which is zero length $n_blank= 0; $row->[$_]= $blank_val[$_] for grep { (!defined $row->[$_] || !length $row->[$_]) && ++$n_blank } 0..$#$row; # If all are blank, then handle according to $on_blank_row setting if ($n_blank == @$row) { $first_blank ||= $data_iter->position; goto again; } elsif ($first_blank) { unless ($self->_handle_blank_row($first_blank, $data_iter->position)) { $eof= 1; return undef; } $first_blank= undef; } # Check type constraints, if any if (@type_check) { if (my @failed= map $_->($row), @type_check) { $self->_handle_validation_fail(\@failed, $row, $data_iter->position.': ') or goto again; } } # Collect all the array-valued fields from the tail of the row $row->[$_->[0]]= [ splice @$row, $_->[1], $_->[2] 
] for @arrayvals; # stop here if the return class is 'ARRAY' return $row unless @field_names; # Convert the row to a hashref my %rec; @rec{@field_names}= @$row; # Construct a class, if requested, else return hashref return $class? $class->new(\%rec) : \%rec; }; return Data::TableReader::_RecIter->new( $sub, { data_iter => $data_iter, reader => $self }, ); } sub _make_validation_callback { my ($self, $field, $index)= @_; my $t= $field->type; ref $t eq 'CODE'? sub { my $e= $t->($_[0][$index]); defined $e? ([ $field, $index, $e ]) : () } : $t->can('validate')? sub { my $e= $t->validate($_[0][$index]); defined $e? ([ $field, $index, $e ]) : () } : croak "Invalid type constraint $t on field ".$field->name; } sub _handle_blank_row { my ($self, $first, $last)= @_; my $act= $self->on_blank_row; $act= $act->($self, $first, $last) if ref $act eq 'CODE'; if ($act eq 'next') { $self->_log->('warn', 'Skipping blank rows from %s until %s', $first, $last); return 1; } if ($act eq 'last') { $self->_log->('warn', 'Ending at blank row %s', $first); return 0; } if ($act eq 'die') { my $msg= "Encountered blank rows at $first..$last"; $self->_log->('error', $msg); croak $msg; } croak "Invalid value for 'on_blank_row': \"$act\""; } sub _handle_validation_fail { my ($self, $failures, $values, $context)= @_; my $act= $self->on_validation_fail; $act= $act->($self, $failures, $values, $context) if ref $act eq 'CODE'; my $errors= join(', ', map $_->[0]->name.': '.$_->[2], @$failures); if ($act eq 'next') { $self->_log->('warn', "%sSkipped for data errors: %s", $context, $errors) if $errors; return 0; } if ($act eq 'use') { $self->_log->('warn', "%sPossible data errors: %s", $context, $errors) if $errors; return 1; } if ($act eq 'die') { my $msg= "${context}Invalid record: $errors"; $self->_log->('error', $msg); croak $msg; } } BEGIN { @Data::TableReader::_RecIter::ISA= ( 'Data::TableReader::Iterator' ) } sub Data::TableReader::_RecIter::all { my $self= shift; my (@rec, $x); push @rec, $x 
while ($x= $self->()); return \@rec; } sub Data::TableReader::_RecIter::position { shift->_fields->{data_iter}->position(@_); } sub Data::TableReader::_RecIter::progress { shift->_fields->{data_iter}->progress(@_); } sub Data::TableReader::_RecIter::tell { shift->_fields->{data_iter}->tell(@_); } sub Data::TableReader::_RecIter::seek { shift->_fields->{data_iter}->seek(@_); } sub Data::TableReader::_RecIter::next_dataset { shift->_fields->{reader}->_log ->('warn',"Searching for supsequent table headers is not supported yet"); return 0; } 1; __END__ =pod =encoding UTF-8 =head1 NAME Data::TableReader - Extract records from "dirty" tabular data sources =head1 VERSION version 0.011 =head1 SYNOPSIS # Find a row in the Excel file containing the headers # "address", "city", "state", "zip" (in any order) # and then convert each row under that into a hashref of those fields. my $records= Data::TableReader>new( input => 'path/to/file.xlsx', fields => [qw( address city state zip )], ) ->iterator->all; but there's plenty of options to choose from... my $tr= Data::TableReader->new( # path or file handle # let it auto-detect the format (but can override that if we need) input => 'path/to/file.csv', # We want these fields to exist in the file (identified by headers) fields => [ { name => 'address', header => qr/street|address/i }, 'city', 'state', # can validate with Type::Tiny classes { name => 'zip', header => qr/zip\b|postal/i, type => US_Zipcode }, ], # Our data provider is horrible; just ignore any nonsense we encounter on_blank_row => 'next', on_validation_fail => 'next', # Capture warnings and show to user who uploaded file log => \(my @messages) ); my $records= $tr->iterator->all; ... 
$http_response->body( encode_json({ messages => \@messages }) ); =head1 DESCRIPTION This module is designed to take "loose" or "dirty" tabular data sources (such as Excel, CSV, TSV, or HTML) which may have been edited by non-technical humans and extract the data into sanitized records, while also verifying that the data file contains roughly the schema you were expecting. It is primarily intended for making automated imports of data from non-automated or unstable sources, and providing human-readable feedback about the validity of the data file. =head1 ATTRIBUTES =head2 input This can be a file name or L instance or file handle or a L object. If a file handle, it must be seekable in order to auto-detect the file format, I you may specify the decoder directly to avoid auto-detection. =head2 decoder This is either an instance of L, or a class name, or a partial class name to be appended as C<"Data::TableReader::Decoder::$name"> or an arrayref or hashref of arguments to build the decoder. In an arrayref the first argument can be undef, and in a hashref the CLASS argument can be missing or undef. In those cases it will be detected from the input attribute and any default arguments combined with (and if necessary trumped by) the extra arguments in the arrayref or hashref. Examples: 'CSV' # becomes Data::TableReader::Decoder::CSV->new() [ 'CSV', sep_char => "|" ] # becomes Data::TableReader::Decoder::CSV->new(sep_char => "|") { CLASS => 'CSV', sep_char => "|" } # becomes Data::TableReader::Decoder::CSV->new({ sep_char => "|" }) =head2 fields An arrayref of L objects which this module should search for within the tables (worksheets etc.) of L. If an element of this array is a hashref or string, it will be coerced to an instance of L, with plain strings becoming the C attribute. See L for how names are automatically converted to the header-matching regex. =head2 record_class Default is the special value C<'HASH'> for un-blessed hashref records. 
The special value C<'ARRAY'> will result in arrayrefs with fields in the same order they were specified in the L</fields> specification. Setting it to anything else will return records created with C<< $record_class->new(\%fields); >> =head2 static_field_order Boolean, whether the L</fields> must be found in columns in the exact order that they were specified. Default is false. =head2 header_row_at Row number, or range of row numbers where the header must be found. (All row numbers in this module are 1-based, to match end-user expectations.) The default is C<[1,10]> to limit header scanning to the first 10 rows. As a special case, if you are reading a source which lacks headers and you trust the source to deliver the columns in the right order, you can set this to undef if you also set C<< static_field_order => 1 >>. =head2 on_unknown_columns on_unknown_columns => 'use' # warn, and then use the table on_unknown_columns => 'next' # warn, and then look for another table which matches on_unknown_columns => 'die' # fatal error on_unknown_columns => sub { my ($reader, $col_headers)= @_; ...; return $opt; # one of the above values } This determines handling for columns that aren't associated with any field. The "required" columns must all be found before it considers this setting, but once it has found everything it needs to make this a candidate, you might or might not care about the leftover columns. =over =item C<'use'> (default) You don't care if there are extra columns, just log warnings about them and proceed extracting from this table. =item C<'next'> Extra columns mean that you didn't find the table you wanted. Log the near-miss, and keep searching additional rows or additional tables. =item C<'die'> This header is probably what you want, but you consider extra columns to be an error condition. Logs the details and calls C<croak>. =item C<coderef> You can add your own logic to handle this. Inspect the headers however you like, and then return one of the above values. 
=back =head2 on_blank_rows on_blank_rows => 'next' # warn, and then skip the row(s) on_blank_rows => 'last' # warn, and stop iterating the table on_blank_rows => 'die' # fatal error on_blank_rows => 'use' # actually try to return the blank rows as records on_blank_rows => sub { my ($reader, $first_blank_rownum, $last_blank_rownum)= @_; ...; return $opt; # one of the above values } This determines what happens when you've found the table, are extracting records, and encounter a series of blank rows (defined as a row with no printable characters in any field) followed by non-blank rows. If you use the callback, it suppresses the default warning, since you can generate your own. The default is C<'next'>. =head2 on_validation_fail on_validation_fail => 'next' # warn, and then skip the record on_validation_fail => 'use' # warn, and then use the record anyway on_validation_fail => 'die' # fatal error on_validation_fail => sub { my ($reader, $failures, $values, $context)= @_; for (@$failures) { my ($field, $value_index, $message)= @$_; ... # $field is a Data::TableReader::Field # $values->[$value_index] is the string that failed validation # $message is the error returned from the validation function # $context is a string describing the source of the row, like "Row 5" # You may modify $values to alter the record that is about to be created } # Clear the failures array to suppress warnings, if you actually corrected # the validation problems. @$failures= () if $opt eq 'use'; # return one of the above constants to tell the iterator what to do next return $opt; } This determines what happens when you've found the table, are extracting records, and one row fails its validation. In addition to deciding an option, the callback gives you a chance to alter the record before C<'use'>ing it. If you use the callback, it suppresses the default warning, since you can generate your own. The default is 'die'. 
=head2 log If undefined (the default) all log messages above 'info' will be emitted with C<warn>. If set to an object, it should support an API of: trace, is_trace debug, is_debug info, is_info warn, is_warn error, is_error such as L<Log::Any> and many other perl logging modules use. You can also set it to a coderef such as: my @messages; sub { my ($level, $message)= @_; push @messages, [ $level, $message ] if grep { $level eq $_ } qw( info warn error ); }; for a simple way to capture the messages without involving a logging module. And for extra convenience, you can set it to an arrayref which will receive any message that would otherwise have gone to 'warn' or 'error'. =head1 METHODS =head2 detect_input_format my ($class, @args)= $tr->detect_input_format( $filename, $head_of_file ); This is used internally to detect the format of a file, but you can call it manually if you like. The first argument (optional) is a file name, and the second argument (also optional) is the first few hundred bytes of the file. Missing arguments will be pulled from L</input> if possible. The return value is the best guess of module name and constructor arguments that should be used to parse the file. However, this doesn't guarantee such module actually exists or is installed; it might just echo the file extension back to you. =head2 find_table if ($tr->find_table) { ... } Search through the input for the beginning of the records, identified by a header row matching the various constraints defined in L</fields>. If L</header_row_at> is undef, then this does nothing and assumes success. Returns a boolean of whether it succeeded. This method does B<not> C<croak> on failure like L</iterator> does, on the assumption that you want to handle them gracefully. All diagnostics about the search are logged via L</log>. =head2 col_map This is a lazy attribute from table detection. After calling L</find_table> you can inspect which fields were found for each column via this method. If called before C<find_table>, this triggers table detection and throws an exception if one isn't found. 
Returns an arrayref with one element for each column, each undefined or a reference to the Field object it matched. =head2 field_map This is another lazy attribute from table detection, mapping from field name to column index/indicies which the field will be loaded from. If called before C, this triggers table detection and throws an exception if one isn't found. Returns a hashref where key is the field name, and value is either a single column index, or an arrayref of column indicies if the field is an L field. =head2 iterator my $iter= $tr->iterator; while (my $rec= $iter->()) { ... } Create an iterator. If the table has not been located, then find it and C if it can't be found. Depending on the decoder and input filehandle, you might only be able to have one instance of the iterator at a time. The iterator derives from L but also has a method "all" which returns all records in an arrayref. my $records= $tr->iterator->all; =head1 THANKS Portions of this software were funded by L. =head1 AUTHOR Michael Conrad =head1 CONTRIBUTOR =for stopwords Christian Walde Christian Walde =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2019 by Michael Conrad. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. =cut Data-TableReader-0.011/xt/0000755000175000017500000000000013505216123016303 5ustar silverdirksilverdirkData-TableReader-0.011/xt/release/0000755000175000017500000000000013505216123017723 5ustar silverdirksilverdirkData-TableReader-0.011/xt/release/pod-coverage.t0000644000175000017500000000200013505216123022453 0ustar silverdirksilverdirk#!perl # This file was automatically generated by Dist::Zilla::Plugin::Test::Pod::Coverage::Configurable. 
use Test::Pod::Coverage 1.08; use Test::More 0.88; BEGIN { if ( $] <= 5.008008 ) { plan skip_all => 'These tests require Pod::Coverage::TrustPod, which only works with Perl 5.8.9+'; } } use Pod::Coverage::TrustPod; my %skip = map { $_ => 1 } qw( ); my @modules; for my $module ( all_modules() ) { next if $skip{$module}; push @modules, $module; } plan skip_all => 'All the modules we found were excluded from POD coverage test.' unless @modules; plan tests => scalar @modules; my %trustme = (); my @also_private = ( qr/^BUILD$/u ); for my $module ( sort @modules ) { pod_coverage_ok( $module, { coverage_class => 'Pod::Coverage::TrustPod', also_private => \@also_private, trustme => $trustme{$module} || [], }, "pod coverage for $module" ); } done_testing(); Data-TableReader-0.011/xt/author/0000755000175000017500000000000013505216123017605 5ustar silverdirksilverdirkData-TableReader-0.011/xt/author/pod-syntax.t0000644000175000017500000000025213505216123022077 0ustar silverdirksilverdirk#!perl # This file was automatically generated by Dist::Zilla::Plugin::PodSyntaxTests. use strict; use warnings; use Test::More; use Test::Pod 1.41; all_pod_files_ok(); Data-TableReader-0.011/t/0000755000175000017500000000000013505216123016113 5ustar silverdirksilverdirkData-TableReader-0.011/t/data/0000755000175000017500000000000013505216123017024 5ustar silverdirksilverdirkData-TableReader-0.011/t/data/AddressAuxData.xlsx0000754000175000017500000001560513505216123022612 0ustar silverdirksilverdirkPK3tK _rels/.rels­’ÁNÃ0 †ï{Š*÷5Ý@¡¦»LH»!4À$nµ‰£ÄƒòöD ²ÃŽq~þb¥ÞLn,Þ0&K^‰UY‰½&c}§ÄËþqy/6Í¢~Æ8GRoC*rOJôÌáAʤ{tJ èóMKÑçcìd=@‡r]Uw2þdˆæ„YìŒqgV¢Ø¼„Mmk5nIz>3âW"“!vÈJL£|§8¼ e† yÞe}¹Ëßï” 0HM—!æîÈÓ·Ž!ý”Ë阘º¹ærpbôͼ„0gt{M#}HLîŸ3_J‹ZžüËæPK…š4šîÎPK3tKdocProps/custom.xmlµÓMo ðû>EýB«UkÚm5;ì°Ä—;£Ô’µ@Ýšeß}tµz4Ù”ÓÃKþ? 
Í?ëÊ9Q¥™à1ð8”‘3~ˆÁn»v§ÀÑóW‚Ó4Tƒyò½*!©2ŒjÇ&pƒÒ9ƒP“’ÖXì4·3…P56¶«P#4äXSn Ð’£6¢vå%ty³“ùkd.H»;½ß6Òæ%Ñ9¼qŠÚ°<_YfY€×_…©ë!oé†Ãpâ¢)BþÒO×ábõ Ù.öÃqmO¾rßÝ“<™Y%?´Q‰çm¼E°ÿi{ÛpCÉQ1Ót8%i‹»q£ž{¶§*ÆßuZb~ y‡¾ QÙßònpÐÃ/­¹“[‘aCŒŽ{tCpESõ`prK¬Ú7{Ûƒ×o–üPK?1ðC@«PK3tKdocProps/app.xmlÐ=kÃ0ཿˆ¬¶·5!È -¥S ÜÒÍ(Ò9Q±>ÎÁù÷Uh2w<Þãá½ã›ÙŽÅb2ÞµdY1R€S^·oÉG÷Z®H‘P:-Gï %'Hd#îø{ô"HE\jÉ1¬)MêV¦*Ç.'ƒVbãžúa0 ^¼š,8¤5c …ÁiÐeøÉE\ñ¿¨öê·_úìN!{‚wåØ bÉéuàO!ŒFIÌÇ‹­ÙEx;kô±bÕ}U/¶ÆMsÿµjú桸YèsÛoPH³lñ<™Q—5§·§×‰PK@À"âgPK3tKdocProps/core.xml’_o Åß÷)Þ[ÀšMI‹[|Ú’%j¶ìÐk%+hõÛ¯­³s‹{ãr?—[,ŽºIàƒ²¦D4#(#m¥L]¢Íz™ÎP¢0•h¬  ¿+¤cÒzxõÖ BÒL`Ò•h£c¹-BÖ9L'n­×"v¥¯±òSÔ€'„Üc QT" ÜS7Ñ7²’#Òí}3*‰¡ &L3м¼7 Ê•S«xrpÓzG÷1¨ÑضmÖæƒµËOñûËójxjªLß* ˆßA˜ô "TI`çë.Ê[þø´^">!ô!¥$%Ó5%l:cÓü£ÀÎ÷ÀóÚz®¥5^T½iÜëõF˜zßµ•ƒI7«A·Æ@ºûÛ­úG"šÒùzB™³|~•èàý x8¨~t8-ðu9T¿„PK°ÜtClPK3tKxl/_rels/workbook.xml.rels½’ÏjÃ0 ‡ï} £ûâ$cŒ8½”A¯[÷ÆQâÐÄ6’ö§o?-…Rv(;Éö÷û@jÖïó¤^‘xŒÁ@U” 0¸Øa0ð¼{¸ºƒu»jq²’Ÿ°«ü'°/’îµfçq¶\Ä„!ßô‘f+¹¤A'ëöv@]—å­¦%Ú#¦ÚvhÛU v‡„aǾn¢{™1ȉÍr˜3ÑÒ€bà«.2ôéøú’ño‘öìå×à§•å>êœÌõ?ËÔçdn.:o »'¡¼eËù,Ûß2«Fí^ûPKpå°éÚ²PK3tKxl/sharedStrings.xmlm”KoÔ0Çï| ËNÐ$!(Iªe*bûÛÚÛt3$Öú<¶ýö8´ äÉ!ÿ<3ÿy9õ郳êF2Á7º:*µB¿ ñ}£o®¿¼~¯1ølðØèG$}Ú¾¨‰X%WO˜Ç“¢ Ý€è(ŒèÓÍp:ƾ 1"t4 ²³ÅqY¾+¯Õ.LžýöƒV“7?'\?ƒJ·5™¶æöÖ·u1ŸŸØªë"åxmø1g[þwfvÁa*0ÇÕñµ ¾W[^ò8 …Ïå™È×N–9.+užÚ°û³¥åØ)sµ.Õn$ýS½º i‚vÙoB<Õ›£ƒáAaœ,pZõ²çªzU–åüåÖ³Þ,&rœ‡ň®þÅ}³÷þ$"¯rruâ^--ÅßK©´—rßrr³•=#µò=Ú…É€5é1x#[¸'ñF„Ìúã¯ÖŒP\Å=xÉ¿çä,LIJï×ø°àí0¦4Òº‚ 2éÿZR¤ŸCûPKÊ3Ç)‹ZPK3tKxl/worksheets/sheet2.xml½XMsâ8½ï¯pù>ø›05!@™Ij“Ù©Ú›‚Vmye“üúiÉ6IS•½$‡”Ýz~Ýz­?ÆŸ¹uÀ¬&´œØÞÀµ-\nhJÊÝÄþþ¼ø”ØVÍQ™¢œ–xb¿âÚþ<ýk|¤ìgaÌ- (ë‰q^]9N½Épê­p +[Ê Äá–휺b¥ò¡"w|×:"¥Ý0\±÷pÐí–lð Ýì \ò†„áq(¿ÎHUÛÓ±ÌðȬ-É9f_i eoQ^cX«Ð?aþ½’ëü™>B [v¦c§}x:N dªX o'öïj „üCð±î][GR¦ôøÈ(Ç.¥l(­:£Çl`Ÿ£ú"¸d$½'%†(gû6ø7=Îh~ 2AGú ÿbFOFv”~·üDÉÑËÎ!9N/Ò<ìyYž^‹šŸR¼Eûœ‹ e]ü{™Ø¥<NZ‰3œçBÛÚì  CÛz£´xÚ äó\·wÿM>®F…Ð÷è•î¥`íª8E/”þ!ÁëŠöÉmá+$N\[…m!ˆpSÍmÜ¿oµêÿd«nãs'qÿºkÚBž)8­ ü-²Baá ´»g:Üt {¯å¡BŽªº'tFÒŸ{^ _æ¦ b¤Äü¼ •Ä~IÊ3XD±›D®ïú¡?ò"Qïû™G-óÈÀ ÅFÑ(òB/vÝÐûÌžëG]Ùî;ëva¤Z7ˆ£é˜Ñ£ÅäÆ7ûšÓ¢Ñðœ¸ÓX-¡Awmhƒ´9¿÷ø€sYжØ H'Îg-³ÂÃâcà0õü±s¶ëâ´™˜7¿O\’,:H÷ÌR ¬ $á%Éú̺p@À“Šþ«èk**¥_ë/º„Ì 
á%dÞ@Ü>$¾„¬t¥w†<É%dm€ŒN ¥ƒV:è)P6JG®ë)jë¨`4ð‚$Ž”Ã=Ó‘Ÿ’p»£¡*Ê<ÐÄWÈ:BÑ~iàp• Êöî ¥”µ˜;~pCcÕ%:˜øI H:Ó‘²ƒ~ÆÊlÌC]eNˆ2§Ëô&Q䆮‚[¨”y¾3@”š×HbndôÁŒŒTÚs­£ ‘Q’ø#EŒ™Ž”L†ªÛ<ÒE)Ô!2iËHkd0н$Q¶°2P©i€¨i¨ù~l#çC½xõëÛQ&g9ÔÔôcßB¥w+•29wˆÂ²Ö!R3þ`5c½xe¦ˆòe³Œu5ab|õóje R&áN‡„Ê$¬uˆ¦¦Ó{W-0ÛI Qƒ û’‹®^´ñ~sïj)ß•ø <¡Œ;g°˜Œ”ü¡’ŽÔÊÀÊ•>[¿ÝÙö©ð¥Ë(#o´ä(ŸÍŬ÷Þ^“¾à4æö+b;‰siÝAÜÚÅöÌ”¼‚~¡Gw—IÏ)î"ÏKÜèôg[[ ®Ö¸äœ õ¾·VaöDÞpãyzÎpKXÍ…ÿû¶/^$“ݘï­iioO̶퓯.Ÿ3\>ÀÎáô2GÇ®(ã ð†û? Ý1»i ÝYß—m~~)Óá'«o¥ õÜóZ7£…øM¡þ·Ä’r¡Þ Ü©=7³-6ÙõåÙЊàÎ^4ú.¤”VJ¶[è]Ée‚sM]ø!Mç‡óOÇ4M›à¬õ®á²al§ë~2¸=ýV3ý PKƒ˜K\ïPK3tKxl/worksheets/sheet1.xml½VMoÛ8½ï¯t_ëÃ’¬¶‹ÖN¶ ¤u°N·@o´DYD(RKRv“_¿CR’ɇôÒbr8|3|CñÍòÃÏŠ:',$álå3ßu0ËxNØqå~{ºÿ3u©Ëå ¯Ü,Ýë?–g.že‰±r€É•[*UßzžÌJ\!9ã5f°RpQ!Sqôd-0ÊͦŠz¡ï'^…s-­x/ ’á-Ïš 3eA¦HAú²$µt×KáQ8¡ ‹/<‡´ D%†µñ«oµYWOü ݲ·^zíæõ2'A³â\¬ÜÁí]¤=ŒÃ¿Ÿå`ìœ ËùùQp…3e¨´Ž,ùùÐP$ßÿ$ ƒU‰¦5þÃÏN?MP‘áÂ,xoäXBê¸P=¤B‡=¦çÃ}»FQ²©œö9.PC•NÂqÑÙOp”•Ë4ß y­Cl0¥úü®“iß¿?‰\ç•ójŸ! ì¾?˜5ÛÇVÍózáá«]Õ—èÀù³6i\_WÏœBó^#}áÚ,\õ„m6wñpn·:ò?S)Xë ©‡ã®f÷æJÁh™Þ?cÍ*$Í"·ÛÓù­—pviþk(ª¥æ¹å¾$yŽ/%¯ÐO€_Â̯T/š%}^’«r妳d‘Á|žøópžÞè|ß¶ÈáäÀŸÍÓ4Iæ76¯!Ï[äùuäEƱŸø‹0]Ä¿†øaÜ‚GW ‰~û¡FáM` h7eØ"…ÖKÁÏŽ0ŒfT¼²Å¹ÄíŠ7ÎÀzwõmÜ~ø„©Ier8Ÿ§/¾4°Y?/§µ¿ôN:ÁÖãÓÔ#xë±™z„o=¶Sù[»©GÔ{xÀMOPø› '‰Å#‚¦Ɉ ©ÇbDÐÔ#4ô`– 8Y¤×IŠ~3IÑ$ý›IS`tÑ6W\F7m;uÓ½‹&oðáÕ‚0µ«Æ:%ˆ41;^„ll¥í¤¥ä‚¼r¦Ý€pc1x0 ûP$›.xV®¿ q$˜¹ƒg¨Àv ú`F𾸂u³Ò¨¨žÅAúqÿç:¾ºäõ-BSƒÕXìÉ+¶Ïø@ì "¤Ò’öµ©ɵíÄ÷öµl§½¨¸Ž†Ý “t ì©Äl'‡;$Ù®¡æB D@î‰5ïXl­F]ø=P”=dù÷’¨¾yqrý@º¸á•î’¤–t† äý8ñîÚ÷åÙÖ@²«ËÅ’ñšè:š'Ë。ÒÉIQ@í˜2.9uæ]žß._ÓzÉóܶ9pÕcZDkîÇÃ`0í»ÏõÿPK’gÐ6åÁ PK3tK xl/styles.xmlíXÑnÛ }ßW úºÕvš¤íd»j»fš4UU›J“¦=Û¨, mÒ¯Û±Óuk²iK5ç8\Î=>LðO9÷XHÂY½}Ì"–ðv:ywTˆÅˆr†¸Äž„o|©–ßd+ ˜ `¦TñÞqd”áÉ}^`¦{.r¤tS¤Ž,F±4ƒrê \wìäˆ0úlžOr%AÄçLpÐ@ÀŸb­m<„ÀÒóXKùxqyq}ú:? 
uƒ÷Þîí¹&Ô©r…~ÂÙ*åZ ôå#¸GTSx&<â” ÒY'·ü• QŽmØ9¢d&ˆ”º´ð œ!!µC–¯Ìns¬eZ£<ÑuBwdžÏl‡slúþ†kea&ŽPÚyW úR 6Ñ PÕ§ËBÏ=Óo®¥)ã~ ´ô£Ö€²Ðyg\Äz¥Ô™=XC &(å ÑÛ"€ ¢ÃúÀX †>ʼnÒÄ‚¤™)/C¢Ïu¥cR[榢ÓG˜Ò³ì¾$«§w5é"yºLXÙЫÙh¯ª–©j ¢ Ë 7$åZଠé@§”¤,ÇkW‚+©r×(áÐGu ȸ šÚL`Šú…2›Œ"‘ìóB ðB]s…,‹Öô P1Õ`c"aq™X÷ÉLv7åÒtk›ŠF <ºÃq-2#±ÚŠtÉšSîÊ'o[Ÿ*ëFµá¶SõkðzÄ z1ψÙzmõbz1½˜^L/f1Ã]úR½R3Ü)5ƒ]RsüÅ8íã»=Ì·ÎñÃmñ‹ä©ò¶žß”þÚÎôÛ†›Ùöç&ü?wm£??÷,Ò;iÙ¨_Ÿ/²Í©¶ºÖF³ía æz(€—æÖ޶œ›Í U„U­h.õƒœY¬•kæœç9ªY¼Q‡æ`CðÕýÖP;Tã ¨æB`-¦ÃÓps¦Ž®£ÛáËÙ®°0ˬ!:îž'Z}Òôä:«áð;PKánW;ìVPK3tKxl/workbook.xmlSÛŽÚ0}ïWX~‡\`) Š¢]©7-ÛÝg'™ÇŽla«þ{'Ù²jUõ!‰çâ3gfN·M%È´áJF4ú”€ÌTÎå.¢ß“Á”c™Ì™P"zCo—ï'¥÷©R{‚÷¥‰him=÷<“•P13T5HŒJWÌ¢©wž©5°Ü”¶^èû¯b\Òa®ÿCÏ`­²CÒv ³ÈÞ”¼6t¹(¸€§®!Âêú3«vÌDF½å+í¯š¤,Ûê³#Z0a-ÕéKú2‹1!(É™…`æû”7Êb&–Agëxâp2¿ã­éï”æ/JZ&¶™VBDÔêÃ¥µ<û[dÛê‘¥¦w6Ï\æêQ\Ñùê|rÇgžÛ8Mǽïø®´³ËÒ‡vPMg”\ëŠ8†ëµ6ä]uävÖ‰t]å¹cP Ĺïs,î¤b1z䆧Ië9Ç€¾ÏÃô À¡!kfÙÕýð÷GŽTÏ'–á ¹ù±:Hì"ð± Å'•·øÈæÝïÅ^ƒ° ûú¾´¸ÐØÆºïEBáùE žjè4èäHÉAóˆþx? 'ñtÂU0Áæfða4¾$›$ÁáÇëx–üDi:Ô9>qÇßXÿÙÛ3Ê£‰è¦É@¬'Óº·£æõ²ZþPKÆr³PK3tK[Content_Types].xmlÅ”OKÃ@Åïýa¯’lÛƒˆ$íAð¨ëYÖÝi²4û‡mM¿½³i+(‚ -õ’%dÞû½RÎ;Óf[¨­Ø¤³ ¬tJÛºb¯ËÇüŽÍg£r¹ó€ÕZ¬X£¿çeF`á ãoZäEz²ìa/HÌŠ ï[-E¤||kÕZ~ %e_ƒöxCŒÿNRN.‚óÈå£3E*Dt«•–@6C’bo“{ò„5œ'ÆÙdèH©@ eK`8üØè¤>‘ص‡¹~¸°~wn¨×˜1±ÔK ´Èxv«Ñ €H7øæýGŽtõ^‡¼?¦Îòå?0ÇäŸr`ܵpñô¦'tค]: #´=ðG%ïÿ–³OPKvg¨rC\PK3tK…š4šîÎ _rels/.relsPK3tK?1ðC@«'docProps/custom.xmlPK3tK@À"âg¨docProps/app.xmlPK3tK°ÜtClÈdocProps/core.xmlPK3tKpå°éÚ²Jxl/_rels/workbook.xml.relsPK3tKÊ3Ç)‹Zlxl/sharedStrings.xmlPK3tKƒ˜K\ï9xl/worksheets/sheet2.xmlPK3tK’gÐ6åÁ † xl/worksheets/sheet1.xmlPK3tKánW;ìV ±xl/styles.xmlPK3tKÆr³Øxl/workbook.xmlPK3tKvg¨rC\%[Content_Types].xmlPK Æ©Data-TableReader-0.011/t/data/AddressAuxData.xls0000754000175000017500000002000013505216123022403 0ustar silverdirksilverdirkÐÏࡱá;þÿ  
þÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿýÿÿÿÿÿÿÿþÿÿÿ þÿÿÿþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿRoot Entryÿÿÿÿÿÿÿÿÿÿÿÿþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþÿÿÿ  !"#$%&'()*+,-./0123456789:þÿÿÿ<þÿÿÿþÿÿÿ?@ABþÿÿÿDEFGHþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ » Ìá°Áâ\pCalc B°aÀ=œ¯¼=@ 8Ý@"·Ú1ÜCalibri1ÈÿArial1ÈÿArial1ÈÿArial1ܼCalibri ¤GENERAL ¥#,##0à¤õÿ À àõÿ ôÀ àõÿ ôÀ àõÿ ôÀ àõÿ ôÀ àõÿ ôÀ àõÿ ôÀ àõÿ ôÀ àõÿ ôÀ àõÿ ôÀ àõÿ ôÀ àõÿ ôÀ àõÿ ôÀ àõÿ ôÀ àõÿ ôÀ ठÀ à+õÿ ðÀ à)õÿ ðÀ à,õÿ ðÀ à*õÿ ðÀ à õÿ ðÀ ठÀ à¤"8À ॠÀ “€ÿ“€ÿ“€ÿ“€ÿ“€ÿ“€ÿ`…¡ Addresses…q Aux DataŒÁÁTëbðZð 3 ð¿ À@@ñ  ÷ü©1)NameAddressCityStateZipSomeone 123 Long St SomewhereOHAnother 01 Main St Elsewhere Zip CodesCitiesState Postal CodesLatLonwith population > 1,000,000PostCodeCountry PopulationAlbertaABCA New York CityNew YorkAlaskaAKUS Los Angeles CaliforniaAlabamaALChicagoIllinoisArkansasARHoustonTexasAmerican SamoaASÿ)cc » Ì d-Cëâ6?_€% Á*+‚ƒ„&ffffffæ?'ffffffæ?(è?)è?¡"d,, ¶` ¶`à? 
¶` ¶`à?U} ®} e } · } ’   ý ý ý ý ý ý ý ý ý ~ ºÉý  ý  ý  ý ~ ºÉìPðHðð0ð( ð ð>¶@ggÿÿÿÿ » Ì d-Cëâ6?_€% Á*+‚ƒ„&ffffffæ?'ffffffæ?(è?)è?¡"d,, ¶` ¶`à? ¶` ¶`à?U} ’} } ’        ý  ¾ ý  ¾ ý ¾  ý ý ý ý ý ý  ý  ~ &¿– # Â‘C@Ê©aj-UÀý ý ý ý ý  ý  ~ *¿–Ð]g—C@ šyrM.UÀý ý ~ à ý ý  ý  ~ .¿/ˆHMËC@¦I*S2UÀý ý ~ ®lòý ý   ý  ý !ý "~ Š ¦ý #ý  $ý  ý %ý &~ ‚&Œý 'ý  (ý  å ìPðH ðð0ð( ð ð>¶@ggÿÿÿÿ þÿ ÿÿÿÿÀFMicrosoft Excel 97-TabelleBiff8þÿà…ŸòùOh«‘+'³Ù0  px„ ¬¸Ä Ð Ü è ô éýmconrad1@Tê*@@€?tA(=Ó@¶Ÿ¤aÓþÿÕÍÕœ.“—+,ù®DÕÍÕœ.“—+,ù®\éýHÈÐàèðø AppVersion DocSecurityHyperlinksChangedLinksUpToDate ScaleCrop ShareDocéý12.0000 Root EntryÿÿÿÿÿÿÿÿÀF@WorkbookÿÿÿÿµCompObjÿÿÿÿÿÿÿÿ;IOle ÿÿÿÿÿÿÿÿÿÿÿÿ=SummaryInformation(ÿÿÿÿÿÿÿÿ><DocumentSummaryInformation8ÿÿÿÿÿÿÿÿÿÿÿÿCdÿÿÿÿÿÿÿÿÿÿÿÿþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþÿÿÿData-TableReader-0.011/t/12-decoder-idiot-csv.t0000644000175000017500000000144613505216123022031 0ustar silverdirksilverdirk#! /usr/bin/env perl use strict; use warnings; use Test::More; use Try::Tiny; use Data::TableReader::Decoder::IdiotCSV; plan skip_all => 'Need a CSV parser for this test' unless try { Data::TableReader::Decoder::IdiotCSV->default_csv_module }; my $input= < '', file_handle => $input_fh, _log => sub {} ], 'IdiotCSV decoder' ); ok( my $iter= $d->iterator, 'got iterator' ); is_deeply( $iter->(), [ 'First Name', 'Last Name', 'Nickname' ], 'first row' ); is_deeply( $iter->(), [ 'Joe', 'Smith', '"SuperJoe, to the rescue"' ], 'second row' ); is_deeply( $iter->(), undef, 'no third row' ); done_testing; Data-TableReader-0.011/t/35-record-iterator.t0000644000175000017500000000317613505216123021641 0ustar silverdirksilverdirk#! 
/usr/bin/env perl use strict; use warnings; use Test::More; use Log::Any '$log'; use Log::Any::Adapter 'TAP'; use_ok( 'Data::TableReader' ) or BAIL_OUT; subtest trim_options => sub { my $re= new_ok( 'Data::TableReader', [ input => \'', decoder => { CLASS => 'Mock', data => [ [ [qw( trim retrim codetrim notrim )], [ ' abc ', ' abc ', ' abc ', ' abc ' ], ], ] }, fields => [ { name => 'notrim', trim => 0 }, { name => 'trim', trim => 1 }, { name => 'retrim', trim => qr/a/ }, { name => 'codetrim', trim => sub { s/b//ig } }, ], log => $log ], 'TableReader' ); ok( $re->find_table, 'find_table' ) or die "Can't continue without table"; ok( my $i= $re->iterator, 'create iterator' ); is_deeply( $i->all, [ { trim => 'abc', retrim => ' bc ', codetrim => ' ac ', notrim => ' abc ' } ], 'values' ); }; subtest multiple_iterator => sub { my $re= new_ok( 'Data::TableReader', [ input => \'', decoder => { CLASS => 'Mock', data => [ [ [qw( a b c )], [qw( 1 2 3 )], ] ] }, fields => ['a','b','c'], log => $log ], 'TableReader' ); ok( $re->find_table, 'find_table' ) or die "Can't continue without table"; ok( my $i= $re->iterator, 'create iterator' ); Scalar::Util::weaken( my $wref= $i ); undef $i; is( $wref, undef, 'first iterator garbage collected' ); ok( my $i2= $re->iterator, 'second interator' ); ok( my $i3= $re->iterator, 'third iterator' ); is_deeply( $i2->all, [ { a => 1, b => 2, c => 3 } ], 'read rows from i2' ); is_deeply( $i3->all, [ { a => 1, b => 2, c => 3 } ], 'read rows from i3' ); }; done_testing; Data-TableReader-0.011/t/40-locate-table.t0000644000175000017500000001432113505216123021056 0ustar silverdirksilverdirk#! 
/usr/bin/env perl use strict; use warnings; use Test::More; use File::Spec::Functions 'catfile'; use Log::Any '$log'; use Log::Any::Adapter 'TAP'; use_ok( 'Data::TableReader' ) or BAIL_OUT; # Find fields in the exact order they are present in the file subtest basic => sub { my $ex= new_ok( 'Data::TableReader', [ input => \'', decoder => { CLASS => 'Mock', data => mock_data() }, fields => [ { name => 'name' }, { name => 'address' }, { name => 'city' }, { name => 'state' }, { name => 'zip' }, ], log => $log, ], 'TableReader' ); ok( $ex->find_table, 'found table' ); is_deeply( $ex->col_map, $ex->fields, 'col map' ); is_deeply( $ex->field_map, { name => 0, address => 1, city => 2, state => 3, zip => 4 }, 'field map' ); ok( my $i= $ex->iterator, 'iterator' ); is_deeply( $i->(), { name => 'Someone', address => '123 Long St', city => 'Somewhere', state => 'OH', zip => '45678' }, 'first row' ); is_deeply( $i->(), { name => 'Another', address => '01 Main St', city => 'Elsewhere', state => 'OH', zip => '45678' }, 'third row' ); is( $i->(), undef, 'eof' ); }; subtest find_on_second_sheet => sub { my $ex= new_ok( 'Data::TableReader', [ input => \'', decoder => { CLASS => 'Mock', data => mock_data() }, fields => [ { name => 'postcode' }, { name => 'country' }, { name => 'state' }, ], log => $log, ], 'TableReader' ); ok( $ex->find_table, 'found table' ); ok( my $i= $ex->iterator, 'iterator' ); is_deeply( $i->(), { state => 'Alberta', postcode => 'AB', country => 'CA' }, 'row 1' ); is_deeply( $i->(), { state => 'Alaska', postcode => 'AK', country => 'US' }, 'row 2' ); is_deeply( $i->(), { state => 'Alabama', postcode => 'AL', country => 'US' }, 'row 3' ); is_deeply( $i->(), { state => 'Arkansas',postcode => 'AR', country => 'US' }, 'row 4' ); is_deeply( $i->(), { state => 'American Samoa', postcode => 'AS', country => 'US' }, 'row 5' ); is( $i->(), undef, 'eof' ); }; subtest find_required => sub { my $ex= new_ok( 'Data::TableReader', [ input => \'', decoder => { CLASS => 'Mock', 
data => [ [ [qw( q w e r t y )], [qw( q w e r t a s d )], ] ], }, fields => [ { name => 'q', required => 1 }, { name => 'w', required => 1 }, { name => 'a', required => 0 }, { name => 'b', required => 0 }, { name => 'y', required => 0 }, { name => 's', required => 1 }, ], log => $log, ], 'TableReader' ); ok( $ex->find_table, 'found table' ); is_deeply( $ex->field_map, { q => 0, w => 1, a => 5, s => 6 }, 'field_map' ); is_deeply( $ex->iterator->all(), [], 'immediate eof' ); }; subtest multiline_header => sub { my $ex= new_ok( 'Data::TableReader', [ input => \'', decoder => { CLASS => 'Mock', data => [ [ [qw( a b c )], [qw( d e f )], [qw( g b c )], [qw( A B C )], ] ] }, fields => [ { name => 'a', header => "d g" }, { name => 'b', header => "b\ne\nb" }, { name => 'c', header => qr/f\nc$/ }, ], log => $log, ], 'TableReader' ); is( $ex->header_row_combine, 3, 'header_row_combine' ); ok( $ex->find_table, 'found table' ); is_deeply( $ex->field_map, { a => 0, b => 1, c => 2 }, 'field_map' ); is_deeply( $ex->iterator->all(), [{a=>'A',b=>'B',c=>'C'}], 'found row' ); }; subtest multi_column => sub { my $ex= new_ok( 'Data::TableReader', [ input => \'', decoder => { CLASS => 'Mock', data => [ [ [qw( a b a c a d )], [qw( 1 2 3 4 5 6 )], ] ] }, fields => [ { name => 'a', header => qr/a|c/, array => 1 }, { name => 'd' }, ], log => $log, ], 'TableReader' ); ok( $ex->find_table, 'found table' ); is_deeply( $ex->field_map, { a => [0,2,3,4], d => 5 }, 'field_map' ); is_deeply( $ex->iterator->all(), [{a => [1,3,4,5], d => 6}], 'rows' ); }; subtest array_at_end => sub { my $ex= new_ok( 'Data::TableReader', [ input => \'', decoder => { CLASS => 'Mock', data => [ [ [qw( a b c ),'','','','',''], [qw( 1 2 3 4 5 6 7 8 9 )], [qw( 1 2 3 4 5 6 7 8 9 10 11 12 13 )], [qw( 1 2 3 4 )], ] ] }, fields => [ 'a', { name => 'c', array => 1 }, { name => 'c', array => 1, header => '', follows => 'c' }, ], log => $log, ], 'TableReader' ); ok( $ex->find_table, 'found table' ); is_deeply( $ex->field_map, { a 
=> 0, c => [2,3,4,5,6,7] }, 'field_map' ); my $i= $ex->iterator; is_deeply( $i->(), { a => 1, c => [3,4,5,6,7,8] }, 'row1' ); is_deeply( $i->(), { a => 1, c => [3,4,5,6,7,8] }, 'row1' ); is_deeply( $i->(), { a => 1, c => [3,4,undef,undef,undef,undef] }, 'row1' ); is( $i->(), undef, 'eof' ); }; subtest complex_follows => sub { my $ex= new_ok( 'Data::TableReader', [ input => \'', decoder => { CLASS => 'Mock', data => [ [ ['name', 'start coords','','','','end coords','','','',''], ['', 'x', 'y', 'w', 'h', 'x','y','w','h'], ['foo', '1', '1', '6', '6', '2','2','8','8'], ] ] }, fields => [ 'name', { name => 'start_x', header => qr/start.*\nx/ }, { name => 'start_y', header => 'y', follows => 'start_x' }, { name => 'end_x', header => qr/end.*\nx/ }, { name => 'end_y', header => 'y', follows => 'end_x' } ], log => $log, ], 'TableReader' ); ok( $ex->find_table, 'found table' ); is_deeply( $ex->field_map, { name => 0, start_x => 1, start_y => 2, end_x => 5, end_y => 6 }, 'field_map' ); my $i= $ex->iterator; is_deeply( $i->(), { name => 'foo', start_x => 1, start_y => 1, end_x => 2, end_y => 2 }, 'row1' ); is( $i->(), undef, 'eof' ); }; done_testing; sub mock_data { [ [ map { [ split /\t/, $_, 5 ] } split "\n", <<'END' Name Address City State Zip Someone 123 Long St Somewhere OH 45678 Another 01 Main St Elsewhere OH 45678 END ], [ map { [ split /\t/, $_, 11 ] } split "\n", <<'END' Zip Codes Cities State Postal Codes Zip Lat Lon with population > 1,000,000 State PostCode Country 45001 39.138752 -84.709618 City State Population Alberta AB CA 45002 39.182833 -84.723477 New York City New York 8,550,405 Alaska AK US 45003 39.588296 -84.786326 Los Angeles California 3,971,883 Alabama AL US Chicago Illinois 2,720,546 Arkansas AR US Houston Texas 2,296,224 American Samoa AS US END ] ] }Data-TableReader-0.011/t/15-decoder-xls.t0000644000175000017500000000462713505216123020745 0ustar silverdirksilverdirk#! 
/usr/bin/env perl use strict; use warnings; use Test::More; use Try::Tiny; use File::Spec::Functions 'catfile'; use Data::TableReader::Decoder::XLSX; use Data::TableReader::Decoder::XLS; use Data::TableReader; SKIP: { skip "Need an XLS parser", 1 unless try { Data::TableReader::Decoder::XLS->default_xls_module }; subtest XLS => sub { my $xls= new_ok( 'Data::TableReader::Decoder::XLS', [ file_name => '', file_handle => open_data('AddressAuxData.xls'), _log => sub {} ], 'XLS decoder' ); run_test($xls); run_test_w_sheet($xls); }; } SKIP: { skip "Need an XLSX parser", 1 unless try { Data::TableReader::Decoder::XLSX->default_xlsx_module }; subtest XLSX => sub { my $xlsx= new_ok( 'Data::TableReader::Decoder::XLSX', [ file_name => '', file_handle => open_data('AddressAuxData.xlsx'), _log => sub {} ], 'XLSX decoder' ); run_test($xlsx); run_test_w_sheet($xlsx); }; } done_testing; # Both worksheets have the same data, so just repeat the tests sub run_test { my ($decoder, $skip_second)= @_; ok( my $iter= $decoder->iterator, 'got iterator' ); ok( my $iter2= $decoder->iterator, 'second parallel iterator' ); is_deeply( $iter->(), [ 'Name', 'Address', 'City', 'State', 'Zip' ], 'row 1' ); is_deeply( $iter2->(), [ 'Name', 'Address', 'City', 'State', 'Zip' ], 'row 1 from iterator 2' ); is_deeply( $iter->(), [ 'Someone', '123 Long St', 'Somewhere', 'OH', 45678 ], 'row 2' ); ok( my $pos= $iter->tell, 'tell position' ); is_deeply( $iter->(), [ ('') x 5 ], 'row 3 blank' ); is_deeply( $iter->(), [ 'Another', '01 Main St', 'Elsewhere', 'OH', 45678 ], 'row 4' ); is_deeply( $iter->(), undef, 'no row 5' ); return if $skip_second; ok( $iter->next_dataset, 'next dataset (worksheet)' ); is_deeply( $iter->(), [ 'Zip Codes', '', '', '', 'Cities', '', '', '', 'State Postal Codes', '', '' ], 'sheet 2, first row' ); ok( $iter->seek($pos), 'seek back to previous sheet' ); is_deeply( $iter->(), [ ('') x 5 ], 'row 3 blank' ); is_deeply( $iter->(), [ 'Another', '01 Main St', 'Elsewhere', 'OH', 45678 ], 
'row 4' ); is_deeply( $iter->(), undef, 'no row 5' ); } sub run_test_w_sheet { my ($decoder) = @_; run_test(Data::TableReader->new(input => $decoder->_sheets->[0], fields => [])->decoder, "skip_second"); return; } sub open_data { my $name= shift; my $t_dir= __FILE__; $t_dir =~ s,[^\/\\]+$,,; $name= catfile($t_dir, 'data', $name); open(my $fh, "<:raw", $name) or die "open($name): $!"; return $fh; } Data-TableReader-0.011/t/11-decoder-tsv.t0000644000175000017500000000157613505216123020747 0ustar silverdirksilverdirk#! /usr/bin/env perl use strict; use warnings; use Test::More; use Try::Tiny; use Data::TableReader::Decoder::TSV; plan skip_all => 'Need a CSV parser for this test' unless try { Data::TableReader::Decoder::TSV->default_csv_module }; my $input= < '', file_handle => $input_fh, _log => sub {} ], 'TSV decoder' ); ok( my $iter= $d->iterator, 'got iterator' ); is_deeply( $iter->(), [ 'a', 'b', 'c', 'd' ], 'first row' ); is_deeply( $iter->(), [ '1', '2', '3', '4' ], 'second row' ); is_deeply( $iter->(), undef, 'no third row' ); ok( $iter->seek(0), 'rewind' ); is_deeply( $iter->(), [ 'a', 'b', 'c', 'd' ], 'first row again' ); is_deeply( $iter->([2,1]), [ '3', '2' ], 'slice from second row' ); ok( !$iter->next_dataset, 'no next dataset' ); done_testing; Data-TableReader-0.011/t/10-decoder-csv.t0000644000175000017500000001073413505216123020721 0ustar silverdirksilverdirk#! /usr/bin/env perl use strict; use warnings; use Test::More; use Try::Tiny; use Log::Any '$log'; use Log::Any::Adapter 'TAP'; use Data::TableReader::Decoder::CSV; my $csvmod; plan skip_all => 'Need a CSV parser for this test' unless try { $csvmod= Data::TableReader::Decoder::CSV->default_csv_module }; note "CSV decoder is ".$csvmod." 
version ".$csvmod->VERSION; my $log_fn= sub { $log->can($_[0])->($log, $_[1]) }; sub test_basic { my $input= ascii(); open my $input_fh, '<', \$input or die; my $d= new_ok( 'Data::TableReader::Decoder::CSV', [ file_name => '', file_handle => $input_fh, _log => $log_fn ], 'CSV decoder' ); ok( my $iter= $d->iterator, 'got iterator' ); is_deeply( $iter->(), [ 'a', 'b', 'c', 'd' ], 'first row' ); is_deeply( $iter->(), [ '1', '2', '3', '4' ], 'second row' ); is_deeply( $iter->(), undef, 'no third row' ); ok( $iter->seek(0), 'rewind' ); is_deeply( $iter->(), [ 'a', 'b', 'c', 'd' ], 'first row again' ); is_deeply( $iter->([2,1]), [ '3', '2' ], 'slice from second row' ); ok( !$iter->next_dataset, 'no next dataset' ); } sub test_multi_iterator { my $input= ascii(); open my $input_fh, '<', \$input or die; my $d= new_ok( 'Data::TableReader::Decoder::CSV', [ file_name => '', file_handle => $input_fh, _log => $log_fn ], 'CSV decoder' ); ok( my $iter= $d->iterator, 'create first iterator' ); # This might be supported in the future, but for now ensure it dies like( (try { $d->iterator } catch {$_}), qr/multiple iterator/i, 'error for multiple iterators' ); undef $iter; # release old iterator, freeing up the file handle to create a new one ok( $iter= $d->iterator, 'new iterator' ); is_deeply( $iter->(), [ 'a', 'b', 'c', 'd' ], 'first row again' ); } sub test_utf_bom { for my $input_fn (qw( utf8_bom utf16_le_bom utf16_be_bom utf8_nobom deceptive_utf8_nobom )) { subtest "seekable $input_fn" => sub { my $input= main->$input_fn; open my $input_fh, '<', \$input or die; my $d= new_ok( 'Data::TableReader::Decoder::CSV', [ file_name => '', file_handle => $input_fh, _log => $log_fn ], "CSV decoder for $input_fn" ); ok( my $iter= $d->iterator, 'got iterator' ); like( $iter->()[0], qr/^\x{FFFD}?test$/, 'first row' ); is_deeply( $iter->(), [ "\x{8A66}\x{3057}", 1, 2, 3 ], 'second row' ); is_deeply( $iter->(), [ "\x{27000}" ], 'third row' ); is_deeply( $iter->(), undef, 'no fourth row' ); ok( 
$iter->seek(0), 'rewind' ); # workaround for a perl bug! the input string gets corrupted substr($input,0,8)= substr(main->$input_fn,0,8); like( $iter->()[0], qr/^\x{FFFD}?test$/, 'first row' ); is_deeply( $iter->([0,3]), [ "\x{8A66}\x{3057}", 3 ], 'slice from second row' ); ok( !$iter->next_dataset, 'no next dataset' ); }; subtest "nonseekable $input_fn" => sub { my $input= main->$input_fn; pipe(my ($input_fh, $out_fh)) or die "pipe: $!"; print $out_fh $input or die "print(pipe_out): $!"; close $out_fh or die "close: $!"; my $d= new_ok( 'Data::TableReader::Decoder::CSV', [ file_name => '', file_handle => $input_fh, _log => $log_fn ], "CSV decoder for $input_fn" ); if ($input_fn =~ /deceptive/) { # Some inputs on non-seekable file handles will result in this exception. # This is expected. like( (try { $d->iterator } catch {$_}), qr/seek/, 'can\'t seek exception' ); } else { ok( my $iter= $d->iterator, 'got iterator' ); like( $iter->()[0], qr/^\x{FFFD}?test$/, 'first row' ); is_deeply( $iter->(), [ "\x{8A66}\x{3057}", 1, 2, 3 ], 'second row' ); is_deeply( $iter->(), [ "\x{27000}" ], 'third row' ); is_deeply( $iter->(), undef, 'no fourth row' ); ok( !$iter->next_dataset, 'no next dataset' ); } }; } } subtest basic => \&test_basic; subtest multi_iter => \&test_multi_iterator; subtest utf_bom => \&test_utf_bom; done_testing; sub ascii { return < sub { open(my $csv, '<', \"X\nabc\n123\ndef\n") or die; my $tr= new_ok( 'Data::TableReader', [ input => $csv, decoder => 'CSV', fields => [{ name => 'X', type => \&is_alpha }], log => $log ], 'TableReader' ); my $i= $tr->iterator; is_deeply( (try { $i->() }), { X => 'abc' }, 'valid row' ); like( (try { $i->() } catch { $_ }), qr/not alpha/, 'invalid row' ); is_deeply( (try { $i->() }), { X => 'def' }, 'valid row' ); is( $i->(), undef, 'eof' ); }; subtest validation_next => sub { open(my $csv, '<', \"X\n1\n2\n\n15\nX\nY\nZ\n16\n") or die; my $tr= new_ok( 'Data::TableReader', [ input => $csv, decoder => 'CSV', fields => [{ name => 
'X', type => \&is_num }], on_validation_fail => 'next', log => $log ], 'TableReader' ); is_deeply( $tr->iterator->all, [ { X => 1 }, { X => 2 }, { X => 15 }, { X => 16 } ], 'only numeric values' ); }; subtest validation_use => sub { open(my $csv, '<', \"X\n1\n2\nx\n15\n") or die; my @log; my $tr= new_ok( 'Data::TableReader', [ input => $csv, decoder => 'CSV', fields => [{ name => 'X', type => \&is_num }], on_validation_fail => 'use', log => \@log ], 'TableReader' ); is_deeply( $tr->iterator->all, [ { X => 1 }, { X => 2 }, { X => 'x' }, { X => 15 } ], 'keep all values' ); is( scalar(grep { $_->[0] eq 'warn' } @log), 1, 'one warning' ); like( $log[0][1], qr/not numeric/, 'warn about non-numeric' ); }; subtest validation_custom => sub { open(my $csv, '<', \"X\n1\n2\nx\n15\n") or die; my @log; my $tr= new_ok( 'Data::TableReader', [ input => $csv, decoder => 'CSV', fields => [{ name => 'X', type => \&is_num }], on_validation_fail => sub { my ($reader, $failures, $values, $context)= @_; for (@$failures) { my ($field, $value_index, $message)= @$_; if ($field->name eq 'X') { $values->[$value_index]= 0; $_= undef; } } @$failures= grep defined, @$failures; return 'use'; }, log => \@log ], 'TableReader' ); is_deeply( $tr->iterator->all, [ { X => 1 }, { X => 2 }, { X => 0 }, { X => 15 } ], 'keep munged value' ); is_deeply( \@log, [], 'no warnings' ); }; done_testing; sub open_data { my $name= shift; my $t_dir= __FILE__; $t_dir =~ s,[^\/]+$,,; $name= catfile($t_dir, 'data', $name); open(my $fh, "<:raw", $name) or die "open($name): $!"; return $fh; } Data-TableReader-0.011/t/05-field.t0000644000175000017500000000452513505216123017613 0ustar silverdirksilverdirk#! 
/usr/bin/env perl use strict; use warnings; use Test::More; use_ok( 'Data::TableReader::Field' ) or BAIL_OUT; subtest header_regex => sub { my @tests= ({ name => 'name', header => undef, match => [ 'name', ' nAMe', '^%name&*^%^' ], nomatch => [ 'na me', 'names', 'surname', 'first name' ], },{ name => 'first_name', header => undef, match => [ 'first name', 'FirstName', ' First_Name', 'First.Name' ], nomatch => [ 'first_nam', 'first.*', 'name', 'first name 0' ], },{ name => 'first_name', header => 'first_name', match => [ 'first_name', ' first_name', 'first_name#"$:^' ], nomatch => [ 'first name', 'first.name', 'first _name', 'FirstName' ], },{ name => 'zip5', match => [ 'zip5', 'zip 5', 'zip_5', 'ZIP-5' ], nomatch => [ 'zip' ], },{ name => 'ZipCode', match => [ 'ZipCode', 'zip code', 'zip.code', 'ZIP CODE', '--ZIP CODE--' ], nomatch => [ 'ZipCode(5)' ], },{ name => 'ZipCode', header => "Zip\nCode", match => ["Zip\nCode", "zip \n code"], nomatch => [ 'zipcode' ], }); plan tests => scalar @tests; for my $t (@tests) { subtest "name=$t->{name} header=".($t->{header}||'') => sub { plan tests => 1 + @{$t->{match}} + @{$t->{nomatch}}; my $field= new_ok( 'Data::TableReader::Field', [ name => $t->{name}, header => $t->{header} ], 'field' ); like( $_, $field->header_regex, "match $_" ) for @{ $t->{match} }; unlike( $_, $field->header_regex, "nomatch $_" ) for @{ $t->{nomatch} }; }; } }; subtest trim => sub { my @tests= ({ name => 'true', trim => 1, input => [ 'x', ' x', 'x ', ' x ', ' ' ], expected => [ 'x', 'x', 'x', 'x', '' ], },{ name => 'regex', trim => qr/^\s*N\/A\s*$|^\s*NULL\s*$|^\s+|\s+$/i, input => [ 'x', ' x', 'x ', ' x ', 'N/A', ' N/A ', ' Null ' ], expected => [ 'x', 'x', 'x', 'x', '', '', '' ], },{ name => 'coderef', trim => sub { s/\s+/_/g; }, input => [ 'x x', ' x' ], expected => [ 'x_x', '_x' ], }); plan tests => scalar @tests; for my $t (@tests) { subtest "name=$t->{name}" => sub { plan tests => 1 + @{$t->{input}}; my $field= new_ok( 
'Data::TableReader::Field', [ name => $t->{name}, trim => $t->{trim} ], 'field' ); for (0.. $#{$t->{input}}) { my ($in, $expected)= ( $t->{input}[$_], $t->{expected}[$_] ); $field->trim_coderef->() for my $out= $in; is( $out, $expected, $in ); } }; } }; done_testing; Data-TableReader-0.011/Makefile.PL0000644000175000017500000000315413505216123017625 0ustar silverdirksilverdirk# This file was automatically generated by Dist::Zilla::Plugin::MakeMaker v6.012. use strict; use warnings; use ExtUtils::MakeMaker; my %WriteMakefileArgs = ( "ABSTRACT" => "Locate and read records from human-edited data tables (Excel, CSV)", "AUTHOR" => "Michael Conrad ", "CONFIGURE_REQUIRES" => { "ExtUtils::MakeMaker" => 0 }, "DISTNAME" => "Data-TableReader", "LICENSE" => "perl", "NAME" => "Data::TableReader", "PREREQ_PM" => { "Carp" => 0, "IO::Handle" => 0, "List::Util" => 0, "Log::Any" => 0, "Module::Runtime" => 0, "Moo" => 2, "Scalar::Util" => 0, "Text::CSV" => "1.95", "Try::Tiny" => 0, "strict" => 0, "warnings" => 0 }, "TEST_REQUIRES" => { "File::Spec::Functions" => 0, "Log::Any::Adapter" => 0, "Log::Any::Adapter::TAP" => 0, "Test::More" => 0 }, "VERSION" => "0.011", "test" => { "TESTS" => "t/*.t" } ); my %FallbackPrereqs = ( "Carp" => 0, "File::Spec::Functions" => 0, "IO::Handle" => 0, "List::Util" => 0, "Log::Any" => 0, "Log::Any::Adapter" => 0, "Log::Any::Adapter::TAP" => 0, "Module::Runtime" => 0, "Moo" => 2, "Scalar::Util" => 0, "Test::More" => 0, "Text::CSV" => "1.95", "Try::Tiny" => 0, "strict" => 0, "warnings" => 0 ); unless ( eval { ExtUtils::MakeMaker->VERSION(6.63_03) } ) { delete $WriteMakefileArgs{TEST_REQUIRES}; delete $WriteMakefileArgs{BUILD_REQUIRES}; $WriteMakefileArgs{PREREQ_PM} = \%FallbackPrereqs; } delete $WriteMakefileArgs{CONFIGURE_REQUIRES} unless eval { ExtUtils::MakeMaker->VERSION(6.52) }; WriteMakefile(%WriteMakefileArgs); Data-TableReader-0.011/weaver.ini0000644000175000017500000000057013505216123017644 0ustar silverdirksilverdirk[@CorePrep] [-SingleEncoding] 
[Name] [Version] [Region / prelude] [Generic / SYNOPSIS] [Generic / DESCRIPTION] [Generic / OVERVIEW] [Collect / ATTRIBUTES] command = attr [Collect / METHODS] command = method [Collect / FUNCTIONS] command = func [Leftovers] [Region / postlude] [Generic / Thanks] [Authors] [-Transformer] transformer = List [Contributors] [Legal] Data-TableReader-0.011/META.json0000644000175000017500000000464413505216123017301 0ustar silverdirksilverdirk{ "abstract" : "Locate and read records from human-edited data tables (Excel, CSV)", "author" : [ "Michael Conrad " ], "dynamic_config" : 0, "generated_by" : "Dist::Zilla version 6.012, CPAN::Meta::Converter version 2.150001", "license" : [ "perl_5" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : 2 }, "name" : "Data-TableReader", "prereqs" : { "configure" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "develop" : { "requires" : { "Pod::Coverage::TrustPod" : "0", "Pod::Elemental::Transformer::List" : "0", "Pod::Weaver::Section::Contributors" : "0", "Test::More" : "0.88", "Test::Pod" : "1.41", "Test::Pod::Coverage" : "1.08" } }, "runtime" : { "recommends" : { "Data::TableReader::Decoder::HTML" : "0", "Spreadsheet::ParseExcel" : "0", "Spreadsheet::ParseXLSX" : "0", "Text::CSV_XS" : "1.06" }, "requires" : { "Carp" : "0", "IO::Handle" : "0", "List::Util" : "0", "Log::Any" : "0", "Module::Runtime" : "0", "Moo" : "2", "Scalar::Util" : "0", "Text::CSV" : "1.95", "Try::Tiny" : "0", "strict" : "0", "warnings" : "0" } }, "test" : { "recommends" : { "Spreadsheet::ParseXLSX" : "0" }, "requires" : { "File::Spec::Functions" : "0", "Log::Any::Adapter" : "0", "Log::Any::Adapter::TAP" : "0", "Test::More" : "0" } } }, "release_status" : "stable", "resources" : { "bugtracker" : { "web" : "https://github.com/nrdvana/perl-Data-TableReader/issues" }, "homepage" : "https://github.com/nrdvana/perl-Data-TableReader", "repository" : { "type" : "git", "url" : "https://github.com/nrdvana/perl-Data-TableReader.git", 
"web" : "https://github.com/nrdvana/perl-Data-TableReader" } }, "version" : "0.011", "x_contributors" : [ "Christian Walde " ], "x_generated_by_perl" : "v5.20.2", "x_serialization_backend" : "Cpanel::JSON::XS version 3.0115" } Data-TableReader-0.011/README.md0000644000175000017500000000071113505216123017126 0ustar silverdirksilverdirk# Data::TableReader This perl module helps take un-sanitized tabular data sources and verify that they match your expected schema, and then iterate the records. You can install the latest stable release of this module from CPAN: cpanm Data::TableReader or build and install this source code with the Dist::Zilla tool: dzil build cpanm ./Data-TableReader-$VERSION The source is found at http://github.com/nrdvana/perl-Data-TableReader Data-TableReader-0.011/META.yml0000644000175000017500000000231313505216123017120 0ustar silverdirksilverdirk--- abstract: 'Locate and read records from human-edited data tables (Excel, CSV)' author: - 'Michael Conrad ' build_requires: File::Spec::Functions: '0' Log::Any::Adapter: '0' Log::Any::Adapter::TAP: '0' Test::More: '0' configure_requires: ExtUtils::MakeMaker: '0' dynamic_config: 0 generated_by: 'Dist::Zilla version 6.012, CPAN::Meta::Converter version 2.150001' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: '1.4' name: Data-TableReader recommends: Data::TableReader::Decoder::HTML: '0' Spreadsheet::ParseExcel: '0' Spreadsheet::ParseXLSX: '0' Text::CSV_XS: '1.06' requires: Carp: '0' IO::Handle: '0' List::Util: '0' Log::Any: '0' Module::Runtime: '0' Moo: '2' Scalar::Util: '0' Text::CSV: '1.95' Try::Tiny: '0' strict: '0' warnings: '0' resources: bugtracker: https://github.com/nrdvana/perl-Data-TableReader/issues homepage: https://github.com/nrdvana/perl-Data-TableReader repository: https://github.com/nrdvana/perl-Data-TableReader.git version: '0.011' x_contributors: - 'Christian Walde ' x_generated_by_perl: v5.20.2 x_serialization_backend: 'YAML::Tiny version 
1.69' Data-TableReader-0.011/MANIFEST0000644000175000017500000000146213505216123017004 0ustar silverdirksilverdirk# This file was automatically generated by Dist::Zilla::Plugin::Manifest v6.012. Changes LICENSE MANIFEST META.json META.yml Makefile.PL README README.md dist.ini lib/Data/TableReader.pm lib/Data/TableReader/Decoder.pm lib/Data/TableReader/Decoder/CSV.pm lib/Data/TableReader/Decoder/IdiotCSV.pm lib/Data/TableReader/Decoder/Mock.pm lib/Data/TableReader/Decoder/Spreadsheet.pm lib/Data/TableReader/Decoder/TSV.pm lib/Data/TableReader/Decoder/XLS.pm lib/Data/TableReader/Decoder/XLSX.pm lib/Data/TableReader/Field.pm lib/Data/TableReader/Iterator.pm t/05-field.t t/10-decoder-csv.t t/11-decoder-tsv.t t/12-decoder-idiot-csv.t t/15-decoder-xls.t t/35-record-iterator.t t/40-locate-table.t t/50-validation.t t/data/AddressAuxData.xls t/data/AddressAuxData.xlsx weaver.ini xt/author/pod-syntax.t xt/release/pod-coverage.t Data-TableReader-0.011/dist.ini0000644000175000017500000000260013505216123017312 0ustar silverdirksilverdirkname = Data-TableReader abstract = Locate and read records from human-edited data tables (Excel, CSV) author = Michael Conrad license = Perl_5 copyright_holder = Michael Conrad [MetaResources] homepage = https://github.com/nrdvana/perl-Data-TableReader bugtracker.web = https://github.com/nrdvana/perl-Data-TableReader/issues repository.web = https://github.com/nrdvana/perl-Data-TableReader repository.url = https://github.com/nrdvana/perl-Data-TableReader.git repository.type = git [@Git] [Git::Contributors] [Git::GatherDir] exclude_match = ^t/tmp exclude_match = ^Decoder-.* include_untracked = 0 [Encoding] encoding = bytes match = ^t/data/ [Git::NextVersion] [OurPkgVersion] [PodWeaver] [PodSyntaxTests] [Test::Pod::Coverage::Configurable] also_private=BUILD [MakeMaker] [License] [Readme] ; authordep Pod::Weaver::Section::Contributors ; authordep Pod::Elemental::Transformer::List [AutoPrereqs] [Prereqs] Text::CSV = 1.95 [Prereqs / RuntimeRecommends] 
Text::CSV_XS = 1.06 Spreadsheet::ParseExcel = 0 Spreadsheet::ParseXLSX = 0 Data::TableReader::Decoder::HTML = 0 [Prereqs / TestRequires] Log::Any::Adapter::TAP = 0 [Prereqs / TestRecommends] Spreadsheet::ParseXLSX = 0 [Prereqs / DevelopRequires] Pod::Weaver::Section::Contributors = 0 Pod::Elemental::Transformer::List = 0 [Manifest] [MetaYAML] [MetaJSON] [UploadToCPAN] Data-TableReader-0.011/LICENSE0000644000175000017500000004366313505216123016671 0ustar silverdirksilverdirkThis software is copyright (c) 2019 by Michael Conrad. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. Terms of the Perl programming language system itself a) the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version, or b) the "Artistic License" --- The GNU General Public License, Version 1, February 1989 --- This software is Copyright (c) 2019 by Michael Conrad. This is free software, licensed under: The GNU General Public License, Version 1, February 1989 GNU GENERAL PUBLIC LICENSE Version 1, February 1989 Copyright (C) 1989 Free Software Foundation, Inc. 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The license agreements of most software companies try to keep users at the mercy of those companies. By contrast, our General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. The General Public License applies to the Free Software Foundation's software and to any other program whose authors commit to using it. You can use it for your programs, too. When we speak of free software, we are referring to freedom, not price. 
Specifically, the General Public License is designed to make sure that you have the freedom to give away or sell copies of free software, that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of a such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must tell them their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any work containing the Program or a portion of it, either verbatim or with modifications. 
Each licensee is addressed as "you". 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this General Public License and to the absence of any warranty; and give any other recipients of the Program a copy of this General Public License along with the Program. You may charge a fee for the physical act of transferring a copy. 2. You may modify your copy or copies of the Program or any portion of it, and copy and distribute such modifications under the terms of Paragraph 1 above, provided that you also do the following: a) cause the modified files to carry prominent notices stating that you changed the files and the date of any change; and b) cause the whole of any work that you distribute or publish, that in whole or in part contains the Program or any part thereof, either with or without modifications, to be licensed at no charge to all third parties under the terms of this General Public License (except that you may choose to grant warranty protection to some or all third parties, at your option). c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the simplest and most usual way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this General Public License. d) You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 
Mere aggregation of another independent work with the Program (or its derivative) on a volume of a storage or distribution medium does not bring the other work under the scope of these terms. 3. You may copy and distribute the Program (or a portion or derivative of it, under Paragraph 2) in object code or executable form under the terms of Paragraphs 1 and 2 above provided that you also do one of the following: a) accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Paragraphs 1 and 2 above; or, b) accompany it with a written offer, valid for at least three years, to give any third party free (except for a nominal charge for the cost of distribution) a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Paragraphs 1 and 2 above; or, c) accompany it with the information you received as to where the corresponding source code may be obtained. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form alone.) Source code for a work means the preferred form of the work for making modifications to it. For an executable file, complete source code means all the source code for all modules it contains; but, as a special exception, it need not include source code for modules which are standard libraries that accompany the operating system on which the executable file runs, or for standard header files or definitions files that accompany that operating system. 4. You may not copy, modify, sublicense, distribute or transfer the Program except as expressly provided under this General Public License. Any attempt otherwise to copy, modify, sublicense, distribute or transfer the Program is void, and will automatically terminate your rights to use the Program under this License. 
However, parties who have received copies, or rights to use copies, from you under this General Public License will not have their licenses terminated so long as such parties remain in full compliance. 5. By copying, distributing or modifying the Program (or any work based on the Program) you indicate your acceptance of this license to do so, and all its terms and conditions. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. 7. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of the license which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the license, you may choose any version ever published by the Free Software Foundation. 8. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 9. 
BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 10. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS Appendix: How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to humanity, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. 
Copyright (C) 19yy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) 19xx name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (a program to direct compilers to make passes at assemblers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice That's all there is to it! --- The Artistic License 1.0 --- This software is Copyright (c) 2019 by Michael Conrad. 
This is free software, licensed under: The Artistic License 1.0 The Artistic License Preamble The intent of this document is to state the conditions under which a Package may be copied, such that the Copyright Holder maintains some semblance of artistic control over the development of the package, while giving the users of the package the right to use and distribute the Package in a more-or-less customary fashion, plus the right to make reasonable modifications. Definitions: - "Package" refers to the collection of files distributed by the Copyright Holder, and derivatives of that collection of files created through textual modification. - "Standard Version" refers to such a Package if it has not been modified, or has been modified in accordance with the wishes of the Copyright Holder. - "Copyright Holder" is whoever is named in the copyright or copyrights for the package. - "You" is you, if you're thinking about copying or distributing this Package. - "Reasonable copying fee" is whatever you can justify on the basis of media cost, duplication charges, time of people involved, and so on. (You will not be required to justify it to the Copyright Holder, but only to the computing community at large as a market that must bear the fee.) - "Freely Available" means that no fee is charged for the item itself, though there may be fees involved in handling the item. It also means that recipients of the item may redistribute it under the same conditions they received it. 1. You may make and give away verbatim copies of the source form of the Standard Version of this Package without restriction, provided that you duplicate all of the original copyright notices and associated disclaimers. 2. You may apply bug fixes, portability fixes and other modifications derived from the Public Domain or from the Copyright Holder. A Package modified in such a way shall still be considered the Standard Version. 3. 
You may otherwise modify your copy of this Package in any way, provided that you insert a prominent notice in each changed file stating how and when you changed that file, and provided that you do at least ONE of the following: a) place your modifications in the Public Domain or otherwise make them Freely Available, such as by posting said modifications to Usenet or an equivalent medium, or placing the modifications on a major archive site such as ftp.uu.net, or by allowing the Copyright Holder to include your modifications in the Standard Version of the Package. b) use the modified Package only within your corporation or organization. c) rename any non-standard executables so the names do not conflict with standard executables, which must also be provided, and provide a separate manual page for each non-standard executable that clearly documents how it differs from the Standard Version. d) make other distribution arrangements with the Copyright Holder. 4. You may distribute the programs of this Package in object code or executable form, provided that you do at least ONE of the following: a) distribute a Standard Version of the executables and library files, together with instructions (in the manual page or equivalent) on where to get the Standard Version. b) accompany the distribution with the machine-readable source of the Package with your modifications. c) accompany any non-standard executables with their corresponding Standard Version executables, giving the non-standard executables non-standard names, and clearly documenting the differences in manual pages (or equivalent), together with instructions on where to get the Standard Version. d) make other distribution arrangements with the Copyright Holder. 5. You may charge a reasonable copying fee for any distribution of this Package. You may charge any fee you choose for support of this Package. You may not charge a fee for this Package itself. 
However, you may distribute this Package in aggregate with other (possibly commercial) programs as part of a larger (possibly commercial) software distribution provided that you do not advertise this Package as a product of your own. 6. The scripts and library files supplied as input to or produced as output from the programs of this Package do not automatically fall under the copyright of this Package, but belong to whomever generated them, and may be sold commercially, and may be aggregated with this Package. 7. C or perl subroutines supplied by you and linked into this Package shall not be considered part of this Package. 8. The name of the Copyright Holder may not be used to endorse or promote products derived from this software without specific prior written permission. 9. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. The End Data-TableReader-0.011/Changes0000644000175000017500000000414613505216123017150 0ustar silverdirksilverdirkVersion 0.011 - 2019-06-27 - Allow per-field 'trim' value to be a regex or coderef - Change dist version-munger to Dist::Zilla::Plugin::OurPkgVersion - Exclude ::Decoder::HTML files from getting shipped with main dist Version 0.010 - 2019-01-02 - Fix several bugs related to multi-line headers - Also allow 'input' to be a Spreadsheet::ParseExcel::Workbook - Auto-detection support for ::Decoder::HTML companion module Version 0.008 - 2018-11-28 - 'input' attribute may now be a Spreadsheet::ParseExcel::Worksheet - When specifying 'decoder' attribute as an arrayref or hashref, the CLASS argument is now optional, allowing auto-detection while still supplying extre optional attributes for the decoder. - Wrokaround for Libre Office row-max bug - New Decoder::XLSX attribute 'xls_formatter', useful for casting local datetimes to a known time zone. 
- Prevent loading Text::CSV_XS if its version is too old. Version 0.007 - 2018-03-15 - Fix various bugs in detect_input_format relevant to CSV files - Enhanced debug-level logging during the table search algorighm Version 0.006 - 2018-03-08 - Auto-detect CSV Unicode format by looking for BOM, unless handle already has an IO layer to specify encoding. - Improved logging, and logging API. 'log' attribute is no longer public on Decoder objects. Version 0.005 - 2017-11-26 - Remove 'filters' feature since it adds overhead and not much value - Fix bug when creating multiple parallel record iterators - Sanitize data headers written in logging messages Version 0.004 - 2017-11-26 - Re-add Text::CSV as a hard dependency since it's small and I need a recent version for all the tests to pass. Version 0.003 - 2017-11-25 - Remove hard dependencies on CSV and Excel parsers - Use new "Mock" decoder for format-independent test cases - Fix handling of $TableReader->log to match API description - Fix bugs to be compatible back to 5.8.9 Version 0.002 - 2017-11-24 - Fixed 'filters' feature - Iterators of decoders are now created on each call, rather than cached - Many documentation improvements Version 0.001 - 2017-11-24 - Initial public version Data-TableReader-0.011/README0000644000175000017500000000062613505216123016534 0ustar silverdirksilverdirkThis archive contains the distribution Data-TableReader, version 0.011: Locate and read records from human-edited data tables (Excel, CSV) This software is copyright (c) 2019 by Michael Conrad. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. This README file was generated by Dist::Zilla::Plugin::Readme v6.012.