File-LoadLines-1.047/0000755000400000040000000000000014742661063012063 5ustar jvjvFile-LoadLines-1.047/t/0000755000400000040000000000000014742661063012326 5ustar jvjvFile-LoadLines-1.047/t/test7.dat0000644000400000040000000000017013364657723014073 0ustar jvjvThe €urø quick brown fox jümps óver €urø the lazy dog Play it again €urø ♩ ♪ ♫ ♬ €urø ♯ â™­ â™® File-LoadLines-1.047/t/test1.dat0000644000400000040000000000017013364612321014046 0ustar jvjvThe €urø quick brown fox jümps óver €urø the lazy dog Play it again €urø ♩ ♪ ♫ ♬ €urø ♯ â™­ â™® File-LoadLines-1.047/t/test0.dat0000644000400000040000000000013413364662463014061 0ustar jvjvThe ¤urø quick brown fox jümps óver ¤urø the lazy dog Play it again ¤urø x x x x ¤urø x x x File-LoadLines-1.047/t/13-nochomp.t0000644000400000040000000000246614122354137014400 0ustar jvjv#! perl # Test return of lines in array. use strict; use warnings; use Test::More tests => 6 * 9; use utf8; use File::LoadLines; -d "t" && chdir "t"; my @lengths = ( 24, 28, 18, 18 ); my $delta; sub testlines { my ( $file, $options ) = @_; $options->{chomp} = 0; my @lines = loadlines( $file, $options ); is( scalar(@lines), 4, "lines $file" ); my $tally = 0; my $line = 0; foreach ( @lines ) { is( length($_), $delta+$lengths[$line], "line $line" ); $line++; $tally++ if /€urø/; } is( $tally, 4, "matches" ); } $delta = 1; # test1.dat: UTF-8 Unicode text testlines("test1.dat"); # test3.dat: UTF-8 Unicode (with BOM) text testlines("test3.dat"); # test5.dat: Little-endian UTF-16 Unicode text testlines("test5.dat"); # test7.dat: UTF-8 Unicode text, with CR line terminators testlines("test7.dat"); # test8.dat: UTF-8 Unicode (with BOM) text, with CR line terminators testlines("test8.dat"); # test9.dat: Little-endian UTF-16 Unicode text, with CR line terminators testlines("test9.dat"); $delta = 2; # test2.dat: UTF-8 Unicode text, with CRLF line terminators testlines("test2.dat"); # test4.dat: UTF-8 Unicode (with BOM) text, with CRLF line terminators testlines("test4.dat"); # test6.dat: Little-endian UTF-16 Unicode text, with CRLF, CR line terminators testlines("test6.dat"); File-LoadLines-1.047/t/29-encoding.t0000644000400000040000000000275114562763112014534 0ustar jvjv#! perl # Test non Latin filenames from file. use strict; use warnings; use Test::More tests => 4; use utf8; use Encode qw(encode_utf8 encode); use File::LoadLines; binmode( STDERR, ':utf8' ); # The file. my $filename = "testň.dat"; # And its contents. my $reftext = "Hi There!"; -d "t" && chdir "t"; # We explicitly create the file, since this name is not portable and # treated badly by archivers and unpackers. if ( $^O =~ /mswin/i ) { require Win32API::File; my $fn = encode('UTF-16LE', "$filename").chr(0).chr(0); # Sometimes CREATE_ALWAYS barfs. Win32API::File::DeleteFileW($fn); # Create the file. my $fh = Win32API::File::CreateFileW ( $fn, Win32API::File::GENERIC_WRITE(), 0, [], Win32API::File::CREATE_ALWAYS(), 0, []); die("$filename: $^E (Win32)\n") if $^E; # Get handle and store contents. Win32API::File::OsFHandleOpen( 'FILE', $fh, "w") or die("$filename: $!\n"); print FILE ( $reftext, "\n" ); close(FILE); } else { open( my $fh, '>', encode_utf8($filename) ); die("$filename: $!\n") unless $fh; print $fh ( $reftext, "\n" ); close($fh); } my $options = {}; my @lines = loadlines( "testW.dat", $options ); is( $options->{encoding}, "UTF-8", "returned encoding" ); is( $lines[0], $filename, "correct data" ); $options = {}; @lines = loadlines( encode_utf8($lines[0]), $options ); is( $options->{encoding}, "ASCII", "returned encoding 2" ); is( $lines[0], $reftext, "correct data 2" ); unlink(encode_utf8($filename)); File-LoadLines-1.047/t/20-encoding.t0000644000400000040000000000111513764465151014521 0ustar jvjv#! perl # Test explicit encoding. use strict; use warnings; use Test::More tests => 6; use utf8; use File::LoadLines; -d "t" && chdir "t"; my @lengths = ( 24, 28, 18, 18 ); sub testlines { my ( $file, $options ) = @_; $options //= {}; my @lines = loadlines( $file, $options ); is( scalar(@lines), 4, "lines" ); my $tally = 0; my $line = 0; foreach ( @lines ) { is( length($_), $lengths[$line], "line $line" ); $line++; } } # test0.dat: ISO-8859.15 text my $o = {}; testlines( "test_.dat", $o ); is ( $o->{encoding}, "ASCII", "returned encoding" ); File-LoadLines-1.047/t/28-encoding.t0000644000400000040000000000324614564154435014537 0ustar jvjv#!/usr/bin/perl use strict; use warnings; use utf8; use Test::More; use Encode qw( encode from_to ); use File::LoadLines; # Reference data. my @data = ( "{title: Swing Low Sweet Chariot}", "{subtitle: Sub Títlë}" ); mkdir("out") unless -d "out"; # Recode to UTF-8. my $data = join("\n", @data) . "\n"; $data = encode("UTF-8", $data); my @BOMs = qw( UTF-8 UTF-16BE UTF-16LE UTF-32BE UTF-32LE ); my @noBOMs = qw( ISO-8859-1 UTF-8 ); my %enc2bom = map { $_ => encode($_, "\x{feff}") } @BOMs; enctest( $_, 1 ) for @noBOMs; enctest($_) for @BOMs; done_testing( 4 * 3 * (@noBOMs + @BOMs) ); sub enctest { my ( $enc, $nobom ) = @_; my $encoded = $data; _enctest( $encoded, $enc, $nobom ); $encoded = $data; $encoded =~ s/\n/\x0a/g; _enctest( $encoded, $enc, $nobom, "LF" ); $encoded = $data; $encoded =~ s/\n/\x0d/g; _enctest( $encoded, $enc, $nobom, "CR" ); $encoded = $data; $encoded =~ s/\n/\x0d\x0a/g; _enctest( $encoded, $enc, $nobom, "CRLF" ); } sub _enctest { my ( $encoded, $enc, $nobom, $crlf ) = @_; from_to( $encoded, "UTF-8", $enc ); unless ( $nobom ) { BAIL_OUT("Unknown encoding: $enc") unless $enc2bom{$enc}; $encoded = $enc2bom{$enc} . $encoded; } my $fn = "out/$enc.cho"; open( my $fh, ">:raw", $fn ) or die("$fn: $!\n"); print $fh $encoded; close($fh); $enc .= " (no BOM)" if $nobom; $enc .= " ($crlf)" if $crlf; my $opts = { fail => "soft" }; my @d = loadlines( $fn, $opts ); note("$fn: " . $opts->{error} ) unless @d; ok( scalar( @d ) == 2, "$enc: Two lines" ); is( $d[0], $data[0], "$enc: Line 1" ); is( $d[1], $data[1], "$enc: Line 2" ); unlink($fn); } File-LoadLines-1.047/t/00-load.t0000644000400000040000000000023314564154334013645 0ustar jvjv#!perl -T use Test::More tests => 1; BEGIN { use_ok( 'File::LoadLines' ); } note( "Testing File::LoadLines $File::LoadLines::VERSION, Perl $], $^X" ); File-LoadLines-1.047/t/22-encoding.t0000644000400000040000000000124413764464207014527 0ustar jvjv#! perl # Test auto-sense Latin encoding. use strict; use warnings; use Test::More tests => 7; use utf8; use File::LoadLines; -d "t" && chdir "t"; my @lengths = ( 24, 28, 18, 18 ); sub testlines { my ( $file, $options ) = @_; $options //= {}; my @lines = loadlines( $file, $options ); is( scalar(@lines), 4, "lines" ); my $tally = 0; my $line = 0; foreach ( @lines ) { is( length($_), $lengths[$line], "line $line" ); $line++; $tally++ if /¤urø/; } is( $tally, 4, "matches" ); } # test0.dat: ISO-8859.1 text # Should auto-sense. my $o = {}; testlines( "test0.dat", $o ); is( $o->{encoding}, "ISO-8859-1", "returned encoding" ); File-LoadLines-1.047/t/11-basic.t0000644000400000040000000000236013364663631014016 0ustar jvjv#! perl # Test return of lines in array ref. use strict; use warnings; use Test::More tests => 6 * 9; use utf8; use File::LoadLines; -d "t" && chdir "t"; my @lengths = ( 24, 28, 18, 18 ); sub testlines { my ( $file, $options ) = @_; my $lines = loadlines( $file, $options ); is( scalar(@$lines), 4, "lines" ); my $tally = 0; my $line = 0; foreach ( @$lines ) { is( length($_), $lengths[$line], "line $line" ); $line++; $tally++ if /€urø/; } is( $tally, 4, "matches" ); } # test1.dat: UTF-8 Unicode text testlines("test1.dat"); # test2.dat: UTF-8 Unicode text, with CRLF line terminators testlines("test2.dat"); # test3.dat: UTF-8 Unicode (with BOM) text testlines("test3.dat"); # test4.dat: UTF-8 Unicode (with BOM) text, with CRLF line terminators testlines("test4.dat"); # test5.dat: Little-endian UTF-16 Unicode text testlines("test5.dat"); # test6.dat: Little-endian UTF-16 Unicode text, with CRLF, CR line terminators testlines("test6.dat"); # test7.dat: UTF-8 Unicode text, with CR line terminators testlines("test7.dat"); # test8.dat: UTF-8 Unicode (with BOM) text, with CR line terminators testlines("test8.dat"); # test9.dat: Little-endian UTF-16 Unicode text, with CR line terminators testlines("test9.dat"); File-LoadLines-1.047/t/17-softfail.t0000644000400000040000000000055414564154364014556 0ustar jvjv#! perl # Test soft failure. use strict; use warnings; use Test::More tests => 2; use utf8; use File::LoadLines; -d "t" && chdir "t"; # Testing soft errors. my $opt = { fail => "soft" }; my @lines = loadlines( "a hopefully not existing file", $opt ); is( scalar(@lines), 0, "no lines" ); ok( $opt->{error}, "error ok" ); note("Error message: $opt->{error}"); File-LoadLines-1.047/t/12-basic.t0000644000400000040000000000043413370554460014013 0ustar jvjv#! perl # Test parameter types. use strict; use warnings; use Test::Exception tests => 2; use utf8; use File::LoadLines; throws_ok { loadlines() } qr/Missing filename/, 'caught missing file'; throws_ok { loadlines('dummy.txt', []) } qr/Invalid options/, 'caught invalid options'; File-LoadLines-1.047/t/testW.dat0000644000400000040000000000001314006071532014105 0ustar jvjvtestň.dat File-LoadLines-1.047/t/15-blob.t0000644000400000040000000000065214526430275013656 0ustar jvjv#! perl # Test blob reading. use strict; use warnings; use Test::More tests => 3; use utf8; use File::LoadLines; -d "t" && chdir "t"; # Testing not-touching. my $data = "\x{EF}\x{BB}\x{BF}first\0\nsecond\r\nthird\0\r\n"; my @lines = loadlines( \$data, { blob => 1 } ); is( scalar(@lines), 1, "single lines" ); is( $lines[0], $data, "data\@ ok" ); my $d = loadlines( \$data, { blob => 1 } ); is( $d, $data, "data\$ ok" ); File-LoadLines-1.047/t/test5.dat0000644000400000040000000000027213364612600014055 0ustar jvjvÿþThe ¬ urø quick brown fox jümps óver ¬ urø the lazy dog Play it again ¬ urø i& j& k& l& ¬ urø o& m& n& File-LoadLines-1.047/t/21-encoding.t0000644000400000040000000000124613764464221014524 0ustar jvjv#! perl # Test explicit encoding. use strict; use warnings; use Test::More tests => 7; use utf8; use File::LoadLines; -d "t" && chdir "t"; my @lengths = ( 24, 28, 18, 18 ); sub testlines { my ( $file, $options ) = @_; $options //= {}; my @lines = loadlines( $file, $options ); is( scalar(@lines), 4, "lines" ); my $tally = 0; my $line = 0; foreach ( @lines ) { is( length($_), $lengths[$line], "line $line" ); $line++; $tally++ if /€urø/; } is( $tally, 4, "matches" ); } # test0.dat: ISO-8859.15 text my $o = { encoding => "iso-8859-15" }; testlines( "test0.dat", $o ); is ( $o->{encoding}, "iso-8859-15", "returned encoding" ); File-LoadLines-1.047/t/test_.dat0000644000400000040000000000013413764464506014142 0ustar jvjvThe quick brown fox jump The quick brown fox jumps ov The quick brown fo The quick brown fo File-LoadLines-1.047/t/test6.dat0000644000400000040000000000030213364612643014057 0ustar jvjvÿþThe ¬ urø quick brown fox jümps óver ¬ urø the lazy dog Play it again ¬ urø i& j& k& l& ¬ urø o& m& n& File-LoadLines-1.047/t/14-noeof.t0000644000400000040000000000107114122354250014027 0ustar jvjv#! perl # Test return of lines in array. use strict; use warnings; use Test::More tests => 3; use utf8; use File::LoadLines; -d "t" && chdir "t"; # Testing a missing final line terminator. my $data = "first\r\nsecond\r\nthird"; my @lines = loadlines( \$data, { chomp => 0 } ); is( scalar(@lines), 3, "three lines CRLF" ); $data = "first\rsecond\rthird"; @lines = loadlines( \$data, { chomp => 0 } ); is( scalar(@lines), 3, "three lines CR" ); $data = "first\nsecond\nthird"; @lines = loadlines( \$data, { chomp => 0 } ); is( scalar(@lines), 3, "three lines NL" ); File-LoadLines-1.047/t/16-blob.t0000644000400000040000000000062514526604035013654 0ustar jvjv#! perl # Test blob reading. use strict; use warnings; use Test::More tests => 3; use utf8; use File::LoadLines 'loadblob'; -d "t" && chdir "t"; # Testing not-touching. my $data = "\x{EF}\x{BB}\x{BF}first\0\nsecond\r\nthird\0\r\n"; my @lines = loadblob( \$data ); is( scalar(@lines), 1, "single lines" ); is( $lines[0], $data, "data\@ ok" ); my $d = loadblob( \$data ); is( $d, $data, "data\$ ok" ); File-LoadLines-1.047/t/10-basic.t0000644000400000040000000000235213364663640014016 0ustar jvjv#! perl # Test return of lines in array. use strict; use warnings; use Test::More tests => 6 * 9; use utf8; use File::LoadLines; -d "t" && chdir "t"; my @lengths = ( 24, 28, 18, 18 ); sub testlines { my ( $file, $options ) = @_; my @lines = loadlines( $file, $options ); is( scalar(@lines), 4, "lines" ); my $tally = 0; my $line = 0; foreach ( @lines ) { is( length($_), $lengths[$line], "line $line" ); $line++; $tally++ if /€urø/; } is( $tally, 4, "matches" ); } # test1.dat: UTF-8 Unicode text testlines("test1.dat"); # test2.dat: UTF-8 Unicode text, with CRLF line terminators testlines("test2.dat"); # test3.dat: UTF-8 Unicode (with BOM) text testlines("test3.dat"); # test4.dat: UTF-8 Unicode (with BOM) text, with CRLF line terminators testlines("test4.dat"); # test5.dat: Little-endian UTF-16 Unicode text testlines("test5.dat"); # test6.dat: Little-endian UTF-16 Unicode text, with CRLF, CR line terminators testlines("test6.dat"); # test7.dat: UTF-8 Unicode text, with CR line terminators testlines("test7.dat"); # test8.dat: UTF-8 Unicode (with BOM) text, with CR line terminators testlines("test8.dat"); # test9.dat: Little-endian UTF-16 Unicode text, with CR line terminators testlines("test9.dat"); File-LoadLines-1.047/t/test4.dat0000644000400000040000000000017713364612514014064 0ustar jvjvThe €urø quick brown fox jümps óver €urø the lazy dog Play it again €urø ♩ ♪ ♫ ♬ €urø ♯ â™­ â™® File-LoadLines-1.047/t/test2.dat0000644000400000040000000000017413364612400014051 0ustar jvjvThe €urø quick brown fox jümps óver €urø the lazy dog Play it again €urø ♩ ♪ ♫ ♬ €urø ♯ â™­ â™® File-LoadLines-1.047/t/test9.dat0000644000400000040000000000027213364657740014077 0ustar jvjvÿþThe ¬ urø quick brown fox jümps óver ¬ urø the lazy dog Play it again ¬ urø i& j& k& l& ¬ urø o& m& n& File-LoadLines-1.047/t/test8.dat0000644000400000040000000000017313364657730014075 0ustar jvjvThe €urø quick brown fox jümps óver €urø the lazy dog Play it again €urø ♩ ♪ ♫ ♬ €urø ♯ â™­ â™® File-LoadLines-1.047/t/test3.dat0000644000400000040000000000017313364612443014060 0ustar jvjvThe €urø quick brown fox jümps óver €urø the lazy dog Play it again €urø ♩ ♪ ♫ ♬ €urø ♯ â™­ â™® File-LoadLines-1.047/Makefile.PL0000644000400000040000000000172414562473044014041 0ustar jvjv#! perl # Ease the life of the CPAN testers. use v5.10.1; use strict; use warnings; # Need EU::MM that understands *_REQUIRES. use ExtUtils::MakeMaker 6.76; WriteMakefile ( NAME => 'File::LoadLines', AUTHOR => 'Johan Vromans ', VERSION_FROM => 'lib/File/LoadLines.pm', ABSTRACT_FROM => 'lib/File/LoadLines.pm', LICENSE => 'perl_5', PL_FILES => {}, MIN_PERL_VERSION => v5.10.1, PREREQ_PM => { 'MIME::Base64' => 0, 'URI::Escape' => 0, }, CONFIGURE_REQUIRES => { 'ExtUtils::MakeMaker' => 6.76, }, # BUILD_REQUIRES => { # }, TEST_REQUIRES => { 'Test::More' => 0, 'Test::Exception' => 0, 'MIME::Base64' => 0, 'URI::Escape' => 0, }, META_MERGE => { resources => { repository => 'https://github.com/sciurius/perl-File-LoadLines', bugtracker => "https://github.com/sciurius/perl-File-LoadLines/issues", }, } ); File-LoadLines-1.047/lib/0000755000400000040000000000000014742661063012631 5ustar jvjvFile-LoadLines-1.047/lib/File/0000755000400000040000000000000014742661063013510 5ustar jvjvFile-LoadLines-1.047/lib/File/LoadLines.pm0000644000400000040000000002522214742464012015716 0ustar jvjv#! perl package File::LoadLines; use warnings; use strict; use Exporter qw(import); our @EXPORT = qw( loadlines ); our @EXPORT_OK = qw( loadblob ); use Encode; use Carp; use utf8; =head1 NAME File::LoadLines - Load lines from files and network =cut our $VERSION = '1.047'; =head1 SYNOPSIS use File::LoadLines; my @lines = loadlines("mydata.txt"); use File::LoadLines qw(loadblob); my $img = loadblob("https://img.shields.io/badge/Language-Perl-blue"); =head1 DESCRIPTION File::LoadLines provides an easy way to load the contents of a text file into an array of lines. It is intended for small to moderate size files like config files that are often produced by weird tools (and users). It will transparently fetch data from the network if the provided file name is a URL. File::LoadLines automatically handles ASCII, Latin-1 and UTF-8 text. When the file has a BOM, it handles UTF-8, UTF-16 LE and BE, and UTF-32 LE and BE. Recognized line terminators are NL (Unix, Linux), CRLF (DOS, Windows) and CR (Mac) Function loadblob(), exported on depand, fetches the content and returns it without processing, equivalent to File::Slurp and ilk. =head1 EXPORT By default the function loadlines() is exported. =head1 FUNCTIONS =head2 loadlines @lines = loadlines("mydata.txt"); @lines = loadlines("mydata.txt", $options); The file is opened, read, decoded and split into lines that are returned in the result array. Line terminators are removed. In scalar context, returns an array reference. The first argument may be the name of a file, an opened file handle, or a reference to a string that contains the data. The name of a file on disk may start with C<"file://">, this is ignored. If the name starts with C<"http:"> or C<"https:"> the data will be retrieved using LWP. L like C<"data:text/plain;base64,SGVsbG8sIFdvcmxkIQ=="> are also supported. The second argument can be used to influence the behaviour. It is a hash reference of option settings. Note that loadlines() is a I, it reads the whole file into memory and, for splitting, requires temporarily memory for twice the size of the file. =over =item split Enabled by default. The data is split into lines and returned as an array (in list context) or as an array reference (in scalar context). If set to zero, the data is not split into lines but returned as a single string. =item chomp Enabled by default. Line terminators are removed from the resultant lines. If set to zero, the line terminators are not removed. =item encoding If specified, loadlines() will use this encoding to decode the file data if it cannot automatically detect the encoding. If you pass an options hash, File::LoadLines will set C to the encoding it detected and used for this file data. =item blob If specified, the data read is not touched but returned exactly as read. C overrules C and C. =item fail If specified, it should be either C<"hard"> or C<"soft">. If C<"hard">, read errors are signalled using croak exceptions. This is the default. If set to C<"soft">, loadlines() will return an empty result and set the error message in the options hash with key C<"error">. =back =cut sub loadlines { my ( $filename, $options ) = @_; croak("Missing filename.\n") unless defined $filename; croak("Invalid options.\n") if (defined $options && (ref($options) ne "HASH")); $options->{blob} //= 0; $options->{split} //= !$options->{blob}; $options->{chomp} //= !$options->{blob}; $options->{fail} //= "hard"; my $data; # slurped file data my $encoded; # already encoded # Gather data from the input. if ( ref($filename) ) { if ( ref($filename) eq 'GLOB' || ref($filename) eq 'IO::File' ) { binmode( $filename, ':raw' ); $data = do { local $/; <$filename> }; $filename = "__GLOB__"; } else { $data = $$filename; $filename = "__STRING__"; $encoded++; } } elsif ( $filename eq '-' ) { $filename = "__STDIN__"; binmode( STDIN, ':raw' ); $data = do { local $/; }; } elsif ( $filename =~ /^https?:/ ) { require LWP::UserAgent; my $ua = LWP::UserAgent->new( timeout => 20 ); my $res = $ua->get($filename); if ( $res->is_success ) { $data = $res->decoded_content; } elsif ( $options->{fail} eq "soft" ) { $options->{error} = $res->status_line; return; } else { croak("$filename: ", $res->status_line); } } elsif ( $filename =~ /^data:/ ) { unless ( $filename =~ m! ^ data: (? .*? ) , (? .* ) $ !sx ) { if ( $options->{fail} eq "soft" ) { $options->{error} = "Malformed inline data"; return; } else { croak("Malformed inline data"); } } $data = $+{data}; $filename = "__DATA__"; my $mediatype = $+{mediatype}; my $enc = ""; if ( $mediatype && $mediatype =~ /^(.*);base64$/ ) { $mediatype = $1; $enc = "base64"; } $options->{mediatype} = $mediatype if $mediatype; if ( ! $enc ) { # URL encoded. $data = $+{data}; $data =~ s/\%([0-9a-f][0-9a-f])/chr(hex($1))/ige; } else { # Base64. require MIME::Base64; $data = MIME::Base64::decode($data); } if ( $mediatype && $mediatype =~ /;charset=([^;]*)/ ) { $data = decode( $1, $data ); $options->{encoding} = $1; $encoded++; } } else { my $name = $filename; $name =~ s;^file://;;; $filename = decode_utf8($name); # On MS Windows, non-latin (wide) filenames need special treatment. if ( $filename ne $name && $^O =~ /mswin/i ) { require Win32API::File; my $fn = encode('UTF-16LE', "$filename").chr(0).chr(0); my $fh = Win32API::File::CreateFileW ( $fn, Win32API::File::FILE_READ_DATA(), 0, [], Win32API::File::OPEN_EXISTING(), 0, []); croak("$filename: $^E (Win32)\n") if $^E; unless ( Win32API::File::OsFHandleOpen( 'FILE', $fh, "r") ) { $options->{error} = "$!", return if $options->{fail} eq "soft"; croak("$filename: $!\n"); } binmode FILE => ':raw'; $data = do { local $/; readline(\*FILE) }; # warn("$filename³: len=", length($data), "\n"); close(FILE); } else { my $f; unless ( open( $f, '<:raw', $filename ) ) { $options->{error} = "$!", return if $options->{fail} eq "soft"; croak("$name: $!\n"); } $data = do { local $/; <$f> }; } } $options->{_filesource} = $filename if $options; my $name = encode_utf8($filename); if ( $options->{blob} ) { # Do not touch. $options->{encoding} = 'Blob'; } elsif ( $encoded ) { # Nothing to do, already dealt with. $options->{encoding} //= 'Perl'; } # Detect Byte Order Mark. elsif ( $data =~ /^\xEF\xBB\xBF/ ) { warn("$name is UTF-8 (BOM)\n") if $options->{debug}; $options->{encoding} = 'UTF-8'; $data = decode( "UTF-8", substr($data, 3) ); } elsif ( $data =~ /^\xFE\xFF/ ) { warn("$name is UTF-16BE (BOM)\n") if $options->{debug}; $options->{encoding} = 'UTF-16BE'; $data = decode( "UTF-16BE", substr($data, 2) ); } elsif ( $data =~ /^\xFF\xFE\x00\x00/ ) { warn("$name is UTF-32LE (BOM)\n") if $options->{debug}; $options->{encoding} = 'UTF-32LE'; $data = decode( "UTF-32LE", substr($data, 4) ); } elsif ( $data =~ /^\xFF\xFE/ ) { warn("$name is UTF-16LE (BOM)\n") if $options->{debug}; $options->{encoding} = 'UTF-16LE'; $data = decode( "UTF-16LE", substr($data, 2) ); } elsif ( $data =~ /^\x00\x00\xFE\xFF/ ) { warn("$name is UTF-32BE (BOM)\n") if $options->{debug}; $options->{encoding} = 'UTF-32BE'; $data = decode( "UTF-32BE", substr($data, 4) ); } # No BOM, did user specify an encoding? elsif ( $options->{encoding} ) { warn("$name is ", $options->{encoding}, " (fallback)\n") if $options->{debug}; $data = decode( $options->{encoding}, $data, 1 ); } # Try UTF8, fallback to ISO-8895.1. else { my $d = eval { decode( "UTF-8", $data, 1 ) }; if ( $@ ) { warn("$name is ISO-8859.1 (assumed)\n") if $options->{debug}; $options->{encoding} = 'ISO-8859-1'; $data = decode( "iso-8859-1", $data ); } elsif ( $d !~ /[^[:ascii:]]/ ) { warn("$name is ASCII (detected)\n") if $options->{debug}; $options->{encoding} = 'ASCII'; $data = $d; } else { warn("$name is UTF-8 (detected)\n") if $options->{debug}; $options->{encoding} = 'UTF-8'; $data = $d; } } # This can be used to add line continuation or comment stripping. if ( $options->{strip} ) { $data =~ s/$options->{strip}//g; } return $data unless $options->{split}; # Split in lines; my @lines; if ( $options->{chomp} ) { # Unless empty, make sure there is a final newline. $data .= "\n" if $data =~ /.(?!\r\n|\n|\r)\z/; # We need to maintain trailing newlines. push( @lines, $1 ) while $data =~ /(.*?)(?:\r\n|\n|\r)/g; } else { push( @lines, $1 ) while $data =~ /(.*?(?:\r\n|\n|\r))/g; # In case the last line has no terminator. push( @lines, $1 ) if $data =~ /(?:\r\n|\n|\r)([^\r\n]+)\z/; } undef $data; return wantarray ? @lines : \@lines; } =head2 loadblob use File::LoadLines qw(loadblob); $rawdata = loadblob("raw.dat"); $rawdata = loadblob("raw.dat", $options); This is equivalent to calling loadlines() with C<< blob=>1 >> in the options. =cut sub loadblob { my ( $filename, $options ) = @_; croak("Missing filename.\n") unless defined $filename; croak("Invalid options.\n") if defined($options) && ref($options) ne "HASH"; $options //= {}; $options->{blob} = 1; loadlines( $filename, $options ); } =head1 SEE ALSO There are currently no other modules that handle BOM detection and line splitting. I have a faint hope that future versions of Perl and Raku will deal with this transparently, but I fear the worst. =head1 HINTS When you have raw file data (e.g. from a zip), you can use loadlines() to decode and unpack: open( my $data, '<', \$contents ); $lines = loadlines( $data, $options ); There is no hard requirement on LWP. If you want to use transparent fetching of data over the network please make sure LWP::UserAgent is available. =head1 AUTHOR Johan Vromans, C<< >> =head1 SUPPORT AND DOCUMENTATION Development of this module takes place on GitHub: https://github.com/sciurius/perl-File-LoadLines. You can find documentation for this module with the perldoc command. perldoc File::LoadLines Please report any bugs or feature requests using the issue tracker on GitHub. =head1 COPYRIGHT & LICENSE Copyright 2018,2020,2024 Johan Vromans, all rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut 1; # End of File::LoadLines File-LoadLines-1.047/README.md0000644000400000040000000000263214526617340013344 0ustar jvjv# File::LoadLines *This is the only module you'll ever need to read data* ![Version](https://img.shields.io/github/v/release/sciurius/perl-File-LoadLines) ![GitHub issues](https://img.shields.io/github/issues/sciurius/perl-File-LoadLines) [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](http://makeapullrequest.com) ![Language Perl](https://img.shields.io/badge/Language-Perl-blue) File::LoadLines provides an easy way to load the contents of a disk file or network resource into your program. It can deliver the contents without touching (as a blob) but its most useful purpose is to deliver the contents of text data into an array of lines. Hence the name, File::LoadLines. It automatically handles data encodings ASCII, Latin and UTF-8 text. When the file has a BOM, it handles UTF-8, UTF-16 LE and BE, and UTF-32 LE and BE. Recognized line terminators are NL (Unix, Linux), CRLF (DOS, Windows) and CR (Mac) ## SUPPORT AND DOCUMENTATION Development of this module takes place on GitHub: https://github.com/sciurius/perl-File-LoadLines. You can find documentation for this module with the perldoc command. perldoc File::LoadLines Please report any bugs or feature requests using the issue tracker on GitHub. ## COPYRIGHT AND LICENCE Copyright (C) 2018,2020,2023 Johan Vromans This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. File-LoadLines-1.047/Changes0000644000400000040000000000220614742463573013364 0ustar jvjvRevision history for File-LoadLines Default fail to "hard" to prevent undefined warnings. Accept data from IO::File handle. 1.046 2024-02-17 Packaging changes. Nothing functional. 1.045 2024-01-26 Add support for file:// URLs. Update dependencies. 1.044 2023-01-04 Add support for data URLs. 1.043 2023-12-14 Fix typo. 1.042 2023-12-13 More encoding testing. 1.041 2023-12-10 Fix nochomp (MS Windows only). 1.040 2023-11-20 Add loadblob(), exported on demand. Enhance documentation. 1.030 2023-11-19 Add support for blob, soft failures and network fetching. (With chomp=0) Allow final line to not be terminated. 1.021 2021-08-16 Re-align version numbering. 1.020.2 2021-02-02 Fix test 29. 1.020.1 2021-02-02 Fix test 29. 1.02 2021-02-01 Improve handling of non-latin1 filenames on MS Windows. 1.01 2020-12-10 Change README to README.md. Return the encoding used in the $options hash, if any. Detect ASCII. 1.00 2020-12-03 Fix Carp and Test::Exception dependencies. Assume mature. 0.02 2018-11-06 Mostly documentation enhancements. 0.01 2018-10-26 First version, released on an unsuspecting world. File-LoadLines-1.047/MANIFEST0000644000400000040000000000107414742661063013216 0ustar jvjvChanges MANIFEST Makefile.PL README.md lib/File/LoadLines.pm t/00-load.t t/10-basic.t t/11-basic.t t/12-basic.t t/13-nochomp.t t/14-noeof.t t/15-blob.t t/16-blob.t t/17-softfail.t t/20-encoding.t t/21-encoding.t t/22-encoding.t t/28-encoding.t t/29-encoding.t t/test_.dat t/test0.dat t/test1.dat t/test2.dat t/test3.dat t/test4.dat t/test5.dat t/test6.dat t/test7.dat t/test8.dat t/test9.dat t/testW.dat META.yml Module YAML meta-data (added by MakeMaker) META.json Module JSON meta-data (added by MakeMaker) File-LoadLines-1.047/META.yml0000644000400000040000000000147114742661063013337 0ustar jvjv--- abstract: 'Load lines from files and network ' author: - 'Johan Vromans ' build_requires: ExtUtils::MakeMaker: '0' MIME::Base64: '0' Test::Exception: '0' Test::More: '0' URI::Escape: '0' configure_requires: ExtUtils::MakeMaker: '6.76' dynamic_config: 1 generated_by: 'ExtUtils::MakeMaker version 7.70, CPAN::Meta::Converter version 2.150010' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: '1.4' name: File-LoadLines no_index: directory: - t - inc requires: MIME::Base64: '0' URI::Escape: '0' perl: '5.010001' resources: bugtracker: https://github.com/sciurius/perl-File-LoadLines/issues repository: https://github.com/sciurius/perl-File-LoadLines version: '1.047' x_serialization_backend: 'CPAN::Meta::YAML version 0.018' File-LoadLines-1.047/META.json0000644000400000040000000000262614742661063013512 0ustar jvjv{ "abstract" : "Load lines from files and network ", "author" : [ "Johan Vromans " ], "dynamic_config" : 1, "generated_by" : "ExtUtils::MakeMaker version 7.70, CPAN::Meta::Converter version 2.150010", "license" : [ "perl_5" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : 2 }, "name" : "File-LoadLines", "no_index" : { "directory" : [ "t", "inc" ] }, "prereqs" : { "build" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "configure" : { "requires" : { "ExtUtils::MakeMaker" : "6.76" } }, "runtime" : { "requires" : { "MIME::Base64" : "0", "URI::Escape" : "0", "perl" : "5.010001" } }, "test" : { "requires" : { "MIME::Base64" : "0", "Test::Exception" : "0", "Test::More" : "0", "URI::Escape" : "0" } } }, "release_status" : "stable", "resources" : { "bugtracker" : { "web" : "https://github.com/sciurius/perl-File-LoadLines/issues" }, "repository" : { "url" : "https://github.com/sciurius/perl-File-LoadLines" } }, "version" : "1.047", "x_serialization_backend" : "JSON::PP version 4.16" }