HTML-FormatText-WithLinks-0.14 000755 001750 001750 0 11500257417 16224 5 ustar 00struan struan 000000 000000 HTML-FormatText-WithLinks-0.14/MANIFEST 000444 001750 001750 1121 11500257417 17505 0 ustar 00struan struan 000000 000000 Build.PL Changes examples/basic_usage.pl examples/custom_footnotes.pl examples/custom_numbers.pl lib/HTML/FormatText/WithLinks.pm Makefile.PL MANIFEST META.yml README t/01load.t t/basic_parse.t t/custom_format.t t/file.html t/no_html_passed.t t/parse_a_with_no_href.t t/parse_file.t t/parse_missing_file.t t/parse_multiple_times.t t/parse_with_base.t t/parse_with_html_fragment.t t/parse_with_identical_links.t t/parse_with_links.t t/parse_with_local_links.t t/parse_with_name.t t/parse_with_relative_links.t t/pod.t t/podcover.t t/skip_linked_urls.t t/treebuilder_problem.t t/with_emphasis.t HTML-FormatText-WithLinks-0.14/README 000444 001750 001750 1300 11500257417 17233 0 ustar 00struan struan 000000 000000 NAME HTML::FormatText::WithLinks - HTML to text conversion with links as footnotes SYNOPSIS use HTML::FormatText::WithLinks; my $f = HTML::FormatText::WithLinks->new(); my $html = qq(
Some html with a link
); my $text = $f->parse($html); print $text; # results in something like Some html with a [1]link 1. http://example.com/ INSTALLATION Either perl Makefile.PL make make test make install or perl Build.PL ./Build ./Build test ./Build install HTML-FormatText-WithLinks-0.14/Build.PL 000444 001750 001750 1055 11500257417 17656 0 ustar 00struan struan 000000 000000 use strict; use Module::Build; Module::Build ->new( module_name => "HTML::FormatText::WithLinks", license => 'perl', requires => { 'HTML::FormatText' => 2, 'HTML::TreeBuilder' => 0, 'URI::WithBase' => 0, }, build_requires => { 'Test::More' => 0, }, create_makefile_pl => 'traditional', ) ->create_build_script; HTML-FormatText-WithLinks-0.14/META.yml 000444 001750 001750 1767 11500257417 17645 0 ustar 00struan struan 000000 000000 --- abstract: 'HTML to text conversion with links as footnotes' author: - 'Struan Donald. ESome html with a link
); my $text = $f->parse($html); print $text; # results in something like Some html with a [1]link 1. http://example.com/ my $f2 = HTML::FormatText::WithLinks->new( before_link => '', after_link => ' [%l]', footnote => '' ); $text = $f2->parse($html); print $text; # results in something like Some html with a link [http://example.com/] my $f3 = HTML::FormatText::WithLinks->new( link_num_generator => sub { return "*" x (shift() + 1); }, footnote => '[%n] %l' ); $text = $f3->parse($html); print $text; # results in something like Some html with a [*]link [*] http://example.com/ =head1 DESCRIPTION HTML::FormatText::WithLinks takes HTML and turns it into plain text but prints all the links in the HTML as footnotes. By default, it attempts to mimic the format of the lynx text based web browser's --dump option. =head1 METHODS =head2 new my $f = HTML::FormatText::WithLinks->new( %options ); Returns a new instance. It accepts all the options of HTML::FormatText plus =over =item base a base option. This should be set to a URI which will be used to turn any relative URIs on the HTML to absolute ones. =item doc_overrides_base If a base element is found in the document and it has an href attribute then setting doc_overrides_base to true will cause the document's base to be used. This defaults to false. =item before_link (default: '[%n]') =item after_link (default: '') =item footnote (default: '[%n] %l') a string to print before a link (i.e. when the is found), after link has ended (i.e. when then is found) and when printing out footnotes. "%n" will be replaced by the link number, "%l" will be replaced by the link itself. If footnote is set to '', no footnotes will be printed. =item link_num_generator (default: sub { return shift() + 1 }) link_num_generator is a sub that returns the value to be printed for a given link number. The internal store starts numbering at 0. =item with_emphasis If set to 1 then italicised text will be surrounded by C> and bolded text by C<_>. You can change these markers by using the CThis is a mail of some sort with a link.
HTML-FormatText-WithLinks-0.14/t/basic_parse.t 000444 001750 001750 1210 11500257417 21256 0 ustar 00struan struan 000000 000000 # $Id$ use Test::More tests => 5; use HTML::FormatText::WithLinks; my $html = new_html(); my $f = HTML::FormatText::WithLinks->new(); ok($f, 'object created'); my $text = $f->parse($html); ok($text, 'html formatted'); is($text, " This is a mail of some sort\n\n", 'html correctly formatted'); $f = HTML::FormatText::WithLinks->new( leftmargin => 0); $text = $f->parse($html); ok($text, 'html formatted'); is($text, "This is a mail of some sort\n\n", 'html correctly formatted with no left margin'); sub new_html { return <<'HTML';This is a mail of some sort
HTML } HTML-FormatText-WithLinks-0.14/t/skip_linked_urls.t 000444 001750 001750 2235 11500257417 22354 0 ustar 00struan struan 000000 000000 use Test::More 'no_plan'; use HTML::FormatText::WithLinks; my $html = simple_example(); my $f = HTML::FormatText::WithLinks->new( leftmargin => 0, skip_linked_urls => 1, before_link => '', after_link => ' (%l)', footnote => '' ); ok($f, 'object created'); my $text = $f->parse($html); my $correct_text = qq!This is a mail of some sort with a bunch of linked URLs. http://example.com/ and another https://example.com/ ftp now ftp://example.com not the same but not this (http://example.com/) or this http://example.com (http://example.com/foo) !; ok($text, 'html formatted'); is($text, $correct_text, 'html correctly formatted'); sub simple_example { return <<'HTML';This is a mail of some sort with a bunch of linked URLs.
and another https://example.com/
ftp now ftp://example.com
not the same but not this
or this http://example.com
HTML } HTML-FormatText-WithLinks-0.14/t/parse_with_local_links.t 000444 001750 001750 1544 11500257417 23534 0 ustar 00struan struan 000000 000000 # $Id$ use Test::More tests => 6; use HTML::FormatText::WithLinks; my $html = new_html(); my $f = HTML::FormatText::WithLinks->new( leftmargin => 0 ); ok($f, 'object created'); my $text = $f->parse($html); my $correct_text = qq!This is a mail of some sort with a [1]link to a local anchor. 1. #top !; ok($text, 'html formatted'); is($text, $correct_text, 'html correctly formatted'); sub new_html { return <<'HTML';This is a mail of some sort with a link to a local anchor.
HTML } my $f2 = HTML::FormatText::WithLinks->new( leftmargin => 0, anchor_links => 0 ); ok($f2, 'object created'); my $text2 = $f2->parse($html); my $correct_text2 = qq!This is a mail of some sort with a link to a local anchor. !; ok($text2, 'html formatted'); is($text2, $correct_text2, 'html correctly formatted'); HTML-FormatText-WithLinks-0.14/t/parse_with_identical_links.t 000444 001750 001750 4423 11500257417 24375 0 ustar 00struan struan 000000 000000 # $Id: 03_parse_with_links.t 383 2004-01-12 17:09:27Z struan $ use Test::More tests => 9; use HTML::FormatText::WithLinks; my $html = simple_example(); my $f = HTML::FormatText::WithLinks->new( leftmargin => 0, unique_links => 1 ); ok($f, 'object created'); my $text = $f->parse($html); my $correct_text = qq!This is a mail of some sort with a [1]link and yet another [1]link. 1. http://example.com/ !; ok($text, 'html formatted'); is($text, $correct_text, 'html correctly formatted'); $html = complex_example(); $correct_text = qq!This is a mail of some sort with a [1]link and another [2]link and yet another [1]link. 1. http://example.com/ 2. http://example.net !; # recreate as otherwise output is a bit unpredictable $f = HTML::FormatText::WithLinks->new( leftmargin => 0, unique_links => 1 ); $text = $f->parse($html); ok($text, 'more complex html formatted'); is($text, $correct_text, 'more complex html correctly formatted'); $correct_text = qq!This is a mail of some sort with a link[1] and another link[2] and yet another link[1]. 1. http://example.com/ 2. http://example.net !; # recreate as otherwise output is a bit unpredictable $f = HTML::FormatText::WithLinks->new( leftmargin => 0, unique_links => 1, before_link => '', after_link => '[%n]' ); $text = $f->parse($html); ok($text, 'after_link html formatted'); is($text, $correct_text, 'after_link html correctly formatted'); $f = HTML::FormatText::WithLinks->new( leftmargin => 0 ); $correct_text = qq!This is a mail of some sort with a [1]link and another [2]link and yet another [3]link. 1. http://example.com/ 2. http://example.net 3. http://example.com/ !; $text = $f->parse($html); ok($text, 'more complex html with non unique links formatted'); is($text, $correct_text, 'more complex html with non unique links correctly formatted'); sub simple_example { return <<'HTML';This is a mail of some sort with a link and yet another link.
HTML } sub complex_example { return <<'HTML';This is a mail of some sort with a link and another link and yet another link.
HTML } HTML-FormatText-WithLinks-0.14/t/parse_file.t 000444 001750 001750 576 11500257417 21112 0 ustar 00struan struan 000000 000000 # $Id$ use Test::More tests => 3; use HTML::FormatText::WithLinks; my $f = HTML::FormatText::WithLinks->new( leftmargin => 0 ); ok($f, 'object created'); my $text = $f->parse_file('t/file.html'); my $correct_text = qq!This is a mail of some sort with a [1]link. 1. http://example.com/ !; ok($text, 'html formatted'); is($text, $correct_text, 'html correctly formatted'); HTML-FormatText-WithLinks-0.14/t/parse_with_base.t 000444 001750 001750 2704 11500257417 22153 0 ustar 00struan struan 000000 000000 # $Id$ use Test::More tests => 9; use HTML::FormatText::WithLinks; my $html = new_html(); my $f = HTML::FormatText::WithLinks->new( leftmargin => 0 ); ok($f, 'object created'); my $text = $f->parse($html); my $correct_text = qq!This is a mail of some sort with a [1]link. 1. http://example.com/relative.html !; my $override_text = qq!This is a mail of some sort with a [1]link. 1. http://example.net/relative.html !; ok($text, 'html formatted'); is($text, $correct_text, 'html correctly formatted'); my $f2 = HTML::FormatText::WithLinks->new( leftmargin => 0, base => 'http://example.net/' ); ok($f2, 'object created'); my $text2 = $f2->parse($html); ok($text2, 'html formatted'); is($text2, $override_text, 'html correctly formatted - config base overrides doc'); my $f3 = HTML::FormatText::WithLinks->new( leftmargin => 0, base => 'http://example.net/', doc_overrides_base => 1 ); ok($f3, 'object created'); my $text3 = $f3->parse($html); ok($text3, 'html formatted'); is($text3, $correct_text, 'html correctly formatted - doc overrides config'); sub new_html { return <<'HTML';This is a mail of some sort with a link.
HTML } HTML-FormatText-WithLinks-0.14/t/custom_format.t 000444 001750 001750 3140 11500257417 21671 0 ustar 00struan struan 000000 000000 # $Id$ use Test::More tests => 9; use HTML::FormatText::WithLinks; my $html = new_html(); my $f = HTML::FormatText::WithLinks->new( leftmargin => 0, before_link => "[%n] ", footnote => "[%n] %l"); ok($f, 'object created'); my $text = $f->parse($html); my $correct_text = qq!This is a mail of some sort with a [1] link. [1] http://example.com/ !; ok($text, 'html formatted'); is($text, $correct_text, 'html correctly formatted'); $f = HTML::FormatText::WithLinks->new( leftmargin => 0, before_link => "[%n] ", footnote => ''); $text = $f->parse($html); $correct_text = qq!This is a mail of some sort with a [1] link. !; ok($text, 'html formatted'); is($text, $correct_text, 'html correctly formatted with no footnotes'); $correct_text = qq!This is a mail of some sort with a link[1]. 1. http://example.com/ !; $f = HTML::FormatText::WithLinks->new( leftmargin => 0, before_link => "", after_link => "[%n]"); $text = $f->parse($html); ok($text, 'html formatted'); is($text, $correct_text, 'html correctly formatted with after_link'); $correct_text = qq!This is a mail of some sort with a [0]link. 0. http://example.com/ !; $f = HTML::FormatText::WithLinks->new( leftmargin => 0, link_num_generator => sub { shift(); } ); $text = $f->parse($html); ok($text, 'html formatted'); is($text, $correct_text, 'html correctly formatted with link_num_generator'); sub new_html { return <<'HTML';This is a mail of some sort with a link.
HTML } HTML-FormatText-WithLinks-0.14/t/parse_a_with_no_href.t 000444 001750 001750 1465 11500257417 23164 0 ustar 00struan struan 000000 000000 # $Id$ use Test::More tests => 6; use HTML::FormatText::WithLinks; my $html = new_html(); my $f = HTML::FormatText::WithLinks->new( leftmargin => 0 ); ok($f, 'object created'); my $text = $f->parse($html); my $correct_text = qq!This is a mail of some sort with a link. !; ok($text, 'html formatted'); is($text, $correct_text, 'html with hrefless link correctly formatted'); my $f2 = HTML::FormatText::WithLinks->new( leftmargin => 0, base => 'http://example.com' ); ok($f2, 'object created'); $text = $f2->parse($html); ok($text, 'html formatted'); is($text, $correct_text, 'html with hrefless link and base set correctly formatted'); sub new_html { return <<'HTML';This is a mail of some sort with a link.
HTML } HTML-FormatText-WithLinks-0.14/t/parse_with_relative_links.t 000444 001750 001750 1172 11500257417 24252 0 ustar 00struan struan 000000 000000 # $Id$ use Test::More tests => 3; use HTML::FormatText::WithLinks; my $html = new_html(); my $f = HTML::FormatText::WithLinks->new( leftmargin => 0, base => 'http://example.com/'); ok($f, 'object created'); my $text = $f->parse($html); my $correct_text = qq!This is a mail of some sort with a [1]link. 1. http://example.com/relative.html !; ok($text, 'html formatted'); is($text, $correct_text, 'html correctly formatted'); sub new_html { return <<'HTML';This is a mail of some sort with a link.
HTML } HTML-FormatText-WithLinks-0.14/t/no_html_passed.t 000444 001750 001750 605 11500257417 21771 0 ustar 00struan struan 000000 000000 # $Id$ use Test::More tests => 3; use HTML::FormatText::WithLinks; my $f = HTML::FormatText::WithLinks->new(); ok($f, 'object created'); my $text = $f->parse(); is($text, undef, 'return undef if no html passed in'); $f = HTML::FormatText::WithLinks->new( leftmargin => 0); $text = $f->parse(''); is($text, '', 'return empty string if empty string passed in'); HTML-FormatText-WithLinks-0.14/t/with_emphasis.t 000444 001750 001750 2360 11500257417 21656 0 ustar 00struan struan 000000 000000 # $Id: 02_basic_parse.t 383 2004-01-12 17:09:27Z struan $ use Test::More tests => 9; use HTML::FormatText::WithLinks; my $html = new_html(); my $f = HTML::FormatText::WithLinks->new( with_emphasis => 1 ); ok($f, 'object created'); my $text = $f->parse($html); ok($text, 'html formatted'); is($text, " This is a mail of _some_ /sort/\n\n It has _some_ of the /words/ emphasised\n\n", 'html correctly formatted with emphasis'); my $f2 = HTML::FormatText::WithLinks->new( ); ok( $f2, "object created" ); my $text2= $f2->parse( $html ); ok( $text2, "html formatted" ); is( $text2, " This is a mail of some sort\n\n It has some of the words emphasised\n\n", 'html correctly formatted without emphasis' ); # Test alternate markers $f = HTML::FormatText::WithLinks->new( with_emphasis => 1, bold_marker => '*', italic_marker => '"' ); ok($f, 'object created'); $text = $f->parse($html); ok($text, 'html formatted'); is($text, qq[ This is a mail of *some* "sort"\n\n It has *some* of the "words" emphasised\n\n], 'html correctly formatted with emphasis'); sub new_html { return <<'HTML';This is a mail of some sort
It has some of the words emphasised
HTML } HTML-FormatText-WithLinks-0.14/t/parse_with_name.t 000444 001750 001750 1502 11500257417 22154 0 ustar 00struan struan 000000 000000 # $Id$ use Test::More tests => 6; use HTML::FormatText::WithLinks; my $html = new_html(); my $f = HTML::FormatText::WithLinks->new( leftmargin => 0 ); ok($f, 'object created'); my $text = $f->parse($html); my $correct_text = qq!This is a mail of some sort with a link. !; ok($text, 'html formatted'); is($text, $correct_text, 'html with name only link correctly formatted'); my $f2 = HTML::FormatText::WithLinks->new( leftmargin => 0, base => 'http://example.com' ); ok($f2, 'object created'); $text = $f2->parse($html); ok($text, 'html formatted'); is($text, $correct_text, 'html with name only link and base set correctly formatted'); sub new_html { return <<'HTML';This is a mail of some sort with a link.
HTML } HTML-FormatText-WithLinks-0.14/t/pod.t 000444 001750 001750 210 11500257417 17544 0 ustar 00struan struan 000000 000000 # $Id$ use Test::More; eval "use Test::Pod 1.00"; plan skip_all => "Test::Pod 1.00 required for testing POD" if $@; all_pod_files_ok(); HTML-FormatText-WithLinks-0.14/t/parse_with_links.t 000444 001750 001750 1042 11500257417 22353 0 ustar 00struan struan 000000 000000 # $Id$ use Test::More tests => 3; use HTML::FormatText::WithLinks; my $html = new_html(); my $f = HTML::FormatText::WithLinks->new( leftmargin => 0 ); ok($f, 'object created'); my $text = $f->parse($html); my $correct_text = qq!This is a mail of some sort with a [1]link. 1. http://example.com/ !; ok($text, 'html formatted'); is($text, $correct_text, 'html correctly formatted'); sub new_html { return <<'HTML';This is a mail of some sort with a link.
HTML } HTML-FormatText-WithLinks-0.14/t/parse_with_html_fragment.t 000444 001750 001750 1047 11500257417 24067 0 ustar 00struan struan 000000 000000 # $Id$ use Test::More tests => 3; use HTML::FormatText::WithLinks; my $html = new_html(); my $f = HTML::FormatText::WithLinks->new( leftmargin => 4 ); ok($f, 'object created'); my $text = $f->parse($html); my $correct_text = qq! This is a mail of some sort with a [1]link and some more text here. 1. http://example.com/ !; ok($text, 'html formatted'); is($text, $correct_text, 'html correctly formatted'); sub new_html { return qq(This is a mail of some sort with a link and some more text here.); } HTML-FormatText-WithLinks-0.14/t/parse_multiple_times.t 000444 001750 001750 1536 11500257417 23244 0 ustar 00struan struan 000000 000000 # $Id$ use Test::More tests => 5; use HTML::FormatText::WithLinks; my $html_link = new_html_link(); my $html = new_html(); my $f = HTML::FormatText::WithLinks->new( leftmargin => 0 ); ok($f, 'object created'); my $text = $f->parse($html_link); my $correct_text = qq!This is a mail of some sort with a [1]link. 1. http://example.com/ !; ok($text, 'html formatted'); is($text, $correct_text, 'html correctly formatted'); $text = $f->parse($html); ok($text, 'html formatted'); is($text, "\n\nThis is a mail of some sort\n\n", 'html correctly formatted with no left margin'); sub new_html_link { return <<'HTML';This is a mail of some sort with a link.
HTML } sub new_html { return <<'HTML';This is a mail of some sort
HTML } HTML-FormatText-WithLinks-0.14/t/treebuilder_problem.t 000444 001750 001750 1323 11500257417 23036 0 ustar 00struan struan 000000 000000 use Test::More; BEGIN { eval "use Test::MockObject"; if ( $@ ) { plan skip_all => "Test::MockObject required for testing TreeBuilder problems"; } else { plan tests => 3; } my $m = Test::MockObject->new(); $m->fake_new( 'HTML::TreeBuilder' ); $m->mock( 'parse', sub { $! = 1122; return undef; } ); }; use HTML::FormatText::WithLinks; my $f = HTML::FormatText::WithLinks->new( ); ok($f, 'object created'); my $text = $f->parse('some text
'); is($text, undef, 'undef returned for broken HTML::TreeBuilder'); like($f->error, qr/^HTML::TreeBuilder problem: /, 'correct error message for broken HTML::TreeBuilder'); HTML-FormatText-WithLinks-0.14/t/01load.t 000444 001750 001750 452 11500257417 20052 0 ustar 00struan struan 000000 000000 # $Id$ use Test::More tests => 5; use_ok('HTML::FormatText::WithLinks'); my $f = HTML::FormatText::WithLinks->new(); ok($f, 'objected created'); isa_ok( $f, 'HTML::FormatText::WithLinks' ); my $f2 = $f->new(); ok( $f2, 'second object created' ); isa_ok( $f2, 'HTML::FormatText::WithLinks' ); HTML-FormatText-WithLinks-0.14/examples 000755 001750 001750 0 11500257417 20042 5 ustar 00struan struan 000000 000000 HTML-FormatText-WithLinks-0.14/examples/custom_numbers.pl 000444 001750 001750 1510 11500257417 23576 0 ustar 00struan struan 000000 000000 #!/usr/bin/perl use strict; use warnings; use Roman; use LWP::Simple; use HTML::FormatText::WithLinks; =head1 DESCRIPTION This examples uses the custom number generation option to generate the footnote numbers as latin numerals. It also demonstrates how to place the footnote indicators after the link instead of in front which is the default. =cut my $html = get("http://exo.org.uk/"); my $f = HTML::FormatText::WithLinks->new( base => "http://exo.org.uk/", unique_links => 1, link_num_generator => \&generator, # fear my dodgy latin... before_link => '', after_link => '[%n]', footnote => '%n est %l' ); sub generator() { my $num = shift; # Romans didn't get zero... $num += 1; return uc roman($num); } print $f->parse($html); HTML-FormatText-WithLinks-0.14/examples/custom_footnotes.pl 000444 001750 001750 1013 11500257417 24141 0 ustar 00struan struan 000000 000000 #!/usr/bin/perl use strict; use warnings; use LWP::Simple; use HTML::FormatText::WithLinks; =head1 DESCRIPTION This script demonstrates how to used a custom footnote for your links. In this case all the footnotes will now be of the form footnote 10 is for http://exo.org.uk/code/ =cut my $html = get("http://exo.org.uk/"); my $f = HTML::FormatText::WithLinks->new( base => 'http://exo.org.uk/', unique_links => 1, footnote => 'footnote %n is for %l' ); print $f->parse($html); HTML-FormatText-WithLinks-0.14/examples/basic_usage.pl 000444 001750 001750 1031 11500257417 22774 0 ustar 00struan struan 000000 000000 #!/usr/bin/perl use strict; use warnings; use LWP::Simple; use HTML::FormatText::WithLinks; =head1 DESCRIPTION This script shows the basic usage of the module. The two options used are the base one to make sure that any relative links in the page are turned into absolute links and unique_links which only generates one footnote per link. =cut my $html = get("http://exo.org.uk/"); my $f = HTML::FormatText::WithLinks->new( base => "http://exo.org.uk/", unique_links => 1 ); print $f->parse($html);