HTML-FormatText-WithLinks-0.15000755001750001750 012452325612 16225 5ustar00struanstruan000000000000HTML-FormatText-WithLinks-0.15/MANIFEST000444001750001750 113312452325612 17511 0ustar00struanstruan000000000000Build.PL Changes examples/basic_usage.pl examples/custom_footnotes.pl examples/custom_numbers.pl lib/HTML/FormatText/WithLinks.pm Makefile.PL MANIFEST META.yml README t/01load.t t/basic_parse.t t/custom_format.t t/file.html t/no_html_passed.t t/parse_a_with_no_href.t t/parse_file.t t/parse_missing_file.t t/parse_multiple_times.t t/parse_with_base.t t/parse_with_html_fragment.t t/parse_with_identical_links.t t/parse_with_links.t t/parse_with_local_links.t t/parse_with_name.t t/parse_with_relative_links.t t/pod.t t/podcover.t t/skip_linked_urls.t t/treebuilder_problem.t t/with_emphasis.t META.json HTML-FormatText-WithLinks-0.15/META.json000444001750001750 316112452325612 20004 0ustar00struanstruan000000000000{ "abstract" : "HTML to text conversion with links as footnotes", "author" : [ "Struan Donald. Estruan@cpan.orgE", "Ian Malpass Eian@indecorous.comE was responsible for the custom \nformatting bits and the nudge to release the code.", "Simon Dassow Ejanus@errornet.de for the anchor_links option plus \na few bugfixes and optimisations", "Thomas Sibley Etrs@bestpractical.comE patches for skipping links that are their urls and to change the delimiters for bold and italic text.." 
], "dynamic_config" : 1, "generated_by" : "Module::Build version 0.38, CPAN::Meta::Converter version 2.112150", "license" : [ "perl_5" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : "2" }, "name" : "HTML-FormatText-WithLinks", "prereqs" : { "build" : { "requires" : { "Test::More" : 0 } }, "configure" : { "requires" : { "Module::Build" : "0.38" } }, "runtime" : { "requires" : { "HTML::FormatText" : "2", "HTML::TreeBuilder" : 0, "URI::WithBase" : 0 } } }, "provides" : { "HTML::FormatText::WithLinks" : { "file" : "lib/HTML/FormatText/WithLinks.pm", "version" : "0.15" } }, "release_status" : "stable", "resources" : { "license" : [ "http://dev.perl.org/licenses/" ], "repository" : { "url" : "https://github.com/struan/html-formattext-withlinks" } }, "version" : "0.15" } HTML-FormatText-WithLinks-0.15/README000444001750001750 130012452325612 17234 0ustar00struanstruan000000000000NAME HTML::FormatText::WithLinks - HTML to text conversion with links as footnotes SYNOPSIS use HTML::FormatText::WithLinks; my $f = HTML::FormatText::WithLinks->new(); my $html = qq(

Some html with a link

); my $text = $f->parse($html); print $text; # results in something like Some html with a [1]link 1. http://example.com/ INSTALLATION Either perl Makefile.PL make make test make install or perl Build.PL ./Build ./Build test ./Build install HTML-FormatText-WithLinks-0.15/Build.PL000444001750001750 137612452325612 17665 0ustar00struanstruan000000000000use strict; use Module::Build; Module::Build ->new( module_name => "HTML::FormatText::WithLinks", license => 'perl', requires => { 'HTML::FormatText' => 2, 'HTML::TreeBuilder' => 0, 'URI::WithBase' => 0, }, build_requires => { 'Test::More' => 0, }, create_makefile_pl => 'traditional', meta_merge => { resources => { repository => 'https://github.com/struan/html-formattext-withlinks' } }, ) ->create_build_script; HTML-FormatText-WithLinks-0.15/META.yml000444001750001750 216112452325612 17633 0ustar00struanstruan000000000000--- abstract: 'HTML to text conversion with links as footnotes' author: - 'Struan Donald. Estruan@cpan.orgE' - "Ian Malpass Eian@indecorous.comE was responsible for the custom \nformatting bits and the nudge to release the code." - "Simon Dassow Ejanus@errornet.de for the anchor_links option plus \na few bugfixes and optimisations" - 'Thomas Sibley Etrs@bestpractical.comE patches for skipping links that are their urls and to change the delimiters for bold and italic text..' 
build_requires: Test::More: 0 configure_requires: Module::Build: 0.38 dynamic_config: 1 generated_by: 'Module::Build version 0.38, CPAN::Meta::Converter version 2.112150' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: 1.4 name: HTML-FormatText-WithLinks provides: HTML::FormatText::WithLinks: file: lib/HTML/FormatText/WithLinks.pm version: 0.15 requires: HTML::FormatText: 2 HTML::TreeBuilder: 0 URI::WithBase: 0 resources: license: http://dev.perl.org/licenses/ repository: https://github.com/struan/html-formattext-withlinks version: 0.15 HTML-FormatText-WithLinks-0.15/Makefile.PL000444001750001750 75612452325612 20324 0ustar00struanstruan000000000000# Note: this file was auto-generated by Module::Build::Compat version 0.3800 use ExtUtils::MakeMaker; WriteMakefile ( 'NAME' => 'HTML::FormatText::WithLinks', 'VERSION_FROM' => 'lib/HTML/FormatText/WithLinks.pm', 'PREREQ_PM' => { 'HTML::FormatText' => 2, 'HTML::TreeBuilder' => 0, 'Test::More' => 0, 'URI::WithBase' => 0 }, 'INSTALLDIRS' => 'site', 'EXE_FILES' => [], 'PL_FILES' => {} ) ; HTML-FormatText-WithLinks-0.15/Changes000444001750001750 422212452325612 17655 0ustar00struanstruan000000000000# Changelog for HTML::FormatText::WithLinks version: 0.15 date: 4/01/2015 Various small documentation errors fixed --- version: 0.14 date: 9/12/2010 Missed the skipped links test from the MANIFEST :( --- version: 0.13 date: 9/12/2010 Added code to change the delimiters for italic and bold text (rt #63571) --- version: 0.12 date: 27/11/2010 Updated HTML::FormatText dependency to require version 2+ Added code to pull out the base from the base element in a document (rt #55238) Added code to optionally skip links where the text is the same as the HREF (rt #63236) --- version: 0.11 date: 30/07/2008 failed to actually include any of the mentioned things in the previous release so re-releasing with the things actually there. sigh.
--- version: 0.10 date: 21/07/2008 added anchor_links config option (rt #37634) compile regex and make it work with https (rt #37634) reset internal link collectors when call parse so object can be reused < Simon Dessau> (rt #37636) --- version: 0.09 date: 18/04/2007 added in examples directory --- version: 0.08 date: 26/03/2007 add in unique_links config option (rt #24713) --- version: 0.07 date: 30/08/2006 actually fix issues with empty hrefs (rt #14288) --- version: 0.06 date: 06/10/2005 add in missing ; to use line in examples in POD and README (rt #14777) --- version: 0.05 date: 12/09/2005 change way skip tests in t/12_treebuilderproblem.t to try and avoid CPAN testers failure --- version: 0.04 date: 27/08/2005 anchors with no href or empty href don't produce footnotes some internal refactoring much better test coverage --- version: 0.03 date: 19/05/2005 Added with_emphasis option Delete HTML::TreeBuilder object after parsing --- version: 0.02 date: 02/02/2004 Now call eof on HTML::TreeBuilder which fixes bug with last words in HTML fragments sometimes being lost --- version: 0.01 date: 12/01/2004 changes: Initial release HTML-FormatText-WithLinks-0.15/lib000755001750001750 012452325612 16773 5ustar00struanstruan000000000000HTML-FormatText-WithLinks-0.15/lib/HTML000755001750001750 012452325612 17537 5ustar00struanstruan000000000000HTML-FormatText-WithLinks-0.15/lib/HTML/FormatText000755001750001750 012452325612 21634 5ustar00struanstruan000000000000HTML-FormatText-WithLinks-0.15/lib/HTML/FormatText/WithLinks.pm000444001750001750 2722212452325612 24270 0ustar00struanstruan000000000000package HTML::FormatText::WithLinks; use strict; use URI::WithBase; use HTML::TreeBuilder; use base qw(HTML::FormatText); use vars qw($VERSION); $VERSION = '0.15'; sub new { my $proto = shift; my $class = ref( $proto ) || $proto; my $self = $class->SUPER::new( @_ ); $self->configure() unless @_; bless ( $self, $class ); return $self; } sub configure { my ($self, $hash) = @_; # a 
base uri so we can resolve relative uris $self->{base} = $hash->{base}; delete $hash->{base}; $self->{base} =~ s#(.*?)/[^/]*$#$1/# if $self->{base}; $self->{doc_overrides_base} = $hash->{doc_overrides_base}; delete $hash->{doc_overrides_base}; $self->{before_link} = '[%n]'; $self->{after_link} = ''; $self->{footnote} = '%n. %l'; $self->{link_num_generator} = sub { return shift() + 1 }; $self->{unique_links} = 0; $self->{anchor_links} = 1; $self->{skip_linked_urls} = 0; $self->{_link_track} = {}; $self->{bold_marker} = '_'; $self->{italic_marker} = '/'; foreach ( qw( before_link after_link footnote link_num_generator with_emphasis bold_marker italic_marker unique_links anchor_links skip_linked_urls ) ) { $self->{ $_ } = $hash->{ $_ } if exists $hash->{ $_ }; delete $hash->{ $_ }; } $self->SUPER::configure($hash); } # we need to do this as if you pass an HTML fragment without any # containing block level markup (e.g. a p tag) then no indentation # takes place so if we've not got a cur_pos we indent. sub textflow { my $self = shift; $self->goto_lm unless defined $self->{cur_pos}; $self->SUPER::textflow(@_); } sub head_start { my ($self) = @_; $self->SUPER::head_start(); # we don't care about what the documents says it's base is if ( $self->{base} and not $self->{doc_overrides_base} ) { return 0; } # descend into for possible there, even if superclass not # interested (as of HTML::FormatText 2.04 it's not) return 1; } # is supposed to be inside , but no need to demand that. # "lynx -source" sticks a at the very start of the document, before # even , so accepting anywhere lets that work. sub base_start { my ($self, $node) = @_; if (my $href = $node->attr('href')) { $self->{base} = $href; } # allow for no superclass base_start() in HTML::FormatText 2.04 if (! 
HTML::FormatText->can('base_start')) { return 0; } # chain up if it exists in the future return $self->SUPER::base_start(); } sub a_start { my $self = shift; my $node = shift; # local urls are no use so we have to make them absolute my $href = $node->attr('href') || ''; if ($href && $self->{anchor_links} == 0 && $href =~ m/^#/o) { $href = ''; } elsif ($href and $self->{skip_linked_urls} and $href eq $node->as_text) { $href = ''; } if ( $href ) { if ($href !~ m#^https?:|^mailto:#o) { $href = URI::WithBase->new($href, $self->{base})->abs(); } if ($self->{unique_links}) { if (defined $self->{_link_track}->{$href}) { $self->out( $self->text('before_link', $self->{_link_track}->{$href}, $href ) ); } else { push @{$self->{_links}}, $href; $self->{_link_track}->{$href} = $#{$self->{_links}}; $self->out( $self->text('before_link', $#{$self->{_links}}, $href ) ); } } else { push @{$self->{_links}}, $href; $self->out( $self->text('before_link') ); } } $self->SUPER::a_start(); } sub a_end { my $self = shift; my $node = shift; my $text; unless ($self->{skip_linked_urls} and $node->attr('href') eq $node->as_text) { if ($self->{unique_links}) { my $href = $node->attr('href'); $text = $self->text('after_link', $self->{_link_track}->{$href}, $href); } else { $text = $self->text('after_link'); } # If we're just dealing with a fragment of HTML, with a link at the # end, we get a space before the first footnote link if we do # $self->out( '' ) if ($text ne '') { $self->out( $text ); } } $self->SUPER::a_end(); } sub b_start { my $self = shift; $self->out( $self->{'bold_marker'} ) if $self->{ with_emphasis }; $self->SUPER::b_start(); } sub b_end { my $self = shift; $self->out( $self->{'bold_marker'} ) if $self->{ with_emphasis }; $self->SUPER::b_end(); } sub i_start { my $self = shift; $self->out( $self->{'italic_marker'} ) if $self->{ with_emphasis }; $self->SUPER::i_start(); } sub i_end { my $self = shift; $self->out( $self->{'italic_marker'} ) if $self->{ with_emphasis }; 
$self->SUPER::i_end(); } # print out our links sub html_end { my $self = shift; if ( $self->{_links} and @{$self->{_links}} and $self->{footnote} ) { $self->nl; $self->nl; # be tidy $self->goto_lm; for (0 .. $#{$self->{_links}}) { $self->goto_lm; $self->out( $self->text( 'footnote', $_, $self->{_links}->[$_] ) ); $self->nl; } } $self->SUPER::end(); } sub _link_num { my ($self, $num) = @_; $num = $#{$self->{_links}} unless defined $num; return &{$self->{link_num_generator}}($num); } sub text { my ($self, $type, $num, $href) = @_; if ($self->{_links} and @{$self->{_links}}) { $href = $self->{_links}->[$#{$self->{_links}}] unless (defined $num and defined $href); } $num = $self->_link_num($num); my $text = $self->{$type}; $text =~ s/%n/$num/g; $text =~ s/%l/$href/g; return $text; } sub parse { my $self = shift; my $text = shift; return undef unless defined $text; return '' if $text eq ''; my $tree = HTML::TreeBuilder->new->parse( $text ); return $self->_parse( $tree ); } sub parse_file { my $self = shift; my $file = shift; unless (-e $file and -f $file) { $self->error("$file not found or not a regular file"); return undef; } my $tree = HTML::TreeBuilder->new->parse_file( $file ); return $self->_parse( $tree ); } sub _parse { my $self = shift; my $tree = shift; $self->{_link_track} = {}; $self->{_links} = []; unless ( $tree ) { $self->error( "HTML::TreeBuilder problem" . ( $! ? ": $!" : '' ) ); return undef; } $tree->eof(); my $return_text = $self->format( $tree ); $tree->delete; return $return_text; } sub error { my $self = shift; if (@_) { $self->{error} = shift; } return $self->{error}; } 1; __END__ =head1 NAME HTML::FormatText::WithLinks - HTML to text conversion with links as footnotes =head1 SYNOPSIS use HTML::FormatText::WithLinks; my $f = HTML::FormatText::WithLinks->new(); my $html = qq(

Some html with a link

); my $text = $f->parse($html); print $text; # results in something like Some html with a [1]link 1. http://example.com/ my $f2 = HTML::FormatText::WithLinks->new( before_link => '', after_link => ' [%l]', footnote => '' ); $text = $f2->parse($html); print $text; # results in something like Some html with a link [http://example.com/] my $f3 = HTML::FormatText::WithLinks->new( link_num_generator => sub { return "*" x (shift() + 1); }, footnote => '[%n] %l' ); $text = $f3->parse($html); print $text; # results in something like Some html with a [*]link [*] http://example.com/ =head1 DESCRIPTION HTML::FormatText::WithLinks takes HTML and turns it into plain text but prints all the links in the HTML as footnotes. By default, it attempts to mimic the format of the lynx text based web browser's --dump option. =head1 METHODS =head2 new my $f = HTML::FormatText::WithLinks->new( %options ); Returns a new instance. It accepts all the options of HTML::FormatText plus =over =item base a base option. This should be set to a URI which will be used to turn any relative URIs in the HTML to absolute ones. =item doc_overrides_base If a base element is found in the document and it has an href attribute then setting doc_overrides_base to true will cause the document's base to be used. This defaults to false. =item before_link (default: '[%n]') =item after_link (default: '') =item footnote (default: '%n. %l') a string to print before a link (i.e. when the <a> is found), after the link has ended (i.e. when the </a> is found) and when printing out footnotes. "%n" will be replaced by the link number, "%l" will be replaced by the link itself. If footnote is set to '', no footnotes will be printed. =item link_num_generator (default: sub { return shift() + 1 }) link_num_generator is a sub that returns the value to be printed for a given link number. The internal store starts numbering at 0. =item with_emphasis If set to 1 then italicised text will be surrounded by C</> and bolded text by C<_>.
You can change these markers by using the C<bold_marker> and C<italic_marker> options. =item unique_links If set to 1 then only one footnote will be generated per unique URI, as opposed to the default behaviour which is to generate a footnote per link. =item anchor_links If set to 0 then links pointing to local anchors will be skipped. The default behaviour is to include all links. =item skip_linked_urls If set to 1, then links where the text equals the href value will be skipped. The default behaviour is to include all links. =back =head2 parse my $text = $f->parse($html); Takes some HTML and returns it as text. Returns undef on error. Will also return undef if you pass it undef. Returns an empty string if passed an empty string. =head2 parse_file my $text = $f->parse_file($filename); Takes a filename and returns the contents of the file as plain text. Returns undef on error. =head2 error $f->error(); Returns the last error that occurred. In practice this is likely to be either a warning that parse_file couldn't find the file or that HTML::TreeBuilder failed. =head1 CAVEATS When passing HTML fragments the results may be a little unpredictable. I've tried to work round the most egregious of the issues but reports of any unexpected results are welcome. Also note that if for some reason there is an a tag in the document that does not have an href attribute then it will be quietly ignored. If this is really a problem for anyone then let me know and I'll see if I can think of a sensible thing to do in this case. =head1 AUTHOR Struan Donald. E<lt>struan@cpan.orgE<gt> L Ian Malpass E<lt>ian@indecorous.comE<gt> was responsible for the custom formatting bits and the nudge to release the code. Simon Dassow E<lt>janus@errornet.deE<gt> for the anchor_links option plus a few bugfixes and optimisations Kevin Ryde for the code for pulling the base out of the document. Thomas Sibley E<lt>trs@bestpractical.comE<gt> patches for skipping links that are their urls and to change the delimiters for bold and italic text.
=head1 SOURCE CODE The source code for this module is hosted on GitHub L =head1 COPYRIGHT Copyright (C) 2003-2010 Struan Donald and Ian Malpass. All rights reserved. =head1 LICENSE This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =head1 SEE ALSO perl(1), HTML::Formatter. =cut HTML-FormatText-WithLinks-0.15/t000755001750001750 012452325612 16470 5ustar00struanstruan000000000000HTML-FormatText-WithLinks-0.15/t/podcover.t000444001750001750 40312452325612 20610 0ustar00struanstruan000000000000use Test::More; eval "use Test::Pod::Coverage 0.08"; plan skip_all => "Test::Pod::Coverage 0.08 required for testing POD coverage" if $@; all_pod_coverage_ok( { also_private => [ qr/(?:^a|_end|_start)$/, 'text', 'textflow', 'configure' ] }, 'pod coverage' ); HTML-FormatText-WithLinks-0.15/t/parse_missing_file.t000444001750001750 107012452325612 22652 0ustar00struanstruan000000000000# $Id$ use Test::More tests => 5; use HTML::FormatText::WithLinks; my $f = HTML::FormatText::WithLinks->new( leftmargin => 0 ); ok($f, 'object created'); my $text = $f->parse_file('t/missing.html'); is($text, undef, 'undef returned for missing file'); is($f->error, 't/missing.html not found or not a regular file', 'correct error message for missing file'); $text = $f->parse_file('.'); is($text, undef, 'undef returned for directory'); is($f->error, '. not found or not a regular file', 'correct error message for directory'); HTML-FormatText-WithLinks-0.15/t/file.html000444001750001750 16212452325612 20411 0ustar00struanstruan000000000000

This is a mail of some sort with a link.

HTML-FormatText-WithLinks-0.15/t/basic_parse.t000444001750001750 121012452325612 21257 0ustar00struanstruan000000000000# $Id$ use Test::More tests => 5; use HTML::FormatText::WithLinks; my $html = new_html(); my $f = HTML::FormatText::WithLinks->new(); ok($f, 'object created'); my $text = $f->parse($html); ok($text, 'html formatted'); is($text, " This is a mail of some sort\n\n", 'html correctly formatted'); $f = HTML::FormatText::WithLinks->new( leftmargin => 0); $text = $f->parse($html); ok($text, 'html formatted'); is($text, "This is a mail of some sort\n\n", 'html correctly formatted with no left margin'); sub new_html { return <<'HTML';

This is a mail of some sort

HTML } HTML-FormatText-WithLinks-0.15/t/skip_linked_urls.t000444001750001750 223512452325612 22355 0ustar00struanstruan000000000000use Test::More 'no_plan'; use HTML::FormatText::WithLinks; my $html = simple_example(); my $f = HTML::FormatText::WithLinks->new( leftmargin => 0, skip_linked_urls => 1, before_link => '', after_link => ' (%l)', footnote => '' ); ok($f, 'object created'); my $text = $f->parse($html); my $correct_text = qq!This is a mail of some sort with a bunch of linked URLs. http://example.com/ and another https://example.com/ ftp now ftp://example.com not the same but not this (http://example.com/) or this http://example.com (http://example.com/foo) !; ok($text, 'html formatted'); is($text, $correct_text, 'html correctly formatted'); sub simple_example { return <<'HTML';

This is a mail of some sort with a bunch of linked URLs.

http://example.com/

and another https://example.com/

ftp now ftp://example.com

not the same but not this

or this http://example.com

HTML } HTML-FormatText-WithLinks-0.15/t/parse_with_local_links.t000444001750001750 154412452325612 23535 0ustar00struanstruan000000000000# $Id$ use Test::More tests => 6; use HTML::FormatText::WithLinks; my $html = new_html(); my $f = HTML::FormatText::WithLinks->new( leftmargin => 0 ); ok($f, 'object created'); my $text = $f->parse($html); my $correct_text = qq!This is a mail of some sort with a [1]link to a local anchor. 1. #top !; ok($text, 'html formatted'); is($text, $correct_text, 'html correctly formatted'); sub new_html { return <<'HTML';

This is a mail of some sort with a link to a local anchor.

HTML } my $f2 = HTML::FormatText::WithLinks->new( leftmargin => 0, anchor_links => 0 ); ok($f2, 'object created'); my $text2 = $f2->parse($html); my $correct_text2 = qq!This is a mail of some sort with a link to a local anchor. !; ok($text2, 'html formatted'); is($text2, $correct_text2, 'html correctly formatted'); HTML-FormatText-WithLinks-0.15/t/parse_with_identical_links.t000444001750001750 442312452325612 24376 0ustar00struanstruan000000000000# $Id: 03_parse_with_links.t 383 2004-01-12 17:09:27Z struan $ use Test::More tests => 9; use HTML::FormatText::WithLinks; my $html = simple_example(); my $f = HTML::FormatText::WithLinks->new( leftmargin => 0, unique_links => 1 ); ok($f, 'object created'); my $text = $f->parse($html); my $correct_text = qq!This is a mail of some sort with a [1]link and yet another [1]link. 1. http://example.com/ !; ok($text, 'html formatted'); is($text, $correct_text, 'html correctly formatted'); $html = complex_example(); $correct_text = qq!This is a mail of some sort with a [1]link and another [2]link and yet another [1]link. 1. http://example.com/ 2. http://example.net !; # recreate as otherwise output is a bit unpredictable $f = HTML::FormatText::WithLinks->new( leftmargin => 0, unique_links => 1 ); $text = $f->parse($html); ok($text, 'more complex html formatted'); is($text, $correct_text, 'more complex html correctly formatted'); $correct_text = qq!This is a mail of some sort with a link[1] and another link[2] and yet another link[1]. 1. http://example.com/ 2. 
http://example.net !; # recreate as otherwise output is a bit unpredictable $f = HTML::FormatText::WithLinks->new( leftmargin => 0, unique_links => 1, before_link => '', after_link => '[%n]' ); $text = $f->parse($html); ok($text, 'after_link html formatted'); is($text, $correct_text, 'after_link html correctly formatted'); $f = HTML::FormatText::WithLinks->new( leftmargin => 0 ); $correct_text = qq!This is a mail of some sort with a [1]link and another [2]link and yet another [3]link. 1. http://example.com/ 2. http://example.net 3. http://example.com/ !; $text = $f->parse($html); ok($text, 'more complex html with non unique links formatted'); is($text, $correct_text, 'more complex html with non unique links correctly formatted'); sub simple_example { return <<'HTML';

This is a mail of some sort with a link and yet another link.

HTML } sub complex_example { return <<'HTML';

This is a mail of some sort with a link and another link and yet another link.

HTML } HTML-FormatText-WithLinks-0.15/t/parse_file.t000444001750001750 57612452325612 21113 0ustar00struanstruan000000000000# $Id$ use Test::More tests => 3; use HTML::FormatText::WithLinks; my $f = HTML::FormatText::WithLinks->new( leftmargin => 0 ); ok($f, 'object created'); my $text = $f->parse_file('t/file.html'); my $correct_text = qq!This is a mail of some sort with a [1]link. 1. http://example.com/ !; ok($text, 'html formatted'); is($text, $correct_text, 'html correctly formatted'); HTML-FormatText-WithLinks-0.15/t/parse_with_base.t000444001750001750 270412452325612 22154 0ustar00struanstruan000000000000# $Id$ use Test::More tests => 9; use HTML::FormatText::WithLinks; my $html = new_html(); my $f = HTML::FormatText::WithLinks->new( leftmargin => 0 ); ok($f, 'object created'); my $text = $f->parse($html); my $correct_text = qq!This is a mail of some sort with a [1]link. 1. http://example.com/relative.html !; my $override_text = qq!This is a mail of some sort with a [1]link. 1. http://example.net/relative.html !; ok($text, 'html formatted'); is($text, $correct_text, 'html correctly formatted'); my $f2 = HTML::FormatText::WithLinks->new( leftmargin => 0, base => 'http://example.net/' ); ok($f2, 'object created'); my $text2 = $f2->parse($html); ok($text2, 'html formatted'); is($text2, $override_text, 'html correctly formatted - config base overrides doc'); my $f3 = HTML::FormatText::WithLinks->new( leftmargin => 0, base => 'http://example.net/', doc_overrides_base => 1 ); ok($f3, 'object created'); my $text3 = $f3->parse($html); ok($text3, 'html formatted'); is($text3, $correct_text, 'html correctly formatted - doc overrides config'); sub new_html { return <<'HTML';

This is a mail of some sort with a link.

HTML } HTML-FormatText-WithLinks-0.15/t/custom_format.t000444001750001750 314012452325612 21672 0ustar00struanstruan000000000000# $Id$ use Test::More tests => 9; use HTML::FormatText::WithLinks; my $html = new_html(); my $f = HTML::FormatText::WithLinks->new( leftmargin => 0, before_link => "[%n] ", footnote => "[%n] %l"); ok($f, 'object created'); my $text = $f->parse($html); my $correct_text = qq!This is a mail of some sort with a [1] link. [1] http://example.com/ !; ok($text, 'html formatted'); is($text, $correct_text, 'html correctly formatted'); $f = HTML::FormatText::WithLinks->new( leftmargin => 0, before_link => "[%n] ", footnote => ''); $text = $f->parse($html); $correct_text = qq!This is a mail of some sort with a [1] link. !; ok($text, 'html formatted'); is($text, $correct_text, 'html correctly formatted with no footnotes'); $correct_text = qq!This is a mail of some sort with a link[1]. 1. http://example.com/ !; $f = HTML::FormatText::WithLinks->new( leftmargin => 0, before_link => "", after_link => "[%n]"); $text = $f->parse($html); ok($text, 'html formatted'); is($text, $correct_text, 'html correctly formatted with after_link'); $correct_text = qq!This is a mail of some sort with a [0]link. 0. http://example.com/ !; $f = HTML::FormatText::WithLinks->new( leftmargin => 0, link_num_generator => sub { shift(); } ); $text = $f->parse($html); ok($text, 'html formatted'); is($text, $correct_text, 'html correctly formatted with link_num_generator'); sub new_html { return <<'HTML';

This is a mail of some sort with a link.

HTML } HTML-FormatText-WithLinks-0.15/t/parse_a_with_no_href.t000444001750001750 146512452325612 23165 0ustar00struanstruan000000000000# $Id$ use Test::More tests => 6; use HTML::FormatText::WithLinks; my $html = new_html(); my $f = HTML::FormatText::WithLinks->new( leftmargin => 0 ); ok($f, 'object created'); my $text = $f->parse($html); my $correct_text = qq!This is a mail of some sort with a link. !; ok($text, 'html formatted'); is($text, $correct_text, 'html with hrefless link correctly formatted'); my $f2 = HTML::FormatText::WithLinks->new( leftmargin => 0, base => 'http://example.com' ); ok($f2, 'object created'); $text = $f2->parse($html); ok($text, 'html formatted'); is($text, $correct_text, 'html with hrefless link and base set correctly formatted'); sub new_html { return <<'HTML';

This is a mail of some sort with a link.

HTML } HTML-FormatText-WithLinks-0.15/t/parse_with_relative_links.t000444001750001750 117212452325612 24253 0ustar00struanstruan000000000000# $Id$ use Test::More tests => 3; use HTML::FormatText::WithLinks; my $html = new_html(); my $f = HTML::FormatText::WithLinks->new( leftmargin => 0, base => 'http://example.com/'); ok($f, 'object created'); my $text = $f->parse($html); my $correct_text = qq!This is a mail of some sort with a [1]link. 1. http://example.com/relative.html !; ok($text, 'html formatted'); is($text, $correct_text, 'html correctly formatted'); sub new_html { return <<'HTML';

This is a mail of some sort with a link.

HTML } HTML-FormatText-WithLinks-0.15/t/no_html_passed.t000444001750001750 60512452325612 21772 0ustar00struanstruan000000000000# $Id$ use Test::More tests => 3; use HTML::FormatText::WithLinks; my $f = HTML::FormatText::WithLinks->new(); ok($f, 'object created'); my $text = $f->parse(); is($text, undef, 'return undef if no html passed in'); $f = HTML::FormatText::WithLinks->new( leftmargin => 0); $text = $f->parse(''); is($text, '', 'return empty string if empty string passed in'); HTML-FormatText-WithLinks-0.15/t/with_emphasis.t000444001750001750 236012452325612 21657 0ustar00struanstruan000000000000# $Id: 02_basic_parse.t 383 2004-01-12 17:09:27Z struan $ use Test::More tests => 9; use HTML::FormatText::WithLinks; my $html = new_html(); my $f = HTML::FormatText::WithLinks->new( with_emphasis => 1 ); ok($f, 'object created'); my $text = $f->parse($html); ok($text, 'html formatted'); is($text, " This is a mail of _some_ /sort/\n\n It has _some_ of the /words/ emphasised\n\n", 'html correctly formatted with emphasis'); my $f2 = HTML::FormatText::WithLinks->new( ); ok( $f2, "object created" ); my $text2= $f2->parse( $html ); ok( $text2, "html formatted" ); is( $text2, " This is a mail of some sort\n\n It has some of the words emphasised\n\n", 'html correctly formatted without emphasis' ); # Test alternate markers $f = HTML::FormatText::WithLinks->new( with_emphasis => 1, bold_marker => '*', italic_marker => '"' ); ok($f, 'object created'); $text = $f->parse($html); ok($text, 'html formatted'); is($text, qq[ This is a mail of *some* "sort"\n\n It has *some* of the "words" emphasised\n\n], 'html correctly formatted with emphasis'); sub new_html { return <<'HTML';

This is a mail of some sort

It has some of the words emphasised

HTML } HTML-FormatText-WithLinks-0.15/t/parse_with_name.t000444001750001750 150212452325612 22155 0ustar00struanstruan000000000000# $Id$ use Test::More tests => 6; use HTML::FormatText::WithLinks; my $html = new_html(); my $f = HTML::FormatText::WithLinks->new( leftmargin => 0 ); ok($f, 'object created'); my $text = $f->parse($html); my $correct_text = qq!This is a mail of some sort with a link. !; ok($text, 'html formatted'); is($text, $correct_text, 'html with name only link correctly formatted'); my $f2 = HTML::FormatText::WithLinks->new( leftmargin => 0, base => 'http://example.com' ); ok($f2, 'object created'); $text = $f2->parse($html); ok($text, 'html formatted'); is($text, $correct_text, 'html with name only link and base set correctly formatted'); sub new_html { return <<'HTML';

This is a mail of some sort with a link.

HTML } HTML-FormatText-WithLinks-0.15/t/pod.t000444001750001750 21012452325612 17545 0ustar00struanstruan000000000000# $Id$ use Test::More; eval "use Test::Pod 1.00"; plan skip_all => "Test::Pod 1.00 required for testing POD" if $@; all_pod_files_ok(); HTML-FormatText-WithLinks-0.15/t/parse_with_links.t000444001750001750 104212452325612 22354 0ustar00struanstruan000000000000# $Id$ use Test::More tests => 3; use HTML::FormatText::WithLinks; my $html = new_html(); my $f = HTML::FormatText::WithLinks->new( leftmargin => 0 ); ok($f, 'object created'); my $text = $f->parse($html); my $correct_text = qq!This is a mail of some sort with a [1]link. 1. http://example.com/ !; ok($text, 'html formatted'); is($text, $correct_text, 'html correctly formatted'); sub new_html { return <<'HTML';

This is a mail of some sort with a link.

HTML } HTML-FormatText-WithLinks-0.15/t/parse_with_html_fragment.t000444001750001750 104712452325612 24070 0ustar00struanstruan000000000000# $Id$ use Test::More tests => 3; use HTML::FormatText::WithLinks; my $html = new_html(); my $f = HTML::FormatText::WithLinks->new( leftmargin => 4 ); ok($f, 'object created'); my $text = $f->parse($html); my $correct_text = qq! This is a mail of some sort with a [1]link and some more text here. 1. http://example.com/ !; ok($text, 'html formatted'); is($text, $correct_text, 'html correctly formatted'); sub new_html { return qq(This is a mail of some sort with a link and some more text here.); } HTML-FormatText-WithLinks-0.15/t/parse_multiple_times.t000444001750001750 153612452325612 23245 0ustar00struanstruan000000000000# $Id$ use Test::More tests => 5; use HTML::FormatText::WithLinks; my $html_link = new_html_link(); my $html = new_html(); my $f = HTML::FormatText::WithLinks->new( leftmargin => 0 ); ok($f, 'object created'); my $text = $f->parse($html_link); my $correct_text = qq!This is a mail of some sort with a [1]link. 1. http://example.com/ !; ok($text, 'html formatted'); is($text, $correct_text, 'html correctly formatted'); $text = $f->parse($html); ok($text, 'html formatted'); is($text, "\n\nThis is a mail of some sort\n\n", 'html correctly formatted with no left margin'); sub new_html_link { return <<'HTML';

This is a mail of some sort with a link.

HTML } sub new_html { return <<'HTML';

This is a mail of some sort

HTML } HTML-FormatText-WithLinks-0.15/t/treebuilder_problem.t000444001750001750 132312452325612 23037 0ustar00struanstruan000000000000use Test::More; BEGIN { eval "use Test::MockObject"; if ( $@ ) { plan skip_all => "Test::MockObject required for testing TreeBuilder problems"; } else { plan tests => 3; } my $m = Test::MockObject->new(); $m->fake_new( 'HTML::TreeBuilder' ); $m->mock( 'parse', sub { $! = 1122; return undef; } ); }; use HTML::FormatText::WithLinks; my $f = HTML::FormatText::WithLinks->new( ); ok($f, 'object created'); my $text = $f->parse('

some text

'); is($text, undef, 'undef returned for broken HTML::TreeBuilder'); like($f->error, qr/^HTML::TreeBuilder problem: /, 'correct error message for broken HTML::TreeBuilder'); HTML-FormatText-WithLinks-0.15/t/01load.t000444001750001750 45212452325612 20053 0ustar00struanstruan000000000000# $Id$ use Test::More tests => 5; use_ok('HTML::FormatText::WithLinks'); my $f = HTML::FormatText::WithLinks->new(); ok($f, 'objected created'); isa_ok( $f, 'HTML::FormatText::WithLinks' ); my $f2 = $f->new(); ok( $f2, 'second object created' ); isa_ok( $f2, 'HTML::FormatText::WithLinks' ); HTML-FormatText-WithLinks-0.15/examples000755001750001750 012452325612 20043 5ustar00struanstruan000000000000HTML-FormatText-WithLinks-0.15/examples/custom_numbers.pl000444001750001750 151012452325612 23577 0ustar00struanstruan000000000000#!/usr/bin/perl use strict; use warnings; use Roman; use LWP::Simple; use HTML::FormatText::WithLinks; =head1 DESCRIPTION This examples uses the custom number generation option to generate the footnote numbers as latin numerals. It also demonstrates how to place the footnote indicators after the link instead of in front which is the default. =cut my $html = get("http://exo.org.uk/"); my $f = HTML::FormatText::WithLinks->new( base => "http://exo.org.uk/", unique_links => 1, link_num_generator => \&generator, # fear my dodgy latin... before_link => '', after_link => '[%n]', footnote => '%n est %l' ); sub generator() { my $num = shift; # Romans didn't get zero... $num += 1; return uc roman($num); } print $f->parse($html); HTML-FormatText-WithLinks-0.15/examples/custom_footnotes.pl000444001750001750 101312452325612 24142 0ustar00struanstruan000000000000#!/usr/bin/perl use strict; use warnings; use LWP::Simple; use HTML::FormatText::WithLinks; =head1 DESCRIPTION This script demonstrates how to used a custom footnote for your links. 
In this case all the footnotes will now be of the form footnote 10 is for http://exo.org.uk/code/ =cut my $html = get("http://exo.org.uk/"); my $f = HTML::FormatText::WithLinks->new( base => 'http://exo.org.uk/', unique_links => 1, footnote => 'footnote %n is for %l' ); print $f->parse($html); HTML-FormatText-WithLinks-0.15/examples/basic_usage.pl000444001750001750 103112452325612 22775 0ustar00struanstruan000000000000#!/usr/bin/perl use strict; use warnings; use LWP::Simple; use HTML::FormatText::WithLinks; =head1 DESCRIPTION This script shows the basic usage of the module. The two options used are the base one to make sure that any relative links in the page are turned into absolute links and unique_links which only generates one footnote per link. =cut my $html = get("http://exo.org.uk/"); my $f = HTML::FormatText::WithLinks->new( base => "http://exo.org.uk/", unique_links => 1 ); print $f->parse($html);