HTML-Tidy-1.60/0000755000101700007640000000000013156250346012277 5ustar alesterispcHTML-Tidy-1.60/t/0000755000101700007640000000000013156250345012541 5ustar alesterispcHTML-Tidy-1.60/t/roundtrip.t0000644000101700007640000000201313156247313014751 0ustar alesterispc#!perl -T use warnings; use strict; use Test::More tests => 3; use HTML::Tidy; my $args = { newline => 'Lf' }; my $tidy = HTML::Tidy->new($args); isa_ok( $tidy, 'HTML::Tidy' ); $tidy->ignore( type => TIDY_INFO ); # clean once $tidy->ignore( text => qr/DOCTYPE/ ); my $html = 'This is a test.'; my $clean = $tidy->clean( $html ); # then verify that it meets tidy's high standards $tidy = HTML::Tidy->new($args); # reset messages; $tidy->ignore( type => TIDY_INFO ); $clean = $tidy->clean($clean); my @messages = $tidy->messages( $clean ); is_deeply( \@messages, [], q{The cleaned stuff shouldn't have any errors} ); $clean =~ s/"(HTML Tidy|tidyp).+w3\.org"/"Tidy"/; my $expected = do { local $/ = undef; }; is( $clean, $expected, 'Cleaned up properly' ); __DATA__ This is a test. 
HTML-Tidy-1.60/t/parse.t0000644000101700007640000000060513156247313014042 0ustar alesterispc#!/usr/bin/perl -T use strict; use warnings; use Test::Exception; use Test::More tests => 2; use HTML::Tidy; my $tidy = HTML::Tidy->new; isa_ok( $tidy, 'HTML::Tidy' ); my $expected_pattern = 'Usage: parse($filename,$str [, $str...])'; throws_ok { $tidy->parse('fake-filename.txt'); } qr/\Q$expected_pattern\E/, 'parse() dies when not given a string or array of strings to parse'; HTML-Tidy-1.60/t/wordwrap.cfg0000644000101700007640000000004513156247313015067 0ustar alesterispctidy-mark: 0 newline: LF wrap: 12 HTML-Tidy-1.60/t/segfault-form.t0000644000101700007640000000072013156247313015501 0ustar alesterispc#!perl -Tw use strict; use warnings; use Test::More tests => 3; use HTML::Tidy; my $html = do { local $/ = undef; ; }; my $tidy = HTML::Tidy->new; isa_ok( $tidy, 'HTML::Tidy' ); $tidy->clean( $html ); isa_ok( $tidy, 'HTML::Tidy' ); pass( 'Cleaned OK' ); __DATA__
HTML-Tidy-1.60/t/cfg-for-parse.cfg0000644000101700007640000000002113156247313015647 0ustar alesterispcshow-warnings: 0 HTML-Tidy-1.60/t/perfect.t0000644000101700007640000000647713156247313014375 0ustar alesterispc#!perl -T use warnings; use strict; use Test::More tests => 3; use HTML::Tidy; my $html = join '', ; # No errors at all. my $tidy = HTML::Tidy->new; isa_ok( $tidy, 'HTML::Tidy' ); $tidy->ignore( type => TIDY_INFO ); my $rc = $tidy->parse( '-', $html ); ok( $rc, 'Parsed OK' ); my @returned = map { $_->as_string } $tidy->messages; is_deeply( \@returned, [], 'Should have no messages' ); # User reported a segfault when there are no messages. By gum, he was # right. __DATA__ petdance.com: Andy Lester's Programming & Writing Andy & Amy's Pet Supplies & Dance Instruction

Perl, Programming & Writing

My Technology & publishing blog at oreillynet.com
My Perl-specific and personal blog at use.perl.org
Andy Lester's resume
Andy's Perl Pages

The Lester Family

Andy: The Dad
Amy: The Mom
Quinn: The Girl
Baxter: The Dog
Our family trees

About Andy & Amy

The Page Of Mush
People we're looking for
Article about us and how we met from the Northwest Herald

Useful Stuff

Andy's Magic Start Page: Bunches of your favorite search engines, all in one place
Add-a-page Page: Bunches of website submission forms, all in one place
Cold Fusion stuff: CFX_HTTP and other custom tags

Musical Information, etc

Naked Raygun: Bring your mom and your mom's friends, too
Action Park: A compendium of information about Big Black, Rapeman, Shellac and Steve Albini

Other things that might be fun

When In Doubt, Use Parsley: Amy's journal
Wonder: Andy's old journal
So American It Hurts: Andy & Amy's trip to Graceland
Gallery Of Household Appliances
Media Wants: Stuff I yearn to consume
Buzzword Bingo
HTML-Tidy-1.60/t/unicode-nbsp.t0000644000101700007640000000132113156247313015312 0ustar alesterispc#!perl -T use warnings; use strict; use Test::More tests => 3; use HTML::Tidy; use Encode; my $bytes_string = "\x{c2}\x{a0}"; #UTF8 nbsp my $perl_chars = Encode::decode('utf8',$bytes_string); # Perl chars of utf8 byte string my $tidy = HTML::Tidy->new({ show_body_only => 1 }); my $newline = $tidy->clean( '' ); # HTML::Tidy adds a platform-dependent "newline". like( $newline, qr/^\r?\n?$/, 'Tidy Newline' ); # should be CR or LF or both my $expected_after_tidy = " $newline"; # HTML::Tidy should convert the nbsp to an HTML entity (and add a newline). is( $tidy->clean( $perl_chars ), $expected_after_tidy, 'Perl chars OK' ); is( $tidy->clean( $bytes_string ), $expected_after_tidy, 'Byte string OK' ); HTML-Tidy-1.60/t/wordwrap.t0000644000101700007640000000112613156247313014574 0ustar alesterispc#!perl -T use warnings; use strict; use Test::More tests => 1; use HTML::Tidy; my $input=q{Here's some ed and
eakfest MarkUp}; my $expected=<<'EOD'; Here's some ed and
eakfest MarkUp
EOD my @expected = split(/\n/, $expected); my $cfg = 't/wordwrap.cfg'; my $tidy = HTML::Tidy->new( {config_file => $cfg} ); my $result = $tidy->clean( $input ); my @result = split(/\n/, $result); is_deeply( \@result, \@expected, 'Cleaned stuff looks like what we expected'); HTML-Tidy-1.60/t/clean.t0000644000101700007640000000122613156250333014006 0ustar alesterispc#!/usr/bin/perl -T use strict; use warnings; use Test::Exception; use Test::More tests => 3; use HTML::Tidy; my $tidy = HTML::Tidy->new; isa_ok( $tidy, 'HTML::Tidy' ); my $expected_pattern = 'Usage: clean($str [, $str...])'; throws_ok { $tidy->clean(); } qr/\Q$expected_pattern\E/, 'clean() croaks when not given a string or list of strings'; like( $tidy->clean(''), _expected_empty_html(), '$tidy->clean("") returns empty HTML document', ); sub _expected_empty_html { return qr{ }; } HTML-Tidy-1.60/t/simple.t0000644000101700007640000000114513156247313014221 0ustar alesterispc#!perl -T use warnings; use strict; use Test::More tests => 4; use HTML::Tidy; my $html = join '', ; my $tidy = HTML::Tidy->new; isa_ok( $tidy, 'HTML::Tidy' ); $tidy->ignore( type => TIDY_INFO ); my $rc = $tidy->parse( '-', $html ); ok( $rc, 'Parsed OK' ); my @messages = $tidy->messages; is( scalar @messages, 5, 'Right number of initial messages' ); $tidy->clear_messages; is_deeply( [$tidy->messages], [], 'Cleared the messages' ); __DATA__ blah blah Barf

more blah

HTML-Tidy-1.60/t/ignore.t0000644000101700007640000001066513156247313014222 0ustar alesterispc#!perl -T use strict; use warnings; use Test::More tests => 9; use HTML::Tidy; my $html = do { local $/ = undef; }; my @expected_warnings = split /\n/, q{ - (1:1) Warning: missing declaration - (23:1) Warning: discarding unexpected - (24:XX) Warning: unescaped & which should be written as & - (24:XX) Warning: unescaped & which should be written as & }; chomp @expected_warnings; shift @expected_warnings; # First one's blank my @expected_errors = split /\n/, q{ - (23:1) Error: is not recognized! }; chomp @expected_errors; shift @expected_errors; # First one's blank WARNINGS_ONLY: { my $tidy = HTML::Tidy->new; isa_ok( $tidy, 'HTML::Tidy' ); $tidy->ignore( type => TIDY_ERROR ); my $rc = $tidy->parse( '-', $html ); ok( $rc, 'Parsed OK' ); my @returned = map { $_->as_string } $tidy->messages; s/[\r\n]+\z// for @returned; munge_returned( \@returned ); is_deeply( \@returned, \@expected_warnings, 'Matching warnings' ); } ERRORS_ONLY: { my $tidy = HTML::Tidy->new; isa_ok( $tidy, 'HTML::Tidy' ); $tidy->ignore( type => TIDY_WARNING ); my $rc = $tidy->parse( '-', $html ); ok( $rc, 'Parsed OK' ); my @returned = map { $_->as_string } $tidy->messages; s/[\r\n]+\z// for @returned; is_deeply( \@returned, \@expected_errors, 'Matching errors' ); } DIES_ON_ERROR: { my $tidy = HTML::Tidy->new; isa_ok( $tidy, 'HTML::Tidy' ); my $rc = eval { $tidy->ignore( blongo => TIDY_WARNING ) }; ok( !$rc, 'eval should fail' ); like( $@, qr/^Invalid ignore type.+blongo/, 'Throws an error' ); } sub munge_returned { # non-1 line numbers are not reliable across libtidies my $returned = shift; my $start_line = shift || '-'; for my $line ( @{$returned} ) { next if $line =~ /$start_line \(\d+:1\)/; $line =~ s/$start_line \((\d+):(\d+)\)/$start_line ($1:XX)/; } } __DATA__ petdance.com: Andy Lester's Programming & Writing Andy & Amy's Pet Supplies & Dance Instruction

Perl, Programming & Writing

My Technology & publishing blog at oreillynet.com
My Perl-specific and personal blog at use.perl.org
Andy Lester's resume
Andy's Perl Pages

The Lester Family

Andy: The Dad
Amy: The Mom
Quinn: The Girl
Baxter: The Dog
Our family trees

About Andy & Amy

The Page Of Mush
People we're looking for
Article about us and how we met from the Northwest Herald

Useful Stuff

Andy's Magic Start Page: Bunches of your favorite search engines, all in one place
Add-a-page Page: Bunches of website submission forms, all in one place
Cold Fusion stuff: CFX_HTTP and other custom tags

Musical Information, etc

Naked Raygun: Bring your mom and your mom's friends, too
Action Park: A compendium of information about Big Black, Rapeman, Shellac and Steve Albini

Other things that might be fun

When In Doubt, Use Parsley: Amy's journal
Wonder: Andy's old journal
So American It Hurts: Andy & Amy's trip to Graceland
Gallery Of Household Appliances
Media Wants: Stuff I yearn to consume
Buzzword Bingo
HTML-Tidy-1.60/t/venus.t0000755000101700007640000000522613156247313014077 0ustar alesterispc#!perl -T use warnings; use strict; use Test::More tests => 2; use HTML::Tidy; my $filename = 't/venus.html'; open( my $fh, '<', $filename ) or die "Can't open $filename: $!\n"; my $raw = do { local $/ = undef; <$fh> }; close $fh; my $cfg = 't/venus.cfg'; my $tidy = HTML::Tidy->new( {config_file => $cfg} ); isa_ok( $tidy, 'HTML::Tidy' ); my $cooked = $tidy->clean( $raw ); my @cooked = split( /\n/, $cooked ); chomp @cooked; my @expected = ; chomp @expected; is_deeply( \@cooked, \@expected, 'Cooked stuff looks like what we expected' ); __DATA__ Venus Flytrap for 100 Question

Wetland Plants Jeopardy

Venus Flytrap for 100

Question: What does the Venus Flytrap feed on?

Click here for the answer.

| Map | Site Search | Terms | Credits | Feedback |

Created for the Museums in the Classroom program sponsored by Illinois State Board of Education, the Brookfield Zoo, the Illinois State Museum., and Kildeer Countryside CCSD 96.
Authors: Twin Groves Museums in the Classroom Team,
School: Twin Groves Junior High School, Buffalo Grove, Illinois 60089
Created: 27 June 1998- Updated: 6 October 2003
HTML-Tidy-1.60/t/ignore-text.t0000644000101700007640000000735013156247313015201 0ustar alesterispc#!perl -T use warnings; use strict; use Test::More tests => 3; use HTML::Tidy; my $html = do { local $/; }; my @expected_messages = split /\n/, q{ DATA (24:XX) Warning: unescaped & which should be written as & DATA (24:XX) Warning: unescaped & which should be written as & }; chomp @expected_messages; shift @expected_messages; # First one's blank IGNORE_BOGOTAG: { my $tidy = HTML::Tidy->new; isa_ok( $tidy, 'HTML::Tidy' ); $tidy->ignore( text => qr/bogotag/ ); $tidy->ignore( text => [ qr/UNESCAPED/, qr/doctype/i ] ); # The qr/UNESCAPED/ should not ignore anything because there's no /i my $rc = $tidy->parse( 'DATA', $html ); ok( $rc, 'Parsed OK' ); my @returned = map { $_->as_string } $tidy->messages; munge_returned( \@returned, 'DATA' ); s/[\r\n]+\z// for @returned; is_deeply( \@returned, \@expected_messages, 'Matching warnings' ); } sub munge_returned { # non-1 line numbers are not reliable across libtidies my $returned = shift; my $start_line = shift || '-'; for my $line ( @{$returned} ) { next if $line =~ m/$start_line \(\d+:1\)/; $line =~ s/$start_line \((\d+):(\d+)\)/$start_line ($1:XX)/; } } __DATA__ petdance.com: Andy Lester's Programming & Writing Andy & Amy's Pet Supplies & Dance Instruction

Perl, Programming & Writing

My Technology & publishing blog at oreillynet.com
My Perl-specific and personal blog at use.perl.org
Andy Lester's resume
Andy's Perl Pages

The Lester Family

Andy: The Dad
Amy: The Mom
Quinn: The Girl
Baxter: The Dog
Our family trees

About Andy & Amy

The Page Of Mush
People we're looking for
Article about us and how we met from the Northwest Herald

Useful Stuff

Andy's Magic Start Page: Bunches of your favorite search engines, all in one place
Add-a-page Page: Bunches of website submission forms, all in one place
Cold Fusion stuff: CFX_HTTP and other custom tags

Musical Information, etc

Naked Raygun: Bring your mom and your mom's friends, too
Action Park: A compendium of information about Big Black, Rapeman, Shellac and Steve Albini

Other things that might be fun

When In Doubt, Use Parsley: Amy's journal
Wonder: Andy's old journal
So American It Hurts: Andy & Amy's trip to Graceland
Gallery Of Household Appliances
Media Wants: Stuff I yearn to consume
Buzzword Bingo
HTML-Tidy-1.60/t/illegal-options.t0000644000101700007640000000102313156247313016025 0ustar alesterispc#!perl -T use strict; use warnings; use Test::Exception; use Test::More; use HTML::Tidy; my @unsupported_options = qw( force-output gnu-emacs-file gnu-emacs keep-time quiet slide-style write-back ); foreach my $option ( @unsupported_options ) { throws_ok { HTML::Tidy->new( { config_file => 't/cfg-for-parse.cfg', $option => 1, } ); } qr/\QUnsupported option: $option\E/, "option $option is not supported"; } done_testing(); HTML-Tidy-1.60/t/unicode.t0000644000101700007640000000362013156247313014356 0ustar alesterispc#!perl -T # Copyright (c) 2006 Jonathan Rockway use warnings; use strict; use Test::More tests => 9; use HTML::Tidy; use Encode (); use Carp; my $args = { newline => 'Lf' }; my $tidy = HTML::Tidy->new($args); $tidy->ignore( type => TIDY_INFO ); # Suck in the reference HTML document. open( my $html_in, '<:utf8', 't/unicode.html' ) or Carp::croak( "Can't read unicode.html: $!" ); my $html = do { local $/; <$html_in> }; close $html_in; # Suck in the correct, cleaned doc (from DATA) binmode DATA, ':utf8'; my $reference = do {local $/; }; # Make sure both are unicode characters (not utf-x octets). ok(utf8::is_utf8($html), 'html is utf8'); ok(utf8::is_utf8($reference), 'reference is utf8'); my $clean = $tidy->clean( $html ); ok(utf8::is_utf8($clean), 'cleaned output is also unicode'); $clean =~ s/"HTML Tidy.+w3\.org"/"Tidy"/; $clean =~ s/"(HTML Tidy|tidyp).+w3\.org"/"Tidy"/; is($clean, $reference, q{Cleanup didn't break anything}); my @messages = $tidy->messages; is_deeply( \@messages, [], q{There still shouldn't be any errors} ); $tidy = HTML::Tidy->new($args); isa_ok( $tidy, 'HTML::Tidy' ); my $rc = $tidy->parse( '', $html ); ok( $rc, 'Parsed OK' ); @messages = $tidy->messages; is_deeply( \@messages, [], q{There still shouldn't be any errors} ); subtest 'Try send bytes to clean method.' 
=> sub { my $html = Encode::encode('utf8',$html); ok(!utf8::is_utf8($html), 'html is row bytes'); my $clean = $tidy->clean( $html ); ok(utf8::is_utf8($clean), 'but cleaned output is string'); $clean =~ s/"HTML Tidy.+w3\.org"/"Tidy"/; $clean =~ s/"(HTML Tidy|tidyp).+w3\.org"/"Tidy"/; is($clean, $reference, q{Cleanup didn't break anything}); }; __DATA__ 日本語のホムページ

Unicodeが好きですか?

HTML-Tidy-1.60/t/unicode.html0000644000101700007640000000025413156247313015057 0ustar alesterispc 日本語のホムページ

Unicodeが好きですか?

HTML-Tidy-1.60/t/too-many-titles.t0000644000101700007640000000144613156247313016001 0ustar alesterispc#!perl -T use warnings; use strict; use Test::More tests => 3; use HTML::Tidy; my $html = join '', ; my @expected = split /\n/, q{ - (1:1) Warning: missing declaration - (4:9) Warning: too many title elements in }; chomp @expected; shift @expected; # First one's blank my $tidy = HTML::Tidy->new; isa_ok( $tidy, 'HTML::Tidy' ); $tidy->ignore( type => TIDY_INFO ); my $rc = $tidy->parse( '-', $html ); ok( $rc, 'Parsed OK' ); my @returned = map { $_->as_string } $tidy->messages; s/[\r\n]+\z// for @returned; is_deeply( \@returned, \@expected, 'Matching warnings' ); __DATA__ Test stuff As if one title isn't enough

This is my paragraph

HTML-Tidy-1.60/t/pod.t0000644000101700007640000000024713156247313013514 0ustar alesterispc#!perl -Tw use strict; use warnings; use Test::More; eval 'use Test::Pod 1.14'; plan skip_all => 'Test::Pod 1.14 required for testing POD' if $@; all_pod_files_ok(); HTML-Tidy-1.60/t/levels.t0000644000101700007640000000710413156247313014223 0ustar alesterispc#!perl -T use warnings; use strict; use Test::More tests => 3; use HTML::Tidy; my $tidy = HTML::Tidy->new; isa_ok( $tidy, 'HTML::Tidy' ); my $rc = $tidy->parse( '-', ); ok( $rc, 'Parsed OK' ); my @expected = split /\n/, q{ - (1:1) Warning: missing declaration - (23:1) Error: is not recognized! - (23:1) Warning: discarding unexpected - (24:XX) Warning: unescaped & which should be written as & - (24:XX) Warning: unescaped & which should be written as & }; chomp @expected; shift @expected; # First one's blank my @messages = map { $_->as_string } $tidy->messages; s/[\r\n]+\z// for @messages; munge_returned( \@messages ); is_deeply( \@messages, \@expected, 'Matching messages' ); sub munge_returned { # non-1 line numbers are not reliable across libtidies my $returned = shift; my $start_line = shift || '-'; for my $line ( @{$returned} ) { next if $line =~ /$start_line \(\d+:1\)/; $line =~ s/$start_line \((\d+):(\d+)\)/$start_line ($1:XX)/; } return; } __DATA__ petdance.com: Andy Lester's Programming & Writing Andy & Amy's Pet Supplies & Dance Instruction

Perl, Programming & Writing

My Technology & publishing blog at oreillynet.com
My Perl-specific and personal blog at use.perl.org
Andy Lester's resume
Andy's Perl Pages

The Lester Family

Andy: The Dad
Amy: The Mom
Quinn: The Girl
Baxter: The Dog
Our family trees

About Andy & Amy

The Page Of Mush
People we're looking for
Article about us and how we met from the Northwest Herald

Useful Stuff

Andy's Magic Start Page: Bunches of your favorite search engines, all in one place
Add-a-page Page: Bunches of website submission forms, all in one place
Cold Fusion stuff: CFX_HTTP and other custom tags

Musical Information, etc

Naked Raygun: Bring your mom and your mom's friends, too
Action Park: A compendium of information about Big Black, Rapeman, Shellac and Steve Albini

Other things that might be fun

When In Doubt, Use Parsley: Amy's journal
Wonder: Andy's old journal
So American It Hurts: Andy & Amy's trip to Graceland
Gallery Of Household Appliances
Media Wants: Stuff I yearn to consume
Buzzword Bingo
HTML-Tidy-1.60/t/version.t0000644000101700007640000000053113156247313014413 0ustar alesterispc#!perl -T use warnings; use strict; use Test::More tests => 4; use HTML::Tidy; for my $version_string (HTML::Tidy->tidyp_version, HTML::Tidy->libtidyp_version) { like( $version_string, qr/^\d\.\d{2,}$/, 'Valid version string' ); cmp_ok( $version_string, '>=', '0.90', 'Version is greater than 0.90, which is the one I maintain' ); } HTML-Tidy-1.60/t/opt-00.t0000644000101700007640000000260113156247313013745 0ustar alesterispc#!perl -T use warnings; use strict; use Test::More tests => 1; use HTML::Tidy; my $tidy = HTML::Tidy->new({ tidy_mark => 0, add_xml_decl => 1, output_xhtml => 1, doctype => 'strict', clean => 1, css_prefix => 'myprefix', drop_empty_paras => 0, enclose_block_text => 1, escape_cdata => 1, hide_comments => 1, replace_color => 1, repeated_attributes => 'keep-first', break_before_br => 1, vertical_space => 1, newline => 'cr', }); my $input=<<'EOD';

example

Here's some ed and
eakfest MarkUp: ...

EOD my $expected =<<'EOD';

example

Here's some ed and
eakfest MarkUp: ...

EOD my @expected = split(/\n/, $expected); my $result = $tidy->clean( $input ); my @result = split(/\r/, $result); is_deeply( \@result, \@expected, 'Cleaned stuff looks like what we expected'); HTML-Tidy-1.60/t/message.t0000644000101700007640000000264713156247313014364 0ustar alesterispc#!perl -T use warnings; use strict; use Test::More tests => 4; use HTML::Tidy; use HTML::Tidy::Message; WITH_LINE_NUMBERS: { my $error = HTML::Tidy::Message->new( 'foo.pl', TIDY_ERROR, 2112, 5150, 'Blah blah' ); isa_ok( $error, 'HTML::Tidy::Message' ); my %expected = ( file => 'foo.pl', type => TIDY_ERROR, line => 2112, column => 5150, text => 'Blah blah', as_string => 'foo.pl (2112:5150) Error: Blah blah', ); _match_up( $error, \%expected, 'With line numbers' ); } WITHOUT_LINE_NUMBERS: { my $error = HTML::Tidy::Message->new( 'bar.pl', TIDY_WARNING, undef, undef, 'Blah blah' ); isa_ok( $error, 'HTML::Tidy::Message' ); my %expected = ( file => 'bar.pl', type => TIDY_WARNING, line => 0, column => 0, text => 'Blah blah', as_string => 'bar.pl - Warning: Blah blah', ); _match_up( $error, \%expected, 'Without line numbers' ); } sub _match_up { local $Test::Builder::Level = $Test::Builder::Level + 1; my $error = shift; my $expected = shift; my $msg = shift or die; return subtest "_matchup( $msg )" => sub { plan tests => scalar keys %{$expected}; for my $what ( sort keys %{$expected} ) { is( $error->$what, $expected->{$what}, "$what matches" ); } }; } HTML-Tidy-1.60/t/venus.html0000644000101700007640000000457013156247313014576 0ustar alesterispc Venus Flytrap for 100 Question

Wetland Plants Jeopardy

Venus Flytrap for 100

 

Question: What does the Venus Flytrap feed on?

Click here for the answer.

| Map | Site Search | Terms | Credits | Feedback |

Created for the Museums in the Classroom program sponsored by Illinois State Board of Education, the Brookfield Zoo, the Illinois State Museum., and Kildeer Countryside CCSD 96.
 
Authors: Twin Groves Museums in the Classroom Team,
School: Twin Groves Junior High School, Buffalo Grove, Illinois 60089
Created: 27 June 1998- Updated: 6 October 2003
HTML-Tidy-1.60/t/cfg-for-parse.t0000644000101700007640000000140513156247313015362 0ustar alesterispc#!perl -T use warnings; use strict; use Test::More tests => 3; use HTML::Tidy; my $html = do { local $/; }; my @expected_messages = split /\n/, q{ DATA (3:1) Error: is not recognized! DATA (8:1) Error: is not recognized! DATA (9:1) Error: is not recognized! }; chomp @expected_messages; shift @expected_messages; # First one's blank my $tidy = HTML::Tidy->new( { config_file => 't/cfg-for-parse.cfg' } ); isa_ok( $tidy, 'HTML::Tidy' ); my $rc = $tidy->parse( 'DATA', $html ); ok( $rc, 'Parsed OK' ); my @returned = map { $_->as_string } $tidy->messages; s/[\r\n]+\z// for @returned; is_deeply( \@returned, \@expected_messages, 'Matching errors' ); __DATA__ ... Foo </HEAD> <BODY> </B> <X> <Y> </I> </BODY> �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������HTML-Tidy-1.60/t/extra-quote.t����������������������������������������������������������������������0000644�0001017�0000764�00000001714�13156247313�015210� 0����������������������������������������������������������������������������������������������������ustar �alester�������������������������ispc�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!perl -T use warnings; use strict; # Response to an HTML::Lint request that it handle mishandled quotes. 
# See https://rt.cpan.org/Ticket/Display.html?id=1459 use Test::More tests => 4; use HTML::Tidy; my $html = do { local $/ = undef; <DATA> }; my $tidy = HTML::Tidy->new; isa_ok( $tidy, 'HTML::Tidy' ); $tidy->ignore( text => qr/DOCTYPE/ ); my $rc = $tidy->parse( '-', $html ); ok( $rc, 'Parsed OK' ); my @expected = split /\n/, q{ - (4:1) Warning: <img> unexpected or duplicate quote mark - (4:1) Warning: <img> escaping malformed URI reference - (4:1) Warning: <img> lacks "alt" attribute }; chomp @expected; shift @expected; # First one's blank my @messages = $tidy->messages; is( scalar @messages, 3, 'Should have exactly three messages' ); my @strings = map { $_->as_string } @messages; s/[\r\n]+\z// for @strings; is_deeply( \@strings, \@expected, 'Matching warnings' ); __DATA__ <html> <title>Bogo HTML-Tidy-1.60/t/00-load.t0000644000101700007640000000036213156247313014064 0ustar alesterispc#!perl -T use strict; use warnings; use Test::More tests => 1; use HTML::Tidy; use HTML::Tidy::Message; diag( "Testing HTML::Tidy $HTML::Tidy::VERSION, tidyp " . HTML::Tidy->tidyp_version() . 
", Perl $], $^X" ); pass( 'Modules loaded' ); HTML-Tidy-1.60/t/venus.cfg0000644000101700007640000000100513156247313014357 0ustar alesterispc// HTML Tidy configuration file bare: yes // // This actually caused a segmentation fault in a MSHTML created doc // //clean: yes drop-proprietary-attributes: yes drop-empty-paras: yes break-before-br: yes word-2000: yes //tidy-mark: yes tidy-mark: no //add-xml-space: yes output-xml: yes enclose-text: yes enclose-block-text: yes char-encoding: utf8 force-output: yes indent: yes quiet: yes //add-xml-decl: yes //gnu-emacs: yes // make sure we are using "\n", even on Win32 newline: LF HTML-Tidy-1.60/t/parse-errors.t0000644000101700007640000000150213156247313015351 0ustar alesterispc#!/usr/bin/perl -T use strict; use warnings; use Test::Exception; use Test::More tests => 2; use HTML::Tidy; my $tidy = HTML::Tidy->new; my $errbuf = do { local $/; readline(*DATA); }; my $ret = $tidy->_parse_errors('fake_filename.html', $errbuf, "\n"); is( $ret, 1, 'encountered 1 parsing error' ); is( scalar @{$tidy->{messages}}, 7, 'got 7 messages when parsing errors' ); __DATA__ line 1 column 1 - Warning: missing declaration line 1 column 1 - Warning: plain text isn\'t allowed in elements line 1 column 1 - Info: previously mentioned line 1 column 1 - Warning: inserting implicit line 1 column 13 - Warning: missing
line 1 column 1 - Warning: inserting missing \'title\' element Info: Document content looks like HTML 3.2 FAKE_ERROR_TYPE 5 warnings, 0 errors were found! HTML-Tidy-1.60/t/pod-coverage.t0000644000101700007640000000030713156247313015302 0ustar alesterispc#!perl -Tw use strict; use warnings; use Test::More; eval 'use Test::Pod::Coverage 1.04'; plan skip_all => 'Test::Pod::Coverage 1.04 required for testing POD coverage' if $@; all_pod_coverage_ok(); HTML-Tidy-1.60/t/clean-crash.t0000644000101700007640000000141313156247313015106 0ustar alesterispc#!/usr/bin/perl -T use warnings; use strict; # From a bug found by Aaron Patterson #Full context and any attached attachments can be found at: # #Here's a snippet of code to repro the bug, it produces an 'Illegal instruction' error use Test::More tests => 2; use HTML::Tidy; my $html = do { local $/; }; my $tidy = HTML::Tidy->new; isa_ok( $tidy, 'HTML::Tidy' ); $tidy->ignore( type => TIDY_INFO ); $tidy->clean( $html ); my @mess = map { $_ ? $_->as_string() : undef } $tidy->messages(); pass( 'Ended OK' ); __DATA__
HTML-Tidy-1.60/perlcriticrc0000644000101700007640000000173513156247313014715 0ustar alesterispc[-CodeLayout::ProhibitParensWithBuiltins] [CodeLayout::ProhibitHardTabs] allow_leading_tabs = 0 [-CodeLayout::RequireTidyCode] # Never works for me. [Compatibility::PodMinimumVersion] above_version = 5.008008 [-ControlStructures::ProhibitPostfixControls] [-Documentation::PodSpelling] [-Documentation::RequirePodAtEnd] [-Documentation::RequirePodSections] [-InputOutput::RequireCheckedSyscalls] functions = open opendir read readline readdir close closedir [-Miscellanea::RequireRcsKeywords] [-Modules::RequireVersionVar] [-RegularExpressions::ProhibitEscapedMetacharacters] [-RegularExpressions::RequireDotMatchAnything] [-RegularExpressions::RequireExtendedFormatting] [-RegularExpressions::RequireLineBoundaryMatching] [Subroutines::RequireArgUnpacking] [-ValuesAndExpressions::ProhibitConstantPragma] [-ValuesAndExpressions::ProhibitNoisyQuotes] [-ValuesAndExpressions::ProhibitEmptyQuotes] [-ValuesAndExpressions::ProhibitMagicNumbers] [-Variables::ProhibitPunctuationVars] HTML-Tidy-1.60/bin/0000755000101700007640000000000013156250345013046 5ustar alesterispcHTML-Tidy-1.60/bin/webtidy0000755000101700007640000000401213156247313014441 0ustar alesterispc#!/usr/bin/perl -w use warnings; use strict; use Getopt::Long; use HTML::Tidy; my $help; my $context; my $tidy = HTML::Tidy->new; GetOptions( 'help|version' => \$help, 'context:i' => \$context, 'noerrors' => sub { $tidy->ignore( type => [ TIDY_ERROR ] ) }, 'nowarnings' => sub { $tidy->ignore( type => [ TIDY_WARNING ] ) }, ) or $help = 1; if ( !@ARGV || $help ) { print "webtidy v$HTML::Tidy::VERSION using tidyp v" . HTML::Tidy::tidyp_version() . 
"\n"; print ; exit 1; } for my $url ( @ARGV ) { my @lines; if ( $url =~ /^https?:/ ) { if ( !eval { require LWP::Simple; 1; } ) { warn q{Can't retrieve URLs without LWP::Simple installed}; next; } my $content = LWP::Simple::get( $url ); if ( $content ) { @lines = split( /\n/, $content ); $_ = "$_\n" for @lines; } else { warn "Unable to fetch $url\n"; next; } } else { open( my $fh, '<', $url ) or die "Can't open $url: $!"; @lines = <$fh>; close $fh; } $tidy->parse( $url, @lines ); for my $message ( $tidy->messages ) { print $message->as_string(), "\n"; if ( defined $context ) { $context += 0; my $lineno = $message->line - 1; my $start = $lineno-$context; $start = 0 if $start < 0; my $end = $lineno+$context; $end = $#lines if $end > $#lines; for my $i ( $start..$end ) { printf( '%5d: %s', $i+1, $lines[$i] ); } print "\n"; } } $tidy->clear_messages(); } # for files __END__ Usage: webtidy [filename or url]... (filename - reads STDIN) --context[=n] Show the offending line (and n surrounding lines) --noerrors Ignore errors --nowarnings Ignore warnings --help This message webtidy is free software. You may modify or distribute it under the terms of the Artistic License v2.0. HTML-Tidy-1.60/ppport.h0000644000101700007640000006350613156247313014006 0ustar alesterispc /* ppport.h -- Perl/Pollution/Portability Version 2.007 * * Automatically Created by Devel::PPPort on Mon Feb 16 21:21:31 2004 * * Do NOT edit this file directly! -- Edit PPPort.pm instead. * * Version 2.x, Copyright (C) 2001, Paul Marquess. * Version 1.x, Copyright (C) 1999, Kenneth Albanowski. * This code may be used and distributed under the same license as any * version of Perl. * * This version of ppport.h is designed to support operation with Perl * installations back to 5.004, and has been tested up to 5.8.1. * * If this version of ppport.h is failing during the compilation of this * module, please check if a newer version of Devel::PPPort is available * on CPAN before sending a bug report. 
* * If you are using the latest version of Devel::PPPort and it is failing * during compilation of this module, please send a report to perlbug@perl.com * * Include all following information: * * 1. The complete output from running "perl -V" * * 2. This file. * * 3. The name & version of the module you were trying to build. * * 4. A full log of the build that failed. * * 5. Any other information that you think could be relevant. * * * For the latest version of this code, please retreive the Devel::PPPort * module from CPAN. * */ /* * In order for a Perl extension module to be as portable as possible * across differing versions of Perl itself, certain steps need to be taken. * Including this header is the first major one, then using dTHR is all the * appropriate places and using a PL_ prefix to refer to global Perl * variables is the second. * */ /* If you use one of a few functions that were not present in earlier * versions of Perl, please add a define before the inclusion of ppport.h * for a static include, or use the GLOBAL request in a single module to * produce a global definition that can be referenced from the other * modules. * * Function: Static define: Extern define: * newCONSTSUB() NEED_newCONSTSUB NEED_newCONSTSUB_GLOBAL * */ /* To verify whether ppport.h is needed for your module, and whether any * special defines should be used, ppport.h can be run through Perl to check * your source code. Simply say: * * perl -x ppport.h *.c *.h *.xs foo/bar*.c [etc] * * The result will be a list of patches suggesting changes that should at * least be acceptable, if not necessarily the most efficient solution, or a * fix for all possible problems. It won't catch where dTHR is needed, and * doesn't attempt to account for global macro or function definitions, * nested includes, typemaps, etc. * * In order to test for the need of dTHR, please try your module under a * recent version of Perl that has threading compiled-in. 
* */ /* #!/usr/bin/perl @ARGV = ("*.xs") if !@ARGV; %badmacros = %funcs = %macros = (); $replace = 0; foreach () { $funcs{$1} = 1 if /Provide:\s+(\S+)/; $macros{$1} = 1 if /^#\s*define\s+([a-zA-Z0-9_]+)/; $replace = $1 if /Replace:\s+(\d+)/; $badmacros{$2}=$1 if $replace and /^#\s*define\s+([a-zA-Z0-9_]+).*?\s+([a-zA-Z0-9_]+)/; $badmacros{$1}=$2 if /Replace (\S+) with (\S+)/; } foreach $filename (map(glob($_),@ARGV)) { unless (open(IN, "<$filename")) { warn "Unable to read from $file: $!\n"; next; } print "Scanning $filename...\n"; $c = ""; while () { $c .= $_; } close(IN); $need_include = 0; %add_func = (); $changes = 0; $has_include = ($c =~ /#.*include.*ppport/m); foreach $func (keys %funcs) { if ($c =~ /#.*define.*\bNEED_$func(_GLOBAL)?\b/m) { if ($c !~ /\b$func\b/m) { print "If $func isn't needed, you don't need to request it.\n" if $changes += ($c =~ s/^.*#.*define.*\bNEED_$func\b.*\n//m); } else { print "Uses $func\n"; $need_include = 1; } } else { if ($c =~ /\b$func\b/m) { $add_func{$func} =1 ; print "Uses $func\n"; $need_include = 1; } } } if (not $need_include) { foreach $macro (keys %macros) { if ($c =~ /\b$macro\b/m) { print "Uses $macro\n"; $need_include = 1; } } } foreach $badmacro (keys %badmacros) { if ($c =~ /\b$badmacro\b/m) { $changes += ($c =~ s/\b$badmacro\b/$badmacros{$badmacro}/gm); print "Uses $badmacros{$badmacro} (instead of $badmacro)\n"; $need_include = 1; } } if (scalar(keys %add_func) or $need_include != $has_include) { if (!$has_include) { $inc = join('',map("#define NEED_$_\n", sort keys %add_func)). 
"#include \"ppport.h\"\n"; $c = "$inc$c" unless $c =~ s/#.*include.*XSUB.*\n/$&$inc/m; } elsif (keys %add_func) { $inc = join('',map("#define NEED_$_\n", sort keys %add_func)); $c = "$inc$c" unless $c =~ s/^.*#.*include.*ppport.*$/$inc$&/m; } if (!$need_include) { print "Doesn't seem to need ppport.h.\n"; $c =~ s/^.*#.*include.*ppport.*\n//m; } $changes++; } if ($changes) { open(OUT,">/tmp/ppport.h.$$"); print OUT $c; close(OUT); open(DIFF, "diff -u $filename /tmp/ppport.h.$$|"); while () { s!/tmp/ppport\.h\.$$!$filename.patched!; print STDOUT; } close(DIFF); unlink("/tmp/ppport.h.$$"); } else { print "Looks OK\n"; } } __DATA__ */ #ifndef _P_P_PORTABILITY_H_ #define _P_P_PORTABILITY_H_ #ifndef PERL_REVISION # ifndef __PATCHLEVEL_H_INCLUDED__ # include # endif # if !(defined(PERL_VERSION) || (SUBVERSION > 0 && defined(PATCHLEVEL))) # include # endif # ifndef PERL_REVISION # define PERL_REVISION (5) /* Replace: 1 */ # define PERL_VERSION PATCHLEVEL # define PERL_SUBVERSION SUBVERSION /* Replace PERL_PATCHLEVEL with PERL_VERSION */ /* Replace: 0 */ # endif #endif #define PERL_BCDVERSION ((PERL_REVISION * 0x1000000L) + (PERL_VERSION * 0x1000L) + PERL_SUBVERSION) /* It is very unlikely that anyone will try to use this with Perl 6 (or greater), but who knows. 
*/ #if PERL_REVISION != 5 # error ppport.h only works with Perl version 5 #endif /* PERL_REVISION != 5 */ #ifndef ERRSV # define ERRSV perl_get_sv("@",FALSE) #endif #if (PERL_VERSION < 4) || ((PERL_VERSION == 4) && (PERL_SUBVERSION <= 5)) /* Replace: 1 */ # define PL_Sv Sv # define PL_compiling compiling # define PL_copline copline # define PL_curcop curcop # define PL_curstash curstash # define PL_defgv defgv # define PL_dirty dirty # define PL_dowarn dowarn # define PL_hints hints # define PL_na na # define PL_perldb perldb # define PL_rsfp_filters rsfp_filters # define PL_rsfpv rsfp # define PL_stdingv stdingv # define PL_sv_no sv_no # define PL_sv_undef sv_undef # define PL_sv_yes sv_yes /* Replace: 0 */ #endif #ifdef HASATTRIBUTE # if (defined(__GNUC__) && defined(__cplusplus)) || defined(__INTEL_COMPILER) # define PERL_UNUSED_DECL # else # define PERL_UNUSED_DECL __attribute__((unused)) # endif #else # define PERL_UNUSED_DECL #endif #ifndef dNOOP # define NOOP (void)0 # define dNOOP extern int Perl___notused PERL_UNUSED_DECL #endif #ifndef dTHR # define dTHR dNOOP #endif #ifndef dTHX # define dTHX dNOOP # define dTHXa(x) dNOOP # define dTHXoa(x) dNOOP #endif #ifndef pTHX # define pTHX void # define pTHX_ # define aTHX # define aTHX_ #endif /* IV could also be a quad (say, a long long), but Perls * capable of those should have IVSIZE already. */ #if !defined(IVSIZE) && defined(LONGSIZE) # define IVSIZE LONGSIZE #endif #ifndef IVSIZE # define IVSIZE 4 /* A bold guess, but the best we can make. 
*/ #endif #ifndef UVSIZE # define UVSIZE IVSIZE #endif #ifndef NVTYPE # if defined(USE_LONG_DOUBLE) && defined(HAS_LONG_DOUBLE) # define NVTYPE long double # else # define NVTYPE double # endif typedef NVTYPE NV; #endif #ifndef INT2PTR #if (IVSIZE == PTRSIZE) && (UVSIZE == PTRSIZE) # define PTRV UV # define INT2PTR(any,d) (any)(d) #else # if PTRSIZE == LONGSIZE # define PTRV unsigned long # else # define PTRV unsigned # endif # define INT2PTR(any,d) (any)(PTRV)(d) #endif #define NUM2PTR(any,d) (any)(PTRV)(d) #define PTR2IV(p) INT2PTR(IV,p) #define PTR2UV(p) INT2PTR(UV,p) #define PTR2NV(p) NUM2PTR(NV,p) #if PTRSIZE == LONGSIZE # define PTR2ul(p) (unsigned long)(p) #else # define PTR2ul(p) INT2PTR(unsigned long,p) #endif #endif /* !INT2PTR */ #ifndef boolSV # define boolSV(b) ((b) ? &PL_sv_yes : &PL_sv_no) #endif #ifndef gv_stashpvn # define gv_stashpvn(str,len,flags) gv_stashpv(str,flags) #endif #ifndef newSVpvn # define newSVpvn(data,len) ((len) ? newSVpv ((data), (len)) : newSVpv ("", 0)) #endif #ifndef newRV_inc /* Replace: 1 */ # define newRV_inc(sv) newRV(sv) /* Replace: 0 */ #endif /* DEFSV appears first in 5.004_56 */ #ifndef DEFSV # define DEFSV GvSV(PL_defgv) #endif #ifndef SAVE_DEFSV # define SAVE_DEFSV SAVESPTR(GvSV(PL_defgv)) #endif #ifndef newRV_noinc # ifdef __GNUC__ # define newRV_noinc(sv) \ ({ \ SV *nsv = (SV*)newRV(sv); \ SvREFCNT_dec(sv); \ nsv; \ }) # else # if defined(USE_THREADS) static SV * newRV_noinc (SV * sv) { SV *nsv = (SV*)newRV(sv); SvREFCNT_dec(sv); return nsv; } # else # define newRV_noinc(sv) \ (PL_Sv=(SV*)newRV(sv), SvREFCNT_dec(sv), (SV*)PL_Sv) # endif # endif #endif /* Provide: newCONSTSUB */ /* newCONSTSUB from IO.xs is in the core starting with 5.004_63 */ #if (PERL_VERSION < 4) || ((PERL_VERSION == 4) && (PERL_SUBVERSION < 63)) #if defined(NEED_newCONSTSUB) static #else extern void newCONSTSUB(HV * stash, char * name, SV *sv); #endif #if defined(NEED_newCONSTSUB) || defined(NEED_newCONSTSUB_GLOBAL) void 
newCONSTSUB(stash,name,sv) HV *stash; char *name; SV *sv; { U32 oldhints = PL_hints; HV *old_cop_stash = PL_curcop->cop_stash; HV *old_curstash = PL_curstash; line_t oldline = PL_curcop->cop_line; PL_curcop->cop_line = PL_copline; PL_hints &= ~HINT_BLOCK_SCOPE; if (stash) PL_curstash = PL_curcop->cop_stash = stash; newSUB( #if (PERL_VERSION < 3) || ((PERL_VERSION == 3) && (PERL_SUBVERSION < 22)) /* before 5.003_22 */ start_subparse(), #else # if (PERL_VERSION == 3) && (PERL_SUBVERSION == 22) /* 5.003_22 */ start_subparse(0), # else /* 5.003_23 onwards */ start_subparse(FALSE, 0), # endif #endif newSVOP(OP_CONST, 0, newSVpv(name,0)), newSVOP(OP_CONST, 0, &PL_sv_no), /* SvPV(&PL_sv_no) == "" -- GMB */ newSTATEOP(0, Nullch, newSVOP(OP_CONST, 0, sv)) ); PL_hints = oldhints; PL_curcop->cop_stash = old_cop_stash; PL_curstash = old_curstash; PL_curcop->cop_line = oldline; } #endif #endif /* newCONSTSUB */ #ifndef START_MY_CXT /* * Boilerplate macros for initializing and accessing interpreter-local * data from C. All statics in extensions should be reworked to use * this, if you want to make the extension thread-safe. See ext/re/re.xs * for an example of the use of these macros. * * Code that uses these macros is responsible for the following: * 1. #define MY_CXT_KEY to a unique string, e.g. "DynaLoader_guts" * 2. Declare a typedef named my_cxt_t that is a structure that contains * all the data that needs to be interpreter-local. * 3. Use the START_MY_CXT macro after the declaration of my_cxt_t. * 4. Use the MY_CXT_INIT macro such that it is called exactly once * (typically put in the BOOT: section). * 5. Use the members of the my_cxt_t structure everywhere as * MY_CXT.member. * 6. Use the dMY_CXT macro (a declaration) in all the functions that * access MY_CXT. 
*/ #if defined(MULTIPLICITY) || defined(PERL_OBJECT) || \ defined(PERL_CAPI) || defined(PERL_IMPLICIT_CONTEXT) /* This must appear in all extensions that define a my_cxt_t structure, * right after the definition (i.e. at file scope). The non-threads * case below uses it to declare the data as static. */ #define START_MY_CXT #if (PERL_VERSION < 4 || (PERL_VERSION == 4 && PERL_SUBVERSION < 68 )) /* Fetches the SV that keeps the per-interpreter data. */ #define dMY_CXT_SV \ SV *my_cxt_sv = perl_get_sv(MY_CXT_KEY, FALSE) #else /* >= perl5.004_68 */ #define dMY_CXT_SV \ SV *my_cxt_sv = *hv_fetch(PL_modglobal, MY_CXT_KEY, \ sizeof(MY_CXT_KEY)-1, TRUE) #endif /* < perl5.004_68 */ /* This declaration should be used within all functions that use the * interpreter-local data. */ #define dMY_CXT \ dMY_CXT_SV; \ my_cxt_t *my_cxtp = INT2PTR(my_cxt_t*,SvUV(my_cxt_sv)) /* Creates and zeroes the per-interpreter data. * (We allocate my_cxtp in a Perl SV so that it will be released when * the interpreter goes away.) */ #define MY_CXT_INIT \ dMY_CXT_SV; \ /* newSV() allocates one more than needed */ \ my_cxt_t *my_cxtp = (my_cxt_t*)SvPVX(newSV(sizeof(my_cxt_t)-1));\ Zero(my_cxtp, 1, my_cxt_t); \ sv_setuv(my_cxt_sv, PTR2UV(my_cxtp)) /* This macro must be used to access members of the my_cxt_t structure. * e.g. MYCXT.some_data */ #define MY_CXT (*my_cxtp) /* Judicious use of these macros can reduce the number of times dMY_CXT * is used. Use is similar to pTHX, aTHX etc. 
*/ #define pMY_CXT my_cxt_t *my_cxtp #define pMY_CXT_ pMY_CXT, #define _pMY_CXT ,pMY_CXT #define aMY_CXT my_cxtp #define aMY_CXT_ aMY_CXT, #define _aMY_CXT ,aMY_CXT #else /* single interpreter */ #define START_MY_CXT static my_cxt_t my_cxt; #define dMY_CXT_SV dNOOP #define dMY_CXT dNOOP #define MY_CXT_INIT NOOP #define MY_CXT my_cxt #define pMY_CXT void #define pMY_CXT_ #define _pMY_CXT #define aMY_CXT #define aMY_CXT_ #define _aMY_CXT #endif #endif /* START_MY_CXT */ #ifndef IVdf # if IVSIZE == LONGSIZE # define IVdf "ld" # define UVuf "lu" # define UVof "lo" # define UVxf "lx" # define UVXf "lX" # else # if IVSIZE == INTSIZE # define IVdf "d" # define UVuf "u" # define UVof "o" # define UVxf "x" # define UVXf "X" # endif # endif #endif #ifndef NVef # if defined(USE_LONG_DOUBLE) && defined(HAS_LONG_DOUBLE) && \ defined(PERL_PRIfldbl) /* Not very likely, but let's try anyway. */ # define NVef PERL_PRIeldbl # define NVff PERL_PRIfldbl # define NVgf PERL_PRIgldbl # else # define NVef "e" # define NVff "f" # define NVgf "g" # endif #endif #ifndef AvFILLp /* Older perls (<=5.003) lack AvFILLp */ # define AvFILLp AvFILL #endif #ifdef SvPVbyte # if PERL_REVISION == 5 && PERL_VERSION < 7 /* SvPVbyte does not work in perl-5.6.1, borrowed version for 5.7.3 */ # undef SvPVbyte # define SvPVbyte(sv, lp) \ ((SvFLAGS(sv) & (SVf_POK|SVf_UTF8)) == (SVf_POK) \ ? ((lp = SvCUR(sv)), SvPVX(sv)) : my_sv_2pvbyte(aTHX_ sv, &lp)) static char * my_sv_2pvbyte(pTHX_ register SV *sv, STRLEN *lp) { sv_utf8_downgrade(sv,0); return SvPV(sv,*lp); } # endif #else # define SvPVbyte SvPV #endif #ifndef SvPV_nolen # define SvPV_nolen(sv) \ ((SvFLAGS(sv) & (SVf_POK)) == SVf_POK \ ? 
SvPVX(sv) : sv_2pv_nolen(sv)) static char * sv_2pv_nolen(pTHX_ register SV *sv) { STRLEN n_a; return sv_2pv(sv, &n_a); } #endif #ifndef get_cv # define get_cv(name,create) perl_get_cv(name,create) #endif #ifndef get_sv # define get_sv(name,create) perl_get_sv(name,create) #endif #ifndef get_av # define get_av(name,create) perl_get_av(name,create) #endif #ifndef get_hv # define get_hv(name,create) perl_get_hv(name,create) #endif #ifndef call_argv # define call_argv perl_call_argv #endif #ifndef call_method # define call_method perl_call_method #endif #ifndef call_pv # define call_pv perl_call_pv #endif #ifndef call_sv # define call_sv perl_call_sv #endif #ifndef PERL_SCAN_GREATER_THAN_UV_MAX # define PERL_SCAN_GREATER_THAN_UV_MAX 0x02 #endif #ifndef PERL_SCAN_SILENT_ILLDIGIT # define PERL_SCAN_SILENT_ILLDIGIT 0x04 #endif #ifndef PERL_SCAN_ALLOW_UNDERSCORES # define PERL_SCAN_ALLOW_UNDERSCORES 0x01 #endif #ifndef PERL_SCAN_DISALLOW_PREFIX # define PERL_SCAN_DISALLOW_PREFIX 0x02 #endif #if (PERL_VERSION >= 6) #define I32_CAST #else #define I32_CAST (I32*) #endif #ifndef grok_hex static UV _grok_hex (char *string, STRLEN *len, I32 *flags, NV *result) { NV r = scan_hex(string, *len, I32_CAST len); if (r > UV_MAX) { *flags |= PERL_SCAN_GREATER_THAN_UV_MAX; if (result) *result = r; return UV_MAX; } return (UV)r; } # define grok_hex(string, len, flags, result) \ _grok_hex((string), (len), (flags), (result)) #endif #ifndef grok_oct static UV _grok_oct (char *string, STRLEN *len, I32 *flags, NV *result) { NV r = scan_oct(string, *len, I32_CAST len); if (r > UV_MAX) { *flags |= PERL_SCAN_GREATER_THAN_UV_MAX; if (result) *result = r; return UV_MAX; } return (UV)r; } # define grok_oct(string, len, flags, result) \ _grok_oct((string), (len), (flags), (result)) #endif #ifndef grok_bin static UV _grok_bin (char *string, STRLEN *len, I32 *flags, NV *result) { NV r = scan_bin(string, *len, I32_CAST len); if (r > UV_MAX) { *flags |= PERL_SCAN_GREATER_THAN_UV_MAX; if (result) *result 
= r; return UV_MAX; } return (UV)r; } # define grok_bin(string, len, flags, result) \ _grok_bin((string), (len), (flags), (result)) #endif #ifndef IN_LOCALE # define IN_LOCALE \ (PL_curcop == &PL_compiling ? IN_LOCALE_COMPILETIME : IN_LOCALE_RUNTIME) #endif #ifndef IN_LOCALE_RUNTIME # define IN_LOCALE_RUNTIME (PL_curcop->op_private & HINT_LOCALE) #endif #ifndef IN_LOCALE_COMPILETIME # define IN_LOCALE_COMPILETIME (PL_hints & HINT_LOCALE) #endif #ifndef IS_NUMBER_IN_UV # define IS_NUMBER_IN_UV 0x01 # define IS_NUMBER_GREATER_THAN_UV_MAX 0x02 # define IS_NUMBER_NOT_INT 0x04 # define IS_NUMBER_NEG 0x08 # define IS_NUMBER_INFINITY 0x10 # define IS_NUMBER_NAN 0x20 #endif #ifndef grok_numeric_radix # define GROK_NUMERIC_RADIX(sp, send) grok_numeric_radix(sp, send) #define grok_numeric_radix Perl_grok_numeric_radix bool Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send) { #ifdef USE_LOCALE_NUMERIC #if (PERL_VERSION >= 6) if (PL_numeric_radix_sv && IN_LOCALE) { STRLEN len; char* radix = SvPV(PL_numeric_radix_sv, len); if (*sp + len <= send && memEQ(*sp, radix, len)) { *sp += len; return TRUE; } } #else /* pre5.6.0 perls don't have PL_numeric_radix_sv so the radix * must manually be requested from locale.h */ #include struct lconv *lc = localeconv(); char *radix = lc->decimal_point; if (radix && IN_LOCALE) { STRLEN len = strlen(radix); if (*sp + len <= send && memEQ(*sp, radix, len)) { *sp += len; return TRUE; } } #endif /* PERL_VERSION */ #endif /* USE_LOCALE_NUMERIC */ /* always try "." 
if numeric radix didn't match because * we may have data from different locales mixed */ if (*sp < send && **sp == '.') { ++*sp; return TRUE; } return FALSE; } #endif /* grok_numeric_radix */ #ifndef grok_number #define grok_number Perl_grok_number int Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep) { const char *s = pv; const char *send = pv + len; const UV max_div_10 = UV_MAX / 10; const char max_mod_10 = UV_MAX % 10; int numtype = 0; int sawinf = 0; int sawnan = 0; while (s < send && isSPACE(*s)) s++; if (s == send) { return 0; } else if (*s == '-') { s++; numtype = IS_NUMBER_NEG; } else if (*s == '+') s++; if (s == send) return 0; /* next must be digit or the radix separator or beginning of infinity */ if (isDIGIT(*s)) { /* UVs are at least 32 bits, so the first 9 decimal digits cannot overflow. */ UV value = *s - '0'; /* This construction seems to be more optimiser friendly. (without it gcc does the isDIGIT test and the *s - '0' separately) With it gcc on arm is managing 6 instructions (6 cycles) per digit. In theory the optimiser could deduce how far to unroll the loop before checking for overflow. */ if (++s < send) { int digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { /* Now got 9 digits, so need to check each time for overflow. 
*/ digit = *s - '0'; while (digit >= 0 && digit <= 9 && (value < max_div_10 || (value == max_div_10 && digit <= max_mod_10))) { value = value * 10 + digit; if (++s < send) digit = *s - '0'; else break; } if (digit >= 0 && digit <= 9 && (s < send)) { /* value overflowed. skip the remaining digits, don't worry about setting *valuep. */ do { s++; } while (s < send && isDIGIT(*s)); numtype |= IS_NUMBER_GREATER_THAN_UV_MAX; goto skip_value; } } } } } } } } } } } } } } } } } } numtype |= IS_NUMBER_IN_UV; if (valuep) *valuep = value; skip_value: if (GROK_NUMERIC_RADIX(&s, send)) { numtype |= IS_NUMBER_NOT_INT; while (s < send && isDIGIT(*s)) /* optional digits after the radix */ s++; } } else if (GROK_NUMERIC_RADIX(&s, send)) { numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */ /* no digits before the radix means we need digits after it */ if (s < send && isDIGIT(*s)) { do { s++; } while (s < send && isDIGIT(*s)); if (valuep) { /* integer approximation is valid - it's 0. */ *valuep = 0; } } else return 0; } else if (*s == 'I' || *s == 'i') { s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; s++; if (s == send || (*s != 'F' && *s != 'f')) return 0; s++; if (s < send && (*s == 'I' || *s == 'i')) { s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; s++; if (s == send || (*s != 'I' && *s != 'i')) return 0; s++; if (s == send || (*s != 'T' && *s != 't')) return 0; s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0; s++; } sawinf = 1; } else if (*s == 'N' || *s == 'n') { /* XXX TODO: There are signaling NaNs and quiet NaNs. 
*/ s++; if (s == send || (*s != 'A' && *s != 'a')) return 0; s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; s++; sawnan = 1; } else return 0; if (sawinf) { numtype &= IS_NUMBER_NEG; /* Keep track of sign */ numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT; } else if (sawnan) { numtype &= IS_NUMBER_NEG; /* Keep track of sign */ numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT; } else if (s < send) { /* we can have an optional exponent part */ if (*s == 'e' || *s == 'E') { /* The only flag we keep is sign. Blow away any "it's UV" */ numtype &= IS_NUMBER_NEG; numtype |= IS_NUMBER_NOT_INT; s++; if (s < send && (*s == '-' || *s == '+')) s++; if (s < send && isDIGIT(*s)) { do { s++; } while (s < send && isDIGIT(*s)); } else return 0; } } while (s < send && isSPACE(*s)) s++; if (s >= send) return numtype; if (len == 10 && memEQ(pv, "0 but true", 10)) { if (valuep) *valuep = 0; return IS_NUMBER_IN_UV; } return 0; } #endif /* grok_number */ #endif /* _P_P_PORTABILITY_H_ */ /* End of File ppport.h */ HTML-Tidy-1.60/Changes0000644000101700007640000000367513156250333013601 0ustar alesterispcRevision history for Perl extension HTML::Tidy. 1.60 Wed Sep 13 10:34:35 CDT 2017 ==================================== No functionality changes. Fixes failing tests caused by differing versions of the tidy library. Thanks, Rufus Cable. 1.58 Sat May 27 00:03:51 CDT 2017 ==================================== No functionality changes. This just fixes some tests. This will probably be the last release that uses the old tidy/tidyp. There is a new tidy that supports HTML 5 and I'm going to start working on making HTML::Tidy use that. [FIXES] Failing tests on Windows. Thanks, Klaus Baldermann. (GH #11, GH #23) [INTERNALS] Added more tests. Thanks, Hunter McMillen. 1.56 Sun Sep 22 16:39:40 CDT 2013 ==================================== [ENHANCEMENTS] webtidy's version statement now shows the version number of the underlying tidyp library. [FIXES] Fixed an undef warning in parse(). 
Thanks, Vladimir Timofeev. utf8 was never encoded correctly. Thanks, Vladimir Timofeev and Alistair Francis. The ->parse() method would sometimes return false even though everything worked fine. Thanks, @sebaer. 1.54 Fri Sep 17 00:44:36 CDT 2010 ==================================== Please note that the bug tracker for HTML::Tidy is now at http://github.com/petdance/html-tidy. [FIXES] Fixed incorrect calls to croak(). Thanks, Steve Grazzini. [DOCUMENTATION] Updating all docs referring to libtidyp and Alien::Libtidyp. 1.52 Wed May 12 2010 ======================= First release since the major overhaul that relies on libtidyp. Now relies on Perl 5.8. I'm not at all interested in supporting ancient Perl versions. HTML::Tidy now relies on the libtidyp that Andy Lester maintains on github. http://github.com/petdance/libtidyp [ENHANCEMENTS] Now includes support for TIDY_INFO messages. Improve support for Windows platforms. [FIXES] Fixed a segfault if there are no errors. Allow for either "tidyp" or "HTML Tidy" as a program name in tests. HTML-Tidy-1.60/lib/0000755000101700007640000000000013156250345013044 5ustar alesterispcHTML-Tidy-1.60/lib/HTML/0000755000101700007640000000000013156250345013610 5ustar alesterispcHTML-Tidy-1.60/lib/HTML/Tidy.pm0000644000101700007640000002511113156250333015054 0ustar alesterispcpackage HTML::Tidy; use 5.008; use strict; use warnings; use Carp (); use HTML::Tidy::Message; =head1 NAME HTML::Tidy - (X)HTML validation in a Perl object =head1 VERSION Version 1.60 =cut our $VERSION = '1.60'; =head1 SYNOPSIS use HTML::Tidy; my $tidy = HTML::Tidy->new( {config_file => 'path/to/config'} ); $tidy->ignore( type => TIDY_WARNING, type => TIDY_INFO ); $tidy->parse( "foo.html", $contents_of_foo ); for my $message ( $tidy->messages ) { print $message->as_string; } =head1 DESCRIPTION C<HTML::Tidy> is an HTML checker in a handy dandy object. It's meant as a replacement for L<HTML::Lint>. If you're currently an L<HTML::Lint> user looking to migrate, see the section L</"CONVERTING FROM HTML::Lint">.
=head1 EXPORTS

Message types C<TIDY_ERROR>, C<TIDY_WARNING> and C<TIDY_INFO>.

Everything else is an object method.

=cut

use base 'Exporter';

# Numeric codes for the three message types.
use constant {
    TIDY_INFO    => 1,
    TIDY_WARNING => 2,
    TIDY_ERROR   => 3,
};

our @EXPORT = qw( TIDY_ERROR TIDY_WARNING TIDY_INFO );

=head1 METHODS

=head2 new()

Create an HTML::Tidy object.

    my $tidy = HTML::Tidy->new();

Optionally you can give a hashref of configuration parms.

    my $tidy = HTML::Tidy->new( {config_file => 'path/to/tidy.cfg'} );

This configuration file will be read and used when you clean or parse an
HTML file.

You can also pass options directly to tidyp.  Underscores in option names
are translated to hyphens, so C<tidy_mark> is passed on as C<tidy-mark>.

    my $tidy = HTML::Tidy->new( {
                                    output_xhtml => 1,
                                    tidy_mark => 0,
                                } );

See the tidyp documentation for the list of options supported by tidyp.

The following options are not supported by C<HTML::Tidy>; passing any of
them makes C<new()> die:

=over 4

=item * force-output

=item * gnu-emacs-file

=item * gnu-emacs

=item * keep-time

=item * quiet

=item * slide-style

=item * write-back

=back

=cut

sub new {
    my ( $class, $args ) = @_;
    $args ||= {};

    # REVIEW perhaps a list of supported options would be better
    my %is_unsupported = map { $_ => 1 } qw(
        force-output
        gnu-emacs-file
        gnu-emacs
        keep-time
        quiet
        slide-style
        write-back
    );

    my $self = bless {
        messages     => [],
        ignore_type  => [],
        ignore_text  => [],
        config_file  => '',
        tidy_options => {},
    }, $class;

    for my $name ( keys %{$args} ) {
        my $setting = $args->{$name};

        # config_file is ours; everything else is a tidyp option.
        if ( $name eq 'config_file' ) {
            $self->{config_file} = $setting;
            next;
        }

        ( my $option = $name ) =~ tr/_/-/;
        if ( $is_unsupported{$option} ) {
            Carp::croak( "Unsupported option: $option" );
        }
        $self->{tidy_options}->{$option} = $setting;
    }

    return $self;
}

=head2 messages()

Returns the messages accumulated.

=cut

sub messages {
    my ($self) = @_;

    return @{ $self->{messages} };
}

=head2 clear_messages()

Clears the list of messages, in case you want to print and clear, print
and clear.  If you don't clear the messages, then each time you call
C<parse()> you'll be accumulating more in the list.

=cut

sub clear_messages {
    my ($self) = @_;

    $self->{messages} = [];

    return;
}

=head2 ignore( parm => value [, parm => value ] )

Specify types of messages to ignore.  Note that the ignore flags must be
set B<before> calling C<parse()>.
You can call C<ignore()> as many times as necessary to set up all your
restrictions; the options will stack up.

=over 4

=item * type => TIDY_INFO|TIDY_WARNING|TIDY_ERROR

Specifies the type of messages you want to ignore, either info or warnings
or errors.  If you wanted, you could call ignore on all three and get
no messages at all.

    $tidy->ignore( type => TIDY_WARNING );

=item * text => qr/regex/

=item * text => [ qr/regex1/, qr/regex2/, ... ]

Checks the text of the message against the specified regex or regexes,
and ignores the message if there's a match.  The value for the I<text>
parm may be either a regex, or a reference to a list of regexes.

    $tidy->ignore( text => qr/DOCTYPE/ );
    $tidy->ignore( text => [ qr/unsupported/, qr/proprietary/i ] );

=back

Dies if a parm name is anything other than C<type> or C<text>, or if a
parm is given without a defined value.

=cut

sub ignore {
    my ( $self, @parms ) = @_;

    while ( @parms ) {
        my ( $parm, $value ) = splice( @parms, 0, 2 );

        Carp::croak( qq{Invalid ignore type of "$parm"} )
            unless ($parm eq 'text') or ($parm eq 'type');

        my @values = ref($value) eq 'ARRAY' ? @{$value} : ($value);

        # An undefined entry would later be compared or matched against
        # every message; as a regex it acts as an empty pattern, which can
        # hide messages.  Refuse it up front.
        Carp::croak( qq{Missing value for ignore parm "$parm"} )
            if grep { !defined } @values;

        push( @{ $self->{"ignore_$parm"} }, @values );
    } # while

    return;
} # ignore

=head2 parse( $filename, $str [, $str...] )

Parses a string, or list of strings, that make up a single HTML file.

The I<$filename> parm is only used as an identifier for your use.
The file is not actually read and opened.

Returns true if all went OK, or false if there was some problem calling
tidy, or parsing tidy's output.
=cut

sub parse {
    my $self = shift;
    my $filename = shift;
    if (@_ == 0) {
        Carp::croak('Usage: parse($filename,$str [, $str...])') ## no critic
    }
    my $html = join( '', @_ );

    # Hand UTF-8 octets to the XS layer and decode what comes back.
    utf8::encode($html) if utf8::is_utf8($html);
    my ($errorblock,$newline) = _tidy_messages( $html, $self->{config_file}, $self->{tidy_options} );
    return 1 unless defined $errorblock;

    utf8::decode($errorblock);

    # _parse_errors() counts the lines it could not understand, so zero
    # (or undef) unknown lines means success.
    return !$self->_parse_errors($filename, $errorblock, $newline);
}

# Splits a block of tidy output into lines, turns every recognised
# diagnostic into an HTML::Tidy::Message and stores the ones that pass
# the ignore filters in $self->{messages}.
#
#   $filename - identifier recorded in each message
#   $errs     - the raw diagnostic text (may be undef: nothing to do)
#   $newline  - the line separator used in $errs (defaults to "\n")
#
# Returns the number of lines that were not understood, or undef if
# there were none.
sub _parse_errors {
    my $self = shift;
    my $filename = shift;
    my $errs = shift;
    my $newline = shift;

    my $parse_errors;

    # Nothing came back from tidy: nothing to parse, nothing went wrong.
    return $parse_errors unless defined $errs;

    # An undefined or empty separator would make split() break the text
    # into single characters; fall back to a plain newline.
    $newline = "\n" unless defined $newline && length $newline;

    # The separator is literal text, not a pattern.
    my @lines = split( /\Q$newline\E/, $errs );

    for my $line ( @lines ) {
        chomp $line;

        my $message;
        if ( $line =~ /^line (\d+) column (\d+) - (Warning|Error|Info): (.+)$/ ) {
            my ($lineno, $col, $type, $text) = ($1, $2, $3, $4);
            $type =
                ($type eq 'Warning') ? TIDY_WARNING :
                ($type eq 'Info')    ? TIDY_INFO :
                                       TIDY_ERROR;
            $message = HTML::Tidy::Message->new( $filename, $type, $lineno, $col, $text );
        }
        elsif ( $line =~ m/^Info: (.+)$/ ) {
            # Info line without a position: kept as a TIDY_INFO message
            my $text = $1;
            $message = HTML::Tidy::Message->new( $filename, TIDY_INFO, undef, undef, $text );
        }
        elsif ( $line =~ /^\d+ warnings?, \d+ errors? were found!/ ) {
            # Summary line we don't want
        }
        elsif ( $line eq 'No warnings or errors were found.' ) {
            # Summary line we don't want
        }
        elsif ( $line eq 'This document has errors that must be fixed before' ) {
            # Summary line we don't want
        }
        elsif ( $line eq 'using HTML Tidy to generate a tidied up version.' ) {
            # Summary line we don't want
        }
        elsif ( $line =~ m/^\s*$/ ) {
            # Blank line we don't want
        }
        else {
            Carp::carp "HTML::Tidy: Unknown error type: $line";
            ++$parse_errors;
        }
        push( @{$self->{messages}}, $message )
            if $message && $self->_is_keeper( $message );
    } # for

    return $parse_errors;
}

=head2 clean( $str [, $str...] )

Cleans a string, or list of strings, that make up a single HTML file.

Returns the cleaned string as a single string.
=cut

sub clean {
    my $self = shift;
    if (@_ == 0) {
        Carp::croak('Usage: clean($str [, $str...])') ## no critic
    }
    my $text = join( '', @_ );

    # Hand UTF-8 octets to the XS layer and decode what comes back.
    utf8::encode($text) if utf8::is_utf8($text);

    # join() always yields a defined string, so the newline can be
    # appended unconditionally.
    $text .= "\n";

    my ($cleaned, $errbuf, $newline) = _tidy_clean( $text,
        $self->{config_file},
        $self->{tidy_options});

    # NOTE(review): parse() guards against _tidy_messages() returning
    # nothing; the same is assumed to be possible for _tidy_clean(), so
    # only touch the buffers that are actually defined.
    utf8::decode($cleaned) if defined $cleaned;
    if ( defined $errbuf ) {
        utf8::decode($errbuf);
        $self->_parse_errors('', $errbuf, $newline);
    }

    return $cleaned;
}

# Tells whether a given message object is one that we should keep.
# A message is dropped if its type is in the ignore_type list or its text
# matches any regex in the ignore_text list.

sub _is_keeper {
    my $self = shift;

    my $message = shift;

    my @ignore_types = @{$self->{ignore_type}};
    if ( @ignore_types ) {
        return if grep { $message->type == $_ } @ignore_types;
    }

    my @ignore_texts = @{$self->{ignore_text}};
    if ( @ignore_texts ) {
        return if grep { $message->text =~ $_ } @ignore_texts;
    }

    return 1;
}

=head2 tidyp_version()

=head2 libtidyp_version()

Returns the version of the underlying tidyp library.

=cut

# backcompat
sub libtidyp_version { return shift->tidyp_version }

sub tidyp_version {
    my $version_str = _tidyp_version();

    return $version_str;
}

require XSLoader;
XSLoader::load('HTML::Tidy', $VERSION);

1;

__END__

=head1 INSTALLING TIDYP

C<HTML::Tidy> requires that C<tidyp> be installed on your system.
You can obtain tidyp through your distribution's package manager
(make sure you install the development package with headers), or from
the tidyp Git repository at L<http://github.com/petdance/tidyp>.

=head1 CONVERTING FROM C<HTML::Lint>

C<HTML::Tidy> is different from C<HTML::Lint> in a number of crucial ways.

=over 4

=item * It's not pure Perl

C<HTML::Tidy> is mostly a happy wrapper around tidyp.

=item * The real work is done by someone else

Changes to tidyp may come down the pipe that I don't have control over.
That's the price we pay for having it do a darn good job.

=item * It's no longer bundled with its C<Test::> counterpart

L<HTML::Lint> came bundled with C<Test::HTML::Lint>, but
L<Test::HTML::Tidy> is a separate distribution.  This saves the people
who don't want the C<Test::> framework from pulling it in, and all its
prerequisite modules.
=back

=head1 BUGS & FEEDBACK

Please report any bugs or feature requests at the issue tracker on github
L<http://github.com/petdance/html-tidy/issues>.  I will be notified,
and then you'll automatically be notified of progress on your bug as I
make changes.

Please do NOT use L<http://rt.cpan.org>.

=head1 SUPPORT

You can find documentation for this module with the perldoc command.

    perldoc HTML::Tidy

You can also look for information at:

=over 4

=item * HTML::Tidy's issue queue at github

L<http://github.com/petdance/html-tidy/issues>

=item * AnnoCPAN: Annotated CPAN documentation

L<http://annocpan.org/dist/HTML-Tidy>

=item * CPAN Ratings

L<http://cpanratings.perl.org/d/HTML-Tidy>

=item * search.cpan.org

L<http://search.cpan.org/dist/HTML-Tidy>

=item * Git source code repository

L<http://github.com/petdance/html-tidy>

=back

=head1 ACKNOWLEDGEMENTS

Thanks to Rufus Cable, Jonathan Rockway, and Robert Bachmann for
contributions.

=head1 AUTHOR

Andy Lester, C<< <andy at petdance.com> >>

=head1 COPYRIGHT & LICENSE

Copyright (C) 2005-2017 by Andy Lester

This library is free software.  You may modify or distribute it under
the Artistic License v2.0.

=cut

HTML-Tidy-1.60/lib/HTML/Tidy/0000755000101700007640000000000013156250345014521 5ustar alesterispcHTML-Tidy-1.60/lib/HTML/Tidy/Message.pm0000644000101700007640000000557513156247313016460 0ustar alesterispcpackage HTML::Tidy::Message;

use warnings;
use strict;
use overload
    q{""}    => \&as_string,
    fallback => 'sounds like a good idea';

=head1 NAME

HTML::Tidy::Message - Message object for the Tidy functionality

=head1 SYNOPSIS

See L<HTML::Tidy> for all the gory details.

=head1 EXPORTS

None.  It's all object-based.

=head1 METHODS

Almost everything is an accessor.

=head2 new( $file, $type, $line, $column, $text )

Create an object.  It's not very exciting.

A missing (or false) C<$line> or C<$column> is stored as 0.

=cut

sub new {
    my ( $class, $file, $type, $line, $column, $text ) = @_;

    # Add an element that says what tag caused the error (B, TR, etc)
    # so that we can match 'em up down the road.
    return bless {
        _file   => $file,
        _type   => $type,
        _line   => $line || 0,
        _column => $column || 0,
        _text   => $text,
    }, $class;
}

=head2 where()

Returns a formatted string that describes where in the file the
error has occurred.
For example, (14:23) for line 14, column 23. The terrible thing about this function is that it's both a plain ol' formatting function as in my $str = where( 14, 23 ); AND it's an object method, as in: my $str = $error->where(); I don't know what I was thinking when I set it up this way, but it's bad practice. =cut sub where { my $self = shift; return '-' unless $self->line && $self->column; return sprintf( '(%d:%d)', $self->line, $self->column ); } =head2 as_string() Returns a nicely-formatted string for printing out to stdout or some similar user thing. =cut sub as_string { my $self = shift; my %strings = ( 1 => 'Info', 2 => 'Warning', 3 => 'Error', ); return sprintf( '%s %s %s: %s', $self->file, $self->where, $strings{$self->type}, $self->text ); } =head2 file() Returns the filename of the error, as set by the caller. =head2 type() Returns the type of the error. This will either be C, or C. =head2 line() Returns the line number of the error, or 0 if there isn't an applicable line number. =head2 column() Returns the column number, or 0 if there isn't an applicable column number. =head2 text() Returns the text of the message. This does not include a type string, like "Info: ". =cut sub file { my $self = shift; return $self->{_file} } sub type { my $self = shift; return $self->{_type} } sub line { my $self = shift; return $self->{_line} } sub column { my $self = shift; return $self->{_column} } sub text { my $self = shift; return $self->{_text} } =head1 COPYRIGHT & LICENSE Copyright 2005-2017 Andy Lester. This program is free software; you can redistribute it and/or modify it under the terms of the Artistic License v2.0. =head1 AUTHOR Andy Lester, C<< >> =cut 1; # happy HTML-Tidy-1.60/Makefile.PL0000644000101700007640000000573313156247313014261 0ustar alesterispc#!/usr/bin/perl package main; use 5.008000; use strict; use warnings; use ExtUtils::MakeMaker; use ExtUtils::Liblist; use Config; my $libs = '-ltidyp'; my $inc = "-I. 
-I/usr/include/tidyp -I/usr/local/include/tidyp -I$Config{usrinc}/tidyp";

# Prefer the tidyp build that Alien::Tidyp knows about.  The eval block ends
# in a true value so that success is judged by eval's own return value rather
# than by $@, which can be reset before it gets inspected.
if ( eval { require Alien::Tidyp; 1 } ) {
    print "Using tidyp via Alien::Tidyp\n";
    $libs = Alien::Tidyp->config('LIBS');
    $inc  = Alien::Tidyp->config('INC');
}
else {
    print "Alien::Tidyp not found. Looking for tidyp on your system.\n";

    # Ask MakeMaker's library search whether -ltidyp resolves in the usual
    # places.  Element 2 of the list returned by ext() is LDLOADLIBS; it is
    # empty when the library could not be found.
    my @vars = ExtUtils::Liblist->ext( '-L/usr/lib -L/usr/local/lib -ltidyp', 0, 1 );
    $libs = $vars[2];

    if ( !$libs ) {
        # Fall back to the bare flag so that the build can still be attempted.
        $libs = '-ltidyp';
        print <<'EOF';

It seems that you don't have tidyp installed.  HTML::Tidy does no
real work on its own.  It's just a wrapper around tidyp.

Please read the README.markdown file for details on how to install tidyp.

If you do have tidyp installed, but Makefile.PL can't detect it,
go ahead and try building.  If HTML::Tidy builds and tests correctly,
please file a ticket at Github at
http://github.com/petdance/html-tidy/issues, so we can fix the
library detection code.

EOF
    }
}

# LWP::Simple is optional: only warn when it is missing.
if ( !eval { require LWP::Simple; 1 } ) {
    print <<'EOF';

NOTE: It seems that you don't have LWP::Simple installed.
The webtidy program will not be able to retrieve web pages.
EOF
}

# Arguments for WriteMakefile().  $libs and $inc come from the tidyp
# detection earlier in this script.
my $parms = {
    NAME                => 'HTML::Tidy',
    AUTHOR              => 'Andy Lester ',
    VERSION_FROM        => 'lib/HTML/Tidy.pm',
    ABSTRACT_FROM       => 'lib/HTML/Tidy.pm',
    PREREQ_PM           => {
        'Encode'        => 0,       # for tests
        'Exporter'      => 0,
        'Getopt::Long'  => 0,       # in webtidy
        'Test::More'    => '0.98',  # For subtest()
        'Test::Builder' => 0,
        'Carp'          => 0,
        'overload'      => 0,
        'constant'      => 0,
    },

    LIBS                => [$libs],
    NEEDS_LINKING       => 1,
    INC                 => $inc,

    EXE_FILES           => [qw(bin/webtidy)],
    dist                => {
        COMPRESS => 'gzip -9f',
        SUFFIX   => 'gz',
    },
    clean               => { FILES => 'HTML-Tidy-*' },
};

# Compare MakeMaker versions numerically.  A string comparison with "ge"
# misorders versions once the major number reaches two digits ('10.00' sorts
# before '6.45_01'), so drop the developer-release underscore and use >=.
( my $eumm_version = $ExtUtils::MakeMaker::VERSION ) =~ tr/_//d;

# These keys are only passed to MakeMaker releases at or above the
# thresholds below.
if ( $eumm_version >= 6.4501 ) {
    $parms->{META_MERGE} = {
        resources => {
            license     => 'http://www.opensource.org/licenses/artistic-license-2.0.php',
            homepage    => 'http://github.com/petdance/html-tidy',
            bugtracker  => 'http://github.com/petdance/html-tidy/issues',
            repository  => 'http://github.com/petdance/html-tidy',
        }
    };
    $parms->{LICENSE} = 'artistic_2';
}
if ( $eumm_version >= 6.4702 ) {
    $parms->{MIN_PERL_VERSION} = 5.008;
}

WriteMakefile( %{$parms} );

# Extra make targets appended to the generated Makefile: "tags" runs ctags
# and "critic" runs perlcritic with the distribution's perlcriticrc.
sub MY::postamble {
    return <<'MAKE_FRAG';
.PHONY: tags critic

tags:
	ctags -f tags --recurse --totals \
		--exclude=blib --exclude=t/lib \
		--exclude=.svn --exclude='*~' \
		--languages=C,Perl --langmap=Perl:+.t \
		.

critic:
	perlcritic -1 \
		-profile perlcriticrc \
		.

MAKE_FRAG
}
HTML-Tidy-1.60/README.markdown0000644000101700007640000000334613156247313015006 0ustar alesterispc
HTML::Tidy
==========

HTML::Tidy is an HTML checker in a handy dandy object.  It's meant as
a companion to [HTML::Lint][1], which is written in Perl but is not
nearly as capable as HTML::Tidy.

PREREQUISITES
=============

HTML::Tidy does very little work.  The real work of HTML::Tidy is
done by the tidyp library, which is written in C.  To use HTML::Tidy,
you must install tidyp.

There are two, perhaps three, ways to install tidyp:

* Get a tarball from the [tidyp source distributions][2] from Github
and build it like any other C library.
Note that you must get a source tarball, *not* just clone the source tree via github. * Install the [Alien::Tidyp][3] Perl module, which automates the tidyp installation process. * Your operating system may also have a package for tidyp that you can install. As of this writing, these operating systems are known to provide tidyp library: * Fedora contains [tidyp-devel package][4] * FreeBSD contains [tidyp port][5] You need only do one of these steps. INSTALLATION ============ Once you have libtidyp installed via one of the previous methods, install HTML::Tidy like any standard Perl module. perl Makefile.PL make make test make install COPYRIGHT AND LICENSE ===================== Copyright (C) 2004-2017 by Andy Lester This library is free software. It may be redistributed and modified under the Artistic License v2.0. [1]: http://search.cpan.org/dist/HTML-Lint/ "HTML::Lint" [2]: http://github.com/petdance/tidyp/downloads "tidyp source distributions" [3]: http://search.cpan.org/dist/Alien-Tidyp/ "Alien::Tidyp" [4]: https://apps.fedoraproject.org/packages/tidyp "tidyp-devel package" [5]: http://fbsdmon.org/ports/textproc/tidyp "tidyp port" HTML-Tidy-1.60/MANIFEST0000644000101700007640000000131413156250346013427 0ustar alesterispcbin/webtidy Changes lib/HTML/Tidy.pm lib/HTML/Tidy/Message.pm Makefile.PL MANIFEST perlcriticrc ppport.h README.markdown t/00-load.t t/cfg-for-parse.cfg t/cfg-for-parse.t t/clean.t t/clean-crash.t t/extra-quote.t t/ignore-text.t t/ignore.t t/illegal-options.t t/levels.t t/message.t t/opt-00.t t/parse.t t/parse-errors.t t/perfect.t t/pod-coverage.t t/pod.t t/roundtrip.t t/segfault-form.t t/simple.t t/too-many-titles.t t/unicode.html t/unicode.t t/unicode-nbsp.t t/venus.cfg t/venus.html t/venus.t t/version.t t/wordwrap.cfg t/wordwrap.t Tidy.xs META.yml Module YAML meta-data (added by MakeMaker) META.json Module JSON meta-data (added by MakeMaker) HTML-Tidy-1.60/Tidy.xs0000644000101700007640000001225113156247313013565 0ustar alesterispc#include 
"EXTERN.h"
#include "perl.h"
#include "XSUB.h"

/* NOTE(review): the four include directives below carry no header name in
 * this copy of the file; confirm them against the upstream Tidy.xs. */
#include
#include
#include
#include

/*
 * Apply every key/value pair of the Perl hash tidy_options to tdoc.
 *
 * Each key is looked up as a tidy option name.  Unknown names produce a
 * warning and are skipped.  For known options the hash value is converted
 * to a string and handed to tidyOptSetValue(); a warning is issued when
 * that call reports failure.  Nothing is returned to the caller, so a bad
 * option never aborts processing.
 */
static void
_load_config_hash(TidyDoc tdoc, HV *tidy_options)
{
    HE *entry;

    (void) hv_iterinit(tidy_options);

    while ( (entry = hv_iternext(tidy_options)) != NULL ) {
        I32 key_len;

        const char * const key = hv_iterkey(entry,&key_len);
        const TidyOption opt = tidyGetOptionByName(tdoc,key);

        if (!opt) {
            warn( "HTML::Tidy: Unrecognized option: \"%s\"\n",key );
        }
        else {
            const TidyOptionId id = tidyOptGetId(opt);
            SV * const sv_data = hv_iterval(tidy_options,entry);
            STRLEN data_len;
            const char * const data = SvPV(sv_data,data_len);

            if ( ! tidyOptSetValue(tdoc,id,data) ) {
                warn( "HTML::Tidy: Can't set option: \"%s\" to \"%s\"\n", key, data );
            }
        }
    }
}


MODULE = HTML::Tidy         PACKAGE = HTML::Tidy

PROTOTYPES: ENABLE

void
_tidy_messages(input, configfile, tidy_options)
    INPUT:
        const char *input
        const char *configfile
        HV *tidy_options
    PREINIT:
        TidyBuffer errbuf = {0};
        TidyDoc tdoc = tidyCreate(); /* Initialize "document" */
        const char* newline;
        int rc = 0;
    PPCODE:
        /*
         * Parse input and hand back tidy's diagnostics.
         *
         * On success two values are pushed on the Perl stack: the text of
         * the error buffer and the newline string selected by the
         * TidyNewline option.  On any failure (rc < 0, or no error buffer
         * contents) undef is returned instead.
         *
         * Every step below is skipped once rc has gone negative.
         */
        tidyBufInit(&errbuf);
        rc = ( tidyOptSetValue( tdoc, TidyCharEncoding, "utf8" ) ? rc : -1 );

        /* The config file is optional: NULL or "" means "none". */
        if ( (rc >= 0 ) && configfile && *configfile ) {
            rc = tidyLoadConfig( tdoc, configfile );
        }

        /* Options from the hash are applied after the config file. */
        if ( rc >= 0 ) {
            _load_config_hash(tdoc,tidy_options);
        }

        if ( rc >= 0 ) {
            /* Capture diagnostics */
            rc = tidySetErrorBuffer( tdoc, &errbuf );
        }

        if ( rc >= 0 ) {
            /* Parse the input */
            rc = tidyParseString( tdoc, input );
        }

        if ( rc >= 0 && errbuf.bp) {
            XPUSHs( sv_2mortal(newSVpvn((char *)errbuf.bp, errbuf.size)) );

            /* TODO: Make this a function */
            switch ( tidyOptGetInt(tdoc,TidyNewline) ) {
                case TidyLF:
                    newline = "\n";
                    break;
                case TidyCR:
                    newline = "\r";
                    break;
                default:
                    newline = "\r\n";
                    break;
            }
            XPUSHs( sv_2mortal(newSVpv(newline, 0)) );
        }
        else {
            rc = -1;
        }

        /* Release tidy's resources on both the success and failure paths. */
        if ( errbuf.bp )
            tidyBufFree( &errbuf );
        tidyRelease( tdoc );

        if ( rc < 0 ) {
            XSRETURN_UNDEF;
        }


void
_tidy_clean(input, configfile, tidy_options)
    INPUT:
        const char *input
        const char *configfile
        HV *tidy_options
    PREINIT:
        TidyBuffer errbuf = {0};
        TidyBuffer output = {0};
        TidyDoc tdoc = tidyCreate(); /* Initialize "document" */
        const char* newline;
        int rc = 0;
    PPCODE:
        /*
         * Parse, clean and re-serialise input.
         *
         * On success three values are pushed on the Perl stack: the cleaned
         * document, the text of the error buffer, and the newline string
         * selected by the TidyNewline option.  On any failure undef is
         * returned instead.
         *
         * Every step below is skipped once rc has gone negative.
         */
        tidyBufInit(&output);
        tidyBufInit(&errbuf);
        /* Set our default first. */
        /* Don't word-wrap */
        rc = ( tidyOptSetInt( tdoc, TidyWrapLen, 0 ) ? rc : -1 );

        /* The config file is optional: NULL or "" means "none". */
        if ( (rc >= 0 ) && configfile && *configfile ) {
            rc = tidyLoadConfig( tdoc, configfile );
        }

        /* XXX I think this cascade is a bug waiting to happen */

        /* The encoding is set after the config file has been loaded. */
        if ( rc >= 0 ) {
            rc = ( tidyOptSetValue( tdoc, TidyCharEncoding, "utf8" ) ? rc : -1 );
        }

        /* Options from the hash are applied last. */
        if ( rc >= 0 ) {
            _load_config_hash( tdoc, tidy_options );
        }

        if ( rc >= 0 ) {
            rc = tidySetErrorBuffer( tdoc, &errbuf );  /* Capture diagnostics */
        }

        if ( rc >= 0 ) {
            rc = tidyParseString( tdoc, input );   /* Parse the input */
        }

        if ( rc >= 0 ) {
            rc = tidyCleanAndRepair(tdoc);
        }

        /* NOTE(review): rc > 1 presumably means tidy reported errors (as
         * opposed to warnings only); confirm against the tidyp API.  In
         * that case output is forced so that something is still written. */
        if ( rc > 1 ) {
            rc = ( tidyOptSetBool( tdoc, TidyForceOutput, yes ) ? rc : -1 );
        }

        if ( rc >= 0) {
            rc = tidySaveBuffer( tdoc, &output );
        }

        if ( rc >= 0) {
            rc = tidyRunDiagnostics( tdoc );
        }

        /* Both buffers must hold data for the call to count as a success. */
        if ( rc >= 0 && output.bp && errbuf.bp ) {
            XPUSHs( sv_2mortal(newSVpvn((char *)output.bp, output.size)) );
            XPUSHs( sv_2mortal(newSVpvn((char *)errbuf.bp, errbuf.size)) );

            /* TODO: Hoist this into a function */
            switch ( tidyOptGetInt(tdoc,TidyNewline) ) {
                case TidyLF:
                    newline = "\n";
                    break;
                case TidyCR:
                    newline = "\r";
                    break;
                default:
                    newline = "\r\n";
                    break;
            }
            XPUSHs( sv_2mortal(newSVpv(newline, 0)) );
        }
        else {
            rc = -1;
        }

        /* Release tidy's resources on both the success and failure paths. */
        tidyBufFree( &output );
        tidyBufFree( &errbuf );
        tidyRelease( tdoc );

        if ( rc < 0 ) {
            XSRETURN_UNDEF;
        }


SV*
_tidyp_version()
    PREINIT:
        const char* version;
    CODE:
        /* Return the string reported by tidyVersion() as a new Perl scalar. */
        version = tidyVersion();
        RETVAL = newSVpv(version,0); /* will be automatically "mortalized" */
    OUTPUT:
        RETVAL
HTML-Tidy-1.60/META.yml0000644000101700007640000000165313156250345013554 0ustar alesterispc--- abstract: '(X)HTML validation in a Perl object' author: - 'Andy Lester ' build_requires: ExtUtils::MakeMaker: '0' configure_requires: ExtUtils::MakeMaker: '0' dynamic_config: 1 generated_by: 'ExtUtils::MakeMaker version 7.16, CPAN::Meta::Converter version 2.150005' license: artistic_2 meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: '1.4' name: HTML-Tidy no_index: directory: - t - inc requires: Carp: '0' Encode: '0' Exporter: '0' Getopt::Long: '0' Test::Builder: '0' Test::More: '0.98' constant: '0' overload: '0' perl: '5.008' resources: bugtracker: http://github.com/petdance/html-tidy/issues homepage: http://github.com/petdance/html-tidy license: http://www.opensource.org/licenses/artistic-license-2.0.php repository: http://github.com/petdance/html-tidy version: '1.60' x_serialization_backend: 'CPAN::Meta::YAML version 0.012' HTML-Tidy-1.60/META.json0000644000101700007640000000302213156250346013715 0ustar alesterispc{ "abstract" : "(X)HTML validation in a Perl object", "author" : [ "Andy Lester " ], "dynamic_config" : 1, "generated_by" :
"ExtUtils::MakeMaker version 7.16, CPAN::Meta::Converter version 2.150005", "license" : [ "artistic_2" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : "2" }, "name" : "HTML-Tidy", "no_index" : { "directory" : [ "t", "inc" ] }, "prereqs" : { "build" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "configure" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "runtime" : { "requires" : { "Carp" : "0", "Encode" : "0", "Exporter" : "0", "Getopt::Long" : "0", "Test::Builder" : "0", "Test::More" : "0.98", "constant" : "0", "overload" : "0", "perl" : "5.008" } } }, "release_status" : "stable", "resources" : { "bugtracker" : { "web" : "http://github.com/petdance/html-tidy/issues" }, "homepage" : "http://github.com/petdance/html-tidy", "license" : [ "http://www.opensource.org/licenses/artistic-license-2.0.php" ], "repository" : { "url" : "http://github.com/petdance/html-tidy" } }, "version" : "1.60", "x_serialization_backend" : "JSON::PP version 2.27400" }