HTML-Lint-2.32/0000755000175000017500000000000013313263532011561 5ustar andyandyHTML-Lint-2.32/README.md0000644000175000017500000000111613220756434013044 0ustar andyandy# HTML::Lint * Linux [![Build Status](https://api.travis-ci.org/petdance/html-lint.svg?branch=dev)](https://travis-ci.org/petdance/html-lint) * [CPAN Testers](http://cpantesters.org/distro/H/html-lint.html) HTML::Lint is a pure-Perl HTML parser and checker for syntactic legitmacy. It supports only HTML 4. The "weblint" program that comes with HTML::Lint lets you lint a webpage or local files. For those of you doing automated testing with Test::More and the rest of the Perl testing framework, Test::HTML::Lint lets you automate HTML checking. Andy Lester andy at petdance dot com HTML-Lint-2.32/MANIFEST0000644000175000017500000000233113313263532012711 0ustar andyandyChanges MANIFEST Makefile.PL README.md bin/weblint lib/HTML/Lint.pm lib/HTML/Lint/Error.pm lib/HTML/Lint/HTML4.pm lib/HTML/Lint/Parser.pm lib/Test/HTML/Lint.pm t/00-load.t t/01-coverage.t t/02-versions.t t/10-test-html-lint.t t/11-test-html-lint-overload.t t/12-html_fragment_ok.t t/20-error-types-export.t t/20-error-types-skip.t t/20-error-types.t t/30-test-builder.t t/40-where.t t/50-multiple-files.t t/60-add-tags.t t/Util.pm t/api-eof-not-called.t t/api-parse-not-called.t t/attr-invalid-entity.t t/attr-repeated.t t/attr-unclosed-entity.t t/attr-unknown-entity.t t/attr-unknown.t t/attr-use-entity.t t/config-unknown-directive.t t/config-unknown-value.t t/doc-tag-required.t t/elem-empty-but-closed.t t/elem-img-alt-missing.t t/elem-img-sizes-missing.t t/elem-input-alt-missing.t t/elem-nonrepeatable.t t/elem-unclosed.t t/elem-unknown.t t/elem-unopened.t t/embed-extensions.t t/nolint.t t/parse_file.t t/pod-coverage.t t/pod.t t/random-nobr.t t/strong-id.t t/text-invalid-entity.t t/text-unclosed-entity.t t/text-unknown-entity.t t/text-use-entity.t t/xhtml-html.t META.yml Module YAML meta-data (added by MakeMaker) META.json Module JSON meta-data 
(added by MakeMaker) HTML-Lint-2.32/Makefile.PL0000644000175000017500000000524313220756434013544 0ustar andyandyuse strict; use warnings; use ExtUtils::MakeMaker qw( WriteMakefile ); use 5.006001; if ( not eval { require LWP::Simple; 1; } ) { print <<'EOF'; NOTE: It seems that you don't have LWP::Simple installed. The weblint program will not be able to retrieve web pages. EOF } my %parms = ( NAME => 'HTML::Lint', DISTNAME => 'HTML-Lint', VERSION_FROM => 'lib/HTML/Lint.pm', ABSTRACT_FROM => 'lib/HTML/Lint.pm', PMLIBDIRS => [qw(lib/)], AUTHOR => 'Andy Lester ', MIN_PERL_VERSION=> 5.006, PREREQ_PM => { 'Exporter' => 0, 'Test::More' => 0, 'Test::Builder' => 0, 'Test::Builder::Tester' => 0, 'File::Find' => 0, 'HTML::Entities' => 0, 'HTML::Parser' => '3.47', 'HTML::Tagset' => '3.03', }, EXE_FILES => [qw(bin/weblint)], dist => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', }, clean => { FILES => 'HTML-Lint-*' }, ); if ( $ExtUtils::MakeMaker::VERSION =~ /^\d[.]\d\d$/ and $ExtUtils::MakeMaker::VERSION > 6.30 ) { $parms{LICENSE} = 'artistic_2'; } if ( $ExtUtils::MakeMaker::VERSION ge '6.46' ) { $parms{META_ADD} = { resources => { homepage => 'http://search.cpan.org/dist/html-lint', bugtracker => 'https://github.com/petdance/html-lint/issues', license => 'http://www.opensource.org/licenses/artistic-license-2.0.php', repository => 'https://github.com/petdance/html-lint', }, }; } WriteMakefile( %parms ); sub MY::postamble { ## no critic ( Subroutines::ProhibitQualifiedSubDeclarations ) my $postamble = <<'MAKE_FRAG'; .PHONY: tags critic tags: ctags -f tags --recurse --totals \ --exclude=blib \ --exclude=.svn \ --exclude='*~' \ --languages=Perl --langmap=Perl:+.t \ critic: perlcritic -1 -q -profile perlcriticrc $(ack -f --perl) PROF_ARGS = -Mblib blib/script/weblint index.html timed: all $(PERL) $(PROF_ARGS) >> /dev/null 2>&1 dprof: all $(PERL) -d:DProf $(PROF_ARGS) >> /dev/null 2>&1 dprofpp -R dproflb: all $(PERL) -d:DProfLB $(PROF_ARGS) >> /dev/null 2>&1 dprofpp -R fastprof: all 
$(PERL) -d:FastProf $(PROF_ARGS) >> /dev/null 2>&1 fprofpp profile: all $(PERL) -d:Profile $(PROF_ARGS) >> /dev/null 2>&1 less prof.out profiler: all $(PERL) -MDevel::Profiler $(PROF_ARGS) >> /dev/null 2>&1 dprofpp -R smallprof: all $(PERL) -d:SmallProf $(PROF_ARGS) >> /dev/null 2>&1 sort -k 2nr,2 smallprof.out | less nytprof: all $(PERL) -d:NYTProf $(PROF_ARGS) >> /dev/null 2>&1 nytprofhtml MAKE_FRAG return $postamble; } HTML-Lint-2.32/META.json0000664000175000017500000000300213313263532013177 0ustar andyandy{ "abstract" : "check for HTML errors in a string or file", "author" : [ "Andy Lester " ], "dynamic_config" : 1, "generated_by" : "ExtUtils::MakeMaker version 7.24, CPAN::Meta::Converter version 2.143240", "license" : [ "artistic_2" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : "2" }, "name" : "HTML-Lint", "no_index" : { "directory" : [ "t", "inc" ] }, "prereqs" : { "build" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "configure" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "runtime" : { "requires" : { "Exporter" : "0", "File::Find" : "0", "HTML::Entities" : "0", "HTML::Parser" : "3.47", "HTML::Tagset" : "3.03", "Test::Builder" : "0", "Test::Builder::Tester" : "0", "Test::More" : "0", "perl" : "5.006" } } }, "release_status" : "stable", "resources" : { "bugtracker" : { "web" : "https://github.com/petdance/html-lint/issues" }, "homepage" : "http://search.cpan.org/dist/html-lint", "license" : [ "http://www.opensource.org/licenses/artistic-license-2.0.php" ], "repository" : { "url" : "https://github.com/petdance/html-lint" } }, "version" : "2.32" } HTML-Lint-2.32/bin/0000755000175000017500000000000013313263531012330 5ustar andyandyHTML-Lint-2.32/bin/weblint0000755000175000017500000000474713313263060013733 0ustar andyandy#!/usr/bin/perl -w use warnings; use strict; use Getopt::Long; use HTML::Lint; use HTML::Lint::Error; use HTML::Lint::HTML4; my $help; my $context; my $structure = 1; my $helper = 1; my 
$fluff = 1; GetOptions( 'help' => \$help, 'context:i' => \$context, 'only' => sub { $structure = $helper = $fluff = 0; }, 'structure!' => \$structure, 'helper!' => \$helper, 'fluff!' => \$fluff, ) or $help = 1; if ( !@ARGV || $help ) { print "weblint v$HTML::Lint::VERSION\n"; print ; exit 1; } my @types; push( @types, HTML::Lint::Error::STRUCTURE ) if $structure; push( @types, HTML::Lint::Error::HELPER ) if $helper; push( @types, HTML::Lint::Error::FLUFF ) if $fluff; my $lint = HTML::Lint->new; $lint->only_types( @types ) if @types; for my $url ( @ARGV ) { my @lines; $lint->newfile( $url ); if ( $url =~ /^https?:/ ) { if ( !eval { require LWP::Simple; 1; } ) { warn q{Can't retrieve URLs without LWP::Simple installed}; next; } my $content = LWP::Simple::get( $url ); if ( $content ) { @lines = split( /\n/, $content ); $_ = "$_\n" for @lines; } else { warn "Unable to fetch $url\n"; next; } } elsif ( $url eq '-' ) { @lines = ; } else { open( my $fh, '<', $url ) or die "Can't open $url: $!"; @lines = <$fh>; close $fh or die $!; } $lint->parse( $_ ) for @lines; $lint->eof(); for my $error ( $lint->errors() ) { print $error->as_string(), "\n"; if ( defined $context ) { $context += 0; my $lineno = $error->line - 1; my $start = $lineno-$context; $start = 0 if $start < 0; my $end = $lineno+$context; $end = $#lines if $end > $#lines; print " $_\n" for @lines[$start..$end]; print "\n"; } } $lint->clear_errors(); } # for files __END__ Usage: weblint [filename or url]... 
(filename - reads STDIN) --help This message --context[=n] Show the offending line (and n surrounding lines) Error types: (default: all on) --[no]structure Structural issues, like unclosed tag pairs --[no]helper Helper issues, like missing HEIGHT & WIDTH --[no]fluff Fluff that can be removed, like bad tag attributes --only Turns off all other error types, as in --only --fluff HTML-Lint-2.32/t/0000755000175000017500000000000013313263531012023 5ustar andyandyHTML-Lint-2.32/t/config-unknown-directive.t0000644000175000017500000000075013220756434017136 0ustar andyandy#!perl use warnings; use strict; use lib 't/'; use Util; checkit( [ # [ 'config-unknown-directive' => q{Set #1 (6:5) Unknown directive "bongo"} ], [ 'config-unknown-directive' => qr/Unknown directive "bongo"$/ ], ], [] ); __DATA__ Test stuff HTML-Lint-2.32/t/text-unclosed-entity.t0000644000175000017500000000224313220756434016327 0ustar andyandy#!perl use warnings; use strict; use lib 't/'; use Util; checkit( [ [ 'text-unclosed-entity' => qr/Entity ö is missing its closing semicolon/ ], [ 'text-unclosed-entity' => qr/Entity ? is missing its closing semicolon/ ], [ 'text-unknown-entity' => qr/Entity &middle is unknown/ ], ], [] ); __DATA__ Ace of ♠: A tribute to Motörhead. Motö rhead rulez! ⊃ ² But can we find an unclosed entity at the end of the line ?

What about unclosed unknown entities in the &middle of the line? Here's an awesome link to "You Better Swim" from the SpongeBob movie. HTML-Lint-2.32/t/00-load.t0000644000175000017500000000030513220756434013350 0ustar andyandy#!perl -Tw use strict; use warnings; use Test::More tests => 1; use HTML::Lint; use Test::HTML::Lint; pass( 'Loaded modules' ); diag( "Testing HTML::Lint $HTML::Lint::VERSION, Perl $], $^X" ); HTML-Lint-2.32/t/elem-unclosed.t0000644000175000017500000000071013220756434014750 0ustar andyandy#!perl use warnings; use strict; use lib 't/'; use Util; checkit( [ [ 'elem-unclosed' => qr/\Q at (6:12) is never closed/i ], [ 'elem-unclosed' => qr/\Q at (7:12) is never closed/i ], ], [] ); __DATA__ Test stuff

This is my paragraph

This is another paragraph

HTML-Lint-2.32/t/attr-unknown.t0000644000175000017500000000076513220756434014675 0ustar andyandy#!perl use warnings; use strict; use lib 't/'; use Util; checkit( [ [ 'attr-unknown' => qr/Unknown attribute "FOOD" for tag

/i ], [ 'attr-unknown' => qr/Unknown attribute "Yummy" for tag /i ], ], [] ); __DATA__ Test stuff

This is my paragraph about burritos

This is my paragraph about refried beans HTML-Lint-2.32/t/12-html_fragment_ok.t0000644000175000017500000000435213220756434015762 0ustar andyandy#!perl use warnings; use strict; use Test::More tests => 4; use Test::Builder::Tester; use Test::HTML::Lint; my $not_so_good_html = <<'HTML';

This is a valid fragment (with some errors), but an incomplete document.

HTML HTML_OK: { test_out( 'not ok 1 - Called html_ok' ); test_fail( +8 ); test_diag( 'Errors: Called html_ok' ); test_diag( ' (3:5) does not have ALT text defined' ); test_diag( ' (4:5) does not have non-blank ALT text defined' ); test_diag( ' (5:1) tag is required' ); test_diag( ' (5:1) tag is required' ); test_diag( ' (5:1) tag is required' ); test_diag( ' (5:1) tag is required' ); html_ok( $not_so_good_html, 'Called html_ok' ); test_test( 'html_ok works on wonky fragment' ); } HTML_FRAGMENT_OK: { test_out( 'not ok 1 - Called html_fragment_ok' ); test_fail( +4 ); test_diag( 'Errors: Called html_fragment_ok' ); test_diag( ' (3:5) <img src="alpha.jpg"> does not have ALT text defined' ); test_diag( ' (4:5) <input name="" type="image"> does not have non-blank ALT text defined' ); html_fragment_ok( $not_so_good_html, 'Called html_fragment_ok' ); test_test( 'html_fragment_ok works on wonky fragment' ); } # HTML that is a valid fragment, but not a valid document. my $ok_fragment = <<'HTML'; <p> This is a valid fragment (with some errors), but an incomplete document. 
<img src="alpha.jpg" height="21" width="12" alt="alpha"> <input type="image" alt="foo"> </p> HTML HTML_OK: { test_out( 'not ok 1 - Called html_ok' ); test_fail( +6 ); test_diag( 'Errors: Called html_ok' ); test_diag( ' (5:1) <body> tag is required' ); test_diag( ' (5:1) <head> tag is required' ); test_diag( ' (5:1) <html> tag is required' ); test_diag( ' (5:1) <title> tag is required' ); html_ok( $ok_fragment, 'Called html_ok' ); test_test( 'html_ok gets back doc-level errors on fragment' ); } HTML_FRAGMENT_OK: { test_out( 'ok 1 - Called html_fragment_ok' ); html_fragment_ok( $ok_fragment, 'Called html_fragment_ok' ); test_test( 'html_fragment_ok passes on fragment' ); } ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������HTML-Lint-2.32/t/attr-unknown-entity.t��������������������������������������������������������������0000644�0001750�0001750�00000000460�13220756434�016177� 0����������������������������������������������������������������������������������������������������ustar �andy����������������������������andy�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!perl use warnings; use strict; use lib 't/'; use Util; checkit( [ [ 'attr-unknown-entity' => qr/Entity &numsefisk; is unknown/ ], ], [<DATA>] ); __DATA__ <HTML> <HEAD> <TITLE>Test stuff HTML-Lint-2.32/t/elem-img-alt-missing.t0000644000175000017500000000063713220756434016145 0ustar andyandy#!perl use warnings; use strict; use lib 't/'; use Util; checkit( [ [ 'elem-img-alt-missing' => qr/ does not have ALT text defined/i ], ], [] ); __DATA__ Test stuff

This is my paragraph

HTML-Lint-2.32/t/10-test-html-lint.t0000644000175000017500000000045513220756434015325 0ustar andyandy#!perl -Tw use warnings; use strict; use Test::More tests => 2; use Test::HTML::Lint; my $chunk = 'A fine chunk of code'; TODO: { # undef should fail local $TODO = 'This test should NOT succeed'; html_ok( undef ); } html_ok( $chunk ); HTML-Lint-2.32/t/xhtml-html.t0000644000175000017500000000226113220756434014315 0ustar andyandy#!perl use warnings; use strict; use lib 't/'; use Util; checkit( [ ], [] ); __DATA__ Test stuff This test brought to you by Mötley Crüe.

        Blah blah blah

        Now listen up
        She's razor sharp
        If she don't get her way
        She'll slice you apart
        Now she's cool cool black
        Moves like a cat
        If you don't get her game
        You might not make it back

        (Pre chorus)
        She's got the look's that kill
        That kill
        She's got the look's that kill
        That kill




        (Chorus)
        She's got the looks that kill

        Now she's bullet proof
        Keeps her motor clean
        And believe me you
        She's a number thirteen
        The church strikes midnight
        She's lookin' louder and louder
        She's gonna turn on your juice, boy
        So she turns on the power

        (Pre-chorus)
        She's got the looks that kill

        (Chorus)(Solo)(Verse)(Pre-chorus)(Chorus)
        
HTML-Lint-2.32/t/config-unknown-value.t0000644000175000017500000000125513220756434016275 0ustar andyandy#!perl use warnings; use strict; use lib 't/'; use Util; checkit( [ [ 'config-unknown-value' => qr/Unknown value "14" for elem-img-sizes-missing directive$/ ], ], [] ); __DATA__ Test stuff HTML-Lint-2.32/t/parse_file.t0000644000175000017500000000133313220756434014327 0ustar andyandy#!perl -T use warnings; use strict; use Test::More tests => 1; use HTML::Lint; use File::Temp qw( tempfile ); my ($o, $OUTPUT_FN) = tempfile( SUFFIX => '.xhtml', UNLINK => 1); print {$o} <<'EOF'; Foo

Hello

File

EOF close($o) or die $!; my $lint = HTML::Lint->new; $lint->parse_file($OUTPUT_FN); is_deeply( [map { $_->as_string() } $lint->errors()], [], 'HTML is valid for output file.' ); HTML-Lint-2.32/t/elem-input-alt-missing.t0000644000175000017500000000216713220756434016530 0ustar andyandy#!perl use warnings; use strict; use lib 't/'; use Util; checkit( [ [ 'elem-input-alt-missing' => qr/ does not have non-blank ALT text defined/i ], [ 'elem-input-alt-missing' => qr/ does not have non-blank ALT text defined/i ], [ 'elem-input-alt-missing' => qr/ does not have non-blank ALT text defined/i ], [ 'elem-input-alt-missing' => qr/ does not have non-blank ALT text defined/i ], ], [] ); __DATA__ Test stuff

This is my paragraph

HTML-Lint-2.32/t/elem-unknown.t0000644000175000017500000000077313220756434014644 0ustar andyandy#!perl use warnings; use strict; use lib 't/'; use Util; checkit( [ [ 'elem-unknown' => qr/unknown element /i ], [ 'elem-unclosed' => qr/ at \(\d+:\d+\) is never closed/i ], ], [] ); __DATA__ Test stuff

This is my paragraph

whizbang! HTML-Lint-2.32/t/50-multiple-files.t0000644000175000017500000000257313220756434015402 0ustar andyandy#!perl use warnings; use strict; use lib 't/'; use Util; my @files = _get_paragraphed_files(); checkit( [ [ 'elem-unopened' => qr/<\/p> with no opening

/i ], [ 'elem-unclosed' => qr/\Q at (6:12) is never closed/i ], [ 'elem-unclosed' => qr/\Q at (7:12) is never closed/i ], [ 'elem-unopened' => qr/<\/b> with no opening /i ], ], @files ); # Read in a set of sets of lines, where each "file" is separated by a # blank line in sub _get_paragraphed_files { local $/ = ''; my @sets; while ( my $paragraph = ) { my @lines = split /\n/, $paragraph; @lines = map { "$_\n" } @lines; push( @sets, [@lines] ); } return @sets; } __DATA__ Test stuff This is my paragraph

Test stuff

This is my paragraph

This is another paragraph

Test stuff
Gratuitous unnecessary closing tag that does NOT match to the opening [B] above.

This is my paragraph

HTML-Lint-2.32/t/elem-img-sizes-missing.t0000644000175000017500000000117013220756434016513 0ustar andyandy#!perl use warnings; use strict; use lib 't/'; use Util; checkit( [ [ 'elem-img-sizes-missing' => qr/\Q tag has no HEIGHT and WIDTH attributes/i ], [ 'elem-img-sizes-missing' => qr/\Q tag has no HEIGHT and WIDTH attributes/i ], ], [] ); __DATA__ Test stuff

This is my paragraph

Bork! Bork! Bork! HTML-Lint-2.32/t/attr-repeated.t0000644000175000017500000000051613220756434014761 0ustar andyandy#!perl use strict; use warnings; use lib 't/'; use Util; checkit( [ [ 'attr-repeated' => qr/ALIGN attribute in

is repeated/i ], ], [] ); __DATA__ Test stuff

This is my paragraph

HTML-Lint-2.32/t/api-eof-not-called.t0000644000175000017500000000072713027375770015571 0ustar andyandy#!perl use warnings; use strict; use Test::More tests => 3; use HTML::Lint; use HTML::Lint::HTML4; my $lint = HTML::Lint->new; isa_ok( $lint, 'HTML::Lint', 'Created lint object' ); $lint->newfile( '' ); $lint->parse( '

Blah blah

' ); my @errors = $lint->errors(); cmp_ok( scalar @errors, '>', 0, 'Should get back at least one error' ); my $error = $errors[-1]; is( $error->errcode, 'api-eof-not-called', 'The last error in the list is the API error' ); HTML-Lint-2.32/t/40-where.t0000644000175000017500000000225413220756434013554 0ustar andyandy#!perl use strict; use warnings; use lib 't/'; use Util; my $html = ''; checkit( [ [ 'elem-unopened' => 'Set #1 (1:1) with no opening ' ], [ 'doc-tag-required' => 'Set #1 (1:1) tag is required' ], [ 'doc-tag-required' => 'Set #1 (1:1) tag is required' ], [ 'doc-tag-required' => 'Set #1 (1:1) tag is required' ], [ 'doc-tag-required' => 'Set #1 (1:1) tag is required' ], ], [$html] ); __END__ This doesn't test the error finding as much as the where() method. It fixes the following bug: Date: Mon, 22 Dec 2003 22:07:54 -0800 From: Adam Monsen <adamm@wazamatta.com> To: Andy Lester <andy@petdance.com> Subject: HTML::Lint::Error bug The following demonstrates a bug in HTML::Lint that is seen when an offending tag is flush left ... use HTML::Lint; my $lint = HTML::Lint->new(); $lint->parse('</body>'); warn $_->as_string."\n" for $lint->errors; The warning I'm getting looks like this: Argument "" isn't numeric in addition (+) at /usr/lib/perl5/site_perl/5.8.1/HTML/Lint/Error.pm line 176. If I change the parse() call as follows (by adding a leading space): $lint->parse(' </body>'); the warning disappears. 
����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������HTML-Lint-2.32/t/20-error-types-skip.t��������������������������������������������������������������0000644�0001750�0001750�00000003043�13220756434�015674� 0����������������������������������������������������������������������������������������������������ustar �andy����������������������������andy�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!perl -Tw use strict; use warnings; use Test::More tests => 8; use HTML::Lint; use HTML::Lint::Error ':types'; my $text = do { local $/ = undef; <DATA> }; FUNC_METHOD: { my $lint = HTML::Lint->new(); isa_ok( $lint, 'HTML::Lint' ); $lint->parse( $text ); $lint->eof; is( scalar $lint->errors, 1, 'One error with a clean lint' ); $lint->newfile(); $lint->clear_errors(); $lint->only_types( HELPER, FLUFF ); $lint->parse( $text ); $lint->eof; is( scalar $lint->errors, 0, 'No errors if helper & fluff' ); $lint->newfile(); $lint->clear_errors(); $lint->only_types( STRUCTURE ); $lint->parse( $text ); $lint->eof; my @errors = $lint->errors; if ( !is( scalar @errors, 1, 'One error if we specify STRUCTURE if we turn it off' ) ) { diag( $_->as_string ) for @errors; } } CONSTRUCTOR_METHOD_SCALAR: { my $lint = HTML::Lint->new( only_types => STRUCTURE ); isa_ok( $lint, 'HTML::Lint' ); $lint->parse( $text ); my @errors = $lint->errors; if ( !is( scalar @errors, 1, 'One error if we specify STRUCTURE if we turn it off' ) ) { diag( $_->as_string ) for @errors; } } CONSTRUCTOR_METHOD_ARRAYREF: { my $lint = HTML::Lint->new( only_types => [HELPER, 
FLUFF] ); isa_ok( $lint, 'HTML::Lint' ); $lint->parse( $text ); is( scalar $lint->errors, 0, 'No errors if helper & fluff' ); } __DATA__ <HTML> <HEAD> <TITLE>Test stuff This is my paragraph HTML-Lint-2.32/t/text-invalid-entity.t0000644000175000017500000000302113224570052016126 0ustar andyandy#!perl use warnings; use strict; use lib 't/'; use Util; # We used to have text-invalid-entity if the entities had an invalid value, but we no longer do. checkit( [ [ 'text-unknown-entity' => qr/Entity &metalhorns; is unknown/ ], [ 'text-unknown-entity' => qr/Entity &xdeadbeef; is unknown/ ], ], [] ); __DATA__ Ace of ♠: A tribute to Motörhead. ® &metalhorns;

Thanks for visiting Ace of ♠

Ace of ♠ is your single source for everything related to Motörhead.

Here's an icon of my girlfriend Jenny: �

And here's an icon of a deceased cow: �

Another deceased cow: &xdeadbeef;

Here's an awesome link to "You Better Swim" from the SpongeBob movie. HTML-Lint-2.32/t/random-nobr.t0000644000175000017500000000066013220756434014436 0ustar andyandy#!perl use warnings; use strict; use lib 't/'; use Util; checkit( [ [ 'elem-unknown' => qr/unknown element /i ], [ 'elem-unclosed' => qr/ at \(\d+:\d+\) is never closed/i ], ], [] ); __DATA__ Test stuff NOBR is fine with me! But Donky is not HTML-Lint-2.32/t/attr-use-entity.t0000644000175000017500000000115313220756434015274 0ustar andyandy#!perl use warnings; use strict; use lib 't/'; use Util; checkit( [ [ 'attr-use-entity' => qr/Character "\\xF1" should be written as ñ/ ], [ 'attr-use-entity' => qr/Character "&" should be written as &/ ], [ 'attr-use-entity' => qr/Character "&" should be written as &/ ], ], [] ); __DATA__ Test stuff HTML-Lint-2.32/t/elem-unopened.t0000644000175000017500000000050613220756434014754 0ustar andyandy#!perl use warnings; use strict; use lib 't/'; use Util; checkit( [ [ 'elem-unopened' => qr/<\/p> with no opening

/i ], ], [] ); __DATA__ Test stuff This is my paragraph

HTML-Lint-2.32/t/text-use-entity.t0000644000175000017500000000302413220756434015305 0ustar andyandy#!perl use utf8; use warnings; use strict; use lib 't/'; use Util; checkit( [ [ 'text-use-entity' => qr/Character "\\x0B" should be written as / ], [ 'text-use-entity' => qr/Character "\\x38C" should be written as Ό/ ], [ 'text-use-entity' => qr/Character "\\xF1" should be written as ñ/ ], [ 'text-use-entity' => qr/Character "&" should be written as &/ ], [ 'text-unclosed-entity' => qr/Entity ö is missing its closing semicolon/ ], [ 'text-use-entity' => qr/Character "&" should be written as &/ ], ], [] ); __DATA__ Test stuff Here's a non-entityable char [ ].

And here's a non-entityable char over 255 [Ό].

We'll get to it mañana, which should really have an ñ.

Who wants a peanut butter & jelly? Motö rhead does! They love rock & roll!

Here's an awesome link to "You Better Swim" from the SpongeBob movie.

HTML-Lint-2.32/t/20-error-types-export.t0000644000175000017500000000062013220756434016245 0ustar andyandy#!perl -Tw use warnings; use strict; use Test::More tests => 4; use HTML::Lint::Error ':types'; my $err = HTML::Lint::Error->new( undef, undef, undef, 'elem-empty-but-closed' ); ok( $err->is_type( STRUCTURE ) ); ok( !$err->is_type( FLUFF, HELPER ) ); $err = HTML::Lint::Error->new( undef, undef, undef, 'attr-unknown' ); ok( $err->is_type( FLUFF ) ); ok( !$err->is_type( STRUCTURE, HELPER ) ); HTML-Lint-2.32/t/strong-id.t0000644000175000017500000000165313220756434014131 0ustar andyandy#!perl use warnings; use strict; use lib 't/'; use Util; checkit( [ [ 'attr-unknown' => qr/Unknown attribute "bongo" for tag / ], ], [] ); =pod HTML::Lint 2.02 and weblint, Red Hat EL 3 This should result in no warnings: echo 'qwerasdf' | weblint - - (1:45) Unknown attribute "id" for tag but it gives: - (1:45) Unknown attribute "id" for tag id is a core attribute in HTML4/XHTML1: http://www.w3.org/TR/html4/html40.txt =cut __DATA__ Test stuff

A test for this bug.

Bad Bad

HTML-Lint-2.32/t/attr-unclosed-entity.t0000644000175000017500000000110513220756434016311 0ustar andyandy#!perl use warnings; use strict; use lib 't/'; use Util; checkit( [ [ 'attr-unclosed-entity' => qr/Entity ? is missing its closing semicolon/ ], [ 'attr-unclosed-entity' => qr/Entity ö is missing its closing semicolon/ ], ], [] ); __DATA__ Test stuff Motörhead Motörhead HTML-Lint-2.32/t/20-error-types.t0000644000175000017500000000077113220756434014735 0ustar andyandy#!perl -Tw use warnings; use strict; use Test::More tests => 4; use HTML::Lint::Error; my $err = HTML::Lint::Error->new( undef, undef, undef, 'elem-empty-but-closed' ); ok( $err->is_type( HTML::Lint::Error::STRUCTURE ) ); ok( !$err->is_type( HTML::Lint::Error::FLUFF, HTML::Lint::Error::HELPER ) ); $err = HTML::Lint::Error->new( undef, undef, undef, 'attr-unknown' ); ok( $err->is_type( HTML::Lint::Error::FLUFF ) ); ok( !$err->is_type( HTML::Lint::Error::STRUCTURE, HTML::Lint::Error::HELPER ) ); HTML-Lint-2.32/t/pod-coverage.t0000644000175000017500000000041713220756434014573 0ustar andyandy#!perl -Tw use warnings; use strict; use Test::More; if ( !eval 'use Test::Pod::Coverage 1.04; 1;' ) { ## no critic ( BuiltinFunctions::ProhibitStringyEval ) plan skip_all => 'Test::Pod::Coverage 1.04 required for testing POD coverage'; } all_pod_coverage_ok(); HTML-Lint-2.32/t/60-add-tags.t0000644000175000017500000000143513220756434014130 0ustar andyandy#!perl use warnings; use strict; use lib 't/'; use Util; use HTML::Lint::HTML4; # This test is the same as t/attr-unknown.t, but with tag table modification. HTML::Lint::HTML4::add_attribute( 'p', 'food' ); HTML::Lint::HTML4::add_attribute( 'body', 'cuisine' ); HTML::Lint::HTML4::add_tag( 'meal' ); HTML::Lint::HTML4::add_attribute( 'meal', 'type' ); checkit( [ [ 'attr-unknown' => qr/Unknown attribute "Yummy" for tag /i ], ], [] ); __DATA__ Test stuff

This is my paragraph about burritos

This is my paragraph about refried beans Steak burrito HTML-Lint-2.32/t/elem-empty-but-closed.t0000644000175000017500000000052113220756434016331 0ustar andyandy#!perl use warnings; use strict; use lib 't/'; use Util; checkit( [ [ 'elem-empty-but-closed' => qr/
is not a container -- <\/hr> is not allowed/ ], ], [] ); __DATA__ Test stuff
This is a bad paragraph HTML-Lint-2.32/t/doc-tag-required.t0000644000175000017500000000047013220756434015353 0ustar andyandy#!perl use warnings; use strict; use lib 't/'; use Util; checkit( [ [ 'doc-tag-required' => qr/ tag is required/ ], ], [] ); __DATA__ Test stuff

This is my paragraph

HTML-Lint-2.32/t/02-versions.t0000644000175000017500000000053013220756434014303 0ustar andyandy#!perl -Tw use warnings; use strict; use Test::More tests => 2; use HTML::Lint::Parser; use HTML::Lint; use Test::HTML::Lint; is( $HTML::Lint::VERSION, $Test::HTML::Lint::VERSION, 'HTML::Lint and Test::HTML::Lint versions match' ); is( $HTML::Lint::VERSION, $HTML::Lint::Parser::VERSION, 'HTML::Lint and Test::HTML::Lint versions match' ); HTML-Lint-2.32/t/nolint.t0000644000175000017500000000246013220756434013523 0ustar andyandy#!perl use warnings; use strict; use lib 't/'; use Util; checkit( [ [ 'elem-img-sizes-missing' => qr/\Q tag has no HEIGHT and WIDTH attributes/i ], [ 'elem-img-alt-missing' => qr/\Q does not have ALT text defined/i ], [ 'elem-img-alt-missing' => qr/\Q does not have ALT text defined/i ], # gamma.jpg will not error at all [ 'elem-img-alt-missing' => qr/\Q does not have ALT text defined/i ], [ 'elem-img-sizes-missing' => qr/\Q tag has no HEIGHT and WIDTH attributes/i ], [ 'elem-img-alt-missing' => qr/\Q does not have ALT text defined/i ], [ 'elem-unclosed' => 'Set #1 (20:5) at (13:9) is never closed' ], ], [] ); __DATA__ Test stuff HTML-Lint-2.32/t/01-coverage.t0000644000175000017500000000061713220756434014233 0ustar andyandy#!perl -Tw # This test verifies that there is a t/*.t file for every possible Lint error. use strict; use warnings; use Test::More 'no_plan'; use HTML::Lint::Error; my @errors = keys %HTML::Lint::Error::errors; isnt( scalar @errors, 0, 'There are at least some errors to be found.' 
); for my $error ( @errors ) { my $filename = "t/$error.t"; ok( -e $filename, "$filename exists" ); } HTML-Lint-2.32/t/11-test-html-lint-overload.t0000644000175000017500000000061513220756434017135 0ustar andyandy#!perl -Tw use strict; use warnings; use Test::More tests => 1; use HTML::Lint; use HTML::Lint::Error; use Test::HTML::Lint; my $lint = HTML::Lint->new(); $lint->only_types( HTML::Lint::Error::FLUFF ); # This code is invalid, but the linter should ignore it my $chunk = << 'END';

This is a fine chunk of code

END html_ok( $lint, $chunk, 'STRUCTUREally naughty code passed' ); HTML-Lint-2.32/t/30-test-builder.t0000644000175000017500000000041612740064225015036 0ustar andyandy#!perl -Tw use warnings; use strict; # The test is not that html_ok() works, but that the tests=>1 gets # acts as it should. use Test::HTML::Lint tests=>1; my $chunk = 'A fine chunk of code'; html_ok( $chunk ); HTML-Lint-2.32/t/api-parse-not-called.t0000644000175000017500000000070113027375770016122 0ustar andyandy#!perl use warnings; use strict; use Test::More tests => 3; use HTML::Lint; use HTML::Lint::HTML4; my $lint = HTML::Lint->new; isa_ok( $lint, 'HTML::Lint', 'Created lint object' ); $lint->newfile( '' ); $lint->eof; my @errors = $lint->errors(); cmp_ok( scalar @errors, '>', 0, 'Should get back at least one error' ); my $error = $errors[-1]; is( $error->errcode, 'api-parse-not-called', 'The last error in the list is the API error' ); HTML-Lint-2.32/t/pod.t0000644000175000017500000000035613220756434013004 0ustar andyandy#!perl -Tw use strict; use warnings; use Test::More; if ( !eval 'use Test::Pod 1.14; 1;' ) { ## no critic ( BuiltinFunctions::ProhibitStringyEval ) plan skip_all => 'Test::Pod 1.14 required for testing POD'; } all_pod_files_ok(); HTML-Lint-2.32/t/elem-nonrepeatable.t0000644000175000017500000000061013220756434015752 0ustar andyandy#!perl use warnings; use strict; use lib 't/'; use Util; checkit( [ [ 'elem-nonrepeatable' => qr/\Q is not repeatable, but already appeared at (3:9)/i ], ], [<DATA>] ); __DATA__ <HTML> <HEAD> <TITLE>Test stuff As if one title isn't enough

This is my paragraph

HTML-Lint-2.32/t/text-unknown-entity.t0000644000175000017500000000137613220756434016220 0ustar andyandy#!perl use warnings; use strict; use lib 't/'; use Util; checkit( [ [ 'text-unknown-entity' => qr/Entity &metalhorns; is unknown/ ], ], [] ); __DATA__ Ace of ♠: A tribute to Motörhead. ® &metalhorns; Thanks for visiting Ace of ♠

Here's an awesome link to "You Better Swim" from the SpongeBob movie. HTML-Lint-2.32/t/Util.pm0000644000175000017500000000256213220756434013311 0ustar andyandypackage Util; use parent 'Exporter'; use warnings; use strict; use Test::More; use HTML::Lint; our @EXPORT = qw( checkit ); sub checkit { my @expected = @{+shift}; my @linesets = @_; plan( tests => 3*(scalar @expected) + 4 ); my $lint = HTML::Lint->new; isa_ok( $lint, 'HTML::Lint', 'Created lint object' ); my $n; for my $set ( @linesets ) { ++$n; $lint->newfile( "Set #$n" ); $lint->parse( $_ ) for @{$set}; $lint->eof; } my @errors = $lint->errors(); is( scalar @errors, scalar @expected, 'Right # of errors' ); while ( @errors && @expected ) { my $error = shift @errors; isa_ok( $error, 'HTML::Lint::Error' ); my $expected = shift @expected; is( $error->errcode, $expected->[0], 'Error codes match' ); my $match = $expected->[1]; if ( ref($match) eq 'Regexp' ) { like( $error->as_string, $match, 'Error matches regex' ); } else { is( $error->as_string, $match, 'Error matches string' ); } } my $dump; is( scalar @errors, 0, 'No unexpected errors found' ) or $dump = 1; is( scalar @expected, 0, 'No expected errors missing' ) or $dump = 1; if ( $dump && @errors ) { diag( 'Leftover errors...' ); diag( $_->as_string ) for @errors; } return; } 1; # happy HTML-Lint-2.32/t/attr-invalid-entity.t0000644000175000017500000000056613224570052016127 0ustar andyandy#!perl use warnings; use strict; use lib 't/'; use Util; # We used to have attr-invalid-entity if the entities had an invalid value, but we no longer do. checkit( [ ], [] ); __DATA__ Test stuff

HTML-Lint-2.32/META.yml0000664000175000017500000000163413313263531013037 0ustar andyandy--- abstract: 'check for HTML errors in a string or file' author: - 'Andy Lester ' build_requires: ExtUtils::MakeMaker: '0' configure_requires: ExtUtils::MakeMaker: '0' dynamic_config: 1 generated_by: 'ExtUtils::MakeMaker version 7.24, CPAN::Meta::Converter version 2.143240' license: artistic_2 meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: '1.4' name: HTML-Lint no_index: directory: - t - inc requires: Exporter: '0' File::Find: '0' HTML::Entities: '0' HTML::Parser: '3.47' HTML::Tagset: '3.03' Test::Builder: '0' Test::Builder::Tester: '0' Test::More: '0' perl: '5.006' resources: bugtracker: https://github.com/petdance/html-lint/issues homepage: http://search.cpan.org/dist/html-lint license: http://www.opensource.org/licenses/artistic-license-2.0.php repository: https://github.com/petdance/html-lint version: '2.32' HTML-Lint-2.32/lib/0000755000175000017500000000000013313263531012326 5ustar andyandyHTML-Lint-2.32/lib/Test/0000755000175000017500000000000013313263531013245 5ustar andyandyHTML-Lint-2.32/lib/Test/HTML/0000755000175000017500000000000013313263531014011 5ustar andyandyHTML-Lint-2.32/lib/Test/HTML/Lint.pm0000644000175000017500000001221113313263060015247 0ustar andyandypackage Test::HTML::Lint; use warnings; use strict; use Test::Builder; use Exporter; use HTML::Lint; use vars qw( @ISA $VERSION @EXPORT ); @ISA = qw( HTML::Parser Exporter ); =head1 NAME Test::HTML::Lint - Test::More-style wrapper around HTML::Lint =head1 VERSION Version 2.32 =cut $VERSION = '2.32'; my $Tester = Test::Builder->new; =head1 SYNOPSIS use Test::HTML::Lint tests => 4; my $table = build_display_table(); html_ok( $table, 'Built display table properly' ); =head1 DESCRIPTION This module provides a few convenience methods for testing exception based code. It is built with L and plays happily with L and friends. 
If you are not already familiar with L now would be the time to go take a look. =head1 EXPORT C =cut @EXPORT = qw( html_ok html_fragment_ok ); sub import { my $self = shift; my $pack = caller; $Tester->exported_to($pack); $Tester->plan(@_); $self->export_to_level(1, $self, @EXPORT); return; } =head2 html_ok( [$lint, ] $html, $name ) Checks to see if C<$html> is a valid HTML document, including checks for having C<< >>, C<< >>, C<< > >> and C<< <body> >> tags. If you're checking something that is only a fragment of an HTML document, use C<html_fragment_ok()>. If you pass an HTML::Lint object, C<html_ok()> will use that for its settings. my $lint = new HTML::Lint( only_types => STRUCTURE ); html_ok( $lint, $content, "Web page passes structural tests only" ); Otherwise, it will use the default rules. html_ok( $content, "Web page passes ALL tests" ); Note that if you pass in your own HTML::Lint object, C<html_ok()> will clear its errors before using it. =cut sub html_ok { my $lint; if ( ref($_[0]) eq 'HTML::Lint' ) { $lint = shift; $lint->newfile(); $lint->clear_errors(); } else { $lint = HTML::Lint->new; } my $html = shift; my $name = shift; my $ok = defined $html; if ( !$ok ) { $Tester->ok( 0, $name ); } else { $lint->parse( $html ); $lint->eof(); my $nerr = scalar $lint->errors; $ok = !$nerr; $Tester->ok( $ok, $name ); if ( !$ok ) { my $msg = 'Errors:'; $msg .= " $name" if $name; $Tester->diag( $msg ); $Tester->diag( $_->as_string ) for $lint->errors; } } return $ok; } =head2 html_fragment_ok( [$lint, ] $html, $name ) Checks that C<$fragment> is valid HTML, but not necessarily a valid HTML document. For example, this is a valid fragment, but not a valid HTML document: <body> <p>Lorem ipsum</p> </body> because it doesn't contain C<< <html> >> and C<< <head> >> tags. If you want to check that it is a valid document, use C<html_ok()>. If you pass an HTML::Lint object, C<html_fragment_ok()> will use that for its settings. 
my $lint = new HTML::Lint( only_types => STRUCTURE ); html_fragment_ok( $lint, $content, 'Web page passes structural tests only' ); Otherwise, it will use the default rules. html_fragment_ok( $content, 'Fragment passes ALL tests' ); Note that if you pass in your own HTML::Lint object, C<html_fragment_ok()> will clear its errors before using it. =cut sub html_fragment_ok { my $lint; if ( ref($_[0]) eq 'HTML::Lint' ) { $lint = shift; $lint->newfile(); $lint->clear_errors(); } else { $lint = HTML::Lint->new; } my $html = shift; my $name = shift; my $ok = defined $html; if ( !$ok ) { $Tester->ok( 0, $name ); } else { $lint->parse( $html ); $lint->eof(); # Ignore doc-level errors. my @errors = grep { $_->errcode ne 'doc-tag-required' } $lint->errors; my $nerr = @errors; $ok = !$nerr; $Tester->ok( $ok, $name ); if ( !$ok ) { my $msg = 'Errors:'; $msg .= " $name" if $name; $Tester->diag( $msg ); $Tester->diag( $_->as_string ) for @errors; } } return $ok; } =head1 BUGS All bugs and requests are now being handled through GitHub. https://github.com/petdance/html-lint/issues DO NOT send bug reports to http://rt.cpan.org/. =head1 TO DO There needs to be a C<html_table_ok()> to check that the HTML is a self-contained, well-formed table, and then a comparable one for C<html_page_ok()>. If you think this module should do something that it doesn't do at the moment please let me know. =head1 ACKNOWLEDGEMENTS Thanks to chromatic and Michael G Schwern for the excellent Test::Builder, without which this module wouldn't be possible. Thanks to Adrian Howard for writing Test::Exception, from which most of this module is taken. =head1 COPYRIGHT & LICENSE Copyright 2005-2018 Andy Lester. This program is free software; you can redistribute it and/or modify it under the terms of the Artistic License v2.0. http://www.opensource.org/licenses/Artistic-2.0 Please note that these modules are not products of or supported by the employers of the various contributors to the code. 
=head1 AUTHOR Andy Lester, C<andy@petdance.com> =cut 1; ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������HTML-Lint-2.32/lib/HTML/����������������������������������������������������������������������������0000755�0001750�0001750�00000000000�13313263531�013072� 5����������������������������������������������������������������������������������������������������ustar �andy����������������������������andy�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������HTML-Lint-2.32/lib/HTML/Lint.pm���������������������������������������������������������������������0000644�0001750�0001750�00000022236�13313263060�014340� 0����������������������������������������������������������������������������������������������������ustar �andy����������������������������andy�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������package HTML::Lint; use warnings; use strict; use HTML::Lint::Error; use HTML::Lint::Parser (); use HTML::Entities (); =head1 NAME HTML::Lint - check for HTML errors in a string or file =head1 VERSION Version 2.32 =cut our $VERSION = '2.32'; =head1 SYNOPSIS my $lint = HTML::Lint->new; $lint->only_types( HTML::Lint::Error::STRUCTURE ); # Parse lines of data. $lint->newfile( $filename ); while ( my $line = <> ) { $lint->parse( $line ); } $lint->eof(); # Or, parse an entire file at once. 
$lint->parse_file( $filename ); # Fetch the errors that the linter found. my $error_count = $lint->errors; foreach my $error ( $lint->errors ) { print $error->as_string, "\n"; } HTML::Lint also comes with a wrapper program called F<weblint> that handles linting from the command line: $ weblint http://www.cnn.com/ http://www.cnn.com/ (395:83) <IMG SRC="spacer.gif"> tag has no HEIGHT and WIDTH attributes. http://www.cnn.com/ (395:83) <IMG SRC="goofus.gif"> does not have ALT text defined http://www.cnn.com/ (396:217) Unknown element <nobr> http://www.cnn.com/ (396:241) </nobr> with no opening <nobr> http://www.cnn.com/ (842:7) target attribute in <a> is repeated And finally, you can also get L<Apache::HTML::Lint> that passes any mod_perl-generated code through HTML::Lint and get it dumped into your Apache F<error_log>. [Mon Jun 3 14:03:31 2002] [warn] /foo.pl (1:45) </p> with no opening <p> [Mon Jun 3 14:03:31 2002] [warn] /foo.pl (1:49) Unknown element <gronk> [Mon Jun 3 14:03:31 2002] [warn] /foo.pl (1:56) Unknown attribute "x" for tag <table> =cut =head1 METHODS NOTE: Some of these methods mirror L<HTML::Parser>'s methods, but HTML::Lint is not a subclass of HTML::Parser. =head2 new() Create an HTML::Lint object, which inherits from HTML::Parser. You may pass the types of errors you want to check for in the C<only_types> parm. my $lint = HTML::Lint->new( only_types => HTML::Lint::Error::STRUCTURE ); If you want more than one, you must pass an arrayref: my $lint = HTML::Lint->new( only_types => [HTML::Lint::Error::STRUCTURE, HTML::Lint::Error::FLUFF] ); =cut sub new { my $class = shift; my %args = @_; my $self = { _errors => [], _types => [], }; bless $self, $class; if ( my $only = $args{only_types} ) { $self->only_types( ref $only eq 'ARRAY' ? @{$only} : $only ); delete $args{only_types}; } warn "Unknown argument $_\n" for keys %args; return $self; } =head2 $lint->parser() Returns the parser object for this object, creating one if necessary. 
=cut sub parser { my $self = shift; if ( not $self->{_parser} ) { $self->{_parser} = HTML::Lint::Parser->new( sub { $self->gripe( @_ ) } ); $self->{_parser}->ignore_elements( qw(script style) ); } return $self->{_parser}; } =head2 $lint->parse( $text ) =head2 $lint->parse( $code_ref ) Passes in a chunk of HTML to be linted, either as a piece of text, or a code reference. See L<HTML::Parser>'s C<parse_file> method for details. =cut sub parse { my $self = shift; my $rc = $self->parser->parse( @_ ); $self->{_parse_called} = 1; return $rc; } =head2 $lint->parse_file( $file ) Analyzes HTML directly from a file. The C<$file> argument can be a filename, an open file handle, or a reference to an open file handle. See L<HTML::Parser>'s C<parse_file> method for details. =cut sub parse_file { my $self = shift; my $rc = $self->parser->parse_file( @_ ); $self->{_parse_called} = 1; $self->eof; return $rc; } =head2 $lint->eof() Signals the end of a block of text getting passed in. This must be called to make sure that all parsing is complete before looking at errors. Any parameters (and there shouldn't be any) are passed through to HTML::Parser's eof() method. =cut sub eof { ## no critic ( Subroutines::ProhibitBuiltinHomonyms ) my $self = shift; my $rc; my $parser = $self->parser; if ( $parser ) { $rc = $parser->eof(@_); delete $self->{_parser}; $self->{_eof_called} = 1; } return $rc; } =head2 $lint->errors() In list context, C<errors> returns all of the errors found in the parsed text. Each error is an object of the type L<HTML::Lint::Error>. In scalar context, it returns the number of errors found. 
=cut sub errors { my $self = shift; if ( !$self->{_parse_called} ) { $self->gripe( 'api-parse-not-called' ); } elsif ( !$self->{_eof_called} ) { $self->gripe( 'api-eof-not-called' ); } if ( wantarray ) { return @{$self->{_errors}}; } else { return scalar @{$self->{_errors}}; } } =head2 $lint->clear_errors() Clears the list of errors, in case you want to print and clear, print and clear. =cut sub clear_errors { my $self = shift; $self->{_errors} = []; return; } =head2 $lint->only_types( $type1[, $type2...] ) Specifies to only want errors of a certain type. $lint->only_types( HTML::Lint::Error::STRUCTURE ); Calling this without parameters makes the object return all possible errors. The error types are C<STRUCTURE>, C<HELPER> and C<FLUFF>. See L<HTML::Lint::Error> for details on these types. =cut sub only_types { my $self = shift; $self->{_types} = [@_]; return; } =head2 $lint->gripe( $errcode, [$key1=>$val1, ...] ) Adds an error message, in the form of an L<HTML::Lint::Error> object, to the list of error messages for the current object. The file, line and column are automatically passed to the L<HTML::Lint::Error> constructor, as well as whatever other key value pairs are passed. For example: $lint->gripe( 'attr-repeated', tag => $tag, attr => $attr ); Usually, the user of the object won't call this directly, but just in case, here you go. =cut sub gripe { my $self = shift; my $error = HTML::Lint::Error->new( $self->{_file}, $self->parser->{_line}, $self->parser->{_column}, @_ ); my @keeps = @{$self->{_types}}; if ( !@keeps || $error->is_type(@keeps) ) { push( @{$self->{_errors}}, $error ); } return; } =head2 $lint->newfile( $filename ) Call C<newfile()> whenever you switch to another file in a batch of linting. Otherwise, the object thinks everything is from the same file. Note that the list of errors is NOT cleared. Note that I<$filename> does NOT need to match what's put into C<parse()> or C<parse_file()>. It can be a description, a URL, or whatever. 
You should call C<newfile()> even if you are only validating one file. If you do not call C<newfile()> then your errors will not have a filename attached to them. =cut sub newfile { my $self = shift; my $file = shift; delete $self->{_parser}; delete $self->{_parse_called}; delete $self->{_eof_called}; $self->{_file} = $file; $self->{_line} = 0; $self->{_column} = 0; $self->{_first_seen} = {}; return $self->{_file}; } # newfile 1; =head1 MODIFYING HTML::LINT'S BEHAVIOR Sometimes you'll have HTML that for some reason cannot conform to HTML::Lint's expectations. For those instances, you can use HTML comments to modify HTML::Lint's behavior. Say you have an image where for whatever reason you can't get dimensions for the image. This HTML snippet: <img src="logo.png" height="120" width="50" alt="Company logo"> <img src="that.png"> causes this error: foo.html (14:20) <img src="that.png"> tag has no HEIGHT and WIDTH attributes But if for some reason you can't get those dimensions when you build the page, you can at least stop HTML::Lint complaining about it. <img src="this.png" height="120" width="50" alt="Company logo"> <!-- html-lint elem-img-sizes-missing: off, elem-img-alt-missing: off --> <img src="that.png"> <!-- html-lint elem-img-sizes-missing: on, elem-img-alt-missing: off --> If you want to turn off all HTML::Lint warnings for a block of code, use <!-- html-lint all: off --> And turn them back on with <!-- html-lint all: on --> You don't have to use "on" and "off". For "on", you can use "true" or "1". For "off", you can use "0" or "false". For a list of possible errors and their codes, see L<HTML::Lint::Error>, or run F<perldoc HTML::Lint::Error>. =head1 BUGS, WISHES AND CORRESPONDENCE All bugs and requests are now being handled through GitHub. 
https://github.com/petdance/html-lint/issues DO NOT send bug reports to http://rt.cpan.org/ or http://code.google.com/ =head1 TODO =over 4 =item * Check for attributes that require values =item * <TABLE>s that have no rows. =item * Form fields that aren't in a FORM =item * DIVs with nothing in them. =item * HEIGHT= that have percents in them. =item * Check for goofy stuff like: <b><li></b><b>Hello Reader - Spanish Level 1 (K-3)</b> =back =head1 COPYRIGHT & LICENSE Copyright 2005-2018 Andy Lester. This program is free software; you can redistribute it and/or modify it under the terms of the Artistic License v2.0. http://www.opensource.org/licenses/Artistic-2.0 Please note that these modules are not products of or supported by the employers of the various contributors to the code. =head1 AUTHOR Andy Lester, andy at petdance.com =cut 1; ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������HTML-Lint-2.32/lib/HTML/Lint/�����������������������������������������������������������������������0000755�0001750�0001750�00000000000�13313263531�014000� 5����������������������������������������������������������������������������������������������������ustar �andy����������������������������andy�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������HTML-Lint-2.32/lib/HTML/Lint/Parser.pm��������������������������������������������������������������0000644�0001750�0001750�00000024665�13313263060�015604� 0����������������������������������������������������������������������������������������������������ustar 
�andy����������������������������andy�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������package HTML::Lint::Parser; use warnings; use strict; use HTML::Parser 3.20; use HTML::Tagset 3.03; use HTML::Lint::Error (); use HTML::Lint::HTML4 qw( %isKnownAttribute %isRequired %isNonrepeatable %isObsolete ); use HTML::Entities qw( %char2entity %entity2char ); use parent 'HTML::Parser'; =head1 NAME HTML::Lint::Parser - Parser for HTML::Lint. No user-serviceable parts inside. =head1 VERSION Version 2.32 =cut our $VERSION = '2.32'; =head1 SYNOPSIS See L<HTML::Lint> for all the gory details. =head1 METHODS =head2 new( $gripe ) Constructor for the main parsing object. The I<$gripe> argument is a coderef to a function that can handle errors from the parser. It is only ever (so far) C<HTML::Lint::gripe()>. =cut sub new { my $class = shift; my $gripe = shift; my $self = HTML::Parser->new( api_version => 3, start_document_h => [ \&_start_document, 'self' ], end_document_h => [ \&_end_document, 'self,line,column' ], start_h => [ \&_start, 'self,tagname,line,column,@attr' ], end_h => [ \&_end, 'self,tagname,line,column,tokenpos,@attr' ], comment_h => [ \&_comment, 'self,tagname,line,column,text' ], text_h => [ \&_text, 'self,text' ], strict_names => 0, empty_element_tags => 1, attr_encoded => 1, ); bless $self, $class; $self->{_gripe} = $gripe; $self->{_stack} = []; $self->{_directives} = {}; return $self; } =head2 $parser->gripe( $errorcode, [ arg1=>val1, ...] ) Calls the passed-in gripe function. If a given directive has been set to turn off a given message, then the parent gripe never gets called. 
=cut sub gripe { my $self = shift; my $errorcode = shift; if ( $self->_displayable( $errorcode ) ) { $self->{_gripe}->( $errorcode, @_ ); } return; } sub _displayable { my $self = shift; my $errorcode = shift; my $directives = $self->{_directives}; if ( not defined $directives->{$errorcode} ) { return 1; } else { return $directives->{$errorcode}; } } sub _start_document { return; } sub _end_document { my ($self,$line,$column) = @_; for my $tag ( sort keys %isRequired ) { if ( !$self->{_first_seen}->{$tag} ) { $self->gripe( 'doc-tag-required', tag => $tag ); } } return; } sub _start { my ($self,$tag,$line,$column,@attr) = @_; $self->{_line} = $line; $self->{_column} = $column; my $validattr = $isKnownAttribute{ $tag }; if ( $validattr ) { my %seen; my $i = 0; while ( $i < @attr ) { my ($attr,$val) = @attr[$i++,$i++]; if ( $seen{$attr}++ ) { $self->gripe( 'attr-repeated', tag => $tag, attr => $attr ); } if ( !$validattr->{$attr} ) { $self->gripe( 'attr-unknown', tag => $tag, attr => $attr ); } $self->_entity($val, 'attr'); } # while attribs } else { $self->gripe( 'elem-unknown', tag => $tag ); } $self->_element_push( $tag ) unless $HTML::Tagset::emptyElement{ $tag }; if ( my $where = $self->{_first_seen}{$tag} ) { if ( $isNonrepeatable{$tag} ) { $self->gripe( 'elem-nonrepeatable', tag => $tag, where => HTML::Lint::Error::where( @{$where} ) ); } } else { $self->{_first_seen}{$tag} = [$line,$column]; } # Call any other overloaded func my $tagfunc = "_start_$tag"; if ( $self->can($tagfunc) ) { $self->$tagfunc( $tag, @attr ); } return; } sub _text { my ($self,$text) = @_; $self->_entity($text, 'text'); return; } sub _entity { my ($self,$text,$type) = @_; if ( not $self->{_entity_lookup} ) { my @entities = sort keys %HTML::Entities::entity2char; # Strip his semicolons s/;$// for @entities; $self->{_entity_lookup} = { map { ($_,1) } @entities }; } while ( $text =~ /([^\x09\x0A\x0D -~])/g ) { my $bad = $1; $self->gripe( $type . 
'-use-entity', char => sprintf( '\x%02lX', ord($bad) ), entity => $char2entity{ $bad } || '&#' . ord($bad) . ';', ); } while ( $text =~ /&([^ ;]*;?)/g ) { my $match = $1; if ( $match eq '' ) { $self->gripe( $type . '-use-entity', char => '&', entity => '&' ); } elsif ( $match !~ m/;$/ ) { if ( exists $self->{_entity_lookup}->{$match} || $match =~ m/^#(\d+)$/ || $match =~ m/^#x[\dA-F]+$/i) { $self->gripe( $type . '-unclosed-entity', entity => "&$match;" ); } else { $self->gripe( $type . '-unknown-entity', entity => "&$match" ); } } elsif ( $match =~ m/^#(\d+);$/ ) { # All numeric entities are OK. We used to check that they were in a given range. } elsif ( $match =~ m/^#x([\dA-F]+);$/i ) { # All hex entities OK. We used to check that they were in a given range. } else { $match =~ s/;$//; if ( !exists $self->{_entity_lookup}->{$match} ) { $self->gripe( $type . '-unknown-entity', entity => "&$match;" ); } } } return; } sub _comment { my ($self,$tagname,$line,$column,$text) = @_; # Look for the html-lint directives if ( $tagname =~ m/^\s*html-lint\s*(.+)\s*$/ ) { my $text = $1; my @commands = split( /\s*,\s*/, $text ); for my $command ( @commands ) { my ($directive,$value) = split( /\s*:\s*/, $command, 2 ); _trim($_) for ($directive,$value); if ( ($directive ne 'all') && ( not exists $HTML::Lint::Error::errors{ $directive } ) ) { $self->gripe( 'config-unknown-directive', directive => $directive, where => HTML::Lint::Error::where($line,$column) ); next; } my $normalized_value = _normalize_value( $value ); if ( !defined($normalized_value) ) { $self->gripe( 'config-unknown-value', directive => $directive, value => $value, where => HTML::Lint::Error::where($line,$column) ); next; } if ( $directive eq 'all' ) { for my $err ( keys %HTML::Lint::Error::errors ) { $self->_set_directive( $err, $normalized_value ); } } else { $self->_set_directive( $directive, $normalized_value ); } } } return; } sub _set_directive { my $self = shift; my $which = shift; my $what = shift; 
$self->{_directives}{$which} = $what; return; } sub _normalize_value { my $what = shift; $what = _trim( $what ); return 1 if $what eq '1' || $what eq 'on' || $what eq 'true'; return 0 if $what eq '0' || $what eq 'off' || $what eq 'false'; return undef; } sub _trim { $_[0] =~ s/^\s+//; $_[0] =~ s/\s+$//; return $_[0]; } sub _end { ## no critic ( Subroutines::ProhibitManyArgs ) I have no choice in what these args are. my ($self,$tag,$line,$column,$tokenpos,@attr) = @_; $self->{_line} = $line; $self->{_column} = $column; if ( !$tokenpos ) { # This is a dummy end event for something like <img />. # Do nothing. } elsif ( $HTML::Tagset::emptyElement{ $tag } ) { $self->gripe( 'elem-empty-but-closed', tag => $tag ); } else { if ( $self->_in_context($tag) ) { my @leftovers = $self->_element_pop_back_to($tag); for ( @leftovers ) { my ($tag,$line,$col) = @{$_}; $self->gripe( 'elem-unclosed', tag => $tag, where => HTML::Lint::Error::where($line,$col) ) unless $HTML::Tagset::optionalEndTag{$tag}; } # for } else { $self->gripe( 'elem-unopened', tag => $tag ); } } # is empty element # Call any other overloaded func my $tagfunc = "_end_$tag"; if ( $self->can($tagfunc) ) { $self->$tagfunc( $tag, $line ); } return; } sub _element_push { my $self = shift; for ( @_ ) { push( @{$self->{_stack}}, [$_,$self->{_line},$self->{_column}] ); } # while return; } sub _find_tag_in_stack { my $self = shift; my $tag = shift; my $stack = $self->{_stack}; my $offset = @{$stack} - 1; while ( $offset >= 0 ) { if ( $stack->[$offset][0] eq $tag ) { return $offset; } --$offset; } # while return; } sub _element_pop_back_to { my $self = shift; my $tag = shift; my $offset = $self->_find_tag_in_stack($tag) or return; my @leftovers = splice( @{$self->{_stack}}, $offset + 1 ); pop @{$self->{_stack}}; return @leftovers; } sub _in_context { my $self = shift; my $tag = shift; my $offset = $self->_find_tag_in_stack($tag); return defined $offset; } # Overridden tag-specific stuff sub _start_img { ## no critic ( 
Subroutines::ProhibitUnusedPrivateSubroutines ) # Called by parser based on tag name. my ($self,$tag,%attr) = @_; my ($h,$w,$src) = @attr{qw( height width src )}; if ( defined $h && defined $w ) { # Check sizes } else { $self->gripe( 'elem-img-sizes-missing', src=>$src ); } if ( not defined $attr{alt} ) { $self->gripe( 'elem-img-alt-missing', src=>$src ); } return; } sub _start_input { ## no critic ( Subroutines::ProhibitUnusedPrivateSubroutines ) # Called by parser based on tag name. my ($self,$tag,%attr) = @_; my ($type,$alt) = @attr{qw( type alt )}; if ( defined($type) && (lc($type) eq 'image') ) { my $ok = defined($alt); if ( $ok ) { $alt =~ s/^ +//; $alt =~ s/ +$//; $ok = ($alt ne ''); } if ( !$ok ) { my $name = $attr{name}; $name = '' unless defined $name; $self->gripe( 'elem-input-alt-missing', name => $name ); } } return; } 1; ���������������������������������������������������������������������������HTML-Lint-2.32/lib/HTML/Lint/HTML4.pm���������������������������������������������������������������0000644�0001750�0001750�00000021717�13224567121�015201� 0����������������������������������������������������������������������������������������������������ustar �andy����������������������������andy�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������package HTML::Lint::HTML4; use warnings; use strict; =head1 NAME HTML::Lint::HTML4 -- Rules for HTML 4 as used by HTML::Lint. =head1 SYNOPSIS Collection of tags and attributes for use by HTML::Lint. You can add your own tags and attributes if you like. # Add an attribute that your company uses. HTML::Lint::HTML4::add_attribute( 'body', 'proprietary-attribute' ); # Add the HTML 5 <canvas> tag. HTML::Lint::HTML4::add_tag( 'canvas' ); HTML::Lint::HTML4::add_attribute( 'canvas', $_ ) for qw( height width ); This must be done before HTML::Lint does any validation. 
Note also that this modifies a global table, and is not on a per-object basis.

=cut

use parent 'Exporter';

our @EXPORT_OK = qw( %isKnownAttribute %isRequired %isNonrepeatable %isObsolete );

# Turns a list of names into a set: a hash reference mapping each name to 1.
sub _hash { return { map { ($_ => 1) } @_ } }

# Tags that all share the same standard attribute set (see the map at the
# top of %isKnownAttribute below).
our @physical = qw( b big code i kbd s small strike sub sup tt u xmp );
our @content  = qw( abbr acronym cite code dfn em kbd samp strong var );

# Attribute groups that are reused by most tags.
our @core     = qw( class id style title );
our @i18n     = qw( dir lang );
our @events   = qw( onclick ondblclick onkeydown onkeypress onkeyup
                    onmousedown onmousemove onmouseout onmouseover onmouseup );
our @std      = (@core,@i18n,@events);

our %isRequired      = %{_hash( qw( html body head title ) )};
our %isNonrepeatable = %{_hash( qw( html head base title body isindex ))};
our %isObsolete      = %{_hash( qw( listing plaintext xmp ) )};

# Some day I might do something with these.  For now, they're just comments.
# Both simply return their arguments unchanged.
sub _ie_only { return @_ }
sub _ns_only { return @_ }

# Maps each known tag name to the set (see _hash) of attribute names that
# are accepted on that tag.  A tag that is missing from this table is unknown.
our %isKnownAttribute = (
    # All the physical and content markup has the same standard attributes.
    (map { $_=>_hash(@std) } (@physical, @content) ),

    a           => _hash( @std, qw( accesskey charset coords href hreflang name onblur onfocus rel rev shape tabindex target type ) ),
    address     => _hash( @std ),
    applet      => _hash( @std ),
    area        => _hash( @std, qw( accesskey alt coords href nohref onblur onfocus shape tabindex target ) ),
    base        => _hash( qw( href target ) ),
    basefont    => _hash( qw( color face id size ) ),
    bdo         => _hash( @core, @i18n ),
    blockquote  => _hash( @std, qw( cite ) ),
    body        => _hash( @std,
                        qw( alink background bgcolor link marginheight marginwidth onload onunload text vlink ),
                        _ie_only( qw( bgproperties leftmargin topmargin ) ),
                    ),
    br          => _hash( @core, qw( clear ) ),
    button      => _hash( @std, qw( accesskey disabled name onblur onfocus tabindex type value ) ),
    caption     => _hash( @std, qw( align ) ),
    center      => _hash( @std ),
    # NOTE: there is deliberately no separate "cite" entry here.  cite is a
    # member of @content, so the map above already gives it the standard
    # attributes; an explicit empty entry would replace that set with an
    # empty one.
    col         => _hash( @std, qw( align char charoff span valign width ) ),
    colgroup    => _hash( @std, qw( align char charoff span valign width ) ),
    del         => _hash( @std, qw( cite datetime ) ),
    div         => _hash( @std, qw( align ) ),
    dir         => _hash( @std, qw( compact ) ),
    dd          => _hash( @std ),
    dl          => _hash( @std, qw( compact ) ),
    dt          => _hash( @std ),
    embed       => _hash(
                        qw( align height hidden name palette quality play src units width ),
                        _ns_only( qw( border hspace pluginspage type vspace ) ),
                    ),
    fieldset    => _hash( @std ),
    font        => _hash( @core, @i18n, qw( color face size ) ),
    form        => _hash( @std, qw( accept-charset action enctype method name onreset onsubmit target ) ),
    frame       => _hash( @core, qw( frameborder longdesc marginheight marginwidth name noresize scrolling src ) ),
    frameset    => _hash( @core, qw( cols onload onunload rows border bordercolor frameborder framespacing ) ),
    h1          => _hash( @std, qw( align ) ),
    h2          => _hash( @std, qw( align ) ),
    h3          => _hash( @std, qw( align ) ),
    h4          => _hash( @std, qw( align ) ),
    h5          => _hash( @std, qw( align ) ),
    h6          => _hash( @std, qw( align ) ),
    head        => _hash( @i18n, qw( profile ) ),
    hr          => _hash( @core, @events, qw( align noshade size width ) ),
    html        => _hash( @i18n, qw( version xmlns xml:lang ) ),
    iframe      => _hash( @core, qw( align frameborder height longdesc marginheight marginwidth name scrolling src width ) ),
    img         => _hash( @std, qw( align alt border height hspace ismap longdesc name src usemap vspace width ) ),
    input       => _hash( @std, qw( accept accesskey align alt border checked disabled maxlength name onblur onchange onfocus onselect readonly size src tabindex type usemap value ) ),
    ins         => _hash( @std, qw( cite datetime ) ),
    isindex     => _hash( @core, @i18n, qw( prompt ) ),
    label       => _hash( @std, qw( accesskey for onblur onfocus ) ),
    legend      => _hash( @std, qw( accesskey align ) ),
    li          => _hash( @std, qw( type value ) ),
    'link'      => _hash( @std, qw( charset href hreflang media rel rev target type ) ),
    'map'       => _hash( @std, qw( name ) ),
    menu        => _hash( @std, qw( compact ) ),
    meta        => _hash( @i18n, qw( content http-equiv name scheme ) ),
    nobr        => _hash( @std ),
    noframes    => _hash( @std ),
    noscript    => _hash( @std ),
    object      => _hash( @std, qw( align archive border classid codebase codetype data declare height hspace name standby tabindex type usemap vspace width )),
    ol          => _hash( @std, qw( compact start type ) ),
    optgroup    => _hash( @std, qw( disabled label ) ),
    option      => _hash( @std, qw( disabled label selected value ) ),
    p           => _hash( @std, qw( align ) ),
    param       => _hash( qw( id name type value valuetype ) ),
    plaintext   => _hash(),
    pre         => _hash( @std, qw( width ) ),
    q           => _hash( @std, qw( cite ) ),
    script      => _hash( qw( charset defer event for language src type ) ),
    'select'    => _hash( @std, qw( disabled multiple name onblur onchange onfocus size tabindex ) ),
    span        => _hash( @std ),
    style       => _hash( @i18n, qw( media title type ) ),
    table       => _hash( @std,
                        qw( align bgcolor border cellpadding cellspacing datapagesize frame rules summary width ),
                        _ie_only( qw( background bordercolor bordercolordark bordercolorlight ) ),
                        _ns_only( qw( bordercolor cols height hspace vspace ) ),
                    ),
    tbody       => _hash( @std, qw( align char charoff valign ) ),
    td          => _hash( @std,
                        qw( abbr align axis bgcolor char charoff colspan headers height nowrap rowspan scope valign width ),
                        _ie_only( qw( background bordercolor bordercolordark bordercolorlight ) ),
                    ),
    textarea    => _hash( @std, qw( accesskey cols disabled name onblur onchange onfocus onselect readonly rows tabindex wrap ) ),
    th          => _hash( @std,
                        qw( abbr align axis bgcolor char charoff colspan headers height nowrap rowspan scope valign width ),
                        _ie_only( qw( background bordercolor bordercolordark bordercolorlight ) ),
                    ),
    thead       => _hash( @std, qw( align char charoff valign ) ),
    tfoot       => _hash( @std, qw( align char charoff valign ) ),
    title       => _hash( @i18n ),
    tr          => _hash( @std,
                        qw( align bgcolor char charoff valign ),
                        _ie_only( qw( bordercolor bordercolordark bordercolorlight nowrap ) ),
                        _ns_only( qw( nowrap ) ),
                    ),
    ul          => _hash( @std, qw( compact type ) ),
);

=head1 FUNCTIONS

The functions below are very specifically not exported, and need to be
called with a complete
package reference, so as to remind the programmer that she is monkeying
with the entire package.

=head2 add_tag( $tag );

Adds a tag to the list of tags that HTML::Lint knows about.  If you
specify a tag that HTML::Lint already knows about, then nothing is
changed.

    HTML::Lint::HTML4::add_tag( 'canvas' );

=cut

sub add_tag {
    my ($tag) = @_;

    # Give the tag an empty attribute set, but only if it has none yet, so
    # that attributes of an already-known tag are left alone.
    $isKnownAttribute{ $tag } ||= {};

    return;
}

=head2 add_attribute( $tag, $attribute );

Adds an attribute to a tag that HTML::Lint knows about.  The tag must
already be known to HTML::Lint or else this function will die.

    HTML::Lint::HTML4::add_attribute( 'canvas', $_ ) for qw( height width );

=cut

sub add_attribute {
    my ($tag, $attr) = @_;

    # Look the tag's attribute set up once; an unknown tag is a fatal error.
    my $known_attrs = $isKnownAttribute{ $tag }
        or die "Tag $tag is unknown";

    $known_attrs->{ $attr } = 1;

    return;
}

1;

__END__

=head1 AUTHOR

Andy Lester C<andy at petdance.com>

=head1 COPYRIGHT

Copyright 2005-2018 Andy Lester.

This program is free software; you can redistribute it and/or modify
it under the terms of the Artistic License v2.0.

http://www.opensource.org/licenses/Artistic-2.0

Please note that these modules are not products of or supported by the
employers of the various contributors to the code.
=cut �������������������������������������������������HTML-Lint-2.32/lib/HTML/Lint/Error.pm���������������������������������������������������������������0000644�0001750�0001750�00000026172�13224570052�015437� 0����������������������������������������������������������������������������������������������������ustar �andy����������������������������andy�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������package HTML::Lint::Error; use warnings; use strict; use parent 'Exporter'; our @EXPORT_OK = qw( STRUCTURE HELPER FLUFF ); our %EXPORT_TAGS = ( types => [@EXPORT_OK] ); our %errors; =head1 NAME HTML::Lint::Error - Error object for the Lint functionality =head1 SYNOPSIS See L<HTML::Lint> for all the gory details. =head1 EXPORTS None. It's all object-based. =head1 METHODS Almost everything is an accessor. =head1 Error types: C<STRUCTURE>, C<HELPER>, C<FLUFF> Each error has a type. Note that these roughly, but not exactly, go from most severe to least severe. =over 4 =item * C<STRUCTURE> For problems that relate to the structural validity of the code. Examples: Unclosed <TABLE> tags, incorrect values for attributes, and repeated attributes. =item * C<HELPER> Helpers are notes that will help you with your HTML, or that will help the browser render the code better or faster. Example: Missing HEIGHT and WIDTH attributes in an IMG tag. =item * C<FLUFF> Fluff is for items that don't hurt your page, but don't help it either. This is usually something like an unknown attribute on a tag. =back =cut use constant CONFIG => 1; use constant STRUCTURE => 2; use constant HELPER => 3; use constant FLUFF => 4; =head2 new() Create an object. It's not very exciting. 
=cut

# Constructor.  Arguments, in order: file name, line, column, error code,
# then name => value pairs that are substituted into the error's message
# template by _expand_error().
sub new {
    my $class    = shift;

    my $file     = shift;
    my $line     = shift;
    my $column   = shift;
    my $errcode  = shift;
    my @errparms = @_;

    # Add an element that says what tag caused the error (B, TR, etc)
    # so that we can match 'em up down the road.
    my $self  = {
        _file    => $file,
        _line    => $line,
        _column  => $column,
        _errcode => $errcode,
        _errtext => undef,
        _type    => undef,
    };

    bless $self, $class;

    # Fills in _errtext and _type from the %errors table.
    $self->_expand_error( $errcode, @errparms );

    return $self;
}

# Looks $errcode up in %errors, stores the error's type in _type and stores
# the message text in _errtext after replacing every ${name} placeholder with
# the value from the name => value pairs passed after $errcode.  An error code
# that is not in %errors produces the text "Unknown code: CODE" and leaves
# the type unset.
sub _expand_error {
    my $self    = shift;
    my $errcode = shift;

    my $specs = $errors{$errcode};
    my $str;
    if ( $specs ) {
        ($str, $self->{_type}) = @{$specs};
    }
    else {
        $str = "Unknown code: $errcode";
    }

    if ( defined $str ) {
        while ( @_ ) {
            my $var = shift;
            my $val = shift;

            # A value may legitimately be missing (for example the src of an
            # <img> tag that has no src attribute).  Substitute an empty
            # string rather than warning about an uninitialized value.
            $val = '' unless defined $val;

            # \Q..\E makes the placeholder name match literally, whatever
            # characters it contains.
            $str =~ s/\$\{\Q$var\E\}/$val/g;
        }
    }

    $self->{_errtext} = $str;

    return;
}

=head2 is_type( $type1 [, $type2 ] )

Tells if any of I<$type1>, I<$type2>... match the error's type.
Returns the type that matched.

    if ( $err->is_type( HTML::Lint::Error::STRUCTURE ) ) {....

=cut

sub is_type {
    my $self = shift;

    # The first candidate that equals this error's type wins.
    for my $matcher ( @_ ) {
        return $matcher if $matcher eq $self->type;
    }

    # No candidate matched.
    return;
}

=head2 where()

Returns a formatted string that describes where in the file the
error has occurred.

For example,

    (14:23)

for line 14, column 23.

Columns are stored zero-based and displayed one-based, so one is added
to the column that is passed in or stored in the object:
C<where( 14, 22 )> returns C<(14:23)>.

The terrible thing about this function is that it's both a plain
ol' formatting function as in

    my $str = where( 14, 23 );

AND it's an object method, as in:

    my $str = $error->where();

I don't know what I was thinking when I set it up this way, but
it's bad practice.

=cut

sub where {
    my $line;
    my $col;

    # A non-reference first argument means we were called as a plain
    # function with ( $line, $col ); otherwise we are a method on an error.
    if ( not ref $_[0] ) {
        $line = shift;
        $col = shift;
    }
    else {
        my $self = shift;
        $line = $self->line;
        $col = $self->column;
    }

    # Missing pieces are shown as an empty line and as the first column,
    # the same way the method form behaves for an error with no position.
    $line = '' unless defined $line;
    $col ||= 0;

    return sprintf( '(%s:%s)', $line, $col + 1 );
}

=head2 as_string()

Returns a nicely-formatted string for printing out to stdout or some similar user thing.
=cut

sub as_string {
    my ($self) = @_;

    # "FILE (LINE:COL) MESSAGE", each part separated by a single space.
    my $file  = $self->file;
    my $where = $self->where;
    my $text  = $self->errtext;

    return "$file $where $text";
}

=head2 file()

Returns the filename of the error, as set by the caller.

=head2 line()

Returns the line number of the error.

=head2 column()

Returns the column number, starting from 0

=head2 errcode()

Returns the HTML::Lint error code.  Don't rely on this, because it will
probably go away.

=head2 errtext()

Descriptive text of the error

=head2 type()

Type of the error

=cut

# Plain read-only accessors.  Each one returns the stored field, or an empty
# string when the field is unset or otherwise false.

sub file {
    my ($self) = @_;
    return $self->{_file} || '';
}

sub line {
    my ($self) = @_;
    return $self->{_line} || '';
}

sub column {
    my ($self) = @_;
    return $self->{_column} || '';
}

sub errcode {
    my ($self) = @_;
    return $self->{_errcode} || '';
}

sub errtext {
    my ($self) = @_;
    return $self->{_errtext} || '';
}

sub type {
    my ($self) = @_;
    return $self->{_type} || '';
}

=head1 POSSIBLE ERRORS

Each possible error in HTML::Lint has a code.  These codes are used
to identify each error for when you need to turn off error checking
for a specific error.
=cut

# Every error HTML::Lint can report, keyed by error code.  Each value is a
# two-element array: the message template, then the error type constant
# (CONFIG, STRUCTURE, HELPER or FLUFF).  The ${name} placeholders in a
# template are filled in by _expand_error(), not by Perl, which is why the
# templates are single-quoted.
%errors = ( ## no critic ( ValuesAndExpressions::RequireInterpolationOfMetachars )
    # Misuse of the API or of the in-document configuration directives.
    'api-parse-not-called'      => ['The parse() method has not been called on this file.', CONFIG],
    'api-eof-not-called'        => ['The eof() method has not been called on this file.', CONFIG],
    'config-unknown-directive'  => ['Unknown directive "${directive}"', CONFIG],
    'config-unknown-value'      => ['Unknown value "${value}" for ${directive} directive', CONFIG],

    # Problems with elements.
    'elem-empty-but-closed'     => ['<${tag}> is not a container -- </${tag}> is not allowed', STRUCTURE],
    'elem-img-alt-missing'      => ['<img src="${src}"> does not have ALT text defined', HELPER],
    'elem-img-sizes-missing'    => ['<img src="${src}"> tag has no HEIGHT and WIDTH attributes', HELPER],
    'elem-input-alt-missing'    => ['<input name="${name}" type="image"> does not have non-blank ALT text defined', HELPER],
    'elem-nonrepeatable'        => ['<${tag}> is not repeatable, but already appeared at ${where}', STRUCTURE],
    'elem-unclosed'             => ['<${tag}> at ${where} is never closed', STRUCTURE],
    'elem-unknown'              => ['Unknown element <${tag}>', STRUCTURE],
    'elem-unopened'             => ['</${tag}> with no opening <${tag}>', STRUCTURE],

    # Problems with the document as a whole.
    'doc-tag-required'          => ['<${tag}> tag is required', STRUCTURE],

    # Problems with attributes and with entities inside attribute values.
    'attr-repeated'             => ['${attr} attribute in <${tag}> is repeated', STRUCTURE],
    'attr-unknown'              => ['Unknown attribute "${attr}" for tag <${tag}>', FLUFF],
    'attr-unclosed-entity'      => ['Entity ${entity} is missing its closing semicolon', STRUCTURE],
    'attr-unknown-entity'       => ['Entity ${entity} is unknown', STRUCTURE],
    'attr-use-entity'           => ['Character "${char}" should be written as ${entity}', STRUCTURE],

    # Problems with entities in text.
    'text-unclosed-entity'      => ['Entity ${entity} is missing its closing semicolon', STRUCTURE],
    'text-unknown-entity'       => ['Entity ${entity} is unknown', STRUCTURE],
    'text-use-entity'           => ['Character "${char}" should be written as ${entity}', STRUCTURE],
);

=head2 api-parse-not-called

You called the C<errors()> method before calling C<parse()> and C<eof()>.

=head2 api-eof-not-called

You called the C<errors()> method before calling C<eof()>.

=head2 config-unknown-directive

Unknown directive "DIRECTIVE"

You specified a directive in a comment for HTML::Lint that it didn't recognize.

=head2 config-unknown-value

Unknown value "VALUE" for DIRECTIVE directive

Directive values can only be "on", "off", "yes", "no", "true", "false", "0" and "1".

=head2 elem-unknown

HTML::Lint doesn't recognize the tag.

=head2 elem-unopened

C<< </tag> >> with no opening C<< <tag> >>.

=head2 elem-unclosed

C<< <tag> >> at WHERE is never closed.

=head2 elem-empty-but-closed

C<< <tag> >> is not a container -- C<< </tag> >> is not allowed.

=head2 elem-img-alt-missing

C<< <img src="FILENAME.PNG"> >> does not have ALT text defined.

=head2 elem-img-sizes-missing

C<< <img src="FILENAME.PNG"> >> tag has no HEIGHT and WIDTH attributes.

=head2 elem-nonrepeatable

C<< <tag> >> is not repeatable, but already appeared at WHERE.

=head2 doc-tag-required

C<< <tag> >> tag is required.

=head2 attr-repeated

ATTR attribute in C<< <tag> >> is repeated.

=head2 attr-unknown

Unknown attribute "ATTR" for tag C<< <tag> >>.

=head2 text-unclosed-entity

Entity ENTITY is missing its closing semicolon

=head2 text-unknown-entity

Entity ENTITY is unknown

=head2 text-use-entity

Character "CHAR" should be written as ENTITY

=head1 COPYRIGHT & LICENSE

Copyright 2005-2018 Andy Lester.

This program is free software; you can redistribute it and/or modify
it under the terms of the Artistic License v2.0.

http://www.opensource.org/licenses/Artistic-2.0

Please note that these modules are not products of or supported by the
employers of the various contributors to the code.

=head1 AUTHOR

Andy Lester, C<andy at petdance.com>

=cut

1; # happy

__DATA__
Errors that haven't been done yet.

#elem-head-only <${tag}> can only appear in the <HEAD> element #elem-non-head-element <${tag}> cannot appear in the <HEAD> element #elem-obsolete <${tag}> is obsolete #elem-nested-element <${tag}> cannot be nested -- one is already opened at ${where} #elem-wrong-context Illegal context for <${tag}> -- must appear in <${othertag}> tag. #elem-heading-in-anchor <A> should be inside <${tag}>, not <${tag}> inside <A> #elem-head-missing No <HEAD> element found #elem-head-missing-title No <TITLE> in <HEAD> element #elem-img-sizes-incorrect <IMG> tag's HEIGHT and WIDTH attributes are incorrect. They should be ${correct}. #attr-missing <${tag}> is missing a "${attr}" attribute #comment-unclosed Unclosed comment #comment-markup Markup embedded in a comment can confuse some browsers #text-literal-metacharacter Metacharacter $char should be represented as "$otherchar" #text-title-length The HTML spec recommends that <TITLE> be no more than 64 characters #text-markup Tag <${tag}> found in the <TITLE>, which will not be rendered properly. #elem-physical-markup <${tag}> is physical font markup. Use logical (such as <${othertag}>) instead. 
#elem-leading-whitespace <${tag}> should not have whitespace between "<" and "${tag}>" #'must-follow' => [ ENABLED, MC_ERROR, '<$argv[0]> must immediately follow <$argv[1]>', ], # 'empty-container' => [ ENABLED, MC_WARNING, 'empty container element <$argv[0]>.', ], # 'directory-index' => [ ENABLED, MC_WARNING, 'directory $argv[0] does not have an index file ($argv[1])', ], # 'attribute-delimiter' => [ ENABLED, MC_WARNING, 'use of \' for attribute value delimiter is not supported by all browsers (attribute $argv[0] of tag $argv[1])', ], # 'container-whitespace' => [ DISABLED, MC_WARNING, '$argv[0] whitespace in content of container element $argv[1]', ], # 'bad-text-context' => [ ENABLED, MC_ERROR, 'illegal context, <$argv[0]>, for text; should be in $argv[1].', ], # 'attribute-format' => [ ENABLED, MC_ERROR, 'illegal value for $argv[0] attribute of $argv[1] ($argv[2])', ], # 'quote-attribute-value' => [ ENABLED, MC_ERROR, 'value for attribute $argv[0] ($argv[1]) of element $argv[2] should be quoted (i.e. 
$argv[0]="$argv[1]")', ], # 'meta-in-pre' => [ ENABLED, MC_ERROR, 'you should use "$argv[0]" in place of "$argv[1]", even in a PRE element.', ], # 'implied-element' => [ ENABLED, MC_WARNING, 'saw <$argv[0]> element, but no <$argv[1]> element', ], # 'button-usemap' => [ ENABLED, MC_ERROR, 'illegal to associate an image map with IMG inside a BUTTON', ], ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������HTML-Lint-2.32/Changes������������������������������������������������������������������������������0000644�0001750�0001750�00000015203�13313263516�013057� 0����������������������������������������������������������������������������������������������������ustar �andy����������������������������andy�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Revision history HTML::Lint and Test::HTML::Lint. NOTE: All bugs and requests are now being handled through GitHub. https://github.com/petdance/html-lint/issues Please DO NOT send bug reports to http://rt.cpan.org/. 2.32 Fri Jun 22 15:57:39 CDT 2018 Note that this very well may be the final release of HTML::Lint that I make. I've been spending my time on HTML::Tidy5, which works on HTML5, checks for more problems, and is much faster. If you're interested in maintaining HTML::Lint, send me email at andy@petdance.com. I'm not sure I want to hand it off to anyone yet, but we can discuss. [ENHANCEMENTS] Allow "weblint -" to read from STDIN. Thanks, Frank Dana. 2.30 Sun Jan 7 22:02:25 CST 2018 No changes since 2.27_03. 
2.27_03 Wed Jan 3 17:07:07 CST 2018 [FIXES] Removed the text-invalid-entity and attr-invalid-entity, which were for entities that had an invalid numeric value, anything greater than &#65536;. There is no longer a restriction on the numeric values of HTML entities. (GH#60) 2.27_02 Wed Dec 27 11:46:28 CST 2017 There are no functionality changes since 2.27_01. [INTERNALS] Many Perl::Critic cleanups. 2.27_01 Fri Dec 22 15:54:32 CST 2017 [ENHANCEMENTS] Adds checking of entities in attributes, not just text. Thanks, Klaus S. Madsen. [FIXES] Calling ->parsefile() would generate an error. Thanks, Shlomi Fish. (GH#58) [INTERNALS] Prepare for perl 5.26.0 which removes '.' from @INC. Thanks, Jim Keenan. Fix disttest target. Thanks, Shlomi Fish. 2.26 Thu Dec 29 22:36:54 CST 2016 Stable release. No changes from previous release. 2.25_02 Tue Dec 27 14:34:22 CST 2016 [FIXES] html_fragment_ok() was not properly excluding document-level errors. It was effectively the same as html_ok(). 2.25_01 Fri Dec 23 22:36:17 CST 2016 [ENHANCEMENTS] Added two new types of errors to let you know you're using the API incorrectly. You should be parsing files like this: my $lint = HTML::Lint->new; $lint->newfile( $filename ); $lint->parse( $line ); $lint->eof(); my @errors = $lint->errors(); If you neglect to call ->parse or ->eof, you'll get an error returned in the list of errors from ->errors(). [FIXES] Test::HTML::Lint::html_fragment_ok() was not properly calling ->eof. 2.24 Wed Dec 7 22:20:13 CST 2016 Official release. No changes from 2.23_01. 2.23_01 Tue Dec 6 22:48:56 CST 2016 [ENHANCEMENTS] Added detection of unknown HTML entities, like "known &unclosed &entities are not found". Also fixes the case where HTML::Lint gets confused by an entity like "&sup2;" which it thought was an unterminated "&sup" entity. Thanks, Klaus S. Madsen. [FIXES] Errors of the type doc-tag-required did not come out in any defined order. They are now sorted by tag name. 
This was discovered because hash randomization caused tests to fail on Perl 5.18 and above. Thanks, Slaven Rezic, Andrew Main and Lisa Hare. Handle some warnings that get thrown if certain values are undef. Thanks, Yves Lavoie. Handle characters that are not handled by HTML::Entities. (GitHub issue #13) Thanks, Tim Landscheidt. [INTERNALS] Add a test to verify a fixed bug. Thanks to Lance Wicks as part of the CPAN Pull Request Challenge. 2.22 Mon Apr 6 15:47:11 CDT 2015 [CHANGES THAT COULD BREAK YOUR CODE] Previously, html_ok() would not check the entire structure of a web page to check for <html>, <head>, <title> and <body> tags. Now it will. If you want to check fragments of HTML for validity but know that they are not valid HTML documents on their own, use the new html_fragment_ok(). [ENHANCEMENTS] Added new error, elem-input-alt-missing, that warns of <input type="image"> tags that are missing an alt="" attribute. This helps for accessibility to make sure that any images have alternate text for screen readers. Added ability to modify HTML::Lint's table of known tags and attributes, so you could do this: # Add an attribute that your company uses. HTML::Lint::HTML4::add_attribute( 'body', 'proprietary-attribute' ); # Add the HTML 5 <canvas> tag. HTML::Lint::HTML4::add_tag( 'canvas' ); HTML::Lint::HTML4::add_attribute( 'canvas', $_ ) for qw( height width ); [FIXES] Test::HTML::Lint::html_ok() would not call the HTML::Lint eof() method, which meant it wouldn't do document-wide tests. 2.20 Fri Apr 6 00:49:51 CDT 2012 [ENHANCEMENTS] Sometimes creating HTML::Lint-compliant HTML just isn't possible. Now you can turn individual errors on and off in your HTML via comment directives, like so: <!-- html-lint elem-img-sizes-missing: off, attr-unknown: off --> And if you have a batch of code that's hopeless: <!-- html-lint all: off --> Added check for unknown entities, such as "&foo;". Added check for unclosed entities, such as "&amp" without the closing semicolon. 
Added a check for a bare ampersand that should be written as &amp; 2.10 Tue Dec 6 11:16:16 CST 2011 [FIXES] Tags that were self-closed were being ignored. For example, if you had <img src="blah.jpg" /> then HTML::Lint would ignore the tag. This has been fixed. [LICENSE] HTML::Lint is now explicitly licensed under Artistic License 2.0, instead of the vague "same terms as Perl itself." 2.06 Thu Dec 18 00:07:54 CST 2008 [FIXES] Added attributes to <frameset>. <strong> tag didn't allow any attributes. Now it does. Removed the <listing> tag. 2.04 Mon Jun 2 11:41:16 CDT 2008 [FIXES] <textarea> now knows about the wrap attribute. [INTERNALS] Lots of enhancements pointed out by Perl::Critic. 2.02 Thu Nov 3 11:49:18 CST 2005 [ENHANCEMENTS] * The warnings for missing ALT and HEIGHT/WIDTH on your images now give the SRC attribute. 2.00 Tue Sep 20 23:10:39 CDT 2005 [CHANGES THAT COULD BREAK YOUR CODE] * I've changed the object structure. HTML::Lint now has-a HTML::Parser, and no longer is-a HTML::Parser. * weblint-cgi and weblint-original are no longer distributed with HTML::Lint. * Now requires Perl 5.6.0. [FIXES] * Line numbering is now correct if you parse more than one file. * Changed t/*.*.t so that they only had one period in the filename. Apparently VMS doesn't like filenames with multiple dots. Pre-2.00 I've thrown away previous history because really, nobody cares what changed in this module prior to 2005. 
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������