HTML-StripScripts-Parser-1.03/0000755000000000000000000000000011274523714014606 5ustar rootrootHTML-StripScripts-Parser-1.03/MANIFEST0000644000000000000000000000031011274523714015731 0ustar rootrootMANIFEST Changes Makefile.PL README Parser.pm t/10basic.t t/20script.t t/50flow.t t/70xss.t t/80pod.t t/81pod_coverage.t META.yml Module meta-data (added by MakeMaker) HTML-StripScripts-Parser-1.03/Changes0000644000175000001660000000261111274520624017051 0ustar clintonapache1.03 5 Nov 2009 Changed min version of HTML::StripScripts to 1.0.5 1.02 1 Dec 2007 Made minimum required version of HTML::StripScripts 1.04 because of a bug in previous versions 1.01 21 Nov 2007 Made minimum required version of HTML::Parser 3.56 because of some parsing bugs in previous versions of that module 1.00 5 Jun 2007 Passed all CPAN tests - bumped version 0.991 29 May 2007 Fixed some Kwalitee issues: - improved META.yml - added Test::Pod and Test::Pod::Coverage tests 0.99 28 May 2007 Added the filter_html() method Added the XSS tests from http://ha.ckers.org/xss.html Now requires HTML::StripScripts v 0.99 0.06 09 Dec 2003 Removed some more tests that proved fragile with respect to HTML::Parser variations. 0.05 09 Dec 2003 Depend on HTML::Parser 3.25 or later, since we need the end_document hook. 0.04 02 Oct 2003 Added PARSER_OPTIONS to the init() method. 0.03 02 Oct 2003 Removed some fragile tests that depended on HTML::Parser parsing in a particular way. Added a workaround for HTML::Parser bug http://rt.cpan.org/NoAuth/Bug.html?id=3954 0.02 31 Mar 2003 Removed misplaced =back in the POD. 0.01 30 Mar 2003 Initial release. HTML-StripScripts-Parser-1.03/README0000644000175000001660000000410710626541243016440 0ustar clintonapacheNAME HTML::StripScripts::Parser - XSS filter using HTML::Parser SYNOPSIS use HTML::StripScripts::Parser; my $hss = HTML::StripScripts::Parser->new( { Context => 'Document', Rules => { ... 
}, }, strict_comment => 1, strict_names => 1, ); $hss->parse_file("foo.html"); print $hss->filtered_document; DESCRIPTION This class subclasses both HTML::StripScripts and HTML::Parser, adding the input methods that HTML::Parser provides to HTML::StripScripts. See HTML::StripScripts and HTML::Parser. CONSTRUCTORS new ( CONFIG, [PARSER_OPTIONS] ) Creates a new "HTML::StripScripts::Parser" object, and invokes the HTML::Parser init() method so that tags are fed to the correct HTML::StripScripts methods. The CONFIG parameter has the same semantics as the CONFIG parameter to the HTML::StripScripts constructor. Any PARSER_OPTIONS supplied will be passed on to the HTML::Parser init method, allowing you to influence the way the input is parsed. You cannot use PARSER_OPTIONS to set HTML::Parser event handlers, since "HTML::StripScripts::Parser" uses all of the event hooks itself. METHODS See HTML::Parser for input methods, HTML::StripScripts for output methods. SUBCLASSING The "HTML::StripScripts::Parser" class is subclassable. Filter objects are plain hashes. The hss_init() method takes the same arguments as new(), and calls the initialization methods of both "HTML::StripScripts" and "HTML::Parser". See "SUBCLASSING" in HTML::StripScripts and "SUBCLASSING" in HTML::Parser. SEE ALSO HTML::StripScripts, HTML::Parser AUTHOR Original author Nick Cleaton <nick@cleaton.net> New code added and module maintained by Clinton Gormley <clint@traveljury.com> COPYRIGHT Copyright (C) 2003 Nick Cleaton. All Rights Reserved. This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 
HTML-StripScripts-Parser-1.03/Parser.pm0000644000175000001660000001020111274520567017350 0ustar clintonapachepackage HTML::StripScripts::Parser; use strict; use vars qw($VERSION); $VERSION = '1.03'; =head1 NAME HTML::StripScripts::Parser - XSS filter using HTML::Parser =head1 SYNOPSIS use HTML::StripScripts::Parser(); my $hss = HTML::StripScripts::Parser->new( { Context => 'Document', ## HTML::StripScripts configuration Rules => { ... }, }, strict_comment => 1, ## HTML::Parser options strict_names => 1, ); $hss->parse_file("foo.html"); print $hss->filtered_document; OR print $hss->filter_html($html); =head1 DESCRIPTION This class provides an easy interface to C<HTML::StripScripts>, using C<HTML::Parser> to parse the HTML. See L<HTML::Parser> for details of how to customise how the raw HTML is parsed into tags, and L<HTML::StripScripts> for details of how to customise the way those tags are filtered. =cut =head1 CONSTRUCTORS =over =item new ( {CONFIG}, [PARSER_OPTIONS] ) Creates a new C<HTML::StripScripts::Parser> object. The CONFIG parameter has the same semantics as the CONFIG parameter to the C<HTML::StripScripts> constructor. Any PARSER_OPTIONS supplied will be passed on to the L<HTML::Parser> init method, allowing you to influence the way the input is parsed. You cannot use PARSER_OPTIONS to set the C<HTML::Parser> event handlers (see L<HTML::Parser>) since C<HTML::StripScripts::Parser> uses all of the event hooks itself. However, you can use C<Rules> (see L<HTML::StripScripts>) to customise the handling of all tags and attributes. 
=cut

use HTML::StripScripts;
use HTML::Parser;
use base qw(HTML::StripScripts HTML::Parser);

# Set up a filter object by calling init() with the event-handler table and
# then delegating to the parent class's hss_init().
#
# Arguments:
#   $cfg            - configuration, handed unchanged to SUPER::hss_init().
#   @parser_options - extra key/value options, forwarded to init() ahead of
#                     the handler table built here.
#
# Returns whatever SUPER::hss_init() returns.
sub hss_init {
    my $self           = shift;
    my $cfg            = shift;
    my @parser_options = @_;

    # Events that are all registered with the 'self,text' argspec, paired
    # with the name of the method that handles them.  Note that both text_h
    # and default_h are routed to input_text.
    my @text_events = (
        start_h       => 'input_start',
        end_h         => 'input_end',
        text_h        => 'input_text',
        default_h     => 'input_text',
        declaration_h => 'input_declaration',
        comment_h     => 'input_comment',
        process_h     => 'input_process',
    );

    my @handlers = (
        api_version      => 3,
        start_document_h => [ 'input_start_document', 'self' ],
    );
    while ( my ( $event, $method ) = splice @text_events, 0, 2 ) {
        push @handlers, $event => [ $method, 'self,text' ];
    }
    push @handlers, end_document_h => [ 'input_end_document', 'self' ];

    # workaround for http://rt.cpan.org/NoAuth/Bug.html?id=3954
    # (only applied when running under HTML::Parser 3.29, 3.30 or 3.31)
    my @workaround;
    if ( $HTML::Parser::VERSION =~ /^3\.(29|30|31)$/ ) {
        @workaround = ( strict_comment => 1 );
    }

    # The caller's options come first in the argument list; the handler
    # table and the optional workaround follow them.
    $self->init( @parser_options, @handlers, @workaround );

    return $self->SUPER::hss_init($cfg);
}

=back

=head1 METHODS

See L<HTML::Parser> for input methods, L<HTML::StripScripts> for output
methods.

=head2 C<filter_html()>

C<filter_html()> is a convenience method for filtering HTML already loaded
into a scalar variable.  It combines calls to C<parse()>, C<eof()> and
C<filtered_document()>.

    $filtered_html = $hss->filter_html($html);

=cut

# Feed one complete HTML string through the filter and hand back the result:
# parse() the string, signal end of input with eof(), then return the value
# of filtered_document().
sub filter_html {
    my $self = shift;
    my ($html) = @_;

    $self->parse($html);
    $self->eof;

    return $self->filtered_document;
}

=head1 SUBCLASSING

The C<HTML::StripScripts::Parser> class is subclassable.  Filter objects are
plain hashes.  The hss_init() method takes the same arguments as new(), and
calls the initialization methods of both C<HTML::StripScripts> and
C<HTML::Parser>.

See L<HTML::StripScripts/"SUBCLASSING"> and L<HTML::Parser/"SUBCLASSING">.

=head1 SEE ALSO

L<HTML::StripScripts>, L<HTML::Parser>

=head1 BUGS

None reported.

Please report any bugs or feature requests to
bug-html-stripscripts-parser@rt.cpan.org, or through the web interface at
L<http://rt.cpan.org>.

=head1 AUTHOR

Original author Nick Cleaton E<lt>nick@cleaton.netE<gt>

New code added and module maintained by Clinton Gormley
E<lt>clint@traveljury.comE<gt>

=head1 COPYRIGHT

Copyright (C) 2003 Nick Cleaton.  All Rights Reserved.

Copyright (C) 2007 Clinton Gormley.  All Rights Reserved.

=head1 LICENSE

This module is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut 1; HTML-StripScripts-Parser-1.03/META.yml0000644000000000000000000000107611274523714016063 0ustar rootroot--- #YAML:1.0 name: HTML-StripScripts-Parser version: 1.03 abstract: XSS filter using HTML::Parser license: perl author: - Nick Cleaton , Clinton Gormley generated_by: ExtUtils::MakeMaker version 6.42 distribution_type: module requires: HTML::Parser: 3.56 HTML::StripScripts: 1.05 Test::More: 0 meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.3.html version: 1.3 HTML-StripScripts-Parser-1.03/Makefile.PL0000644000175000001660000000107511274520633017533 0ustar clintonapacheuse ExtUtils::MakeMaker; WriteMakefile( 'NAME' => 'HTML::StripScripts::Parser', 'VERSION_FROM' => 'Parser.pm', 'ABSTRACT_FROM' => 'Parser.pm', 'AUTHOR' => 'Nick Cleaton ,' . ' Clinton Gormley ', 'PREREQ_PM' => { 'Test::More' => 0, 'HTML::StripScripts' => 1.05, 'HTML::Parser' => 3.56, }, $ExtUtils::MakeMaker::VERSION >= 6.31 ? ( 'LICENSE' => 'perl' ) : (), ); HTML-StripScripts-Parser-1.03/t/0000755000000000000000000000000011274523714015051 5ustar rootrootHTML-StripScripts-Parser-1.03/t/80pod.t0000644000175000001660000000020110627000430017116 0ustar clintonapacheuse Test::More; eval "use Test::Pod 1.00"; plan skip_all => "Test::Pod 1.00 required for testing POD" if $@; all_pod_files_ok(); HTML-StripScripts-Parser-1.03/t/10basic.t0000644000175000001660000000261310626541243017432 0ustar clintonapache use strict; use Test::More tests => 14; BEGIN { $^W = 1 } use_ok('HTML::StripScripts::Parser'); use vars qw($p); $p = HTML::StripScripts::Parser->new; isa_ok( $p, 'HTML::StripScripts::Parser' ); my $pp = $p->new; isa_ok( $pp, 'HTML::StripScripts::Parser' ); test( '', '', 'empty document' ); test( 'foo', 'foo', 'text only document' ); test( "f\0o", 'f o', 'strip nulls' ); test( 'foo', 'foo', 'parse into tags' ); test( 'xy', 'xy', 'filter start' ); test( 'xy', 'xy', 'filter end' ); test( '
', '
', 'filter text' ); test( 'xy', 'xy', 'filter process' ); test( 'xy', 'xy', 'filter comment' ); test( 'xy', 'xy', 'filter declaration' ); { package MyFilter; use base qw(HTML::StripScripts::Parser); sub output_end { my ( $self, $text ) = @_; $self->output( uc $text ); } } $p = MyFilter->new; test( 'foo', 'foo', 'subclassing works as expected' ); sub test { my ( $in, $out, $name ) = @_; is( $p->filter_html($in), $out, $name ); } HTML-StripScripts-Parser-1.03/t/81pod_coverage.t0000644000175000001660000000042110627011451021003 0ustar clintonapacheuse Test::More; eval "use Test::Pod::Coverage tests=>1"; plan skip_all => "Test::Pod::Coverage required for testing POD coverage" if $@; pod_coverage_ok( "HTML::StripScripts::Parser", { trustme => ['hss_init'] }, "HTML::StripScripts::Parser is covered" ); HTML-StripScripts-Parser-1.03/t/20script.t0000644000175000001660000000173710626541243017664 0ustar clintonapache use strict; BEGIN { $^W = 1 } use Test::More tests => 2; use HTML::StripScripts::Parser; my $filt = HTML::StripScripts::Parser->new( { Context => 'Document' } ); is( $filt->filter_html(< script test page foo IN script test page foo OUT is( $filt->filter_html(< script test page foo baz IN script test page foo baz OUT HTML-StripScripts-Parser-1.03/t/50flow.t0000644000175000001660000002725410626541243017334 0ustar clintonapache use strict; BEGIN { $^W = 1; use vars qw(@tests); @tests = ( [ 'empty', q{}, q{} ], [ 'space', q{ }, q{ } ], [ 'plain', q{hello mum}, q{hello mum} ], [ 'plain nl', qq{hello mum\n}, "hello mum\n" ], [ 'nonprint', qq{foo\0bar}, "foo bar" ], [ 'p tag', qq{

hello mum\n}, "

hello mum\n

" ], [ 'i tag', qq{hello mum\n}, "hello mum\n" ], [ 'valid p', q{

valid p

}, q{

valid p

} ], [ 'misplaced tr', q{misplaced }, q{misplaced } ], [ 'misplaced td', q{misplaced }, q{misplaced } ], [ 'misplaced li', q{misplaced
  • }, q{misplaced } ], [ 'misplaced cdata', q{misplaced cdata
    hello}, q{
    misplaced cdatahello} ], [ 'pass emtpy img', q{}, q{} ], [ 'block img src', q{}, q{} ], [ 'block a href', q{x}, q{x} ], [ 'block a mailto', q{x}, q{x} ], [ 'unknown tag', q{}, q{} ], [ 'unknown attr', q{}, q{} ], [ 'misplaced close', q{}, q{} ], [ 'br', q{
    hello
    }, q{
    hello} ], [ 'hr width', q{x
    y}, q{x
    y} ], [ 'hr width dq', q{x
    y}, q{x
    y} ], [ 'hr width sq', q{x
    y}, q{x
    y} ], [ 'hr silly width', q{x
    y}, q{x
    y} ], [ 'hr silly width dq', q{x
    y}, q{x
    y} ], [ 'hr silly width sq', q{x
    y}, q{x
    y} ], [ 'bad trailing /', q{hello}, q{hello} ], [ 'good trailing /', q{
    }, q{
    } ], [ 'interleave', q{ghE}, q{ghE} ], [ 'interleave case', q{ghE}, q{ghE} ], [ 'interleave open', q{ghE}, q{ghE} ], [ 'p close order', q{

    one

    two

    three}, q{

    one

    two

    three

    } ], [ 'p/li close order', q{
    • 1

    • 2

    }, q{
    • 1

    • 2

    }, ], [ 'p/li left open', q{
    • 1

    • 2}, q{

      • 1

      • 2

      }, ], [ 'italic p', q{foo

      bar}, q{foo

      bar

      } ], [ 'misplaced close', q{foo
      }, q{foo} ], # [ 'lonley <', q{<}, q{<} ], [ 'lonley >', q{>}, q{>} ], [ 'lonley "', q{"}, q{"} ], [ 'lonley &', q{&}, q{&} ], [ 'valid entity', q{<}, q{<} ], [ 'uppercase entity', q{Þ}, q{Þ} ], [ 'valid numeric ent', q{{}, '{' ], [ 'valid hex entity', q{k}, q{k} ], [ 'unicode numeric', q{ಂ}, q{ಂ} ], [ 'unicode hex lc', q{뾔}, q{뾔} ], [ 'unicode hex uc', q{뾔}, q{뾔} ], [ 'unknown entity', q{&foo;}, q{&foo;} ], [ 'nasty entity', q{ &{foo}; }, q{ &{foo}; } ], [ 'minus entity', q{&foo-foo;}, q{&foo-foo;} ], [ 'underscore entity', q{&foo_foo;}, q{&foo_foo;} ], [ 'overlong entity', q{&littlesquigglethingwithalinethroughit;}, q{&littlesquigglethingwithalinethroughit;} ], [ 'overlong hex', q{�}, q{&#x7FB20A4E;} ], [ 'overlong decimal', q{�}, q{&#349850348;} ], [ '-ve decimal', q{&#-7;}, q{&#-7;} ], [ '+ve decimal', q{&#+7;}, q{&#+7;} ], [ 'invalid numeric', q{&#o777;}, q{&#o777;} ], [ '<">'>=&\{\}}, q{';alert(String.fromCharCode(88,83,83))//\';alert(String.fromCharCode(88,83,83))//";alert(String.fromCharCode(88,83,83))//\";alert(String.fromCharCode(88,83,83))//-->">'>=&{}}, # 2 q{'';!--"=&\{()\}}, q{'';!--"=&{()}}, # 3 q{}, q{}, # 4 q{}, q{}, # 5 q{}, q{}, # 6 q{}, q{}, # 7 q{}, q{}, # 8 q{}, q{}, # 9 q{}, q{}, # 10 q{
      }, q{
      }, # 11 q{
      }, q{
      }, # 12 q{
      }, q{
      }, # 13 q{}, q{}, # 14 q{}, q{}, # 15 q{}, q{}, # 16 q{}, q{}, # 17 q{}, q{}, # 18 q{}, q{}, # 19 q{}, q{}, # 20 q{exp/*
      • XSS}, q{exp/*}, # 21 q{}, q{}, # 22 q{}, q{}, # 23 q{}, q{}, # 24 q{}, q{}, # 25 q{}, q{}, # 26 q{}, q{}, # 27 q{}, q{}, # 28 q{}, q{}, # 29 q{}, q{}, # 30 q{}, q{}, # 31 q{a="get"; b="URL(""; c="javascript:"; d="alert('XSS');")";}, q{a="get"; b="URL(""; c="javascript:"; d="alert('XSS');")";}, # 32 q{}, q{}, # 33 q{}, q{}, # 34 q{}, q{}, # 35 q{}, q{}, # 36 q{}, q{}, # 37 q{}, q{}, # 38 q{}, q{}, # 39 q{}, q{}, # 40 q{}, q{}, # 41 q{}, q{}, # 42 q{
        }, q{
        }, # 43 q{
        }, q{
        }, # 44 q{}, q{}, # 45 q{]]>}, q{]]>}, # 46 q{}, q{}, # 47 q{}, q{}, # 48 q{}, q{}, # 49 q{}, # 50 q{}, q{}, # 51 q{}, q{}, # 52 q{}, q{}, # 53 q{}, q{'"-->}, # 54 q{}, # 55 q{
        }, q{
        }, # 56 q{<}, q{}, # 57 q{}, q{}, # 58 q{}, q{}, # 59 q{}, q{}, # 60 q{}, q{}, # 61 q{}, q{}, # 62 q{}, q{}, # 63 q{
        }, q{
        }, # 64 q{}, q{}, # 65 q{ +ADw-SCRIPT+AD4-alert('XSS');+ADw-/SCRIPT+AD4-}, q{ +ADw-SCRIPT+AD4-alert('XSS');+ADw-/SCRIPT+AD4-}, # 66 q{\";alert('XSS');//}, q{\";alert('XSS');//}, # 67 q{}, q{}, # 68 q{}, q{}, # 69 q{}, q{}, # 70 q{}, q{}, # 71 q{}, q{}, # 72 q{}, q{}, # 73 q{}, # 74 q{perl -e 'print "";'> out}, q{perl -e 'print "";'> out}, # 75 q{perl -e 'print "&alert("XSS")";' > out}, q{perl -e 'print "&alert("XSS")";' > out}, # 76 q{}, q{}, # 77 q{}, q{}, # 78 q{}, q{}, # 79 q{}, q{<}, # 84 q{">}, q{">}, # 85 q{}, q{}, # 87 q{}, q{}, # 88 q{}, q{}, # 89 q{}, q{}, # 90 q{}, q{}, # 91 q{PT SRC="http://ha.ckers.org/xss.js">}, q{PT SRC="http://ha.ckers.org/xss.js">}, # 92 q{}, q{}, # 93 q{XSS}, q{XSS}, # 95 q{XSS}, q{XSS}, # 96 q{XSS}, q{XSS}, # 97 q{XSS}, q{XSS}, # 98 q{test
test}, q{test test}, # 99 q{test
test}, q{test&#10;test}, ); }