HTML-Strip-2.10/0000755000175000017500000000000012706404362011747 5ustar alexalexHTML-Strip-2.10/README0000644000175000017500000000104612706402730012625 0ustar alexalexHTML::Strip =========== This module strips HTML-like markup from text. It is written in XS, and thus about five times quicker than using regular expressions for the same task. INSTALLATION To install this module type the following: perl Makefile.PL make make test make install COPYRIGHT AND LICENCE Please report any bugs/suggestions to Alex Bowley Copyright (c) 2003 Alex Bowley. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. HTML-Strip-2.10/Strip.xs0000644000175000017500000000436512706403746013441 0ustar alexalex#include "EXTERN.h" #include "perl.h" #include "XSUB.h" #include "strip_html.h" MODULE = HTML::Strip PACKAGE = HTML::Strip PROTOTYPES: ENABLE Stripper * _create() PREINIT: Stripper * stripper; CODE: Newx( stripper, 1, Stripper ); _reset( stripper ); RETVAL = stripper; OUTPUT: RETVAL void _xs_destroy( stripper ) Stripper * stripper CODE: Safefree( stripper ); SV * _strip_html( stripper, text ) Stripper * stripper SV * text PREINIT: char * raw = (char *)SvPV_nolen(text); char * clean; int size = strlen(raw)+1; INIT: Newx( clean, size+1, char); CODE: _strip_html( stripper, raw, clean, SvUTF8(text) ); RETVAL = newSVpv(clean, strlen(clean)); if( SvUTF8(text) ) SvUTF8_on(RETVAL); OUTPUT: RETVAL CLEANUP: Safefree( clean ); void _reset( stripper ) Stripper * stripper void clear_striptags( stripper ) Stripper * stripper void add_striptag( stripper, tag ) Stripper * stripper char * tag void set_emit_spaces( stripper, emit ) Stripper * stripper int emit CODE: stripper->o_emit_spaces = emit; void set_decode_entities( stripper, decode ) Stripper * stripper int decode CODE: stripper->o_decode_entities = decode; int decode_entities( stripper ) Stripper * stripper CODE: RETVAL = stripper->o_decode_entities; OUTPUT: RETVAL void _set_striptags_ref( stripper, tagref ) Stripper * stripper SV * tagref PREINIT: AV * tags; I32 numtags = 0; int n; if( (SvROK(tagref)) && (SvTYPE(SvRV(tagref)) == SVt_PVAV) ) { tags = (AV *) SvRV(tagref); } else { XSRETURN_UNDEF; } numtags = av_len(tags); if( numtags < 0 ) { XSRETURN_UNDEF; } CODE: clear_striptags( stripper ); for (n = 0; n <= numtags; n++) { STRLEN l; char * tag = SvPV(*av_fetch(tags, n, 0), l); add_striptag( stripper, tag ); } void set_auto_reset( stripper, value ) Stripper * stripper int value CODE: stripper->o_auto_reset = value; int auto_reset( stripper ) Stripper * stripper CODE: RETVAL = stripper->o_auto_reset; OUTPUT: RETVAL void set_debug( stripper, value ) Stripper * stripper int value CODE: stripper->o_debug = value; int debug( stripper ) Stripper * stripper CODE: RETVAL = stripper->o_debug; OUTPUT: RETVAL HTML-Strip-2.10/META.yml0000644000175000017500000000121412706404362013216 0ustar alexalex--- abstract: 'Perl extension for stripping HTML markup from text.' author: - 'Alex Bowley ' build_requires: ExtUtils::MakeMaker: '0' configure_requires: ExtUtils::MakeMaker: '0' dynamic_config: 1 generated_by: 'ExtUtils::MakeMaker version 6.66, CPAN::Meta::Converter version 2.142690' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: '1.4' name: HTML-Strip no_index: directory: - t - inc requires: Carp: '0' DynaLoader: '0' Test::Exception: '0' Test::More: '0' perl: '5.008' strict: '0' warnings: '0' version: '2.10' x_recommended: HTML::Entities: 0 HTML-Strip-2.10/Makefile.PL0000644000175000017500000000203212706402730013713 0ustar alexalexuse 5.008; use ExtUtils::MakeMaker; my $EUMM_VERSION = eval $ExtUtils::MakeMaker::VERSION; WriteMakefile( NAME => 'HTML::Strip', VERSION_FROM => 'Strip.pm', PREREQ_PM => { # core modules 'warnings' => 0, 'strict' => 0, 'Carp' => 0, 'DynaLoader' => 0, # build requires 'Test::More' => 0, 'Test::Exception' => 0, }, ABSTRACT_FROM => 'Strip.pm', AUTHOR => 'Alex Bowley ', ( $EUMM_VERSION >= 6.46 ? ( LICENSE => 'perl', META_MERGE => { recommended => { 'HTML::Entities' => 0, }, }, ) : () ), ( $EUMM_VERSION >= 6.48 ? ( MIN_PERL_VERSION => 5.008, ) : () ), LIBS => [''], # e.g., '-lm' DEFINE => '', # e.g., '-DHAVE_SOMETHING' INC => '', # e.g., '-I/usr/include/other' OBJECT => '$(O_FILES)', # link all the C files too ); HTML-Strip-2.10/META.json0000644000175000017500000000215712706404362013375 0ustar alexalex{ "abstract" : "Perl extension for stripping HTML markup from text.", "author" : [ "Alex Bowley " ], "dynamic_config" : 1, "generated_by" : "ExtUtils::MakeMaker version 6.66, CPAN::Meta::Converter version 2.142690", "license" : [ "perl_5" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : "2" }, "name" : "HTML-Strip", "no_index" : { "directory" : [ "t", "inc" ] }, "prereqs" : { "build" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "configure" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "runtime" : { "requires" : { "Carp" : "0", "DynaLoader" : "0", "Test::Exception" : "0", "Test::More" : "0", "perl" : "5.008", "strict" : "0", "warnings" : "0" } } }, "release_status" : "stable", "version" : "2.10", "x_recommended" : { "HTML::Entities" : 0 } } HTML-Strip-2.10/strip_html.c0000644000175000017500000002452412706403064014305 0ustar alexalex#include #include #include #include "strip_html.h" #ifdef _MSC_VER #define strcasecmp(a,b) stricmp(a,b) #endif void _strip_html( Stripper * stripper, char * raw, char * output, int is_utf8_p ) { char * p_raw = raw; char * raw_end = raw + strlen(raw); char * p_output = output; int width; if( stripper->o_debug ) { printf( "[DEBUG] input string: %s\n", p_raw ); } while( p_raw < raw_end ) { width = is_utf8_p ? utf8_char_width(p_raw) : 1; if( stripper->o_debug ) { printf( "[DEBUG] char:%C w%i state:%c%c%c tag:%5s last:%c%c%c%c in:%c%c%c quote:%c ", *p_raw, width, (stripper->f_closing ? 'C' : ' '), (stripper->f_in_tag ? 'T' : ' '), (stripper->f_full_tagname ? 'F' : ' '), stripper->tagname, (stripper->f_just_seen_tag ? 'T' : ' '), (stripper->f_outputted_space ? 'S' : ' '), (stripper->f_lastchar_slash ? '/' : ' '), (stripper->f_lastchar_minus ? '-' : ' '), (stripper->f_in_decl ? 'D' : ' '), (stripper->f_in_comment ? 'C' : ' '), (stripper->f_in_striptag ? 'X' : ' '), (stripper->f_in_quote ? stripper->quote : ' ') ); } // either a single char or a set of unicode code points; if( stripper->f_in_tag ) { /* inside a tag */ /* check if we know either the tagname, or that we're in a declaration */ if( !stripper->f_full_tagname && !stripper->f_in_decl ) { /* if this is the first character, check if it's a '!'; if so, we're in a declaration */ if( stripper->p_tagname == stripper->tagname && *p_raw == '!' ) { stripper->f_in_decl = 1; } /* then check if the first character is a '/', in which case, this is a closing tag */ else if( stripper->p_tagname == stripper->tagname && *p_raw == '/' ) { stripper->f_closing = 1; } /* if the first character wasn't a '/', and we're in a stripped block, * assume this is a mathematical operator and reset */ else if( !stripper->f_closing && stripper->f_in_striptag && stripper->p_tagname == stripper->tagname && *p_raw != '/' ) { stripper->f_in_tag = 0; stripper->f_closing = 0; /* we only care about closing tags within a stripped tags block (e.g. scripts) */ } else if( !stripper->f_in_striptag || stripper->f_closing ) { /* if we don't have the full tag name yet, add p_raw character unless it's whitespace, a '/', or a '>'; otherwise null pad the string and set the full tagname flag, and check the tagname against stripped ones. also sanity check we haven't reached the array bounds, and truncate the tagname here if we have */ if( (!isspace( *p_raw ) && *p_raw != '/' && *p_raw != '>') && !( (stripper->p_tagname - stripper->tagname) == MAX_TAGNAMELENGTH ) ) { *stripper->p_tagname++ = *p_raw; } else { *stripper->p_tagname = 0; stripper->f_full_tagname = 1; /* if we're in a stripped tag block, and this is a closing tag, check to see if it ends the stripped block */ if( stripper->f_in_striptag && stripper->f_closing ) { if( strcasecmp( stripper->tagname, stripper->striptag ) == 0 ) { stripper->f_in_striptag = 0; } /* if we're outside a stripped tag block, check tagname against stripped tag list */ } else if( !stripper->f_in_striptag && !stripper->f_closing ) { int i; for( i = 0; i < stripper->numstriptags; i++ ) { if( strcasecmp( stripper->tagname, stripper->o_striptags[i] ) == 0 ) { stripper->f_in_striptag = 1; strcpy( stripper->striptag, stripper->tagname ); } } } check_end( stripper, *p_raw ); } } } else { if( stripper->f_in_quote ) { /* inside a quote */ /* end of quote if p_raw character matches the opening quote character */ if( *p_raw == stripper->quote ) { stripper->quote = 0; stripper->f_in_quote = 0; } } else { /* not in a quote */ /* check for quote characters, but not in a comment */ if( !stripper->f_in_comment && ( *p_raw == '\'' || *p_raw == '\"' ) ) { stripper->f_in_quote = 1; stripper->quote = *p_raw; /* reset lastchar_* flags in case we have something perverse like '-"' or '/"' */ stripper->f_lastchar_minus = 0; stripper->f_lastchar_slash = 0; } else { if( stripper->f_in_decl ) { /* inside a declaration */ if( stripper->f_lastchar_minus ) { /* last character was a minus, so if p_raw one is, then we're either entering or leaving a comment */ if( *p_raw == '-' ) { stripper->f_in_comment = !stripper->f_in_comment; } stripper->f_lastchar_minus = 0; } else { /* if p_raw character is a minus, we might be starting a comment marker */ if( *p_raw == '-' ) { stripper->f_lastchar_minus = 1; } } if( !stripper->f_in_comment ) { check_end( stripper, *p_raw ); } } else { check_end( stripper, *p_raw ); } } /* quote character check */ } /* in quote check */ } /* full tagname check */ } else { /* not in a tag */ /* check for tag opening, and reset parameters if one has */ if( *p_raw == '<' ) { stripper->f_in_tag = 1; stripper->tagname[0] = 0; stripper->p_tagname = stripper->tagname; stripper->f_full_tagname = 0; stripper->f_closing = 0; stripper->f_just_seen_tag = 1; } else { /* copy to stripped provided we're not in a stripped block */ if( !stripper->f_in_striptag ) { /* only emit spaces if we're configured to do so (on by default) */ if( stripper->o_emit_spaces ){ /* output a space in place of tags we have previously parsed, and set a flag so we only do this once for every group of tags. done here to prevent unnecessary trailing spaces */ if( !isspace(*p_raw) && /* don't output a space if this character is one anyway */ !stripper->f_outputted_space && stripper->f_just_seen_tag ) { if( stripper->o_debug ) { printf("SPACE "); } *p_output++ = ' '; stripper->f_outputted_space = 1; } } strncpy(p_output, p_raw, width); if( stripper->o_debug ) { printf("CHAR %c", *p_raw); } p_output += width; /* reset 'just seen tag' flag */ stripper->f_just_seen_tag = 0; /* reset 'outputted space' flag if character is not one */ if (!isspace(*p_raw)) { stripper->f_outputted_space = 0; } else { stripper->f_outputted_space = 1; } } } } /* in tag check */ p_raw += width; if( stripper->o_debug ) { printf("\n"); } } /* while loop */ *p_output = 0; if (stripper->o_auto_reset) { _reset( stripper ); } } int utf8_char_width(unsigned char * string) { if (~*string & 128) { // 0xxxxxxx return 1; } else if ((*string & 192) == 128) { // 10xxxxxx /* latter bytes of a multibyte utf8 char XXX this should never happen in practice XXX but we account for it anyway */ int width = 1; char * p = string; while ((*p++ & 192) == 128) { width++; } return width; } else if ((*string & 224) == 192) { // 110xxxxx return 2; } else if ((*string & 240) == 224) { // 1110xxxx return 3; } else if ((*string & 248) == 240) { // 11110xxx return 4; /* part of original utf8 spec, but not used } else if ((*string & 252) == 248) { // 111110xx return 5; } else if ((*string & 254) == 252) { // 1111110x return 6; */ } else { printf( "[WARN] invalid utf8 char ord=%i\n", *string ); return 1; } } void _reset( Stripper * stripper ) { stripper->f_in_tag = 0; stripper->f_closing = 0; stripper->f_lastchar_slash = 0; stripper->f_full_tagname = 0; /* hack to stop a space being output on strings starting with a tag */ stripper->f_outputted_space = 1; stripper->f_just_seen_tag = 0; stripper->f_in_quote = 0; stripper->f_in_decl = 0; stripper->f_in_comment = 0; stripper->f_lastchar_minus = 0; stripper->f_in_striptag = 0; memset(stripper->tagname, 0, sizeof(stripper->tagname)); } void clear_striptags( Stripper * stripper ) { strcpy(stripper->o_striptags[0], ""); stripper->numstriptags = 0; } void add_striptag( Stripper * stripper, char * striptag ) { if( stripper->numstriptags < MAX_STRIPTAGS-1 ) { strcpy(stripper->o_striptags[stripper->numstriptags++], striptag); } else { fprintf( stderr, "Cannot have more than %i strip tags", MAX_STRIPTAGS ); } } void check_end( Stripper * stripper, char end ) { /* if p_raw character is a slash, may be a closed tag */ if( end == '/' ) { stripper->f_lastchar_slash = 1; } else { /* if the p_raw character is a '>', then the tag has ended */ /* slight hack to deal with mathematical characters in script tags: * if we're in a stripped block, and this is a closing tag, spaces * will also end the tag, since we only want it for comparison with * the opening one */ if( (end == '>') || (stripper->f_in_striptag && stripper->f_closing && isspace(end)) ) { stripper->f_in_quote = 0; stripper->f_in_comment = 0; stripper->f_in_decl = 0; stripper->f_in_tag = 0; stripper->f_closing = 0; /* Do not start a stripped tag block if the tag is a closed one, e.g. '

Hello

EOF $hs->eof; }; # test for RT#99207 subtest "RT#99207" => sub { plan tests => 1; my $hs = HTML::Strip->new(); is( $hs->parse( < document.write(''); hallo EOF $hs->eof; }; HTML-Strip-2.10/t/250_whitespace_single_char.t0000644000175000017500000000035212706402730017454 0ustar alexalexuse Test::More tests => 1; use HTML::Strip; # test for RT#19036 my $hs = HTML::Strip->new(); is( $hs->parse( '01 May 2006010' ), '01 May 2006 0 10', "whitespace single character bug" ); $hs->eof; HTML-Strip-2.10/t/400_kwalitee.t0000644000175000017500000000030512706402730014562 0ustar alexalexuse Test::More; BEGIN { plan skip_all => 'these tests are for release candidate testing' unless $ENV{RELEASE_TESTING}; } use Test::Kwalitee 'kwalitee_ok'; kwalitee_ok(); done_testing; HTML-Strip-2.10/t/220_edge_case.t0000644000175000017500000000127412706402730014662 0ustar alexalexuse Test::More tests => 5; use HTML::Strip; # test for RT#21008 # stripping comments my $hs = HTML::Strip->new(); is( $hs->parse( "a<>b" ), "a b", 'edge case with <> ok' ); $hs->eof; is( $hs->parse( "a<>b c<>d" ), "a b c d", 'edge case with <>s ok' ); $hs->eof; is( $hs->parse( "From: <>\n\na. Title: some text\n\nb. etc\n" ), "From: \n\na. Title: some text\n\nb. etc\n", 'test case' ); is( $hs->parse( "From: <>\n\na. Title: some text\n\nb. etc\n" ), "From: \n\na. Title: some text\n\nb. etc\n", 'test case' ); $hs->eof; is( $hs->parse( q{this is an "example" with 'quoted' parts that should not be stripped} ), q{this is an "example" with 'quoted' parts that should not be stripped} ); HTML-Strip-2.10/t/260_offbyone.t0000644000175000017500000000044612706402730014576 0ustar alexalexuse Test::More tests => 1; # test for RT#94713 my $INC = join ' ', map { "-I$_" } @INC; SKIP: { skip "test fails on windows", 1 if $^O eq 'MSWin32'; is(`MALLOC_OPTIONS=Z $^X $INC -MHTML::Strip -e 'print HTML::Strip->new->parse(q[
  • abc < 0.5 km
  • xyz
  • ])'`, q[abc xyz]); } HTML-Strip-2.10/t/240_striptags.t0000644000175000017500000000163012706402730015001 0ustar alexalexuse Test::More tests => 2; use HTML::Strip; subtest "set_striptags( \@ARRAY )" => sub { plan tests => 2; my $hs = HTML::Strip->new; $hs->set_striptags( [ 'foo' ] ); is( $hs->parse( 'bar' ), 'foo bar', 'set_striptags redefinition works' ); $hs->eof; is( $hs->parse( 'foobar' ), 'bar', 'set_striptags redefinition works' ); $hs->eof; }; subtest "set_striptags( LIST )" => sub { plan tests => 3; my @striptags = qw(baz quux); my $hs = HTML::Strip->new; $hs->set_striptags( @striptags ); is( $hs->parse( 'fumblebarfoo' ), 'bar', 'stripping user-defined tags ok' ); $hs->eof; is( $hs->parse( 'fumblefoobar' ), 'bar', 'stripping user-defined tags ok' ); $hs->eof; is( $hs->parse( ' baz ' ), ' baz ', 'stripping user-defined tags ok' ); $hs->eof; }; HTML-Strip-2.10/t/001_smoke.t0000644000175000017500000000043212706402730014071 0ustar alexalexuse Test::More tests => 3; use Test::Exception; use_ok 'HTML::Strip'; my $hs; lives_ok( sub { $hs = HTML::Strip->new() }, "constructor doesn't blow up" ); SKIP: { skip "Constructor failed", 1 unless $hs; lives_ok( sub { $hs->parse('') }, "->parse() doesn't blow up" ); } HTML-Strip-2.10/t/210_auto_reset.t0000644000175000017500000000116012706402730015126 0ustar alexalexuse Test::More tests => 2; use HTML::Strip; subtest "reset off" => sub { plan tests => 2; my $hs = HTML::Strip->new; # auto_reset off by default my $o = $hs->parse( "\nTitle\n\nEnd\n" ); is( $o2, "\nEnd\n" ); }; subtest "reset on" => sub { plan tests => 2; my $hs = HTML::Strip->new( auto_reset => 1 ); # auto_reset on my $o = $hs->parse( "\nTitle\n\nEnd\n" ); is( $o2, "c+d\n\nEnd\n" ); }; HTML-Strip-2.10/t/100_basic.t0000644000175000017500000000117412706402730014040 0ustar alexalexuse Test::More tests => 7; use HTML::Strip; my $hs = HTML::Strip->new(); is( $hs->parse( 'test' ), 'test', 'works with plain text' ); $hs->eof; is( $hs->parse( 'test' ), 'test', 'works with | tags' ); $hs->eof; is( $hs->parse( 'foo
    bar' ), 'foo bar', 'works with
    tag' ); $hs->eof; is( $hs->parse( '

    test

    ' ), 'test', 'works with tags with attributes' ); $hs->eof; is( $hs->parse( 'bar' ), 'bar', 'strips tags' ); is( $hs->parse( 'baz' ), ' baz', 'strips tags' ); $hs->eof; is( $hs->parse( 'baz' ), 'baz', 'strip comments' ); $hs->eof; HTML-Strip-2.10/t/300_utf8.t0000644000175000017500000000741512706402730013653 0ustar alexalexuse Test::More tests => 5; use HTML::Strip; use FindBin qw/$Bin/; use Encode qw/is_utf8/; my $filename = "$Bin/russian.html"; ok( open my $fh, $filename ); SKIP: { skip "Cannot open $filename", 4 unless $fh; binmode $fh, ':utf8'; local $/; my $data = <$fh>; close $fh; binmode DATA, ':utf8'; my $expected_text = ; my $hs = HTML::Strip->new(); ok( my $clean_text = $hs->parse( $data ) ); ok( is_utf8($clean_text), "Text comes back as UTF-8" ); is( strip_spaces($clean_text), strip_spaces($expected_text) ); TODO: { local $TODO = "emit_spaces after '»' is inconsistent on other arch/versions"; is( $clean_text, $expected_text ); } } sub strip_spaces { my $text = shift; my $stripped = $text; $stripped =~ tr! !!d; return $stripped; } __DATA__ 23 января 2009 года \r 28 января 2009 года в Челябинске состоится семинар компании «Доктор Веб» — российского разработчика средств информационной безопасности под маркой Dr.Web. Мероприятие посетят партнеры компании, а также представители региональных масс-медиа. \r На сегодняшний день все большее количество пользователей в Челябинской области проявляют интерес к проблеме информационной безопасности. Несмотря на финансовый кризис, спрос на средства антивирусной и антиспам-защиты в регионе остается достаточно высоким. «Доктор Веб», высоко оценивая потребительский потенциал области, 28 января проведет в Челябинске партнерский семинар, который соберет многочисленных партнеров компании. \r В ходе семинара специалисты «Доктор Веб» подробно расскажут о ключевых функциях программных продуктов Dr.Web, их преимуществах в сравнении с конкурентами. Особое внимание будет уделено новинкам — Dr.Web Security Space и Антивирус Dr.Web для Windows 5.0. Помимо этого, будут обсуждены новые вирусные угрозы и технологии борьбы с ними. \r Также в рамках семинара состоится вручение сертификатов специалиста по продажам программного обеспечения Dr.Web. Для получения сертификата участник семинара должен дистанционно сдать экзамен до 26 января 2009 г. Для этого необходимо будет предварительно заполнить регистрационную анкету на сайте компании «Доктор Веб» , выбрав в ней соответствующий пункт («сдать on-line экзамен»). \r В подарок все участники семинара получат ключи, позволяющие в течение 2 месяцев бесплатно протестировать всю линейку продуктов Dr.Web. \r \r Адрес : \r Отель «Holiday Inn Chelyabinsk-Riverside», Университетская набережная, 18 \r Конференц-зал (2 этаж) \r Регистрация участников: 11:00 — 11:30 \r HTML-Strip-2.10/t/410_pod.t0000644000175000017500000000020112706402730013533 0ustar alexalexuse Test::More; eval "use Test::Pod 1.00"; plan skip_all => "Test::Pod 1.00 required for testing POD" if $@; all_pod_files_ok(); HTML-Strip-2.10/t/230_filter.t0000644000175000017500000000075012706402730014247 0ustar alexalexuse Test::More tests => 2; use HTML::Strip; subtest "no filter" => sub { plan tests => 1; my $hs = HTML::Strip->new( filter => undef ); ok( $hs->parse( ' ' ), ' ' ); $hs->eof; }; subtest "whitespace filter" => sub { plan tests => 1; my $filter = sub { my $s = shift; $s =~ s/\s/ /g;; $s }; my $hs = HTML::Strip->new( filter => $filter ); ok( $hs->parse( "title\ntext\ntext" ), 'title text text' ); $hs->eof; }; HTML-Strip-2.10/t/200_comment.t0000644000175000017500000000212312706402730014415 0ustar alexalex# http://rt.cpan.org/Public/Bug/Display.html?id=32355 use Test::More tests => 2; use HTML::Strip; subtest declarations => sub { plan tests => 1; my $hs = HTML::Strip->new(); is( $hs->parse( q{Text} ), "Text", 'decls are stripped' ); $hs->eof; }; subtest comments => sub { plan tests => 5; my $hs = HTML::Strip->new(); is( $hs->parse( q{Hello World!} ), "Hello World!", "comments are stripped" ); $hs->eof; is( $hs->parse( q{Hello World!} ), "Hello World!", q{comments may contain '} ); $hs->eof; is( $hs->parse( q{Hello World!} ), "Hello World!", q{comments may contain "} ); $hs->eof; is( $hs->parse( q{