HTML-Quoted-0.03/ 0000755 0000765 0000765 00000000000 11512247340 011672 5 ustar ruz ruz HTML-Quoted-0.03/Changes 0000644 0000765 0000765 00000000425 11512237724 013173 0 ustar ruz ruz 0.03 2011-01-09 * much more user friendlier documentation * get rid of empty {} in the results * fix mixed plain text quoting and blockquotes * properly handle
0.02 2010-02-27 * treat P tag as inline, for now 0.01 2010-02-12 * initial release HTML-Quoted-0.03/inc/ 0000755 0000765 0000765 00000000000 11512247340 012443 5 ustar ruz ruz HTML-Quoted-0.03/inc/Module/ 0000755 0000765 0000765 00000000000 11512247340 013670 5 ustar ruz ruz HTML-Quoted-0.03/inc/Module/Install/ 0000755 0000765 0000765 00000000000 11512247340 015276 5 ustar ruz ruz HTML-Quoted-0.03/inc/Module/Install/Base.pm 0000644 0000765 0000765 00000002147 11512247336 016517 0 ustar ruz ruz #line 1 package Module::Install::Base; use strict 'vars'; use vars qw{$VERSION}; BEGIN { $VERSION = '1.00'; } # Suspend handler for "redefined" warnings BEGIN { my $w = $SIG{__WARN__}; $SIG{__WARN__} = sub { $w }; } #line 42 sub new { my $class = shift; unless ( defined &{"${class}::call"} ) { *{"${class}::call"} = sub { shift->_top->call(@_) }; } unless ( defined &{"${class}::load"} ) { *{"${class}::load"} = sub { shift->_top->load(@_) }; } bless { @_ }, $class; } #line 61 sub AUTOLOAD { local $@; my $func = eval { shift->_top->autoload } or return; goto &$func; } #line 75 sub _top { $_[0]->{_top}; } #line 90 sub admin { $_[0]->_top->{admin} or Module::Install::Base::FakeAdmin->new; } #line 106 sub is_admin { ! $_[0]->admin->isa('Module::Install::Base::FakeAdmin'); } sub DESTROY {} package Module::Install::Base::FakeAdmin; use vars qw{$VERSION}; BEGIN { $VERSION = $Module::Install::Base::VERSION; } my $fake; sub new { $fake ||= bless(\@_, $_[0]); } sub AUTOLOAD {} sub DESTROY {} # Restore warning handler BEGIN { $SIG{__WARN__} = $SIG{__WARN__}->(); } 1; #line 159 HTML-Quoted-0.03/inc/Module/Install/Can.pm 0000644 0000765 0000765 00000003333 11512247336 016344 0 ustar ruz ruz #line 1 package Module::Install::Can; use strict; use Config (); use File::Spec (); use ExtUtils::MakeMaker (); use Module::Install::Base (); use vars qw{$VERSION @ISA $ISCORE}; BEGIN { $VERSION = '1.00'; @ISA = 'Module::Install::Base'; $ISCORE = 1; } # check if we can load some module ### Upgrade this to not have to load the module if possible sub can_use { my ($self, $mod, $ver) = @_; $mod =~ s{::|\\}{/}g; $mod .= '.pm' unless $mod =~ /\.pm$/i; my $pkg = $mod; $pkg =~ s{/}{::}g; $pkg =~ s{\.pm$}{}i; local $@; eval { require $mod; $pkg->VERSION($ver || 0); 1 }; } # check if we can run some command sub can_run { my ($self, $cmd) = @_; my $_cmd = $cmd; return $_cmd if (-x $_cmd or $_cmd = MM->maybe_command($_cmd)); for my $dir ((split /$Config::Config{path_sep}/, $ENV{PATH}), '.') { next if $dir eq ''; my $abs = File::Spec->catfile($dir, $_[1]); return $abs if (-x $abs or $abs = MM->maybe_command($abs)); } return; } # can we locate a (the) C compiler sub can_cc { my $self = shift; my @chunks = split(/ /, $Config::Config{cc}) or return; # $Config{cc} may contain args; try to find out the program part while (@chunks) { return $self->can_run("@chunks") || (pop(@chunks), next); } return; } # Fix Cygwin bug on maybe_command(); if ( $^O eq 'cygwin' ) { require ExtUtils::MM_Cygwin; require ExtUtils::MM_Win32; if ( ! defined(&ExtUtils::MM_Cygwin::maybe_command) ) { *ExtUtils::MM_Cygwin::maybe_command = sub { my ($self, $file) = @_; if ($file =~ m{^/cygdrive/}i and ExtUtils::MM_Win32->can('maybe_command')) { ExtUtils::MM_Win32->maybe_command($file); } else { ExtUtils::MM_Unix->maybe_command($file); } } } } 1; __END__ #line 156 HTML-Quoted-0.03/inc/Module/Install/Fetch.pm 0000644 0000765 0000765 00000004627 11512247336 016703 0 ustar ruz ruz #line 1 package Module::Install::Fetch; use strict; use Module::Install::Base (); use vars qw{$VERSION @ISA $ISCORE}; BEGIN { $VERSION = '1.00'; @ISA = 'Module::Install::Base'; $ISCORE = 1; } sub get_file { my ($self, %args) = @_; my ($scheme, $host, $path, $file) = $args{url} =~ m|^(\w+)://([^/]+)(.+)/(.+)| or return; if ( $scheme eq 'http' and ! eval { require LWP::Simple; 1 } ) { $args{url} = $args{ftp_url} or (warn("LWP support unavailable!\n"), return); ($scheme, $host, $path, $file) = $args{url} =~ m|^(\w+)://([^/]+)(.+)/(.+)| or return; } $|++; print "Fetching '$file' from $host... "; unless (eval { require Socket; Socket::inet_aton($host) }) { warn "'$host' resolve failed!\n"; return; } return unless $scheme eq 'ftp' or $scheme eq 'http'; require Cwd; my $dir = Cwd::getcwd(); chdir $args{local_dir} or return if exists $args{local_dir}; if (eval { require LWP::Simple; 1 }) { LWP::Simple::mirror($args{url}, $file); } elsif (eval { require Net::FTP; 1 }) { eval { # use Net::FTP to get past firewall my $ftp = Net::FTP->new($host, Passive => 1, Timeout => 600); $ftp->login("anonymous", 'anonymous@example.com'); $ftp->cwd($path); $ftp->binary; $ftp->get($file) or (warn("$!\n"), return); $ftp->quit; } } elsif (my $ftp = $self->can_run('ftp')) { eval { # no Net::FTP, fallback to ftp.exe require FileHandle; my $fh = FileHandle->new; local $SIG{CHLD} = 'IGNORE'; unless ($fh->open("|$ftp -n")) { warn "Couldn't open ftp: $!\n"; chdir $dir; return; } my @dialog = split(/\n/, <<"END_FTP"); open $host user anonymous anonymous\@example.com cd $path binary get $file $file quit END_FTP foreach (@dialog) { $fh->print("$_\n") } $fh->close; } } else { warn "No working 'ftp' program available!\n"; chdir $dir; return; } unless (-f $file) { warn "Fetching failed: $@\n"; chdir $dir; return; } return if exists $args{size} and -s $file != $args{size}; system($args{run}) if exists $args{run}; unlink($file) if $args{remove}; print(((!exists $args{check_for} or -e $args{check_for}) ? "done!" : "failed! ($!)"), "\n"); chdir $dir; return !$?; } 1; HTML-Quoted-0.03/inc/Module/Install/Makefile.pm 0000644 0000765 0000765 00000027032 11512247336 017362 0 ustar ruz ruz #line 1 package Module::Install::Makefile; use strict 'vars'; use ExtUtils::MakeMaker (); use Module::Install::Base (); use Fcntl qw/:flock :seek/; use vars qw{$VERSION @ISA $ISCORE}; BEGIN { $VERSION = '1.00'; @ISA = 'Module::Install::Base'; $ISCORE = 1; } sub Makefile { $_[0] } my %seen = (); sub prompt { shift; # Infinite loop protection my @c = caller(); if ( ++$seen{"$c[1]|$c[2]|$_[0]"} > 3 ) { die "Caught an potential prompt infinite loop ($c[1]|$c[2]|$_[0])"; } # In automated testing or non-interactive session, always use defaults if ( ($ENV{AUTOMATED_TESTING} or -! -t STDIN) and ! $ENV{PERL_MM_USE_DEFAULT} ) { local $ENV{PERL_MM_USE_DEFAULT} = 1; goto &ExtUtils::MakeMaker::prompt; } else { goto &ExtUtils::MakeMaker::prompt; } } # Store a cleaned up version of the MakeMaker version, # since we need to behave differently in a variety of # ways based on the MM version. my $makemaker = eval $ExtUtils::MakeMaker::VERSION; # If we are passed a param, do a "newer than" comparison. # Otherwise, just return the MakeMaker version. sub makemaker { ( @_ < 2 or $makemaker >= eval($_[1]) ) ? $makemaker : 0 } # Ripped from ExtUtils::MakeMaker 6.56, and slightly modified # as we only need to know here whether the attribute is an array # or a hash or something else (which may or may not be appendable). my %makemaker_argtype = ( C => 'ARRAY', CONFIG => 'ARRAY', # CONFIGURE => 'CODE', # ignore DIR => 'ARRAY', DL_FUNCS => 'HASH', DL_VARS => 'ARRAY', EXCLUDE_EXT => 'ARRAY', EXE_FILES => 'ARRAY', FUNCLIST => 'ARRAY', H => 'ARRAY', IMPORTS => 'HASH', INCLUDE_EXT => 'ARRAY', LIBS => 'ARRAY', # ignore '' MAN1PODS => 'HASH', MAN3PODS => 'HASH', META_ADD => 'HASH', META_MERGE => 'HASH', PL_FILES => 'HASH', PM => 'HASH', PMLIBDIRS => 'ARRAY', PMLIBPARENTDIRS => 'ARRAY', PREREQ_PM => 'HASH', CONFIGURE_REQUIRES => 'HASH', SKIP => 'ARRAY', TYPEMAPS => 'ARRAY', XS => 'HASH', # VERSION => ['version',''], # ignore # _KEEP_AFTER_FLUSH => '', clean => 'HASH', depend => 'HASH', dist => 'HASH', dynamic_lib=> 'HASH', linkext => 'HASH', macro => 'HASH', postamble => 'HASH', realclean => 'HASH', test => 'HASH', tool_autosplit => 'HASH', # special cases where you can use makemaker_append CCFLAGS => 'APPENDABLE', DEFINE => 'APPENDABLE', INC => 'APPENDABLE', LDDLFLAGS => 'APPENDABLE', LDFROM => 'APPENDABLE', ); sub makemaker_args { my ($self, %new_args) = @_; my $args = ( $self->{makemaker_args} ||= {} ); foreach my $key (keys %new_args) { if ($makemaker_argtype{$key}) { if ($makemaker_argtype{$key} eq 'ARRAY') { $args->{$key} = [] unless defined $args->{$key}; unless (ref $args->{$key} eq 'ARRAY') { $args->{$key} = [$args->{$key}] } push @{$args->{$key}}, ref $new_args{$key} eq 'ARRAY' ? @{$new_args{$key}} : $new_args{$key}; } elsif ($makemaker_argtype{$key} eq 'HASH') { $args->{$key} = {} unless defined $args->{$key}; foreach my $skey (keys %{ $new_args{$key} }) { $args->{$key}{$skey} = $new_args{$key}{$skey}; } } elsif ($makemaker_argtype{$key} eq 'APPENDABLE') { $self->makemaker_append($key => $new_args{$key}); } } else { if (defined $args->{$key}) { warn qq{MakeMaker attribute "$key" is overriden; use "makemaker_append" to append values\n}; } $args->{$key} = $new_args{$key}; } } return $args; } # For mm args that take multiple space-seperated args, # append an argument to the current list. sub makemaker_append { my $self = shift; my $name = shift; my $args = $self->makemaker_args; $args->{$name} = defined $args->{$name} ? join( ' ', $args->{$name}, @_ ) : join( ' ', @_ ); } sub build_subdirs { my $self = shift; my $subdirs = $self->makemaker_args->{DIR} ||= []; for my $subdir (@_) { push @$subdirs, $subdir; } } sub clean_files { my $self = shift; my $clean = $self->makemaker_args->{clean} ||= {}; %$clean = ( %$clean, FILES => join ' ', grep { length $_ } ($clean->{FILES} || (), @_), ); } sub realclean_files { my $self = shift; my $realclean = $self->makemaker_args->{realclean} ||= {}; %$realclean = ( %$realclean, FILES => join ' ', grep { length $_ } ($realclean->{FILES} || (), @_), ); } sub libs { my $self = shift; my $libs = ref $_[0] ? shift : [ shift ]; $self->makemaker_args( LIBS => $libs ); } sub inc { my $self = shift; $self->makemaker_args( INC => shift ); } sub _wanted_t { } sub tests_recursive { my $self = shift; my $dir = shift || 't'; unless ( -d $dir ) { die "tests_recursive dir '$dir' does not exist"; } my %tests = map { $_ => 1 } split / /, ($self->tests || ''); require File::Find; File::Find::find( sub { /\.t$/ and -f $_ and $tests{"$File::Find::dir/*.t"} = 1 }, $dir ); $self->tests( join ' ', sort keys %tests ); } sub write { my $self = shift; die "&Makefile->write() takes no arguments\n" if @_; # Check the current Perl version my $perl_version = $self->perl_version; if ( $perl_version ) { eval "use $perl_version; 1" or die "ERROR: perl: Version $] is installed, " . "but we need version >= $perl_version"; } # Make sure we have a new enough MakeMaker require ExtUtils::MakeMaker; if ( $perl_version and $self->_cmp($perl_version, '5.006') >= 0 ) { # MakeMaker can complain about module versions that include # an underscore, even though its own version may contain one! # Hence the funny regexp to get rid of it. See RT #35800 # for details. my $v = $ExtUtils::MakeMaker::VERSION =~ /^(\d+\.\d+)/; $self->build_requires( 'ExtUtils::MakeMaker' => $v ); $self->configure_requires( 'ExtUtils::MakeMaker' => $v ); } else { # Allow legacy-compatibility with 5.005 by depending on the # most recent EU:MM that supported 5.005. $self->build_requires( 'ExtUtils::MakeMaker' => 6.42 ); $self->configure_requires( 'ExtUtils::MakeMaker' => 6.42 ); } # Generate the MakeMaker params my $args = $self->makemaker_args; $args->{DISTNAME} = $self->name; $args->{NAME} = $self->module_name || $self->name; $args->{NAME} =~ s/-/::/g; $args->{VERSION} = $self->version or die <<'EOT'; ERROR: Can't determine distribution version. Please specify it explicitly via 'version' in Makefile.PL, or set a valid $VERSION in a module, and provide its file path via 'version_from' (or 'all_from' if you prefer) in Makefile.PL. EOT $DB::single = 1; if ( $self->tests ) { my @tests = split ' ', $self->tests; my %seen; $args->{test} = { TESTS => (join ' ', grep {!$seen{$_}++} @tests), }; } elsif ( $Module::Install::ExtraTests::use_extratests ) { # Module::Install::ExtraTests doesn't set $self->tests and does its own tests via harness. # So, just ignore our xt tests here. } elsif ( -d 'xt' and ($Module::Install::AUTHOR or $ENV{RELEASE_TESTING}) ) { $args->{test} = { TESTS => join( ' ', map { "$_/*.t" } grep { -d $_ } qw{ t xt } ), }; } if ( $] >= 5.005 ) { $args->{ABSTRACT} = $self->abstract; $args->{AUTHOR} = join ', ', @{$self->author || []}; } if ( $self->makemaker(6.10) ) { $args->{NO_META} = 1; #$args->{NO_MYMETA} = 1; } if ( $self->makemaker(6.17) and $self->sign ) { $args->{SIGN} = 1; } unless ( $self->is_admin ) { delete $args->{SIGN}; } if ( $self->makemaker(6.31) and $self->license ) { $args->{LICENSE} = $self->license; } my $prereq = ($args->{PREREQ_PM} ||= {}); %$prereq = ( %$prereq, map { @$_ } # flatten [module => version] map { @$_ } grep $_, ($self->requires) ); # Remove any reference to perl, PREREQ_PM doesn't support it delete $args->{PREREQ_PM}->{perl}; # Merge both kinds of requires into BUILD_REQUIRES my $build_prereq = ($args->{BUILD_REQUIRES} ||= {}); %$build_prereq = ( %$build_prereq, map { @$_ } # flatten [module => version] map { @$_ } grep $_, ($self->configure_requires, $self->build_requires) ); # Remove any reference to perl, BUILD_REQUIRES doesn't support it delete $args->{BUILD_REQUIRES}->{perl}; # Delete bundled dists from prereq_pm, add it to Makefile DIR my $subdirs = ($args->{DIR} || []); if ($self->bundles) { my %processed; foreach my $bundle (@{ $self->bundles }) { my ($mod_name, $dist_dir) = @$bundle; delete $prereq->{$mod_name}; $dist_dir = File::Basename::basename($dist_dir); # dir for building this module if (not exists $processed{$dist_dir}) { if (-d $dist_dir) { # List as sub-directory to be processed by make push @$subdirs, $dist_dir; } # Else do nothing: the module is already present on the system $processed{$dist_dir} = undef; } } } unless ( $self->makemaker('6.55_03') ) { %$prereq = (%$prereq,%$build_prereq); delete $args->{BUILD_REQUIRES}; } if ( my $perl_version = $self->perl_version ) { eval "use $perl_version; 1" or die "ERROR: perl: Version $] is installed, " . "but we need version >= $perl_version"; if ( $self->makemaker(6.48) ) { $args->{MIN_PERL_VERSION} = $perl_version; } } if ($self->installdirs) { warn qq{old INSTALLDIRS (probably set by makemaker_args) is overriden by installdirs\n} if $args->{INSTALLDIRS}; $args->{INSTALLDIRS} = $self->installdirs; } my %args = map { ( $_ => $args->{$_} ) } grep {defined($args->{$_} ) } keys %$args; my $user_preop = delete $args{dist}->{PREOP}; if ( my $preop = $self->admin->preop($user_preop) ) { foreach my $key ( keys %$preop ) { $args{dist}->{$key} = $preop->{$key}; } } my $mm = ExtUtils::MakeMaker::WriteMakefile(%args); $self->fix_up_makefile($mm->{FIRST_MAKEFILE} || 'Makefile'); } sub fix_up_makefile { my $self = shift; my $makefile_name = shift; my $top_class = ref($self->_top) || ''; my $top_version = $self->_top->VERSION || ''; my $preamble = $self->preamble ? "# Preamble by $top_class $top_version\n" . $self->preamble : ''; my $postamble = "# Postamble by $top_class $top_version\n" . ($self->postamble || ''); local *MAKEFILE; open MAKEFILE, "+< $makefile_name" or die "fix_up_makefile: Couldn't open $makefile_name: $!"; eval { flock MAKEFILE, LOCK_EX }; my $makefile = do { local $/;tag and it's quite easy to parse. Some wrap text into Itags and add '>' in the beginning of the paragraphs. Things gettign messier when it's an HTML reply on plain text mail thread. If B
that is not supported then file a bug report via rt.cpan.org with as short as possible example. B is even better. Test file with patch is the best. Not obviouse patches without tests suck. =head1 METHODS =head2 extract my $struct = HTML::Quoted->extract( $html ); Takes a string with HTML and returns array reference. Each element in the array either array or hash. For example: [ { 'raw' => 'Hi,' }, { 'raw' => ' On date X wrote:
' }, [ { 'raw' => '' }, { 'raw' => 'Hello,' }, { 'raw' => '' } ], ... ] Hashes represent a part of the html. The following keys are meaningful at the moment: =over 4 =item * raw - raw HTML =item * quoter_raw, quoter - raw and decoded (entities are converted) quoter if block is prefixed with quoting characters =back =cut sub extract { my $self = shift; my $parser = HTML::Quoted::Parser->new( api_version => 3, handlers => { start_document => [handle_doc_start => 'self'], end_document => [handle_doc_end => 'self'], start => [handle_start => 'self, tagname, attr, attrseq, text'], end => [handle_end => 'self, tagname, text'], text => [handle_text => 'self, text, is_cdata'], default => [handle_default => 'self, event, text'], }, ); $parser->empty_element_tags(1); $parser->parse($_[0]); $parser->eof; return $parser->{'html_quoted_parser'}{'result'}; } package HTML::Quoted::Parser; use base "HTML::Parser"; sub handle_doc_start { my ($self) = @_; my $meta = $self->{'html_quoted_parser'} = {}; my $res = $meta->{'result'} = [{}]; $meta->{'current'} = $res->[0]; $meta->{'stack'} = [$res]; $meta->{'in'} = { quote => 0, block => [0] }; } sub handle_doc_end { my ($self) = @_; my $meta = $self->{'html_quoted_parser'}; pop @{ $meta->{'result'} } if ref $meta->{'result'}[-1] eq 'HASH' && !keys %{ $meta->{'result'}[-1] }; $self->organize( $meta->{'result'} ); } sub organize { my ($self, $list) = @_; my $prev = undef; foreach my $e ( splice @$list ) { if ( ref $e eq 'ARRAY' ) { push @$list, $self->organize($e); $prev = undef; } elsif ( $e->{'block'} ) { push @$list, $e; $prev = undef; } elsif ( defined $e->{'quoter'} ) { if ( !$prev || $self->combine( $prev, $e ) ) { push @$list, $prev = [ $e ]; } } else { push @$list, $e; $prev = undef; } } return $list; } sub combine { my ($self, $list, $e) = @_; my ($last) = grep ref $_ eq 'HASH', reverse @$list; if ( $last->{'quoter'} eq $e->{'quoter'} ) { push @$list, $e; return (); } elsif ( rindex( $last->{'quoter'}, $e->{'quoter'}, 0) == 0 ) { @$list = ( [@$list], $e ); return (); } elsif ( rindex( $e->{'quoter'}, $last->{'quoter'}, 0) == 0 ) { if ( ref $list->[-1] eq 'ARRAY' && !$self->combine( $list->[-1], $e ) ) { return (); } push @$list, [ $e ]; return (); } else { return $e; } } # XXX: p is treated as inline tag as it's groupping tag that # can not contain blocks inside, use span for groupping my %INLINE_TAG = map {$_ => 1 } qw( a br span bdo map img tt i b big small em strong dfn code q samp kbd var cite abbr acronym sub sup p ); my %ENTITIES = ( '>' => '>', '>' => '>', '>' => '>', ); my $re_amp = join '|', map "\Q$_\E", '>', grep $ENTITIES{$_} eq '>', keys %ENTITIES; $re_amp = qr{$re_amp}; my $re_quote_char = qr{[!#%=|:]}; my $re_quote_chunk = qr{ $re_quote_char(?!\w) | \w*$re_amp+ }x; my $re_quoter = qr{ $re_quote_chunk (?:[ \\t]* $re_quote_chunk)* }x; sub handle_start { my ($self, $tag, $attr, $attrseq, $text) = @_; my $meta = $self->{'html_quoted_parser'}; my $stack = $meta->{'stack'}; if ( $meta->{'in'}{'br'} ) { $meta->{'in'}{'br'} = 0; push @{ $stack->[-1] }, $meta->{'current'} = {}; } if ( $tag eq 'blockquote' ) { my $new = [{ quote => 1, block => 1 }]; push @{ $stack->[-1] }, $new; push @$stack, $new; # HACK: everything pushed into this $meta->{'current'} = $new->[0]; $meta->{'in'}{'quote'}++; push @{ $meta->{'in'}{'block'} }, 0; $meta->{'current'}{'raw'} .= $text; push @{ $stack->[-1] }, $meta->{'current'} = {}; } elsif ( $tag eq 'br' && !$meta->{'in'}{'block'}[-1] ) { $meta->{'current'}{'raw'} .= $text; my $line = $meta->{'current'}{'raw'}; if ( $line =~ /^\n*($re_quoter)/ ) { $meta->{'current'}{'quoter_raw'} = $1; $meta->{'current'}{'quoter'} = $self->decode_entities( $meta->{'current'}{'quoter_raw'} ); } $meta->{'in'}{'br'} = 1; } elsif ( !$INLINE_TAG{ $tag } ) { if ( !$meta->{'in'}{'block'}[-1] && keys %{ $meta->{'current'} } ) { push @{ $stack->[-1] }, $meta->{'current'} = { raw => '' }; } $meta->{'current'}{'block'} = 1; $meta->{'current'}{'raw'} .= $text; $meta->{'in'}{'block'}[-1]++; } else { $meta->{'current'}{'raw'} .= $text; } } sub handle_end { my ($self, $tag, $text) = @_; my $meta = $self->{'html_quoted_parser'}; my $stack = $meta->{'stack'}; if ( $meta->{'in'}{'br'} && $tag ne 'br' ) { $meta->{'in'}{'br'} = 0; push @{ $stack->[-1] }, $meta->{'current'} = {} } $meta->{'current'}{'raw'} .= $text; if ( $tag eq 'blockquote' ) { pop @$stack; pop @{ $meta->{'in'}{'block'} }; push @{ $stack->[-1] }, $meta->{'current'} = {}; $meta->{'in'}{'quote'}--; } elsif ( $tag eq 'br' ) { $meta->{'in'}{'br'} = 0; push @{ $stack->[-1] }, $meta->{'current'} = {} } elsif ( $tag eq 'p' ) { push @{ $stack->[-1] }, $meta->{'current'} = {} } elsif ( !$INLINE_TAG{ $tag } ) { $meta->{'in'}{'block'}[-1]--; if ( $meta->{'in'}{'block'}[-1] ) { $meta->{'current'}{'block'} = 1; } else { push @{ $stack->[-1] }, $meta->{'current'} = {}; } } } sub decode_entities { my ($self, $string) = @_; $string =~ s/(&(?:[a-z]+|#[0-9]|#x[0-9a-f]+);)/ $ENTITIES{$1} || $ENTITIES{lc $1} || $1 /ge; return $string; } sub handle_text { my ($self, $text) = @_; my $meta = $self->{'html_quoted_parser'}; if ( $meta->{'in'}{'br'} ) { $meta->{'in'}{'br'} = 0; push @{ $meta->{'stack'}[-1] }, $meta->{'current'} = {}; } $self->{'html_quoted_parser'}{'current'}{'raw'} .= $text; } sub handle_default { my ($self, $event, $text) = @_; my $meta = $self->{'html_quoted_parser'}; if ( $meta->{'in'}{'br'} ) { $meta->{'in'}{'br'} = 0; push @{ $meta->{'stack'}[-1] }, $meta->{'current'} = {}; } $self->{'html_quoted_parser'}{'current'}{'raw'} .= $text; } =head1 AUTHOR Ruslan.Zakirov EHow are you?' }, { 'raw' => 'ruz@bestpractical.comE =head1 LICENSE Under the same terms as perl itself. =cut 1; HTML-Quoted-0.03/Makefile.PL 0000644 0000765 0000765 00000000217 11335104307 013642 0 ustar ruz ruz use inc::Module::Install; all_from 'lib/HTML/Quoted.pm'; readme_from 'lib/HTML/Quoted.pm'; requires 'HTML::Parser' => '3.0'; WriteAll(); HTML-Quoted-0.03/MANIFEST 0000644 0000765 0000765 00000000575 11512237400 013026 0 ustar ruz ruz Changes inc/Module/Install.pm inc/Module/Install/Base.pm inc/Module/Install/Can.pm inc/Module/Install/Fetch.pm inc/Module/Install/Makefile.pm inc/Module/Install/Metadata.pm inc/Module/Install/ReadmeFromPod.pm inc/Module/Install/Win32.pm inc/Module/Install/WriteAll.pm lib/HTML/Quoted.pm Makefile.PL MANIFEST This list of files META.yml README t/blockquote.t t/blocks.t t/lines.t HTML-Quoted-0.03/META.yml 0000644 0000765 0000765 00000001100 11512247336 013140 0 ustar ruz ruz --- abstract: 'extract structure of quoted HTML mail message' author: - 'Ruslan.Zakirov ' build_requires: ExtUtils::MakeMaker: 6.42 configure_requires: ExtUtils::MakeMaker: 6.42 distribution_type: module generated_by: 'Module::Install version 1.00' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: 1.4 module_name: HTML::Quoted name: HTML-Quoted no_index: directory: - inc - t requires: HTML::Parser: 3.0 perl: 5.8.0 resources: license: http://dev.perl.org/licenses/ version: 0.03 HTML-Quoted-0.03/README 0000644 0000765 0000765 00000003373 11512247336 012565 0 ustar ruz ruz NAME HTML::Quoted - extract structure of quoted HTML mail message SYNOPSIS use HTML::Quoted; my $html = '...'; my $struct = HTML::Quoted->extract( $html ); DESCRIPTION Parses and extracts quotation structure out of a HTML message. Purpose and returned structures are very similar to Text::Quoted. SUPPORTED FORMATS Variouse MUAs use quite different approaches for quoting in mails. Some use *blockquote* tag and it's quite easy to parse. Some wrap text into *p* tags and add '>' in the beginning of the paragraphs. Things gettign messier when it's an HTML reply on plain text mail thread. If you found format that is not supported then file a bug report via rt.cpan.org with as short as possible example. Test file is even better. Test file with patch is the best. Not obviouse patches without tests suck. METHODS extract my $struct = HTML::Quoted->extract( $html ); Takes a string with HTML and returns array reference. Each element in the array either array or hash. For example: [ { 'raw' => 'Hi,' }, { 'raw' => ' On date X wrote:
' }, [ { 'raw' => '' }, { 'raw' => 'Hello,' }, { 'raw' => '' } ], ... ] Hashes represent a part of the html. The following keys are meaningful at the moment: * raw - raw HTML * quoter_raw, quoter - raw and decoded (entities are converted) quoter if block is prefixed with quoting characters AUTHOR Ruslan.ZakirovHow are you?' }, { 'raw' => 'LICENSE Under the same terms as perl itself. HTML-Quoted-0.03/t/ 0000755 0000765 0000765 00000000000 11512247340 012135 5 ustar ruz ruz HTML-Quoted-0.03/t/blockquote.t 0000644 0000765 0000765 00000003201 11512231104 014455 0 ustar ruz ruz use strict; use warnings; use Test::More tests => 1; use HTML::Quoted; use Data::Dumper; sub check { my ($html, $expected) = @_; my $res = HTML::Quoted->extract($html); is_deeply( $res, $expected, 'correct parsing') or diag Dumper($res); } { my $text = q{Hi, }; my $res = [ { 'raw' => 'Hi,' }, { 'block' => 1, 'raw' => 'On date X wrote:Hello,How are you?I'm fine.Where have you been?Around.' } ]; check( $text, $res ); } HTML-Quoted-0.03/t/blocks.t 0000644 0000765 0000765 00000001150 11335102622 013570 0 ustar ruz ruz use strict; use warnings; use Test::More tests => 4; BEGIN { use_ok('HTML::Quoted') }; use Data::Dumper; { my $a = "On date X wrote:
' }, [ { 'quote' => 1, 'block' => 1, 'raw' => '' }, { 'raw' => 'Hello,' }, { 'block' => 1, 'raw' => '' } ], { 'block' => 1, 'raw' => 'How are you?' }, { 'raw' => 'I'm fine.' }, [ { 'quote' => 1, 'block' => 1, 'raw' => '' }, { 'block' => 1, 'raw' => '' } ], { 'block' => 1, 'raw' => 'Where have you been?' }, { 'raw' => 'Around.line1"; is_deeply(HTML::Quoted->extract($a),[{raw => 'line1', block => 1 }]) or diag Dumper(HTML::Quoted->extract($a)); } { my $a = ""; is_deeply(HTML::Quoted->extract($a),[{raw => '', block => 1 }]) or diag Dumper(HTML::Quoted->extract($a)); } { my $a = "
"; is_deeply(HTML::Quoted->extract($a),[{raw => '', block => 1 },{raw => '
'}]) or diag Dumper(HTML::Quoted->extract($a)); } HTML-Quoted-0.03/t/lines.t 0000644 0000765 0000765 00000002041 11335102622 013425 0 ustar ruz ruz use strict; use warnings; use Test::More tests => 8; BEGIN { use_ok('HTML::Quoted') }; use Data::Dumper; { my $a = "line1"; is_deeply(HTML::Quoted->extract($a),[{raw => 'line1'}]); } { my $a = "line1
"; is_deeply(HTML::Quoted->extract($a),[{raw => 'line1
'}]) or diag Dumper(HTML::Quoted->extract($a)); } { my $a = "line1
"; is_deeply(HTML::Quoted->extract($a),[{raw => 'line1
'}]) or diag Dumper(HTML::Quoted->extract($a)); } { my $a = "line1
"; is_deeply(HTML::Quoted->extract($a),[{raw => 'line1
'}]) or diag Dumper(HTML::Quoted->extract($a)); } { my $a = "line1
line2"; is_deeply(HTML::Quoted->extract($a),[{raw => 'line1
'}, {raw => 'line2'}]) or diag Dumper(HTML::Quoted->extract($a)); } { my $a = "line1
line2"; is_deeply(HTML::Quoted->extract($a),[{raw => 'line1
'}, {raw => 'line2'}]); } { my $a = "line1
line2"; is_deeply(HTML::Quoted->extract($a),[{raw => 'line1
'}, {raw => 'line2'}]); }