HTML-WikiConverter-Markdown-0.06/0000755000175000017500000000000012370021664016051 5ustar jfearnjfearnHTML-WikiConverter-Markdown-0.06/MANIFEST0000644000175000017500000000043212370021664017201 0ustar jfearnjfearnChanges lib/HTML/WikiConverter/Markdown.pm Makefile.PL MANIFEST META.yml # Will be created by "make dist" README t/00-load.t t/01-markdown.t t/boilerplate.t t/pod-coverage.t t/pod.t t/runtests.pl META.json Module JSON meta-data (added by MakeMaker) HTML-WikiConverter-Markdown-0.06/t/0000755000175000017500000000000012370021664016314 5ustar jfearnjfearnHTML-WikiConverter-Markdown-0.06/t/boilerplate.t0000644000175000017500000000241012364114267021005 0ustar jfearnjfearn#!perl -T use strict; use warnings; use Test::More tests => 3; sub not_in_file_ok { my ( $filename, %regex ) = @_; open my $fh, "<", $filename or die "couldn't open $filename for reading: $!"; my %violated; while ( my $line = <$fh> ) { while ( my ( $desc, $regex ) = each %regex ) { if ( $line =~ $regex ) { push @{ $violated{$desc} ||= [] }, $.; } } } if (%violated) { fail("$filename contains boilerplate text"); diag "$_ appears on lines @{$violated{$_}}" for keys %violated; } else { pass("$filename contains no boilerplate text"); } } not_in_file_ok( README => "The README is used..." => qr/The README is used/, "'version information here'" => qr/to provide version information/, ); not_in_file_ok( Changes => "placeholder date/time" => qr(Date/time) ); sub module_boilerplate_ok { my ($module) = @_; not_in_file_ok( $module => 'the great new $MODULENAME' => qr/ - The great new /, 'boilerplate description' => qr/Quick summary of what the module/, 'stub function definition' => qr/function[12]/, ); } module_boilerplate_ok('lib/HTML/WikiConverter/Markdown.pm'); HTML-WikiConverter-Markdown-0.06/t/01-markdown.t0000644000175000017500000002742612364123452020555 0ustar jfearnjfearnuse HTML::WikiConverter; local $/; require 't/runtests.pl'; runtests( data => , dialect => 'Markdown', wiki_uri => 'http://www.test.com/wiki/' ); close DATA; __DATA__ unordered list __H__ __W__ * one * two * three __NEXT__ ordered list __H__
  1. one
  2. two
  3. three
__W__ 1. one 2. two 3. three __NEXT__ blockquote __H__
text
__W__ > text __NEXT__ nested blockquote __H__
text
text2
__W__ > text > > > text2 __NEXT__ nested blockquote cont'd __H__
This is the first level of quoting.
This is nested blockquote.

Back to the first level.

__W__ > This is the first level of quoting. > > > This is nested blockquote. > > Back to the first level. __NEXT__ h1 __H__

text

__W__ # text __NEXT__ bold __H__ bold text __W__ **bold text** __NEXT__ italics __H__ text __W__ _text_ __NEXT__ strong __H__ text __W__ **text** __NEXT__ em __H__ text __W__ _text_ __NEXT__ inline link ::link_style('inline') __H__

It's called LTP.

__W__ It's called [LTP](http://en.wikipedia.org/wiki/Long-term_potentiation "Long-term potentiation"). __NEXT__ reference link ::link_style('reference') __H__

It's called LTP.

__W__ It's called [LTP][1]. [1]: http://en.wikipedia.org/wiki/Long-term_potentiation "Long-term potentiation" __NEXT__ reference link no title ::link_style('inline') __H__

This link has no title attribute.

__W__ [This link](http://example.net/) has no title attribute. __NEXT__ multi-paragraphs with reference links ::link_style('reference') __H__

This is a paragraph with a link to Google. There's also a link to some other stuff, like Digg and Wikipedia.

Here's another paragraph.

This is fun stuff:

__W__ This is a paragraph with a link to [Google][1]. There's also a link to some other stuff, like [Digg][2] and [Wikipedia][3]. Here's another paragraph. This is fun stuff: * [Google Video is the best!][4] * [Example.org is a close second][5] [1]: http://google.com [2]: http://digg.com [3]: http://wikipedia.org [4]: http://video.google.com "Google Video" [5]: http://www.example.org "Examples" __NEXT__ code __H__ printf() __W__ `printf()` __NEXT__ inline image ::image_style('inline') __H__ Delete __W__ ![Delete](http://example.com/delete.png "Click to delete") __NEXT__ reference image ::image_style('reference') __H__ Delete __W__ ![Delete][1] [1]: http://example.com/delete.png "Click to delete" __NEXT__ mixed inline images and links ::image_style('inline') ::link_style('inline') __H__

Link goes Here. Image goes below:

Logo

__W__ Link goes [Here](http://example.com "Link to example.com"). Image goes below: ![Logo](http://example.com/logo.png) __NEXT__ mixed reference images and links ::image_style('reference') ::link_style('reference') __H__

This is a paragraph with a link to Google. There's also a link to some other stuff, like Digg and Wikipedia. Delete

__W__ This is a paragraph with a link to [Google][1]. There's also a link to some other stuff, like [Digg][2] and [Wikipedia][3]. ![Delete][4] [1]: http://google.com [2]: http://digg.com [3]: http://wikipedia.org [4]: http://example.com/delete.png "Click to delete" __NEXT__ fallback to tag if image has dimensions ::image_tag_fallback(1) __H__ Thingy __W__ Thingy __NEXT__ no fallback ::image_tag_fallback(0) ::image_style('inline') __H__ Thingy __W__ ![Thingy](http://example.com/origin.png "The title") __NEXT__ automatic links __H__ http://example.com __W__ __NEXT__ escapes __H__

a backslash \

a weird combo ![

a curly brace {

1992. not a list item!

__W__ a backslash \\ a weird combo \![ a curly brace \{ 1992\. not a list item! __NEXT__ multi-headers __H__

One

Two

Three

__W__ # One ## Two ### Three __NEXT__ one-dot lists ::ordered_list_style('one-dot') __H__
  1. one
  2. two
  3. three
__W__ 1. one 1. two 1. three __NEXT__ plus lists ::unordered_list_style('plus') __H__ __W__ + one + two + three __NEXT__ dash lists ::unordered_list_style('dash') __H__ __W__ - one - two - three __NEXT__ forced inline anchors ::force_inline_anchor_links(1) ::unordered_list_style('asterisk') __H__ __W__ * [Overview](#overview) * [Philosophy](#philosophy) * [Inline HTML](#html) __NEXT__ table __H__
My favorite animals
Animal Region Physical traits Food
Pacman frog Gran Chaco (Argentina) Half mouth, half stomach (quite literally!) Crickets, fish, etc.
__W__
My favorite animals
Animal Region Physical traits Food
Pacman frog Gran Chaco (Argentina) Half mouth, half stomach (quite literally!) Crickets, fish, etc.
__NEXT__ setext header ::header_style('setext') __H__

header1

Fun stuff here.

header2

More fun stuff!

__W__ header1 ======= Fun stuff here. header2 ------- More fun stuff! __NEXT__ more complete example ::header_style('atx') __H__

Aaron Swartz's html2text

A handful of people have asked if there's a way to translate Markdown in reverse — to turn HTML back into Markdown-formatted plain text. The short answer is yes, by using Aaron Swartz's new version of html2text:

html2text is a Python script that convers a page of HTML into clean, easy-to-read plain ASCII. Better yet, that ASCII also happens to be valid Markdown (a text-to-HTML format).

html2text works so well that I'm planning to use it to convert most of my old Daring Fireball articles (the ones I wrote in raw HTML). It's worth noting that if you start with a Markdown document, translate it to HTML, then use html2text to go back to Markdown, it won't give you the exact same document you started with. That sort of complete round-trip fidelity simply is not possible, but html2text comes pretty close.

Also, much like Markdown and SmartyPants, html2text works as a BBEdit text filter. Simply save a copy in the Unix Filters folder in your BBEdit Support folder.

__W__ ## Aaron Swartz's html2text A handful of people have asked if there's a way to translate Markdown in reverse — to turn HTML back into Markdown-formatted plain text. The short answer is yes, by using Aaron Swartz's new version of [html2text][1]: > html2text is a Python script that convers a page of HTML into clean, easy-to-read plain ASCII. Better yet, that ASCII also happens to be valid Markdown (a text-to-HTML format). html2text works so well that I'm planning to use it to convert most of my old Daring Fireball articles (the ones I wrote in raw HTML). It's worth noting that if you start with a Markdown document, translate it to HTML, then use html2text to go back to Markdown, it won't give you the exact same document you started with. That sort of complete round-trip fidelity simply is not possible, but html2text comes pretty close. Also, much like Markdown and SmartyPants, html2text works as a BBEdit text filter. Simply save a copy in the Unix Filters folder in your BBEdit Support folder. [1]: http://www.aaronsw.com/2002/html2text/ __NEXT__ blockquotes containing only phrasal elements __H__

Via Wikipedia:

Long-term potentiation is the long-lasting enhancement in communication between two neurons that lasts from minutes to hours.

Sweet.

__W__ Via [Wikipedia][1]: > Long-term potentiation is the long-lasting enhancement in communication between two [neurons][2] that lasts from minutes to hours. Sweet. [1]: http://en.wikipedia.org/wiki/Long-term_potentiation [2]: http://en.wikipedia.org/wiki/Neuron __NEXT__ blockquote containing p __H__

shouldn't add a paragraph parent

__W__ > shouldn't add a paragraph parent __NEXT__ __H__
unmarked paragraph

another paragraph

yet another

__W__ > unmarked paragraph > > another paragraph > > yet another __NEXT__ code containing backticks (bug #43998) __H__

There is a literal backtick (`) here.

__W__ ``There is a literal backtick (`) here.`` __NEXT__ amp, lt, gt within code blocks (bug #43996) __H__ print("a < b") if $c > $d __W__ `print("a < b") if $c > $d` __NEXT__ amp, lt, gt within code blocks (bug #43996, example from markdown docs, http://bit.ly/NSrG3) __H__

I strongly recommend against using any <blink> tags.

I wish SmartyPants used named entities like &mdash; instead of decimal-encoded entites like &#8212;.

__W__ I strongly recommend against using any `` tags. I wish SmartyPants used named entities like `—` instead of decimal-encoded entites like `—`. __NEXT__ escape literal backticks outside of tags __H__

Hi there, this is a backtick (`).

__W__ Hi there, this is a backtick (\`). __NEXT__ don't backslash-escape underscores within tags (bug #43993) __H__ foo _bar_ baz foo_bar __W__ `foo _bar_ baz foo_bar` __NEXT__ but do backslash-escape other underscores __H__

foo _bar_

__W__ foo \_bar\_ __NEXT__ code blocks __H__

Here's an example:

if( chomp( my $foo = <> ) ) {
  print "entered: $foo\n";
}
__W__ Here's an example: if( chomp( my $foo = <> ) ) { print "entered: $foo\n"; } __NEXT__ code blocks __H__
if( chomp( my $foo = <> ) ) {
  print "entered: $foo\n";
}
__W__ if( chomp( my $foo = <> ) ) { print "entered: $foo\n"; } __NEXT__ DIV __H__
outer div
nested div
__W__ outer div nested div __NEXT__ PRE __H__
this is
	a 
pre
__W__ this is a pre __NEXT__ Heading with ID __H__

my heading

__W__ # my heading {#linkhere} __NEXT__ BR __H__ need to add a new line between here
and here __W__ need to add a new line between here
and here HTML-WikiConverter-Markdown-0.06/t/runtests.pl0000644000175000017500000000720112364114460020540 0ustar jfearnjfearn#!/usr/bin/perl use warnings; use strict; use Test::More; use File::Spec; use HTML::Entities; use HTML::WikiConverter; *e = \&encode_entities; my $more_tests = < comment __W__ A comment __NEXT__ strip head __H__ fun stuff

Crazy stuff here

__W__ Crazy stuff here __NEXT__ strip scripts __H__

benevolent text

__W__ benevolent text END_TESTS sub runtests { my %arg = @_; $arg{wrap_in_html} = 1; $arg{base_uri} ||= 'http://www.test.com'; my $minimal = $arg{minimal} || 0; my $data = $arg{data} || ''; $data .= entity_tests() . $more_tests unless $minimal; my @tests = split /__NEXT__\n/, $data; my $numtests = @tests; #$numtests += 1 unless $minimal; # file test plan tests => $numtests; # Delete unrecognized HTML::WikiConverter options delete $arg{$_} for qw/ data minimal /; my $wc = new HTML::WikiConverter(%arg); foreach my $test (@tests) { $test =~ s/^(.*?)\n//; my $name = $1; my ( $html, $wiki ) = split /__W__\n/, $test; $html =~ s/__H__\n//; # $name =~ s{\s*\:\:(\w+\([^\)]*?\))}{ # my $method_call = $1; # eval "\$wc->$method_call;"; # die "Failed test call ($name): $@" if $@; # ''; # }ge; my ( $todo, $todo_reason ); $name =~ s{\s*\:\:(\w+\([^\)]*?\))}{ my $keyword = $1; if( $keyword =~ /TODO\((\"|\')(.*?)\1/ ) { $todo = 1; $todo_reason = $2; } else { my $method_call = $keyword; eval "\$wc->$method_call;"; die "Failed test call ($name): $@" if $@; } ''; }ge; for ( $html, $wiki ) { s/^\n+//; s/\n+$// } if ($todo) { TODO: { local $TODO = $todo_reason; is( $wc->html2wiki($html), $wiki, $name ); } } else { is( $wc->html2wiki($html), $wiki, $name ); } } #file_test($wc) unless $minimal; } sub entity_tests { my $tmpl = "__NEXT__\n%s\n__H__\n%s\n__W__\n%s\n"; # test-name, html-input, expected-wiki-output my $data = ''; my @chars = ( '<', '>', '&' ); foreach my $char (@chars) { ( my $charname = e($char) ) =~ s/[&;]//g; $data .= sprintf $tmpl, "literal ($charname)", $char, e($char) . sprintf $tmpl, "encode ($charname)", e($char), e($char) . sprintf $tmpl, "meta ($charname)", e( e($char) ), e( e($char) ); } return $data; } sub _slurp { my $path = shift; open H, $path or die "couldn't open $path: $!"; local $/; my $c = ; close H; return $c; } sub file_test { my $wc = shift; my $lc_dialect = lc $wc->dialect; my $infile = File::Spec->catfile( 't', 'complete.html' ); my $outfile = File::Spec->catfile( 't', "complete.$lc_dialect" ); SKIP: { skip "Couldn't find $infile (ignore this)", 1 unless -e $infile; skip "Couldn't find $outfile (ignore this)", 1 unless -e $outfile; my ( $got, $expect ) = ( $wc->html2wiki( file => $infile, slurp => 1 ), _slurp($outfile) ); for ( $got, $expect ) { s/^\n+//; s/\n+$// } is( $got, $expect, 'read from file' ); } } 1; HTML-WikiConverter-Markdown-0.06/t/00-load.t0000644000175000017500000000030512364114227017634 0ustar jfearnjfearn#!perl -T use Test::More tests => 1; BEGIN { use_ok('HTML::WikiConverter::Markdown'); } diag("Testing HTML::WikiConverter::Markdown $HTML::WikiConverter::Markdown::VERSION, Perl $], $^X"); HTML-WikiConverter-Markdown-0.06/t/pod-coverage.t0000644000175000017500000000113312364114420021046 0ustar jfearnjfearn#!perl -T use Test::More; eval "use Test::Pod::Coverage 1.04"; plan skip_all => "Test::Pod::Coverage 1.04 required for testing POD coverage" if $@; all_pod_coverage_ok( { also_private => [ # These methods are documented in HTML::WikiConverter::Dialects qr/ get_elem_contents |get_wiki_page |get_attr_str |elem_within_block |is_camel_case |rule |rules |attribute |attributes |preprocess_tree |preprocess_node |postprocess_output |caption2para |strip_aname |base_url |wiki_url /x ] } ); HTML-WikiConverter-Markdown-0.06/t/pod.t0000644000175000017500000000021512364114444017263 0ustar jfearnjfearn#!perl -T use Test::More; eval "use Test::Pod 1.14"; plan skip_all => "Test::Pod 1.14 required for testing POD" if $@; all_pod_files_ok(); HTML-WikiConverter-Markdown-0.06/META.yml0000664000175000017500000000111112370021664017316 0ustar jfearnjfearn--- abstract: 'Convert HTML to Markdown markup' author: - 'David J. Iberri ' build_requires: ExtUtils::MakeMaker: 0 configure_requires: ExtUtils::MakeMaker: 0 dynamic_config: 1 generated_by: 'ExtUtils::MakeMaker version 6.72, CPAN::Meta::Converter version 2.132140' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: 1.4 name: HTML-WikiConverter-Markdown no_index: directory: - t - inc requires: HTML::Tagset: 0 HTML::WikiConverter: 0.67 Params::Validate: 0 Test::More: 0 URI: 0 version: 0.06 HTML-WikiConverter-Markdown-0.06/Makefile.PL0000644000175000017500000000133712364354150020031 0ustar jfearnjfearnuse strict; use warnings; use ExtUtils::MakeMaker; WriteMakefile( NAME => 'HTML::WikiConverter::Markdown', AUTHOR => 'David J. Iberri ', LICENSE => 'perl', VERSION_FROM => 'lib/HTML/WikiConverter/Markdown.pm', ABSTRACT_FROM => 'lib/HTML/WikiConverter/Markdown.pm', PL_FILES => {}, PREREQ_PM => { 'Test::More' => 0, 'HTML::WikiConverter' => 0.67, # for p_strict attribute 'HTML::Tagset' => 0, 'Params::Validate' => 0, 'URI' => 0, }, dist => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', }, clean => { FILES => 'HTML-WikiConverter-Markdown-*' }, ); HTML-WikiConverter-Markdown-0.06/README0000644000175000017500000000276012364125775016751 0ustar jfearnjfearnHTML::WikiConverter::Markdown ============================= This module adds HTML-to-Markdown conversion to the HTML::WikiConverter module. SYNOPSIS Converting HTML to wiki markup is easy: use HTML::WikiConverter; my $wc = new HTML::WikiConverter( dialect => 'Markdown' ); print $wc->html2wiki( "hello" ); Or from the command line: % html2wiki --dialect Markdown input.html > output.wiki There's also a web interface if you're so inclined: http://diberri.dyndns.org/wikipedia/html2wiki/ INSTALLATION To install this module, run the following commands: perl Makefile.PL make make test make install For test coverage metrics run: perl Makefile.PL make cover -delete make test HARNESS_PERL_SWITCHES=-MDevel::Cover cover SUPPORT AND DOCUMENTATION After installing, you can find documentation for this module with the perldoc command. perldoc HTML::WikiConverter::Markdown You can also look for information at: Search CPAN http://search.cpan.org/dist/HTML-WikiConverter-Markdown CPAN Request Tracker: http://rt.cpan.org/NoAuth/Bugs.html?Dist=HTML-WikiConverter-Markdown AnnoCPAN, annotated CPAN documentation: http://annocpan.org/dist/HTML-WikiConverter-Markdown CPAN Ratings: http://cpanratings.perl.org/d/HTML-WikiConverter-Markdown COPYRIGHT AND LICENCE Copyright (c) David J. Iberri This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. HTML-WikiConverter-Markdown-0.06/META.json0000664000175000017500000000201512370021664017472 0ustar jfearnjfearn{ "abstract" : "Convert HTML to Markdown markup", "author" : [ "David J. Iberri " ], "dynamic_config" : 1, "generated_by" : "ExtUtils::MakeMaker version 6.72, CPAN::Meta::Converter version 2.132140", "license" : [ "perl_5" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : "2" }, "name" : "HTML-WikiConverter-Markdown", "no_index" : { "directory" : [ "t", "inc" ] }, "prereqs" : { "build" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "configure" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "runtime" : { "requires" : { "HTML::Tagset" : "0", "HTML::WikiConverter" : "0.67", "Params::Validate" : "0", "Test::More" : "0", "URI" : "0" } } }, "release_status" : "stable", "version" : "0.06" } HTML-WikiConverter-Markdown-0.06/lib/0000755000175000017500000000000012370021664016617 5ustar jfearnjfearnHTML-WikiConverter-Markdown-0.06/lib/HTML/0000755000175000017500000000000012370021664017363 5ustar jfearnjfearnHTML-WikiConverter-Markdown-0.06/lib/HTML/WikiConverter/0000755000175000017500000000000012370021664022156 5ustar jfearnjfearnHTML-WikiConverter-Markdown-0.06/lib/HTML/WikiConverter/Markdown.pm0000644000175000017500000004101112370021340024262 0ustar jfearnjfearnpackage HTML::WikiConverter::Markdown; use warnings; use strict; use base 'HTML::WikiConverter'; our $VERSION = '0.06'; use Params::Validate ':types'; use HTML::Entities; use HTML::Tagset; use URI; =head1 NAME HTML::WikiConverter::Markdown - Convert HTML to Markdown markup =head1 SYNOPSIS use HTML::WikiConverter; my $wc = new HTML::WikiConverter( dialect => 'Markdown' ); print $wc->html2wiki( $html ); =head1 DESCRIPTION This module contains rules for converting HTML into Markdown markup. You should not use this module directly; HTML::WikiConverter is the entry point for html->wiki conversion (eg, see synopsis above). See L for additional usage details. =head1 ATTRIBUTES In addition to the regular set of attributes recognized by the L constructor, this dialect also accepts the following attributes that can be passed into the C constructor. See L for usage details. =head2 header_style Possible values: C<'setext'>, C<'atx'>. Determines how headers C

-C

will be formatted. See L for more information. Default is C<'atx'>. =head2 link_style Possible values: C<'inline'>, C<'reference'>. See L for more information. Default is C<'reference'>. =head2 force_inline_anchor_links Possible values: C<0>, C<1>. If enabled, links to anchors within the same page (eg, C<#some-anchor>) will always produce inline Markdown links, even under reference link style. This might be useful for building tables of contents. Default is C<0>. =head2 image_style Possible values: C<'inline'>, C<'reference'>. See L for more information. Default is C<'reference'>. =head2 image_tag_fallback Possible values: C<0>, C<1>. Markdown's image markup does not support image dimensions. If C is enabled, image tags containing dimensional information (ie, width or height) will not be converted into Markdown markup. Rather, they will be roughly preserved in their HTML form. Default is C<1>. =head2 unordered_list_style Possible values: C<'asterisk'>, C<'plus'>, C<'dash'>. See L for more information. Default is C<'asterisk'>. =head2 ordered_list_style Possible values: C<'sequential'>, C<'one-dot'>. Markdown supports two different markups for ordered lists. Sequential style gives each list element its own ordinal number (ie, C<'1.'>, C<'2.'>, C<'3.'>, etc.). One-dot style gives each list element the same ordinal number (ie, C<'1.'>). See L for more information. Default is C<'sequential'>. =head2 md_extra Possible values: C<0>, C<1>. Support MarkDown Extra L extensions. Default is C<0>. This support is incomplete. # Tables Supported # Fenced Code Blocks # Definition Lists Supported # Footnotes # Special Attributes # SmartyPants # Newlines # Strikethrough =cut sub attributes { { header_style => { default => 'atx', type => SCALAR }, link_style => { default => 'reference', type => SCALAR }, force_inline_anchor_links => { default => 0, type => BOOLEAN }, image_style => { default => 'reference', type => SCALAR }, image_tag_fallback => { default => 1, type => BOOLEAN }, unordered_list_style => { default => 'asterisk', type => SCALAR }, ordered_list_style => { default => 'sequential', type => SCALAR }, # Requires H::WC version 0.67 p_strict => { default => 0 }, md_extra => { default => 0, type => BOOLEAN }, }; } my @common_attrs = qw/ id class lang dir title style /; # Hack to accommodate bug #43997 - multiline code blocks my $code_block_prefix = 'bqwegsdfbwegadfbnsdfbahwerfgkjnsdfbohqw34t927398y5jnwrteb8uq34inb'; sub rules { my $self = shift; my %rules = ( hr => { replace => "\n\n----\n\n" }, br => { preserve => 1, empty => 1, end => \&_br_end }, p => { block => 1, trim => 'both', line_format => 'multi', line_prefix => \&_p_prefix }, blockquote => { block => 1, trim => 'both', line_format => 'blocks', line_prefix => '> ' }, ul => { block => 1, line_format => 'multi' }, ol => { alias => 'ul' }, li => { start => \&_li_start, trim => 'leading' }, i => { start => '_', end => '_' }, em => { alias => 'i' }, b => { start => '**', end => '**' }, strong => { alias => 'b' }, code => { start => \&_code_delim, end => \&_code_delim }, code_block => { line_prefix => $code_block_prefix, block => 1 }, a => { replace => \&_link }, img => { replace => \&_img }, div => { block => 1, line_format => 'blocks' }, pre => { line_prefix => "\t", block => 1, line_format => 'blocks' }, ); for ( 1 .. 6 ) { $rules{"h$_"} = { start => \&_header_start, end => \&_header_end, trim => 'both', block => 1 }; } for (qw/ table caption tr th td /) { $rules{$_} = { preserve => 1, attrs => \@common_attrs, start => "\n", end => "\n", line_format => 'multi' }; } # MarkDown Extra https://github.com/jmcmanus/pagedown-extra # Tables Supported # Fenced Code Blocks # Definition Lists Supported # Footnotes # Special Attributes # SmartyPants # Newlines # Strikethrough if ( $self->md_extra ) { $rules{dt} = { start => "\n", end => "\n", trim => 'both', }; $rules{dd} = { start => ": ", end => "\n", trim => 'both', }; delete( $rules{table} ); delete( $rules{caption} ); $rules{tr} = { start => "\n", end => "|", trim => 'both' }; $rules{td} = { start => "|", trim => 'both' }; $rules{th} = { alias => 'td' }; $rules{thead} = { end => "\n|-|", trim => 'both' }; # need an extra line here as some lists can contain complex block structures. $rules{ul} = { block => 1, line_format => 'blocks' }; $rules{li} = { start => \&_li_start, blocks => 1, trim => 'leading' }; } return \%rules; } sub _br_end { my ( $self, $node, $rules ) = @_; return "\n"; } sub _header_start { my ( $self, $node, $rules ) = @_; return '' unless $self->header_style eq 'atx'; ( my $level = $node->tag ) =~ s/\D//g; return unless $level; my $hr = ('#') x $level; return "$hr "; } sub _header_end { my ( $self, $node, $rules ) = @_; my $anchor = ''; if ( $node->id() ) { $anchor = "\t{#" . $node->id() . "}"; } return $anchor unless $self->header_style eq 'setext'; ( my $level = $node->tag ) =~ s/\D//g; return $anchor unless $level; my $symbol = $level == 1 ? '=' : '-'; my $len = length $self->get_elem_contents($node); my $bar = ($symbol) x $len; return "$anchor\n$bar\n"; } sub _link { my ( $self, $node, $rules ) = @_; my $url = $self->_abs2rel( $node->attr('href') || '' ); my $text = $self->get_elem_contents($node); my $title = $node->attr('title') || ''; my $style = $self->link_style; $style = 'inline' if $url =~ /^\#/ and $self->force_inline_anchor_links; if ( $url eq $text ) { return sprintf "<%s>", $url; } elsif ( $style eq 'inline' ) { return sprintf "[%s](%s \"%s\")", $text, $url, $title if $title; return sprintf "[%s](%s)", $text, $url; } elsif ( $style eq 'reference' ) { my $id = $self->_next_link_id; $self->_add_link( { id => $id, url => $url, title => $title } ); return sprintf "[%s][%s]", $text, $id; } } sub _last_link_id { shift->_attr( { internal => 1 }, _last_link_id => @_ ) } sub _links { shift->_attr( { internal => 1 }, _links => @_ ) } sub _next_link_id { my $self = shift; my $next_id = ( $self->_last_link_id || 0 ) + 1; $self->_last_link_id($next_id); return $next_id; } sub _add_link { my ( $self, $link ) = @_; $self->_links( [ @{ $self->_links || [] }, $link ] ); } sub _img { my ( $self, $node, $rules ) = @_; my $url = $node->attr('src') || ''; my $text = $node->attr('alt') || ''; my $title = $node->attr('title') || ''; my $width = $node->attr('width') || ''; my $height = $node->attr('height') || ''; if ( $width || $height and $self->image_tag_fallback ) { return "get_attr_str( $node, qw/ src width height alt /, @common_attrs ) . " />"; } elsif ( $self->image_style eq 'inline' ) { return sprintf "![%s](%s \"%s\")", $text, $url, $title if $title; return sprintf "![%s](%s)", $text, $url; } elsif ( $self->image_style eq 'reference' ) { my $id = $self->_next_link_id; $self->_add_link( { id => $id, url => $url, title => $title } ); return sprintf "![%s][%s]", $text, $id; } } sub _li_start { my ( $self, $node, $rules ) = @_; my @parent_lists = $node->look_up( _tag => qr/ul|ol/ ); my $prefix = (' ') x ( @parent_lists - 1 ); my $bullet = ''; $bullet = $self->_ul_li_start if $node->parent and $node->parent->tag eq 'ul'; $bullet = $self->_ol_li_start( $node->parent ) if $node->parent and $node->parent->tag eq 'ol'; return "\n$prefix$bullet "; } sub _ul_li_start { my $self = shift; my $style = $self->unordered_list_style; return '*' if $style eq 'asterisk'; return '+' if $style eq 'plus'; return '-' if $style eq 'dash'; die "no such unordered list style: '$style'"; } my %ol_count = (); sub _ol_li_start { my ( $self, $ol ) = @_; my $style = $self->ordered_list_style; if ( $style eq 'one-dot' ) { return '1.'; } elsif ( $style eq 'sequential' ) { my $count = ++$ol_count{$ol}; return "$count."; } else { die "no such ordered list style: $style"; } } sub _p_prefix { my ( $wc, $node, $rules ) = @_; return $node->look_up( _tag => 'li' ) ? ' ' : ''; } sub preprocess_node { my ( $self, $node ) = @_; return unless $node->tag and $node->parent and $node->parent->tag; if ( $node->tag eq 'blockquote' ) { my @non_phrasal_children = grep { !$self->_is_phrase_tag( $_->tag ) } $node->content_list; unless (@non_phrasal_children) { # ie, we have things like
blah blah blah
, without a

or something $self->_envelop_children( $node, HTML::Element->new('p') ); } } elsif ( $node->tag eq '~text' ) { $self->_escape_text($node); # bug #43998 $self->_decode_entities_in_code($node) if $node->parent->tag eq 'code' or $node->parent->tag eq 'code_block'; } } sub preprocess_tree { my ( $self, $root ) = @_; foreach my $node ( $root->descendants ) { # bug #43997 - multiline code blocks if ( $self->_text_is_within_code_pre($node) ) { $self->_convert_to_code_block($node); } } } sub _text_is_within_code_pre { my ( $self, $node ) = @_; return unless $node->parent->parent and $node->parent->parent->tag; # Must be

...
(or
...
) my $code_pre = $node->parent->tag eq 'code' && $node->parent->parent->tag eq 'pre'; my $pre_code = $node->parent->tag eq 'pre' && $node->parent->parent->tag eq 'code'; return unless $code_pre or $pre_code; # Can't be any other nodes in a code block return if $node->left or $node->right; return if $node->parent->left or $node->parent->right; return 1; } sub _convert_to_code_block { my ( $self, $node ) = @_; $node->parent->parent->replace_with_content->delete; $node->parent->tag("code_block"); } sub _envelop_children { my ( $self, $node, $new_child ) = @_; my @children = $node->detach_content; $node->push_content($new_child); $new_child->push_content(@children); } # special handling for: ` _ # . [ ! my @escapes = qw( \\ * { } _ ` ); my %backslash_escapes = ( '\\' => [ '0923fjhtml2wikiescapedbackslash', "\\\\" ], '*' => [ '0923fjhtml2wikiescapedasterisk', "\\*" ], '{' => [ '0923fjhtml2wikiescapedopenbrace', "\\{" ], '}' => [ '0923fjhtml2wikiescapedclosebrace', "\\}" ], '_' => [ '0923fjhtml2wikiescapedunderscore', "\\_" ], '`' => [ '0923fjhtml2wikiescapedbacktick', "\\`" ], ); sub _escape_text { my ( $self, $node ) = @_; my $text = $node->attr('text') || ''; # # (bug #43998) # Only backslash-escape backticks that don't occur within # tags. Those within tags are left alone and the backticks to # signal a tag get upgraded to a double-backtick by # _code_delim(). # # (bug #43993) # Likewise, only backslash-escape underscores that occur outside # tags. # my $inside_code = $node->look_up( _tag => 'code' ) || $node->look_up( _tag => 'code_block' ) || $node->look_up( _tag => 'pre' ); if ( not $inside_code ) { my $escapes = join '', @escapes; $text =~ s/([\Q$escapes\E])/$backslash_escapes{$1}->[0]/g; $text =~ s/^([\d]+)\./$1\\./; $text =~ s/^\#/\\#/; $text =~ s/\!\[/\\![/g; $text =~ s/\]\[/]\\[/g; $node->attr( text => $text ); } } # bug #43998 sub _code_delim { my ( $self, $node, $rules ) = @_; my $contents = $self->get_elem_contents($node); return $contents =~ /\`/ ? '``' : '`'; } # bug #43996 sub _decode_entities_in_code { my ( $self, $node ) = @_; my $text = $node->attr('text') || ''; return unless $text; HTML::Entities::_decode_entities( $text, { 'amp' => '&', 'lt' => '<', 'gt' => '>' } ); $node->attr( text => $text ); } sub postprocess_output { my ( $self, $outref ) = @_; $$outref =~ s/\Q$code_block_prefix\E/ /gm; $self->_unescape_text($outref); $self->_add_references($outref); } sub _unescape_text { my ( $self, $outref ) = @_; foreach my $escape ( values %backslash_escapes ) { $$outref =~ s/$escape->[0]/$escape->[1]/g; } } sub _add_references { my ( $self, $outref ) = @_; my @links = @{ $self->_links || [] }; return unless @links; my $links = ''; foreach my $link (@links) { my $id = $link->{id} || ''; my $url = $link->{url} || ''; my $title = $link->{title} || ''; if ($title) { $links .= sprintf " [%s]: %s \"%s\"\n", $id, $url, $title; } else { $links .= sprintf " [%s]: %s\n", $id, $url; } } $self->_links( [] ); $self->_last_link_id(0); $$outref .= "\n\n$links"; $$outref =~ s/\s+$//gs; } sub _is_phrase_tag { my $tag = pop || ''; return $HTML::Tagset::isPhraseMarkup{$tag} || $tag eq '~text'; } sub _abs2rel { my ( $self, $uri ) = @_; return $uri unless $self->base_uri; return URI->new($uri)->rel( $self->base_uri )->as_string; } =head1 AUTHOR David J. Iberri, C<< >> =head1 BUGS Please report any bugs or feature requests to C, or through the web interface at L. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes. =head1 SUPPORT You can find documentation for this module with the perldoc command. perldoc HTML::WikiConverter::Markdown You can also look for information at: =over 4 =item * AnnoCPAN: Annotated CPAN documentation L =item * CPAN Ratings L =item * RT: CPAN's request tracker L =item * Search CPAN L =back =head1 COPYRIGHT & LICENSE Copyright 2006 David J. Iberri, all rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut 1; HTML-WikiConverter-Markdown-0.06/Changes0000644000175000017500000000212312370021260017332 0ustar jfearnjfearn# Revision history for HTML::WikiConverter::Markdown date: 5 Aug 2014 version: 0.06 changes: - (bug #97384) Quote inline link titles - (bug #97385) Support heading IDs - (bug #97450) Support PRE and DIV - (bug #56769) Add newline after
- Add support for MarkDown Extra table & dl extensions. date: 2009-03-16 version: 0.05 changes: - requires HTML::WikiConverter 0.67 - (bug #43997) properly handles multiline code blocks date: 2009-03-13 version: 0.04 changes: - correct handling of blockquotes containing only phrasal elements - (bug #43988) properly escape backticks within code tags - (bug #43993) don't escape underscores within code tags - (bug #43996) decode specific html entities within code tags date: 2008-11-14 version: 0.03 changes: - (bug #40914) require H::WC 0.63 - add perl license to Makefile.PL - add more comprehensive test - tighten pod-coverage.t loose ends date: 2006-07-20 version: 0.02 changes: - require H::WC 0.60 - add attribute types date: 2006-07-07 version: 0.01 changes: - initial release (fulfill feature request: bug #18111)