MKDoc-XML-0.75/0002755000076400007640000000000010214062151013336 5ustar brunobruno00000000000000MKDoc-XML-0.75/t/0002755000076400007640000000000010214062151013601 5ustar brunobruno00000000000000MKDoc-XML-0.75/t/500_MKDoc_XML_Decode.t0000644000076400007640000000070707765641335017324 0ustar brunobruno00000000000000#!/usr/bin/perl use lib qw (../lib lib); use Test::More 'no_plan'; use strict; use warnings; use MKDoc::XML::Decode; my $decode = new MKDoc::XML::Decode (qw /xml xhtml numeric/); is ($decode->process ('Hello, <'), 'Hello, <'); is ($decode->process ('Hello, >'), 'Hello, >'); is ($decode->process ('Hello, &'), 'Hello, &'); is ($decode->process ('Hello, "'), 'Hello, "'); is ($decode->process ('Hello, ''), 'Hello, \''); 1; __END__ MKDoc-XML-0.75/t/101_MKDoc_XML_Tokenizer_Comment.t0000644000076400007640000000036207765640455021571 0ustar brunobruno00000000000000#!/usr/bin/perl use lib qw (../lib lib); use Test::More 'no_plan'; use strict; use warnings; use MKDoc::XML::Tokenizer; my $data = ''; eval { MKDoc::XML::Tokenizer->process_data ($data) }; ok ($@); 1; __END__ MKDoc-XML-0.75/t/001_MKDoc_XML_Token_tag_open.t0000644000076400007640000000151307765640455021067 0ustar brunobruno00000000000000#!/usr/bin/perl use lib qw (../lib lib); use Test::More 'no_plan'; use strict; use warnings; use MKDoc::XML::Token; my $data = qq ||; my $token = new MKDoc::XML::Token ($data); my $node = $token->tag_open(); ok (exists $node->{'petal:omit-tag'}); is ($node->{'petal:omit-tag'}, ''); $data = qq ||; $token = new MKDoc::XML::Token ($data); $node = $token->tag_open(); ok (exists $node->{'foo_bulb:zoo'}); is ($node->{'foo_bulb:zoo'}, 'baz'); $data = < EOF $token = new MKDoc::XML::Token ($data); $node = $token->tag_open(); is ($node->{'petal_temp:attributes'}, 'petal:attributes dididi dadada'); $token = new MKDoc::XML::Token (qq ||); $node = $token->tag_open(); is ($node->{foo}, 'bar'); 1; __END__ 
MKDoc-XML-0.75/t/604_MKDoc_XML_Dumper_Entities.t0000644000076400007640000000073310006703757021232 0ustar brunobruno00000000000000#!/usr/bin/perl use lib qw (../lib lib); use Test::More 'no_plan'; use strict; use warnings; use MKDoc::XML::Dumper; my $xml = < First Impressions ================= Okay, I am not allowed to disclosed information about "all Google software, EOF my $pl = MKDoc::XML::Dumper->xml2perl ($xml); ok (ref $pl); like ($pl->{data}, qr/\"/); 1; __END__ MKDoc-XML-0.75/t/data/0002755000076400007640000000000010214062151014512 5ustar brunobruno00000000000000MKDoc-XML-0.75/t/data/prova.xhtml0000644000076400007640000000103010077525313016722 0ustar brunobruno00000000000000 Petal test

 Walking in København.

MKDoc-XML-0.75/t/data/root.html0000644000076400007640000000127507765640455016420 0ustar brunobruno00000000000000 Big Title 2002-01-01 00:00:00 0 fr One, Two, Three This is some description default_child_listing_bottom

Blargh

This is a test

Foo Bar MKDoc-XML-0.75/t/data/sample.xml0000644000076400007640000000164607765640455016554 0ustar brunobruno00000000000000 en-gb English (United Kingdom) MKDoc MKDoc-XML-0.75/t/100_MKDoc_XML_Tokenizer.t0000644000076400007640000000204310130753640020061 0ustar brunobruno00000000000000#!/usr/bin/perl use lib qw (../lib lib); use Test::More 'no_plan'; use strict; use warnings; use MKDoc::XML::Tokenizer; { my $data = < EOF my $tokens = MKDoc::XML::Tokenizer->process_data ($data); like ($tokens->[0]->as_string(), qr/value\=\"test\"/); } my $file = (-e 't/data/sample.xml') ? 't/data/sample.xml' : 'data/sample.xml'; my $tokens = MKDoc::XML::Tokenizer->process_file ($file); like ($tokens->[0]->as_string(), qr/|); like ($tokens->[9]->as_string(), qr/^\s+$/s); 1; __END__ MKDoc-XML-0.75/t/601_MKDoc_XML_Dumper_Thaw.t0000644000076400007640000002007007765640455020357 0ustar brunobruno00000000000000#!/usr/bin/perl use lib qw (../lib lib); use Test::More 'no_plan'; use strict; use warnings; use MKDoc::XML::Dumper; local $MKDoc::XML::Dumper::IndentLevel = 0; # xml_to_perl_litteral { my $tree = { _tag => 'litteral', _content => [ 'Foo' ] }; my $res = MKDoc::XML::Dumper->xml_to_perl_litteral ($tree); is ($res, 'Foo'); } { my $tree = { _tag => 'litteral', _content => [ '' ] }; my $res = MKDoc::XML::Dumper->xml_to_perl_litteral ($tree); is ($res, ''); } { my $tree = { _tag => 'litteral', _content => [ '0' ] }; my $res = MKDoc::XML::Dumper->xml_to_perl_litteral ($tree); is ($res, '0'); } { my $tree = { _tag => 'litteral', undef => 'true' }; my $res = MKDoc::XML::Dumper->xml_to_perl_litteral ($tree); ok (not defined $res); } # xml_to_perl_backref { my $ref = []; my $ref_id = $ref + 0; local $MKDoc::XML::Dumper::BackRef = { 12 => 'Foo' }; my $res = MKDoc::XML::Dumper->xml_to_perl_backref ( { _tag => 'backref', id => '12' } ); is ($res, 'Foo'); } { my $ref = []; my $ref_id = $ref + 0; local $MKDoc::XML::Dumper::BackRef = {}; my $res = MKDoc::XML::Dumper->xml_to_perl_backref ( { _tag => 
'backref', id => '12' } ); ok (not defined $res); } # xml_to_perl_scalar { local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = {}; my $tree = { _tag => 'scalar', id => '1', bless => 'Foo', _content => [ { _tag => 'litteral', _content => [ 'ABCDEF' ] } ] }; my $res = MKDoc::XML::Dumper->xml_to_perl_scalar ($tree); ok (ref $res); ok ($res->isa ('Foo')); is ($$res, 'ABCDEF'); } { local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = {}; my $tree = { _tag => 'scalar', id => '1', _content => [ { _tag => 'litteral', _content => [ 'ABCDEF' ] } ] }; my $res = MKDoc::XML::Dumper->xml_to_perl_scalar ($tree); ok (ref $res); is (ref $res, 'SCALAR'); is ($$res, 'ABCDEF'); } # perl_to_xml_hash { local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = {}; my $tree = { _tag => 'hash', id => '1', bless => 'Foo' }; my $res = MKDoc::XML::Dumper->xml_to_perl_hash ($tree); ok (ref $res); ok ($res->isa ('Foo')); ok ($res =~ /HASH/); ok (scalar keys %{$res} == 0); } { local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = {}; my $tree = { _tag => 'hash', id => '1', bless => 'Foo', _content => [ { _tag => 'item', key => 'foo', _content => [ { _tag => 'litteral', _content => [ 'bar' ] } ] } ] }; my $res = MKDoc::XML::Dumper->xml_to_perl_hash ($tree); ok (ref $res); ok ($res->isa ('Foo')); ok ($res =~ /HASH/); ok (scalar keys %{$res} == 1); is ($res->{foo}, 'bar'); } { local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = {}; my $tree = { _tag => 'hash', id => '1', bless => 'Foo', _content => [ { _tag => 'item', key => 'foo', _content => [ { _tag => 'litteral', _content => [ 'bar' ] } ] }, { _tag => 'item', key => 'baz', _content => [ { _tag => 'litteral', _content => [ 'buz' ] } ] } ] }; my $res = MKDoc::XML::Dumper->xml_to_perl_hash ($tree); ok (ref $res); ok ($res->isa ('Foo')); ok ($res =~ /HASH/); ok (scalar keys %{$res} == 2); is ($res->{foo}, 'bar'); 
is ($res->{baz}, 'buz'); } { local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = {}; my $tree = { _tag => 'hash', id => '1' }; my $res = MKDoc::XML::Dumper->xml_to_perl_hash ($tree); ok (ref $res); is (ref $res, 'HASH'); ok (scalar keys %{$res} == 0); } { local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = {}; my $tree = { _tag => 'hash', id => '1', _content => [ { _tag => 'item', key => 'foo', _content => [ { _tag => 'litteral', _content => [ 'bar' ] } ] } ] }; my $res = MKDoc::XML::Dumper->xml_to_perl_hash ($tree); ok (ref $res); is (ref $res, 'HASH'); ok (scalar keys %{$res} == 1); is ($res->{foo}, 'bar'); } { local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = {}; my $tree = { _tag => 'hash', id => '1', _content => [ { _tag => 'item', key => 'foo', _content => [ { _tag => 'litteral', _content => [ 'bar' ] } ] }, { _tag => 'item', key => 'baz', _content => [ { _tag => 'litteral', _content => [ 'buz' ] } ] } ] }; my $res = MKDoc::XML::Dumper->xml_to_perl_hash ($tree); ok (ref $res); is (ref $res, 'HASH'); ok (scalar keys %{$res} == 2); is ($res->{foo}, 'bar'); is ($res->{baz}, 'buz'); } # perl_to_xml_hash { local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = {}; my $tree = { _tag => 'array', id => '1', bless => 'Foo' }; my $res = MKDoc::XML::Dumper->xml_to_perl_array ($tree); ok (ref $res); ok ($res->isa ('Foo')); ok ($res =~ /ARRAY/); ok (@{$res} == 0); } { local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = {}; my $tree = { _tag => 'array', id => '1', bless => 'Foo', _content => [ { _tag => 'item', key => '0', _content => [ { _tag => 'litteral', _content => [ 'bar' ] } ] } ] }; my $res = MKDoc::XML::Dumper->xml_to_perl_array ($tree); ok (ref $res); ok ($res->isa ('Foo')); ok ($res =~ /ARRAY/); ok (@{$res} == 1); is ($res->[0], 'bar'); } { local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = 
{}; my $tree = { _tag => 'array', id => '1', bless => 'Foo', _content => [ { _tag => 'item', key => '0', _content => [ { _tag => 'litteral', _content => [ 'bar' ] } ] }, { _tag => 'item', key => '1', _content => [ { _tag => 'litteral', _content => [ 'buz' ] } ] } ] }; my $res = MKDoc::XML::Dumper->xml_to_perl_array ($tree); ok (ref $res); ok ($res->isa ('Foo')); ok ($res =~ /ARRAY/); ok (@{$res} == 2); is ($res->[0], 'bar'); is ($res->[1], 'buz'); } { local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = {}; my $tree = { _tag => 'array', id => '1' }; my $res = MKDoc::XML::Dumper->xml_to_perl_array ($tree); ok (ref $res); is (ref $res, 'ARRAY'); ok (@{$res} == 0); } { local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = {}; my $tree = { _tag => 'array', id => '1', _content => [ { _tag => 'item', key => '0', _content => [ { _tag => 'litteral', _content => [ 'bar' ] } ] } ] }; my $res = MKDoc::XML::Dumper->xml_to_perl_array ($tree); ok (ref $res); is (ref $res, 'ARRAY'); ok (@{$res} == 1); is ($res->[0], 'bar'); } { local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = {}; my $tree = { _tag => 'array', id => '1', _content => [ { _tag => 'item', key => '0', _content => [ { _tag => 'litteral', _content => [ 'bar' ] } ] }, { _tag => 'item', key => '1', _content => [ { _tag => 'litteral', _content => [ 'buz' ] } ] } ] }; my $res = MKDoc::XML::Dumper->xml_to_perl_array ($tree); ok (ref $res); is (ref $res, 'ARRAY'); ok (@{$res} == 2); is ($res->[0], 'bar'); is ($res->[1], 'buz'); } { # let's try some wicked stuff local $MKDoc::XML::Dumper::BackRef = {}; my $tree = { _tag => 'ref', id => '1', _content => [ { _tag => 'backref', id => '1' } ] }; my $res = MKDoc::XML::Dumper->xml_to_perl ( $tree ); ok (ref $res); is (ref $res, 'REF'); is ($$res, $res); } 1; __END__ MKDoc-XML-0.75/t/304_MKDoc_XML_Tagger_Numbers.t0000644000076400007640000000155410214061170021020 0ustar 
brunobruno00000000000000#!/usr/bin/perl use lib ('../lib', 'lib'); use Test::More 'no_plan'; use strict; use warnings; use MKDoc::XML::Tagger::Preserve; use MKDoc::XML::Tagger; { my $text = MKDoc::XML::Tagger->process_data ( "

stuff 1

", { _expr => '&(1)', _tag => 'a', href => '/' } ); like ($text, qr|

stuff 1

|); } { my $text = MKDoc::XML::Tagger->process_data ( "

stuff 1

", { _expr => '1', _tag => 'a', href => '/' } ); # very wrong # like ($text, qr|&\(1\)stuff 1

|); # correct output like ($text, qr|

stuff 1

|); } { my $text = MKDoc::XML::Tagger->process_data ( "

stuff &(1)

", { _expr => '&(1)', _tag => 'a', href => '/' } ); like ($text, qr|

stuff &\(1\)

|); } 1; __END__ MKDoc-XML-0.75/t/201_MKDoc_XML_TreeBuilder_Root.t0000644000076400007640000000044707765640455021353 0ustar brunobruno00000000000000#!/usr/bin/perl use lib qw (../lib lib); use Test::More 'no_plan'; use strict; use warnings; use MKDoc::XML::TreeBuilder; use MKDoc::XML::Tokenizer; { my @res = MKDoc::XML::TreeBuilder->process_file ('./t/data/root.html'); ok (1); # if it didn't die then we're good :) } 1; __END__ MKDoc-XML-0.75/t/300_MKDoc_XML_Tagger.t0000644000076400007640000000544310130753640017331 0ustar brunobruno00000000000000#!/usr/bin/perl use lib qw (../lib lib); use Test::More 'no_plan'; use strict; use warnings; use MKDoc::XML::Tagger; use MKDoc::XML::Tokenizer; # _tag_close and _tag_open functions { my $tag = MKDoc::XML::Tagger::_tag_close ('strong'); is ($tag, ''); $tag = MKDoc::XML::Tagger::_tag_open ('strong'); is ($tag, ''); $tag = MKDoc::XML::Tagger::_tag_open ('strong', { class => 'superFort' }); is ($tag, ''); } # this regex should match any amount of consecutive whitespace, # or \&(214) like tags, or carriage returns { my $sample_text = <SGML that is completely described in this document. EOF my $tokens = MKDoc::XML::Tokenizer->process_data ($example); my ($text, $tags) = MKDoc::XML::Tagger::_segregate_markup_from_text ($tokens); like ($text, qr/\&\(1\)SGML\&\(2\)/); like ($text, qr/\&\(3\)completely described\&\(4\)/); } # more nasty test { my $r = MKDoc::XML::Tagger->process_data ( 'Hello Cool World!', { _expr => 'Cool World', _tag => 'a', href => 'cw', alt => 'foo' }, { _expr => 'Hello Cool World', _tag => 'a', href => 'hcw' } ); is ($r, 'Hello Cool World!'); $r = MKDoc::XML::Tagger->process_data ( '<hello>', { _expr => 'hello', _tag => 'a', href => 'http://www.hello.com/' }, ); like ($r, qr/process_data ( '

News foo barStatements, declarations

', { '_expr' => 'news', 'href' => 'http://news.com/', '_tag' => 'a', }, { '_expr' => 'News', 'lang' => 'en', 'href' => 'http://users.groucho/news/', '_tag' => 'a', } ); } { my $data = qq |

<p>this is a test, hello world, this is a test</p>

|; my $r = MKDoc::XML::Tagger->process_data ( $data, { _expr => 'Hello World', _tag => 'a', href => 'cw', alt => 'foo' } ); } { my $r = MKDoc::XML::Tagger->process_data ( 'q & a', { '_expr' => 'Q & A', 'href' => 'http://news.com/', '_tag' => 'a', } ); is ($r, '
q & a'); } 1; __END__ MKDoc-XML-0.75/t/302_MKDoc_XML_Tagger_Spaces.t0000644000076400007640000000116410065553624020634 0ustar brunobruno00000000000000#!/usr/bin/perl use lib qw (../lib lib); use Test::More 'no_plan'; use strict; use warnings; use MKDoc::XML::Tagger::Preserve; # _tag_close and _tag_open functions { my $text = <process_data ( [ 'a' ], $text, { _expr => 'News', _tag => 'a', href => 'http://news.com/' }, ); like ($text, qr/anewsletter/); } 1; __END__ MKDoc-XML-0.75/t/305_MKDoc_XML_tagger_ampersand.t0000644000076400007640000000165510214042270021422 0ustar brunobruno00000000000000#!/usr/bin/perl use lib ('../lib', 'lib'); use Test::More 'no_plan'; use strict; use warnings; use MKDoc::XML::Tagger::Preserve; use MKDoc::XML::Tagger; { my $text = MKDoc::XML::Tagger->process_data ( "

stuff

", { _expr => 'stuff', _tag => 'a', href => 'http://example.com/foo.cgi?a=b&c=d' } ); like ($text, qr|

stuff

|, '& double escaped'); }; { my $text = MKDoc::XML::Tagger->process_data ( "

stuff

", { _expr => 'stuff', _tag => 'foo', bar => '' } ); like ($text, qr|

stuff

|, '<> double escaped'); }; { my $text = MKDoc::XML::Tagger->process_data ( "

stuff

", { _expr => 'stuff', _tag => 'foo', bar => '"hello world"' } ); like ($text, qr|

stuff

|, '" double escaped'); }; 1; __END__ MKDoc-XML-0.75/t/400_MKDoc_XML_Stripper.t0000644000076400007640000000627507765640455017760 0ustar brunobruno00000000000000#!/usr/bin/perl use lib qw (../lib lib); use Test::More 'no_plan'; use strict; use warnings; use MKDoc::XML::Token; use MKDoc::XML::Stripper; # let's test this _node_to_tag business { my $r; $r = MKDoc::XML::Stripper::_node_to_tag ( { _tag => 'b', _open => 1 } ); is ($r, ''); $r = MKDoc::XML::Stripper::_node_to_tag ( { _tag => 'b', _close => 1 } ); is ($r, ''); $r = MKDoc::XML::Stripper::_node_to_tag ( { _tag => 'b', _open => 1, _close => 1 } ); is ($r, ''); $r = MKDoc::XML::Stripper::_node_to_tag ( { _tag => 'b', _open => 1, extra => "foo" } ); is ($r, ''); $r = MKDoc::XML::Stripper::_node_to_tag ( { _tag => 'b', _close => 1, extra => "foo" } ); is ($r, ''); $r = MKDoc::XML::Stripper::_node_to_tag ( { _tag => 'b', _open => 1, _close => 1, extra => "foo" } ); is ($r, ''); $r = MKDoc::XML::Stripper::_node_to_tag ( { _tag => 'b', _open => 1, extra => "'foo'" } ); is ($r, ''); $r = MKDoc::XML::Stripper::_node_to_tag ( { _tag => 'b', _close => 1, extra => "'foo'" } ); is ($r, ''); $r = MKDoc::XML::Stripper::_node_to_tag ( { _tag => 'b', _open => 1, _close => 1, extra => "'foo'" } ); is ($r, ''); $r = MKDoc::XML::Stripper::_node_to_tag ( { _tag => 'b', _open => 1, extra => "\"foo\"" } ); is ($r, ''); $r = MKDoc::XML::Stripper::_node_to_tag ( { _tag => 'b', _close => 1, extra => "\"foo\"" } ); is ($r, ''); $r = MKDoc::XML::Stripper::_node_to_tag ( { _tag => 'b', _open => 1, _close => 1, extra => "\"foo\"" } ); is ($r, ''); } # now let's perform some tests on MKDoc::XML::Stripper objects { my $s = new MKDoc::XML::Stripper; ok ($s->isa ('MKDoc::XML::Stripper')); # allow p along with 'class' and 'id' attributes $s->allow (qw /p class id/); ok ($s->{p}); is (ref $s->{p}, 'HASH'); ok ($s->{p}->{class}); ok ($s->{p}->{id}); ok (!$s->{p}->{p}); # let's see if the 'strip' method works... 
my $token = undef; $token = new MKDoc::XML::Token ('hello'); ok ($s->strip ($token)); is ($s->strip ($token)->as_string, 'hello'); $token = new MKDoc::XML::Token (''); ok (!$s->strip ($token)); $token = new MKDoc::XML::Token (''); ok (!$s->strip ($token)); $token = new MKDoc::XML::Token (''); ok (!$s->strip ($token)); $token = new MKDoc::XML::Token (''); ok (!$s->strip ($token)); $token = new MKDoc::XML::Token (''); ok (!$s->strip ($token)); $token = new MKDoc::XML::Token (''); ok (!$s->strip ($token)); $token = new MKDoc::XML::Token (''); ok (!$s->strip ($token)); $token = new MKDoc::XML::Token ('

'); ok ($s->strip ($token)); $token = new MKDoc::XML::Token ('

'); my $r = $s->strip ($token)->as_string(); like ($r, qr /

This is a
quite good test. We should see if the method which grabs descendant nodes is:

  • OK
  • Kind of OK
  • Completely Fubar
EOF $xml =~ s/\s+$//; my @nodes = MKDoc::XML::TreeBuilder->process_data ($xml); my $res = MKDoc::XML::TreePrinter->process (@nodes); is ($res => $xml); } 1; __END__ MKDoc-XML-0.75/t/600_MKDoc_XML_Dumper_Freeze.t0000644000076400007640000001557407765640455020710 0ustar brunobruno00000000000000#!/usr/bin/perl use lib qw (../lib lib); use Test::More 'no_plan'; use strict; use warnings; use MKDoc::XML::Dumper; local $MKDoc::XML::Dumper::IndentLevel = 0; # perl_to_xml_litteral { my $res = MKDoc::XML::Dumper->perl_to_xml_litteral ('Foo'); like ($res, qr /Foo<\/litteral>/); } { my $res = MKDoc::XML::Dumper->perl_to_xml_litteral (''); like ($res, qr/<\/litteral>/); } { my $res = MKDoc::XML::Dumper->perl_to_xml_litteral ('0'); like ($res, qr/0<\/litteral>/); } { my $res = MKDoc::XML::Dumper->perl_to_xml_litteral (undef); like ($res, qr//); } # perl_to_xml_backref { my $ref = []; my $ref_id = $ref + 0; local *MKDoc::XML::Dumper::ref_to_id = sub { 12 }; local $MKDoc::XML::Dumper::BackRef = { $ref_id => $ref }; my $res = MKDoc::XML::Dumper->perl_to_xml_backref ( $ref ); like ($res, qr//); } { my $ref = []; my $ref_id = $ref + 0; local *MKDoc::XML::Dumper::ref_to_id = sub { 12 }; local $MKDoc::XML::Dumper::BackRef = {}; my $res = MKDoc::XML::Dumper->perl_to_xml_backref ( $ref ); ok (not defined $res); } # perl_to_xml_scalar { local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = {}; my $var = 'ABCDEF'; my $ref = bless \$var, 'Foo'; my $res = MKDoc::XML::Dumper->perl_to_xml_scalar ($ref); like ($res, qr//); like ($res, qr/<\/scalar>/); } { local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = {}; my $var = 'ABCDEF'; my $ref = \$var; my $res = MKDoc::XML::Dumper->perl_to_xml_scalar ($ref); like ($res, qr//); like ($res, qr/<\/scalar>/); } # Quickly test the Indent methods { local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = {}; MKDoc::XML::Dumper->indent_more(); is 
($MKDoc::XML::Dumper::IndentLevel, 1); MKDoc::XML::Dumper->indent_less(); is ($MKDoc::XML::Dumper::IndentLevel, 0); } # perl_to_xml_hash { local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = {}; my $res = MKDoc::XML::Dumper->perl_to_xml_hash ( bless {}, 'Foo' ); like ($res, qr //); like ($res, qr /<\/hash>/); } { local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = {}; my $res = MKDoc::XML::Dumper->perl_to_xml_hash ( bless { foo => 'bar' }, 'Foo' ); like ($res, qr //); like ($res, qr /<\/hash>/); like ($res, qr /\s+bar<\/litteral>\s+<\/item>/); } { local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = {}; my $res = MKDoc::XML::Dumper->perl_to_xml_hash ( bless { foo => 'bar', 'baz' => 'buz' }, 'Foo' ); like ($res, qr //); like ($res, qr /<\/hash>/); like ($res, qr /\s+bar<\/litteral>\s+<\/item>/); like ($res, qr /\s+buz<\/litteral>\s+<\/item>/); } { local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = {}; my $res = MKDoc::XML::Dumper->perl_to_xml_hash ( {} ); like ($res, qr //); like ($res, qr /<\/hash>/); } { local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = {}; my $res = MKDoc::XML::Dumper->perl_to_xml_hash ( { foo => 'bar' } ); like ($res, qr //); like ($res, qr /<\/hash>/); like ($res, qr /\s+bar<\/litteral>\s+<\/item>/); } { local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = {}; my $res = MKDoc::XML::Dumper->perl_to_xml_hash ( { foo => 'bar', 'baz' => 'buz' } ); like ($res, qr //); like ($res, qr /<\/hash>/); like ($res, qr /\s+bar<\/litteral>\s+<\/item>/); like ($res, qr /\s+buz<\/litteral>\s+<\/item>/); } # perl_to_xml_array { local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = {}; my $res = MKDoc::XML::Dumper->perl_to_xml_array ( bless [], 'Foo' ); like ($res, qr //); like ($res, qr /<\/array>/); } { local $MKDoc::XML::Dumper::IndentLevel = 0; local 
$MKDoc::XML::Dumper::BackRef = {}; my $res = MKDoc::XML::Dumper->perl_to_xml_array ( bless [ qw /foo bar/ ], 'Foo' ); like ($res, qr //); like ($res, qr /<\/array>/); like ($res, qr /\s+foo<\/litteral>\s+<\/item>/); like ($res, qr /\s+bar<\/litteral>\s+<\/item>/); } { local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = {}; my $res = MKDoc::XML::Dumper->perl_to_xml_array ( bless [ qw /foo bar baz buz/ ], 'Foo' ); like ($res, qr //); like ($res, qr /<\/array>/); like ($res, qr /\s+foo<\/litteral>\s+<\/item>/); like ($res, qr /\s+bar<\/litteral>\s+<\/item>/); like ($res, qr /\s+baz<\/litteral>\s+<\/item>/); like ($res, qr /\s+buz<\/litteral>\s+<\/item>/); } { local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = {}; my $res = MKDoc::XML::Dumper->perl_to_xml_array ( [] ); like ($res, qr //); like ($res, qr /<\/array>/); } { local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = {}; my $res = MKDoc::XML::Dumper->perl_to_xml_array ( [ qw /foo bar/ ] ); like ($res, qr //); like ($res, qr /<\/array>/); like ($res, qr /\s+foo<\/litteral>\s+<\/item>/); like ($res, qr /\s+bar<\/litteral>\s+<\/item>/); } { local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = {}; my $res = MKDoc::XML::Dumper->perl_to_xml_array ( [ qw /foo bar baz buz/ ] ); like ($res, qr //); like ($res, qr /<\/array>/); like ($res, qr /\s+foo<\/litteral>\s+<\/item>/); like ($res, qr /\s+bar<\/litteral>\s+<\/item>/); like ($res, qr /\s+baz<\/litteral>\s+<\/item>/); like ($res, qr /\s+buz<\/litteral>\s+<\/item>/); } { # let's try some wicked stuff local $MKDoc::XML::Dumper::IndentLevel = 0; local $MKDoc::XML::Dumper::BackRef = {}; my $var = undef; $var = \"hello"; $var = \$var; my $res = MKDoc::XML::Dumper->perl_to_xml ( $var ); like ($res, qr /\s+/); } 1; __END__ MKDoc-XML-0.75/t/002_MKDoc_XML_Token_tag_self_close.t0000644000076400007640000000051407765640455022245 0ustar 
brunobruno00000000000000#!/usr/bin/perl use lib qw (../lib lib); use Test::More 'no_plan'; use strict; use warnings; use MKDoc::XML::Token; my $data = qq ||; my $token = new MKDoc::XML::Token ($data); my $node = $token->tag_self_close(); is ($node->{_tag}, 'a'); ok (defined $node->{'petal:replace'}); 1; __END__ MKDoc-XML-0.75/t/603_MKDoc_XML_Dumper_Compat.t0000644000076400007640000001434710130753640020670 0ustar brunobruno00000000000000#!/usr/bin/perl use lib qw (../lib lib); use Test::More 'no_plan'; use strict; use warnings; use MKDoc::XML::Dumper; use Data::Dumper; { my $xml = < 8763623178/search.html search search.html EOF my $struct = MKDoc::XML::Dumper->xml2perl ($xml); is (ref $struct, 'flo::editor::File'); is ($struct->{file}, '8763623178/search.html'); is ($struct->{title}, 'search'); is ($struct->{uri_name}, 'search.html'); } { my $xml = < / on 12 hello hello.html EOF my $struct = MKDoc::XML::Dumper->xml2perl ($xml); is (ref $struct, 'flo::editor::Headlines'); is ($struct->{from_path}, '/'); is ($struct->{leaf_only}, 'on'); is ($struct->{max_headlines}, '12'); is ($struct->{title}, 'hello'); is ($struct->{uri_name}, 'hello.html'); } { my $xml = < 7898392326/aureli-julian.jif Aureli Julian aureli-julian.jif EOF my $struct = MKDoc::XML::Dumper->xml2perl ($xml); is (ref $struct, 'flo::editor::Image'); is ($struct->{image}, '7898392326/aureli-julian.jif'); is ($struct->{title}, 'Aureli Julian'); is ($struct->{uri_name}, 'aureli-julian.jif'); } { my $xml = < foo foo foo.link http://www.foo.com EOF my $struct = MKDoc::XML::Dumper->xml2perl ($xml); is (ref $struct, 'flo::editor::Link'); is ($struct->{description}, 'foo'); is ($struct->{title}, 'foo'); is ($struct->{uri_name}, 'foo.link'); is ($struct->{url}, 'http://www.foo.com'); } { my $xml = < Aureli Julian Fred Flintstone 2003-11-05 foo 0322940468/aureli-julian.jif aureli-julian-2.jif EOF my $struct = MKDoc::XML::Dumper->xml2perl ($xml); is (ref $struct, 'flo::editor::Photo'); is ($struct->{'alt'}, 'Aureli 
Julian'); is ($struct->{'coverage'}, ''); is ($struct->{'creator'}, 'Fred Flintstone'); is ($struct->{'date_created'}, '2003-11-05'); is ($struct->{'description'}, 'foo'); is ($struct->{'image'}, '0322940468/aureli-julian.jif'); is ($struct->{'rights'}, ''); is ($struct->{'uri_name'}, 'aureli-julian-2.jif'); } { my $xml = < foo bar 2003-11-05 2003-11-07 00 00 HWNJQUNGJB woot? poll.html EOF my $struct = MKDoc::XML::Dumper->xml2perl ($xml); is (ref $struct, 'flo::editor::Poll'); is ($struct->{'answer1'}, 'foo'); is ($struct->{'answer2'}, 'bar'); is ($struct->{'answer3'}, ''); is ($struct->{'answer4'}, ''); is ($struct->{'answer5'}, ''); is ($struct->{'answer6'}, ''); is ($struct->{'answer7'}, ''); is ($struct->{'answer8'}, ''); is ($struct->{'answer9'}, ''); is ($struct->{'date_start'}, '2003-11-05'); is ($struct->{'date_stop'}, '2003-11-07'); is ($struct->{'hour_start'}, '00'); is ($struct->{'hour_stop'}, '00'); is ($struct->{'id'}, 'HWNJQUNGJB'); is ($struct->{'question'}, 'woot?'); is ($struct->{'uri_name'}, 'poll.html'); } { my $xml = < all bulleted_list foo http://www.foo.com/somerss rss.rss EOF my $struct = MKDoc::XML::Dumper->xml2perl ($xml); is (ref $struct, 'flo::editor::RSS'); is ($struct->{'max'}, 'all'); is ($struct->{'template'}, 'bulleted_list'); is ($struct->{'title'}, 'foo'); is ($struct->{'uri'}, 'http://www.foo.com/somerss'); is ($struct->{'uri_name'}, 'rss.rss'); } { my $xml = < gsdfgsdfgds text.txt EOF my $struct = MKDoc::XML::Dumper->xml2perl ($xml); is (ref $struct, 'flo::editor::Text'); is ($struct->{'data'}, 'gsdfgsdfgds'); is ($struct->{'uri_name'}, 'text.txt'); } { my $xml = < Xchange Learning Centre photo Northamptonshire, UK 18 03 2002 David Matthews Photo taken outside the The Xchange Learning Centre, Kettering. 
8217988573/xchange-learning-centre.jpg _edit_block_32_photo © Crown copyright 2003 EOF ok (ref MKDoc::XML::Dumper->xml2perl ($xml)); } __END__ MKDoc-XML-0.75/t/502_MKDoc_XML_Encode.t0000644000076400007640000000077707765640455017351 0ustar brunobruno00000000000000#!/usr/bin/perl use lib qw (../lib lib); use Test::More 'no_plan'; use strict; use warnings; use MKDoc::XML::Encode; is (MKDoc::XML::Encode->process ('Hello, <'), 'Hello, <'); is (MKDoc::XML::Encode->process ('Hello, >'), 'Hello, >'); is (MKDoc::XML::Encode->process ('Hello, &'), 'Hello, &'); is (MKDoc::XML::Encode->process ('Hello, "'), 'Hello, "'); is (MKDoc::XML::Encode->process ('Hello, \''), 'Hello, ''); is (MKDoc::XML::Encode->process ('ABCDEF'), 'ABCDEF'); 1; __END__ MKDoc-XML-0.75/t/200_MKDoc_XML_TreeBuilder.t0000644000076400007640000000171207765640455020343 0ustar brunobruno00000000000000#!/usr/bin/perl use lib qw (../lib lib); use Test::More 'no_plan'; use strict; use warnings; use MKDoc::XML::TreeBuilder; use MKDoc::XML::Tokenizer; { my $xml = <<'EOF'; This is a
quite good test. We should see if the method which grabs descendant nodes is:
  • OK
  • Kind of OK
  • Completely Fubar
EOF my $tokens = MKDoc::XML::Tokenizer->process_data ($xml); my $token = shift @{$tokens}; my $d = MKDoc::XML::TreeBuilder::_descendant_tokens ($token, $tokens); is ($d->[0]->as_string(), 'This is a '); is ($d->[1]->as_string(), '
'); is ($d->[2]->as_string(), ' quite good '); is ($d->[3]->as_string(), ''); is ($d->[4]->as_string(), 'test'); is ($d->[5]->as_string(), ''); my ($foo_node) = MKDoc::XML::TreeBuilder->process_data ($xml); is ($foo_node->{_tag} => 'foo'); } 1; __END__ MKDoc-XML-0.75/t/102_MKDoc_XML_Tokenizer_Attributes.t0000644000076400007640000000170207765640455022315 0ustar brunobruno00000000000000#!/usr/bin/perl use lib qw (../lib lib); use Test::More 'no_plan'; use strict; use warnings; use MKDoc::XML::Tokenizer; use Data::Dumper; my $data = qq|Hello|; eval { Dumper (MKDoc::XML::Tokenizer->process_data ($data)) }; ok ($@); $data = qq |

|; eval { Dumper (MKDoc::XML::Tokenizer->process_data ($data)) }; ok ($@); $data = qq |

hello world

|; eval { Dumper (MKDoc::XML::Tokenizer->process_data ($data)) }; ok (!$@); $data = qq |

hello world

|; eval { Dumper (MKDoc::XML::Tokenizer->process_data ($data)) }; ok (!$@); $data = qq |

hello world

|; eval { print Dumper (MKDoc::XML::Tokenizer->process_data ($data)) }; ok ($@); $data = qq |

hello world

|; eval { print Dumper (MKDoc::XML::Tokenizer->process_data ($data)) }; ok ($@); $data = qq |

hello world

|; eval { print Dumper (MKDoc::XML::Tokenizer->process_data ($data)) }; ok ($@); 1; __END__ MKDoc-XML-0.75/t/501_MKDoc_XML_DecodeHO.t0000644000076400007640000000113407765640455017551 0ustar brunobruno00000000000000#!/usr/bin/perl use lib qw (../lib lib); use Test::More 'no_plan'; use strict; use warnings; use MKDoc::XML::Decode; $SIG{__WARN__} = sub {}; # those should be unchanged my $decode = new MKDoc::XML::Decode 'xhtml'; is ($decode->process ('Hello, <'), 'Hello, <'); is ($decode->process ('Hello, >'), 'Hello, >'); is ($decode->process ('Hello, &'), 'Hello, &'); is ($decode->process ('Hello, "'), 'Hello, "'); is ($decode->process ('Hello, ''), 'Hello, ''); # but these should be isnt ($decode->process (' '), ' '); # add your own here :) 1; __END__ MKDoc-XML-0.75/t/401_MKDoc_XML_Stripper_p_not_closing.t0000644000076400007640000000071207765640455022664 0ustar brunobruno00000000000000#!/usr/bin/perl use lib qw (../lib lib); use Test::More 'no_plan'; use strict; use warnings; use MKDoc::XML::Stripper; # let's test this _node_to_tag business { my $stripper = new MKDoc::XML::Stripper; $stripper->allow (qw /p class id/); my $ugly = '

Hello, World!

'; my $neat = $stripper->process_data ($ugly); is ($neat, '

Hello, World!

'); } 1; __END__ MKDoc-XML-0.75/t/303_MKDoc_XML_Tagger_Nesting.t0000644000076400007640000000074510156053456021031 0ustar brunobruno00000000000000#!/usr/bin/perl use lib qw (../lib lib); use Test::More 'no_plan'; use strict; use warnings; use MKDoc::XML::Tagger::Preserve; use MKDoc::XML::Tagger; { my $text = MKDoc::XML::Tagger->process_data ( "

URI Name

", { _expr => 'URI Name', _tag => 'a', href => '/' } ); #$text = '

URI Name

' ."\n"; like ($text, qr/

URI <\/a>Name<\/a><\/i><\/p>/); } 1; __END__ MKDoc-XML-0.75/t/402_MKDoc_XML_Stripper_def.t0000644000076400007640000000114307765640455020565 0ustar brunobruno00000000000000#!/usr/bin/perl use lib qw (../lib lib); use Test::More 'no_plan'; use strict; use warnings; use MKDoc::XML::Stripper; { my $stripper = new MKDoc::XML::Stripper; $stripper->load_def ('mkdoc16'); # check that a few values are really there ok ($stripper->{area}->{'href'}); ok ($stripper->{dfn}->{'lang'}); ok ($stripper->{h3}->{'id'}); ok ($stripper->{h6}->{'xml:lang'}); ok ($stripper->{img}->{'lang'}); ok ($stripper->{legend}->{'class'}); ok ($stripper->{object}->{'dir'}); ok ($stripper->{span}->{'dir'}); ok ($stripper->{th}->{'id'}); } 1; __END__ MKDoc-XML-0.75/t/602_MKDoc_XML_Dumper_Round.t0000644000076400007640000000121207765640455020541 0ustar brunobruno00000000000000#!/usr/bin/perl use lib qw (../lib lib); use Test::More 'no_plan'; use strict; use warnings; use MKDoc::XML::Dumper; sub testit ($) { my $struct = shift; my $xml = MKDoc::XML::Dumper->perl2xml ($struct); is_deeply (MKDoc::XML::Dumper->xml2perl ($xml), $struct); } testit 'hello'; testit \'hello'; testit \\'hello'; testit \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'hello'; testit []; testit [ qw /foo bar baz/ ]; testit {}; testit { foo => 'bar', baz => 'buz' }; testit [ qw /foo bar baz/, [], { hello => 'world', yo => \\'boo' } ]; testit \[ \[ qw /foo bar baz/ ], \[], \{ hello => 'world', yo => \\'boo' } ]; __END__ MKDoc-XML-0.75/t/301_MKDoc_XML_Tagger_Preserve.t0000644000076400007640000000252010010732370021170 0ustar brunobruno00000000000000#!/usr/bin/perl use lib qw (../lib lib); use Test::More 'no_plan'; use strict; use warnings; use MKDoc::XML::Tagger::Preserve; # _tag_close and _tag_open functions { my $text = qq|Hello, Cool World|; @MKDoc::XML::Tagger::Preserve::Preserve = ('a'); my @list = (); MKDoc::XML::Tagger::Preserve::_compute_unique_string ($text, qq|Cool World|, \@list); is ($text, 'Hello, Cool World'); is 
($list[1], 'Cool World'); } # _tag_close and _tag_open functions { my $text = qq|Hello, Cool World|; @MKDoc::XML::Tagger::Preserve::Preserve = ('a'); my @list = (); ($text, @list) = MKDoc::XML::Tagger::Preserve::_preserve_encode ($text); unlike ($text, qr/Hello, Cool World'); } # _tag_close and _tag_open functions { my $text = qq|Hello, Cool World. Cool huh?|; $text = MKDoc::XML::Tagger::Preserve->process_data ( [ 'a' ], $text, { _expr => 'cool', _tag => 'a', href => 'http://cool.com/' } ); is ($text, qq|Hello, Cool World. Cool huh?|); } 1; __END__ MKDoc-XML-0.75/t/000_MKDoc_XML_Token.t0000644000076400007640000000760610130753640017200 0ustar brunobruno00000000000000#!/usr/bin/perl use lib qw (../lib lib); use Test::More 'no_plan'; use strict; use warnings; use MKDoc::XML::Token; my $file = (-e 't/data/sample.xml') ? 't/data/sample.xml' : 'data/sample.xml'; my $comment = new MKDoc::XML::Token (''); my $declaration = new MKDoc::XML::Token (''); my $pi = new MKDoc::XML::Token (''); my $open_tag_1 = new MKDoc::XML::Token (''); my $open_tag_2 = new MKDoc::XML::Token (''); my $close_tag = new MKDoc::XML::Token (''); my $self_close_tag = new MKDoc::XML::Token ('
'); my $text = new MKDoc::XML::Token ('this is some text'); { ok !$comment->tag_open(); ok !$comment->tag_self_close(); ok !$comment->tag_close(); ok !$comment->pi(); ok !$comment->declaration(); ok $comment->comment(); ok !$comment->tag(); ok $comment->pseudotag(); ok $comment->leaf(); ok !$comment->text(); } { ok !$declaration->tag_open(); ok !$declaration->tag_self_close(); ok !$declaration->tag_close(); ok !$declaration->pi(); ok $declaration->declaration(); ok !$declaration->comment(); ok !$declaration->tag(); ok $declaration->pseudotag(); ok $declaration->leaf(); ok !$declaration->text(); } { ok !$pi->tag_open(); ok !$pi->tag_self_close(); ok !$pi->tag_close(); ok $pi->pi(); ok !$pi->declaration(); ok !$pi->comment(); ok !$pi->tag(); ok $pi->pseudotag(); ok $pi->leaf(); ok !$pi->text(); } { ok $open_tag_1->tag_open(); ok !$open_tag_1->tag_self_close(); ok !$open_tag_1->tag_close(); ok !$open_tag_1->pi(); ok !$open_tag_1->declaration(); ok !$open_tag_1->comment(); ok $open_tag_1->tag(); ok !$open_tag_1->pseudotag(); ok !$open_tag_1->leaf(); ok !$open_tag_1->text(); my $n = $open_tag_1->tag_open(); ok ($n->{_open}); ok (!$n->{_close}); } { ok $open_tag_2->tag_open(); ok !$open_tag_2->tag_self_close(); ok !$open_tag_2->tag_close(); ok !$open_tag_2->pi(); ok !$open_tag_2->declaration(); ok !$open_tag_2->comment(); ok $open_tag_2->tag(); ok !$open_tag_2->pseudotag(); ok !$open_tag_2->leaf(); ok !$open_tag_2->text(); my $n = $open_tag_2->tag_open(); ok ($n->{_open}); ok (!$n->{_close}); } { ok !$close_tag->tag_open(); ok !$close_tag->tag_self_close(); ok $close_tag->tag_close(); ok !$close_tag->pi(); ok !$close_tag->declaration(); ok !$close_tag->comment(); ok $close_tag->tag(); ok !$close_tag->pseudotag(); ok !$close_tag->leaf(); ok !$close_tag->text(); my $n = $close_tag->tag_close(); ok (!$n->{_open}); ok ($n->{_close}); } { ok !$self_close_tag->tag_open(); ok $self_close_tag->tag_self_close(); ok !$self_close_tag->tag_close(); ok !$self_close_tag->pi(); ok 
!$self_close_tag->declaration(); ok !$self_close_tag->comment(); ok $self_close_tag->tag(); ok !$self_close_tag->pseudotag(); ok $self_close_tag->leaf(); ok !$self_close_tag->text(); my $n = $self_close_tag->tag_self_close(); ok ($n->{_open}); ok ($n->{_close}); } { ok !$text->tag_open(); ok !$text->tag_self_close(); ok !$text->tag_close(); ok !$text->pi(); ok !$text->declaration(); ok !$text->comment(); ok !$text->tag(); ok !$text->pseudotag(); ok $text->leaf(); ok $text->text(); } { my $tag = new MKDoc::XML::Token ('

'); my $node = $tag->tag(); is ($node->{_tag}, 'p'); } { my $data = < EOF $data =~ s/^\s+//; $data =~ s/\s+$//; my $tag = new MKDoc::XML::Token ($data); $tag = $tag->leaf(); is ($tag->{onfocus}, "if(t.value='';"); is ($tag->{value}, "test"); } 1; __END__ MKDoc-XML-0.75/Changes0000644000076400007640000000725210214061760014642 0ustar brunobruno00000000000000Revision history for MKDoc::XML 0.75 Thu Mar 10 15:12:00 2005 - Added tests for tagger bugs when matching numbers and double escaping attribute contents - bugfix for double-encoding attribute-contents bug - fix for invalid XML produced when tagging text matching numeric strings 0.74 Thu Dec 09 14:00:00 2004 - Fix for longstanding bug introduced in 0.66 that resulting in MKDoc::XML::Tagger producing invalid XML. 0.73 Wed Oct 06 12:58:00 2004 - removed defunct jhiver email address - Fixed some 'uninitialized value' problem 0.72 Wed Jul 21 18:41:28 2004 - Added MKDoc::XML::TreePrinter 0.71 Mon Jun 21 13:38:38 2004 - Fixed MKDoc::XML::Tagger bug - Added t/302_MKDoc_XML_Tagger_Spaces.t 0.70 Mon Apr 5 14:31:31 2004 - Attribute parsing fix not commited somehow 0.69 Thu Mar 18 17:25:55 2004 - Added experimental MKDoc::XML::Tagger::Preserve module - Fixed broken attribute parsing in MKDoc::XML::Token 0.68 Sat Jan 31 10:44:31 2004 - Fixed backwards compatibility decoding bug 0.67 Mon Jan 26 13:52:43 2004 - Fixed more backwards compatibility bugs 0.66 Fri Jan 23 16:03:03 2004 - Fixed double hyperlinking issue with MKDoc::XML::Tagger 0.65 Fri Jan 23 13:08:33 2004 - Fixed some backwards compatibility issues 0.64 Fri Jan 23 12:28:27 2004 - Fixed "Use of uninitialized value in join or string" warnings in MKDoc::XML::Decode 0.63 - Added more comprehensive XHTML entities decoding 0.62 Thu Nov 6 11:54:49 2003 - Added current MKDoc backwards compatibility 0.61 Thu Oct 16 13:44:40 2003 - Fixed not dying on

-

-

... 0.60 Thu Oct 9 16:54:44 2003 - Refactored / Modularized MKDoc::XML::Decode - MKDoc whitelist: added required xml container (html), head, title and body - MKDoc whitelist: recreated from xhtml 1.0 strict - MKDoc whitelist: updated test case t/402_MKDoc_XML_Stripper_def.t - Fixed error reporting in MKDoc::XML::TreeBuilder - Removed "noframes" element from xhtml 1.0 transitional 0.53 Tue Sep 30 10:49:06 2003 - Fixed a structural bug in MKDoc::XML::Token - Renamed all is_XXX methods to XXX 0.52 Thu Sep 25 14:19:49 2003 - Fixed a bug in MKDoc::XML::Tagger which would happen with Perl 5.6.1 - Added MKDoc::XML::Stripper XHTML specs contributed by Patrick - Amended MKDoc::XML::Stripper POD accordingly - Fixed missing dependencies 0.51 Fri Sep 19 16:48:10 2003 - Changed lib/MKDoc/XML.pm and Makefile.PL so that it can be picked up by Petal's Makefile.PL 0.5 Fri Sep 19 16:37:12 2003 - Improved MKDoc::XML::Encode and MKDoc::XML::Decode - Added definition loading to MKDoc::XML::Stripper 0.4 Fri Sep 19 12:15:23 2003 - Fixed Robin Berjon's reported bug about tokenizer not dying on invalid comment - Fixed MKDoc::XML::Token incorrectly parsing attributes in single quotes - Fixed MKDoc::XML::Token not parsing attributes spanned on many lines - Fixed bogus regexes in is_self_close() method - Fixed MKDoc::XML::Token not parsing attributes containing '-' 0.3 Wed Sep 17 15:40:50 2003 - Fixed patrick's reported bug "XML::Stripper" not compiling - Fixed patrick's reported bug "p tag not closing" 0.2 Thu Sep 11 16:19:46 2003 - Applied Bruno's documentation typo patches - MKDoc::XML::Encode initial release - MKDoc::XML::EncodeHO initial release - MKDoc::XML::Decode initial release - MKDoc::XML::Dumper initial release 0.1 Thu Sep 4 12:02:03 2003 - MKDoc::XML::Token initial release - MKDoc::XML::Tokenizer initial release - MKDoc::XML::TreeBuilder initial release - MKDoc::XML::Stripper initial release - MKDoc::XML::Tagger initial release 
MKDoc-XML-0.75/MANIFEST0000644000076400007640000000272410214061640014474 0ustar brunobruno00000000000000.cvsignore Changes lib/MKDoc/XML.pm lib/MKDoc/XML/Decode.pm lib/MKDoc/XML/Decode/Numeric.pm lib/MKDoc/XML/Decode/XHTML.pm lib/MKDoc/XML/Decode/XMLBase.pm lib/MKDoc/XML/Dumper.pm lib/MKDoc/XML/Encode.pm lib/MKDoc/XML/Stripper.pm lib/MKDoc/XML/Stripper/mkdoc16.txt lib/MKDoc/XML/Stripper/xhtml10frameset.txt lib/MKDoc/XML/Stripper/xhtml10strict.txt lib/MKDoc/XML/Stripper/xhtml10transitional.txt lib/MKDoc/XML/Tagger.pm lib/MKDoc/XML/Tagger/Preserve.pm lib/MKDoc/XML/Token.pm lib/MKDoc/XML/Tokenizer.pm lib/MKDoc/XML/TreeBuilder.pm lib/MKDoc/XML/TreePrinter.pm Makefile.PL MANIFEST This list of files META.yml README t/000_MKDoc_XML_Token.t t/001_MKDoc_XML_Token_tag_open.t t/002_MKDoc_XML_Token_tag_self_close.t t/100_MKDoc_XML_Tokenizer.t t/101_MKDoc_XML_Tokenizer_Comment.t t/102_MKDoc_XML_Tokenizer_Attributes.t t/200_MKDoc_XML_TreeBuilder.t t/201_MKDoc_XML_TreeBuilder_Root.t t/300_MKDoc_XML_Tagger.t t/301_MKDoc_XML_Tagger_Preserve.t t/302_MKDoc_XML_Tagger_Spaces.t t/303_MKDoc_XML_Tagger_Nesting.t t/304_MKDoc_XML_Tagger_Numbers.t t/305_MKDoc_XML_tagger_ampersand.t t/400_MKDoc_XML_Stripper.t t/401_MKDoc_XML_Stripper_p_not_closing.t t/402_MKDoc_XML_Stripper_def.t t/500_MKDoc_XML_Decode.t t/501_MKDoc_XML_DecodeHO.t t/502_MKDoc_XML_Encode.t t/600_MKDoc_XML_Dumper_Freeze.t t/601_MKDoc_XML_Dumper_Thaw.t t/602_MKDoc_XML_Dumper_Round.t t/603_MKDoc_XML_Dumper_Compat.t t/604_MKDoc_XML_Dumper_Entities.t t/700_MKDoc_XML_TreePrinter.t t/data/prova.xhtml t/data/root.html t/data/sample.xml MKDoc-XML-0.75/lib/0002755000076400007640000000000010214062151014104 5ustar brunobruno00000000000000MKDoc-XML-0.75/lib/MKDoc/0002755000076400007640000000000010214062151015041 5ustar brunobruno00000000000000MKDoc-XML-0.75/lib/MKDoc/XML/0002755000076400007640000000000010214062151015501 5ustar brunobruno00000000000000MKDoc-XML-0.75/lib/MKDoc/XML/Decode/0002755000076400007640000000000010214062151016664 
5ustar brunobruno00000000000000MKDoc-XML-0.75/lib/MKDoc/XML/Decode/XMLBase.pm0000644000076400007640000000046707765640455020514 0ustar brunobruno00000000000000package MKDoc::XML::Decode::XMLBase; use warnings; use strict; our %XML_Decode = ( 'amp' => '&', 'lt' => '<', 'gt' => '>', 'quot' => '"', 'apos' => "'", ); sub process { my $class = shift; my $stuff = shift; return $XML_Decode{$stuff}; } sub module_name { 'xml' } 1; MKDoc-XML-0.75/lib/MKDoc/XML/Decode/Numeric.pm0000644000076400007640000000065007765640455020655 0ustar brunobruno00000000000000package MKDoc::XML::Decode::Numeric; use warnings; use strict; sub process { (@_ == 2) or warn "MKDoc::XML::Encode::process() should be called with two arguments"; my $class = shift; my $stuff = shift; $stuff =~ s/^#// or return; # if hex, convert to hex $stuff =~ s/^\[xX]([0-9a-fA-F])+$/hex($1)/e; return unless ($stuff =~ /^\d+$/); return chr ($stuff); } 1; __END__ MKDoc-XML-0.75/lib/MKDoc/XML/Decode/XHTML.pm0000644000076400007640000002323110130753640020124 0ustar brunobruno00000000000000package MKDoc::XML::Decode::XHTML; use warnings; use strict; # Portions (c) International Organization for Standardization 1986: # Permission to copy in any form is granted for use with conforming SGML # systems and applications as defined in ISO 8879, provided this notice is # included in all copies. 
our %ENTITY_2_CHAR = ( # Latin1 characters 'nbsp' => chr(160), 'iexcl' => chr(161), 'cent' => chr(162), 'pound' => chr(163), 'curren' => chr(164), 'yen' => chr(165), 'brvbar' => chr(166), 'sect' => chr(167), 'uml' => chr(168), 'copy' => chr(169), 'ordf' => chr(170), 'laquo' => chr(171), 'not' => chr(172), 'shy' => chr(173), 'reg' => chr(174), 'macr' => chr(175), 'deg' => chr(176), 'plusmn' => chr(177), 'sup2' => chr(178), 'sup3' => chr(179), 'acute' => chr(180), 'micro' => chr(181), 'para' => chr(182), 'middot' => chr(183), 'cedil' => chr(184), 'sup1' => chr(185), 'ordm' => chr(186), 'raquo' => chr(187), 'frac14' => chr(188), 'frac12' => chr(189), 'frac34' => chr(190), 'iquest' => chr(191), 'Agrave' => chr(192), 'Aacute' => chr(193), 'Acirc' => chr(194), 'Atilde' => chr(195), 'Auml' => chr(196), 'Aring' => chr(197), 'AElig' => chr(198), 'Ccedil' => chr(199), 'Egrave' => chr(200), 'Eacute' => chr(201), 'Ecirc' => chr(202), 'Euml' => chr(203), 'Igrave' => chr(204), 'Iacute' => chr(205), 'Icirc' => chr(206), 'Iuml' => chr(207), 'ETH' => chr(208), 'Ntilde' => chr(209), 'Ograve' => chr(210), 'Oacute' => chr(211), 'Ocirc' => chr(212), 'Otilde' => chr(213), 'Ouml' => chr(214), 'times' => chr(215), 'Oslash' => chr(216), 'Ugrave' => chr(217), 'Uacute' => chr(218), 'Ucirc' => chr(219), 'Uuml' => chr(220), 'Yacute' => chr(221), 'THORN' => chr(222), 'szlig' => chr(223), 'agrave' => chr(224), 'aacute' => chr(225), 'acirc' => chr(226), 'atilde' => chr(227), 'auml' => chr(228), 'aring' => chr(229), 'aelig' => chr(230), 'ccedil' => chr(231), 'egrave' => chr(232), 'eacute' => chr(233), 'ecirc' => chr(234), 'euml' => chr(235), 'igrave' => chr(236), 'iacute' => chr(237), 'icirc' => chr(238), 'iuml' => chr(239), 'eth' => chr(240), 'ntilde' => chr(241), 'ograve' => chr(242), 'oacute' => chr(243), 'ocirc' => chr(244), 'otilde' => chr(245), 'ouml' => chr(246), 'divide' => chr(247), 'oslash' => chr(248), 'ugrave' => chr(249), 'uacute' => chr(250), 'ucirc' => chr(251), 'uuml' => chr(252), 
'yacute' => chr(253), 'thorn' => chr(254), 'yuml' => chr(255), # C0 Controls and Basic Latin # 'quot' => chr(34), # 'amp' => chr(38), # 'apos' => chr(39), # 'lt' => chr(60), # 'gt' => chr(62), # Latin Extended-A 'OElig' => chr(338), 'oelig' => chr(339), 'Scaron' => chr(352), 'scaron' => chr(353), 'Yuml' => chr(376), # Spacin g Modifier Letters 'circ' => chr(710), 'tilde' => chr(732), # General Punctuation # * lsaquo is proposed but not yet ISO standardized # * rsaquo is proposed but not yet ISO standardized 'ensp' => chr(8194), 'emsp' => chr(8195), 'thinsp' => chr(8201), 'zwnj' => chr(8204), 'zwj' => chr(8205), 'lrm' => chr(8206), 'rlm' => chr(8207), 'ndash' => chr(8211), 'mdash' => chr(8212), 'lsquo' => chr(8216), 'rsquo' => chr(8217), 'sbquo' => chr(8218), 'ldquo' => chr(8220), 'rdquo' => chr(8221), 'bdquo' => chr(8222), 'dagger' => chr(8224), 'Dagger' => chr(8225), 'permil' => chr(8240), 'lsaquo' => chr(8249), 'rsaquo' => chr(8250), 'euro' => chr(8364), # Mathematical, Greek and Symbolic characters for HTML # Latin Extended-B 'fnof' => chr(402), # Greek # * there is no Sigmaf, and no U+03A2 character either 'Alpha' => chr(913), 'Beta' => chr(914), 'Gamma' => chr(915), 'Delta' => chr(916), 'Epsilon' => chr(917), 'Zeta' => chr(918), 'Eta' => chr(919), 'Theta' => chr(920), 'Iota' => chr(921), 'Kappa' => chr(922), 'Lambda' => chr(923), 'Mu' => chr(924), 'Nu' => chr(925), 'Xi' => chr(926), 'Omicron' => chr(927), 'Pi' => chr(928), 'Rho' => chr(929), 'Sigma' => chr(931), 'Tau' => chr(932), 'Upsilon' => chr(933), 'Phi' => chr(934), 'Chi' => chr(935), 'Psi' => chr(936), 'Omega' => chr(937), 'alpha' => chr(945), 'beta' => chr(946), 'gamma' => chr(947), 'delta' => chr(948), 'epsilon' => chr(949), 'zeta' => chr(950), 'eta' => chr(951), 'theta' => chr(952), 'iota' => chr(953), 'kappa' => chr(954), 'lambda' => chr(955), 'mu' => chr(956), 'nu' => chr(957), 'xi' => chr(958), 'omicron' => chr(959), 'pi' => chr(960), 'rho' => chr(961), 'sigmaf' => chr(962), 'sigma' => chr(963), 
'tau' => chr(964), 'upsilon' => chr(965), 'phi' => chr(966), 'chi' => chr(967), 'psi' => chr(968), 'omega' => chr(969), 'thetasym' => chr(977), 'upsih' => chr(978), 'piv' => chr(982), # General Punctuation # * bullet is NOT the same as bullet operator, U+2219 'bull' => chr(8226), 'hellip' => chr(8230), 'prime' => chr(8242), 'Prime' => chr(8243), 'oline' => chr(8254), 'frasl' => chr(8260), # Letterlike Symbols # * alef symbol is NOT the same as hebrew letter alef, U+05D0 although the same glyph could be used to depict both characters 'weierp' => chr(8472), 'image' => chr(8465), 'real' => chr(8476), 'trade' => chr(8482), 'alefsym' => chr(8501), # Arrows # * Unicode does not say that lArr is the same as the 'is implied by' arrow but also # does not have any other character for that function. So ? lArr can be used for 'is implied by' as ISOtech suggests # * Unicode does not say rArr is the 'implies' character but does not have another # character with this function so ? rArr can be used for 'implies' as ISOtech suggests 'larr' => chr(8592), 'uarr' => chr(8593), 'rarr' => chr(8594), 'darr' => chr(8595), 'harr' => chr(8596), 'crarr' => chr(8629), 'lArr' => chr(8656), 'uArr' => chr(8657), 'rArr' => chr(8658), 'dArr' => chr(8659), 'hArr' => chr(8660), # Mathematical Operators # * should there be a more memorable name than 'ni'? # * prod is NOT the same character as U+03A0 'greek capital letter pi' though the same glyph might be used for both # * sum is NOT the same character as U+03A3 'greek capital letter sigma' though the same glyph might be used for both # * sim: tilde operator is NOT the same character as the tilde, U+007E, although the same glyph might be used to represent both # * note that nsup, 'not a superset of, U+2283' is not covered by the Symbol font encoding and is not included. # Should it be, for symmetry? 
It is in ISOamsn # * sdot: dot operator is NOT the same character as U+00B7 middle dot 'forall' => chr(8704), 'part' => chr(8706), 'exist' => chr(8707), 'empty' => chr(8709), 'nabla' => chr(8711), 'isin' => chr(8712), 'notin' => chr(8713), 'ni' => chr(8715), 'prod' => chr(8719), 'sum' => chr(8721), 'minus' => chr(8722), 'lowast' => chr(8727), 'radic' => chr(8730), 'prop' => chr(8733), 'infin' => chr(8734), 'ang' => chr(8736), 'and' => chr(8743), 'or' => chr(8744), 'cap' => chr(8745), 'cup' => chr(8746), 'int' => chr(8747), 'there4' => chr(8756), 'sim' => chr(8764), 'cong' => chr(8773), 'asymp' => chr(8776), 'ne' => chr(8800), 'equiv' => chr(8801), 'le' => chr(8804), 'ge' => chr(8805), 'sub' => chr(8834), 'sup' => chr(8835), 'nsub' => chr(8836), 'sube' => chr(8838), 'supe' => chr(8839), 'oplus' => chr(8853), 'otimes' => chr(8855), 'perp' => chr(8869), 'sdot' => chr(8901), # Miscellaneous Technical # * lang is NOT the same character as U+003C 'less than' or U+2039 'single left-pointing angle quotation mark' # * rang is NOT the same character as U+003E 'greater than' or U+203A 'single right-pointing angle quotation mark' 'lceil' => chr(8968), 'rceil' => chr(8969), 'lfloor' => chr(8970), 'rfloor' => chr(8971), 'lang' => chr(9001), 'rang' => chr(9002), # Geometric Shapes 'loz' => chr(9674), # Miscellaneous Symbols # * black here seems to mean filled as opposed to hollow 'spades' => chr(9824), 'clubs' => chr(9827), 'hearts' => chr(9829), 'diams' => chr(9830), ); sub process { (@_ == 2) or warn "MKDoc::XML::Encode::process() should be called with two arguments"; my $class = shift; my $stuff = shift; return $ENTITY_2_CHAR{$stuff}; } 1; MKDoc-XML-0.75/lib/MKDoc/XML/Tokenizer.pm0000644000076400007640000001542010130755262020022 0ustar brunobruno00000000000000# ------------------------------------------------------------------------------------- # MKDoc::XML::Tokenizer # ------------------------------------------------------------------------------------- # Author : 
Jean-Michel Hiver. # Copyright : (c) MKDoc Holdings Ltd, 2003 # # This module turns an XML string into a list of tokens and returns this list. # It is using Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions" # # This module is distributed under the same license as Perl itself. # ------------------------------------------------------------------------------------- package MKDoc::XML::Tokenizer; use MKDoc::XML::Token; use strict; use warnings; our $prev_token; # REX/Perl 1.0 # Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions", # Technical Report TR 1998-17, School of Computing Science, Simon Fraser # University, November, 1998. # Copyright (c) 1998, Robert D. Cameron. # The following code may be freely used and distributed provided that # this copyright and citation notice remains intact and that modifications # or additions are clearly identified. # # Additions: # ---------- # added 'my' and 'our' keywords in front of variables # I like strict mode :) my $TextSE = "[^<]+"; my $UntilHyphen = "[^-]*-"; my $Until2Hyphens = "$UntilHyphen(?:[^-]$UntilHyphen)*-"; my $CommentCE = "$Until2Hyphens>?"; my $UntilRSBs = "[^\\]]*](?:[^\\]]+])*]+"; my $CDATA_CE = "$UntilRSBs(?:[^\\]>]$UntilRSBs)*>"; my $S = "[ \\n\\t\\r]+"; my $NameStrt = "[A-Za-z_:]|[^\\x00-\\x7F]"; my $NameChar = "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]"; my $Name = "(?:$NameStrt)(?:$NameChar)*"; my $QuoteSE = "\"[^\"]*\"|'[^']*'"; my $DT_IdentSE = "$S$Name(?:$S(?:$Name|$QuoteSE))*"; my $MarkupDeclCE = "(?:[^\\]\"'><]+|$QuoteSE)*>"; my $S1 = "[\\n\\r\\t ]"; my $UntilQMs = "[^?]*\\?+"; my $PI_Tail = "\\?>|$S1$UntilQMs(?:[^>?]$UntilQMs)*>"; my $DT_ItemSE = "<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail))|%$Name;|$S"; my $DocTypeCE = "$DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?>?"; my $DeclCE = "--(?:$CommentCE)?|\\[CDATA\\[(?:$CDATA_CE)?|DOCTYPE(?:$DocTypeCE)?"; my $PI_CE = "$Name(?:$PI_Tail)?"; my $EndTagCE = "$Name(?:$S)?>?"; my $AttValSE = 
"\"[^<\"]*\"|'[^<']*'"; my $ElemTagCE = "$Name(?:$S$Name(?:$S)?=(?:$S)?(?:$AttValSE))*(?:$S)?/?>?"; my $MarkupSPE = "<(?:!(?:$DeclCE)?|\\?(?:$PI_CE)?|/(?:$EndTagCE)?|(?:$ElemTagCE)?)"; our $XML_SPE = "$TextSE|$MarkupSPE"; # Rather than have this: # sub ShallowParse { # my($XML_document) = @_; # return $XML_document =~ /$XML_SPE/g; # } sub process_data { my $class = shift; my $xml = shift; # remove trailing whitespace $xml =~ s/^(?:\s|\r|\n)*\(?:\s|\r|\n)*$/\>/s; local ($prev_token) = ''; my @res = map { _check_001(); _check_002(); $prev_token = $_; bless \$_, 'MKDoc::XML::Token'; } $xml =~ /$XML_SPE/go; return \@res; } #

sub _check_002 { $prev_token =~ /^$/ or die "cannot tokenize: $prev_token$_"; } # sub _check_001 { /^"; next; }; my $tag = $node->{_tag}; my %att = map { $_ => _encode_quot ($node->{$_}) } grep !/^_/, keys %{$node}; my $attr = join " ", map { "$_=\"$att{$_}\"" } keys %att; my $open = $node->{_open}; my $close = $node->{_close}; $open && $close && do { if ($attr) { push @res, "<$tag $attr />" } else { push @res, "<$tag />" } next; }; my $open_tag = $attr ? "<$tag $attr>" : "<$tag>"; my $close_tag = ""; my @desc = $node->{_content} ? @{$node->{_content}} : (); my $res = $open_tag . $class->process (@desc) . $close_tag; push @res, $res; next; }; return join '', @res; } sub _encode_quot { my $res = shift; return '' unless (defined $res); $res =~ s/\"/\"\;/g; return $res; } 1; __END__ =head1 NAME MKDoc::XML::TreePrinter - Builds XML data from a parsed tree =head1 SYNOPSIS my $xml_data = MKDoc::XML::TreePrinter->process_data (@top_nodes); =head1 SUMMARY L takes trees which are produced by L to turn a parsed tree back into XML data. This means you can parse some stuff using L, fiddle around with the tree, and then get the result back as XML data. =head1 AUTHOR Copyright 2003 - MKDoc Holdings Ltd. Author: Jean-Michel Hiver This module is free software and is distributed under the same license as Perl itself. Use it at your own risk. =head1 SEE ALSO L =cut MKDoc-XML-0.75/lib/MKDoc/XML/Stripper.pm0000644000076400007640000001624510130755247017671 0ustar brunobruno00000000000000# ------------------------------------------------------------------------------------- # MKDoc::XML::Stripper # ------------------------------------------------------------------------------------- # Author : Jean-Michel Hiver. # Copyright : (c) MKDoc Holdings Ltd, 2003 # # This module removes user-defined markup from an existing XML file / variable. # # This module is distributed under the same license as Perl itself. 
# ------------------------------------------------------------------------------------- package MKDoc::XML::Stripper; use MKDoc::XML::Tokenizer; use File::Spec; use strict; use warnings; ## # $class->new(); # -------------- # Returns a new MKDoc::XML::Stripper object. ## sub new { my $class = shift; my $self = bless { @_ }, $class; return $self; } sub load_def { my $self = shift; my $file = shift; $file =~ /\// and return $self->_load_def ($file); $file =~ /\./ and return $self->_load_def ($file); $file .= '.txt'; for (@INC) { my $path = File::Spec->catfile ($_, qw /MKDoc XML Stripper/, $file); -e $path and -f $path and return $self->_load_def ($path); } warn "Cannot read-open $file. Reason: Doesn't seem to be anywhere in \@INC"; } sub _load_def { my $self = shift; my $file = shift; open FP, "<$file" || do { warn "Cannot read-open $file. Reason: $!"; return; }; # clean $self for (keys %{$self}) { delete $self->{$_} } while () { chomp(); s/\#.*$//; s/^\s*//; s/\s*$//; next unless ($_ ne ''); my @l = split /\s+/, $_; $self->allow (@l); } close FP; } ## # $self->allow ($tag, @attributes); # --------------------------------- # Allows the tag $tag to be present along with a list of attributes, # i.e. # # $self->allow (qw /p class id/); ## sub allow { my $self = shift; my $tag = shift; $self->{$tag} ||= {}; for (@_) { $self->{$tag}->{$_} = 1 }; } ## # $self->disallow ($tag, @attributes); # ------------------------------------ # Disallows the tag $tag to be present. ## sub disallow { my $self = shift; my $tag = shift; delete $self->{$tag}; } ## # $self->process_data ($data); # ---------------------------- # Strips tags on $data and returns the stripped result. 
## sub process_data { my $self = shift; my $data = shift; my $tokens = MKDoc::XML::Tokenizer->process_data ($data); my @result = map { $self->strip ($_) } @{$tokens}; return join '', map { $$_ } @result; } ## # $self->process_file ($file); # ---------------------------- # Strips tags on $file and returns the stripped result. ## sub process_file { my $self = shift; my $file = shift; my $tokens = MKDoc::XML::Tokenizer->process_file ($file); my @result = map { $self->strip ($_) } @{$tokens}; return join '', map { $$_ } @result; } ## # $self->strip ($token); # ---------------------- # Returns this token stripped out of the stuff which we don't want. # Returns an empty list if the token is not allowed. ## sub strip { my $self = shift; my $token = shift; my $node = $token->tag(); defined $node || return $token; my $tag = $node->{_tag}; return unless ( $self->{$tag} ); for (keys %{$node}) { /^_/ and next; delete $node->{$_} unless $self->{$tag}->{$_}; } return new MKDoc::XML::Token ( _node_to_tag ($node) ); } sub _node_to_tag { my $node = shift; my $tag = $node->{_tag}; my $open = $node->{_open}; my $close = $node->{_close}; my %attr = map { /^_/ ? () : ($_ => $node->{$_}) } keys %{$node}; my $attr = join ' ', map { my $key = $_; my $val = $attr{$key}; ($val =~ /\"/) ? "$key='$val'" : "$key=\"$val\"" } keys %attr; my $res = '<'; $res .= '/' if ($close and not $open); $res .= $tag; $res .= " $attr" if ($attr and $open); $res .= ' /' if ($open and $close); $res .= '>'; return $res; } 1; __END__ =head1 NAME MKDoc::XML::Stripper - Remove unwanted XML / XHTML tags and attributes =head1 SYNOPSIS use MKDoc::XML::Stripper; my $stripper = new MKDoc::XML::Stripper; $stripper->allow (qw /p class id/); my $ugly = '

Hello, World!

'; my $neat = $stripper->process_data ($ugly); print $neat; Should print:

Hello, World!

=head1 SUMMARY MKDoc::XML::Stripper is a class which lets you specify a set of tags and attributes which you want to allow, and then cheekily strip any XML of unwanted tags and attributes. In MKDoc, this is used so that editors use structural XHTML rather than presentational tags, i.e. strip anything which looks like a tag, a 'style' attribute or other tags which would break separation of structure from content. =head1 DISCLAIMER B =head1 API =head2 my $stripper = MKDoc::XML::Stripper->new() Instantiates a new MKDoc::XML::Stripper object. =head2 $stripper->load_def ($def_name); Loads a definition located somewhere in @INC under MKDoc/XML/Stripper. Available definitions are: =over =item xhtml10frameset =item xhtml10strict =item xhtml10transitional =item mkdoc16 - MKDoc 1.6. XHTML structural markup =back You can also load your own definition file, for instance: $stripper->load_def ('my_def.txt'); Definitions are simple text files as follows: # allow p with 'class' and id p class p id # allow more stuff td class td id td style # etc... =head2 $stripper->allow ($tag, @attributes) Allows "<$tag>" to appear in the stripped XML. Additionally, allows @attributes to appear as attributes of <$tag>, so for instance: $stripper->allow ('p', 'class', 'id'); Will allow the following:

However any extra attributes will be stripped, i.e.

Will be rewritten as

=head2 $stripper->disallow ($tag) Explicitly disallows a tag and all its associated attributes. By default everything is disallowed. =head2 $stripper->process_data ($some_xml); Strips $some_xml according to the rules that were given with the allow() and disallow() methods and returns the result. Does not modify $some_xml in place. =head2 $stripper->process_file ('/an/xml/file.xml'); Strips '/an/xml/file.xml' according to the rules that were given with the allow() and disallow() methods and returns the result. Does not modify '/an/xml/file.xml' in place. =head1 NOTES L does not really parse the XML file you're giving to it nor does it care if the XML is well-formed or not. It uses L to turn the XML / XHTML file into a series of L objects and strictly operates on a list of tokens. For this same reason MKDoc::XML::Stripper does not support namespaces. =head1 AUTHOR Copyright 2003 - MKDoc Holdings Ltd. Author: Jean-Michel Hiver This module is free software and is distributed under the same license as Perl itself. Use it at your own risk. =head1 SEE ALSO L L =cut MKDoc-XML-0.75/lib/MKDoc/XML/Token.pm0000644000076400007640000002111110130755257017126 0ustar brunobruno00000000000000=head1 NAME MKDoc::XML::Token - XML Token Object =cut package MKDoc::XML::Token; use strict; use warnings; =head1 SYNOPSIS my $tokens = MKDoc::XML::Tokenizer->process_data ($some_xml); foreach my $token (@{$tokens}) { print "'" . $token->as_string() . "' is text\n" if (defined $token->text()); print "'" . $token->as_string() . "' is a self closing tag\n" if (defined $token->tag_self_close()); print "'" . $token->as_string() . "' is an opening tag\n" if (defined $token->tag_open()); print "'" . $token->as_string() . "' is a closing tag\n" if (defined $token->tag_close()); print "'" . $token->as_string() . "' is a processing instruction\n" if (defined $token->pi()); print "'" . $token->as_string() . "' is a declaration\n" if (defined $token->declaration()); print "'" . $token->as_string() . 
"' is a comment\n" if (defined $token->comment()); print "'" . $token->as_string() . "' is a tag\n" if (defined $token->tag()); print "'" . $token->as_string() . "' is a pseudo-tag (NOT text and NOT tag)\n" if (defined $token->pseudotag()); print "'" . $token->as_string() . "' is a leaf token (NOT opening tag)\n" if (defined $token->leaf()); } =head1 SUMMARY L is an object representing an XML token produced by L. It has a set of methods to identify the type of token it is, as well as to help building a parsed tree as in L. =head1 API =head2 my $token = new MKDoc::XML::Token ($string_token); Constructs a new MKDoc::XML::Token object. =cut sub new { my $class = shift; my $token = shift; return bless \$token, $class; } =head2 my $string_token = $token->as_string(); Returns the string representation of this token so that: MKDoc::XML::Token->new ($token)->as_string eq $token is a tautology. =cut sub as_string { my $self = shift; return $$self; } =head2 my $node = $token->leaf(); If this token is not an opening tag, this method will return its corresponding node structure as returned by $token->text(), $token->tag_self_close(), etc. Returns undef otherwise. =cut sub leaf { my $self = shift; my $res = undef; $res = $self->comment(); defined $res and return $res; $res = $self->declaration(); defined $res and return $res; $res = $self->pi(); defined $res and return $res; $res = $self->tag_self_close(); defined $res and return $res; $res = $self->text(); defined $res and return $res; return; } =head2 my $node = $token->pseudotag(); If this token is a comment, declaration or processing instruction, this method will return $token->tag_comment(), $token_declaration() or $token->pi() resp. Returns undef otherwise. 
=cut sub pseudotag { my $self = shift; my $res = undef; $res = $self->comment(); defined $res and return $res; $res = $self->declaration(); defined $res and return $res; $res = $self->pi(); defined $res and return $res; return; } =head2 my $node = $token->tag(); If this token is an opening, closing, or self closing tag, this method will return $token->tag_open(), $token->tag_close() or $token->tag_self_close() resp. Returns undef otherwise. =cut sub tag { my $self = shift; my $res = undef; $res = $self->tag_open(); defined $res and return $res; $res = $self->tag_close(); defined $res and return $res; $res = $self->tag_self_close(); defined $res and return $res; return $res; } =head2 my $node = $token->comment(); If this token object represents a declaration, the following structure is returned: # this is { _tag => '~comment', text => ' I like Pie. Pie is good ', } Returns undef otherwise. =cut sub comment { my $self = shift; my $node = undef; $$self =~ /^<\!--/ and do { $node = { _tag => '~comment', text => $$self, }; $node->{text} =~ s/^<\!--//; $node->{text} =~ s/-->$//; }; $node; } =head2 my $node = $token->declaration(); If this token object represents a declaration, the following structure is returned: # this is { _tag => '~declaration', text => 'DOCTYPE foo', } Returns undef otherwise. =cut sub declaration { my $self = shift; my $node = undef; $$self !~ /^<\!--/ and $$self =~ /^ '~declaration', text => $$self, }; $node->{text} =~ s/^{text} =~ s/>$//; }; $node; } =head2 my $node = $token->pi(); If this token object represents a processing instruction, the following structure is returned: # this is { _tag => '~pi', text => 'xml version="1.0" charset="UTF-8"', } Returns undef otherwise. 
=cut

sub pi
{
    my $self = shift;
    my $node = undef;

    $$self =~ /^<\?/ and do {
        $node = {
            _tag => '~pi',
            text => $$self,
        };

        # Strip '<?' from the front, then the closing '>' and the '?'
        # before it, from the copy kept in 'text'.
        $node->{text} =~ s/^<\?//;
        $node->{text} =~ s/\>$//;
        $node->{text} =~ s/\?$//;
    };

    $node;
}


=head2 my $node = $token->tag_open();

If this token object represents an opening tag, the following structure
is returned:

  # this is <aTag foo="bar" baz="buz">
  {
      _tag   => 'aTag',
      _open  => 1,
      _close => 0,
      foo    => 'bar',
      baz    => 'buz',
  }

Returns undef otherwise.

=cut

sub tag_open
{
    my $self = shift;
    my $node = undef;

    # Not a comment/declaration, not a closing tag, not self-closing,
    # not a processing instruction -- but still starting with '<'.
    $$self !~ /^<\!/ and
    $$self !~ /^<\// and
    $$self !~ /\/>$/ and
    $$self !~ /^<\?/ and
    $$self =~ /^</   and do {
        # NOTE(review): the statements inside this do-block were missing
        # from the copy under review. They are restored from the structure
        # documented in the POD above and from the parallel code in
        # tag_close(); confirm against the upstream distribution.
        my %node      = _extract_attributes ($$self);
        ($node{_tag}) = $$self =~ /.*?([A-Za-z0-9][A-Za-z0-9_:-]*)/;
        $node{_open}  = 1;
        $node{_close} = 0;
        $node = \%node;
    };

    $node;
}


=head2 my $node = $token->tag_close();

If this token object represents a closing tag, the following structure
is returned:

  # this is </aTag>
  {
      _tag   => 'aTag',
      _open  => 0,
      _close => 1,
  }

Returns undef otherwise.

=cut

sub tag_close
{
    my $self = shift;
    my $node = undef;

    $$self !~ /^<\!/ and
    $$self =~ /^<\// and
    $$self !~ /\/>$/ and do {
        my %node      = ();

        # The tag name is the first name-like run of characters in the token.
        ($node{_tag}) = $$self =~ /.*?([A-Za-z0-9][A-Za-z0-9_:-]*)/;
        $node{_open}  = 0;
        $node{_close} = 1;
        $node = \%node;
    };

    $node;
}


=head2 my $node = $token->tag_self_close();

If this token object represents a self-closing tag, the following structure
is returned:

  # this is <aTag foo="bar" baz="buz" />
  {
      _tag   => 'aTag',
      _open  => 1,
      _close => 1,
      foo    => 'bar',
      baz    => 'buz',
  }

Returns undef otherwise.

=cut

sub tag_self_close
{
    my $self = shift;
    my $node = undef;

    $$self !~ /^<\!/ and
    $$self !~ /^<\// and
    $$self =~ /\/>$/ and
    # ((?:\w|:|-)+)\s*=\s*\"(.*?)\"/gs;
    $$self =~ /^</   and do {
        # NOTE(review): the statements inside this do-block were missing
        # from the copy under review. They are restored from the structure
        # documented in the POD above and from the parallel code in
        # tag_close(); confirm against the upstream distribution.
        my %node      = _extract_attributes ($$self);
        ($node{_tag}) = $$self =~ /.*?([A-Za-z0-9][A-Za-z0-9_:-]*)/;
        $node{_open}  = 1;
        $node{_close} = 1;
        $node = \%node;
    };

    $node;
}


=head2 my $text = $token->text();

If this token object represents a piece of text, then this text is returned.
Returns undef otherwise. TRAP! $token->text() returns a false value if this
text happens to be '0' or ''. So really you should use:

  if (defined $token->text()) {
    ... do stuff...
} =cut sub text { my $self = shift; return ($$self !~ /^?"; our $AttValSE = "\"[^<\"]*\"|'[^<']*'"; our $ElemTagCE = "$Name((?:$S$Name(?:$S)?=(?:$S)?(?:$AttValSE))*)(?:$S)?/?>?"; our $ElemTagCE_Mod = "$S($Name)(?:$S)?=(?:$S)?($AttValSE)"; our $RE_1 = qr /$ElemTagCE/; our $RE_2 = qr /$ElemTagCE_Mod/; sub _extract_attributes { my $tag = shift; my ($tags) = $tag =~ /$RE_1/g; my %attr = $tag =~ /$RE_2/g; foreach my $key (keys %attr) { my $val = $attr{$key}; $val =~ s/^(\"|\')//; $val =~ s/(\"|\')$//; $attr{$key} = $val; } %attr; } 1; __END__ =head1 NOTES L works with L, which can be used when building a full tree is not necessary. If you need to build a tree, look at L. =head1 AUTHOR Copyright 2003 - MKDoc Holdings Ltd. Author: Jean-Michel Hiver This module is free software and is distributed under the same license as Perl itself. Use it at your own risk. =head1 SEE ALSO L L =cut MKDoc-XML-0.75/lib/MKDoc/XML/Stripper/0002755000076400007640000000000010214062151017311 5ustar brunobruno00000000000000MKDoc-XML-0.75/lib/MKDoc/XML/Stripper/xhtml10strict.txt0000644000076400007640000005030607765640455022634 0ustar brunobruno00000000000000# xhtml 1.0 strict # http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd # # # # lovingly expanded and sorted from the dtd by paul arzul (patricka@mkdoc.com) 2003/09/24 # a # anchor a accesskey a charset a class a coords a dir a href a hreflang a id a lang a name a onblur a onclick a ondblclick a onfocus a onkeydown a onkeypress a onkeyup a onmousedown a onmousemove a onmouseout a onmouseover a onmouseup a rel a rev a shape a style a tabindex a title a type a xml:lang # abbr # abbreviated form (e.g., WWW, HTTP, etc.) 
abbr class abbr dir abbr id abbr lang abbr onclick abbr ondblclick abbr onkeydown abbr onkeypress abbr onkeyup abbr onmousedown abbr onmousemove abbr onmouseout abbr onmouseover abbr onmouseup abbr style abbr title abbr xml:lang # acronym # acronym class acronym dir acronym id acronym lang acronym onclick acronym ondblclick acronym onkeydown acronym onkeypress acronym onkeyup acronym onmousedown acronym onmousemove acronym onmouseout acronym onmouseover acronym onmouseup acronym style acronym title acronym xml:lang # address # information on author address class address dir address id address lang address onclick address ondblclick address onkeydown address onkeypress address onkeyup address onmousedown address onmousemove address onmouseout address onmouseover address onmouseup address style address title address xml:lang # area # client-side image map area area accesskey area alt area class area coords area dir area href area id area lang area nohref area onblur area onclick area ondblclick area onfocus area onkeydown area onkeypress area onkeyup area onmousedown area onmousemove area onmouseout area onmouseover area onmouseup area shape area style area tabindex area title area xml:lang # b # bold text style b class b dir b id b lang b onclick b ondblclick b onkeydown b onkeypress b onkeyup b onmousedown b onmousemove b onmouseout b onmouseover b onmouseup b style b title b xml:lang # base # document base URI base href base id # bdo # I18N BiDi over-ride bdo class bdo dir bdo id bdo lang bdo onclick bdo ondblclick bdo onkeydown bdo onkeypress bdo onkeyup bdo onmousedown bdo onmousemove bdo onmouseout bdo onmouseover bdo onmouseup bdo style bdo title bdo xml:lang # big # large text style big class big dir big id big lang big onclick big ondblclick big onkeydown big onkeypress big onkeyup big onmousedown big onmousemove big onmouseout big onmouseover big onmouseup big style big title big xml:lang # blockquote # long quotation blockquote cite blockquote class 
blockquote dir blockquote id blockquote lang blockquote onclick blockquote ondblclick blockquote onkeydown blockquote onkeypress blockquote onkeyup blockquote onmousedown blockquote onmousemove blockquote onmouseout blockquote onmouseover blockquote onmouseup blockquote style blockquote title blockquote xml:lang # body # document body body class body dir body id body lang body onclick body ondblclick body onkeydown body onkeypress body onkeyup body onload body onmousedown body onmousemove body onmouseout body onmouseover body onmouseup body onunload body style body title body xml:lang # br # forced line break br class br id br style br title # button # push button button accesskey button class button dir button disabled button id button lang button name button onblur button onclick button ondblclick button onfocus button onkeydown button onkeypress button onkeyup button onmousedown button onmousemove button onmouseout button onmouseover button onmouseup button style button tabindex button title button type button value button xml:lang # caption # table caption caption class caption dir caption id caption lang caption onclick caption ondblclick caption onkeydown caption onkeypress caption onkeyup caption onmousedown caption onmousemove caption onmouseout caption onmouseover caption onmouseup caption style caption title caption xml:lang # cite # citation cite class cite dir cite id cite lang cite onclick cite ondblclick cite onkeydown cite onkeypress cite onkeyup cite onmousedown cite onmousemove cite onmouseout cite onmouseover cite onmouseup cite style cite title cite xml:lang # code # computer code fragment code class code dir code id code lang code onclick code ondblclick code onkeydown code onkeypress code onkeyup code onmousedown code onmousemove code onmouseout code onmouseover code onmouseup code style code title code xml:lang # col # table column col align col char col charoff col class col dir col id col lang col onclick col ondblclick col onkeydown col 
onkeypress col onkeyup col onmousedown col onmousemove col onmouseout col onmouseover col onmouseup col span col style col title col valign col width col xml:lang # colgroup # table column group colgroup align colgroup char colgroup charoff colgroup class colgroup dir colgroup id colgroup lang colgroup onclick colgroup ondblclick colgroup onkeydown colgroup onkeypress colgroup onkeyup colgroup onmousedown colgroup onmousemove colgroup onmouseout colgroup onmouseover colgroup onmouseup colgroup span colgroup style colgroup title colgroup valign colgroup width colgroup xml:lang # dd # definition description dd class dd dir dd id dd lang dd onclick dd ondblclick dd onkeydown dd onkeypress dd onkeyup dd onmousedown dd onmousemove dd onmouseout dd onmouseover dd onmouseup dd style dd title dd xml:lang # del # deleted text del cite del class del datetime del dir del id del lang del onclick del ondblclick del onkeydown del onkeypress del onkeyup del onmousedown del onmousemove del onmouseout del onmouseover del onmouseup del style del title del xml:lang # dfn # instance definition dfn class dfn dir dfn id dfn lang dfn onclick dfn ondblclick dfn onkeydown dfn onkeypress dfn onkeyup dfn onmousedown dfn onmousemove dfn onmouseout dfn onmouseover dfn onmouseup dfn style dfn title dfn xml:lang # div # generic language/style container div class div dir div id div lang div onclick div ondblclick div onkeydown div onkeypress div onkeyup div onmousedown div onmousemove div onmouseout div onmouseover div onmouseup div style div title div xml:lang # dl # definition list dl class dl dir dl id dl lang dl onclick dl ondblclick dl onkeydown dl onkeypress dl onkeyup dl onmousedown dl onmousemove dl onmouseout dl onmouseover dl onmouseup dl style dl title dl xml:lang # dt # definition term dt class dt dir dt id dt lang dt onclick dt ondblclick dt onkeydown dt onkeypress dt onkeyup dt onmousedown dt onmousemove dt onmouseout dt onmouseover dt onmouseup dt style dt title dt xml:lang # em # 
emphasis em class em dir em id em lang em onclick em ondblclick em onkeydown em onkeypress em onkeyup em onmousedown em onmousemove em onmouseout em onmouseover em onmouseup em style em title em xml:lang # fieldset # form control group fieldset class fieldset dir fieldset id fieldset lang fieldset onclick fieldset ondblclick fieldset onkeydown fieldset onkeypress fieldset onkeyup fieldset onmousedown fieldset onmousemove fieldset onmouseout fieldset onmouseover fieldset onmouseup fieldset style fieldset title fieldset xml:lang # form # interactive form form accept form accept-charset form action form class form dir form enctype form id form lang form method form onclick form ondblclick form onkeydown form onkeypress form onkeyup form onmousedown form onmousemove form onmouseout form onmouseover form onmouseup form onreset form onsubmit form style form title form xml:lang # h1 # heading h1 class h1 dir h1 id h1 lang h1 onclick h1 ondblclick h1 onkeydown h1 onkeypress h1 onkeyup h1 onmousedown h1 onmousemove h1 onmouseout h1 onmouseover h1 onmouseup h1 style h1 title h1 xml:lang # h2 # heading h2 class h2 dir h2 id h2 lang h2 onclick h2 ondblclick h2 onkeydown h2 onkeypress h2 onkeyup h2 onmousedown h2 onmousemove h2 onmouseout h2 onmouseover h2 onmouseup h2 style h2 title h2 xml:lang # h3 # heading h3 class h3 dir h3 id h3 lang h3 onclick h3 ondblclick h3 onkeydown h3 onkeypress h3 onkeyup h3 onmousedown h3 onmousemove h3 onmouseout h3 onmouseover h3 onmouseup h3 style h3 title h3 xml:lang # h4 # heading h4 class h4 dir h4 id h4 lang h4 onclick h4 ondblclick h4 onkeydown h4 onkeypress h4 onkeyup h4 onmousedown h4 onmousemove h4 onmouseout h4 onmouseover h4 onmouseup h4 style h4 title h4 xml:lang # h5 # heading h5 class h5 dir h5 id h5 lang h5 onclick h5 ondblclick h5 onkeydown h5 onkeypress h5 onkeyup h5 onmousedown h5 onmousemove h5 onmouseout h5 onmouseover h5 onmouseup h5 style h5 title h5 xml:lang # h6 # heading h6 class h6 dir h6 id h6 lang h6 onclick h6 
ondblclick h6 onkeydown h6 onkeypress h6 onkeyup h6 onmousedown h6 onmousemove h6 onmouseout h6 onmouseover h6 onmouseup h6 style h6 title h6 xml:lang # head # document head head dir head id head lang head profile head xml:lang # hr # horizontal rule hr class hr dir hr id hr lang hr onclick hr ondblclick hr onkeydown hr onkeypress hr onkeyup hr onmousedown hr onmousemove hr onmouseout hr onmouseover hr onmouseup hr style hr title hr xml:lang # html # document root element html dir html id html lang html xml:lang html xmlns # i # italic text style i class i dir i id i lang i onclick i ondblclick i onkeydown i onkeypress i onkeyup i onmousedown i onmousemove i onmouseout i onmouseover i onmouseup i style i title i xml:lang # img # Embedded image img alt img class img dir img height img id img ismap img lang img longdesc img onclick img ondblclick img onkeydown img onkeypress img onkeyup img onmousedown img onmousemove img onmouseout img onmouseover img onmouseup img src img style img title img usemap img width img xml:lang # input # form control input accept input accesskey input alt input checked input class input dir input disabled input id input lang input maxlength input name input onblur input onchange input onclick input ondblclick input onfocus input onkeydown input onkeypress input onkeyup input onmousedown input onmousemove input onmouseout input onmouseover input onmouseup input onselect input readonly input size input src input style input tabindex input title input type input usemap input value input xml:lang # ins # inserted text ins cite ins class ins datetime ins dir ins id ins lang ins onclick ins ondblclick ins onkeydown ins onkeypress ins onkeyup ins onmousedown ins onmousemove ins onmouseout ins onmouseover ins onmouseup ins style ins title ins xml:lang # kbd # text to be entered by the user kbd class kbd dir kbd id kbd lang kbd onclick kbd ondblclick kbd onkeydown kbd onkeypress kbd onkeyup kbd onmousedown kbd onmousemove kbd onmouseout kbd 
onmouseover kbd onmouseup kbd style kbd title kbd xml:lang # label # form field label text label accesskey label class label dir label for label id label lang label onblur label onclick label ondblclick label onfocus label onkeydown label onkeypress label onkeyup label onmousedown label onmousemove label onmouseout label onmouseover label onmouseup label style label title label xml:lang # legend # fieldset legend legend accesskey legend class legend dir legend id legend lang legend onclick legend ondblclick legend onkeydown legend onkeypress legend onkeyup legend onmousedown legend onmousemove legend onmouseout legend onmouseover legend onmouseup legend style legend title legend xml:lang # li # list item li class li dir li id li lang li onclick li ondblclick li onkeydown li onkeypress li onkeyup li onmousedown li onmousemove li onmouseout li onmouseover li onmouseup li style li title li xml:lang # link # a media-independent link link charset link class link dir link href link hreflang link id link lang link media link onclick link ondblclick link onkeydown link onkeypress link onkeyup link onmousedown link onmousemove link onmouseout link onmouseover link onmouseup link rel link rev link style link title link type link xml:lang # map # client-side image map map class map dir map id map lang map name map onclick map ondblclick map onkeydown map onkeypress map onkeyup map onmousedown map onmousemove map onmouseout map onmouseover map onmouseup map style map title map xml:lang # meta # generic metainformation meta content meta dir meta http-equiv meta id meta lang meta name meta scheme meta xml:lang # noscript # alternate content container for non script-based rendering noscript class noscript dir noscript id noscript lang noscript onclick noscript ondblclick noscript onkeydown noscript onkeypress noscript onkeyup noscript onmousedown noscript onmousemove noscript onmouseout noscript onmouseover noscript onmouseup noscript style noscript title noscript xml:lang # 
object # generic embedded object object archive object class object classid object codebase object codetype object data object declare object dir object height object id object lang object name object onclick object ondblclick object onkeydown object onkeypress object onkeyup object onmousedown object onmousemove object onmouseout object onmouseover object onmouseup object standby object style object tabindex object title object type object usemap object width object xml:lang # ol # ordered list ol class ol dir ol id ol lang ol onclick ol ondblclick ol onkeydown ol onkeypress ol onkeyup ol onmousedown ol onmousemove ol onmouseout ol onmouseover ol onmouseup ol style ol title ol xml:lang # optgroup # option group optgroup class optgroup dir optgroup disabled optgroup id optgroup label optgroup lang optgroup onclick optgroup ondblclick optgroup onkeydown optgroup onkeypress optgroup onkeyup optgroup onmousedown optgroup onmousemove optgroup onmouseout optgroup onmouseover optgroup onmouseup optgroup style optgroup title optgroup xml:lang # option # selectable choice option class option dir option disabled option id option label option lang option onclick option ondblclick option onkeydown option onkeypress option onkeyup option onmousedown option onmousemove option onmouseout option onmouseover option onmouseup option selected option style option title option value option xml:lang # p # paragraph p class p dir p id p lang p onclick p ondblclick p onkeydown p onkeypress p onkeyup p onmousedown p onmousemove p onmouseout p onmouseover p onmouseup p style p title p xml:lang # param # named property value param id param name param type param value param valuetype # pre # preformatted text pre class pre dir pre id pre lang pre onclick pre ondblclick pre onkeydown pre onkeypress pre onkeyup pre onmousedown pre onmousemove pre onmouseout pre onmouseover pre onmouseup pre style pre title pre xml:lang pre xml:space # q # short inline quotation q cite q class q dir q id q lang 
q onclick q ondblclick q onkeydown q onkeypress q onkeyup q onmousedown q onmousemove q onmouseout q onmouseover q onmouseup q style q title q xml:lang # samp # sample program output, scripts, etc. samp class samp dir samp id samp lang samp onclick samp ondblclick samp onkeydown samp onkeypress samp onkeyup samp onmousedown samp onmousemove samp onmouseout samp onmouseover samp onmouseup samp style samp title samp xml:lang # script # script statements script charset script defer script id script src script type script xml:space # select # option selector select class select dir select disabled select id select lang select multiple select name select onblur select onchange select onclick select ondblclick select onfocus select onkeydown select onkeypress select onkeyup select onmousedown select onmousemove select onmouseout select onmouseover select onmouseup select size select style select tabindex select title select xml:lang # small # small text style small class small dir small id small lang small onclick small ondblclick small onkeydown small onkeypress small onkeyup small onmousedown small onmousemove small onmouseout small onmouseover small onmouseup small style small title small xml:lang # span # generic language/style container span class span dir span id span lang span onclick span ondblclick span onkeydown span onkeypress span onkeyup span onmousedown span onmousemove span onmouseout span onmouseover span onmouseup span style span title span xml:lang # strong # strong emphasis strong class strong dir strong id strong lang strong onclick strong ondblclick strong onkeydown strong onkeypress strong onkeyup strong onmousedown strong onmousemove strong onmouseout strong onmouseover strong onmouseup strong style strong title strong xml:lang # style # style info style dir style id style lang style media style title style type style xml:lang style xml:space # sub # subscript sub class sub dir sub id sub lang sub onclick sub ondblclick sub onkeydown sub onkeypress 
sub onkeyup sub onmousedown sub onmousemove sub onmouseout sub onmouseover sub onmouseup sub style sub title sub xml:lang # sup # superscript sup class sup dir sup id sup lang sup onclick sup ondblclick sup onkeydown sup onkeypress sup onkeyup sup onmousedown sup onmousemove sup onmouseout sup onmouseover sup onmouseup sup style sup title sup xml:lang # table # table border table cellpadding table cellspacing table class table dir table frame table id table lang table onclick table ondblclick table onkeydown table onkeypress table onkeyup table onmousedown table onmousemove table onmouseout table onmouseover table onmouseup table rules table style table summary table title table width table xml:lang # tbody # table body tbody align tbody char tbody charoff tbody class tbody dir tbody id tbody lang tbody onclick tbody ondblclick tbody onkeydown tbody onkeypress tbody onkeyup tbody onmousedown tbody onmousemove tbody onmouseout tbody onmouseover tbody onmouseup tbody style tbody title tbody valign tbody xml:lang # td # table data cell td abbr td align td axis td char td charoff td class td colspan td dir td headers td id td lang td onclick td ondblclick td onkeydown td onkeypress td onkeyup td onmousedown td onmousemove td onmouseout td onmouseover td onmouseup td rowspan td scope td style td title td valign td xml:lang # textarea # multi-line text field textarea accesskey textarea class textarea cols textarea dir textarea disabled textarea id textarea lang textarea name textarea onblur textarea onchange textarea onclick textarea ondblclick textarea onfocus textarea onkeydown textarea onkeypress textarea onkeyup textarea onmousedown textarea onmousemove textarea onmouseout textarea onmouseover textarea onmouseup textarea onselect textarea readonly textarea rows textarea style textarea tabindex textarea title textarea xml:lang # tfoot # table footer tfoot align tfoot char tfoot charoff tfoot class tfoot dir tfoot id tfoot lang tfoot onclick tfoot ondblclick tfoot 
onkeydown tfoot onkeypress tfoot onkeyup tfoot onmousedown tfoot onmousemove tfoot onmouseout tfoot onmouseover tfoot onmouseup tfoot style tfoot title tfoot valign tfoot xml:lang # th # table header cell th abbr th align th axis th char th charoff th class th colspan th dir th headers th id th lang th onclick th ondblclick th onkeydown th onkeypress th onkeyup th onmousedown th onmousemove th onmouseout th onmouseover th onmouseup th rowspan th scope th style th title th valign th xml:lang # thead # table header thead align thead char thead charoff thead class thead dir thead id thead lang thead onclick thead ondblclick thead onkeydown thead onkeypress thead onkeyup thead onmousedown thead onmousemove thead onmouseout thead onmouseover thead onmouseup thead style thead title thead valign thead xml:lang # title # document title title dir title id title lang title xml:lang # tr # table row tr align tr char tr charoff tr class tr dir tr id tr lang tr onclick tr ondblclick tr onkeydown tr onkeypress tr onkeyup tr onmousedown tr onmousemove tr onmouseout tr onmouseover tr onmouseup tr style tr title tr valign tr xml:lang # tt # teletype or monospaced text style tt class tt dir tt id tt lang tt onclick tt ondblclick tt onkeydown tt onkeypress tt onkeyup tt onmousedown tt onmousemove tt onmouseout tt onmouseover tt onmouseup tt style tt title tt xml:lang # ul # unordered list ul class ul dir ul id ul lang ul onclick ul ondblclick ul onkeydown ul onkeypress ul onkeyup ul onmousedown ul onmousemove ul onmouseout ul onmouseover ul onmouseup ul style ul title ul xml:lang # var # instance of a variable or program argument var class var dir var id var lang var onclick var ondblclick var onkeydown var onkeypress var onkeyup var onmousedown var onmousemove var onmouseout var onmouseover var onmouseup var style var title var xml:lang MKDoc-XML-0.75/lib/MKDoc/XML/Stripper/xhtml10transitional.txt0000644000076400007640000005620707765640455024041 0ustar brunobruno00000000000000# 
xhtml 1.0 transitional # http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd # # # # lovingly expanded and sorted from the dtd by paul arzul (patricka@mkdoc.com) 2003/09/24 # a # anchor a accesskey a charset a class a coords a dir a href a hreflang a id a lang a name a onblur a onclick a ondblclick a onfocus a onkeydown a onkeypress a onkeyup a onmousedown a onmousemove a onmouseout a onmouseover a onmouseup a rel a rev a shape a style a tabindex a target a title a type a xml:lang # abbr # abbreviated form (e.g., WWW, HTTP, etc.) abbr class abbr dir abbr id abbr lang abbr onclick abbr ondblclick abbr onkeydown abbr onkeypress abbr onkeyup abbr onmousedown abbr onmousemove abbr onmouseout abbr onmouseover abbr onmouseup abbr style abbr title abbr xml:lang # acronym # acronym class acronym dir acronym id acronym lang acronym onclick acronym ondblclick acronym onkeydown acronym onkeypress acronym onkeyup acronym onmousedown acronym onmousemove acronym onmouseout acronym onmouseover acronym onmouseup acronym style acronym title acronym xml:lang # address # information on author address class address dir address id address lang address onclick address ondblclick address onkeydown address onkeypress address onkeyup address onmousedown address onmousemove address onmouseout address onmouseover address onmouseup address style address title address xml:lang # applet # Java applet applet align applet alt applet archive applet class applet code applet codebase applet height applet hspace applet id applet name applet object applet style applet title applet vspace applet width # area # client-side image map area area accesskey area alt area class area coords area dir area href area id area lang area nohref area onblur area onclick area ondblclick area onfocus area onkeydown area onkeypress area onkeyup area onmousedown area onmousemove area onmouseout area onmouseover area onmouseup area shape area style area tabindex area target area title area xml:lang # b # bold text 
style b class b dir b id b lang b onclick b ondblclick b onkeydown b onkeypress b onkeyup b onmousedown b onmousemove b onmouseout b onmouseover b onmouseup b style b title b xml:lang # base # document base URI base href base id base target # basefont # base font size basefont color basefont face basefont id basefont size # bdo # I18N BiDi over-ride bdo class bdo dir bdo id bdo lang bdo onclick bdo ondblclick bdo onkeydown bdo onkeypress bdo onkeyup bdo onmousedown bdo onmousemove bdo onmouseout bdo onmouseover bdo onmouseup bdo style bdo title bdo xml:lang # big # large text style big class big dir big id big lang big onclick big ondblclick big onkeydown big onkeypress big onkeyup big onmousedown big onmousemove big onmouseout big onmouseover big onmouseup big style big title big xml:lang # blockquote # long quotation blockquote cite blockquote class blockquote dir blockquote id blockquote lang blockquote onclick blockquote ondblclick blockquote onkeydown blockquote onkeypress blockquote onkeyup blockquote onmousedown blockquote onmousemove blockquote onmouseout blockquote onmouseover blockquote onmouseup blockquote style blockquote title blockquote xml:lang # body # document body body alink body background body bgcolor body class body dir body id body lang body link body onclick body ondblclick body onkeydown body onkeypress body onkeyup body onload body onmousedown body onmousemove body onmouseout body onmouseover body onmouseup body onunload body style body text body title body vlink body xml:lang # br # forced line break br class br clear br id br style br title # button # push button button accesskey button class button dir button disabled button id button lang button name button onblur button onclick button ondblclick button onfocus button onkeydown button onkeypress button onkeyup button onmousedown button onmousemove button onmouseout button onmouseover button onmouseup button style button tabindex button title button type button value button xml:lang # 
caption # table caption caption align caption class caption dir caption id caption lang caption onclick caption ondblclick caption onkeydown caption onkeypress caption onkeyup caption onmousedown caption onmousemove caption onmouseout caption onmouseover caption onmouseup caption style caption title caption xml:lang # center # shorthand for DIV align=center center class center dir center id center lang center onclick center ondblclick center onkeydown center onkeypress center onkeyup center onmousedown center onmousemove center onmouseout center onmouseover center onmouseup center style center title center xml:lang # cite # citation cite class cite dir cite id cite lang cite onclick cite ondblclick cite onkeydown cite onkeypress cite onkeyup cite onmousedown cite onmousemove cite onmouseout cite onmouseover cite onmouseup cite style cite title cite xml:lang # code # computer code fragment code class code dir code id code lang code onclick code ondblclick code onkeydown code onkeypress code onkeyup code onmousedown code onmousemove code onmouseout code onmouseover code onmouseup code style code title code xml:lang # col # table column col align col char col charoff col class col dir col id col lang col onclick col ondblclick col onkeydown col onkeypress col onkeyup col onmousedown col onmousemove col onmouseout col onmouseover col onmouseup col span col style col title col valign col width col xml:lang # colgroup # table column group colgroup align colgroup char colgroup charoff colgroup class colgroup dir colgroup id colgroup lang colgroup onclick colgroup ondblclick colgroup onkeydown colgroup onkeypress colgroup onkeyup colgroup onmousedown colgroup onmousemove colgroup onmouseout colgroup onmouseover colgroup onmouseup colgroup span colgroup style colgroup title colgroup valign colgroup width colgroup xml:lang # dd # definition description dd class dd dir dd id dd lang dd onclick dd ondblclick dd onkeydown dd onkeypress dd onkeyup dd onmousedown dd onmousemove 
dd onmouseout dd onmouseover dd onmouseup dd style dd title dd xml:lang # del # deleted text del cite del class del datetime del dir del id del lang del onclick del ondblclick del onkeydown del onkeypress del onkeyup del onmousedown del onmousemove del onmouseout del onmouseover del onmouseup del style del title del xml:lang # dfn # instance definition dfn class dfn dir dfn id dfn lang dfn onclick dfn ondblclick dfn onkeydown dfn onkeypress dfn onkeyup dfn onmousedown dfn onmousemove dfn onmouseout dfn onmouseover dfn onmouseup dfn style dfn title dfn xml:lang # dir # directory list dir class dir compact dir dir dir id dir lang dir onclick dir ondblclick dir onkeydown dir onkeypress dir onkeyup dir onmousedown dir onmousemove dir onmouseout dir onmouseover dir onmouseup dir style dir title dir xml:lang # div # generic language/style container div align div class div dir div id div lang div onclick div ondblclick div onkeydown div onkeypress div onkeyup div onmousedown div onmousemove div onmouseout div onmouseover div onmouseup div style div title div xml:lang # dl # definition list dl class dl compact dl dir dl id dl lang dl onclick dl ondblclick dl onkeydown dl onkeypress dl onkeyup dl onmousedown dl onmousemove dl onmouseout dl onmouseover dl onmouseup dl style dl title dl xml:lang # dt # definition term dt class dt dir dt id dt lang dt onclick dt ondblclick dt onkeydown dt onkeypress dt onkeyup dt onmousedown dt onmousemove dt onmouseout dt onmouseover dt onmouseup dt style dt title dt xml:lang # em # emphasis em class em dir em id em lang em onclick em ondblclick em onkeydown em onkeypress em onkeyup em onmousedown em onmousemove em onmouseout em onmouseover em onmouseup em style em title em xml:lang # fieldset # form control group fieldset class fieldset dir fieldset id fieldset lang fieldset onclick fieldset ondblclick fieldset onkeydown fieldset onkeypress fieldset onkeyup fieldset onmousedown fieldset onmousemove fieldset onmouseout fieldset onmouseover 
fieldset onmouseup fieldset style fieldset title fieldset xml:lang # font # local change to font font class font color font dir font face font id font lang font size font style font title font xml:lang # form # interactive form form accept form accept-charset form action form class form dir form enctype form id form lang form method form name form onclick form ondblclick form onkeydown form onkeypress form onkeyup form onmousedown form onmousemove form onmouseout form onmouseover form onmouseup form onreset form onsubmit form style form target form title form xml:lang # h1 # heading h1 align h1 class h1 dir h1 id h1 lang h1 onclick h1 ondblclick h1 onkeydown h1 onkeypress h1 onkeyup h1 onmousedown h1 onmousemove h1 onmouseout h1 onmouseover h1 onmouseup h1 style h1 title h1 xml:lang # h2 # heading h2 align h2 class h2 dir h2 id h2 lang h2 onclick h2 ondblclick h2 onkeydown h2 onkeypress h2 onkeyup h2 onmousedown h2 onmousemove h2 onmouseout h2 onmouseover h2 onmouseup h2 style h2 title h2 xml:lang # h3 # heading h3 align h3 class h3 dir h3 id h3 lang h3 onclick h3 ondblclick h3 onkeydown h3 onkeypress h3 onkeyup h3 onmousedown h3 onmousemove h3 onmouseout h3 onmouseover h3 onmouseup h3 style h3 title h3 xml:lang # h4 # heading h4 align h4 class h4 dir h4 id h4 lang h4 onclick h4 ondblclick h4 onkeydown h4 onkeypress h4 onkeyup h4 onmousedown h4 onmousemove h4 onmouseout h4 onmouseover h4 onmouseup h4 style h4 title h4 xml:lang # h5 # heading h5 align h5 class h5 dir h5 id h5 lang h5 onclick h5 ondblclick h5 onkeydown h5 onkeypress h5 onkeyup h5 onmousedown h5 onmousemove h5 onmouseout h5 onmouseover h5 onmouseup h5 style h5 title h5 xml:lang # h6 # heading h6 align h6 class h6 dir h6 id h6 lang h6 onclick h6 ondblclick h6 onkeydown h6 onkeypress h6 onkeyup h6 onmousedown h6 onmousemove h6 onmouseout h6 onmouseover h6 onmouseup h6 style h6 title h6 xml:lang # head # document head head dir head id head lang head profile head xml:lang # hr # horizontal rule hr align 
hr class hr dir hr id hr lang hr noshade hr onclick hr ondblclick hr onkeydown hr onkeypress hr onkeyup hr onmousedown hr onmousemove hr onmouseout hr onmouseover hr onmouseup hr size hr style hr title hr width hr xml:lang # html # document root element html dir html id html lang html xml:lang html xmlns # i # italic text style i class i dir i id i lang i onclick i ondblclick i onkeydown i onkeypress i onkeyup i onmousedown i onmousemove i onmouseout i onmouseover i onmouseup i style i title i xml:lang # iframe # inline subwindow iframe align iframe class iframe frameborder iframe height iframe id iframe longdesc iframe marginheight iframe marginwidth iframe name iframe scrolling iframe src iframe style iframe title iframe width # img # Embedded image img align img alt img border img class img dir img height img hspace img id img ismap img lang img longdesc img name img onclick img ondblclick img onkeydown img onkeypress img onkeyup img onmousedown img onmousemove img onmouseout img onmouseover img onmouseup img src img style img title img usemap img vspace img width img xml:lang # input # form control input accept input accesskey input align input alt input checked input class input dir input disabled input id input lang input maxlength input name input onblur input onchange input onclick input ondblclick input onfocus input onkeydown input onkeypress input onkeyup input onmousedown input onmousemove input onmouseout input onmouseover input onmouseup input onselect input readonly input size input src input style input tabindex input title input type input usemap input value input xml:lang # ins # inserted text ins cite ins class ins datetime ins dir ins id ins lang ins onclick ins ondblclick ins onkeydown ins onkeypress ins onkeyup ins onmousedown ins onmousemove ins onmouseout ins onmouseover ins onmouseup ins style ins title ins xml:lang # isindex # single line prompt isindex class isindex dir isindex id isindex lang isindex prompt isindex style isindex title 
isindex xml:lang # kbd # text to be entered by the user kbd class kbd dir kbd id kbd lang kbd onclick kbd ondblclick kbd onkeydown kbd onkeypress kbd onkeyup kbd onmousedown kbd onmousemove kbd onmouseout kbd onmouseover kbd onmouseup kbd style kbd title kbd xml:lang # label # form field label text label accesskey label class label dir label for label id label lang label onblur label onclick label ondblclick label onfocus label onkeydown label onkeypress label onkeyup label onmousedown label onmousemove label onmouseout label onmouseover label onmouseup label style label title label xml:lang # legend # fieldset legend legend accesskey legend align legend class legend dir legend id legend lang legend onclick legend ondblclick legend onkeydown legend onkeypress legend onkeyup legend onmousedown legend onmousemove legend onmouseout legend onmouseover legend onmouseup legend style legend title legend xml:lang # li # list item li class li dir li id li lang li onclick li ondblclick li onkeydown li onkeypress li onkeyup li onmousedown li onmousemove li onmouseout li onmouseover li onmouseup li style li title li type li value li xml:lang # link # a media-independent link link charset link class link dir link href link hreflang link id link lang link media link onclick link ondblclick link onkeydown link onkeypress link onkeyup link onmousedown link onmousemove link onmouseout link onmouseover link onmouseup link rel link rev link style link target link title link type link xml:lang # map # client-side image map map class map dir map id map lang map name map onclick map ondblclick map onkeydown map onkeypress map onkeyup map onmousedown map onmousemove map onmouseout map onmouseover map onmouseup map style map title map xml:lang # menu # menu list menu class menu compact menu dir menu id menu lang menu onclick menu ondblclick menu onkeydown menu onkeypress menu onkeyup menu onmousedown menu onmousemove menu onmouseout menu onmouseover menu onmouseup menu style menu title 
menu xml:lang # meta # generic metainformation meta content meta dir meta http-equiv meta id meta lang meta name meta scheme meta xml:lang # noscript # alternate content container for non script-based rendering noscript class noscript dir noscript id noscript lang noscript onclick noscript ondblclick noscript onkeydown noscript onkeypress noscript onkeyup noscript onmousedown noscript onmousemove noscript onmouseout noscript onmouseover noscript onmouseup noscript style noscript title noscript xml:lang # object # generic embedded object object align object archive object border object class object classid object codebase object codetype object data object declare object dir object height object hspace object id object lang object name object onclick object ondblclick object onkeydown object onkeypress object onkeyup object onmousedown object onmousemove object onmouseout object onmouseover object onmouseup object standby object style object tabindex object title object type object usemap object vspace object width object xml:lang # ol # ordered list ol class ol compact ol dir ol id ol lang ol onclick ol ondblclick ol onkeydown ol onkeypress ol onkeyup ol onmousedown ol onmousemove ol onmouseout ol onmouseover ol onmouseup ol start ol style ol title ol type ol xml:lang # optgroup # option group optgroup class optgroup dir optgroup disabled optgroup id optgroup label optgroup lang optgroup onclick optgroup ondblclick optgroup onkeydown optgroup onkeypress optgroup onkeyup optgroup onmousedown optgroup onmousemove optgroup onmouseout optgroup onmouseover optgroup onmouseup optgroup style optgroup title optgroup xml:lang # option # selectable choice option class option dir option disabled option id option label option lang option onclick option ondblclick option onkeydown option onkeypress option onkeyup option onmousedown option onmousemove option onmouseout option onmouseover option onmouseup option selected option style option title option value option xml:lang # p 
# paragraph p align p class p dir p id p lang p onclick p ondblclick p onkeydown p onkeypress p onkeyup p onmousedown p onmousemove p onmouseout p onmouseover p onmouseup p style p title p xml:lang # param # named property value param id param name param type param value param valuetype # pre # preformatted text pre class pre dir pre id pre lang pre onclick pre ondblclick pre onkeydown pre onkeypress pre onkeyup pre onmousedown pre onmousemove pre onmouseout pre onmouseover pre onmouseup pre style pre title pre width pre xml:lang pre xml:space # q # short inline quotation q cite q class q dir q id q lang q onclick q ondblclick q onkeydown q onkeypress q onkeyup q onmousedown q onmousemove q onmouseout q onmouseover q onmouseup q style q title q xml:lang # s # strike-through text style s class s dir s id s lang s onclick s ondblclick s onkeydown s onkeypress s onkeyup s onmousedown s onmousemove s onmouseout s onmouseover s onmouseup s style s title s xml:lang # samp # sample program output, scripts, etc. 
samp class samp dir samp id samp lang samp onclick samp ondblclick samp onkeydown samp onkeypress samp onkeyup samp onmousedown samp onmousemove samp onmouseout samp onmouseover samp onmouseup samp style samp title samp xml:lang # script # script statements script charset script defer script id script language script src script type script xml:space # select # option selector select class select dir select disabled select id select lang select multiple select name select onblur select onchange select onclick select ondblclick select onfocus select onkeydown select onkeypress select onkeyup select onmousedown select onmousemove select onmouseout select onmouseover select onmouseup select size select style select tabindex select title select xml:lang # small # small text style small class small dir small id small lang small onclick small ondblclick small onkeydown small onkeypress small onkeyup small onmousedown small onmousemove small onmouseout small onmouseover small onmouseup small style small title small xml:lang # span # generic language/style container span class span dir span id span lang span onclick span ondblclick span onkeydown span onkeypress span onkeyup span onmousedown span onmousemove span onmouseout span onmouseover span onmouseup span style span title span xml:lang # strike # strike-through text strike class strike dir strike id strike lang strike onclick strike ondblclick strike onkeydown strike onkeypress strike onkeyup strike onmousedown strike onmousemove strike onmouseout strike onmouseover strike onmouseup strike style strike title strike xml:lang # strong # strong emphasis strong class strong dir strong id strong lang strong onclick strong ondblclick strong onkeydown strong onkeypress strong onkeyup strong onmousedown strong onmousemove strong onmouseout strong onmouseover strong onmouseup strong style strong title strong xml:lang # style # style info style dir style id style lang style media style title style type style xml:lang style 
xml:space # sub # subscript sub class sub dir sub id sub lang sub onclick sub ondblclick sub onkeydown sub onkeypress sub onkeyup sub onmousedown sub onmousemove sub onmouseout sub onmouseover sub onmouseup sub style sub title sub xml:lang # sup # superscript sup class sup dir sup id sup lang sup onclick sup ondblclick sup onkeydown sup onkeypress sup onkeyup sup onmousedown sup onmousemove sup onmouseout sup onmouseover sup onmouseup sup style sup title sup xml:lang # table # table align table bgcolor table border table cellpadding table cellspacing table class table dir table frame table id table lang table onclick table ondblclick table onkeydown table onkeypress table onkeyup table onmousedown table onmousemove table onmouseout table onmouseover table onmouseup table rules table style table summary table title table width table xml:lang # tbody # table body tbody align tbody char tbody charoff tbody class tbody dir tbody id tbody lang tbody onclick tbody ondblclick tbody onkeydown tbody onkeypress tbody onkeyup tbody onmousedown tbody onmousemove tbody onmouseout tbody onmouseover tbody onmouseup tbody style tbody title tbody valign tbody xml:lang # td # table data cell td abbr td align td axis td bgcolor td char td charoff td class td colspan td dir td headers td height td id td lang td nowrap td onclick td ondblclick td onkeydown td onkeypress td onkeyup td onmousedown td onmousemove td onmouseout td onmouseover td onmouseup td rowspan td scope td style td title td valign td width td xml:lang # textarea # multi-line text field textarea accesskey textarea class textarea cols textarea dir textarea disabled textarea id textarea lang textarea name textarea onblur textarea onchange textarea onclick textarea ondblclick textarea onfocus textarea onkeydown textarea onkeypress textarea onkeyup textarea onmousedown textarea onmousemove textarea onmouseout textarea onmouseover textarea onmouseup textarea onselect textarea readonly textarea rows textarea style textarea 
tabindex textarea title textarea xml:lang # tfoot # table footer tfoot align tfoot char tfoot charoff tfoot class tfoot dir tfoot id tfoot lang tfoot onclick tfoot ondblclick tfoot onkeydown tfoot onkeypress tfoot onkeyup tfoot onmousedown tfoot onmousemove tfoot onmouseout tfoot onmouseover tfoot onmouseup tfoot style tfoot title tfoot valign tfoot xml:lang # th # table header cell th abbr th align th axis th bgcolor th char th charoff th class th colspan th dir th headers th height th id th lang th nowrap th onclick th ondblclick th onkeydown th onkeypress th onkeyup th onmousedown th onmousemove th onmouseout th onmouseover th onmouseup th rowspan th scope th style th title th valign th width th xml:lang # thead # table header thead align thead char thead charoff thead class thead dir thead id thead lang thead onclick thead ondblclick thead onkeydown thead onkeypress thead onkeyup thead onmousedown thead onmousemove thead onmouseout thead onmouseover thead onmouseup thead style thead title thead valign thead xml:lang # title # document title title dir title id title lang title xml:lang # tr # table row tr align tr bgcolor tr char tr charoff tr class tr dir tr id tr lang tr onclick tr ondblclick tr onkeydown tr onkeypress tr onkeyup tr onmousedown tr onmousemove tr onmouseout tr onmouseover tr onmouseup tr style tr title tr valign tr xml:lang # tt # teletype or monospaced text style tt class tt dir tt id tt lang tt onclick tt ondblclick tt onkeydown tt onkeypress tt onkeyup tt onmousedown tt onmousemove tt onmouseout tt onmouseover tt onmouseup tt style tt title tt xml:lang # u # underlined text style u class u dir u id u lang u onclick u ondblclick u onkeydown u onkeypress u onkeyup u onmousedown u onmousemove u onmouseout u onmouseover u onmouseup u style u title u xml:lang # ul # unordered list ul class ul compact ul dir ul id ul lang ul onclick ul ondblclick ul onkeydown ul onkeypress ul onkeyup ul onmousedown ul onmousemove ul onmouseout ul onmouseover ul 
onmouseup ul style ul title ul type ul xml:lang # var # instance of a variable or program argument var class var dir var id var lang var onclick var ondblclick var onkeydown var onkeypress var onkeyup var onmousedown var onmousemove var onmouseout var onmouseover var onmouseup var style var title var xml:lang MKDoc-XML-0.75/lib/MKDoc/XML/Stripper/xhtml10frameset.txt0000644000076400007640000005750307765640455023140 0ustar brunobruno00000000000000# xhtml 1.0 frameset # http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd # # # # lovingly expanded and sorted from the dtd by paul arzul (patricka@mkdoc.com) 2003/09/24 # a # anchor a accesskey a charset a class a coords a dir a href a hreflang a id a lang a name a onblur a onclick a ondblclick a onfocus a onkeydown a onkeypress a onkeyup a onmousedown a onmousemove a onmouseout a onmouseover a onmouseup a rel a rev a shape a style a tabindex a target a title a type a xml:lang # abbr # abbreviated form (e.g., WWW, HTTP, etc.) abbr class abbr dir abbr id abbr lang abbr onclick abbr ondblclick abbr onkeydown abbr onkeypress abbr onkeyup abbr onmousedown abbr onmousemove abbr onmouseout abbr onmouseover abbr onmouseup abbr style abbr title abbr xml:lang # acronym # acronym class acronym dir acronym id acronym lang acronym onclick acronym ondblclick acronym onkeydown acronym onkeypress acronym onkeyup acronym onmousedown acronym onmousemove acronym onmouseout acronym onmouseover acronym onmouseup acronym style acronym title acronym xml:lang # address # information on author address class address dir address id address lang address onclick address ondblclick address onkeydown address onkeypress address onkeyup address onmousedown address onmousemove address onmouseout address onmouseover address onmouseup address style address title address xml:lang # applet # Java applet applet align applet alt applet archive applet class applet code applet codebase applet height applet hspace applet id applet name applet object applet style 
applet title applet vspace applet width # area # client-side image map area area accesskey area alt area class area coords area dir area href area id area lang area nohref area onblur area onclick area ondblclick area onfocus area onkeydown area onkeypress area onkeyup area onmousedown area onmousemove area onmouseout area onmouseover area onmouseup area shape area style area tabindex area target area title area xml:lang # b # bold text style b class b dir b id b lang b onclick b ondblclick b onkeydown b onkeypress b onkeyup b onmousedown b onmousemove b onmouseout b onmouseover b onmouseup b style b title b xml:lang # base # document base URI base href base id base target # basefont # base font size basefont color basefont face basefont id basefont size # bdo # I18N BiDi over-ride bdo class bdo dir bdo id bdo lang bdo onclick bdo ondblclick bdo onkeydown bdo onkeypress bdo onkeyup bdo onmousedown bdo onmousemove bdo onmouseout bdo onmouseover bdo onmouseup bdo style bdo title bdo xml:lang # big # large text style big class big dir big id big lang big onclick big ondblclick big onkeydown big onkeypress big onkeyup big onmousedown big onmousemove big onmouseout big onmouseover big onmouseup big style big title big xml:lang # blockquote # long quotation blockquote cite blockquote class blockquote dir blockquote id blockquote lang blockquote onclick blockquote ondblclick blockquote onkeydown blockquote onkeypress blockquote onkeyup blockquote onmousedown blockquote onmousemove blockquote onmouseout blockquote onmouseover blockquote onmouseup blockquote style blockquote title blockquote xml:lang # body # document body body alink body background body bgcolor body class body dir body id body lang body link body onclick body ondblclick body onkeydown body onkeypress body onkeyup body onload body onmousedown body onmousemove body onmouseout body onmouseover body onmouseup body onunload body style body text body title body vlink body xml:lang # br # forced line break br 
class br clear br id br style br title # button # push button button accesskey button class button dir button disabled button id button lang button name button onblur button onclick button ondblclick button onfocus button onkeydown button onkeypress button onkeyup button onmousedown button onmousemove button onmouseout button onmouseover button onmouseup button style button tabindex button title button type button value button xml:lang # caption # table caption caption align caption class caption dir caption id caption lang caption onclick caption ondblclick caption onkeydown caption onkeypress caption onkeyup caption onmousedown caption onmousemove caption onmouseout caption onmouseover caption onmouseup caption style caption title caption xml:lang # center # shorthand for DIV align=center center class center dir center id center lang center onclick center ondblclick center onkeydown center onkeypress center onkeyup center onmousedown center onmousemove center onmouseout center onmouseover center onmouseup center style center title center xml:lang # cite # citation cite class cite dir cite id cite lang cite onclick cite ondblclick cite onkeydown cite onkeypress cite onkeyup cite onmousedown cite onmousemove cite onmouseout cite onmouseover cite onmouseup cite style cite title cite xml:lang # code # computer code fragment code class code dir code id code lang code onclick code ondblclick code onkeydown code onkeypress code onkeyup code onmousedown code onmousemove code onmouseout code onmouseover code onmouseup code style code title code xml:lang # col # table column col align col char col charoff col class col dir col id col lang col onclick col ondblclick col onkeydown col onkeypress col onkeyup col onmousedown col onmousemove col onmouseout col onmouseover col onmouseup col span col style col title col valign col width col xml:lang # colgroup # table column group colgroup align colgroup char colgroup charoff colgroup class colgroup dir colgroup id colgroup lang 
colgroup onclick colgroup ondblclick colgroup onkeydown colgroup onkeypress colgroup onkeyup colgroup onmousedown colgroup onmousemove colgroup onmouseout colgroup onmouseover colgroup onmouseup colgroup span colgroup style colgroup title colgroup valign colgroup width colgroup xml:lang # dd # definition description dd class dd dir dd id dd lang dd onclick dd ondblclick dd onkeydown dd onkeypress dd onkeyup dd onmousedown dd onmousemove dd onmouseout dd onmouseover dd onmouseup dd style dd title dd xml:lang # del # deleted text del cite del class del datetime del dir del id del lang del onclick del ondblclick del onkeydown del onkeypress del onkeyup del onmousedown del onmousemove del onmouseout del onmouseover del onmouseup del style del title del xml:lang # dfn # instance definition dfn class dfn dir dfn id dfn lang dfn onclick dfn ondblclick dfn onkeydown dfn onkeypress dfn onkeyup dfn onmousedown dfn onmousemove dfn onmouseout dfn onmouseover dfn onmouseup dfn style dfn title dfn xml:lang # dir # directory list dir class dir compact dir dir dir id dir lang dir onclick dir ondblclick dir onkeydown dir onkeypress dir onkeyup dir onmousedown dir onmousemove dir onmouseout dir onmouseover dir onmouseup dir style dir title dir xml:lang # div # generic language/style container div align div class div dir div id div lang div onclick div ondblclick div onkeydown div onkeypress div onkeyup div onmousedown div onmousemove div onmouseout div onmouseover div onmouseup div style div title div xml:lang # dl # definition list dl class dl compact dl dir dl id dl lang dl onclick dl ondblclick dl onkeydown dl onkeypress dl onkeyup dl onmousedown dl onmousemove dl onmouseout dl onmouseover dl onmouseup dl style dl title dl xml:lang # dt # definition term dt class dt dir dt id dt lang dt onclick dt ondblclick dt onkeydown dt onkeypress dt onkeyup dt onmousedown dt onmousemove dt onmouseout dt onmouseover dt onmouseup dt style dt title dt xml:lang # em # emphasis em class em dir em 
id em lang em onclick em ondblclick em onkeydown em onkeypress em onkeyup em onmousedown em onmousemove em onmouseout em onmouseover em onmouseup em style em title em xml:lang # fieldset # form control group fieldset class fieldset dir fieldset id fieldset lang fieldset onclick fieldset ondblclick fieldset onkeydown fieldset onkeypress fieldset onkeyup fieldset onmousedown fieldset onmousemove fieldset onmouseout fieldset onmouseover fieldset onmouseup fieldset style fieldset title fieldset xml:lang # font # local change to font font class font color font dir font face font id font lang font size font style font title font xml:lang # form # interactive form form accept form accept-charset form action form class form dir form enctype form id form lang form method form name form onclick form ondblclick form onkeydown form onkeypress form onkeyup form onmousedown form onmousemove form onmouseout form onmouseover form onmouseup form onreset form onsubmit form style form target form title form xml:lang # frame # subwindow frame class frame frameborder frame id frame longdesc frame marginheight frame marginwidth frame name frame noresize frame scrolling frame src frame style frame title # frameset # window subdivision frameset class frameset cols frameset id frameset onload frameset onunload frameset rows frameset style frameset title # h1 # heading h1 align h1 class h1 dir h1 id h1 lang h1 onclick h1 ondblclick h1 onkeydown h1 onkeypress h1 onkeyup h1 onmousedown h1 onmousemove h1 onmouseout h1 onmouseover h1 onmouseup h1 style h1 title h1 xml:lang # h2 # heading h2 align h2 class h2 dir h2 id h2 lang h2 onclick h2 ondblclick h2 onkeydown h2 onkeypress h2 onkeyup h2 onmousedown h2 onmousemove h2 onmouseout h2 onmouseover h2 onmouseup h2 style h2 title h2 xml:lang # h3 # heading h3 align h3 class h3 dir h3 id h3 lang h3 onclick h3 ondblclick h3 onkeydown h3 onkeypress h3 onkeyup h3 onmousedown h3 onmousemove h3 onmouseout h3 onmouseover h3 onmouseup h3 style h3 title h3 
xml:lang # h4 # heading h4 align h4 class h4 dir h4 id h4 lang h4 onclick h4 ondblclick h4 onkeydown h4 onkeypress h4 onkeyup h4 onmousedown h4 onmousemove h4 onmouseout h4 onmouseover h4 onmouseup h4 style h4 title h4 xml:lang # h5 # heading h5 align h5 class h5 dir h5 id h5 lang h5 onclick h5 ondblclick h5 onkeydown h5 onkeypress h5 onkeyup h5 onmousedown h5 onmousemove h5 onmouseout h5 onmouseover h5 onmouseup h5 style h5 title h5 xml:lang # h6 # heading h6 align h6 class h6 dir h6 id h6 lang h6 onclick h6 ondblclick h6 onkeydown h6 onkeypress h6 onkeyup h6 onmousedown h6 onmousemove h6 onmouseout h6 onmouseover h6 onmouseup h6 style h6 title h6 xml:lang # head # document head head dir head id head lang head profile head xml:lang # hr # horizontal rule hr align hr class hr dir hr id hr lang hr noshade hr onclick hr ondblclick hr onkeydown hr onkeypress hr onkeyup hr onmousedown hr onmousemove hr onmouseout hr onmouseover hr onmouseup hr size hr style hr title hr width hr xml:lang # html # document root element html dir html id html lang html xml:lang html xmlns # i # italic text style i class i dir i id i lang i onclick i ondblclick i onkeydown i onkeypress i onkeyup i onmousedown i onmousemove i onmouseout i onmouseover i onmouseup i style i title i xml:lang # iframe # inline subwindow iframe align iframe class iframe frameborder iframe height iframe id iframe longdesc iframe marginheight iframe marginwidth iframe name iframe scrolling iframe src iframe style iframe title iframe width # img # Embedded image img align img alt img border img class img dir img height img hspace img id img ismap img lang img longdesc img name img onclick img ondblclick img onkeydown img onkeypress img onkeyup img onmousedown img onmousemove img onmouseout img onmouseover img onmouseup img src img style img title img usemap img vspace img width img xml:lang # input # form control input accept input accesskey input align input alt input checked input class input dir input disabled 
input id input lang input maxlength input name input onblur input onchange input onclick input ondblclick input onfocus input onkeydown input onkeypress input onkeyup input onmousedown input onmousemove input onmouseout input onmouseover input onmouseup input onselect input readonly input size input src input style input tabindex input title input type input usemap input value input xml:lang # ins # inserted text ins cite ins class ins datetime ins dir ins id ins lang ins onclick ins ondblclick ins onkeydown ins onkeypress ins onkeyup ins onmousedown ins onmousemove ins onmouseout ins onmouseover ins onmouseup ins style ins title ins xml:lang # isindex # single line prompt isindex class isindex dir isindex id isindex lang isindex prompt isindex style isindex title isindex xml:lang # kbd # text to be entered by the user kbd class kbd dir kbd id kbd lang kbd onclick kbd ondblclick kbd onkeydown kbd onkeypress kbd onkeyup kbd onmousedown kbd onmousemove kbd onmouseout kbd onmouseover kbd onmouseup kbd style kbd title kbd xml:lang # label # form field label text label accesskey label class label dir label for label id label lang label onblur label onclick label ondblclick label onfocus label onkeydown label onkeypress label onkeyup label onmousedown label onmousemove label onmouseout label onmouseover label onmouseup label style label title label xml:lang # legend # fieldset legend legend accesskey legend align legend class legend dir legend id legend lang legend onclick legend ondblclick legend onkeydown legend onkeypress legend onkeyup legend onmousedown legend onmousemove legend onmouseout legend onmouseover legend onmouseup legend style legend title legend xml:lang # li # list item li class li dir li id li lang li onclick li ondblclick li onkeydown li onkeypress li onkeyup li onmousedown li onmousemove li onmouseout li onmouseover li onmouseup li style li title li type li value li xml:lang # link # a media-independent link link charset link class link dir link href 
link hreflang link id link lang link media link onclick link ondblclick link onkeydown link onkeypress link onkeyup link onmousedown link onmousemove link onmouseout link onmouseover link onmouseup link rel link rev link style link target link title link type link xml:lang # map # client-side image map map class map dir map id map lang map name map onclick map ondblclick map onkeydown map onkeypress map onkeyup map onmousedown map onmousemove map onmouseout map onmouseover map onmouseup map style map title map xml:lang # menu # menu list menu class menu compact menu dir menu id menu lang menu onclick menu ondblclick menu onkeydown menu onkeypress menu onkeyup menu onmousedown menu onmousemove menu onmouseout menu onmouseover menu onmouseup menu style menu title menu xml:lang # meta # generic metainformation meta content meta dir meta http-equiv meta id meta lang meta name meta scheme meta xml:lang # noframes # alternate content container for non frame-based rendering noframes class noframes dir noframes id noframes lang noframes onclick noframes ondblclick noframes onkeydown noframes onkeypress noframes onkeyup noframes onmousedown noframes onmousemove noframes onmouseout noframes onmouseover noframes onmouseup noframes style noframes title noframes xml:lang # noscript # alternate content container for non script-based rendering noscript class noscript dir noscript id noscript lang noscript onclick noscript ondblclick noscript onkeydown noscript onkeypress noscript onkeyup noscript onmousedown noscript onmousemove noscript onmouseout noscript onmouseover noscript onmouseup noscript style noscript title noscript xml:lang # object # generic embedded object object align object archive object border object class object classid object codebase object codetype object data object declare object dir object height object hspace object id object lang object name object onclick object ondblclick object onkeydown object onkeypress object onkeyup object onmousedown object 
onmousemove object onmouseout object onmouseover object onmouseup object standby object style object tabindex object title object type object usemap object vspace object width object xml:lang # ol # ordered list ol class ol compact ol dir ol id ol lang ol onclick ol ondblclick ol onkeydown ol onkeypress ol onkeyup ol onmousedown ol onmousemove ol onmouseout ol onmouseover ol onmouseup ol start ol style ol title ol type ol xml:lang # optgroup # option group optgroup class optgroup dir optgroup disabled optgroup id optgroup label optgroup lang optgroup onclick optgroup ondblclick optgroup onkeydown optgroup onkeypress optgroup onkeyup optgroup onmousedown optgroup onmousemove optgroup onmouseout optgroup onmouseover optgroup onmouseup optgroup style optgroup title optgroup xml:lang # option # selectable choice option class option dir option disabled option id option label option lang option onclick option ondblclick option onkeydown option onkeypress option onkeyup option onmousedown option onmousemove option onmouseout option onmouseover option onmouseup option selected option style option title option value option xml:lang # p # paragraph p align p class p dir p id p lang p onclick p ondblclick p onkeydown p onkeypress p onkeyup p onmousedown p onmousemove p onmouseout p onmouseover p onmouseup p style p title p xml:lang # param # named property value param id param name param type param value param valuetype # pre # preformatted text pre class pre dir pre id pre lang pre onclick pre ondblclick pre onkeydown pre onkeypress pre onkeyup pre onmousedown pre onmousemove pre onmouseout pre onmouseover pre onmouseup pre style pre title pre width pre xml:lang pre xml:space # q # short inline quotation q cite q class q dir q id q lang q onclick q ondblclick q onkeydown q onkeypress q onkeyup q onmousedown q onmousemove q onmouseout q onmouseover q onmouseup q style q title q xml:lang # s # strike-through text style s class s dir s id s lang s onclick s ondblclick s 
onkeydown s onkeypress s onkeyup s onmousedown s onmousemove s onmouseout s onmouseover s onmouseup s style s title s xml:lang # samp # sample program output, scripts, etc. samp class samp dir samp id samp lang samp onclick samp ondblclick samp onkeydown samp onkeypress samp onkeyup samp onmousedown samp onmousemove samp onmouseout samp onmouseover samp onmouseup samp style samp title samp xml:lang # script # script statements script charset script defer script id script language script src script type script xml:space # select # option selector select class select dir select disabled select id select lang select multiple select name select onblur select onchange select onclick select ondblclick select onfocus select onkeydown select onkeypress select onkeyup select onmousedown select onmousemove select onmouseout select onmouseover select onmouseup select size select style select tabindex select title select xml:lang # small # small text style small class small dir small id small lang small onclick small ondblclick small onkeydown small onkeypress small onkeyup small onmousedown small onmousemove small onmouseout small onmouseover small onmouseup small style small title small xml:lang # span # generic language/style container span class span dir span id span lang span onclick span ondblclick span onkeydown span onkeypress span onkeyup span onmousedown span onmousemove span onmouseout span onmouseover span onmouseup span style span title span xml:lang # strike # strike-through text strike class strike dir strike id strike lang strike onclick strike ondblclick strike onkeydown strike onkeypress strike onkeyup strike onmousedown strike onmousemove strike onmouseout strike onmouseover strike onmouseup strike style strike title strike xml:lang # strong # strong emphasis strong class strong dir strong id strong lang strong onclick strong ondblclick strong onkeydown strong onkeypress strong onkeyup strong onmousedown strong onmousemove strong onmouseout strong 
onmouseover strong onmouseup strong style strong title strong xml:lang # style # style info style dir style id style lang style media style title style type style xml:lang style xml:space # sub # subscript sub class sub dir sub id sub lang sub onclick sub ondblclick sub onkeydown sub onkeypress sub onkeyup sub onmousedown sub onmousemove sub onmouseout sub onmouseover sub onmouseup sub style sub title sub xml:lang # sup # superscript sup class sup dir sup id sup lang sup onclick sup ondblclick sup onkeydown sup onkeypress sup onkeyup sup onmousedown sup onmousemove sup onmouseout sup onmouseover sup onmouseup sup style sup title sup xml:lang # table # table align table bgcolor table border table cellpadding table cellspacing table class table dir table frame table id table lang table onclick table ondblclick table onkeydown table onkeypress table onkeyup table onmousedown table onmousemove table onmouseout table onmouseover table onmouseup table rules table style table summary table title table width table xml:lang # tbody # table body tbody align tbody char tbody charoff tbody class tbody dir tbody id tbody lang tbody onclick tbody ondblclick tbody onkeydown tbody onkeypress tbody onkeyup tbody onmousedown tbody onmousemove tbody onmouseout tbody onmouseover tbody onmouseup tbody style tbody title tbody valign tbody xml:lang # td # table data cell td abbr td align td axis td bgcolor td char td charoff td class td colspan td dir td headers td height td id td lang td nowrap td onclick td ondblclick td onkeydown td onkeypress td onkeyup td onmousedown td onmousemove td onmouseout td onmouseover td onmouseup td rowspan td scope td style td title td valign td width td xml:lang # textarea # multi-line text field textarea accesskey textarea class textarea cols textarea dir textarea disabled textarea id textarea lang textarea name textarea onblur textarea onchange textarea onclick textarea ondblclick textarea onfocus textarea onkeydown textarea onkeypress textarea onkeyup 
textarea onmousedown textarea onmousemove textarea onmouseout textarea onmouseover textarea onmouseup textarea onselect textarea readonly textarea rows textarea style textarea tabindex textarea title textarea xml:lang # tfoot # table footer tfoot align tfoot char tfoot charoff tfoot class tfoot dir tfoot id tfoot lang tfoot onclick tfoot ondblclick tfoot onkeydown tfoot onkeypress tfoot onkeyup tfoot onmousedown tfoot onmousemove tfoot onmouseout tfoot onmouseover tfoot onmouseup tfoot style tfoot title tfoot valign tfoot xml:lang # th # table header cell th abbr th align th axis th bgcolor th char th charoff th class th colspan th dir th headers th height th id th lang th nowrap th onclick th ondblclick th onkeydown th onkeypress th onkeyup th onmousedown th onmousemove th onmouseout th onmouseover th onmouseup th rowspan th scope th style th title th valign th width th xml:lang # thead # table header thead align thead char thead charoff thead class thead dir thead id thead lang thead onclick thead ondblclick thead onkeydown thead onkeypress thead onkeyup thead onmousedown thead onmousemove thead onmouseout thead onmouseover thead onmouseup thead style thead title thead valign thead xml:lang # title # document title title dir title id title lang title xml:lang # tr # table row tr align tr bgcolor tr char tr charoff tr class tr dir tr id tr lang tr onclick tr ondblclick tr onkeydown tr onkeypress tr onkeyup tr onmousedown tr onmousemove tr onmouseout tr onmouseover tr onmouseup tr style tr title tr valign tr xml:lang # tt # teletype or monospaced text style tt class tt dir tt id tt lang tt onclick tt ondblclick tt onkeydown tt onkeypress tt onkeyup tt onmousedown tt onmousemove tt onmouseout tt onmouseover tt onmouseup tt style tt title tt xml:lang # u # underlined text style u class u dir u id u lang u onclick u ondblclick u onkeydown u onkeypress u onkeyup u onmousedown u onmousemove u onmouseout u onmouseover u onmouseup u style u title u xml:lang # ul # 
unordered list ul class ul compact ul dir ul id ul lang ul onclick ul ondblclick ul onkeydown ul onkeypress ul onkeyup ul onmousedown ul onmousemove ul onmouseout ul onmouseover ul onmouseup ul style ul title ul type ul xml:lang # var # instance of a variable or program argument var class var dir var id var lang var onclick var ondblclick var onkeydown var onkeypress var onkeyup var onmousedown var onmousemove var onmouseout var onmouseover var onmouseup var style var title var xml:lang MKDoc-XML-0.75/lib/MKDoc/XML/Stripper/mkdoc16.txt0000644000076400007640000001750210130753640021330 0ustar brunobruno00000000000000# mkdoc 1.6 defaults by chris croome (chris@mkdoc.com) 2003/05/16 # linearized and sorted by paul arzul (patricka@mkdoc.com) 2003/09/22 # # recreated from xhtml 1.0 strict by paul arzul (patricka@mkdoc.com) 2003/09/26 # a # anchor a accesskey a charset a class a coords a dir a href a hreflang a id a lang a name a rel a rev a shape a tabindex a title a type a xml:lang # abbr # abbreviated form (e.g., WWW, HTTP, etc.) 
abbr class abbr dir abbr id abbr lang abbr title abbr xml:lang # acronym # acronym class acronym dir acronym id acronym lang acronym title acronym xml:lang # address # information on author address class address dir address id address lang address title address xml:lang # area # client-side image map area area accesskey area alt area class area coords area dir area href area id area lang area nohref area shape area tabindex area title area xml:lang # b b class b dir b id b lang b title b xml:lang # bdo # I18N BiDi over-ride bdo class bdo dir bdo id bdo lang bdo title bdo xml:lang # big big class big dir big id big lang big title big xml:lang # blockquote # long quotation blockquote cite blockquote class blockquote dir blockquote id blockquote lang blockquote title blockquote xml:lang # body # document body body class body dir body id body lang body title body xml:lang # br # forced line break br class br id br title # button # push button button accesskey button class button dir button disabled button id button lang button name button tabindex button title button type button value button xml:lang # caption # table caption caption class caption dir caption id caption lang caption title caption xml:lang # cite # citation cite class cite dir cite id cite lang cite title cite xml:lang # code # computer code fragment code class code dir code id code lang code title code xml:lang # col # table column col class col dir col id col lang col span col title col width col xml:lang # colgroup # table column group colgroup class colgroup dir colgroup id colgroup lang colgroup span colgroup title colgroup width colgroup xml:lang # dd # definition description dd class dd dir dd id dd lang dd title dd xml:lang # del # deleted text del cite del class del datetime del dir del id del lang del title del xml:lang # dfn # instance definition dfn class dfn dir dfn id dfn lang dfn title dfn xml:lang # div # generic language/style container div align div class div dir div id div lang div 
title div xml:lang # dl # definition list dl class dl dir dl id dl lang dl title dl xml:lang # dt # definition term dt class dt dir dt id dt lang dt title dt xml:lang # em # emphasis em class em dir em id em lang em title em xml:lang # fieldset # form control group fieldset class fieldset dir fieldset id fieldset lang fieldset title fieldset xml:lang # form # interactive form form accept form accept-charset form action form class form dir form enctype form id form lang form method form title form xml:lang # h1 # heading h1 class h1 dir h1 id h1 lang h1 title h1 xml:lang h1 align # h2 # heading h2 class h2 dir h2 id h2 lang h2 title h2 xml:lang h2 align # h3 # heading h3 class h3 dir h3 id h3 lang h3 title h3 xml:lang h3 align # h4 # heading h4 class h4 dir h4 id h4 lang h4 title h4 xml:lang h4 align # h5 # heading h5 class h5 dir h5 id h5 lang h5 title h5 xml:lang h5 align # h6 # heading h6 class h6 dir h6 id h6 lang h6 title h6 xml:lang h6 align # head # document head head dir head id head lang head profile head xml:lang # hr # horizontal rule hr class hr dir hr id hr lang hr title hr xml:lang # html # document root element html dir html id html lang html xml:lang html xmlns # i # italic text style i class i dir i id i lang i title i xml:lang # img # Embedded image img align img alt img class img dir img height img id img ismap img lang img longdesc img src img title img usemap img width img xml:lang img border # input # form control input accept input accesskey input alt input checked input class input dir input disabled input id input lang input maxlength input name input readonly input size input src input tabindex input title input type input usemap input value input xml:lang # ins # inserted text ins cite ins class ins datetime ins dir ins id ins lang ins title ins xml:lang # kbd # text to be entered by the user kbd class kbd dir kbd id kbd lang kbd title kbd xml:lang # label # form field label text label accesskey label class label dir label for label id 
label lang label title label xml:lang # legend # fieldset legend legend accesskey legend class legend dir legend id legend lang legend title legend xml:lang # li # list item li class li dir li id li lang li title li xml:lang # map # client-side image map map class map dir map id map lang map name map title map xml:lang # object # generic embedded object object archive object class object classid object codebase object codetype object data object declare object dir object height object id object lang object name object standby object tabindex object title object type object usemap object width object xml:lang # ol # ordered list ol class ol dir ol id ol lang ol title ol xml:lang # optgroup # option group optgroup class optgroup dir optgroup disabled optgroup id optgroup label optgroup lang optgroup title optgroup xml:lang # option # selectable choice option class option dir option disabled option id option label option lang option selected option title option value option xml:lang # p # paragraph p class p dir p id p lang p title p xml:lang p align # param # named property value param id param name param type param value param valuetype # pre # preformatted text pre class pre dir pre id pre lang pre title pre xml:lang pre xml:space # q # short inline quotation q cite q class q dir q id q lang q title q xml:lang # samp # sample program output, scripts, etc. 
samp class samp dir samp id samp lang samp title samp xml:lang # select # option selector select class select dir select disabled select id select lang select multiple select name select size select tabindex select title select xml:lang # small small class small dir small id small lang small title small xml:lang # span # generic language/style container span class span dir span id span lang span title span xml:lang # strong # strong emphasis strong class strong dir strong id strong lang strong title strong xml:lang # sub # subscript sub class sub dir sub id sub lang sub title sub xml:lang # sup # superscript sup class sup dir sup id sup lang sup title sup xml:lang # table # table border table cellpadding table cellspacing table class table dir table frame table id table lang table rules table summary table title table width table xml:lang # tbody # table body tbody class tbody dir tbody id tbody lang tbody title tbody xml:lang # td # table data cell td abbr td axis td class td colspan td dir td headers td id td lang td rowspan td scope td title td xml:lang # textarea # multi-line text field textarea accesskey textarea class textarea cols textarea dir textarea disabled textarea id textarea lang textarea name textarea readonly textarea rows textarea tabindex textarea title textarea xml:lang # tfoot # table footer tfoot class tfoot dir tfoot id tfoot lang tfoot title tfoot xml:lang # th # table header cell th abbr th axis th class th colspan th dir th headers th id th lang th rowspan th scope th title th xml:lang # thead # table header thead class thead dir thead id thead lang thead title thead xml:lang # title # document title title dir title id title lang title xml:lang # tr # table row tr class tr dir tr id tr lang tr title tr xml:lang # tt tt class tt dir tt id tt lang tt title tt xml:lang # ul # unordered list ul class ul dir ul id ul lang ul title ul xml:lang # var # instance of a variable or program argument var class var dir var id var lang var title var 
xml:lang MKDoc-XML-0.75/lib/MKDoc/XML/Dumper.pm0000644000076400007640000002603010130755242017301 0ustar brunobruno00000000000000
# -------------------------------------------------------------------------------------
# MKDoc::XML::Dumper
# -------------------------------------------------------------------------------------
# Author : Jean-Michel Hiver.
# Copyright : (c) MKDoc Holdings Ltd, 2003
#
# This module serializes / dumps / freezes Perl structures to a well-formed XML string
# and deserializes / undumps / thaws them back from XML to Perl.
#
# This module is distributed under the same license as Perl itself.
# -------------------------------------------------------------------------------------
package MKDoc::XML::Dumper;
use MKDoc::XML::Encode;
use MKDoc::XML::Decode;
use MKDoc::XML::TreeBuilder;
use Scalar::Util;
use warnings;
use strict;

# Per-call state; both are localized by xml2perl() / perl2xml().
#   $IndentLevel : current nesting depth of the XML being written.
#   $BackRef     : id => reference table used to detect / rebuild shared and
#                  circular references.
our ($IndentLevel, $BackRef);

# True while undumping the content of a legacy <perl> element, in which hash
# items hold their text directly instead of a nested <litteral> element.
our $Compat = 0;


# MKDoc::XML::Dumper->xml2perl ($xml)
# -----------------------------------
# Parses $xml with MKDoc::XML::TreeBuilder and rebuilds the Perl value
# described by the first significant node of the resulting tree.
sub xml2perl
{
    my $class = shift;
    my $xml   = shift;
    my @tree  = MKDoc::XML::TreeBuilder->process_data ($xml);

    # Drop leading whitespace-only text and nodes tagged '~pi' (presumably
    # processing instructions such as the XML declaration).
    while (@tree)
    {
        my $node = $tree[0];
        my $skip = ref $node
            ? ($node->{_tag} and $node->{_tag} eq '~pi')
            : ($node =~ /^(\s|\n|\r)*$/);
        last unless $skip;
        shift (@tree);
    }

    local $BackRef     = {};
    local $IndentLevel = 0;
    return $class->xml_to_perl ($tree[0]);
}


# SECTION THAT UNDUMPS PERL FROM XML NODE

# $class->xml_to_perl (@nodes)
# ----------------------------
# Converts every element node of @nodes (plain text nodes are ignored) by
# trying each xml_to_perl_* handler in turn. Returns the single value when
# exactly one was produced, the list of values otherwise.
sub xml_to_perl
{
    my $class = shift;
    my @nodes = grep { ref $_ } @_;

    # All handlers but the last run in scalar context (left side of ||) and
    # yield a reference or undef. The litteral handler runs in list context so
    # that it can contribute an explicit undef, or nothing at all.
    my @res = map {
        $class->xml_to_perl_backwards_compat_perl_tag ($_) ||
        $class->xml_to_perl_backref ($_)                   ||
        $class->xml_to_perl_ref ($_)                       ||
        $class->xml_to_perl_scalar ($_)                    ||
        $class->xml_to_perl_hash ($_)                      ||
        $class->xml_to_perl_array ($_)                     ||
        $class->xml_to_perl_litteral ($_)
    } @nodes;

    return pop (@res) if (@res == 1);
    return @res;
}


# Handles the legacy <perl> wrapper: undumps its content with $Compat set.
sub xml_to_perl_backwards_compat_perl_tag
{
    my ($class, $tree) = @_;
    ref $tree               || return ();
    $tree->{_tag} eq 'perl' || return ();

    local ($Compat) = 1;
    return $class->xml_to_perl (@{$tree->{_content}});
}


# Handles <backref id="...">: returns the reference already rebuilt under
# that id, or nothing when the id is unknown.
sub xml_to_perl_backref
{
    my ($class, $tree) = @_;
    ref $tree                  || return ();
    $tree->{_tag} eq 'backref' || return ();

    my $ref_id = $tree->{id}   || return ();
    exists $BackRef->{$ref_id} || return ();
    return $BackRef->{$ref_id};
}


# Handles <ref id="...">: a reference to another reference.
sub xml_to_perl_ref
{
    my ($class, $tree) = @_;
    ref $tree              || return ();
    $tree->{_tag} eq 'ref' || return ();
    return $class->_xml_to_perl_pointer ($tree);
}


# Handles <scalar id="...">: a reference to a plain scalar.
sub xml_to_perl_scalar
{
    my ($class, $tree) = @_;
    ref $tree                 or return ();
    $tree->{_tag} eq 'scalar' or return ();
    return $class->_xml_to_perl_pointer ($tree);
}


# Shared body of xml_to_perl_ref / xml_to_perl_scalar: builds a reference
# whose target is the value undumped from the node's content.
sub _xml_to_perl_pointer
{
    my ($class, $tree) = @_;
    my $ref_id = $tree->{id} or return ();

    # \\undef yields a reference to a fresh, writable anonymous scalar.
    my $ref = \\undef;
    bless $ref, $tree->{bless} if (defined $tree->{bless});

    # Registered before recursing so that a <backref> found in the content
    # can resolve to this very reference.
    $BackRef->{$ref_id} = $ref;
    ($$ref) = $class->xml_to_perl ( @{$tree->{_content}} );
    return $ref;
}


# Handles <hash id="...">: each child element carries a 'key' attribute and
# the dumped value as content.
sub xml_to_perl_hash
{
    my ($class, $tree) = @_;
    ref $tree               or return ();
    $tree->{_tag} eq 'hash' or return ();

    my $ref_id = $tree->{id} or return ();
    my $ref = {};
    bless $ref, $tree->{bless} if (defined $tree->{bless});
    $BackRef->{$ref_id} = $ref;

    foreach my $item (grep { ref $_ } @{$tree->{_content}})
    {
        my $key = $item->{key};
        if ($Compat)
        {
            # Legacy format: the item content is the encoded text itself.
            # Only a missing value defaults to '' so that "0" is preserved.
            my $stuff = $item->{_content}->[0];
            $stuff = '' unless (defined $stuff);
            $ref->{$key} = MKDoc::XML::Decode->new ('xml')->process ($stuff);
        }
        else
        {
            my ($val) = $class->xml_to_perl ( @{$item->{_content}} );
            $ref->{$key} = $val;
        }
    }

    return $ref;
}


# Handles <array id="...">: each child element carries its index in the
# 'key' attribute and the dumped value as content.
sub xml_to_perl_array
{
    my ($class, $tree) = @_;
    ref $tree                or return ();
    $tree->{_tag} eq 'array' or return ();

    my $ref_id = $tree->{id} or return ();
    my $ref = [];
    bless $ref, $tree->{bless} if (defined $tree->{bless});
    $BackRef->{$ref_id} = $ref;

    foreach my $item (grep { ref $_ } @{$tree->{_content}})
    {
        my $key   = $item->{key};
        my ($val) = $class->xml_to_perl ( @{$item->{_content}} );
        $ref->[$key] = $val;
    }

    return $ref;
}


# Handles <litteral>: a plain value, or undef when undef="true" is set.
sub xml_to_perl_litteral
{
    my ($class, $tree) = @_;
    ref $tree                   or return ();
    $tree->{_tag} eq 'litteral' or return ();

    # Deliberately 'return undef' rather than a bare return: xml_to_perl()
    # calls this in list context and the undef must count as one value.
    return undef if ($tree->{undef} and $tree->{undef} eq 'true');

    # An element without content is taken to be the empty string (undef is
    # dumped with undef="true", so it cannot be what was meant here).
    my $text = $tree->{_content}->[0];
    $text = '' unless (defined $text);
    return MKDoc::XML::Decode->new ('xml')->process ($text);
}


#####################################################################
#                DUMPS PERL STRUCTURE TO XML DATA                   #
#####################################################################

# MKDoc::XML::Dumper->perl2xml ($ref)
# -----------------------------------
# Returns the XML string describing $ref.
sub perl2xml
{
    my $class = shift;
    my $ref   = shift;
    local $BackRef     = {};
    local $IndentLevel = 0;
    return $class->perl_to_xml ($ref);
}


# $class->perl_to_xml ($ref)
# --------------------------
# Dumps one value by trying each perl_to_xml_* handler in turn; anything that
# is not a known kind of reference is written as a litteral.
# (The original assigned reftype($ref) to the global $_ here; nothing read it
# and it overwrote whatever the caller had aliased to $_, so it is gone.)
sub perl_to_xml
{
    my ($class, $ref) = @_;
    return $class->perl_to_xml_backref ($ref) ||
           $class->perl_to_xml_ref ($ref)     ||
           $class->perl_to_xml_scalar ($ref)  ||
           $class->perl_to_xml_hash ($ref)    ||
           $class->perl_to_xml_array ($ref)   ||
           $class->perl_to_xml_litteral ($ref);
}


# Writes <backref id="..." /> when $ref has already been dumped.
sub perl_to_xml_backref
{
    my ($class, $ref) = @_;
    $ref && ref $ref || return;

    # refaddr() rather than 0 + $ref: immune to overloaded numification.
    my $ref_id = Scalar::Util::refaddr ($ref);
    $BackRef->{$ref_id} || return;
    return $class->indent() . qq |<backref id="$ref_id" />| . "\n";
}


# Writes a non-reference value, XML-encoded, or the undef marker.
sub perl_to_xml_litteral
{
    my ($class, $ref) = @_;
    return $class->indent() . qq |<litteral undef="true" />| . "\n"
        unless (defined $ref);

    return $class->indent()
         . qq |<litteral>| . MKDoc::XML::Encode->process ($ref) . qq |</litteral>|
         . "\n";
}


# Writes a reference to a plain scalar as <scalar>...</scalar>.
sub perl_to_xml_scalar
{
    my ($class, $ref) = @_;
    $ref && ref $ref && Scalar::Util::reftype ($ref) eq 'SCALAR' || return;
    return $class->_perl_to_xml_pointer ('scalar', $ref);
}


# Writes a reference to a reference as <ref>...</ref>.
sub perl_to_xml_ref
{
    my ($class, $ref) = @_;
    $ref && ref $ref && Scalar::Util::reftype ($ref) eq 'REF' || return;
    return $class->_perl_to_xml_pointer ('ref', $ref);
}


# Writes a hash reference as <hash> holding one <item key="..."> per key.
sub perl_to_xml_hash
{
    my ($class, $ref) = @_;
    $ref && ref $ref && Scalar::Util::reftype ($ref) eq 'HASH' || return;

    my $attr   = $class->_node_attributes ($ref);
    my $string = $class->indent() . qq |<hash $attr>| . "\n";

    # NOTE(review): keys are interpolated into the attribute unescaped; a key
    # containing '"', '<' or '&' would produce malformed XML. Encode it only
    # once it is confirmed that the tree builder decodes attribute values.
    for my $key (keys %{$ref})
    {
        $string .= $class->_perl_to_xml_item ($key, $ref->{$key});
    }

    $string .= $class->indent() . qq |</hash>| . "\n";
    return $string;
}


# Writes an array reference as <array> holding one <item key="N"> per index.
sub perl_to_xml_array
{
    my ($class, $ref) = @_;
    $ref && ref $ref && Scalar::Util::reftype ($ref) eq 'ARRAY' || return;

    my $attr   = $class->_node_attributes ($ref);
    my $string = $class->indent() . qq |<array $attr>| . "\n";

    for my $i (0 .. $#{$ref})
    {
        $string .= $class->_perl_to_xml_item ($i, $ref->[$i]);
    }

    $string .= $class->indent() . qq |</array>| . "\n";
    return $string;
}


# Records $ref in the back-reference table and returns the attribute text of
# its opening tag: the id, plus the class name when $ref is blessed.
sub _node_attributes
{
    my ($class, $ref) = @_;
    my $ref_id = Scalar::Util::refaddr ($ref);
    $BackRef->{$ref_id} = $ref;

    my $bless = Scalar::Util::blessed ($ref);
    return qq |id="$ref_id"| . ($bless ? qq | bless="$bless"| : '');
}


# Writes <$tag ...> around the dump of the value that $ref points to.
sub _perl_to_xml_pointer
{
    my ($class, $tag, $ref) = @_;
    my $attr   = $class->_node_attributes ($ref);
    my $string = $class->indent() . qq |<$tag $attr>| . "\n";

    $class->indent_more();
    $string .= $class->perl_to_xml ($$ref);
    $class->indent_less();

    $string .= $class->indent() . qq |</$tag>| . "\n";
    return $string;
}


# Writes one <item key="$key"> element, one level deeper than its container,
# around the dump of $value.
sub _perl_to_xml_item
{
    my ($class, $key, $value) = @_;

    $class->indent_more();
    my $string = $class->indent() . qq |<item key="$key">| . "\n";

    $class->indent_more();
    $string .= $class->perl_to_xml ($value);
    $class->indent_less();

    $string .= $class->indent() . qq |</item>| . "\n";
    $class->indent_less();

    return $string;
}


# Returns the leading whitespace for the current nesting depth.
sub indent
{
    return " " x $IndentLevel;
}


# Goes one nesting level deeper.
sub indent_more
{
    $IndentLevel++;
}


# Comes back one nesting level.
sub indent_less
{
    $IndentLevel--;
}


1;


__END__


=head1 NAME

MKDoc::XML::Dumper - Same as Data::Dumper, but with XML


=head1 SYNOPSIS

  use MKDoc::XML::Dumper;
  use Test::More 'no_plan';

  my $stuff = [ qw /foo bar baz/, [], { hello => 'world', yo => \\'boo' } ];
  my $xml = MKDoc::XML::Dumper->perl2xml ($stuff);
  my $stuff2 = MKDoc::XML::Dumper->xml2perl ($xml);
  is_deeply ($stuff, $stuff2); # prints 'ok'


=head1 SUMMARY

L<MKDoc::XML::Dumper> provides functionality equivalent to Data::Dumper except that
rather than serializing structures into a Perl string, it serializes them into
a generic XML file format.

Of course since XML cannot be evaled, it also provides a mechanism for
undumping the xml back into a perl structure.

L<MKDoc::XML::Dumper> supports scalar references, hash references, array references,
reference references, and literals. It also supports circular structures and
back references to avoid creating unwanted extra copies of the same object.

That's all there is to it!


=head1 API

=head2 my $xml = MKDoc::XML::Dumper->perl2xml ($perl);

Turns $perl into an XML string. For instance:

  my $perl = [ qw /foo bar baz/, { adam => 'apple', bruno => 'berry', chris => 'cherry' } ];
  print MKDoc::XML::Dumper->perl2xml ($perl);

Will print something like:

  <array id="135338912">
   <item key="0">
    <litteral>foo</litteral>
   </item>
   <item key="1">
    <litteral>bar</litteral>
   </item>
   <item key="2">
    <litteral>baz</litteral>
   </item>
   <item key="3">
    <hash id="135338708">
     <item key="bruno">
      <litteral>berry</litteral>
     </item>
     <item key="adam">
      <litteral>apple</litteral>
     </item>
     <item key="chris">
      <litteral>cherry</litteral>
     </item>
    </hash>
   </item>
  </array>

As you can see, every object has an id.
This allows for backreferencing, so: my $perl = undef; $perl = \$perl; print MKDoc::XML::Dumper->perl2xml ($perl); Prints something like: For the curious, these identifiers are computed using some perl black magic: my $id = 0 + $reference; =head2 my $perl = MKDoc::XML::Dumper->xml2perl ($xml); Does the exact reverse operation as perl2xml(). =head1 AUTHOR Copyright 2003 - MKDoc Holdings Ltd. Author: Jean-Michel Hiver This module is free software and is distributed under the same license as Perl itself. Use it at your own risk. =head1 SEE ALSO L L =cut MKDoc-XML-0.75/lib/MKDoc/XML/TreeBuilder.pm0000644000076400007640000002054010130755265020260 0ustar brunobruno00000000000000# ------------------------------------------------------------------------------------- # MKDoc::XML::TreeBuilder # ------------------------------------------------------------------------------------- # Author : Jean-Michel Hiver. # Copyright : (c) MKDoc Holdings Ltd, 2003 # # This module turns an XML string into a tree of elements and returns the top elements. # This assumes that the XML string is well-formed. Well. More or less :) # # This module is distributed under the same license as Perl itself. # ------------------------------------------------------------------------------------- package MKDoc::XML::TreeBuilder; use MKDoc::XML::Tokenizer; use strict; use warnings; ## # $class->process_data ($xml); # ---------------------------- # Parses $xml and turns it into a tree structure very similar # to HTML::Element objects. ## sub process_data { my $class = shift; my $tokens = MKDoc::XML::Tokenizer->process_data (@_); return _process_recurse ($tokens); } ## # $class->process_file ($filename); # --------------------------------- # Parses the file $filename and turns it into a tree structure very similar # to HTML::Element objects. 
##
# Hands its arguments to MKDoc::XML::Tokenizer->process_file() and returns
# whatever _process_recurse() builds from the resulting token list.
sub process_file {
    my $class  = shift;
    my $tokens = MKDoc::XML::Tokenizer->process_file (@_);
    return _process_recurse ($tokens);
}


##
# _process_recurse ($token_list);
# -------------------------------
# Turns $token_list array ref into a tree structure.
#
# $token_list is consumed: tokens are shifted off it as they are processed.
# Leaf nodes are pushed as-is; an opening tag node gets its '_content' set
# to the array reference built recursively from its descendant tokens.
#
# Returns the list of top nodes in list context, an array reference
# otherwise. Dies with a 'parse_error: ...' message when it meets a token
# that is neither a leaf nor an opening tag.
##
sub _process_recurse {
    my $tokens = shift;
    my @result = ();

    while (@{$tokens}) {
        # takes the first available token from the $tokens array reference
        my $token = shift @{$tokens};

        my $node = $token->leaf();
        if (defined $node) {
            push @result, $node;
            next;
        }

        $node = $token->tag_open();
        if (defined $node) {
            my $descendants = _descendant_tokens ($token, $tokens);

            # scalar assignment: the recursive call returns an array reference
            $node->{_content} = _process_recurse ($descendants);
            push @result, $node;
            next;
        }

        my $token_as_string = $token->as_string();
        die qq |parse_error: Is this XML well-formed? (unexpected closing tag "$token_as_string")|;
    }

    return wantarray ? @result : \@result;
}


##
# _descendant_tokens ($token, $tokens);
# -------------------------------------
# Removes all tokens from $tokens which are descendants
# of $token - assuming that $token is an opening tag token.
#
# Returns an array reference holding all the tokens removed except for
# the closing tag matching $token. So the closing tag is removed from
# $tokens but not returned.
#
# Dies with a 'parse_error: ...' message when $tokens is exhausted before
# the matching closing tag has been found.
##
sub _descendant_tokens {
    my $token   = shift;
    my $tokens  = shift;
    my @res     = ();
    my $balance = 1;    # opening tags not yet closed, $token included

    while (@{$tokens}) {
        my $next_token = shift (@{$tokens});

        if (defined $next_token->leaf()) {
            push @res, $next_token;
            next;
        }

        if (defined $next_token->tag_open()) {
            $balance++;
            push @res, $next_token;
            next;
        }

        if (defined $next_token->tag_close()) {
            $balance--;
            last if ($balance == 0);    # matching closing tag: dropped, not returned
            push @res, $next_token;
            next;
        }

        die "BUG: The program should never reach this statement.";
    }

    return \@res if ($balance == 0);

    # The message is kept on one line so that it reads like the sibling
    # message raised by _process_recurse().
    my $token_as_string = $token->as_string();
    die qq |parse_error: Is this XML well-formed? (could not find closing tag for "$token_as_string")|;
}


1;


__END__


=head1 NAME

MKDoc::XML::TreeBuilder - Builds a parsed tree from XML data


=head1 SYNOPSIS

  my @top_nodes = MKDoc::XML::TreeBuilder->process_data ($some_xml);


=head1 SUMMARY

L<MKDoc::XML::TreeBuilder> uses L<MKDoc::XML::Tokenizer> to turn XML data into a parsed tree. Basically it smells like an XML parser, looks like an XML parser, and awfully overlaps with XML parsers.

But it's not an XML parser.

XML parsers are required to die if the XML data is not well formed. MKDoc::XML::TreeBuilder doesn't give a rip: it'll parse whatever as long as it's good enough for it to parse.

XML parsers expand entities. MKDoc::XML::TreeBuilder doesn't. At least not yet.

XML parsers generally support namespaces. MKDoc::XML::TreeBuilder doesn't - and probably won't.


=head1 DISCLAIMER

B


=head1 API

=head2 my @top_nodes = MKDoc::XML::TreeBuilder->process_data ($some_xml);

Returns all the top nodes of the $some_xml parsed tree.

Although the XML spec says that there can be only one top element in an XML file, you have to take two things into account:

1. Pseudo-elements such as XML declarations, processing instructions, and comments.

2. MKDoc::XML::TreeBuilder is not an XML parser, it's not its job to care about the XML specification, so having multiple top elements is just fine.

=head2 my @top_nodes = MKDoc::XML::TreeBuilder->process_file ('/some/file.xml');

Same as MKDoc::XML::TreeBuilder->process_data ($some_xml), except that it reads $some_xml from '/some/file.xml'.


=head1 Returned parsed tree - data structure

I have tried to make MKDoc::XML::TreeBuilder look enormously like HTML::TreeBuilder. So most of this section is stolen and slightly adapted from the HTML::Element man page.

START PLAGIARISM HERE

It may occur to you to wonder what exactly a "tree" is, and how it's represented in memory. Consider this HTML document: Stuff

I like potatoes!

Building a syntax tree out of it makes a tree-structure in memory that could be diagrammed as: html (lang='en-US') / \ / \ / \ head body /\ \ / \ \ / \ \ title meta h1 | (name='author', | "Stuff" content='Jojo') "I like potatoes" This is the traditional way to diagram a tree, with the "root" at the top, and it's this kind of diagram that people have in mind when they say, for example, that "the meta element is under the head element instead of under the body element". (The same is also said with "inside" instead of "under" -- the use of "inside" makes more sense when you're looking at the HTML source.) Another way to represent the above tree is with indenting: html (attributes: lang='en-US') head title "Stuff" meta (attributes: name='author' content='Jojo') body h1 "I like potatoes" Incidentally, diagramming with indenting works much better for very large trees, and is easier for a program to generate. The $tree->dump method uses indentation just that way. However you diagram the tree, it's stored the same in memory -- it's a network of objects, each of which has attributes like so: element #1: _tag: 'html' _parent: none _content: [element #2, element #5] lang: 'en-US' element #2: _tag: 'head' _parent: element #1 _content: [element #3, element #4] element #3: _tag: 'title' _parent: element #2 _content: [text segment "Stuff"] element #4 _tag: 'meta' _parent: element #2 _content: none name: author content: Jojo element #5 _tag: 'body' _parent: element #1 _content: [element #6] element #6 _tag: 'h1' _parent: element #5 _content: [text segment "I like potatoes"] The "treeness" of the tree-structure that these elements comprise is not an aspect of any particular object, but is emergent from the relatedness attributes (_parent and _content) of these element-objects and from how you use them to get from element to element. STOP PLAGIARISM HERE This is pretty much the kind of data structure MKDoc::XML::TreeBuilder returns. 
More information on different nodes and their type is available in L. =head1 NOTES Did I mention that MKDoc::XML::TreeBuilder is NOT an XML parser? =head1 AUTHOR Copyright 2003 - MKDoc Holdings Ltd. Author: Jean-Michel Hiver This module is free software and is distributed under the same license as Perl itself. Use it at your own risk. =head1 SEE ALSO L L =cut MKDoc-XML-0.75/lib/MKDoc/XML/Encode.pm0000644000076400007640000000352510130755245017251 0ustar brunobruno00000000000000# ------------------------------------------------------------------------------------- # MKDoc::XML::Encode # ------------------------------------------------------------------------------------- # Author : Jean-Michel Hiver. # Copyright : (c) MKDoc Holdings Ltd, 2003 # # This module encodes XML entities & > < " and '. # # This module is distributed under the same license as Perl itself. # ------------------------------------------------------------------------------------- package MKDoc::XML::Encode; use warnings; use strict; our %XML_Encode = ( '&' => 'amp', '<' => 'lt', '>' => 'gt', '"' => 'quot', "'" => 'apos', ); our $XML_Encode_Pattern = join ("|", keys %XML_Encode); sub process { (@_ == 2) or warn "MKDoc::XML::Encode::process() should be called with two arguments"; my $class = shift; my $data = join '', map { (defined $_) ? $_ : '' } @_; $data =~ s/($XML_Encode_Pattern)/&$XML_Encode{$1};/go; return $data; } 1; __END__ =head1 NAME MKDoc::XML::Encode - Encodes XML entities =head1 SYNOPSIS use MKDoc::XML::Encode; # $xml is now "Chris&apos; Baloon" my $xml = MKDoc::XML::Encode->process ("Chris' Baloon"); =head1 SUMMARY MKDoc::XML::Encode is a very simple module which encodes the following entities. ' " > < & That's it. This module and its counterpart L<MKDoc::XML::Decode> are used by L<MKDoc::XML::Dumper> to XML-encode and XML-decode litterals. =head1 API =head2 my $xml_encoded = MKDoc::XML::Encode->process ($some_string); Does what is said in the summary. =head1 AUTHOR Copyright 2003 - MKDoc Holdings Ltd. 
Author: Jean-Michel Hiver This module is free software and is distributed under the same license as Perl itself. Use it at your own risk. =head1 SEE ALSO L L =cut MKDoc-XML-0.75/lib/MKDoc/XML/Tagger.pm0000644000076400007640000002307510214060521017254 0ustar brunobruno00000000000000# ------------------------------------------------------------------------------------- # MKDoc::XML::Tagger # ------------------------------------------------------------------------------------- # Author : Jean-Michel Hiver. # Copyright : (c) MKDoc Holdings Ltd, 2003 # # This module adds markup to an existing XML file / variable by matching expression. # You could see it as an XML-compatible search and substitute module. # # The main reason it exists is to automagically hyperlink HTML in MKDoc, and also to # mark up properly abbreviations based on glossaries. # # This module is distributed under the same license as Perl itself. # ------------------------------------------------------------------------------------- package MKDoc::XML::Tagger; use MKDoc::XML::Tokenizer; use strict; use warnings; use utf8; our $tags = []; our $Ignorable_RE = qr /(?:\r|\n|\s|(?:\&\(\d+\)))*/; our @DONT_TAG = qw/a/; ## # $class->process_data ($xml, @expressions); # ------------------------------------------ # Tags $xml with @expressions, where expression is a list of hashes. # # For example: # # MKDoc::XML::Tagger->process ( # 'I like oranges and bananas', # { _expr => 'oranges', _tag => 'a', href => 'http://www.google.com?q=oranges' }, # { _expr => 'bananas', _tag => 'a', href => 'http://www.google.com?q=bananas' }, # # Will return # # 'I like
oranges and \ # bananas. ## sub process_data { my $class = shift; my $tokens = MKDoc::XML::Tokenizer->process_data (shift); return _replace ($tokens, @_); } ## # $class->process_file ($file, @expressions); # ------------------------------------------- # Same as $class->process_data ($data, @expressions), except that $data is read # from $file. ## sub process_file { my $class = shift; my $tokens = MKDoc::XML::Tokenizer->process_file (shift); return _replace ($tokens, @_); } ## # _replace ($tokens, @expressions); # --------------------------------- # This function constructs the newly marked up text from a list # of XML $tokens and a list of @expressions and returns it. # # Longest expressions are applied first. ## sub _replace { my $tokens = shift; my @expr = sort { length ($b->{_expr}) <=> length ($a->{_expr}) } @_; @expr = map { my $hash = \%{$_}; for (keys %{$hash}) { $hash->{$_} =~ s/\&/\&/g; $hash->{$_} =~ s/\{$_} =~ s/\>/\>/g; $hash->{$_} =~ s/\"/\"/g; }; $hash; } @expr; my $text; local $tags; ($text, $tags) = _segregate_markup_from_text ($tokens); # once we have segregated markup from the text, we can safely # encode < and > and "... # $text =~ s/\&/\&/g; # seems to be already encoded... where do we encode this stuff !?! $text =~ s/\/\>/g; $text =~ s/\"/\"/g; # but we don't want any ' $text =~ s/\'/\'/g; # @expr = _filter_out ($text, @expr); while (my $attr = shift (@expr)) { my %attr = %{$attr}; my $tag = delete $attr{_tag} || next; my $expr = delete $attr{_expr} || next; $text = _text_replace ($text, $expr, $tag, \%attr); } while ($text =~ /\&\(\d+\)/) { for (my $i = 0; $i < @{$tags}; $i++) { my $c = $i + 1; my $tag = $tags->[$i]; $text =~ s/\&\($c\)/$tag/g; } } return $text; } ## # _text_replace ($text, $expr, $tag, $attr); # ------------------------------------------ # Replaces all $text, $expr, $tag, $attr. 
## sub _text_replace { my $text = shift; my $expr = shift; my $tag = shift; my $attr = shift; my $re = _expression_to_regex ($expr); my $tag1 = _tag_open ($tag, $attr); my $tag2 = _tag_close ($tag, $attr); # let's treat beginning and end of string as spaces, # it makes the regular expressions much easier. $text = " $text "; my %expr = map { $_ => 1 } $text =~ /(?<=\p{IsSpace}|\p{IsPunct}|\&)($re)(?=\p{IsSpace}|\p{IsPunct}|\&)/gi; foreach (keys %expr) { my $to_replace = quotemeta ($_); my $replacement = $_; $replacement =~ s/(\&\(\d+\))/$tag2$1$tag1/g; $replacement = "$tag1$replacement$tag2"; # Double hyperlinking fix # JM - 2004-01-23 push @{$tags}, $replacement; my $rep = '&(' . @{$tags} . ')'; $text =~ s/(?<=\p{IsSpace}|\p{IsPunct}|\&)$to_replace(?=\p{IsSpace}|\p{IsPunct}|\&)/$rep/g; # matching placeholders fix Bruno 2005-03-10 my $rep_quoted = quotemeta ($rep); $text =~ s/&\($rep_quoted\)/&($to_replace)/g; } # remove the first and last space which we previously inserted for # ease-of-regex purposes. $text =~ s/^ //; $text =~ s/ $//; return $text; } ## # _segregate_markup_from_text ($tokens); # -------------------------------------- # From an array reference of tokens, returns text with # placeholders for markup, followed by an array reference # of markup tokens. # # Example: # # [ '', 'Hello ', '
', 'World', '
' ] # # becomes # # ( '&(1)Hello &(2)World&(3)', [ '', '
', '
' ] ) ## sub _segregate_markup_from_text { my $tokens = shift; my @tags = (); my $res = ''; for (@{$tokens}) { $_ = $$_; # replace the token object by its value /^{$_}; "\"$val\""; } } keys %{$attr}; return $attr_str ? "<$tag $attr_str>" : "<$tag>"; } ## # _tag_close ($tag_name); # ----------------------- # Turns a structure representing an closing tag into # a string representing a closing tag. ## sub _tag_close { my $tag = shift; return ""; } 1; __END__ =head1 NAME MKDoc::XML::Tagger - Adds XML markup to XML / XHTML content. =head1 SYNOPSIS use MKDoc::XML::Tagger; print MKDoc::XML::Tagger->process_data ( "

Hello, World!

", { _expr => 'World', _tag => 'strong', class => 'superFort' } ); Should print:

Hello, World!

=head1 SUMMARY MKDoc::XML::Tagger is a class which lets you specify a set of tag and attributes associated with expressions which you want to mark up. This module will then stuff any XML you send out with the extra expressions. For example, let's say that you have a document which has the term 'Microsoft Windows' several times in it. You could wish to surround any instance of the term with a tag. MKDoc::XML::Tagger lets you do exactly that. In MKDoc, this is used so that editors can enter hyperlinks separately from the content. It allows them to enter content without having to worry about the annoying syntax. It also has the added benefit from preventing bad information architecture such as the 'click here' syndrome. We also have plans to use it for automatically linking glossary words, abbreviation tags, etc. MKDoc::XML::Tagger is also probably a very good tool if you are building some kind of Wiki system in which you want expressions to be automagically hyperlinked. =head1 DISCLAIMER B =head1 API The API is very simple. =head2 my $result = MKDoc::XML::Tagger->process_data ($xml, @expressions); Tags $xml with the @expressions list. Each element of @expressions is a hash reference looking like this: { _expr => 'Some Expression', _tag => 'foo', attribute1 => 'bar' attribute2 => 'baz' } Which will try to turn anything which looks like: Some Expression sOmE ExPrEssIoN (etcetera) Into: Some Expression sOmE ExPrEssIoN (etcetera) You can have multiple expressions, in which case longest expressions are processed first. =head2 my $result = MKDoc::XML::Tagger->process_file ('some/file.xml', @expressions); Same as process_data(), except it takes its data from 'some/file.xml'. =head1 NOTES L does not really parse the XML file you're giving to it nor does it care if the XML is well-formed or not. It uses L to turn the XML / XHTML file into a series of L objects and strictly operates on a list of tokens. For this same reason MKDoc::XML::Tagger does not support namespaces. 
=head1 AUTHOR Copyright 2003 - MKDoc Holdings Ltd. Author: Jean-Michel Hiver This module is free software and is distributed under the same license as Perl itself. Use it at your own risk. =head1 SEE ALSO L L =cut MKDoc-XML-0.75/lib/MKDoc/XML/Tagger/0002755000076400007640000000000010214062151016712 5ustar brunobruno00000000000000MKDoc-XML-0.75/lib/MKDoc/XML/Tagger/Preserve.pm0000644000076400007640000000542510130755270021057 0ustar brunobruno00000000000000# ------------------------------------------------------------------------------------- # MKDoc::XML::Tagger::Preserve # ------------------------------------------------------------------------------------- # Author : Jean-Michel Hiver. # Copyright : (c) MKDoc Holdings Ltd, 2003 # # This module uses MKDoc::XML::Tagger, except it preserves specific tags to prevent # them from being tagged twice. At the moment the module uses regexes to do that so it # might not be very generic but it should at least work for XHTML tags. # ------------------------------------------------------------------------------------- package MKDoc::XML::Tagger::Preserve; use MKDoc::XML::Tagger; use strict; use warnings; use utf8; our @Preserve = (); ## # $class->process_data ($xml, @expressions); # ------------------------------------------ # Tags $xml with @expressions, where expression is a list of hashes. # # For example: # # MKDoc::XML::Tagger::Preserve->process ( # [ 'i_will_be_preserved', 'a' ], # 'I like oranges and bananas', # { _expr => 'oranges', _tag => 'a', href => 'http://www.google.com?q=oranges' }, # { _expr => 'bananas', _tag => 'a', href => 'http://www.google.com?q=bananas' }, # # Will return # # 'I like oranges and \ # bananas. 
##
# process_data ($class, \@preserve, $text, @expressions)
#
# The first argument after the class is an array reference of tag names;
# it is installed in @Preserve for the duration of the call. Every
# '<tag ...>...</tag>' region for those tags is swapped for a random
# placeholder, the remaining text is handed to
# MKDoc::XML::Tagger->process_data() together with @expressions, and the
# placeholders are then swapped back.
sub process_data {
    my $class = shift;
    local @Preserve = @{shift()};
    my $text = shift;

    my @list = ();
    ($text, @list) = _preserve_encode ($text);
    $text = MKDoc::XML::Tagger->process_data ($text, @_);
    $text = _preserve_decode ($text, @list);
    return $text;
}


##
# process_file ($class, [\@preserve,] $file, @expressions)
#
# Same as process_data(), except that the text is read from $file.
#
# An optional leading array reference lists the tags to preserve, as in
# process_data(); when it is absent the first argument is taken as the
# file name and nothing is preserved. Whatever follows the file name is
# passed on as the expression list.
#
# Warns and returns an empty array reference when $file cannot be opened.
##
sub process_file {
    my $class    = shift;
    my $preserve = (ref ($_[0]) eq 'ARRAY') ? shift : [];
    my $file     = shift;

    # 'or' (not '||') so that the handler is attached to open() itself
    # rather than to the file name string.
    open my $fh, '<', $file or do {
        warn "Cannot read-open $file";
        return [];
    };

    my $data = do { local $/; <$fh> };
    $data = '' unless defined $data;
    close $fh;

    return $class->process_data ($preserve, $data, @_);
}


##
# _preserve_encode ($text)
#
# For every tag name in @Preserve, finds the '<tag ...>...</tag>' fragments
# in $text and replaces each occurrence by its own placeholder obtained
# from _compute_unique_string().
#
# Returns the modified text followed by the flat (placeholder => fragment)
# list needed by _preserve_decode().
##
sub _preserve_encode {
    my $text = shift;
    my @list = ();

    for my $tag (@Preserve) {
        my @fragments = $text =~ /(<$tag\s.*?<\/$tag>)/gs;
        for my $fragment (@fragments) {
            # one substitution per occurrence, so each one gets a distinct placeholder
            while ($text =~ s/\Q$fragment\E/_compute_unique_string ($text, $fragment, \@list)/e) {}
        }
    }

    return $text, @list;
}


##
# _preserve_decode ($text, @list)
#
# Undoes _preserve_encode(): walks the (placeholder => fragment) pairs from
# last to first and puts each fragment back in place of its placeholder.
##
sub _preserve_decode {
    my $text = shift;
    my @tsil = reverse (@_);    # reversed, so each pair comes out as (fragment, placeholder)

    while (@tsil) {
        my $val = shift (@tsil);
        my $id  = shift (@tsil);
        $text =~ s/$id/$val/;
    }

    return $text;
}


##
# _compute_unique_string ($text, $str, $list)
#
# Draws random ten-letter lowercase strings until one does not occur in
# $text, records (that string => $str) at the end of the array referenced
# by $list, and returns the string.
##
sub _compute_unique_string {
    my $text = shift;
    my $str  = shift;
    my $list = shift;

    my $id = join '', map { chr (ord ('a') + int (rand (26))) } 1..10;
    while ($text =~ /\Q$id\E/) {
        $id = join '', map { chr (ord ('a') + int (rand (26))) } 1..10;
    }

    push @{$list}, $id => $str;
    return $id;
}


1;


__END__
MKDoc-XML-0.75/lib/MKDoc/XML.pm0000644000076400007640000000603010214061625016041 0ustar brunobruno00000000000000package MKDoc::XML; use strict; use warnings; our $VERSION = '0.75'; 1; __END__ =head1 NAME MKDoc::XML - The MKDoc XML Toolkit =head1 SYNOPSIS This is an article, not a module. =head1 SUMMARY MKDoc is a web content management system written in Perl which focuses on standards compliance, accessibility and usability issues, and multi-lingual websites. At MKDoc Ltd we have decided to gradually break up our existing commercial software into a collection of completely independent, well-documented, well-tested open-source CPAN modules. Ultimately we want MKDoc code to be a coherent collection of module distributions, yet each distribution should be usable and useful in itself. 
MKDoc::XML is part of this effort. You could help us and turn some of MKDoc's code into a CPAN module. You can take a look at the existing code at http://download.mkdoc.org/. If you are interested in some functionality which you would like to see as a standalone CPAN module, send an email to . =head1 DISCLAIMER =over =item B =item MKDoc::XML::* modules do not make sure your XML is well-formed. =item MKDoc::XML::* modules can be used to work with somehow broken XML. =item MKDoc::XML::* modules should not be used as high-level parsers with general purpose XML unless you know what you're doing. =back =head1 WHAT'S IN THE BOX =head2 XML tokenizer L<MKDoc::XML::Tokenizer> splits your XML / XHTML files into a list of L<MKDoc::XML::Token> objects using a single regex. =head2 XML tree builder L<MKDoc::XML::TreeBuilder> sits on top of L<MKDoc::XML::Tokenizer> and builds parsed trees out of your XML / XHTML data. =head2 XML stripper L<MKDoc::XML::Stripper> objects remove unwanted markup from your XML / HTML data. Useful to remove all those nasty presentational tags or 'style' attributes from your XHTML data for example. =head2 XML tagger The L<MKDoc::XML::Tagger> module matches expressions in XML / XHTML documents and tags them appropriately. For example, you could automatically hyperlink certain glossary words or add tags based on a dictionary of abbreviations and acronyms. =head2 XML entity decoder L<MKDoc::XML::Decode> is a pluggable, configurable entity expander module which currently supports html entities, numerical entities and basic xml entities. =head2 XML entity encoder L<MKDoc::XML::Encode> does the exact reverse operation as L<MKDoc::XML::Decode>. =head2 XML Dumper L<MKDoc::XML::Dumper> serializes arbitrarily complex perl structures into XML strings. It is also capable of doing the reverse operation, i.e. deserializing an XML string into a perl structure. =head1 AUTHOR Copyright 2003 - MKDoc Holdings Ltd. Author: Jean-Michel Hiver This module is free software and is distributed under the same license as Perl itself. Use it at your own risk. =head1 SEE ALSO Petal: http://search.cpan.org/dist/Petal/ MKDoc: http://www.mkdoc.com/ Help us open-source MKDoc. 
Join the mkdoc-modules mailing list: mkdoc-modules@lists.webarch.co.uk =cut MKDoc-XML-0.75/META.yml0000644000076400007640000000057610214062150014614 0ustar brunobruno00000000000000# http://module-build.sourceforge.net/META-spec.html #XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX# name: MKDoc-XML version: 0.75 version_from: lib/MKDoc/XML.pm installdirs: site requires: Scalar::Util: 1.07 Test::More: 0.47 distribution_type: module generated_by: ExtUtils::MakeMaker version 6.17 MKDoc-XML-0.75/.cvsignore0000644000076400007640000000003710213574313015343 0ustar brunobruno00000000000000.swp Makefile blib pm_to_blib MKDoc-XML-0.75/Makefile.PL0000644000076400007640000000132110130755303015310 0ustar brunobruno00000000000000use ExtUtils::MakeMaker; # See lib/ExtUtils/MakeMaker.pm for details of how to influence # the contents of the Makefile that is written. WriteMakefile( 'NAME' => 'MKDoc::XML', 'VERSION_FROM' => 'lib/MKDoc/XML.pm', # finds $VERSION 'PREREQ_PM' => { 'Test::More' => '0.47', 'Scalar::Util' => '1.07', }, ($] >= 5.005 ? ## Add these new keywords supported since 5.005 (ABSTRACT_FROM => 'lib/MKDoc/XML.pm', # retrieve abstract from module AUTHOR => 'Jean-Michel Hiver') : ()), ); package MY; sub postamble { return < /dev/null EOF } 1; MKDoc-XML-0.75/README0000644000076400007640000000210510130755307014223 0ustar brunobruno00000000000000MKDoc::XML - MKDoc XML stuff ============================ MKDoc::XML is a suite of low level XML processing modules which comprises: * An XML tokenizer * An XML tree builder * A configurable XML markup stripper * A configurable XML markup tagger * An XML entity encoder * An XML entity decoder * An HTML-Only entity decoder * An XML Dumper / Undumper Typically, in MKDoc the stripper is used to remove unwanted presentational markup while the tagger adds structural markup such as tag or automatically hyperlinks expressions so that MKDoc users don't have to know about tag syntax. 
INSTALLATION To install this module type the following: perl Makefile.PL make make test make install Or better, use CPAN.pm DEPENDENCIES Scalar::Util 1.07 and Test::More 0.47, as declared in Makefile.PL. MISCELLANEOUS Want to help us open-source MKDoc? Join the MKDoc-Modules community! http://lists.webarch.co.uk/mailman/listinfo/mkdoc-modules COPYRIGHT AND LICENCE This module is free software and is distributed under the same license as Perl itself. Copyright (C) 2003 MKDoc Holdings Ltd. Author: Jean-Michel Hiver