XML-Parser-EasyTree-0.01/ 40777 0 0 0 7233634720 10050 5 XML-Parser-EasyTree-0.01/Makefile.PL 100444 0 0 377 7233621136 12015 use ExtUtils::MakeMaker;
# See lib/ExtUtils/MakeMaker.pm for details of how to influence
# the contents of the Makefile that is written.
# Generate the Makefile for this distribution.
WriteMakefile(
    'NAME'         => 'XML::Parser::EasyTree',
    'VERSION_FROM' => 'EasyTree.pm',    # finds $VERSION
    # EasyTree is only usable as a style of XML::Parser, so declare the
    # dependency; this lets CPAN clients install XML::Parser first and makes
    # "perl Makefile.PL" warn when it is missing.
    'PREREQ_PM'    => { 'XML::Parser' => 0 },
);
XML-Parser-EasyTree-0.01/Changes 100444 0 0 213 7233621136 11323 Revision history for Perl extension XML::Parser::EasyTree.
0.01 Wed Jan 24 12:43:41 2001
- original version; created by h2xs 1.19
XML-Parser-EasyTree-0.01/test.pl 100444 0 0 1256 7233621136 11374 # Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl test.pl'
######################### We start with some black magic to print on failure.
# Change 1..1 below to 1..last_test_to_print .
# (It may become useful if the test is moved to ./t subdirectory.)
# Smoke test: the only thing checked is that the module loads.
# BEGIN runs at compile time, so the plan line "1..1" is printed (unbuffered)
# before the 'use' below is attempted.
BEGIN { $| = 1; print "1..1\n"; }
# END runs at interpreter exit; if the 'use' died, $loaded was never set
# and the failure is reported here.
END {print "not ok 1\n" unless $loaded;}
use XML::Parser::EasyTree;
# Reached only when the module compiled and loaded successfully.
$loaded = 1;
print "ok 1\n";
######################### End of black magic.
# Insert your test code below (better if it prints "ok 13"
# (correspondingly "not ok 13") depending on the success of chunk 13
# of the test code):
XML-Parser-EasyTree-0.01/EasyTree.pm 100444 0 0 16141 7233634400 12154 package XML::Parser::EasyTree;
use strict;
# Package globals: the module version plus the two user-settable switches
# ($Noempty and $Latin) described under SPECIAL VARIABLES in the POD below.
use vars qw($VERSION $Noempty $Latin);
# Makefile.PL reads the version from this line via VERSION_FROM.
$VERSION = '0.01';
# Register "EasyTree" in XML::Parser's table of built-in styles.
# NOTE(review): presumably this is what lets Style=>'EasyTree' resolve to
# this package's Init/Start/End/Char/Proc/Final subs -- confirm against
# the XML::Parser documentation.
$XML::Parser::Built_In_Styles{EasyTree} = 1;
# Init - style handler that prepares the parse state on the expat object.
#
#   $expat->{Lists}   stack of the child lists of the currently open elements
#   $expat->{Tree}    top-level node list, handed back by Final
#   $expat->{Curlist} list that new nodes are appended to; it starts out
#                     as the very same array as {Tree}
sub Init {
    my ($expat) = @_;
    my $root = [];
    $expat->{Lists}   = [];
    $expat->{Tree}    = $root;
    $expat->{Curlist} = $root;
}
# Start - style handler for an opening tag.
#
# Arguments: the expat object, the tag name, then a flat list of
# attribute name/value pairs.
#
# Builds an element node ('e') with an empty child list, appends it to the
# list currently being filled, and makes the new element's child list the
# current one. The previous current list is pushed on the {Lists} stack so
# that End can restore it.
sub Start {
    my ($expat, $tag, @pairs) = @_;

    # Drop a whitespace-only text node that directly precedes this tag
    # (only has an effect when $Noempty is set).
    checkempty($expat);

    my $children = [];
    my $element  = {
        type    => 'e',
        attrib  => {},
        name    => nsname($expat, $tag),
        content => $children,
    };

    # Walk the attribute list two items at a time: name, value.
    for (my $i = 0; $i < @pairs; $i += 2) {
        my ($attr_name, $attr_value) = @pairs[ $i, $i + 1 ];
        $element->{attrib}{ nsname($expat, $attr_name) } = encode($attr_value);
    }

    push @{ $expat->{Lists} },   $expat->{Curlist};
    push @{ $expat->{Curlist} }, $element;
    $expat->{Curlist} = $children;
}
# End - style handler for a closing tag.
#
# Optionally discards a trailing whitespace-only text node (see checkempty),
# then makes the enclosing element's child list current again by popping it
# off the {Lists} stack.
sub End {
    my ($expat) = @_;
    checkempty($expat);
    my $enclosing = pop @{ $expat->{Lists} };
    $expat->{Curlist} = $enclosing;
}
# Char - style handler for character data.
#
# Appends the (possibly re-encoded) text to the text node at the end of the
# current list, creating that node first when the list is empty or ends in
# a non-text node. This is how contiguous runs of text end up in one node.
sub Char {
    my ($expat, $text) = @_;
    my $siblings = $expat->{Curlist};

    my $last = @$siblings ? $siblings->[-1] : undef;
    if (!$last || $last->{type} ne 't') {
        $last = { type => 't', content => '' };
        push @$siblings, $last;
    }

    $last->{content} .= encode($text);
}
# Proc - style handler for a processing instruction.
#
# Appends a 'p' node carrying the PI target and its data to the current
# list, after optionally discarding a preceding whitespace-only text node.
sub Proc {
    my ($expat, $target, $value) = @_;
    checkempty($expat);

    my $siblings = $expat->{Curlist};
    my $pi_node  = {
        type    => 'p',
        target  => encode($target),
        content => encode($value),
    };
    push @$siblings, $pi_node;
}
# Final - style handler run once parsing is complete.
#
# Removes the bookkeeping entries from the expat object and returns the
# finished tree: a reference to the array of top-level nodes.
sub Final {
    my ($expat) = @_;
    checkempty($expat);
    delete @{$expat}{qw(Curlist Lists)};
    return $expat->{Tree};
}
# nsname - produce the name under which an element or attribute is stored.
#
# When the parser's Namespaces option is set, the name is prefixed with its
# namespace URI in curly brackets ("{}" when there is none). The result is
# passed through encode() in either case.
sub nsname {
    my ($parser, $name) = @_;
    return encode($name) unless $parser->{Namespaces};

    my $uri = $parser->namespace($name) || '';
    return encode("{$uri}" . $name);
}
# encode - optionally convert a string from UTF-8 to ISO-8859-1 (Latin-1).
#
# Takes one string and returns it; the caller's copy is never modified.
# When the package global $Latin is false the string is returned unchanged.
# When it is true, every two-byte UTF-8 sequence that encodes a code point
# up to U+00FF (lead byte 0xC0-0xC3 followed by a continuation byte
# 0x80-0xBF) is collapsed to the single corresponding Latin-1 byte.
# Longer sequences, i.e. characters outside Latin-1, are left as they are.
#
# The second byte must be a genuine continuation byte. Accepting any
# character there would turn a stray lead byte followed by ordinary text
# (e.g. "\xc3A") into an unrelated character.
#
# NOTE(review): this assumes the parser hands over UTF-8 *byte* strings;
# confirm the behaviour with the XML::Parser/perl versions in use.
sub encode {
    my $text = shift;
    if ($Latin) {
        $text =~ s{([\xc0-\xc3])([\x80-\xbf])}{
            # Low 2 bits of the lead byte + low 6 bits of the continuation.
            chr(((ord($1) & 0x03) << 6) | (ord($2) & 0x3F))
        }ge;
    }
    return $text;
}
# checkempty - implement the $Noempty option.
#
# Takes the expat object. When the package global $Noempty is true and the
# last node of the current list is a text node consisting solely of
# whitespace, that node is removed. The handlers call this right before
# they add a non-text node or leave the current element, so whitespace
# between tags never stays in the tree.
#
# Declared without a prototype: the former empty prototype "()" contradicted
# the one-argument calling convention and would reject any call compiled
# after this definition.
sub checkempty {
    my ($expat) = @_;
    return unless $Noempty;

    my $clist = $expat->{Curlist};
    if (   @$clist
        && $clist->[-1]{type} eq 't'
        && $clist->[-1]{content} =~ /^\s+$/)
    {
        pop @$clist;
    }
    return;
}
1;
__END__
=head1 NAME
XML::Parser::EasyTree - Easier tree style for XML::Parser
=head1 SYNOPSIS
use XML::Parser;
use XML::Parser::EasyTree;
$XML::Parser::EasyTree::Noempty=1;
my $p=new XML::Parser(Style=>'EasyTree');
my $tree=$p->parsefile('something.xml');
=head1 DESCRIPTION
XML::Parser::EasyTree adds a new "built-in" style called "EasyTree" to
XML::Parser. Like XML::Parser's "Tree" style, setting this style causes
the parser to build a lightweight tree structure representing the XML
document. This structure is, at least in this author's opinion, easier to
work with than the one created by the built-in style.
When the parser is invoked with the EasyTree style, it returns a reference
to an array of tree nodes, each of which is a hash reference. All nodes
have a 'type' key whose value is the type of the node: 'e' for element
nodes, 't' for text nodes, and 'p' for processing instruction nodes. All
nodes also have a 'content' key whose value is a reference to an array
holding the element's child nodes for element nodes, the string value for
text nodes, and the data value for processing instruction nodes. Element
nodes also have a 'name' key whose value is the element's name and an
'attrib' key whose value is a reference to a hash of attribute names and
values. Processing instruction nodes also have a 'target' key whose value
is the PI's target.
EasyTree nodes are ordinary Perl hashes and are not objects. Contiguous
runs of text are always returned in a single node.
The reason the parser returns an array reference rather than the root
element's node is that an XML document can legally contain processing
instructions outside the root element (the xml-stylesheet PI is commonly
used this way).
If the parser's Namespaces option is set, element and attribute names will
be prefixed with their (possibly empty) namespace URI enclosed in curly
brackets.
=head1 SPECIAL VARIABLES
Two package global variables control special behaviors:
=over 4
=item XML::Parser::EasyTree::Latin
If this is set to a nonzero value, all text, names, and values will be
returned in ISO-8859-1 (Latin-1) encoding rather than UTF-8.
=item XML::Parser::EasyTree::Noempty
If this is set to a nonzero value, text nodes containing nothing but
whitespace (such as those generated by line breaks and indentation between
tags) will be omitted from the parse tree.
=back
=head1 EXAMPLE
Parse a pretty-printed version of the XML shown in the example for the built-in "Tree" style:
#!perl -w
use strict;
use XML::Parser;
use XML::Parser::EasyTree;
use Data::Dumper;
$XML::Parser::EasyTree::Noempty=1;
my $xml=<<'EOF';
<foo>
  <head id="a">Hello <em>there</em>
  </head>
  <bar>Howdy<ref/>
  </bar>
  do
</foo>
EOF
my $p=new XML::Parser(Style=>'EasyTree');
my $tree=$p->parse($xml);
print Dumper($tree);
Returns:
$VAR1 = [
{ 'name' => 'foo',
'type' => 'e',
'content' => [
{ 'name' => 'head',
'type' => 'e',
'content' => [
{ 'type' => 't',
'content' => 'Hello '
},
{ 'name' => 'em',
'type' => 'e',
'content' => [
{ 'type' => 't',
'content' => 'there'
}
],
'attrib' => {}
}
],
'attrib' => { 'id' => 'a'
}
},
{ 'name' => 'bar',
'type' => 'e',
'content' => [
{ 'type' => 't',
'content' => 'Howdy'
},
{ 'name' => 'ref',
'type' => 'e',
'content' => [],
'attrib' => {}
}
],
'attrib' => {}
},
{ 'type' => 't',
'content' => '
do
'
}
],
'attrib' => {}
}
];
=head1 AUTHOR
Eric Bohlman (ebohlman@omsdev.com)
Copyright (c) 2001 Eric Bohlman. All rights reserved. This program
is free software; you can redistribute it and/or modify it under the same
terms as Perl itself.
=head1 SEE ALSO
XML::Parser
=cut
XML-Parser-EasyTree-0.01/README 100444 0 0 12271 7233634532 10763 =head1 NAME
XML::Parser::EasyTree - Easier tree style for XML::Parser
=head1 SYNOPSIS
use XML::Parser;
use XML::Parser::EasyTree;
$XML::Parser::EasyTree::Noempty=1;
my $p=new XML::Parser(Style=>'EasyTree');
my $tree=$p->parsefile('something.xml');
=head1 DESCRIPTION
XML::Parser::EasyTree adds a new "built-in" style called "EasyTree" to
XML::Parser. Like XML::Parser's "Tree" style, setting this style causes
the parser to build a lightweight tree structure representing the XML
document. This structure is, at least in this author's opinion, easier to
work with than the one created by the built-in style.
When the parser is invoked with the EasyTree style, it returns a reference
to an array of tree nodes, each of which is a hash reference. All nodes
have a 'type' key whose value is the type of the node: 'e' for element
nodes, 't' for text nodes, and 'p' for processing instruction nodes. All
nodes also have a 'content' key whose value is a reference to an array
holding the element's child nodes for element nodes, the string value for
text nodes, and the data value for processing instruction nodes. Element
nodes also have a 'name' key whose value is the element's name and an
'attrib' key whose value is a reference to a hash of attribute names and
values. Processing instruction nodes also have a 'target' key whose value
is the PI's target.
EasyTree nodes are ordinary Perl hashes and are not objects. Contiguous
runs of text are always returned in a single node.
The reason the parser returns an array reference rather than the root
element's node is that an XML document can legally contain processing
instructions outside the root element (the xml-stylesheet PI is commonly
used this way).
If the parser's Namespaces option is set, element and attribute names will
be prefixed with their (possibly empty) namespace URI enclosed in curly
brackets.
=head1 SPECIAL VARIABLES
Two package global variables control special behaviors:
=over 4
=item XML::Parser::EasyTree::Latin
If this is set to a nonzero value, all text, names, and values will be
returned in ISO-8859-1 (Latin-1) encoding rather than UTF-8.
=item XML::Parser::EasyTree::Noempty
If this is set to a nonzero value, text nodes containing nothing but
whitespace (such as those generated by line breaks and indentation between
tags) will be omitted from the parse tree.
=back
=head1 EXAMPLE
Parse a pretty-printed version of the XML shown in the example for the built-in "Tree" style:
#!perl -w
use strict;
use XML::Parser;
use XML::Parser::EasyTree;
use Data::Dumper;
$XML::Parser::EasyTree::Noempty=1;
my $xml=<<'EOF';
<foo>
  <head id="a">Hello <em>there</em>
  </head>
  <bar>Howdy<ref/>
  </bar>
  do
</foo>
EOF
my $p=new XML::Parser(Style=>'EasyTree');
my $tree=$p->parse($xml);
print Dumper($tree);
Returns:
$VAR1 = [
{ 'name' => 'foo',
'type' => 'e',
'content' => [
{ 'name' => 'head',
'type' => 'e',
'content' => [
{ 'type' => 't',
'content' => 'Hello '
},
{ 'name' => 'em',
'type' => 'e',
'content' => [
{ 'type' => 't',
'content' => 'there'
}
],
'attrib' => {}
}
],
'attrib' => { 'id' => 'a'
}
},
{ 'name' => 'bar',
'type' => 'e',
'content' => [
{ 'type' => 't',
'content' => 'Howdy'
},
{ 'name' => 'ref',
'type' => 'e',
'content' => [],
'attrib' => {}
}
],
'attrib' => {}
},
{ 'type' => 't',
'content' => '
do
'
}
],
'attrib' => {}
}
];
=head1 AUTHOR
Eric Bohlman (ebohlman@omsdev.com)
Copyright (c) 2001 Eric Bohlman. All rights reserved. This program
is free software; you can redistribute it and/or modify it under the same
terms as Perl itself.
=head1 SEE ALSO
XML::Parser
XML-Parser-EasyTree-0.01/MANIFEST 100444 0 0 76 7233634652 11157 README
Changes
EasyTree.pm
Makefile.PL
MANIFEST
test.pl