XML-DT-0.69/0000755000175000017500000000000013457371530011053 5ustar ambsambsXML-DT-0.69/MANIFEST0000644000175000017500000000240013457371530012200 0ustar ambsambsChanges MANIFEST Makefile.PL lib/XML/DT.pm README mkdtskel mkdtdskel mkxmltype examples/arq.pl examples/arq.xml examples/ex2.pl examples/ex3.xml examples/ex1.pl examples/ex5.pl examples/ex6.pl examples/ex7.pl examples/ex10.1.pl examples/ex10.2.pl examples/ex10.2.xml examples/ex10.3.xml examples/ex10.3.pl examples/ex11.1.pl examples/ex11.1.xml examples/ex11.5.pl examples/ex11.5.xml examples/ex.xml examples/jj.dtd examples/makefile examples/README examples/lat1.html examples/10nov.sgm examples/pub.pl examples/publico.dtd examples/gcapaper2tex.pl examples/XPath/ex1.pl examples/XPath/ex1.xml examples/XPath/ex2.pl examples/XPath/ex2.xml examples/XPath/ex3.pl examples/XPath/ex3.xml examples/XPath/ex4.pl examples/XPath/ex4.xml examples/XPath/ex5.pl examples/XPath/ex5.xml examples/XPath/ex6.pl examples/XPath/ex6.xml examples/XPath/ex7.pl examples/XPath/ex7.xml examples/XPath/ex8.pl examples/XPath/ex8.xml examples/makenewexample t/00_basic.t t/50_xpath.t t/05_input.xml t/05_strings.t t/06_mkdtskel.t t/06_output.pl META.yml Module meta-data (added by MakeMaker) t/pod-coverage.t t/pod.t t/06_input.dtd t/06_dtdout.pl t/07_case.t t/07_case.xml t/60_types.t META.json Module JSON meta-data (added by MakeMaker) XML-DT-0.69/mkdtdskel0000644000175000017500000000106711763404720012760 0ustar ambsambs#!/usr/bin/perl use XML::DT; mkdtdskel (@ARGV) #simple huh? __END__ =encoding utf-8 =head1 NAME mkdtskel - DTD generator using XML::DT =head1 SYNOPSIS mkdttskel =head1 DESCRIPTION This command tries to infer the DTD structure for a specific XML file; =head1 SEE ALSO XML::DT(1), mkdtskel(1) and perl(1) =head1 AUTHORS Jose João Almeida, =head1 COPYRIGHT AND LICENSE Copyright 1999-2004 by Projecto Natura This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut XML-DT-0.69/META.yml0000644000175000017500000000137713457371530012334 0ustar ambsambs--- abstract: 'a package for down translation of XML files' author: - 'Jose Joao ' build_requires: ExtUtils::MakeMaker: '0' configure_requires: ExtUtils::MakeMaker: '0' dynamic_config: 1 generated_by: 'ExtUtils::MakeMaker version 7.34, CPAN::Meta::Converter version 2.150010' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: '1.4' name: XML-DT no_index: directory: - t - inc requires: ExtUtils::MakeMaker: '6.17' HTTP::Simple: '0' Scalar::Util: '0' Test::More: '0.40' XML::DTDParser: '2.00' XML::LibXML: '1.54' parent: '0' resources: repository: https://natura.di.uminho.pt/svn/main/xml/XML-DT version: '0.69' x_serialization_backend: 'CPAN::Meta::YAML version 0.018' XML-DT-0.69/t/0000755000175000017500000000000013457371530011316 5ustar ambsambsXML-DT-0.69/t/07_case.t0000644000175000017500000000265710730327732012732 0ustar ambsambs#!/usr/bin/perl use XML::DT ; use Test::More tests => 8; my $filename = "t/07_case.xml"; #### %h1=('-default' => sub{"<$q>"}); $str = dt($filename,%h1); $str =~ s/\s//g; is($str, ""); %h1=('-ignorecase' => 1, '-default' => sub{"<$q>"}); $str = dt($filename,%h1); $str =~ s/\s//g; is($str, ""); #### %h2=('c' => sub{ "<$q>" }, '-default' => sub{"<$q>$c"}); $str = dt($filename,%h2); $str =~ s/\s//g; is($str, "aeiou"); %h2=('c' => sub{ "<$q>" }, '-ignorecase' => 1, '-default' => sub{"<$q>$c"}); $str = dt($filename,%h2); $str =~ s/\s//g; is($str, ""); #### %h3=('-ignorecase' => 1, '-default' => sub{"$q:$c"}); $str = dt($filename,%h3); $str =~ s/\s//g; is($str, "a:b:c:aeiouc:aeioub:c:aeiouc:aeioub:c:aeiouc:aeiou"); %h3=('-default' => sub{"$q:$c"}); $str = dt($filename,%h3); $str =~ s/\s//g; is($str, "A:b:c:aeiouc:aeioub:C:aeiouc:aeioub:c:aeiouc:aeiou"); #### %h4=('-ignorecase' => 1, c => sub{ $v{title} }, '-default' => sub{ $v{title} ||=""; "$v{title}$c" }); $str = dt($filename,%h4); $str =~ s/\s//g; is($str, "zbrzbr"); %h4=(c => sub{ $v{title} }, '-default' => sub{ $v{title} ||=""; "$v{title}$c" }); $str = dt($filename,%h4); $str =~ s/\s//g; is($str, "zbraeiou"); XML-DT-0.69/t/06_output.pl0000644000175000017500000000072012124147314013506 0ustar ambsambs#!/usr/bin/perl use XML::DT; use warnings; use strict; my $filename = shift; # Variable Reference # # $c - contents after child processing # $q - element name (tag) # %v - hash of attributes my %handler=( # '-outputenc' => 'ISO-8859-1', # '-default' => sub{"<$q>$c"}, 'a' => sub{ }, # 1 occurrences; 'b' => sub{ }, # 3 occurrences; attributes: title 'c' => sub{ }, # 6 occurrences; attributes: title ); print dt($filename, %handler); XML-DT-0.69/t/50_xpath.t0000644000175000017500000000242710730327732013134 0ustar ambsambs# -*- cperl -*- use Test::More tests => 12; use XML::DT; ### XPath over elements my $xml = <<"EOX"; 123 EOX # identity 1 is (pathdtstring($xml, (-default => sub{toxml} ))."\n", $xml); # identity 2 is (pathdtstring($xml, ("//*" => sub{toxml} ))."\n", $xml); is (pathdtstring($xml, ("//c" => sub{"$c"}, -default => sub{toxml})), "123"); is (pathdtstring($xml, ("/c" => sub{"$c"}, "-default" => sub{toxml}))."\n", $xml); is (pathdtstring($xml, ("/*/*/c" => sub{"$c"}, "-default" => sub{toxml})), "123"); is (pathdtstring($xml, ("/*" => sub{"$q"})), "a"); is (pathdtstring($xml, ("a|b|c" => sub{toxml} ))."\n", $xml); ### XPath over attributes $xml = <<"EOX"; 234 EOX #identity 1 is (pathdtstring($xml, (-default => sub{toxml} ))."\n", $xml); #identity 2 is (pathdtstring($xml, ("//*" => sub{toxml} ))."\n", $xml); is (pathdtstring($xml, ("//*[@*]" => sub{toxml($q,{},$c)})), "234"); is (pathdtstring($xml, ("//a[\@v='2']" => sub{toxml($q,{},$c)}, "//*" => sub{toxml})), "234"); is (pathdtstring($xml, ("//a[\@v='1']" => sub{"zbr"})), "zbr"); XML-DT-0.69/t/06_dtdout.pl0000644000175000017500000000064512124147356013465 0ustar ambsambs#!/usr/bin/perl use warnings; use strict; use XML::DT; my $filename = shift; # Variable Reference # # $c - contents after child processing # $q - element name (tag) # %v - hash of attributes my %handler=( # '-outputenc' => 'ISO-8859-1', # '-default' => sub{"<$q>$c"}, 'a' => sub { }, 'b' => sub { }, # attributes: title 'c' => sub { }, # attributes: title ); print dt($filename, %handler); XML-DT-0.69/t/06_mkdtskel.t0000644000175000017500000000133710730327732013626 0ustar ambsambs#-*- cperl -*- use Test::More tests => 2; use XML::DT; { open TMP, ">_${$}_" or die "Cannot create temporary file\n"; select TMP; mkdtskel("t/05_input.xml"); close TMP; open A, "_${$}_"; open B, "t/06_output.pl"; my $ok = 1; while(defined($a = ) && defined($b = )) { $ok = 0 unless $a eq $b; } close B; close A; ok($ok); unlink "_${$}_"; } ###----------------- { open TMP, ">_${$}_" or die "Cannot create temporary file\n"; select TMP; mkdtskel_fromDTD("t/06_input.dtd"); close TMP; open A, "_${$}_"; open B, "t/06_dtdout.pl"; my $ok = 1; while(defined($a = ) && defined($b = )) { $ok = 0 unless $a eq $b; } close B; close A; ok($ok); unlink "_${$}_"; } XML-DT-0.69/t/05_strings.t0000644000175000017500000000263610730327732013503 0ustar ambsambs#!/usr/bin/perl use XML::DT ; use Test::More tests => 8; my $filename = "t/05_input.xml"; #### %h1=('-default' => sub{"<$q>"}); $str = dt($filename,%h1); $str =~ s/\s//g; is($str, ""); #### %h2=('c' => sub{ "<$q>" }, '-default' => sub{"<$q>$c"}); $str = dt($filename,%h2); $str =~ s/\s//g; is($str, ""); #### %h3=('-default' => sub{"$q:$c"}); $str = dt($filename,%h3); $str =~ s/\s//g; is($str, "a:b:c:aeiouc:aeioub:c:aeiouc:aeioub:c:aeiouc:aeiou"); #### %h4=(c => sub{ $v{title} }, '-default' => sub{ $v{title} ||=""; "$v{title}$c" }); $str = dt($filename,%h4); $str =~ s/\s//g; is($str, "zbrzbr"); ### # Isto não é portável!!!!! # # %h5=(-default=>sub{toxml}); # $str = dt($filename, %h5); # is(`tail -14 t/05_input.xml`,$str); ### $str = dtstring("", -declr => 1); is("\n",$str); ### { no utf8; # use Encode; # for my $straux ("áéíóú", # qq{áéíç}) { # $str = dtstring($straux, -inputenc=>'ISO-8859-1'); # is(decode("iso-8859-1",$str),$straux); # } ### for my $straux ("áéíóú", qq{áéíóú}, "çÇãÃ") { $str = dtstring($straux, -outputenc=>'ISO-8859-1', -inputenc=>'ISO-8859-1'); is($str,$straux); } } XML-DT-0.69/t/pod-coverage.t0000644000175000017500000000024210730327732014050 0ustar ambsambsuse Test::More; eval "use Test::Pod::Coverage 0.08"; plan skip_all => "Test::Pod::Coverage 0.08 required for testing POD coverage" if $@; all_pod_coverage_ok(); XML-DT-0.69/t/05_input.xml0000644000175000017500000000026110730327732013476 0ustar ambsambs aeiou aeiou aeiou aeiou aeiou aeiou XML-DT-0.69/t/00_basic.t0000644000175000017500000000723110760345620013061 0ustar ambsambs# -*- cperl -*- use Test::More tests => 27; BEGIN { use_ok( 'XML::DT' ); } # normalize_space is(XML::DT::_normalize_space(" teste "), "teste"); is(XML::DT::_normalize_space("\tteste\t"), "teste"); is(XML::DT::_normalize_space("\tteste "), "teste"); is(XML::DT::_normalize_space(" spaces in \t the middle\t"), "spaces in the middle"); # toxml as function is(toxml("a",{},""), ""); is(tohtml("a",{},""), ""); is(tohtml("br",{},""), "
"); is(tohtml("hr",{},""), "
"); is(tohtml("link",{type=>"bar"},""), ""); is(tohtml("img",{src=>"foo"},""), ""); is(toxml("a",{},"c"), "c"); is(tohtml("a",{},"c"), "c"); is(toxml("a",{a=>1},"c"), "c"); is(tohtml("a",{a=>1},"c"), "c"); is(toxml({ -q => "html", -c => { -q => "head", -c => { -q => "title", -c => "Titulo da pagina" }}}), "Titulo da pagina"); is(tohtml({ -q => "html", -c => { -q => "head", -c => { -q => "title", -c => "Titulo da pagina" }}}), "Titulo da pagina"); is(toxml({ -q => "html", -c => { -q => "head", -c => [] } }), ""); is(tohtml({ -q => "html", -c => { -q => "head", -c => [] } }), ""); is(toxml({ -q => "html", -c => { -q => "head", -c => [ { -q => "title", -c => "Titulo da pagina" }, { -q => "title", -c => "Titulo da pagina" }]}}), "Titulo da pagina\nTitulo da pagina"); is(tohtml({ -q => "html", -c => { -q => "head", -c => [ { -q => "title", -c => "Titulo da pagina" }, { -q => "title", -c => "Titulo da pagina" }]}}), "Titulo da pagina\nTitulo da pagina"); is(toxml({ -q => "html", -c => [ { -q => "head", -c => [ { -q => "title", -c => "Titulo da pagina" }, { -q => "title", -c => "Titulo da pagina" }]}, { -q => "head", -c => [ { -q => "title", -c => "Titulo da pagina" }, { -q => "title", -c => "Titulo da pagina" }]}]}), "Titulo da pagina\nTitulo da pagina\nTitulo da pagina\nTitulo da pagina"); is(tohtml({ -q => "html", -c => [ { -q => "head", -c => [ { -q => "title", -c => "Titulo da pagina" }, { -q => "title", -c => "Titulo da pagina" }]}, { -q => "head", -c => [ { -q => "title", -c => "Titulo da pagina" }, { -q => "title", -c => "Titulo da pagina" }]}]}), "Titulo da pagina\nTitulo da pagina\nTitulo da pagina\nTitulo da pagina"); # this is one of the most important tests for MathML is(toxml("foo",{},"0"), "0"); # toxml with variables $q = "a"; $c = "b"; %v = (); is(toxml, "b"); $v{foo} = "bar"; is(toxml, "b"); $c = '0'; is(toxml, "0"); XML-DT-0.69/t/06_input.dtd0000644000175000017500000000020410730327732013447 0ustar ambsambs XML-DT-0.69/t/pod.t0000644000175000017500000000020210730327732012253 0ustar ambsambsuse Test::More; eval "use Test::Pod 1.00"; plan skip_all => "Test::Pod 1.00 required for testing POD" if $@; all_pod_files_ok(); XML-DT-0.69/t/07_case.xml0000644000175000017500000000026110730327732013254 0ustar ambsambs aeiou aeiou aeiou aeiou aeiou aeiou XML-DT-0.69/t/60_types.t0000644000175000017500000000134010730327732013146 0ustar ambsambs# -*- cperl -*- use Test::More tests => 5; use Data::Dumper; BEGIN { use_ok( 'XML::DT' ); } my $XML = < foofoofoo EOX like(dtstring($XML, -default => sub { $c }, -type => {bar=>'SEQ'} ), qr/fooARRAY\(0x[0-9a-f]+\)foo/); is_deeply(dtstring($XML, -default => sub { $c }, -type => {bar=>'SEQ', -default=>'SEQ'} ), ['foo',['foo'],'foo']); is_deeply(dtstring($XML, -default => sub { $c }, -type => {-default=>'SEQ'} ), ['foo',['foo'],'foo']); is(toxml(dtstring($XML, -default => sub { $c }, -type => {-default=>'SEQH'})), "foo\nfoo\nfoo"); # yeah, we lose the xml tag. XML-DT-0.69/lib/0000755000175000017500000000000013457371530011621 5ustar ambsambsXML-DT-0.69/lib/XML/0000755000175000017500000000000013457371530012261 5ustar ambsambsXML-DT-0.69/lib/XML/DT.pm0000644000175000017500000010203413457371462013132 0ustar ambsambs## -*- cperl -*- package XML::DT; use 5.008006; use strict; use Data::Dumper; use HTTP::Tiny; my $ua = HTTP::Tiny->new(); use XML::DTDParser "ParseDTDFile"; use XML::LibXML ':libxml'; our $PARSER = 'XML::LibXML'; use parent 'Exporter'; use vars qw($c $u %v $q @dtcontext %dtcontextcount @dtatributes @dtattributes ); our @EXPORT = qw(&dt &dtstring &dturl &inctxt &ctxt &mkdtskel &inpath &mkdtskel_fromDTD &mkdtdskel &tohtml &toxml &MMAPON $c %v $q $u &xmltree &pathdturl @dtcontext %dtcontextcount @dtatributes @dtattributes &pathdt &pathdtstring &father &gfather &ggfather &root); our $VERSION = '0.69'; =encoding utf-8 =head1 NAME XML::DT - a package for down translation of XML files =head1 SYNOPSIS use XML::DT; %xml=( 'music' => sub{"Music from: $c\n"}, 'lyrics' => sub{"Lyrics from: $v{name}\n"}, 'title' => sub{ uc($c) }, '-userdata => { something => 'I like' }, '-default' => sub{"$q:$c"} ); print dt($filename,%xml); =head1 ABSTRACT This module is a XML down processor. It maps tag (element) names to functions to process that element and respective contents. =head1 DESCRIPTION This module processes XML files with an approach similar to OMNIMARK. As XML parser it uses XML::LibXML module in an independent way. You can parse HTML files as if they were XML files. For this, you must supply an extra option to the hash: %hander = ( -html => 1, ... ); You can also ask the parser to recover from XML errors: %hander = ( -recover => 1, ... ); =head1 Functions =head2 dt Down translation function C
receives a filename and a set of expressions (functions) defining the processing and associated values for each element. =head2 dtstring C works in a similar way with C
but takes input from a string instead of a file. =head2 dturl C works in a similar way with C
but takes input from an Internet url instead of a file. =head2 pathdt The C function is a C
function which can handle a subset of XPath on handler keys. Example: %handler = ( "article/title" => sub{ toxml("h1",{},$c) }, "section/title" => sub{ toxml("h2",{},$c) }, "title" => sub{ $c }, "//image[@type='jpg']" => sub{ "JPEG: " }, "//image[@type='bmp']" => sub{ "BMP: sorry, no bitmaps on the web" }, ); pathdt($filename, %handler); Here are some examples of valid XPath expressions under XML::DT: /aaa /aaa/bbb //ccc - ccc somewhere (same as "ccc") /*/aaa/* //* - same as "-default" /aaa[@id] - aaa with an attribute id /*[@*] - root with an attribute /aaa[not(@name)] - aaa with no attribute "name" //bbb[@name='foo'] - ... attribute "name" = "foo" /ccc[normalize-space(@name)='bbb'] //*[name()='bbb'] - complex way of saying "//bbb" //*[starts-with(name(),'aa')] - an element named "aa.*" //*[contains(name(),'c')] - an element ".*c.*" //aaa[string-length(name())=4] "...." //aaa[string-length(name())<4] ".{1,4}" //aaa[string-length(name())>5] ".{5,}" Note that not all XPath is currently handled by XML::DT. A lot of XPath will never be added to XML::DT because is not in accordance with the down translation model. For more documentation about XPath check the specification at http://www.w3c.org or some tutorials under http://www.zvon.org =head2 pathdtstring Like the C function but supporting XPath. =head2 pathdturl Like the C function but supporting XPath. =head2 ctxt Returns the context element of the currently being processed element. So, if you call C you will get your father element, and so on. =head2 inpath C is true if the actual element path matches the provided pattern. This function is meant to be used in the element functions in order to achieve context dependent processing. =head2 inctxt C is true if the actual element father matches the provided pattern. =head2 toxml This is the default "-default" function. It can be used to generate XML based on C<$c> C<$q> and C<%v> variables. Example: add a new attribute to element C without changing it: %handler=( ... ele1 => sub { $v{at1} = "v1"; toxml(); }, ) C can also be used with 3 arguments: tag, attributes and contents toxml("a",{href=> "http://local/f.html"}, "example") returns: example Empty tags are written as empty tags. If you want an empty tag with opening and closing tags, then use the C. =head2 tohtml See C. =head2 xmltree This simple function just makes a HASH reference: { -c => $c, -q => $q, all_the_other_attributes } The function C understands this structure and makes XML with it. =head2 mkdtskel Used by the mkdtskel script to generate automatically a XML::DT perl script file based on an XML file. Check C manpage for details. =head2 mkdtskel_fromDTD Used by the mkdtskel script to generate automatically a XML::DT perl script file based on an DTD file. Check C manpage for details. =head2 mkdtdskel Used by the mkdtskel script to generate automatically a XML::DT perl script file based on a DTD file. Check C manpage for details. =head1 Accessing parents With XML::DT you can access an element parent (or grand-parent) attributes, till the root of the XML document. If you use c<$dtattributes[1]{foo} = 'bar'> on a processing function, you are defining the attribute C for that element parent. In the same way, you can use C<$dtattributes[2]> to access the grand-parent. C<$dtattributes[-1]> is, as expected, the XML document root element. There are some shortcuts: =over 4 =item C =item C =item C You can use these functions to access to your C, grand-father (C) or great-grand-father (C): father("x"); # returns value for attribute "x" on father element father("x", "value"); # sets value for attribute "x" on father # element You can also use it directly as a reference to C<@dtattributes>: father->{"x"}; # gets the attribute father->{"x"} = "value"; # sets the attribute $attributes = father; # gets all attributes reference =item C You can use it as a function to access to your tree root element. root("x"); # gets attribute C on root element root("x", "value"); # sets value for attribute C on root You can also use it directly as a reference to C<$dtattributes[-1]>: root->{"x"}; # gets the attribute x root->{"x"} = "value"; # sets the attribute x $attributes = root; # gets all attributes reference =back =head1 User provided element processing functions The user must provide an HASH with a function for each element, that computes element output. Functions can use the element name C<$q>, the element content C<$c> and the attribute values hash C<%v>. All those global variables are defined in C<$CALLER::>. Each time an element is find the associated function is called. Content is calculated by concatenation of element contents strings and interior elements return values. =head2 C<-default> function When a element has no associated function, the function associated with C<-default> called. If no C<-default> function is defined the default function returns a XML like string for the element. When you use C definitions, you often need do set C<-default> function to return just the contents: C. =head2 C<-outputenc> option C<-outputenc> defines the output encoding (default is Unicode UTF8). =head2 C<-inputenc> option C<-inputenc> forces a input encoding type. Whenever that is possible, define the input encoding in the XML file: =head2 C<-pcdata> function C<-pcdata> function is used to define transformation over the contents. Typically this function should look at context (see C function) The default C<-pcdata> function is the identity =head2 C<-cdata> function You can process C<> in a way different from pcdata. If you define a C<-cdata> method, it will be used. Otherwise, the C<-pcdata> method is called. =head2 C<-begin> function Function to be executed before processing XML file. Example of use: initialization of side-effect variables =head2 C<-end> function Function to be executed after processing XML file. I can use C<$c> content value. The value returned by C<-end> will be the C
return value. Example of use: post-processing of returned contents =head2 C<-recover> option If set, the parser will try to recover in XML errors. =head2 C<-html> option If set, the parser will try to recover in errors. Note that this differs from the previous one in the sense it uses some knowledge of the HTML structure for the recovery. =head2 C<-userdata> option Use this to pass any information you like to your handlers. The data structure you pass in this option will be available as C<< $u >> in your code. -- New in 0.62. =head1 Elements with values other than strings (C<-type>) By default all elements return strings, and contents (C<$c>) is the concatenation of the strings returned by the sub-elements. In some situations the XML text contains values that are better processed as a structured type. The following types (functors) are available: =over 4 =item THE_CHILD Return the result of processing the only child of the element. =item LAST_CHILD Returns the result of processing the last child of the element. =item STR concatenates all the sub-elements returned values (DEFAULT) all the sub-element should return strings to be concatenated; =item SEQ makes an ARRAY with all the sub elements contents; attributes are ignored (they should be processed in the sub-element). (returns a ref) If you have different types of sub-elements, you should use SEQH =item SEQH makes an ARRAY of HASH with all the sub elements (returns a ref); for each sub-element: -q => element name -c => contents at1 => at value1 for each attribute =item MAP makes an HASH with the sub elements; keys are the sub-element names, values are their contents. Attributes are ignored. (they should be processed in the sub-element) (returns a ref) =item MULTIMAP makes an HASH of ARRAY; keys are the sub-element names; values are lists of contents; attributes are ignored (they should be processed in the sub-element); (returns a ref) =item MMAPON(element-list) makes an HASH with the sub-elements; keys are the sub-element names, values are their contents; attributes are ignored (they should be processed in the sub-element); for all the elements contained in the element-list, it is created an ARRAY with their contents. (returns a ref) =item XML return a reference to an HASH with: -q => element name -c => contents at1 => at value1 for each attribute =item ZERO don't process the sub-elements; return "" =back When you use C definitions, you often need do set C<-default> function returning just the contents C. =head2 An example: use XML::DT; %handler = ( contacts => sub{ [ split(";",$c)] }, -default => sub{$c}, -type => { institution => 'MAP', degrees => MMAPON('name') tels => 'SEQ' } ); $a = dt ("f.xml", %handler); with the following f.xml U.M. University of Minho 1111 1112 1113 Portugal J.Joao; J.Rocha; J.Ramalho Computer science Informatica history would make $a { 'name' => [ 'Computer science', 'Informatica ', ' history ' ], 'institution' => { 'tels' => [ 1111, 1112, 1113 ], 'name' => 'University of Minho', 'where' => 'Portugal', 'id' => 'U.M.', 'contacts' => [ 'J.Joao', ' J.Rocha', ' J.Ramalho' ] } }; =head1 DT Skeleton generation It is possible to build an initial processor program based on an example To do this use the function C. Example: perl -MXML::DT -e 'mkdtskel "f.xml"' > f.pl =head1 DTD skeleton generation It makes a naive DTD based on an example(s). To do this use the function C. Example: perl -MXML::DT -e 'mkdtdskel "f.xml"' > f.dtd =head1 SEE ALSO mkdtskel(1) and mkdtdskel(1) =head1 AUTHORS Home for XML::DT; http://natura.di.uminho.pt/~jj/perl/XML/ Jose Joao Almeida, Alberto Manuel Simões, =head1 ACKNOWLEDGEMENTS Michel Rodriguez José Carlos Ramalho Mark A. Hillebrand =head1 COPYRIGHT AND LICENSE Copyright 1999-2012 Project Natura. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut our %ty = (); sub dt { my ($file, %xml)=@_; my ($parser, $tree); # Treat -decl option my $declr = ""; if ($xml{-declr}) { if ($xml{-outputenc}) { $declr = "\n"; } else { $declr = "\n"; } } %ty = (); %ty = (%{$xml{'-type'}}) if defined($xml{'-type'}); $ty{-ROOT} = "NONE"; &{$xml{-begin}} if $xml{-begin}; # TODO --- how to force encoding with XML::LibXML? # $xml{-inputenc} # create a new LibXML parser $parser = XML::LibXML->new(); #### We don't wan't DT to load everytime the DTD (I Think!) $parser->validation(0); # $parser->expand_xinclude(0); # testing $parser->load_ext_dtd(0); $parser->expand_entities(0); $parser->expand_xincludes(1) if $xml{'-xinclude'}; # parse the file my $doc; if ( $xml{'-recover'}) { $parser->recover(1); eval { local $SIG{__WARN__} = sub{}; $doc = $parser->parse_file($file); }; return undef if !$doc; } elsif ( $xml{'-html'}) { $parser->recover(1); eval { local $SIG{__WARN__} = sub{}; $doc = $parser->parse_html_file($file); }; return undef if !$doc; } else { $doc = $parser->parse_file($file) } # get the document root element $tree = $doc->getDocumentElement(); my $return = ""; # execute End action if it exists if($xml{-end}) { $c = _omni("-ROOT", \%xml, $tree); $return = &{$xml{-end}} } else { $return = _omni("-ROOT",\%xml, $tree) } if ($declr) { return $declr.$return; } else { return $return; } } sub ctxt { my $level = $_[0]; $dtcontext[-$level-1]; } sub inpath { my $pattern = shift ; join ("/", @dtcontext) =~ m!\b$pattern\b!; } sub inctxt { my $pattern = shift ; # see if is in root context... return 1 if (($pattern eq "^" && @dtcontext==1) || $pattern eq ".*"); join("/", @dtcontext) =~ m!$pattern/[^/]*$! ; } sub father { my ($a,$b)=@_; if (defined($b)){$dtattributes[1]{$a} = $b} elsif(defined($a)){$dtattributes[1]{$a} } else {$dtattributes[1]} } sub gfather { my ($a,$b)=@_; if (defined($b)){$dtattributes[2]{$a} = $b} elsif(defined($a)){$dtattributes[2]{$a} } else {$dtattributes[2]} } sub ggfather { my ($a,$b)=@_; if (defined($b)){$dtattributes[3]{$a} = $b} elsif(defined($a)){$dtattributes[3]{$a} } else {$dtattributes[3]} } sub root { ### the root my ($a,$b)=@_; if (defined($b)){$dtattributes[-1]{$a} = $b } elsif(defined($a)){$dtattributes[-1]{$a} } else {$dtattributes[-1] } } sub pathdtstring{ my $string = shift; my %h = _pathtodt(@_); return dtstring($string,%h); } sub pathdturl{ my $url = shift; my %h = _pathtodt(@_); return dturl($url,%h); } sub dturl{ my $url = shift; my $contents = $ua->get($url); if ($contents->{success}) { # warn("JJ ok\n"); return dtstring($contents->{content}, @_); } else { # warn("JJ not ok\n"); warn("$contents->{status}: $contents->{reason}"); return undef; } } sub dtstring { my ($string, %xml)=@_; my ($parser, $tree); my $declr = ""; if ($xml{-declr}) { if ($xml{-outputenc}) { $declr = "\n"; } else { $declr = "\n"; } } $xml{'-type'} = {} unless defined $xml{'-type'}; %ty = (%{$xml{'-type'}}, -ROOT => "NONE"); # execute Begin action if it exists if ($xml{-begin}) { &{$xml{-begin}} } if ($xml{-inputenc}) { $string = XML::LibXML::encodeToUTF8($xml{-inputenc}, $string); } # create a new LibXML parser $parser = XML::LibXML->new(); $parser->validation(0); $parser->load_ext_dtd(0); $parser->expand_entities(0); # parse the string my $doc; if ( $xml{'-recover'}) { $parser->recover(1); eval { local $SIG{__WARN__} = sub{}; $doc = $parser->parse_string($string); }; return undef if !$doc; } elsif ( $xml{'-html'}) { $parser->recover(1); eval{ local $SIG{__WARN__} = sub{}; $doc = $parser->parse_html_string($string); }; # if ($@) { return undef; } return undef unless defined $doc; } else { $doc = $parser->parse_string($string); } # get the document root element $tree = $doc->getDocumentElement(); my $return; # Check if we have an end function if ($xml{-end}) { $c = _omni("-ROOT", \%xml, $tree); $return = &{$xml{-end}} } else { $return = _omni("-ROOT", \%xml, $tree) } if ($declr) { return $declr.$return; } else { return $return; } } sub pathdt{ my $file = shift; my %h = _pathtodt(@_); return dt($file,%h); } # Parsing dos predicados do XPath sub _testAttr { my $atr = shift; for ($atr) { s/name\(\)/'$q'/g; # s/\@([A-Za-z_]+)/'$v{$1}'/g; s/\@([A-Za-z_]+)/defined $v{$1}?"'$v{$1}'":"''"/ge; s/\@\*/keys %v?"'1'":"''"/ge; if (/^not\((.*)\)$/) { return ! _testAttr($1); } elsif (/^('|")([^\1]*)(\1)\s*=\s*('|")([^\4]*)\4$/) { return ($2 eq $5); } elsif (/^(.*?)normalize-space\((['"])([^\2)]*)\2\)(.*)$/) { my ($back,$forward)=($1,$4); my $x = _normalize_space($3); return _testAttr("$back'$x'$forward"); } elsif (/starts-with\((['"])([^\1))]*)\1,(['"])([^\3))]*)\3\)/) { my $x = _starts_with($2,$4); return $x; } elsif (/contains\((['"])([^\1))]*)\1,(['"])([^\3))]*)\3\)/) { my $x = _contains($2,$4); return $x; } elsif (/^(.*?)string-length\((['"])([^\2]*)\2\)(.*)$/) { my ($back,$forward) = ($1,$4); my $x = length($3); return _testAttr("$back$x$forward"); } elsif (/^(\d+)\s*=(\d+)$/) { return ($1 == $2); } elsif (/^(\d+)\s*<(\d+)$/) { return ($1 < $2); } elsif (/^(\d+)\s*>(\d+)$/) { return ($1 > $2); } elsif (/^(['"])([^\1]*)\1$/) { return $2; } } return 0; #$atr; } # Funcao auxiliar de teste de predicados do XPath sub _starts_with { my ($string,$preffix) = @_; return 0 unless ($string && $preffix); return 1 if ($string =~ m!^$preffix!); return 0; } # Funcao auxiliar de teste de predicados do XPath sub _contains { my ($string,$s) = @_; return 0 unless ($string && $s); return 1 if ($string =~ m!$s!); return 0; } # Funcao auxiliar de teste de predicados do XPath sub _normalize_space { my $z = shift; $z =~ /^\s*(.*?)\s*$/; $z = $1; $z =~ s!\s+! !g; return $z; } sub _pathtodt { my %h = @_; my %aux=(); my %aux2=(); my %n = (); my $z; for $z (keys %h) { # TODO - Make it more generic if ( $z=~m{\w+(\|\w+)+}) { my @tags = split /\|/, $z; for(@tags) { $aux2{$_}=$h{$z} } } elsif ( $z=~m{(//|/|)(.*)/([^\[]*)(?:\[(.*)\])?} ) { my ($first,$second,$third,$fourth) = ($1,$2,$3,$4); if (($first eq "/") && (!$second)) { $first = ""; $second = '.*'; $third =~ s!\*!-default!; } else { $second =~ s!\*!\[^/\]\+!g; $second =~ s!/$!\(/\.\*\)\?!g; $second =~ s!//!\(/\.\*\)\?/!g; $third =~ s!\*!-default!g; } push( @{$aux{$third}} , [$first,$second,$h{$z},$fourth]); } else { $aux2{$z}=$h{$z};} } for $z (keys %aux){ my $code = sub { my $l; for $l (@{$aux{$z}}) { my $prefix = ""; $prefix = "^" unless (($l->[0]) or ($l->[1])); $prefix = "^" if (($l->[0] eq "/") && ($l->[1])); if ($l->[3]) { if(inctxt("$prefix$l->[1]") && _testAttr($l->[3])) {return &{$l->[2]}; } } else { if(inctxt("$prefix$l->[1]")) {return &{$l->[2]};} } } return &{ $aux2{$z}} if $aux2{$z} ; return &{ $h{-default}} if $h{-default}; &toxml(); }; $n{$z} = $code; } for $z (keys %aux2){ $n{$z} ||= $aux2{$z} ; } return %n; } sub _omni { my ($par, $xml, @l) = @_; my $defaulttype = (exists($xml->{-type}) && exists($xml->{-type}{-default})) ? $xml->{-type}{-default} : "STR"; my $type = $ty{$par} || $defaulttype; my %typeargs = (); if (ref($type) eq "mmapon") { $typeargs{$_} = 1 for (@$type); $type = "MMAPON"; } my $r ; if( $type eq 'STR') { $r = "" } elsif( $type eq 'THE_CHILD' or $type eq 'LAST_CHILD') { $r = 0 } elsif( $type eq 'SEQ' or $type eq "ARRAY") { $r = [] } elsif( $type eq 'SEQH' or $type eq "ARRAYOFHASH") { $r = [] } elsif( $type eq 'MAP' or $type eq "HASH") { $r = {} } elsif( $type eq 'MULTIMAP') { $r = {} } elsif( $type eq 'MMAPON' or $type eq "HASHOFARRAY") { $r = {} } elsif( $type eq 'NONE') { $r = "" } elsif( $type eq 'ZERO') { return "" } my ($name, $val, @val, $atr, $aux); $u = $xml->{-userdata}; while(@l) { my $tree = shift @l; next unless $tree; $name = ref($tree) eq "XML::LibXML::CDATASection" ? "-pcdata" : $tree->getName(); if (ref($tree) eq "XML::LibXML::CDATASection") { $val = $tree->getData(); $name = "-cdata"; $aux = (defined($xml->{-outputenc}))?_fromUTF8($val,$xml->{-outputenc}):$val; if (defined($xml->{-cdata})) { push(@dtcontext,"-cdata"); $c = $aux; $aux = &{$xml->{-cdata}}; pop(@dtcontext); } elsif (defined($xml->{-pcdata})) { push(@dtcontext,"-pcdata"); $c = $aux; $aux = &{$xml->{-pcdata}}; pop(@dtcontext); } } elsif (ref($tree) eq "XML::LibXML::Comment") { ### At the moment, treat as Text ### We will need to change this, I hope! $val = ""; $name = "-pcdata"; $aux= (defined($xml->{-outputenc}))?_fromUTF8($val, $xml->{-outputenc}):$val; if (defined($xml->{-pcdata})) { push(@dtcontext,"-pcdata"); $c = $aux; $aux = &{$xml->{-pcdata}}; pop(@dtcontext); } } elsif (ref($tree) eq "XML::LibXML::Text") { $val = $tree->getData(); $name = "-pcdata"; $aux = (defined($xml->{-outputenc}))?_fromUTF8($val,$xml->{-outputenc}):$val; if (defined($xml->{-pcdata})) { push(@dtcontext,"-pcdata"); $c = $aux; $aux = &{$xml->{-pcdata}}; pop(@dtcontext); } } elsif (ref($tree) eq "XML::LibXML::Element") { my %atr = _nodeAttributes($tree); $atr = \%atr; if (exists($xml->{-ignorecase})) { $name = lc($name); for (keys %$atr) { my ($k,$v) = (lc($_),$atr->{$_}); delete($atr->{$_}); $atr->{$k} = $v; } } push(@dtcontext,$name); $dtcontextcount{$name}++; unshift(@dtatributes, $atr); unshift(@dtattributes, $atr); $aux = _omniele($xml, $name, _omni($name, $xml, ($tree->getChildnodes())), $atr); shift(@dtatributes); shift(@dtattributes); pop(@dtcontext); $dtcontextcount{$name}--; } elsif (ref($tree) eq "XML::LibXML::Node") { if ($tree->nodeType == XML_ENTITY_REF_NODE) { # if we get here, is because we are not expanding entities (I think) if ($tree->textContent) { $aux = $tree->textContent; } else { $aux = '&'.$tree->nodeName.';'; } } else { print STDERR "Not handled, generic node of type: [",$tree->nodeType,"]\n"; } } else { print STDERR "Not handled: [",ref($tree),"]\n"; } if ($type eq "STR"){ if (defined($aux)) {$r .= $aux} ;} elsif ($type eq "THE_CHILD" or $type eq "LAST_CHILD"){ $r = $aux unless _whitepc($aux, $name); } elsif ($type eq "SEQ" or $type eq "ARRAY"){ push(@$r, $aux) unless _whitepc($aux, $name);} elsif ($type eq "SEQH" or $type eq "ARRAYHASH"){ push(@$r,{"-c" => $aux, "-q" => $name, _nodeAttributes($tree) }) unless _whitepc($aux,$name); } elsif($type eq "MMAPON"){ if(not _whitepc($aux,$name)){ if(! $typeargs{$name}) { warn "duplicated tag '$name'\n" if(defined($r->{$name})); $r->{$name} = $aux } else { push(@{$r->{$name}},$aux) unless _whitepc($aux,$name)}} } elsif($type eq "MAP" or $type eq "HASH"){ if(not _whitepc($aux,$name)){ warn "duplicated tag '$name'\n" if(defined($r->{$name})); $r->{$name} = $aux }} elsif($type eq "MULTIMAP"){ push(@{$r->{$name}},$aux) unless _whitepc($aux,$name)} elsif($type eq "NONE"){ $r = $aux;} else { $r="undefined type !!!"} } $r; } sub _omniele { my $xml = shift; my $aux; ($q, $c, $aux) = @_; %v = %$aux; if (defined($xml->{-outputenc})) { for (keys %v){ $v{$_} = _fromUTF8($v{$_}, $xml->{-outputenc}) } } if (defined $xml->{$q}) { &{$xml->{$q}} } elsif (defined $xml->{'-default'}) { &{$xml->{'-default'}} } elsif (defined $xml->{'-tohtml'}) { tohtml() } else { toxml() } } sub xmltree { +{'-c' => $c, '-q' => $q, %v} } sub tohtml { my ($q,$v,$c); if (not @_) { ($q,$v,$c) = ($XML::DT::q, \%XML::DT::v, $XML::DT::c); } elsif (ref($_[0])) { $c = shift; } else { ($q,$v,$c) = @_; } if (not ref($c)) { if ($q eq "-pcdata") { return $c } elsif ($q eq "link" || $q eq "br" || $q eq "hr" || $q eq "img") { return _openTag($q,$v) } else { return _openTag($q,$v) . "$c" } } elsif (ref($c) eq "HASH" && $c->{'-q'} && $c->{'-c'}) { my %a = %$c; my ($q,$c) = delete @a{"-q","-c"}; tohtml($q,\%a,(ref($c)?tohtml($c):$c)); } elsif (ref($c) eq "HASH") { _openTag($q,$v). join("",map {($_ ne "-pcdata") ? ( (ref($c->{$_}) eq "ARRAY") ? "<$_>". join("\n<$_>", @{$c->{$_}}). "\n" : tohtml($_,{},$c->{$_})."\n" ) : () } keys %{$c} ) . "$c->{-pcdata}" } ######## "NOTYetREady" elsif (ref($c) eq "ARRAY") { if (defined($q) && exists($ty{$q}) && $ty{$q} eq "SEQH") { tohtml($q,$v,join("\n",map {tohtml($_)} @$c)) } elsif (defined $q) { tohtml($q,$v,join("",@{$c})) } else { join("\n",map {(ref($_)?tohtml($_):$_)} @$c) } } } sub toxml { my ($q,$v,$c); if (not @_) { ($q, $v, $c) = ($XML::DT::q, \%XML::DT::v, $XML::DT::c); } elsif (ref($_[0])) { $c = shift; } else { ($q, $v, $c) = @_; } if (not ref($c)) { if ($q eq "-pcdata") { return $c } elsif ($c eq "") { return _emptyTag($q,$v) } else { return _openTag($q,$v) . "$c" } } elsif (ref($c) eq "HASH" && $c->{'-q'} && $c->{'-c'}) { my %a = %$c; my ($q,$c) = delete @a{"-q","-c"}; ### _openTag($q,\%a).toxml($c).). ### toxml($q,\%a,join("\n",map {toxml($_)} @$c)) toxml($q,\%a,(ref($c)?toxml($c):$c)); } elsif (ref($c) eq "HASH") { _openTag($q,$v). join("",map {($_ ne "-pcdata") ? ( (ref($c->{$_}) eq "ARRAY") ? "<$_>". join("\n<$_>", @{$c->{$_}}). "\n" : toxml($_,{},$c->{$_})."\n" ) : () } keys %{$c} ) . "$c->{-pcdata}" } ######## "NOTYetREady" elsif (ref($c) eq "ARRAY") { if (defined($q) && exists($ty{$q}) && $ty{$q} eq "SEQH") { toxml($q,$v,join("\n",map {toxml($_)} @$c)) } elsif (defined $q) { toxml($q,$v,join("",@{$c})) } else { join("\n",map {(ref($_)?toxml($_):$_)} @$c) } } } sub _openTag{ "<$_[0]". join("",map {" $_=\"$_[1]{$_}\""} keys %{$_[1]} ).">" } sub _emptyTag{ "<$_[0]". join("",map {" $_=\"$_[1]{$_}\""} keys %{$_[1]} )."/>" } sub mkdtskel_fromDTD { my $filename = shift; my $file = ParseDTDFile($filename); print <<'PERL'; #!/usr/bin/perl use warnings; use strict; use XML::DT; my $filename = shift; # Variable Reference # # $c - contents after child processing # $q - element name (tag) # %v - hash of attributes my %handler=( # '-outputenc' => 'ISO-8859-1', # '-default' => sub{"<$q>$c"}, PERL for (sort keys %{$file}) { print " '$_' => sub { },"; print " # attributes: ", join(", ", keys %{$file->{$_}{attributes}}) if exists($file->{$_}{attributes}); print "\n"; } print <<'PERL'; ); print dt($filename, %handler); PERL } sub mkdtskel{ my @files = @_; my $name; my $HTML = ""; my %element; my %att; my %mkdtskel = ('-default' => sub{ $element{$q}++; for (keys %v) { $att{$q}{$_} = 1 }; ""}, '-end' => sub{ print <<'END'; #!/usr/bin/perl use XML::DT; use warnings; use strict; my $filename = shift; # Variable Reference # # $c - contents after child processing # $q - element name (tag) # %v - hash of attributes my %handler=( # '-outputenc' => 'ISO-8859-1', # '-default' => sub{"<$q>$c"}, END print $HTML; for $name (sort keys %element) { print " '$name' => sub{ }, #"; print " $element{$name} occurrences;"; print ' attributes: ', join(', ', keys %{$att{$name}}) if $att{$name}; # print " \"\$q:\$c\"\n"; print "\n"; } print <<'END'; ); print dt($filename, %handler); END } ); my $file = shift(@files); while($file =~ /^-/){ if ($file eq "-html") { $HTML = " '-html' => 1,\n"; $mkdtskel{'-html'} = 1;} elsif($file eq "-latin1") { $mkdtskel{'-inputenc'}='ISO-8859-1';} else { die("usage mktskel [-html] [-latin1] file \n")} $file=shift(@files)} dt($file,%mkdtskel) } sub _nodeAttributes { my $node = shift; my %answer = (); my @attrs = $node->getAttributes(); for (@attrs) { if (ref($_) eq "XML::LibXML::Namespace") { # TODO: This should not be ignored, I think. # This sould be converted on a standard attribute with # key 'namespace' and respective contents } else { $answer{$_->getName()} = $_->getValue(); } } return %answer; } sub mkdtdskel { my @files = @_; my $name; my %att; my %ele; my %elel; my $root; my %handler=( '-outputenc' => 'ISO-8859-1', '-default' => sub{ $elel{$q}++; $root = $q unless ctxt(1); $ele{ctxt(1)}{$q} ++; for(keys(%v)){$att{$q}{$_} ++ } ; }, '-pcdata' => sub{ if ($c =~ /[^ \t\n]/){ $ele{ctxt(1)}{"#PCDATA"}=1 }}, ); while($files[0] =~ /^-/){ if ($files[0] eq "-html") { $handler{'-html'} = 1;} elsif($files[0] eq "-latin1") { $handler{'-inputenc'}='ISO-8859-1';} else { die("usage mkdtdskel [-html] [-latin1] file* \n")} shift(@files)} for my $filename (@files){ dt($filename,%handler); } print "\n\n"; delete $elel{$root}; for ($root, keys %elel){ _putele($_, \%ele); for $name (keys(%{$att{$_}})) { print( "\t\n"); print( "\t\n"); } } } sub _putele { my ($e,$ele) = @_; my @f ; if ($ele->{$e}) { @f = keys %{$ele->{$e}}; print "= 1 && $f[0] eq "#PCDATA" ? "" : "*"), " >\n"; print "\n"; } else { print "\n"; } } sub _whitepc { $_[1] eq '-pcdata' and $_[0] =~ /^[ \t\r\n]*$/ } sub MMAPON { bless([@_],"mmapon") } sub _fromUTF8 { my $string = shift; my $encode = shift; my $ans = eval { XML::LibXML::decodeFromUTF8($encode, $string) }; if ($@) { return $string } else { return $ans } } 1; XML-DT-0.69/mkxmltype0000755000175000017500000002036612745745252013047 0ustar ambsambs#!/usr/bin/perl -w -s use XML::DT; use Data::Dumper; use File::Temp; use Term::ReadLine; #use locale; use strict; our ($latin1,$html,$show_att,$expand_att_id,$class); our ($lines,$t,$shell); my (@files)=@ARGV; @ARGV=(); mkxmltypes (@files); sub mkxmltypes { my %type=(); my @files = @_; my %root = (); my %att=(); my %dom=(); my %ele=(); my %elel=(); my %atl=(); my %handler=( # '-outputenc' => 'ISO-8859-1', '-default' => sub{ $c =~ s/,$//; if(not $class){ push(@{$type{$q}}, (eval("[$c]") || "?$c")); $elel{$q}++; if(ctxt(1)){ $ele{ctxt(1)}{$q} ++;} else { $root{$q}++} for(keys(%v)){ $atl{$_}++; $att{$q}{$_}{tipo($v{$_})||"_str"} ++ ; $dom{$q}{$_}{$v{$_}} ++ } "'$q',"; } else{ my $qcl=$q; if($v{class}){$qcl .="+$v{class}"} if($v{id} ){$qcl .="+$v{id}"} push(@{$type{$qcl}}, (eval("[$c]") || "?$c")); $elel{$qcl}++; if(ctxt(1)){ my $fcl=ctxt(1); if(father->{class}){$fcl .="+".father->{class}} if(father->{id} ){$fcl .="+".father->{id}} $ele{$fcl}{$qcl} ++;} else { $root{$qcl}++} for(keys(%v)){ next if ($_ eq "class"); next if ($_ eq "id"); $atl{$_}++; $att{$qcl}{$_}{tipo($v{$_})||"_str"} ++ ; $dom{$qcl}{$_}{$v{$_}} ++ } "'$qcl',"; } }, '-pcdata' => sub{ if ($c =~ /[^ \t\n]/){ $ele{ctxt(1)}{"#PCDATA"}=1; "'#PCDATA'," } else {""}}, ); if ($html) { $handler{'-html'} = 1;} if($latin1) { $handler{'-inputenc'}='ISO-8859-1';} for my $fname (@files){ if($lines){ my $tmpfile = File::Temp->new( UNLINK => 0 )->filename; system("head -$lines $fname | xmllint --recover - > $tmpfile"); $fname = $tmpfile; } dt($fname,%handler); unlink($fname) if $lines; } ### print "DEBUG",Dumper(\%att,\%ele,\%dom,\%atl); print "# ", join(" ",keys %root)," ...", scalar(localtime(time)) ,"\n"; my %resumofinal=(); for (keys %type){ my @tipo=(); for my $lista (@{$type{$_}}){ push (@tipo, processa($lista)) } $resumofinal{$_}=resumele(processa2([@tipo])).resumeatts($att{$_}); } if($shell){ shell($t,\%root,\%ele,\%att,\%dom,\%resumofinal,\%atl); } else{ pprint(\%resumofinal,ordem(\%ele,(($t) ||(keys %root) ))); } } sub shell{ my ($t,$root,$ele,$att,$dom,$resumofinal,$atl) = @_; my $last=(keys %$root)[0]; my $elepat = q{[\w:]+}; my $max = 10; my $term = new Term::ReadLine 'sample'; my $tas = $term->Attribs; $tas->{completion_entry_function}= $tas->{list_completion_function}; $tas->{completion_word} = [ keys(%$ele), keys(%$atl) ]; pprint($resumofinal,ordem($ele,(($t) ||(keys %$root) ))); while ( defined ($_ = $term->readline("\npfs> ")) ) { chomp(); $term->addhistory($_) if /\S/; s/^\s*(.*?)\s*$/$1/; if(/($elepat)\[\@?($elepat)\]/){ print resumeatt($att->{$1}{$2},$dom->{$1}{$2},$max); $last = $1} elsif(/\!max\s*=?\s*(\d+)/){$max=$1;} elsif(/\.($elepat)/){ print resumeatt($att->{$last}{$1},$dom->{$last}{$1},$max);} elsif(!$_ or defined $ele->{$_}) { $last=$_; pprint($resumofinal,ordem($ele,(($_) ||(keys %$root) ))); } else{ for my $e (keys %$att){ for my $a (keys %{$att->{$e}}){ print "$e($a):", resumeatt($att->{$e}{$a},$dom->{$e}{$a},$max) if($a eq $_) } } } } } sub ordem{ my ($rel,@st)=@_; my @r=(); my %visited = ('#PCDATA' => 1); while(@st){ my $next = shift(@st); next if $visited{$next}; push(@r,$next); $visited{$next} = 1; push(@st, (grep {! $visited{$_}} (keys %{$rel->{$next}}))); } \@r; } sub pprint{ my $r = shift; my $order = shift; for (@$order){ print "$_ \t=> $r->{$_}\n";} } sub resumeatts{ my $a=shift; my $r=""; for (keys(%{$a})) { if($expand_att_id){ $r .= "\n\t\t * $_:(".join(",",keys %{$a->{$_}}) . ")" } else { $r .= " * $_" } } $r } sub resumeatt{ my $a=shift; my $d=shift; my $max = shift(@_) || 10; my $r= join("|",keys %{$a}) ; my @domact = (grep {defined $_} ((keys %{$d}))[0..$max]); $domact[$max] = '...' if $domact[$max]; $r . " = {". join(",",@domact) . "}\n"; } sub processa{ my $a=shift; if( @$a == 0 ) { +{ _isa =>"empty"} } elsif( @$a == 1 && $a->[0] eq '#PCDATA') { +{ _isa =>"text" ,$a->[0] =>[1,1]} } elsif( @$a == 1 ) { +{ _isa =>"singleton",$a->[0] =>[1,1]} } else{ my %f = (); for (@$a){$f{$_}[0]++,$f{$_}[1]++} my $dif = scalar keys %f; if($dif == 1) { +{ _isa =>"seq", %f} ; } elsif($dif == @$a) { +{ _isa =>"tup", %f}; } elsif($f{'#PCDATA'}){ +{ _isa =>"mixed", %f }; } else { +{ _isa =>"mtup", %f } } } } sub processa2{ my $a=shift; if ( @$a == 0 ) { die("no sons????") } elsif( @$a == 1 ) { $a->[0] } else{ my %f = (); my %maybe = (); for (@$a){$f{sons2str($_)}++; $maybe{$_->{_isa}}++ } my $dif = scalar keys %f; if ($dif == 1) { $a->[0]; } elsif($maybe{mixed} || $maybe{text}){ +{%{join_sons($a)}, _isa=> "mixed"} } else { my %s= %{join_sons($a)}; if(keys %s == 1) { +{%s, _isa => "seq"}} else { +{%s, _isa => "mtup"} } } } } sub resumele{ my $a=shift; ## print Dumper($a); my $i = $a->{_isa}; delete $a->{_isa}; if ($i eq "text") {"text"} elsif ($i eq "empty") {"empty"} elsif ($i eq "singleton") {join(", ", keys %{$a}) } elsif ($i eq "mixed") {delete $a->{'#PCDATA'}; if(keys %{$a}){ "mixed(".join(", ", keys %{$a}).")"} else {"text"} } elsif ($i eq "tup") {"tup(".join(", ", keys %{$a}).")"} elsif ($i eq "seq") {"seq(".join(", ", keys %{$a}).")"} else { my $r= "mtup("; for(sort keys %$a){ $r .= "$_, " if ( $a->{$_}[0] == 1 && $a->{$_}[1] == 1 ); $r .= "$_?, " if ( $a->{$_}[0] == 0 && $a->{$_}[1] == 1 ); $r .= "$_*, " if ( $a->{$_}[0] == 0 && $a->{$_}[1] > 1 ); $r .= "$_+, " if ( $a->{$_}[0] > 0 && $a->{$_}[1] > 1 ); $r .= Dumper($a) if($r =~ /\($/ ); } $r =~ s/, $//; $r.=")"; } } sub join_sons{ my $a = shift; my %final = ( map { ($_ => [$a->[0]{$_}[0], $a->[0]{$_}[1]])} grep {$_ ne "_isa"} keys %{$a->[0]}); my %todas=(); for (@$a){ my @novas=keys %{$_}; @todas{@novas}= @novas; for my $k (keys %todas){ next if $k =~ /_isa/; $final{$k}[0]=0 unless $final{$k}[0]; unless (exists $_->{$k}){ $final{$k}[0]=0; next} $final{$k}[1]=$_->{$k}[1] if $_->{$k}[1] > ($final{$k}[1] || 0); $final{$k}[0]=$_->{$k}[0] if $_->{$k}[0] < $final{$k}[0]; } } \%final } sub sons2str{ my $a = shift; join(' ',($a->{_isa},map { $_ . ($a->{$_}[0]==1 ? "" : "+") } grep {$_ ne "_isa"} sort keys %$a)); } sub tipo{ my $a=shift; for ($a){ if(/^\s*\d+\s*$/) {return "_int" } elsif(m{^\s*(https?|ftp|file)://\w[~&=?\w:/.-]+\s*$}i){return "_url" } elsif(/^\s*\d+\.\d+\s*$/) {return "_real" } elsif(/^\w+$/) {return "_id" } elsif(m{^\s*[\w.-]+\@\w[\w_:/.-]+\s*$}) {return "_email" } else {return undef } } } __END__ =head1 NAME mkxmltype - Make XML analysis using XML::DT =head1 SYNOPSIS mkxmltype =head1 DESCRIPTION This command tries to infer DTD and Camlila-like types for a specific XML file; =head1 Options -latin1 input file encoding is forced to be latin1 -html uses html (libxml2) parser -show_att Show attribute values -expand_att_id -lines=20000 just reads the first 20000 lines of the XML file -t -class '
'... is treated as 'div+a+b' -shell Enter interactive shell mode =head1 SEE ALSO XML::DT(1), mkdtskel(1), mkdtdskel and perl(1) =cut XML-DT-0.69/META.json0000644000175000017500000000253013457371530012474 0ustar ambsambs{ "abstract" : "a package for down translation of XML files", "author" : [ "Jose Joao " ], "dynamic_config" : 1, "generated_by" : "ExtUtils::MakeMaker version 7.34, CPAN::Meta::Converter version 2.150010", "license" : [ "perl_5" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : 2 }, "name" : "XML-DT", "no_index" : { "directory" : [ "t", "inc" ] }, "prereqs" : { "build" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "configure" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "runtime" : { "requires" : { "ExtUtils::MakeMaker" : "6.17", "HTTP::Simple" : "0", "Scalar::Util" : "0", "Test::More" : "0.40", "XML::DTDParser" : "2.00", "XML::LibXML" : "1.54", "parent" : "0" } } }, "release_status" : "stable", "resources" : { "repository" : { "type" : "svn", "url" : "https://natura.di.uminho.pt/svn/main/xml/XML-DT", "web" : "https://natura.di.uminho.pt/svn/main/xml/XML-DT" } }, "version" : "0.69", "x_serialization_backend" : "JSON::PP version 4.02" } XML-DT-0.69/Changes0000644000175000017500000001407313457371507012357 0ustar ambsambsRevision history for Perl extension XML::DT. 0.69 Apr 22, 2019 - Switch from LWP to HTTP::Tiny 0.68 Sep 29, 2015 - Minor fix in MANIFEST to make Kwalittee happy. 0.67 Mar 15, 2015 - Link to public SVN repo (thanks Gabor Szabo for pushing) 0.66 Aug 15, 2014 - fix part of the fix that wasn't fixed (Debian community) 0.65 Aug 1, 2014 - fix the fix Thanks to Olly Betts (Debian community) 0.64 Jul 31, 2014 - fix the way temporary files are created. Thanks to Damyan Ivanov (Debian community) 0.63 Mar 25, 2013 - mkdtskel now uses strict/warnings 0.62 Jul 25, 2012 - Add the -userdata option. - Document -userdata, -html and -recover. 0.61 Jul 25, 2012 - Handle undef elements when parsing wrong formed XML. 0.60 Jul 25, 2012 - Add -recover option to parse XML (not just HTML). 0.59 Jun 5, 2012 - Fix POD encodings. 0.58 Apr 9, 2012 - Adding missing pre-requisite (parent). 0.57 Apr 7, 2012 - Let CDATA and PCDATA be processed in a different way. 0.56 Feb 12, 2011 - Removed debug message :-/ 0.55 Feb 12, 2011 - Process correctly entities (I think) 0.54 Nov 19, 2010 - require 5.8.6 - minor changes; 0.53 Jan 18, 2009 - added 'inpath' function. 0.52 Oct 22, 2008 - fixed bug on whitepc function. 0.51 Feb 22, 2008 - tohtml function with better handle of specific HTML tags 0.50 Feb 21, 2008 - Fixed a problem with HFS+ extended attributes 0.49 Feb 21, 2008 - Fixed a typo 0.48 Feb 21, 2008 - added tohtml function. Does the same as toxml, but does not create empty tags. - use -tohtml=>1 in a handler to use tohtml instead of toxml 0.47 Nov 23, 2006 - finally, XML::Parser backend was removed. - Added father, gfather, ggfather and root acessing method. 0.46 Nov 3, 2006 - mkdtskel for a HTML adds -html flag - XML::LibXML is used by default. 0.45 May 16, 2006 - Added use strict for XML::Parser backend 0.44 May 15, 2006 - Remove the use of $' and $` which makes regular expressions a lot slower 0.43 May 15, 2006 (bad release) 0.42 Sep 18, 2005 - Added use strict; 0.41 Jul 20, 2005 - Removed warning from HTML parsing; 0.40 Apr 06, 2005 - Added default type directive - Added tests for type-based XML processing; - Fixed recursive toxml; 0.39 Mar 22, 2005 - Added open '-ignorecase' to look to tag names and attributes as the same although they differ in case - toxml creates empty tag if $c equals to "" 0.38 Dec 24, 2004 - MERRY CHRISTMAS - Removed Test::Pod and Test::Pod::Coverage from Makefile.PL - Turned off all validation from XML::LibXML -- this way the DTD does not gets loaded and processing becomes faster 0.37 Nov 21, 2004 - Parse correctly CDATA sections using XML::LibXML; 0.36 Nov 19, 2004 - Added support for bad HTML documents; - Added test for -dtd support of mkdtskel; - Moved mkdtskel code to main module; - Changed default format for mkdtskel; 0.35 Nov 15, 2004 - Added -dtd support to mkdtskel (now we need XML::DTDParser) 0.34 Oct 30, 2004 - corrected '@dtattributes' instead of '@dtatributes' NOTICE: we will keep back compatibility for limited time. - Renamed ID type to THE_CHILD - Created LAST_CHILD type 0.33 Oct 03, 2004 - Added ID type; 0.32 Sep 20, 2004 - Added missing documentation; - Added pod and pod-coverage tests; 0.31 Aug 09, 2004 - Added documentation to @dtattributes 0.30 Jan 22, 2004 - Bugs corrected with encodings in attributes - Corrected encoding with utf8 locale -- we hope :-(; 0.29 Jan 07, 2004 - Corrected bug with encodings. - Added test for dtstring and encoding; 0.28 Dec 16, 2003 - Corrected bug when outputing a data structure. 0.27 Nov 14, 2003 - Added -declr switch. When added, dt will add the - Added global variable $PARSER with info about what parser is being used (XML::Parser or XML::LibXML); - Require a recent ExtUtils::MakeMaker. This will fix the problem with PM_FILTER syntax change (I hope) 0.26 Oct 12, 2003 - Corrected makefile so that it detects if it should use perl 5.8.0 or 5.8.1 PM_FILTER syntax; - Fixed bug with tags with only a '0' inside; 0.25 Oct 08, 2003 - Thanks to Martin Mokrejs, detected too many bugs with perl 5.8.1. 0.24.1 Jun 17, 2003 - corrected problem when using a prefix on Makefile.PL 0.24 Fev 20, 2003 - added documentation to mkdtskel and mkdtdskel; - ispell'ed README, Changes and DT.pm(pod) files; - added ExtUtils::MakeMaker version request; - added tests (basic functions, xpath, string processing); - added code to support html parsing using libxml2 html parser; - added XPath tests; Added 'toxml' tests; - added dturl and pathdturl method; - added mkdtskel -html; - added mkdtskel -dtd (dirty solution); 0.23 Dec 23, 2002 - removed a lot of code on Makefile.PL. Now ExtUtils::MakeMaker takes care of the full task. 0.22 Dec 20, 2002 - added mkdtdskel as an installed script; - added DT.pm as distribution file, for cpan indexing; 0.21 May 27, 2002 - let the user choose between XML::Parser or XML::LibXML; 0.20 Fev 20, 2001 - installs mkdtskel shell script to be used quickly - removed HTML files from package. User can do that with pod2html 0.19 Nov 30, 2000 - better pathdt functions with '//aaa[@att='asdasd']' 0.18 Nov 18, 2000 - Bug fixed in dtd generator function: mkdtdskel - possibility of passing parameters do toxml: toxml(tag, {...attributes...}, contents) - XPath functions to use paths instead of tags; 0.17 Oct 30, 2000 - added a -type => ZERO that don't processes its sub-elements and return "". It is good to avoid visiting certain parts of the document, and for better performance. 0.16 Oct 16, 2000 - problems with the changes on UNICODE - 0.15 does not work with Perl 5.005; Solution: "use bytes" if we find it. eval("use bytes") if ($inc{bytes.pm}) { require and import...} 0.15 Sep 16, 2000 - Corrected bug in pod; - problems with the changes on UNICODE; ... XML-DT-0.69/Makefile.PL0000644000175000017500000000232013457371247013027 0ustar ambsambs#!/usr/bin/perl use 5.008006; use ExtUtils::MakeMaker; %req_modules = ( 'ExtUtils::MakeMaker' => '6.17', 'Test::More' => '0.40', 'HTTP::Simple' => '0', 'XML::DTDParser' => '2.00', 'XML::LibXML' => '1.54', 'parent' => '0', 'Scalar::Util' => '0', ); %meta_merge = ( 'meta-spec' => { version => 2 }, resources => { repository => { type => 'svn', url => 'https://natura.di.uminho.pt/svn/main/xml/XML-DT', web => 'https://natura.di.uminho.pt/svn/main/xml/XML-DT', }, }, ); WriteMakefile( 'NAME' => 'XML::DT', 'VERSION_FROM' => 'lib/XML/DT.pm', 'EXE_FILES' => [ "mkdtskel", "mkdtdskel", "mkxmltype" ], 'PM_FILTER' => $PM_FILTER, 'PREREQ_PM' => \%req_modules, 'META_MERGE' => \%meta_merge, 'LICENSE' => 'perl', ($] >= 5.005 ? (ABSTRACT_FROM => 'lib/XML/DT.pm', AUTHOR => 'Jose Joao ') : () ), ); XML-DT-0.69/examples/0000755000175000017500000000000013457371530012671 5ustar ambsambsXML-DT-0.69/examples/jj.dtd0000644000175000017500000000025710730327732013771 0ustar ambsambs XML-DT-0.69/examples/ex10.3.xml0000644000175000017500000000053210730327732014325 0ustar ambsambs U.M. University of Minho 1111 1112 1113 Portugal J.Joao; J.Rocha; J.Ramalho Computer science informatic history XML-DT-0.69/examples/ex.xml0000644000175000017500000000054310730327732014025 0ustar ambsambs Luís Represas Camões an element with attributs Fora do tempo letra incompleta An empty element with attributs Fora do tempo ... Vindos da mesma mãe.. O tempo também se engana ... XML-DT-0.69/examples/ex11.1.xml0000644000175000017500000000042310730327732014323 0ustar ambsambs name0
address00
address01
name1
address10
address11
XML-DT-0.69/examples/pub.pl0000644000175000017500000000036310730327732014012 0ustar ambsambs#!/usr/bin/perl use XML::DT ; my $filename = shift; %xml=( 'ARTIGO' => sub{"------------\n$c\n"}, 'TEXTO' => sub{""}, '-default' => sub{"$q - $c\n"}, '-outputenc' => 'ISO-8859-1' ); print &dt($filename,%xml); XML-DT-0.69/examples/lat1.html0000644000175000017500000000102510730327732014412 0ustar ambsambs C<lat1.pm> - module for unicode utf8 -> latin1 translation of strings

NAME

lat1.pm - module for unicode utf8 -> latin1 translation of strings


SYNOPSIS

   $latin1string = lat1::utf8($utf8string)

XML-DT-0.69/examples/arq.xml0000644000175000017500000001367110730327732014202 0ustar ambsambs 9809/P0000146.JPG1536 1024 Arcada Augustsson Braga Praça 9809/P0000151.JPG 1536 1024 Prédio da Brasileira café Brasileira (Braga) Augustsson Braga café 9809/P0000153.JPG1536 1024 Jardins de S.Bárbara, flores Augustsson Braga Jardim 9809/P0000154.JPG1536 1024 flores (não sei o nome) Augustsson Braga/Jardim S.Bárbara flor 9809/P0000156.JPG1536 1024 flores (não sei o nome) Augustsson Braga/Jardim S.Bárbara flor 9809/P0000157.JPG1536 1024 Biblioteca: edifício medieval, visto de jardim de S. Bárbara Augustsson Braga monumento jardim 9809/P0000158.JPG1536 1024 Câmara Municipal de Braga Augustsson Braga convento praça 9809/P0000160.JPG1536 1024 Biblioteca Pública de Braga entrada barroca Augustsson Braga/Praça do Município monumento biblioteca 9809/P0000161.JPG1536 1024 Sé de Braga:Estátua da Senhora do Leite Augustsson Braga/Sé estátua 9809/P0000163.JPG1536 1024 Igreja de S.Marcos Augustsson Braga igreja 9809/P0000164.JPG1536 1024 Igreja de St. Cruz Augustsson Braga igreja 9809/P0000240.JPG1536 1024 Estátua de D.Afonso Henriques Augustsson Guimarães estátua 9809/P0000254.JPG1536 1024 Rua estreita em Guimarães Augustsson Guimarães rua 9809/P0000256.JPG1536 1024 Biblioteca antiga de Guimarães, junto à Oliveira Augustsson Guimarães monumento 9809/P0000257.JPG1536 1024 Cruzeiro junto à Oliveira Augustsson Guimarães monumento 9809/P0000241.JPG1536 1024 Paço dos Duques de Bragança Augustsson Guimarães Paço 9811/brasileira.jpg 1300 1000 Esplanada do café Brasileira jj café Brasileira (Braga) Braga café esplanada 1989 9811/castelo.jpg1300 1000 Rua do Castelo, torre da igreja dos Terceiros jj Braga rua igreja_po 9811/claraboi.jpg1300 1000 Pópulo: torreão visto pelas traseiras jj Braga promenor arq. igreja_po 1997 9811/janela.jpg1300 1000 Largo de S.Victor: Janelas de rótulo e nicho jj Braga janela casa 1989 9811/mosteiro.jpg1300 1000 Mosteiro de Rendufe jj Braga igreja monumento 9811/ruina.jpg1300 1000 R. D. Pedro V: Casa arruinada; janela guilhotina/3 jj Braga casa janela 1989 9811/tabique.jpg1300 1000 R. ?: Casa arruinada com grade do tabique e janelas de rótula jj Braga casa janela promenor arq. 1989 9812/santo.jpg1036 690 Cara de um santo, Bom Jesus Cara de um santo numa das capelinhas da via-sacra do Bom Jesus jbb Braga/Bom Jesus estátua santo 1998 9902/cabreira.jpg1024 728 Paisagem da Cabreira paisagem da serra da Cabreira junto `a casa abrigo da Serradela jj Minho/Serra da Cabreira/serradela aguarela paisagem etnografia 1998 9902/flautista.jpg840 680 flautista velho a tocar flauta (desenho a caneta, preto e branco) jj desenho música etnografia 1998 9902/mariana1.jpg340 680 Marina a desenhar Marina a desenhar (desenho a grafite) jj desenho 1998 XML-DT-0.69/examples/ex6.pl0000644000175000017500000000113410730327732013723 0ustar ambsambs#!/usr/bin/perl use XML::DT ; use Data::Dumper; sub XML::DT::Dumper {}# Dumper(shift)} my $filename = shift; %xml=( '-default' => sub{ if(inctxt('url')) {"\n (sunof url) $q:$c\n pai=". ctxt(1). "\n" } elsif(inctxt('foto')) {"\n (sun of foto) $q:$c\n pai=". ctxt(1). "\n" } elsif(inctxt('desenho.*')) {"\n (desenho....) $q:$c\n pai=". ctxt(1). "\n" } else {"\n (outros) $q:$c\n pai=". ctxt(1). "\n" } }, '-outputenc' => 'ISO-8859-1' ); # print dt("$filename"); print dt($filename,%xml); XML-DT-0.69/examples/10nov.sgm0000644000175000017500000000247111411137227014337 0ustar ambsambs Terça-feira, 10 de Novembro de 1998 1ª PÁGINA Governo Preocupado com Vitória do "Não" Guterres Encomenda Estudo de Popularidade

A vitória do "não" deixou o Governo intranquilo. Olha-se para a "Alternativa Democrática" mas também para as consequências da derrota do "sim" na imagem de Guterres e do seu Executivo. Por isso decidiram, ontem, mandar fazer uma sondagem. A da RTP-1, no domingo, que dava maioria ao PS, não chegou para dar confiança aos socialistas.

Mas em relação à "AD" o tempo é de expectativa. Ambos os partidos criticaram durante duramente a proposta de Orçamento por, alegadamente, agravar a carga fiscal sobre a classe média. Mas ainda não apresentaram propostas alternativas. Só perante isso é que os socialistas definirão o que é negociável e o que não é. Aí, garantiu o membro do Governo ouvido pelo PÚBLICO, haverá "muita firmeza". 1ª PÁGINA Que Fazer com Pinochet?

A defesa de Pinochet começou a puxar dos trunfos: a continuada prisão do general pode ter consequências no Chile, onde as coisas são "frágeis". A Espanha formaliza hoje à Grã-Bretanha o pedido de extradição do militar. Os Lordes poderão anunciar a sua decisão na quinta-feira.

XML-DT-0.69/examples/ex11.5.xml0000644000175000017500000000273110730327732014333 0ustar ambsambs

XML-DT-0.69/examples/ex3.xml0000644000175000017500000000061110730327732014104 0ustar ambsambs Luís Represas Camões an element with attributs Fora do tempo letra incompleta An empty element with attributs Fora do tempo ... Vindos da mesma mãe.. O tempo também se engana ... XML-DT-0.69/examples/ex10.2.xml0000644000175000017500000000036210730327732014325 0ustar ambsambs U.M. University of Minho 1111 1112 1113 Portugal J.Joao; J.Rocha; J.Ramalho XML-DT-0.69/examples/ex10.3.pl0000644000175000017500000000055610730327732014146 0ustar ambsambs#!/usr/bin/perl use XML::DT; use Data::Dumper; print Dumper(MMAPON('name')); %handler = ( contacts => sub{ [ split(";",$c)] }, -default => sub{$c}, -type => { institution => 'MAP', degrees => MMAPON('name'), tels => 'SEQ' }); $a = dt(shift, %handler); print Dumper($a); XML-DT-0.69/examples/publico.dtd0000644000175000017500000000040111411137227015005 0ustar ambsambs XML-DT-0.69/examples/ex2.pl0000644000175000017500000000157110730327732013724 0ustar ambsambs#!/usr/bin/perl use XML::DT ; use Data::Dumper; my $filename = shift; %xml2=('foto' => sub{$ind{$isa}= [@{$ind{$isa}},$url]; $isa=$url=""; "
  • $c"}, 'url' => sub{$url=$c;"$c"}, 'isa' => sub{$isa=lc($c);$c}, 'author' => sub{uc($c) }, 'resol' => sub{""}, 'arq' => sub{"Indice\n". Dumper(\%ind) . "----------\n$c"}, '-default'=> sub{"$q:$c"}, '-outputenc' => 'ISO-8859-1' ); %xml=( 'music' => sub{"Autor da musica: $c"}, 'musica' => sub{"--------------(AMP)--------\n$c"}, 'lyrics' => sub{"Autor da letra:$c"}, 'title' => sub{ uc($c) . $v{acordes} }, '-default' => sub{"$q:$c"}, 'arquivo' => sub{("_" x 60). dt($v{file},%xml2)}, '-outputenc' => 'ISO-8859-1' ); print dt($filename,%xml); XML-DT-0.69/examples/makenewexample0000644000175000017500000000041510730327732015613 0ustar ambsambs#!/usr/bin/perl @l = ; $name = "ofex". (@l+1) .".pl"; open (G,">>../MANIFEST") or die; print G "examples/$name\n"; close G; open (F,">$name ") or die; print F "#!/usr/bin/perl\n\n=head1 $name\n\n\n\=cut\n\n use XML::DT\n\n"; close F; exec("vi $name"); XML-DT-0.69/examples/arq.pl0000644000175000017500000000212610730327732014006 0ustar ambsambs#!/usr/bin/perl use XML::DT ; my $filename = shift; $f=sub{for (@isa) {$ind{$_}= [@{$ind{$_}},[$url,$title]]}; $url=$title=""; @isa=(); "
  • $c"}; %xml=( 'foto' => $f, 'desenho' => $f, 'aguarela'=> $f, 'url' => sub{$url=$c;""}, 'title' => sub{$title=$c;"$c[$res]
    "}, 'isa' => sub{push(@isa,n($c));" ($c)"}, 'author' => sub{ " $c " }, 'resol' => sub{$res=$c;""}, 'arq' => sub{$c}, # '-default'=> sub{"
  • $q:$c"}, '-outputenc' => 'ISO-8859-1', ); $imglist = "
      " . dt($filename,%xml). "
    " ; print "

    Arquivo de imagens

    ", mkind(%ind), "

    Lista das imagens

    $imglist"; sub mkind{ my %a=@_; my $r="

    Indice

      "; for $p (sort keys %a){ $r.= "
    • $p -- " ; } $r . "
    "; } sub n{ my $a= lc(shift) ; for($a){ s/^ +//; s/ +$//; s/ +/ /g; } $a; } XML-DT-0.69/examples/ex10.2.pl0000644000175000017500000000043510730327732014141 0ustar ambsambs#!/usr/bin/perl use XML::DT; %handler = ( contacts => sub{ [ split(";",$c)] }, -default => sub{$c}, -type => { institution => 'MAP', tels => 'SEQ' }); $a = dt("ex10.2.xml", %handler); use Data::Dumper; print Dumper($a); XML-DT-0.69/examples/ex5.pl0000644000175000017500000000103510730327732013722 0ustar ambsambs#!/usr/bin/perl use XML::DT ; use Data::Dumper; sub XML::DT::Dumper {}# Dumper(shift)} my $filename = shift; %xml=( 'music' => sub{"Autor da musica: $c ($v{x})"}, 'musica' => sub{"\n--------------(AMP)$v{x}--------\n$c"}, '-default' => sub{"contexto=".join("/",@dtcontext). "\n $q:$c\n". "pai=". ctxt(1). "\n" }, 'arquivo' => sub{("_" x 60). dt($v{file},%xml2)}, '-outputenc' => 'ISO-8859-1' ); # print dt("$filename"); print dt($filename,%xml); XML-DT-0.69/examples/gcapaper2tex.pl0000644000175000017500000000345410730327732015615 0ustar ambsambs#!/usr/bin/perl =head1 NAME gcapaper2tex.pl - a perl script to translate XML gcapaper DTD to latex =head1 SYNOPSIS gcapapape2tex.pl mypaper.xml > mupaper.tex =head1 Notes This is an example of the use of XML::DT module. It was partially generated with mkskel.pl script =cut use XML::DT ; my $filename = shift; my $beginLatex = '\documentclass{article} \usepackage[latin1]{inputenc} \usepackage{t1enc} \bibliographystyle{plain} \begin{document} '; my $endLatex = '\end{document} '; my @aut=(); %handler=( '-outputenc' => 'ISO-8859-1', '-default' => sub{"$c"}, 'RANDLIST' => sub{"\\begin{itemize}$c\\end{itemize}"}, 'AFFIL' => sub{""}, 'TITLE' => sub{ if(inctxt('SECTION')){"\\section{$c}"} elsif(inctxt('SUBSEC1')){"\\subsection{$c}"} else {"\\title{$c}"} }, 'GCAPAPER' => sub{"$beginLatex $c $endLatex"}, 'PARA' => sub{"$c\n\n"}, 'ADDRESS' => sub{"\\thanks{$c}"}, 'PUB' => sub{"} $c"}, 'FNAME' => sub{" $c"}, 'EMAIL' => sub{"(\\texttt{$c}) "}, 'FRONT' => sub{"$c\n"}, 'AUTHOR' => sub{ push @aut, $c ; ""}, 'ABSTRACT' => sub{ sprintf('\author{%s}\maketitle\begin{abstract}%s\end{abstract}', join ('\and', @aut) , $c) }, 'CODE.BLOCK' => sub{"\\begin{verbatim}\n$c\\end{verbatim}\n"}, 'XREF' => sub{"\\cite{$v{REFLOC}}"}, 'SECTION' => sub{"$c"}, 'LI' => sub{"\\item $c"}, 'BIBLIOG' => sub{"\n\\begin{thebibliography}{1}\n$c\n\\end{thebibliography}\n"}, 'HIGHLIGHT' => sub{" \\emph{$c} "}, 'BIO' => sub{""}, 'SURNAME' => sub{" $c "}, 'CODE' => sub{"\\verb!$c!"}, 'BIBITEM' => sub{"\n\\bibitem{$c"}, ); print dt($filename,%handler); =head1 author J.Joao Almeida (jj@di.uminho.pt) XML-DT-0.69/examples/ex11.1.pl0000644000175000017500000000062110730327732014136 0ustar ambsambs#!/usr/bin/perl use XML::DT; use Data::Dumper; %handler = ( -default => sub{$c}, person => sub{ print " $c->{name} $c->{address}[0] Dear $c->{name} Merry Christmas from Morris\n"; $c;}, -type => { people => 'SEQ', person => MMAPON('address')}); $people = dt("ex11.1.xml", %handler); print Dumper($people); print $people->[1]{address}[1]; XML-DT-0.69/examples/makefile0000644000175000017500000000057510730327732014374 0ustar ambsambsall: arq.pl arq.xml > _.html scp _.html alfarrabio.um.geira.pt:public_html/img_arq/lista.html arq2.pl arq.xml > _.d1 scp _.d1 alfarrabio.um.geira.pt:aea/img_arq.d1 A=xml::dt.html $a: XML/DT.pm pod2html --title XML/DT.pm > xml::dt.html _titulos: pub.pl _10nov.xml pub.pl _10nov.xml > _titulos _10nov.xml: 10nov.sgm publico.dtd sx 10nov.sgm > _10nov.xml clean: rm -f _* XML-DT-0.69/examples/ex7.pl0000644000175000017500000000740010730327732013726 0ustar ambsambs#!/usr/bin/perl # # see comments in the end # use XML::DT ; my $filename = shift; my $beginLatex = '\documentclass{article} \usepackage[latin1]{inputenc} \usepackage{t1enc} \bibliographystyle{plain} \begin{document} '; my $endLatex = '\end{document} '; my @aut=(); %handler=( '-outputenc' => 'ISO-8859-1', # '-default' => sub{"<$q>$c"}, '-begin' => sub{print"BEGIN\n"}, '-end' => sub{print"end\n";"$beginLatex$c$endLatex"}, '-pcdata' => sub{ if(inctxt('(SECTION|SUBSEC1)')) {$c =~ s/[\s\n]+/ /g; $c } $c }, 'RANDLIST' => sub{"\\begin{itemize}$c\\end{itemize}"}, 'AFFIL' => sub{""}, 'TITLE' => sub{ if(inctxt('SECTION')){"\\section{$c}"} elsif(inctxt('SUBSEC1')){"\\subsection{$c}"} else {"\\title{$c}"} }, 'GCAPAPER' => sub{"$c"}, 'PARA' => sub{"$c\n\n"}, 'ADDRESS' => sub{"\\thanks{$c}"}, 'PUB' => sub{"} $c"}, 'FNAME' => sub{" $c"}, 'EMAIL' => sub{"(\\texttt{$c}) "}, 'FRONT' => sub{"$c\n"}, 'REAR' => sub{"$c"}, 'BIB' => sub{"$c"}, 'BODY' => sub{"$c"}, 'AUTHOR' => sub{ push @aut, $c ; ""}, 'ABSTRACT' => sub{ sprintf('\author{%s}\maketitle\begin{abstract}%s\end{abstract}', join ('\and', @aut) , $c) }, 'CODE.BLOCK' => sub{"\\begin{verbatim}\n$c\\end{verbatim}\n"}, 'XREF' => sub{"\\cite{$v{REFLOC}}"}, 'SECTION' => sub{"$c"}, 'LI' => sub{"\\item $c"}, 'SUBSEC1' => sub{"$c"}, 'BIBLIOG' => sub{"\n\\begin{thebibliography}{1}\n$c\n\\end{thebibliography}\n"}, 'HIGHLIGHT' => sub{" \\emph{$c} "}, 'BIO' => sub{""}, 'SURNAME' => sub{" $c "}, 'CODE' => sub{"\\verb!$c!"}, 'BIBITEM' => sub{"\n\\bibitem{$c"}, ); print dt($filename,%handler); =head1 NAME gcapaper2tex.pl - a perl script to translate XML gcapaper DTD to latex =head1 SYNOPSIS gcapapape2tex.pl mypaper.xml > mupaper.tex =head1 notes This is an example of the use of XML::DT module =head1 The Code use XML::DT ; my $filename = shift; my $beginLatex = '\documentclass{article} \usepackage[latin1]{inputenc} \usepackage{t1enc} \bibliographystyle{plain} \begin{document} '; my $endLatex = '\end{document} '; my @aut=(); %handler=( '-outputenc' => 'ISO-8859-1', # '-default' => sub{"<$q>$c"}, '-pcdata' => sub{ if(inctxt('(SECTION|SUBSEC1)')) {$c =~ s/[\s\n]+/ /g; $c } $c }, 'RANDLIST' => sub{"\\begin{itemize}$c\\end{itemize}"}, 'AFFIL' => sub{""}, 'TITLE' => sub{ if(inctxt('SECTION')){"\\section{$c}"} elsif(inctxt('SUBSEC1')){"\\subsection{$c}"} else {"\\title{$c}"} }, 'GCAPAPER' => sub{"$beginLatex $c $endLatex"}, 'PARA' => sub{"$c\n\n"}, 'ADDRESS' => sub{"\\thanks{$c}"}, 'PUB' => sub{"} $c"}, 'FNAME' => sub{" $c"}, 'EMAIL' => sub{"(\\texttt{$c}) "}, 'FRONT' => sub{"$c\n"}, 'REAR' => sub{"$c"}, 'BIB' => sub{"$c"}, 'BODY' => sub{"$c"}, 'AUTHOR' => sub{ push @aut, $c ; ""}, 'ABSTRACT' => sub{ sprintf('\author{%s}\maketitle\begin{abstract}%s\end{abstract}', join ('\and', @aut) , $c) }, 'CODE.BLOCK' => sub{"\\begin{verbatim}\n$c\\end{verbatim}\n"}, 'XREF' => sub{"\\cite{$v{REFLOC}}"}, 'SECTION' => sub{"$c"}, 'LI' => sub{"\\item $c"}, 'SUBSEC1' => sub{"$c"}, 'BIBLIOG' => sub{"\n\\begin{thebibliography}{1}\n$c\n\\end{thebibliography}\n"}, 'HIGHLIGHT' => sub{" \\emph{$c} "}, 'BIO' => sub{""}, 'SURNAME' => sub{" $c "}, 'CODE' => sub{"\\verb!$c!"}, 'BIBITEM' => sub{"\n\\bibitem{$c"}, ); print dt($filename,%handler); =head1 author J.Joao Almeida (jj@di.uminho.pt) XML-DT-0.69/examples/ex1.pl0000644000175000017500000000056210730327732013722 0ustar ambsambs#!/usr/bin/perl use XML::DT ; my $filename = shift; %xml=( 'music' => sub{"Autor da musica: $c"}, 'musica' => sub{"--------------(AMP)--------\n$c"}, 'lyrics' => sub{"Autor da letra:$c"}, 'title' => sub{ uc($c) . $v{acordes} }, '-default' => sub{"$q:$c"}, '-outputenc' => 'ISO-8859-1' ); print &dt($filename,%xml); XML-DT-0.69/examples/README0000644000175000017500000000121410730327732013543 0ustar ambsambs XML::DT examples A simple example using xml::dt ex.xml - a simple xml file (use ex1.pl ex.xml) ex1.pl - perl program to process it Generation of a html page with an index for a image archive see also http://alfarrabio.um.geira.pt/~jj/img_arq arq.pl - perl program (use make) arq.xml - xml arquive file makefile - example using recursive use of XML::DT ex2.pl - a more complex example (use ex2.pl ex.xml) example using SGML, sx and XML::DT 10nov.sgm - SGML file publico.dtd - DTD _10nov.sgm - xml translation (sx 10nov.sgm > ...) _titulos - make _titulos pub.pl - XML-DT-0.69/examples/ex11.5.pl0000644000175000017500000000267710730327732014157 0ustar ambsambs####################################################### #I am using XML::DT as below (the require is for selective module loading): ####################################################### # #require XML::DT; XML::DT->import (); # Yes !!! use strict; use Data::Dumper; use XML::DT; my @order=qw(volume issue doi author f_page l_page artid epub ppub type); my $M; my %handler = ( '-default' => sub {$c}, 'article' => sub { # $v{issue} = $dtattributes[1]->{number}; # $v{volume} = $dtattributes[2]->{number}; $v{issue} = father("number"); $v{volume} = gfather("number"); $M .= join(" :\t", @v{(@order)}) . "\n"; }, ); dt ("ex11.5.xml", %handler); print $M; __END__
    XML-DT-0.69/examples/ex10.1.pl0000644000175000017500000000156010730327732014140 0ustar ambsambs#!/usr/bin/perl use XML::DT; use Data::Dumper; %handler = ( -default => sub{$c}, packages => sub{ [ split(",",$c)] }, -type => { vendorinfo => 'MAP', paks => 'SEQ' } ); $a = dtstring (" netscape.com netscape.commmm Netscape active communicator-4.07, communicator-4.5, navigator-4.07 ",%handler); print Dumper($a); $a = dtstring (" netscape.com netscape.commmm Netscape active communicator-4.07 communicator-4.5, navigator-4.07 ",%handler); print Dumper($a); XML-DT-0.69/examples/XPath/0000755000175000017500000000000013457371530013715 5ustar ambsambsXML-DT-0.69/examples/XPath/ex8.pl0000644000175000017500000000042510730327732014753 0ustar ambsambs#!/usr/bin/perl use XML::DT ; my $filename = "ex8.xml";; # tests number comparasion and string-length function %handler=( '-inputenc' => 'ISO-8859-1', '//*[string-length(name())<2]' => sub{ print "$c\n";toxml }, ); pathdt($filename,%handler); print "\n"; XML-DT-0.69/examples/XPath/ex4.pl0000644000175000017500000000055510730327732014753 0ustar ambsambs#!/usr/bin/perl use XML::DT ; my $filename = 'ex4.xml'; # tests atribute with name %handler=( # '-outputenc' => 'ISO-8859-1', # '-default' => sub{ ""}, # 'ccc' => sub{"$q:$c"}, '//*[@id]' => sub{print "$c\n";toxml}, # 'ddd' => sub{"$q:$c"}, # 'bbb' => sub{"$q:$c"}, # 'eee' => sub{"$q:$c"}, ); pathdt($filename,%handler); print "\n"; XML-DT-0.69/examples/XPath/ex3.pl0000644000175000017500000000055010730327732014745 0ustar ambsambs#!/usr/bin/perl use XML::DT ; my $filename = 'ex3.xml'; # Paths containing * %handler=( # '-outputenc' => 'ISO-8859-1', # '-default' => sub{ ""}, # 'ccc' => sub{"$q:$c"}, '//*/*/bbb' => sub{print "$c\n";toxml}, # 'ddd' => sub{"$q:$c"}, # 'bbb' => sub{"$q:$c"}, # 'eee' => sub{"$q:$c"}, ); pathdt($filename,%handler); print "\n"; XML-DT-0.69/examples/XPath/ex6.xml0000644000175000017500000000036610730327732015142 0ustar ambsambs Texto dentro de /aaa/bbb Texto dentro de /aaa Texto dentro de /aaa/bbb/ddd/eee Texto dentro de /aaa/bbb/ddd/bbb XML-DT-0.69/examples/XPath/ex6.pl0000644000175000017500000000044710730327732014755 0ustar ambsambs#!/usr/bin/perl use XML::DT ; my $filename = "ex6.xml";; # Test comparasion of atribute with string... # tests the normalize-space function %handler=( '-inputenc' => 'ISO-8859-1', '//*[normalize-space(@id) = "a"]' => sub{print "$c\n";toxml}, ); pathdt($filename,%handler); print "\n"; XML-DT-0.69/examples/XPath/ex7.xml0000644000175000017500000000036710730327732015144 0ustar ambsambs Texto dentro de /aaa/bbb Texto dentro de /aaa Texto dentro de /aaa/bbb/ddd/eee Texto dentro de /aaa/bbb/ddd/bbb XML-DT-0.69/examples/XPath/ex3.xml0000644000175000017500000000034610730327732015135 0ustar ambsambs Texto dentro de /aaa/bbb Texto dentro de /aaa Texto dentro de /aaa/bbb/ddd/eee Texto dentro de /aaa/bbb/ddd/bbb XML-DT-0.69/examples/XPath/ex2.pl0000644000175000017500000000057710730327732014755 0ustar ambsambs#!/usr/bin/perl use lib qw(../..); use XML::DT ; my $filename = 'ex2.xml'; # pending paths %handler=( # '-outputenc' => 'ISO-8859-1', # '-default' => sub{"<$q>$c"}, # 'ccc' => sub{"$q:$c"}, '//ddd/bbb' => sub{print "$c";toxml}, # 'ddd' => sub{"$q:$c"}, # 'bbb' => sub{"$q:$c"}, # 'eee' => sub{"$q:$c"}, ); pathdt($filename,%handler); print "\n"; XML-DT-0.69/examples/XPath/ex8.xml0000644000175000017500000000036210730327732015140 0ustar ambsambs Texto dentro de /aaa/bbb Texto dentro de /aaa Texto dentro de /aaa/bbb/ddd/eee Texto dentro de /aaa/bbb/ddd/bbb XML-DT-0.69/examples/XPath/ex5.pl0000644000175000017500000000057610730327732014757 0ustar ambsambs#!/usr/bin/perl use XML::DT ; my $filename = 'ex5.xml'; # tests if exist @* atribute or not(@*) %handler=( # '-outputenc' => 'ISO-8859-1', # '-default' => sub{ ""}, # 'ccc' => sub{"$q:$c"}, '//*[not(@*)]' => sub{print "$c\n";toxml}, # 'ddd' => sub{"$q:$c"}, # 'bbb' => sub{"$q:$c"}, # 'eee' => sub{"$q:$c"}, ); pathdt($filename,%handler); print "\n"; XML-DT-0.69/examples/XPath/ex5.xml0000644000175000017500000000036410730327732015137 0ustar ambsambs Texto dentro de /aaa/bbb Texto dentro de /aaa Texto dentro de /aaa/bbb/ddd/eee Texto dentro de /aaa/bbb/ddd/bbb XML-DT-0.69/examples/XPath/ex7.pl0000644000175000017500000000041610730327732014752 0ustar ambsambs#!/usr/bin/perl use XML::DT ; my $filename = "ex7.xml";; # Test the name() # tests the starts_with and the contains %handler=( '-inputenc' => 'ISO-8859-1', '//*[not(contains(name(),"c"))]' => sub{print "$c\n";toxml}, ); pathdt($filename,%handler); print "\n"; XML-DT-0.69/examples/XPath/ex4.xml0000644000175000017500000000036410730327732015136 0ustar ambsambs Texto dentro de /aaa/bbb Texto dentro de /aaa Texto dentro de /aaa/bbb/ddd/eee Texto dentro de /aaa/bbb/ddd/bbb XML-DT-0.69/examples/XPath/ex1.pl0000644000175000017500000000057510730327732014752 0ustar ambsambs#!/usr/bin/perl use lib qw(../..); use XML::DT ; my $filename = 'ex1.xml'; # simple complete paths %handler=( # '-outputenc' => 'ISO-8859-1', # '-default' => sub{"<$q>$c"}, # 'ccc' => sub{"$q:$c"}, '/aaa' => sub{print "$c";}, # 'ddd' => sub{"$q:$c"}, # 'bbb' => sub{"$q:$c"}, # 'eee' => sub{"$q:$c"}, ); pathdt($filename,%handler); print "\n"; XML-DT-0.69/examples/XPath/ex1.xml0000644000175000017500000000032510730327732015130 0ustar ambsambs Texto dentro de /aaa/bbb Texto dentro de /aaa Texto dentro de /aaa/bbb/ddd/eee Texto dentro de /aaa/bbb/ddd XML-DT-0.69/examples/XPath/ex2.xml0000644000175000017500000000034610730327732015134 0ustar ambsambs Texto dentro de /aaa/bbb Texto dentro de /aaa Texto dentro de /aaa/bbb/ddd/eee Texto dentro de /aaa/bbb/ddd/bbb XML-DT-0.69/README0000644000175000017500000002015010730327732011725 0ustar ambsambs=head1 XML::DT a Perl XML down translate module With XML::DT, I think that: . it is simple to do simple XML processing tasks :) . it is simple to have the XML processor stored in a single variable (see example 4) . it is simple to translate XML -> Perl user controlled complex structure with a compact "-type" definition (see last section) Feedback welcome -> jj@di.uminho.pt =head1 XML::DT a Perl XML down translate module This document is also available in HTML (pod2html'ized): http://www.di.uminho.pt/~jj/perl/XML/XML-DT.readme.html . design to do simple and compact translation/processing of XML document . it includes some features of omnimark and sgmls.pm; functional approach . it includes functions to automatic build user controlled complex Perl structures (see "working with structures" section) . it was build to show my NLP Perl students that it is easy to work with XML . home page and download: http://www.di.uminho.pt/~jj/perl/XML/DT.html =head1 HOW IT WORKS: . the user must define a handler and call the basic function : dt($filename,%handler) or dtstring($string,%handler) . the handler is a HASH mapping element names to functions. Handlers can have a "-default" function , and a "-end" function . in order to make it smaller each function receives 3 args as global variables $c - contents $q - element name %v - attribute values . the default "-default" function is the identity. The function "toxml" makes the original XML text based on $c, $q and %v values. . see some advanced features in the last examples =head1 SOME simple (naive) examples: INDEX: 1. change to lowercase attribute named "a" in element "e" 2. better solution 3. make some statistics and output results in HTML (using side effects) 4. In a HTML like XML document, substitute ... by the real table of contents (a dirty solution...) 5. a more realistic example: from XML gcapaper DTD to latex WORKING WITH STRUCTURES INSTEAD OF STRINGS... 6. Build the natural Perl structure of the following document (ARRAY,HASH) 7. Multi map on... =head2 1. change to lowercase the contents of the attribute named "a" in element "e" use XML::DT ; my $filename = shift; print dt($filename, ( e => sub{ "$c" })); =head2 2. A better solution of the previous example Ex.1 wouldn't work if we have more attributes in element e. A better solution is print dt($filename, ( e => sub{ $v{a} = lc($v{a}); toxml();})); =head2 3. make some statistics and output results in HTML (using side effects) use XML::DT ; my $filename = shift; %handler=( -default => sub{$elem_counter++; $elem_table{$q}++;"";} # $q -> element name ); dt($filename,%handler); print "

    We have found $elem_counter elements in document

    "; print "
    ELEMENTOCCURS\n"; foreach $elem (sort keys %elem_table) {print "
    $elem$elem_table{$elem}\n";} print "
    "; =head2 4. In a HTML like XML document, substitute ... by the real table of contents (a dirty solution...) %handler=( h1 => sub{ $index .= "\n$c"; toxml();}, h2 => sub{ $index .= "\n\t$c"; toxml();}, h3 => sub{ $index .= "\n\t\t$c"; toxml();}, contents => sub{ $c="__CLEAN__"; toxml();}, -end => sub{ $c =~ s/__CLEAN__/$index/; $c}); print dt($filename,%handler) =head2 5. a more realistic example: from XML gcapaper DTD to latex notes: . "TITLE" is processed in context dependent way! . output in ISOLATIN1 (this is dirty but my LaTeX doesn't support UNICODE) . a stack of authors was necessary because LaTeX structure was different from input structure... . this example was partially created by the function mkdtskel Perl -MXML::DT -e 'mkdtskel "f.xml"' > f.pl and took me about one hour to tune to real LaTeX/XML example. NAME gcapaper2tex.pl - a Perl script to translate XML gcapaper DTD to latex SYNOPSIS gcapaper2tex.pl mypaper.xml > mupaper.tex use XML::DT ; my $filename = shift; my $beginLatex = '\documentclass{article} \begin{document} '; my $endLatex = '\end{document}'; %handler=( '-outputenc' => 'ISO-8859-1', '-default' => sub{"$c"}, 'RANDLIST' => sub{"\\begin{itemize}$c\\end{itemize}"}, 'AFFIL' => sub{""}, # delete affiliation 'TITLE' => sub{ if(inctxt('SECTION')){"\\section{$c}"} elsif(inctxt('SUBSEC1')){"\\subsection{$c}"} else {"\\title{$c}"} }, 'GCAPAPER' => sub{"$beginLatex $c $endLatex"}, 'PARA' => sub{"$c\n\n"}, 'ADDRESS' => sub{"\\thanks{$c}"}, 'PUB' => sub{"} $c"}, 'EMAIL' => sub{"(\\texttt{$c}) "}, 'FRONT' => sub{"$c\n"}, 'AUTHOR' => sub{ push @aut, $c ; ""}, 'ABSTRACT' => sub{ sprintf('\author{%s}\maketitle\begin{abstract}%s\end{abstract}', join ('\and', @aut) , $c) }, 'CODE.BLOCK' => sub{"\\begin{verbatim}\n$c\\end{verbatim}\n"}, 'XREF' => sub{"\\cite{$v{REFLOC}}"}, 'LI' => sub{"\\item $c"}, 'BIBLIOG' =>sub{"\\begin{thebibliography}{1}$c\\end{thebibliography}\n"}, 'HIGHLIGHT' => sub{" \\emph{$c} "}, 'BIO' => sub{""}, #delete biography 'SURNAME' => sub{" $c "}, 'CODE' => sub{"\\verb!$c!"}, 'BIBITEM' => sub{"\n\\bibitem{$c"}, ); print dt($filename,%handler); =head1 WORKING WITH STRUCTURES INSTEAD OF STRINGS... the "-type" definition defines the way to build structures in each case: . "HASH" or "MAP" -> make an hash with the sub-elements; keys are the sub-element names; warn on repetitions; returns the hash reference. . "ARRAY" or "SEQ" -> make an ARRAY with the sub-elements returns an array reference. . "MULTIMAP" -> makes an HASH of ARRAY; keys are the sub-element . MMAPON(name1, ...) -> similar to HASH but accepts repetitions of the sub-elements "name1"... (and makes an array with them) . STR ->(DEFAULT) concatenates all the sub-elements returned values all the sub-element should return strings to be concatenated =head2 6. Build the natural Perl structure of the following document U.M. University of Minho 1111 1112 1113 Portugal J.Joao; J.Rocha; J.Ramalho use XML::DT; %handler = ( -default => sub{$c}, -type => { institution => 'HASH', tels => 'ARRAY' }, contacts => sub{ [ split(";",$c)] }, ); $a = dt("ex10.2.xml", %handler); $a is a reference to an HASH: { 'tels' => [ 1111, 1112, 1113 ], 'name' => 'University of Minho', 'where' => 'Portugal', 'id' => 'U.M.', 'contacts' => [ 'J.Joao', ' J.Rocha', ' J.Ramalho' ] }; =head2 7. Christmas card... We have the following address book: name0
    address00
    address01
    name1
    address10
    address11
    Now we are going to build a structure to store the address book and write a Christmas card to the first address of everyone #!/usr/bin/perl use XML::DT; %handler = ( -default => sub{$c}, person => sub{ mkchristmascard($c); $c}, -type => { people => 'ARRAY', person => MMAPON('address')}); $people = dt("ex11.1.xml", %handler); print $people->[0]{address}[1]; # prints address01 sub mkchristmascard{ my $x=shift; open(A,"|lpr") or die; print A <<"."; $x->{name} $x->{address}[0] Dear $x->{name} Merry Christmas from Braga Perl mongers\n . close A; } XML-DT-0.69/mkdtskel0000644000175000017500000000275212366706310012616 0ustar ambsambs#!/usr/bin/perl -s use XML::DT; use XML::DTDParser "ParseDTDFile"; use File::Temp; our ($dtd, $html, $lines); my $filename = shift; $dtd = 1 if $filename =~ m!\.dtd$!; if ($dtd) { mkdtskel_fromDTD ($filename, @ARGV); } else { if ($lines) { ## XXX - fixme, using head/xmllint is not a good idea my $tmpfile = File::Temp->new( UNLINK => 0 )->filename; system("head -$lines $filename | xmllint --recover - > $tmpfile"); $filename = $tmpfile; } if ($html) { mkdtskel("-html", $filename, @ARGV); } else { mkdtskel($filename, @ARGV); } unlink($filename) if $lines; } __END__ =encoding utf-8 =head1 NAME mkdtskel - Perl code skeleton generator to process XML files with XML::DT =head1 SYNOPSIS mkdtskel [-lines=20000] mkdtskel -dtd mkdtskel -html =head1 DESCRIPTION Use this command to prepare a skeleton file with basic code needed to process your XML file with XML::DT; The command checks the element names and for each one, the attributes. This information is described on the generated file to remember the programmer. =head1 SEE ALSO XML::DT(1), mkdtdskel(1), mkxmltype(1) and perl(1) =head1 AUTHORS Jose Joao Almeida, Alberto Manuel Simões, =head1 COPYRIGHT AND LICENSE Copyright 1999-2013 by Projecto Natura This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut