XML-Easy-0.009000755001750001750 011652350550 13016 5ustar00zeframzefram000000000000XML-Easy-0.009/META.yml000444001750001750 242011652350543 14424 0ustar00zeframzefram000000000000--- abstract: 'XML processing with a clean interface' author: - 'Andrew Main (Zefram) ' build_requires: Encode: 0 IO::File: 0 Module::Build: 0 Params::Classify: 0 Scalar::Util: 0 Test::More: 0 perl: 5.008 strict: 0 utf8: 0 warnings: 0 configure_requires: Module::Build: 0 perl: 5.008 strict: 0 warnings: 0 dynamic_config: 0 generated_by: 'Module::Build version 0.38, CPAN::Meta::Converter version 2.112621' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: 1.4 name: XML-Easy provides: XML::Easy: file: lib/XML/Easy.pm version: 0.009 XML::Easy::Classify: file: lib/XML/Easy/Classify.pm version: 0.009 XML::Easy::Content: file: lib/XML/Easy/Content.pm version: 0.009 XML::Easy::Element: file: lib/XML/Easy/Element.pm version: 0.009 XML::Easy::NodeBasics: file: lib/XML/Easy/NodeBasics.pm version: 0.009 XML::Easy::Syntax: file: lib/XML/Easy/Syntax.pm version: 0.009 XML::Easy::Text: file: lib/XML/Easy/Text.pm version: 0.009 recommends: Internals: 0 XSLoader: 0 requires: Exporter: 0 Params::Classify: 0 parent: 0 perl: 5.008 strict: 0 warnings: 0 resources: license: http://dev.perl.org/licenses/ version: 0.009 XML-Easy-0.009/SIGNATURE000644001750001750 576411652350550 14455 0ustar00zeframzefram000000000000This file contains message digests of all files listed in MANIFEST, signed via the Module::Signature module, version 0.68. To verify the content in this distribution, first make sure you have Module::Signature installed, then type: % cpansign -v It will check each file's integrity, as well as the signature's validity. If "==> Signature verified OK! <==" is not displayed, the distribution may already have been compromised, and you should not run its Makefile.PL or Build.PL. 
-----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 SHA1 32d6a08057b41fcd42ac79dcc49b05ce5a36ee18 .gitignore SHA1 8d83bc3fbe07d6b5d53808ccfffd5c8341935a5b Build.PL SHA1 e0e2f637263a082fd48f15a3b50cfcb4e7b39257 Changes SHA1 4e8542a6982bd6541b0a813998c8cff61196b0c8 MANIFEST SHA1 3a5f5e0b3b1feaa925b77827c196a23494d9dca3 META.json SHA1 31294438feb2724ee33b6838c71a87d9ef1996d1 META.yml SHA1 ea8fb6dc400b0648137c56c063a30fecfb5643f7 Makefile.PL SHA1 1cfeedb34a5ae1dd42ff2fadb1ca18aedef77242 README SHA1 b5122fc3230b0c24084b81b50fa270e86da1c216 lib/XML/Easy.pm SHA1 560b7fc8d56bfe39b277a2fb1d6d82e3de97bedb lib/XML/Easy.xs SHA1 c67879b3256a5f94924ccfb52cab3dda22fba62d lib/XML/Easy/Classify.pm SHA1 b55b487aee6fdf7d96fc7384336a7fd1035819d0 lib/XML/Easy/Content.pm SHA1 0e4fac67c52ac45fa6752532ed2acc7ea6e8cb2d lib/XML/Easy/Element.pm SHA1 bb52db4c3f1f54b464feee140784a6a8d6f9ecdb lib/XML/Easy/NodeBasics.pm SHA1 b662e86cd50afb766362aaef9db25a82360dba8a lib/XML/Easy/Syntax.pm SHA1 f5f9a4f3dadc98cdb0f58c0396e1b499827b0ead lib/XML/Easy/Text.pm SHA1 036a2141f0e7d587a4a3d848d99ad72c1d4b210c lib/XML/Easy/Transform.pod SHA1 9c42bcfc141b86d95b6700ff7aa9809bc2afdf87 t/DataSets.pm SHA1 90b6aa23e026b0953e3d0a87edf2b1bf31599136 t/ErrorCases.pm SHA1 d7811e63cde3452c9e7e594fd1cbc824cead6712 t/classify.t SHA1 22c8bb9af38f3eaa604350884e3fcc9c200b5f1b t/classify_pp.t SHA1 9fbd1badd64a24c882919d3ae79d58fe31f0aa69 t/easy_module.t SHA1 705e274215fc6ae1b5faabea52b16d6ea15d189e t/easy_module_pp.t SHA1 6d4fc1571179554901221f213c4d481452062893 t/node_basics.t SHA1 b359efbd997c8ac7b4ff0982b05779a5d7ac12dd t/node_basics_pp.t SHA1 7b86ea1d51e2ccf1bf4a17603ee5315c2c21b46e t/node_object.t SHA1 dc17bc19573d5e4988a49e0204e7d1400854d7be t/node_object_pp.t SHA1 904d9a4f76525e2303e4b0c168c68230f223c8de t/pod_cvg.t SHA1 3f447b1d0b8a6247c3a311087f8d66da1c3ca5db t/pod_cvg_pp.t SHA1 65c75abdef6f01a5d1588a307f2ddfe2333dc961 t/pod_syn.t SHA1 7a58917aece6323bd3b838dc970b8fc2ff0d85f4 t/read.data SHA1 
1b7a1dac7493e095617c0eab876550de3f5b0140 t/read.t SHA1 58af2dd2fbf80a6548c9eb704e38d00804cf8575 t/read_pp.t SHA1 4c9344067db1d0c06a4bea20bed0a8478f665f3d t/setup_pp.pl SHA1 e02813471673f44d52870dca6209ed6bebf95524 t/syntax_main.t SHA1 e77c6969f37e1b57d640b280146cc588b94cf4e5 t/write.data SHA1 28302d6fb6116276f8e60e2dabedf7b8f0f2f71c t/write.t SHA1 4583c824a750986e01d992d4f8b92b58e70f122c t/write_pp.t -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.10 (GNU/Linux) iEYEARECAAYFAk6p0WMACgkQOV9mt2VyAVE/aQCdHyxCsvEDSwJ6UMuyDEIaFahw H2UAni9beFMlSOaIXbo7BQxV+VQiLo6N =KYoq -----END PGP SIGNATURE----- XML-Easy-0.009/MANIFEST000444001750001750 115511652350543 14310 0ustar00zeframzefram000000000000.gitignore Build.PL Changes MANIFEST META.json META.yml Makefile.PL README lib/XML/Easy.pm lib/XML/Easy.xs lib/XML/Easy/Classify.pm lib/XML/Easy/Content.pm lib/XML/Easy/Element.pm lib/XML/Easy/NodeBasics.pm lib/XML/Easy/Syntax.pm lib/XML/Easy/Text.pm lib/XML/Easy/Transform.pod t/DataSets.pm t/ErrorCases.pm t/classify.t t/classify_pp.t t/easy_module.t t/easy_module_pp.t t/node_basics.t t/node_basics_pp.t t/node_object.t t/node_object_pp.t t/pod_cvg.t t/pod_cvg_pp.t t/pod_syn.t t/read.data t/read.t t/read_pp.t t/setup_pp.pl t/syntax_main.t t/write.data t/write.t t/write_pp.t SIGNATURE Added here by Module::Build XML-Easy-0.009/Changes000444001750001750 1624711652350543 14502 0ustar00zeframzefram000000000000version 0.009; 2011-10-27 * change usage of Params::Classify functions to take advantage of custom ops in Params::Classify 0.012 * fix a latent bug where SvCUR() was used without first checking SvPOK() (but SvPOK was guaranteed by other circumstances) * correct dynamic_config setting to 0 * include META.json in distribution * convert .cvsignore to .gitignore * add MYMETA.json to .gitignore version 0.008; 2010-10-24 * in XS, declare "PROTOTYPES: DISABLE" to prevent automatic generation of unintended prototypes * jump through hoops to avoid compiler warnings * in t/setup_pp.pl, avoid 
a warning and consequent false test failure that occurs if XSLoader::load() is given no arguments, which is now a valid usage version 0.007; 2010-10-15 * bugfix: avoid memory leak in construction of content objects * in XS, use newSVpvs(), sv_catpvs_nomg(), and gv_stashpvs() wherever appropriate * in XS, use PERL_NO_GET_CONTEXT for efficiency * use full stricture in test suite * also test POD coverage of pure Perl implementation * in test suite, make all numeric comparisons against $] stringify it first, to avoid architecture-dependent problems with floating point rounding giving it an unexpected numeric value * in Build.PL, explicitly set needs_compiler to avoid bogus auto-dependency on ExtUtils::CBuilder * list XML::Easy::SimpleSchemaUtil and XML::Easy::Transform::RationalizeNamespacePrefixes in documentation's lists of other distributions * in Build.PL, complete declaration of configure-time requirements * move declaration of $VERSION in XML::Easy::Syntax to match all other modules * slightly reformat some C code to avoid exceeding 80 columns version 0.006; 2010-05-20 * bugfix: in XS, check SvOBJECT before looking at SvSTASH, because the latter isn't guaranteed to be meaningful otherwise * port to Perl 5.11, supporting new first-class regexp objects in type checking * check for required Perl version at runtime * in XS, avoid using "class" as a variable name, for compatibility with C++ compilers * in Build.PL, explicitly declare configure-time requirements * remove bogus "exit 0" from Build.PL * in XML::Easy::NodeBasics documentation, reference XML::Easy::ProceduralWriter * add MYMETA.yml to .cvsignore version 0.005; 2009-08-16 * bugfix: in xml_content_object(), xml_content(), and xml_element(), avoid clobbering function arguments (this occurred when passing a content array as an argument) * in XML::Easy::NodeBasics, add node equality comparison functions * introduce the term "twine" for content arrays, and corresponding methods and functions 
(XML::Easy::Content->twine, XML::Easy::Element->content_twine, XML::Easy::Classify::is_xml_content_twine, XML::Easy::Text::xml10_read_{content,extparsedent}_twine, XML::Easy::NodeBasics::xml{,_c,_e}_content_twine) * in XML::Easy::Classify, add "check_" functions for argument checking * in XML::Easy::NodeBasics, add short names for all functions * tweak introductory documentation to improve comprehension * in documentation, note that returned twine arrays and attribute hashes must not be modified * list Test::XML::Easy in documentation's list of other distributions version 0.004; 2009-05-20 * bugfix: correct character classification code that was incorrectly rejecting characters U+ac00 to U+d7a3 (precomposed Hangul syllables) in XML names * XS implementation of low-level constructors and accessors for element and content nodes * document the XML::Easy::Transform:: namespace * in XS code, fix the char_unicode() function to return the documented sentinel value (it was returning a different value than intended, but one which still operated correctly) version 0.003; 2009-05-13 * in XS code, use the correct "PREINIT:" instead of "INIT:" to introduce variable declarations * in XS code, use the cheaper SvOBJECT() in place of truth-value uses of SvSTASH() * use simpler "parent" pragma in place of "base" version 0.002; 2009-03-07 * in t/syntax_main.t, shortcut test for regexp iteration limit bug on pre-5.10 perls, because the test is liable to overflow the stack * work around Module::Build bug that was causing broken ExtUtils::CBuilder objects that failed to compile anything * to check whether C compilation is available, use Module::Build's more robust ->have_c_compiler method, rather than just checking for the availability of a C builder object * avoid return with expression in void function (not valid in C90) * add casts for pointer target signedness, and other small changes to the C code, to avoid compiler warnings * in XML::Easy::Syntax documentation, note the 
possibility of pre-5.10 perls overflowing the stack when executing regexps version 0.001; 2009-03-03 * introduce class XML::Easy::Content to encapsulate XML content * add friendlier node manipulation functions in XML::Easy::NodeBasics * bugfix: work around perl bug that was causing spurious errors in the pure-Perl parser and serialiser for inputs with long (>= 32 Ki) sequences of characters (the XS parser and serialiser were unaffected) * document that the long-input perl bug affects the XML::Easy::Syntax regular expressions in a way that can't be effectively worked around * bugfix: make pure Perl parser always generate its own exception, not a Perl exception, when processing a character reference for an illegal Unicode character such as U+d800, and not emit a Perl warning when processing a character reference for a dubious Unicode character such as U+1ffff (the XS parser is unaffected) * bugfix: make element construction and pure Perl serialiser always generate its own exception, not a Perl exception, when given data containing an illegal Unicode character such as U+d800 (the XS serialiser is unaffected) * new module XML::Easy::Classify, with data classification functions * move parser and serialiser into new module XML::Easy::Text, leaving XML::Easy itself to be just documentation of the suite * revise documentation and some code for consistency of terminology * detect and complain about use of non-string input to parser functions * detect and complain about use of invalid attribute name in $element->attribute() * bugfix: properly detect and complain about use of glob as encoding name argument to XS serialiser functions (they were being treated as undef on perl 5.8) * where a content array or attribute hash has multiple errors, consistently complain about the first one rather than any other * detect and complain about globs used in place of string arguments, in XS version (the pure-Perl version already detected them) * add #line directive to XML::Easy::Text to 
give useful line numbers in error messages regarding the code in the __DATA__ section * more test cases * don't use literal CR in t/read.data, because it was causing signature checking problems * avoid declaration-after-statement in C code (not valid in C90) * use full stricture in Build.PL version 0.000; 2008-04-08 * initial released version XML-Easy-0.009/README000444001750001750 320311652350543 14033 0ustar00zeframzefram000000000000NAME XML::Easy - XML processing with a clean interface DESCRIPTION XML::Easy is a collection of modules relating to the processing, parsing, and serialisation of XML data. It is oriented towards the use of XML to represent data for interchange purposes, rather than the use of XML as markup of principally textual data. It does not perform any schema processing, and does not interpret DTDs or any other kind of schema. It adheres strictly to the XML specification, in all its awkward details, except for the aforementioned DTDs. XML::Easy strictly separates the in-program manipulation of XML data from the processing of the textual form of XML. This shields the XML user from the inconvenient and obscure aspects of XML syntax. XML data nodes are mainly processed in a clean functional style, using the XML::Easy::NodeBasics module. In the (very likely) event that an application requires some more purpose-specific XML data processing facilities, they are readily built on top of XML::Easy::NodeBasics, retaining the abstraction from textual XML. When XML must be handled in textual form, for input and output, the XML::Easy::Text module supplies a parser and a serialiser. The interfaces here, too, are functional in nature. There are other modules for some ancillary aspects of XML processing. 
INSTALLATION perl Build.PL ./Build ./Build test ./Build install AUTHOR Andrew Main (Zefram) COPYRIGHT Copyright (C) 2008, 2009 PhotoBox Ltd Copyright (C) 2009, 2010, 2011 Andrew Main (Zefram) LICENSE This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. XML-Easy-0.009/.gitignore000444001750001750 22111652350543 15120 0ustar00zeframzefram000000000000/Build /Makefile /_build /blib /META.json /META.yml /MYMETA.json /MYMETA.yml /Makefile.PL /SIGNATURE /XML-Easy-* /lib/XML/Easy.c /lib/XML/Easy.o XML-Easy-0.009/Makefile.PL000444001750001750 233311652350543 15130 0ustar00zeframzefram000000000000# Note: this file was auto-generated by Module::Build::Compat version 0.3800 require 5.008; unless (eval "use Module::Build::Compat 0.02; 1" ) { print "This module requires Module::Build to install itself.\n"; require ExtUtils::MakeMaker; my $yn = ExtUtils::MakeMaker::prompt (' Install Module::Build now from CPAN?', 'y'); unless ($yn =~ /^y/i) { die " *** Cannot install without Module::Build. Exiting ...\n"; } require Cwd; require File::Spec; require CPAN; # Save this 'cause CPAN will chdir all over the place. 
my $cwd = Cwd::cwd(); CPAN::Shell->install('Module::Build::Compat'); CPAN::Shell->expand("Module", "Module::Build::Compat")->uptodate or die "Couldn't install Module::Build, giving up.\n"; chdir $cwd or die "Cannot chdir() back to $cwd: $!"; } eval "use Module::Build::Compat 0.02; 1" or die $@; use lib '_build/lib'; Module::Build::Compat->run_build_pl(args => \@ARGV); my $build_script = 'Build'; $build_script .= '.com' if $^O eq 'VMS'; exit(0) unless(-e $build_script); # cpantesters convention require MyModuleBuilder; Module::Build::Compat->write_makefile(build_class => 'MyModuleBuilder'); XML-Easy-0.009/META.json000444001750001750 432511652350543 14602 0ustar00zeframzefram000000000000{ "abstract" : "XML processing with a clean interface", "author" : [ "Andrew Main (Zefram) " ], "dynamic_config" : 0, "generated_by" : "Module::Build version 0.38, CPAN::Meta::Converter version 2.112621", "license" : [ "perl_5" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : "2" }, "name" : "XML-Easy", "prereqs" : { "build" : { "requires" : { "Encode" : 0, "IO::File" : 0, "Module::Build" : 0, "Params::Classify" : 0, "Scalar::Util" : 0, "Test::More" : 0, "perl" : "5.008", "strict" : 0, "utf8" : 0, "warnings" : 0 } }, "configure" : { "requires" : { "Module::Build" : 0, "perl" : "5.008", "strict" : 0, "warnings" : 0 } }, "runtime" : { "recommends" : { "Internals" : 0, "XSLoader" : 0 }, "requires" : { "Exporter" : 0, "Params::Classify" : 0, "parent" : 0, "perl" : "5.008", "strict" : 0, "warnings" : 0 } } }, "provides" : { "XML::Easy" : { "file" : "lib/XML/Easy.pm", "version" : "0.009" }, "XML::Easy::Classify" : { "file" : "lib/XML/Easy/Classify.pm", "version" : "0.009" }, "XML::Easy::Content" : { "file" : "lib/XML/Easy/Content.pm", "version" : "0.009" }, "XML::Easy::Element" : { "file" : "lib/XML/Easy/Element.pm", "version" : "0.009" }, "XML::Easy::NodeBasics" : { "file" : "lib/XML/Easy/NodeBasics.pm", "version" : "0.009" }, "XML::Easy::Syntax" : 
{ "file" : "lib/XML/Easy/Syntax.pm", "version" : "0.009" }, "XML::Easy::Text" : { "file" : "lib/XML/Easy/Text.pm", "version" : "0.009" } }, "release_status" : "stable", "resources" : { "license" : [ "http://dev.perl.org/licenses/" ] }, "version" : "0.009" } XML-Easy-0.009/Build.PL000444001750001750 362111652350543 14453 0ustar00zeframzefram000000000000{ use 5.006; } use warnings; use strict; use Module::Build; Module::Build->subclass(code => q{ unless(__PACKAGE__->can("cbuilder")) { *cbuilder = sub { $_[0]->_cbuilder or die "no C support" }; } unless(__PACKAGE__->can("have_c_compiler")) { *have_c_compiler = sub { my $cb = eval { $_[0]->cbuilder }; return $cb && $cb->have_compiler; }; } if($Module::Build::VERSION < 0.33) { # Older versions of Module::Build have a bug where if the # cbuilder object is used at Build.PL time (which it will # be for this distribution due to the logic in # ->find_xs_files) then that object can be dumped to the # build_params file, and then at Build time it will # attempt to use the dumped blessed object without loading # the ExtUtils::CBuilder class that is needed to make it # work. 
*write_config = sub { delete $_[0]->{properties}->{_cbuilder}; return $_[0]->SUPER::write_config; }; } sub find_xs_files { my($self) = @_; return {} unless $self->have_c_compiler; return $self->SUPER::find_xs_files; } })->new( module_name => "XML::Easy", license => "perl", configure_requires => { "Module::Build" => 0, "perl" => "5.008", "strict" => 0, "warnings" => 0, }, configure_recommends => { "ExtUtils::CBuilder" => "0.15", }, build_requires => { "Encode" => 0, "IO::File" => 0, "Module::Build" => 0, "Params::Classify" => 0, "Scalar::Util" => 0, "Test::More" => 0, "perl" => "5.008", "strict" => 0, "utf8" => 0, "warnings" => 0, }, build_recommends => { "ExtUtils::CBuilder" => "0.15", }, requires => { "Exporter" => 0, "Params::Classify" => 0, "parent" => 0, "perl" => "5.008", "strict" => 0, "warnings" => 0, }, recommends => { "Internals" => 0, "XSLoader" => 0, }, needs_compiler => 0, dynamic_config => 0, meta_add => { distribution_type => "module" }, create_makefile_pl => "passthrough", sign => 1, )->create_build_script; 1; XML-Easy-0.009/lib000755001750001750 011652350543 13566 5ustar00zeframzefram000000000000XML-Easy-0.009/lib/XML000755001750001750 011652350543 14226 5ustar00zeframzefram000000000000XML-Easy-0.009/lib/XML/Easy.xs000444001750001750 15546011652350543 15713 0ustar00zeframzefram000000000000#define PERL_NO_GET_CONTEXT 1 #include "EXTERN.h" #include "perl.h" #include "XSUB.h" #define PERL_VERSION_DECIMAL(r,v,s) (r*1000000 + v*1000 + s) #define PERL_DECIMAL_VERSION \ PERL_VERSION_DECIMAL(PERL_REVISION,PERL_VERSION,PERL_SUBVERSION) #define PERL_VERSION_GE(r,v,s) \ (PERL_DECIMAL_VERSION >= PERL_VERSION_DECIMAL(r,v,s)) #ifndef newSVpvs # define newSVpvs(string) newSVpvn(""string"", sizeof(string)-1) #endif /* !newSVpvs */ #ifndef sv_catpvs_nomg # define sv_catpvs_nomg(sv, string) \ sv_catpvn_nomg(sv, ""string"", sizeof(string)-1) #endif /* !sv_catpvs_nomg */ #ifndef gv_stashpvs # define gv_stashpvs(name, flags) gv_stashpvn(""name"", sizeof(name)-1, flags) 
#endif /* !gv_stashpvs */ /* stashed stashes */ static HV *stash_content, *stash_element; /* stashed constant content */ static SV *empty_contentobject; /* parameter classification */ #define sv_is_glob(sv) (SvTYPE(sv) == SVt_PVGV) #if PERL_VERSION_GE(5,11,0) # define sv_is_regexp(sv) (SvTYPE(sv) == SVt_REGEXP) #else /* <5.11.0 */ # define sv_is_regexp(sv) 0 #endif /* <5.11.0 */ #define sv_is_string(sv) \ (!sv_is_glob(sv) && !sv_is_regexp(sv) && \ (SvFLAGS(sv) & (SVf_IOK|SVf_NOK|SVf_POK|SVp_IOK|SVp_NOK|SVp_POK))) /* exceptions */ #define throw_utf8_error() croak("broken internal UTF-8 encoding\n") #define throw_syntax_error(p) croak("XML syntax error\n") #define throw_wfc_error(MSG) croak("XML constraint error: "MSG"\n") #define throw_data_error(MSG) croak("invalid XML data: "MSG"\n") /* * string walking * * The parser deals with strings that are internally encoded using Perl's * extended form of UTF-8. It is not assumed that the encoding is * well-formed; encoding errors will result in an exception. The encoding * octets are treated as U8 type. * * Characters that are known to be in the ASCII range are in some places * processed as U8. General Unicode characters are processed as U32, with * the intent that the entire ISO-10646 31-bit range be handleable. Any * codepoint is accepted for processing, even the surrogates (which are * not legal in true UTF-8 encoding). Perl's extended UTF-8 extends to * 72-bit codepoints; encodings beyond the 31-bit range are translated to * codepoint U+7fffffff, which is equally invalid in the XML syntax. * * char_unicode() returns the codepoint represented by the character being * pointed at, or throws an exception if the encoding is malformed. * * To move on to the character following the one pointed at, use the core * macro UTF8SKIP(), as in (p + UTF8SKIP(p)). It assumes that the character * is properly encoded, so it is essential that char_unicode() has been * called on it first. 
 *
 * Given an input SV (that is meant to be a string), pass it through
 * upgrade_sv() to return an SV that contains the string in UTF-8. This
 * could be either the same SV (if it is already UTF-8-encoded or contains
 * no non-ASCII characters) or a mortal upgraded copy.
 *
 * Given an unboxed Latin-1 string, upgrade_latin1_pvn() returns details of
 * an equivalent UTF-8 string, either the same string (if it's ASCII) or a
 * mortal SV.
 */

/*
 * char_unicode(p): decode the single character whose first octet is at p.
 *
 * Returns the codepoint for encodings of up to six octets.  For the
 * seven-octet (0xfe lead) and thirteen-octet (0xff lead) forms it
 * validates the sequence and returns the sentinel 0x7fffffff.  Croaks via
 * throw_utf8_error() if the lead octet is a continuation octet, if any
 * expected continuation octet is not of the form 10xxxxxx, or if the
 * sequence is an overlong encoding (one that a shorter form could
 * represent).
 *
 * No length is passed in.  NOTE(review): this presumably relies on the
 * buffer being NUL-terminated, so that a truncated sequence fails the
 * continuation check rather than reading out of bounds -- confirm against
 * callers.
 */
#define char_unicode(p) THX_char_unicode(aTHX_ p)
static U32 THX_char_unicode(pTHX_ U8 *p)
{
	U32 val = *p;
	/*
	 * req_c1: mask of payload bits in the first continuation octet, at
	 * least one of which must be set when the lead octet's payload is
	 * zero; otherwise the encoding is overlong.
	 */
	U8 req_c1;
	/* ncont: number of continuation octets that follow the lead octet */
	int ncont;
	int i;
	/* 0xxxxxxx: single-octet ASCII character */
	if(!(val & 0x80)) return val;
	/* 10xxxxxx: a continuation octet cannot start a character */
	if(!(val & 0x40)) throw_utf8_error();
	if(!(val & 0x20)) {
		/*
		 * 110xxxxx: two-octet form.  If payload bits 1-4 are all
		 * zero the value is below 0x80, so it is overlong.
		 */
		if(!(val & 0x1e)) throw_utf8_error();
		val &= 0x1f;
		ncont = 1;
		req_c1 = 0x00;
	} else if(!(val & 0x10)) {
		/* 1110xxxx: three-octet form */
		val &= 0x0f;
		ncont = 2;
		req_c1 = 0x20;
	} else if(!(val & 0x08)) {
		/* 11110xxx: four-octet form */
		val &= 0x07;
		ncont = 3;
		req_c1 = 0x30;
	} else if(!(val & 0x04)) {
		/* 111110xx: five-octet form */
		val &= 0x03;
		ncont = 4;
		req_c1 = 0x38;
	} else if(!(val & 0x02)) {
		/* 1111110x: six-octet form, reaching up to 31 bits */
		val &= 0x01;
		ncont = 5;
		req_c1 = 0x3c;
	} else if(!(val & 0x01)) {
		/*
		 * 0xfe lead: six continuation octets.  The value is not
		 * accumulated; the sequence is only validated.  The first
		 * continuation octet must have one of the bits in 0x3e set,
		 * otherwise the value would fit in 31 bits (overlong).
		 */
		if(!(p[1] & 0x3e)) throw_utf8_error();
		for(i = 6; i--; )
			if((*++p & 0xc0) != 0x80)
				throw_utf8_error();
		return 0x7fffffff;
	} else {
		/*
		 * 0xff lead: twelve continuation octets.  The first six
		 * must carry at least one non-zero payload bit between
		 * them, otherwise the encoding is overlong.
		 */
		U8 first_six = 0;
		for(i = 6; i--; ) {
			U8 ext = *++p;
			if((ext & 0xc0) != 0x80)
				throw_utf8_error();
			first_six |= ext;
		}
		if(!(first_six & 0x3f)) throw_utf8_error();
		/* the remaining six octets only need to be continuations */
		for(i = 6; i--; )
			if((*++p & 0xc0) != 0x80)
				throw_utf8_error();
		return 0x7fffffff;
	}
	/* overlong check for the three- to six-octet forms (see req_c1) */
	if(val == 0 && !(p[1] & req_c1)) throw_utf8_error();
	/* fold the payload bits of each continuation octet into val */
	for(i = ncont; i--; ) {
		U8 ext = *++p;
		if((ext & 0xc0) != 0x80)
			throw_utf8_error();
		val = UTF8_ACCUMULATE(val, ext);
	}
	return val;
}

/*
 * upgrade_sv(input): return an SV holding the same string in UTF-8.
 *
 * Returns input itself when its UTF-8 flag is already set or when no
 * octet of its string value has the high bit set.  Otherwise returns a
 * mortal copy that has been upgraded to UTF-8; input is left unmodified.
 */
#define upgrade_sv(input) THX_upgrade_sv(aTHX_ input)
static SV *THX_upgrade_sv(pTHX_ SV *input)
{
	U8 *p, *end;
	STRLEN len;
	if(SvUTF8(input)) return input;
	p = (U8*)SvPV(input, len);
	for(end = p + len; p != end; p++) {
		if(*p & 0x80) {
			/* first non-ASCII octet found: upgrade a copy */
			SV *output = sv_mortalcopy(input);
			sv_utf8_upgrade(output);
			return output;
		}
	}
	return input;
}

/*
 * upgrade_latin1_pvn(ptrp, lenp): in/out conversion of a Latin-1 buffer,
 * given by pointer and length, to UTF-8.
 *
 * If every octet is ASCII, *ptrp and *lenp are left untouched.
 * Otherwise the octets are copied into a new mortal SV, that SV is
 * upgraded to UTF-8, and *ptrp / *lenp are rewritten to describe the
 * SV's string buffer.  In that case the returned pointer refers to
 * storage owned by the mortal SV.
 */
#define upgrade_latin1_pvn(ptrp, lenp) THX_upgrade_latin1_pvn(aTHX_ ptrp, lenp)
static void THX_upgrade_latin1_pvn(pTHX_ U8 **ptrp, STRLEN *lenp)
{
	U8 *ptr = *ptrp;
	STRLEN len = *lenp;
	U8 *p = ptr, *end = ptr + len;
	for(; p != end; p++) {
		if(*p & 0x80) {
			/* first non-ASCII octet found: build an upgraded copy */
			SV *output = sv_2mortal(newSVpvn((char*)ptr, len));
			sv_utf8_upgrade(output);
			ptr = (U8*)SvPV(output, len);
			*ptrp = ptr;
			*lenp = len;
			return;
		}
	}
}

/*
 * character classification
 *
 * The full Unicode range of characters is subjected to fairly arbitrary
 * classification. To avoid having enormous bitmaps, the ranges to match
 * against are stored in lists, which are binary-searched. For speed,
 * the ASCII range is classified by a bitmap.
 *
 * nona_codepoint_is_in_set() checks whether a non-ASCII codepoint is in
 * a specified character set identified by a Unicode range table.
 *
 * The char_is_*() functions each check whether the character being
 * pointed at is of a particular type.
 *
 * The codepoint_is_*() functions each check whether a codepoint is of
 * a particular type.
 *
 * The ascii_codepoint_is_*() functions each check whether an ASCII
 * codepoint is of a particular type.
*/ struct unicode_range { U32 first; U32 last; }; static struct unicode_range const uniset_namestart[] = { { 0x003a, 0x003a }, { 0x0041, 0x005a }, { 0x005f, 0x005f }, { 0x0061, 0x007a }, { 0x00c0, 0x00d6 }, { 0x00d8, 0x00f6 }, { 0x00f8, 0x0131 }, { 0x0134, 0x013e }, { 0x0141, 0x0148 }, { 0x014a, 0x017e }, { 0x0180, 0x01c3 }, { 0x01cd, 0x01f0 }, { 0x01f4, 0x01f5 }, { 0x01fa, 0x0217 }, { 0x0250, 0x02a8 }, { 0x02bb, 0x02c1 }, { 0x0386, 0x0386 }, { 0x0388, 0x038a }, { 0x038c, 0x038c }, { 0x038e, 0x03a1 }, { 0x03a3, 0x03ce }, { 0x03d0, 0x03d6 }, { 0x03da, 0x03da }, { 0x03dc, 0x03dc }, { 0x03de, 0x03de }, { 0x03e0, 0x03e0 }, { 0x03e2, 0x03f3 }, { 0x0401, 0x040c }, { 0x040e, 0x044f }, { 0x0451, 0x045c }, { 0x045e, 0x0481 }, { 0x0490, 0x04c4 }, { 0x04c7, 0x04c8 }, { 0x04cb, 0x04cc }, { 0x04d0, 0x04eb }, { 0x04ee, 0x04f5 }, { 0x04f8, 0x04f9 }, { 0x0531, 0x0556 }, { 0x0559, 0x0559 }, { 0x0561, 0x0586 }, { 0x05d0, 0x05ea }, { 0x05f0, 0x05f2 }, { 0x0621, 0x063a }, { 0x0641, 0x064a }, { 0x0671, 0x06b7 }, { 0x06ba, 0x06be }, { 0x06c0, 0x06ce }, { 0x06d0, 0x06d3 }, { 0x06d5, 0x06d5 }, { 0x06e5, 0x06e6 }, { 0x0905, 0x0939 }, { 0x093d, 0x093d }, { 0x0958, 0x0961 }, { 0x0985, 0x098c }, { 0x098f, 0x0990 }, { 0x0993, 0x09a8 }, { 0x09aa, 0x09b0 }, { 0x09b2, 0x09b2 }, { 0x09b6, 0x09b9 }, { 0x09dc, 0x09dd }, { 0x09df, 0x09e1 }, { 0x09f0, 0x09f1 }, { 0x0a05, 0x0a0a }, { 0x0a0f, 0x0a10 }, { 0x0a13, 0x0a28 }, { 0x0a2a, 0x0a30 }, { 0x0a32, 0x0a33 }, { 0x0a35, 0x0a36 }, { 0x0a38, 0x0a39 }, { 0x0a59, 0x0a5c }, { 0x0a5e, 0x0a5e }, { 0x0a72, 0x0a74 }, { 0x0a85, 0x0a8b }, { 0x0a8d, 0x0a8d }, { 0x0a8f, 0x0a91 }, { 0x0a93, 0x0aa8 }, { 0x0aaa, 0x0ab0 }, { 0x0ab2, 0x0ab3 }, { 0x0ab5, 0x0ab9 }, { 0x0abd, 0x0abd }, { 0x0ae0, 0x0ae0 }, { 0x0b05, 0x0b0c }, { 0x0b0f, 0x0b10 }, { 0x0b13, 0x0b28 }, { 0x0b2a, 0x0b30 }, { 0x0b32, 0x0b33 }, { 0x0b36, 0x0b39 }, { 0x0b3d, 0x0b3d }, { 0x0b5c, 0x0b5d }, { 0x0b5f, 0x0b61 }, { 0x0b85, 0x0b8a }, { 0x0b8e, 0x0b90 }, { 0x0b92, 0x0b95 }, { 0x0b99, 0x0b9a }, { 0x0b9c, 
0x0b9c }, { 0x0b9e, 0x0b9f }, { 0x0ba3, 0x0ba4 }, { 0x0ba8, 0x0baa }, { 0x0bae, 0x0bb5 }, { 0x0bb7, 0x0bb9 }, { 0x0c05, 0x0c0c }, { 0x0c0e, 0x0c10 }, { 0x0c12, 0x0c28 }, { 0x0c2a, 0x0c33 }, { 0x0c35, 0x0c39 }, { 0x0c60, 0x0c61 }, { 0x0c85, 0x0c8c }, { 0x0c8e, 0x0c90 }, { 0x0c92, 0x0ca8 }, { 0x0caa, 0x0cb3 }, { 0x0cb5, 0x0cb9 }, { 0x0cde, 0x0cde }, { 0x0ce0, 0x0ce1 }, { 0x0d05, 0x0d0c }, { 0x0d0e, 0x0d10 }, { 0x0d12, 0x0d28 }, { 0x0d2a, 0x0d39 }, { 0x0d60, 0x0d61 }, { 0x0e01, 0x0e2e }, { 0x0e30, 0x0e30 }, { 0x0e32, 0x0e33 }, { 0x0e40, 0x0e45 }, { 0x0e81, 0x0e82 }, { 0x0e84, 0x0e84 }, { 0x0e87, 0x0e88 }, { 0x0e8a, 0x0e8a }, { 0x0e8d, 0x0e8d }, { 0x0e94, 0x0e97 }, { 0x0e99, 0x0e9f }, { 0x0ea1, 0x0ea3 }, { 0x0ea5, 0x0ea5 }, { 0x0ea7, 0x0ea7 }, { 0x0eaa, 0x0eab }, { 0x0ead, 0x0eae }, { 0x0eb0, 0x0eb0 }, { 0x0eb2, 0x0eb3 }, { 0x0ebd, 0x0ebd }, { 0x0ec0, 0x0ec4 }, { 0x0f40, 0x0f47 }, { 0x0f49, 0x0f69 }, { 0x10a0, 0x10c5 }, { 0x10d0, 0x10f6 }, { 0x1100, 0x1100 }, { 0x1102, 0x1103 }, { 0x1105, 0x1107 }, { 0x1109, 0x1109 }, { 0x110b, 0x110c }, { 0x110e, 0x1112 }, { 0x113c, 0x113c }, { 0x113e, 0x113e }, { 0x1140, 0x1140 }, { 0x114c, 0x114c }, { 0x114e, 0x114e }, { 0x1150, 0x1150 }, { 0x1154, 0x1155 }, { 0x1159, 0x1159 }, { 0x115f, 0x1161 }, { 0x1163, 0x1163 }, { 0x1165, 0x1165 }, { 0x1167, 0x1167 }, { 0x1169, 0x1169 }, { 0x116d, 0x116e }, { 0x1172, 0x1173 }, { 0x1175, 0x1175 }, { 0x119e, 0x119e }, { 0x11a8, 0x11a8 }, { 0x11ab, 0x11ab }, { 0x11ae, 0x11af }, { 0x11b7, 0x11b8 }, { 0x11ba, 0x11ba }, { 0x11bc, 0x11c2 }, { 0x11eb, 0x11eb }, { 0x11f0, 0x11f0 }, { 0x11f9, 0x11f9 }, { 0x1e00, 0x1e9b }, { 0x1ea0, 0x1ef9 }, { 0x1f00, 0x1f15 }, { 0x1f18, 0x1f1d }, { 0x1f20, 0x1f45 }, { 0x1f48, 0x1f4d }, { 0x1f50, 0x1f57 }, { 0x1f59, 0x1f59 }, { 0x1f5b, 0x1f5b }, { 0x1f5d, 0x1f5d }, { 0x1f5f, 0x1f7d }, { 0x1f80, 0x1fb4 }, { 0x1fb6, 0x1fbc }, { 0x1fbe, 0x1fbe }, { 0x1fc2, 0x1fc4 }, { 0x1fc6, 0x1fcc }, { 0x1fd0, 0x1fd3 }, { 0x1fd6, 0x1fdb }, { 0x1fe0, 0x1fec }, { 0x1ff2, 0x1ff4 }, { 0x1ff6, 
0x1ffc }, { 0x2126, 0x2126 }, { 0x212a, 0x212b }, { 0x212e, 0x212e }, { 0x2180, 0x2182 }, { 0x3007, 0x3007 }, { 0x3021, 0x3029 }, { 0x3041, 0x3094 }, { 0x30a1, 0x30fa }, { 0x3105, 0x312c }, { 0x4e00, 0x9fa5 }, { 0xac00, 0xd7a3 }, }; static struct unicode_range const uniset_name[] = { { 0x002d, 0x002e }, { 0x0030, 0x003a }, { 0x0041, 0x005a }, { 0x005f, 0x005f }, { 0x0061, 0x007a }, { 0x00b7, 0x00b7 }, { 0x00c0, 0x00d6 }, { 0x00d8, 0x00f6 }, { 0x00f8, 0x0131 }, { 0x0134, 0x013e }, { 0x0141, 0x0148 }, { 0x014a, 0x017e }, { 0x0180, 0x01c3 }, { 0x01cd, 0x01f0 }, { 0x01f4, 0x01f5 }, { 0x01fa, 0x0217 }, { 0x0250, 0x02a8 }, { 0x02bb, 0x02c1 }, { 0x02d0, 0x02d1 }, { 0x0300, 0x0345 }, { 0x0360, 0x0361 }, { 0x0387, 0x038a }, { 0x038c, 0x038c }, { 0x038e, 0x03a1 }, { 0x03a3, 0x03ce }, { 0x03d0, 0x03d6 }, { 0x03da, 0x03da }, { 0x03dc, 0x03dc }, { 0x03de, 0x03de }, { 0x03e0, 0x03e0 }, { 0x03e2, 0x03f3 }, { 0x0401, 0x040c }, { 0x040e, 0x044f }, { 0x0451, 0x045c }, { 0x045e, 0x0481 }, { 0x0483, 0x0486 }, { 0x0490, 0x04c4 }, { 0x04c7, 0x04c8 }, { 0x04cb, 0x04cc }, { 0x04d0, 0x04eb }, { 0x04ee, 0x04f5 }, { 0x04f8, 0x04f9 }, { 0x0531, 0x0556 }, { 0x0559, 0x0559 }, { 0x0561, 0x0586 }, { 0x0591, 0x05a1 }, { 0x05a3, 0x05b9 }, { 0x05bb, 0x05bd }, { 0x05bf, 0x05bf }, { 0x05c1, 0x05c2 }, { 0x05c4, 0x05c4 }, { 0x05d0, 0x05ea }, { 0x05f0, 0x05f2 }, { 0x0621, 0x063a }, { 0x0641, 0x0652 }, { 0x0660, 0x0669 }, { 0x0670, 0x06b7 }, { 0x06ba, 0x06be }, { 0x06c0, 0x06ce }, { 0x06d0, 0x06d3 }, { 0x06e5, 0x06e8 }, { 0x06ea, 0x06ed }, { 0x06f0, 0x06f9 }, { 0x0901, 0x0903 }, { 0x0905, 0x0939 }, { 0x093e, 0x094d }, { 0x0951, 0x0954 }, { 0x0958, 0x0963 }, { 0x0966, 0x096f }, { 0x0981, 0x0983 }, { 0x0985, 0x098c }, { 0x098f, 0x0990 }, { 0x0993, 0x09a8 }, { 0x09aa, 0x09b0 }, { 0x09b2, 0x09b2 }, { 0x09b6, 0x09b9 }, { 0x09bc, 0x09bc }, { 0x09bf, 0x09c4 }, { 0x09c7, 0x09c8 }, { 0x09cb, 0x09cd }, { 0x09d7, 0x09d7 }, { 0x09dc, 0x09dd }, { 0x09df, 0x09e3 }, { 0x09e6, 0x09f1 }, { 0x0a02, 0x0a02 }, { 0x0a05, 
0x0a0a }, { 0x0a0f, 0x0a10 }, { 0x0a13, 0x0a28 }, { 0x0a2a, 0x0a30 }, { 0x0a32, 0x0a33 }, { 0x0a35, 0x0a36 }, { 0x0a38, 0x0a39 }, { 0x0a3c, 0x0a3c }, { 0x0a3f, 0x0a42 }, { 0x0a47, 0x0a48 }, { 0x0a4b, 0x0a4d }, { 0x0a59, 0x0a5c }, { 0x0a5e, 0x0a5e }, { 0x0a70, 0x0a74 }, { 0x0a81, 0x0a83 }, { 0x0a85, 0x0a8b }, { 0x0a8d, 0x0a8d }, { 0x0a8f, 0x0a91 }, { 0x0a93, 0x0aa8 }, { 0x0aaa, 0x0ab0 }, { 0x0ab2, 0x0ab3 }, { 0x0ab5, 0x0ab9 }, { 0x0abd, 0x0ac5 }, { 0x0ac7, 0x0ac9 }, { 0x0acb, 0x0acd }, { 0x0ae0, 0x0ae0 }, { 0x0ae6, 0x0aef }, { 0x0b01, 0x0b03 }, { 0x0b05, 0x0b0c }, { 0x0b0f, 0x0b10 }, { 0x0b13, 0x0b28 }, { 0x0b2a, 0x0b30 }, { 0x0b32, 0x0b33 }, { 0x0b36, 0x0b39 }, { 0x0b3d, 0x0b43 }, { 0x0b47, 0x0b48 }, { 0x0b4b, 0x0b4d }, { 0x0b56, 0x0b57 }, { 0x0b5c, 0x0b5d }, { 0x0b5f, 0x0b61 }, { 0x0b66, 0x0b6f }, { 0x0b82, 0x0b83 }, { 0x0b85, 0x0b8a }, { 0x0b8e, 0x0b90 }, { 0x0b92, 0x0b95 }, { 0x0b99, 0x0b9a }, { 0x0b9c, 0x0b9c }, { 0x0b9e, 0x0b9f }, { 0x0ba3, 0x0ba4 }, { 0x0ba8, 0x0baa }, { 0x0bae, 0x0bb5 }, { 0x0bb7, 0x0bb9 }, { 0x0bbe, 0x0bc2 }, { 0x0bc6, 0x0bc8 }, { 0x0bca, 0x0bcd }, { 0x0bd7, 0x0bd7 }, { 0x0be7, 0x0bef }, { 0x0c01, 0x0c03 }, { 0x0c05, 0x0c0c }, { 0x0c0e, 0x0c10 }, { 0x0c12, 0x0c28 }, { 0x0c2a, 0x0c33 }, { 0x0c35, 0x0c39 }, { 0x0c3e, 0x0c44 }, { 0x0c46, 0x0c48 }, { 0x0c4a, 0x0c4d }, { 0x0c55, 0x0c56 }, { 0x0c60, 0x0c61 }, { 0x0c66, 0x0c6f }, { 0x0c82, 0x0c83 }, { 0x0c85, 0x0c8c }, { 0x0c8e, 0x0c90 }, { 0x0c92, 0x0ca8 }, { 0x0caa, 0x0cb3 }, { 0x0cb5, 0x0cb9 }, { 0x0cbe, 0x0cc4 }, { 0x0cc6, 0x0cc8 }, { 0x0cca, 0x0ccd }, { 0x0cd5, 0x0cd6 }, { 0x0cde, 0x0cde }, { 0x0ce0, 0x0ce1 }, { 0x0ce6, 0x0cef }, { 0x0d02, 0x0d03 }, { 0x0d05, 0x0d0c }, { 0x0d0e, 0x0d10 }, { 0x0d12, 0x0d28 }, { 0x0d2a, 0x0d39 }, { 0x0d3e, 0x0d43 }, { 0x0d46, 0x0d48 }, { 0x0d4a, 0x0d4d }, { 0x0d57, 0x0d57 }, { 0x0d60, 0x0d61 }, { 0x0d66, 0x0d6f }, { 0x0e01, 0x0e2e }, { 0x0e32, 0x0e3a }, { 0x0e46, 0x0e4e }, { 0x0e50, 0x0e59 }, { 0x0e81, 0x0e82 }, { 0x0e84, 0x0e84 }, { 0x0e87, 0x0e88 }, { 0x0e8a, 
0x0e8a }, { 0x0e8d, 0x0e8d }, { 0x0e94, 0x0e97 }, { 0x0e99, 0x0e9f }, { 0x0ea1, 0x0ea3 }, { 0x0ea5, 0x0ea5 }, { 0x0ea7, 0x0ea7 }, { 0x0eaa, 0x0eab }, { 0x0ead, 0x0eae }, { 0x0eb2, 0x0eb9 }, { 0x0ebb, 0x0ebd }, { 0x0ec0, 0x0ec4 }, { 0x0ec6, 0x0ec6 }, { 0x0ec8, 0x0ecd }, { 0x0ed0, 0x0ed9 }, { 0x0f18, 0x0f19 }, { 0x0f20, 0x0f29 }, { 0x0f35, 0x0f35 }, { 0x0f37, 0x0f37 }, { 0x0f39, 0x0f39 }, { 0x0f3f, 0x0f47 }, { 0x0f49, 0x0f69 }, { 0x0f71, 0x0f84 }, { 0x0f86, 0x0f8b }, { 0x0f90, 0x0f95 }, { 0x0f97, 0x0f97 }, { 0x0f99, 0x0fad }, { 0x0fb1, 0x0fb7 }, { 0x0fb9, 0x0fb9 }, { 0x10a0, 0x10c5 }, { 0x10d0, 0x10f6 }, { 0x1100, 0x1100 }, { 0x1102, 0x1103 }, { 0x1105, 0x1107 }, { 0x1109, 0x1109 }, { 0x110b, 0x110c }, { 0x110e, 0x1112 }, { 0x113c, 0x113c }, { 0x113e, 0x113e }, { 0x1140, 0x1140 }, { 0x114c, 0x114c }, { 0x114e, 0x114e }, { 0x1150, 0x1150 }, { 0x1154, 0x1155 }, { 0x1159, 0x1159 }, { 0x115f, 0x1161 }, { 0x1163, 0x1163 }, { 0x1165, 0x1165 }, { 0x1167, 0x1167 }, { 0x1169, 0x1169 }, { 0x116d, 0x116e }, { 0x1172, 0x1173 }, { 0x1175, 0x1175 }, { 0x119e, 0x119e }, { 0x11a8, 0x11a8 }, { 0x11ab, 0x11ab }, { 0x11ae, 0x11af }, { 0x11b7, 0x11b8 }, { 0x11ba, 0x11ba }, { 0x11bc, 0x11c2 }, { 0x11eb, 0x11eb }, { 0x11f0, 0x11f0 }, { 0x11f9, 0x11f9 }, { 0x1e00, 0x1e9b }, { 0x1ea0, 0x1ef9 }, { 0x1f00, 0x1f15 }, { 0x1f18, 0x1f1d }, { 0x1f20, 0x1f45 }, { 0x1f48, 0x1f4d }, { 0x1f50, 0x1f57 }, { 0x1f59, 0x1f59 }, { 0x1f5b, 0x1f5b }, { 0x1f5d, 0x1f5d }, { 0x1f5f, 0x1f7d }, { 0x1f80, 0x1fb4 }, { 0x1fb6, 0x1fbc }, { 0x1fbe, 0x1fbe }, { 0x1fc2, 0x1fc4 }, { 0x1fc6, 0x1fcc }, { 0x1fd0, 0x1fd3 }, { 0x1fd6, 0x1fdb }, { 0x1fe0, 0x1fec }, { 0x1ff2, 0x1ff4 }, { 0x1ff6, 0x1ffc }, { 0x20d0, 0x20dc }, { 0x20e1, 0x20e1 }, { 0x2126, 0x2126 }, { 0x212a, 0x212b }, { 0x212e, 0x212e }, { 0x2180, 0x2182 }, { 0x3005, 0x3005 }, { 0x3007, 0x3007 }, { 0x3021, 0x302f }, { 0x3031, 0x3035 }, { 0x3041, 0x3094 }, { 0x3099, 0x309a }, { 0x309d, 0x309e }, { 0x30a1, 0x30fa }, { 0x30fc, 0x30fe }, { 0x3105, 0x312c }, { 0x4e00, 
0x9fa5 }, { 0xac00, 0xd7a3 }, }; #define ARRAY_END(a) ((a) + sizeof((a))/sizeof(*(a))) static int nona_codepoint_is_in_set(U32 c, struct unicode_range const *rl, struct unicode_range const *rr) { rr--; while(rl != rr) { /* invariant: c >= rl->first && c < rr[1].first */ struct unicode_range const *rt = rl + ((rr-rl+1) >> 1); if(c >= rt->first) { rl = rt; } else { rr = rt-1; } } return c <= rl->last; } #define CHARATTR_NAMESTART 0x01 #define CHARATTR_NAME 0x02 #define CHARATTR_S 0x04 #define CHARATTR_ENCSTART 0x10 #define CHARATTR_ENC 0x20 #define CHARATTR_CHAR 0x80 static U8 const asciichar_attr[128] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* NUL to BEL */ 0x00, 0x84, 0x84, 0x00, 0x00, 0x84, 0x00, 0x00, /* BS to SI */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* DLE to ETB */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* CAN to US */ 0x84, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* SP to ' */ 0x80, 0x80, 0x80, 0x80, 0x80, 0xa2, 0xa2, 0x80, /* ( to / */ 0xa2, 0xa2, 0xa2, 0xa2, 0xa2, 0xa2, 0xa2, 0xa2, /* 0 to 7 */ 0xa2, 0xa2, 0x83, 0x80, 0x80, 0x80, 0x80, 0x80, /* 8 to ? 
*/ 0x80, 0xb3, 0xb3, 0xb3, 0xb3, 0xb3, 0xb3, 0xb3, /* @ to G */ 0xb3, 0xb3, 0xb3, 0xb3, 0xb3, 0xb3, 0xb3, 0xb3, /* H to O */ 0xb3, 0xb3, 0xb3, 0xb3, 0xb3, 0xb3, 0xb3, 0xb3, /* P to W */ 0xb3, 0xb3, 0xb3, 0x80, 0x80, 0x80, 0x80, 0xa3, /* X to _ */ 0x80, 0xb3, 0xb3, 0xb3, 0xb3, 0xb3, 0xb3, 0xb3, /* ` to g */ 0xb3, 0xb3, 0xb3, 0xb3, 0xb3, 0xb3, 0xb3, 0xb3, /* h to o */ 0xb3, 0xb3, 0xb3, 0xb3, 0xb3, 0xb3, 0xb3, 0xb3, /* p to w */ 0xb3, 0xb3, 0xb3, 0x80, 0x80, 0x80, 0x80, 0x80, /* x to DEL */ }; #define char_is_namestart(p) THX_char_is_namestart(aTHX_ p) static int THX_char_is_namestart(pTHX_ U8 *p) { U8 c0 = *p; if(!(c0 & 0x80)) return asciichar_attr[c0] & CHARATTR_NAMESTART; return nona_codepoint_is_in_set(char_unicode(p), uniset_namestart, ARRAY_END(uniset_namestart)); } #define char_is_name(p) THX_char_is_name(aTHX_ p) static int THX_char_is_name(pTHX_ U8 *p) { U8 c0 = *p; if(!(c0 & 0x80)) return asciichar_attr[c0] & CHARATTR_NAME; return nona_codepoint_is_in_set(char_unicode(p), uniset_name, ARRAY_END(uniset_name)); } static int char_is_s(U8 *p) { U8 c0 = *p; if(!(c0 & 0x80)) return asciichar_attr[c0] & CHARATTR_S; return 0; } #if 0 /* unused */ static int ascii_codepoint_is_s(U8 c) { return asciichar_attr[c] & CHARATTR_S; } #endif static int char_is_encstart(U8 *p) { U8 c0 = *p; if(!(c0 & 0x80)) return asciichar_attr[c0] & CHARATTR_ENCSTART; return 0; } static int char_is_enc(U8 *p) { U8 c0 = *p; if(!(c0 & 0x80)) return asciichar_attr[c0] & CHARATTR_ENC; return 0; } static int nona_codepoint_is_char(U32 c) { if(c <= 0xd7ff) return 1; return c >= 0xe000 && c <= 0x10ffff && (c & ~1) != 0xfffe; } static int codepoint_is_char(U32 c) { return (c < 0x80) ? 
asciichar_attr[c] & CHARATTR_CHAR : nona_codepoint_is_char(c); } #define char_is_char(p) THX_char_is_char(aTHX_ p) static int THX_char_is_char(pTHX_ U8 *p) { U8 c0 = *p; if(!(c0 & 0x80)) return asciichar_attr[c0] & CHARATTR_CHAR; return nona_codepoint_is_char(char_unicode(p)); } /* * XML node handling */ #define contentobject_twine(cobj) THX_contentobject_twine(aTHX_ cobj) static SV *THX_contentobject_twine(pTHX_ SV *cobj) { AV *twine; SV **item_ptr; if(!SvROK(cobj)) throw_data_error("content data isn't a content chunk"); twine = (AV*)SvRV(cobj); if(SvTYPE((SV*)twine) != SVt_PVAV || av_len(twine) != 0) throw_data_error("content data isn't a content chunk"); if(!SvOBJECT((SV*)twine) || SvSTASH((SV*)twine) != stash_content) throw_data_error("content data isn't a content chunk"); item_ptr = av_fetch(twine, 0, 0); if(!item_ptr) throw_data_error("content array isn't an array"); return *item_ptr; } #define twine_contentobject(tref) THX_twine_contentobject(aTHX_ tref) static SV *THX_twine_contentobject(pTHX_ SV *tref) { AV *content = newAV(); SV *cref = sv_2mortal(newRV_noinc((SV*)content)); av_push(content, SvREFCNT_inc(tref)); sv_bless(cref, stash_content); SvREADONLY_on((SV*)content); SvREADONLY_on(cref); return cref; } #define element_nodearray(eref) THX_element_nodearray(aTHX_ eref) static AV *THX_element_nodearray(pTHX_ SV *eref) { AV *earr; if(!SvROK(eref)) throw_data_error("element data isn't an element"); earr = (AV*)SvRV(eref); if(SvTYPE((SV*)earr) != SVt_PVAV || av_len(earr) != 2) throw_data_error("element data isn't an element"); if(!SvOBJECT((SV*)earr) || SvSTASH((SV*)earr) != stash_element) throw_data_error("element data isn't an element"); return earr; } #define userchardata_chardata(idata) THX_userchardata_chardata(aTHX_ idata) static SV *THX_userchardata_chardata(pTHX_ SV *idata) { SV *odata; U8 *p, *end; STRLEN len; if(!sv_is_string(idata)) throw_data_error("character data isn't a string"); odata = sv_mortalcopy(idata); sv_utf8_upgrade(odata); 
SvREADONLY_on(odata); p = (U8*)SvPV(odata, len); end = p + len; while(*p != 0) { if(!char_is_char(p)) throw_data_error("character data " "contains illegal character"); p += UTF8SKIP(p); } if(p != end) throw_data_error("character data contains illegal character"); return odata; } #define usertwine_twine(itref) THX_usertwine_twine(aTHX_ itref) static SV *THX_usertwine_twine(pTHX_ SV *itref) { SV *otref; AV *itwine, *otwine; I32 clen, i; if(!SvROK(itref)) throw_data_error("content array isn't an array"); itwine = (AV*)SvRV(itref); if(SvTYPE((SV*)itwine) != SVt_PVAV || SvOBJECT((SV*)itwine)) throw_data_error("content array isn't an array"); clen = av_len(itwine); if(clen & 1) throw_data_error("content array has even length"); otwine = newAV(); otref = sv_2mortal(newRV_noinc((SV*)otwine)); SvREADONLY_on(otref); av_extend(otwine, clen); for(i = 0; ; i++) { SV **item_ptr, *iitem, *oitem, *elem; item_ptr = av_fetch(itwine, i, 0); if(!item_ptr) throw_data_error("character data isn't a string"); iitem = *item_ptr; if(!sv_is_string(iitem)) throw_data_error("character data isn't a string"); oitem = userchardata_chardata(iitem); av_push(otwine, SvREFCNT_inc(oitem)); if(i++ == clen) break; item_ptr = av_fetch(itwine, i, 0); if(!item_ptr) throw_data_error("element data isn't an element"); iitem = *item_ptr; if(!SvROK(iitem)) throw_data_error("element data isn't an element"); elem = SvRV(iitem); if(!SvOBJECT(elem) || SvSTASH(elem) != stash_element) throw_data_error("element data isn't an element"); oitem = newRV_inc(elem); SvREADONLY_on(oitem); av_push(otwine, oitem); } SvREADONLY_on((SV*)otwine); return otref; } /* * parsing * * The parse_*() functions each parse some syntactic construct within the * XML grammar. Their main input is the pointer to the start of that * construct in the input. Generally they can be pointed at anything, * however malformed, and they will detect a syntax error if it is not the * item they are meant to parse. 
Upon a successful parse they return, in * one way or another, a pointer to the end of the parsed construct and * any details required of the item's content. Upon syntax error or UTF-8 * encoding error, they throw an exception. * * The end of the input string is not explicitly indicated to the parser * functions. They detect the end of input by means of the NUL terminator. * A NUL can also be embedded in the string, in which case parsing will * initially return a successful result (if that's a valid place to end), * and the outermost code (which has access to the SV) will detect that it * was an embedded NUL rather than end of input and throw an exception. * * Unlike the regular expressions in XML::Easy::Syntax, these parser * functions won't match their grammar production in absolutely any context. * They are specialised to work in the context of the complete XML grammar, * and are permitted to detect XML syntax errors that strictly fall outside * the construct being parsed. For example, parse_contentobject() will * complain if it faces "]]>", rather than matching "]]" and then returning. * * All objects created by parsing are initially mortal, and have their * reference counts later increased when a persistent reference is made. * Thus on exception all the partial results are cleaned up. 
*/ /* parse_s(), parse_opt_s(), parse_eq(): return the updated pointer */ #define parse_s(p) THX_parse_s(aTHX_ p) static U8 *THX_parse_s(pTHX_ U8 *p) { if(!char_is_s(p)) throw_syntax_error(p); do { p++; } while(char_is_s(p)); return p; } static U8 *parse_opt_s(U8 *p) { while(char_is_s(p)) p++; return p; } #define parse_eq(p) THX_parse_eq(aTHX_ p) static U8 *THX_parse_eq(pTHX_ U8 *p) { p = parse_opt_s(p); if(*p != '=') throw_syntax_error(p); return parse_opt_s(p+1); } /* parse_name(): returns the number of octets encoding the name */ #define parse_name(p) THX_parse_name(aTHX_ p) static STRLEN THX_parse_name(pTHX_ U8 *p) { U8 *start = p; if(!char_is_namestart(p)) throw_syntax_error(p); do { p += UTF8SKIP(p); } while(char_is_name(p)); return p - start; } /* parse_reference(): updates pointer in place and returns codepoint of referenced character */ static U8 const digit_value[256] = { 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 99, 77, 99, 99, 99, 99, 99, 10, 11, 12, 13, 14, 15, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 10, 11, 12, 13, 14, 15, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, }; #define parse_reference(pp) THX_parse_reference(aTHX_ pp) static U32 
THX_parse_reference(pTHX_ U8 **pp) { U8 *p = *pp; U8 c; U32 val; if(*p != '&') throw_syntax_error(p); c = *++p; if(c == '#') { c = *++p; if(c == 'x') { val = digit_value[*++p]; if(val > 15) throw_syntax_error(p); while(1) { c = digit_value[*++p]; if(c > 15) break; if(val & 0xf0000000) throw_wfc_error("invalid character " "in character reference"); val = (val<<4) + c; } } else { val = digit_value[c]; if(val > 9) throw_syntax_error(p); while(1) { c = digit_value[*++p]; if(c > 9) break; if(val >= 100000000) throw_wfc_error("invalid character " "in character reference"); val = val*10 + c; } } if(c != 77) throw_syntax_error(p); p++; if(!codepoint_is_char(val)) throw_wfc_error("invalid character " "in character reference"); } else if(c == 'l' && p[1] == 't' && p[2] == ';') { p += 3; val = '<'; } else if(c == 'g' && p[1] == 't' && p[2] == ';') { p += 3; val = '>'; } else if(c == 'a' && p[1] == 'm' && p[2] == 'p' && p[3] == ';') { p += 4; val = '&'; } else if(c == 'q' && p[1] == 'u' && p[2] == 'o' && p[3] == 't' && p[4] == ';') { p += 5; val = '"'; } else if(c == 'a' && p[1] == 'p' && p[2] == 'o' && p[3] == 's' && p[4] == ';') { p += 5; val = '\''; } else { p += parse_name(p); if(*p != ';') throw_syntax_error(p); throw_wfc_error("reference to undeclared entity"); } *pp = p; return val; } /* parse_chars(): parses literal characters and references, for use in ordinary content, CDATA, and attribute values; guarantees to return only when facing correct terminator; returns updated pointer, appends characters to supplied SV */ #define CHARDATA_AMP_REF 0x01 #define CHARDATA_LT_ERR 0x02 #define CHARDATA_S_LINEAR 0x04 #define CHARDATA_RBRBGT_ERR 0x08 #define CHARDATA_NUL_END 0x10 #define parse_chars(p, value, endc, flags) \ THX_parse_chars(aTHX_ p, value, endc, flags) static U8 *THX_parse_chars(pTHX_ U8 *p, SV *value, U8 endc, U32 flags) { U8 *lstart = p; while(1) { U8 c = *p; if(c < 0x80) { U8 *lend = p; U32 val; if(c == endc && (c != ']' || (p[1] == ']' && p[2] == '>'))) break; 
if(c < 0x20) { if(c == 0 && (flags & CHARDATA_NUL_END)) { break; } else if(c == 0x9 || c == 0xa) { val = (flags & CHARDATA_S_LINEAR) ? 0x20 : 0; } else if(c == 0xd) { if(p[1] == 0xa) p++; val = (flags & CHARDATA_S_LINEAR) ? 0x20 : 0xa; } else throw_syntax_error(p); p++; } else if(c == '&' && (flags & CHARDATA_AMP_REF)) { val = parse_reference(&p); } else if((c == '<' && (flags & CHARDATA_LT_ERR)) || (c == ']' && (flags & CHARDATA_RBRBGT_ERR) && p[1] == ']' && p[2] == '>')) { throw_syntax_error(p); } else { val = 0; p++; } if(val) { STRLEN vlen; U8 *vstart, *voldend, *vnewend; if(lstart != lend) sv_catpvn_nomg(value, (char*)lstart, lend-lstart); vlen = SvCUR(value); vstart = (U8*)SvGROW(value, vlen+4+1); voldend = vstart + vlen; vnewend = uvuni_to_utf8_flags(voldend, val, UNICODE_ALLOW_ANY); *vnewend = 0; SvCUR_set(value, vnewend - vstart); lstart = p; } } else { if(!char_is_char(p)) throw_syntax_error(p); p += UTF8SKIP(p); } } if(lstart != p) sv_catpvn_nomg(value, (char*)lstart, p-lstart); return p; } /* parse_comment(), parse_pi(): return updated pointer */ #define parse_comment(p) THX_parse_comment(aTHX_ p) static U8 *THX_parse_comment(pTHX_ U8 *p) { if(!(p[0] == '<' && p[1] == '!' && p[2] == '-' && p[3] == '-')) throw_syntax_error(p); p += 4; while(1) { if(*p == '-') { if(*++p == '-') break; } if(!char_is_char(p)) throw_syntax_error(p); p += UTF8SKIP(p); } if(p[1] != '>') throw_syntax_error(p); return p + 2; } #define parse_pi(p) THX_parse_pi(aTHX_ p) static U8 *THX_parse_pi(pTHX_ U8 *p) { STRLEN tgtlen; if(!(p[0] == '<' && p[1] == '?')) throw_syntax_error(p); p += 2; tgtlen = parse_name(p); if(tgtlen == 3 && (p[0] & ~0x20) == 'X' && (p[1] & ~0x20) == 'M' && (p[2] & ~0x20) == 'L') throw_syntax_error(p); p += tgtlen; if(!(p[0] == '?' && p[1] == '>')) { if(!char_is_s(p)) throw_syntax_error(p); p++; while(!(p[0] == '?' 
&& p[1] == '>')) { if(!char_is_char(p)) throw_syntax_error(p); p += UTF8SKIP(p); } } return p + 2; } /* parse_twine(): parses content, guarantees to return only when facing the correct terminator ("' || c == '/') break; p = parse_s(p); c = *p; if(c == '>' || c == '/') break; namelen = parse_name(p); namestart = p; p += namelen; if(hv_exists(attrs, (char*)namestart, -namelen)) throw_wfc_error("duplicate attribute"); p = parse_eq(p); c = *p; if(c != '"' && c != '\'') throw_syntax_error(p); attval = sv_2mortal(newSVpvs("")); SvUTF8_on(attval); p = parse_chars(p+1, attval, c, CHARDATA_AMP_REF|CHARDATA_LT_ERR|CHARDATA_S_LINEAR) + 1; SvREADONLY_on(attval); if(!hv_store(attrs, (char*)namestart, -namelen, SvREFCNT_inc(attval), 0)) SvREFCNT_dec(attval); } SvREADONLY_on((SV*)attrs); if(c == '/') { if(*++p != '>') throw_syntax_error(p); av_push(element, SvREFCNT_inc(empty_contentobject)); } else { p++; av_push(element, SvREFCNT_inc( parse_contentobject(&p, CONTENT_INSIDE))); p += 2; namelen = parse_name(p); if(namelen != typename_len || memcmp(p, typename_start, namelen)) throw_wfc_error("mismatched tags"); p += namelen; p = parse_opt_s(p); if(*p != '>') throw_syntax_error(p); } *pp = p + 1; sv_bless(eref, stash_element); SvREADONLY_on((SV*)element); SvREADONLY_on(eref); return eref; } /* parse_opt_xmldecl(): parses optional XML declaration or text declaration, returns updated pointer */ #define XMLDECL_VERSION 0x01 #define XMLDECL_ENCODING 0x02 #define XMLDECL_STANDALONE 0x03 #define parse_opt_xmldecl(p, allow, require) \ THX_parse_opt_xmldecl(aTHX_ p, allow, require) static U8 *THX_parse_opt_xmldecl(pTHX_ U8 *p, U32 allow, U32 require) { #if 0 /* unused, because throw_syntax_error() ignores its argument */ U8 *start = p; #endif U32 found = 0; if(!(p[0] == '<' && p[1] == '?' 
&& p[2] == 'x' && p[3] == 'm' && p[4] == 'l' && p[5] <= 0x20)) return p; p += 5; if(*p == '?') goto enddecl; p = parse_s(p); if(*p == '?') goto enddecl; if(p[0] == 'v' && p[1] == 'e' && p[2] == 'r' && p[3] == 's' && p[4] == 'i' && p[5] == 'o' && p[6] == 'n') { U8 q; p = parse_eq(p + 7); q = p[0]; if(q != '"' && q != '\'') throw_syntax_error(start); if(!(p[1] == '1' && p[2] == '.' && p[3] == '0' && p[4] == q)) throw_syntax_error(start); p += 5; found |= XMLDECL_VERSION; if(*p == '?') goto enddecl; p = parse_s(p); if(*p == '?') goto enddecl; } if(p[0] == 'e' && p[1] == 'n' && p[2] == 'c' && p[3] == 'o' && p[4] == 'd' && p[5] == 'i' && p[6] == 'n' && p[7] == 'g') { U8 q; p = parse_eq(p + 8); q = *p; if(q != '"' && q != '\'') throw_syntax_error(start); p++; if(!char_is_encstart(p)) throw_syntax_error(start); do { p++; } while(char_is_enc(p)); if(*p != q) throw_syntax_error(start); p++; found |= XMLDECL_ENCODING; if(*p == '?') goto enddecl; p = parse_s(p); if(*p == '?') goto enddecl; } if(p[0] == 's' && p[1] == 't' && p[2] == 'a' && p[3] == 'n' && p[4] == 'd' && p[5] == 'a' && p[6] == 'l' && p[7] == 'o' && p[8] == 'n' && p[9] == 'e') { U8 q; p = parse_eq(p + 10); q = p[0]; if(q != '"' && q != '\'') throw_syntax_error(start); if(!((p[1] == 'y' && p[2] == 'e' && p[3] == 's' && p[4] == q) || (p[1] == 'n' && p[2] == 'o' && p[3] == q))) throw_syntax_error(start); p += p[1] == 'y' ? 
5 : 4; found |= XMLDECL_STANDALONE; if(*p == '?') goto enddecl; p = parse_s(p); if(*p == '?') goto enddecl; } throw_syntax_error(start); enddecl: if(!(p[1] == '>' && !(found & ~allow) && !(require & ~found))) throw_syntax_error(start); return p + 2; } /* parse_misc_seq(): returns updated pointer */ #define parse_misc_seq(p) THX_parse_misc_seq(aTHX_ p) static U8 *THX_parse_misc_seq(pTHX_ U8 *p) { while(1) { U8 c = p[0]; if(c == 0) break; if(c == '<') { c = p[1]; if(c == '!') { p = parse_comment(p); } else if(c == '?') { p = parse_pi(p); } else { break; } } else { p = parse_s(p); } } return p; } /* * serialisation * * The serialise_*() functions each serialise some syntactic construct * within the XML grammar. Their main input is an SV to which they append * the textual form of the item in question. */ #define check_encname(enc) THX_check_encname(aTHX_ enc) static void THX_check_encname(pTHX_ SV *enc) { U8 *p, *end; STRLEN len; if(!sv_is_string(enc)) throw_data_error("encoding name isn't a string"); p = (U8*)SvPV(enc, len); if(len == 0) throw_data_error("illegal encoding name"); end = p + len; if(!char_is_encstart(p)) throw_data_error("illegal encoding name"); while(1) { p++; if(p == end) return; if(!char_is_enc(p)) throw_data_error("illegal encoding name"); } } #define is_name(p, len) THX_is_name(aTHX_ p, len) static int THX_is_name(pTHX_ U8 *p, STRLEN len) { U8 *end = p + len; if(!char_is_namestart(p)) return 0; do { p += UTF8SKIP(p); if(p == end) return 1; } while(char_is_name(p)); return 0; } static U8 const hexdig[16] = "0123456789abcdef"; #define serialise_chardata(out, data) THX_serialise_chardata(aTHX_ out, data) static void THX_serialise_chardata(pTHX_ SV *out, SV *data) { STRLEN datalen; U8 *datastart, *dataend, *p, *lstart; if(!sv_is_string(data)) throw_data_error("character data isn't a string"); data = upgrade_sv(data); datastart = (U8*)SvPV(data, datalen); dataend = datastart + datalen; lstart = p = datastart; while(1) { U8 c = *p; if(c == 0) break; 
if(c == 0xd || c == '<' || c == '&' || (c == '>' && p-lstart >= 2 && p[-1] == ']' && p[-2] == ']')) { U8 refbuf[6] = "&#xXX;"; if(lstart != p) sv_catpvn_nomg(out, (char*)lstart, p-lstart); refbuf[3] = hexdig[c >> 4]; refbuf[4] = hexdig[c & 0xf]; sv_catpvn(out, (char*)refbuf, 6); lstart = ++p; } else { if(!char_is_char(p)) throw_data_error("character data contains " "illegal character"); p += UTF8SKIP(p); } } if(p != dataend) throw_data_error("character data contains illegal character"); if(lstart != p) sv_catpvn_nomg(out, (char*)lstart, p-lstart); } #define serialise_element(out, elem) THX_serialise_element(aTHX_ out, elem) static void THX_serialise_element(pTHX_ SV *out, SV *elem); #define serialise_twine(out, tref) THX_serialise_twine(aTHX_ out, tref) static void THX_serialise_twine(pTHX_ SV *out, SV *tref) { AV *twine; I32 clen, i; SV **item_ptr; if(!SvROK(tref)) throw_data_error("content array isn't an array"); twine = (AV*)SvRV(tref); if(SvTYPE((SV*)twine) != SVt_PVAV || SvOBJECT((SV*)twine)) throw_data_error("content array isn't an array"); clen = av_len(twine); if(clen & 1) throw_data_error("content array has even length"); item_ptr = av_fetch(twine, 0, 0); if(!item_ptr) throw_data_error("character data isn't a string"); serialise_chardata(out, *item_ptr); for(i = 0; i != clen; ) { item_ptr = av_fetch(twine, ++i, 0); if(!item_ptr) throw_data_error("element data isn't an element"); serialise_element(out, *item_ptr); item_ptr = av_fetch(twine, ++i, 0); if(!item_ptr) throw_data_error("character data isn't a string"); serialise_chardata(out, *item_ptr); } } #define serialise_contentobject(out, cref) \ THX_serialise_contentobject(aTHX_ out, cref) static void THX_serialise_contentobject(pTHX_ SV *out, SV *cref) { serialise_twine(out, contentobject_twine(cref)); } #define serialise_eithercontent(out, cref) \ THX_serialise_eithercontent(aTHX_ out, cref) static void THX_serialise_eithercontent(pTHX_ SV *out, SV *cref) { SV *tgt; if(SvROK(cref) && (tgt = SvRV(cref), 
SvTYPE(tgt) == SVt_PVAV) && !SvOBJECT(tgt)) { serialise_twine(out, cref); } else { serialise_contentobject(out, cref); } } #define twine_is_empty(tref) THX_twine_is_empty(aTHX_ tref) static int THX_twine_is_empty(pTHX_ SV *tref) { AV *twine; SV **item_ptr; SV *item; if(!SvROK(tref)) return 0; twine = (AV*)SvRV(tref); if(SvTYPE((SV*)twine) != SVt_PVAV || SvOBJECT((SV*)twine)) return 0; if(av_len(twine) != 0) return 0; item_ptr = av_fetch(twine, 0, 0); if(!item_ptr) return 0; item = *item_ptr; if(!SvOK(item) || SvROK(item)) return 0; return SvPOK(item) && SvCUR(item) == 0; } #define content_is_empty(cref) THX_content_is_empty(aTHX_ cref) static int THX_content_is_empty(pTHX_ SV *cref) { AV *twine; SV **item_ptr; if(!SvROK(cref)) return 0; twine = (AV*)SvRV(cref); if(SvTYPE((SV*)twine) != SVt_PVAV || av_len(twine) != 0) return 0; if(!SvOBJECT((SV*)twine) || SvSTASH((SV*)twine) != stash_content) return 0; item_ptr = av_fetch(twine, 0, 0); if(!item_ptr) return 0; return twine_is_empty(*item_ptr); } #define serialise_attvalue(out, data) THX_serialise_attvalue(aTHX_ out, data) static void THX_serialise_attvalue(pTHX_ SV *out, SV *data) { STRLEN datalen; U8 *datastart, *dataend, *p, *lstart; if(!sv_is_string(data)) throw_data_error("character data isn't a string"); data = upgrade_sv(data); datastart = (U8*)SvPV(data, datalen); dataend = datastart + datalen; lstart = p = datastart; while(1) { U8 c = *p; if(c == 0) break; if(c == 0x9 || c == 0xa || c == 0xd || c == '<' || c == '&' || c == '"') { U8 refbuf[6] = "&#xXX;"; if(lstart != p) sv_catpvn_nomg(out, (char*)lstart, p-lstart); refbuf[3] = hexdig[c >> 4]; refbuf[4] = hexdig[c & 0xf]; sv_catpvn(out, (char*)refbuf, 6); lstart = ++p; } else { if(!char_is_char(p)) throw_data_error("character data contains " "illegal character"); p += UTF8SKIP(p); } } if(p != dataend) throw_data_error("character data contains illegal character"); if(lstart != p) sv_catpvn_nomg(out, (char*)lstart, p-lstart); } static void 
THX_serialise_element(pTHX_ SV *out, SV *eref) { AV *earr; SV **item_ptr; SV *typename, *attrs, *content; HV *ahash; U8 *typename_start; STRLEN typename_len; U32 nattrs; earr = element_nodearray(eref); sv_catpvs_nomg(out, "<"); item_ptr = av_fetch(earr, 0, 0); if(!item_ptr) throw_data_error("element type name isn't a string"); typename = *item_ptr; if(!sv_is_string(typename)) throw_data_error("element type name isn't a string"); typename = upgrade_sv(typename); typename_start = (U8*)SvPV(typename, typename_len); if(!is_name(typename_start, typename_len)) throw_data_error("illegal element type name"); sv_catpvn_nomg(out, (char*)typename_start, typename_len); item_ptr = av_fetch(earr, 1, 0); if(!item_ptr) throw_data_error("attribute hash isn't a hash"); attrs = *item_ptr; if(!SvROK(attrs)) throw_data_error("attribute hash isn't a hash"); ahash = (HV*)SvRV(attrs); if(SvTYPE((SV*)ahash) != SVt_PVHV || SvOBJECT((SV*)ahash)) throw_data_error("attribute hash isn't a hash"); nattrs = hv_iterinit(ahash); if(nattrs != 0) { if(nattrs == 1) { STRLEN klen; U8 *key; HE *ent = hv_iternext(ahash); sv_catpvs_nomg(out, " "); key = (U8*)HePV(ent, klen); if(!HeKUTF8(ent)) upgrade_latin1_pvn(&key, &klen); if(!is_name(key, klen)) throw_data_error("illegal attribute name"); sv_catpvn_nomg(out, (char*)key, klen); sv_catpvs_nomg(out, "=\""); serialise_attvalue(out, HeVAL(ent)); sv_catpvs_nomg(out, "\""); } else { U32 i; AV *keys = newAV(); sv_2mortal((SV*)keys); av_extend(keys, nattrs-1); for(i = nattrs; i--; ) { SV *keysv = upgrade_sv( hv_iterkeysv(hv_iternext(ahash))); SvREFCNT_inc(keysv); av_push(keys, keysv); } sortsv(AvARRAY(keys), nattrs, Perl_sv_cmp); for(i = 0; i != nattrs; i++) { SV *keysv; STRLEN klen; U8 *key; sv_catpvs_nomg(out, " "); keysv = *av_fetch(keys, i, 0); key = (U8*)SvPV(keysv, klen); if(!is_name(key, klen)) throw_data_error("illegal attribute " "name"); sv_catpvn_nomg(out, (char*)key, klen); sv_catpvs_nomg(out, "=\""); serialise_attvalue(out, *hv_fetch(ahash, 
(char*)key, -klen, 0)); sv_catpvs_nomg(out, "\""); } } } item_ptr = av_fetch(earr, 2, 0); if(!item_ptr) throw_data_error("content data isn't a content chunk"); content = *item_ptr; if(content_is_empty(content)) { sv_catpvs_nomg(out, "/>"); } else { sv_catpvs_nomg(out, ">"); serialise_contentobject(out, content); sv_catpvs_nomg(out, ""); } } MODULE = XML::Easy PACKAGE = XML::Easy::Content PROTOTYPES: DISABLE BOOT: /* stash stashes */ stash_content = gv_stashpvs("XML::Easy::Content", 1); stash_element = gv_stashpvs("XML::Easy::Element", 1); /* stash shared empty-content object */ { SV *chardata; AV *twine; SV *tref; AV *content; SV *cref; chardata = newSVpvs(""); SvREADONLY_on(chardata); twine = newAV(); av_push(twine, chardata); SvREADONLY_on((SV*)twine); tref = newRV_noinc((SV*)twine); SvREADONLY_on(tref); content = newAV(); av_push(content, tref); cref = newRV_noinc((SV*)content); sv_bless(cref, stash_content); SvREADONLY_on((SV*)content); SvREADONLY_on(cref); empty_contentobject = cref; } SV * new(SV *classname, SV *tref) CODE: PERL_UNUSED_VAR(classname); RETVAL = twine_contentobject(usertwine_twine(tref)); SvREFCNT_inc(RETVAL); OUTPUT: RETVAL SV * twine(SV *cref) CODE: RETVAL = contentobject_twine(cref); SvREFCNT_inc(RETVAL); OUTPUT: RETVAL MODULE = XML::Easy PACKAGE = XML::Easy::Element SV * new(SV *classname, SV *type_name, SV *attrs, SV *content) PREINIT: U8 *p; STRLEN len; HV *iahash, *oahash; U32 nattrs; SV *tgt; AV *earr; CODE: PERL_UNUSED_VAR(classname); if(!sv_is_string(type_name)) throw_data_error("element type name isn't a string"); type_name = sv_mortalcopy(type_name); sv_utf8_upgrade(type_name); SvREADONLY_on(type_name); p = (U8*)SvPV(type_name, len); if(!is_name(p, len)) throw_data_error("illegal element type name"); if(!SvROK(attrs)) throw_data_error("attribute hash isn't a hash"); iahash = (HV*)SvRV(attrs); if(SvTYPE((SV*)iahash) != SVt_PVHV || SvOBJECT((SV*)iahash)) throw_data_error("attribute hash isn't a hash"); oahash = newHV(); attrs = 
sv_2mortal(newRV_noinc((SV*)oahash)); SvREADONLY_on(attrs); nattrs = hv_iterinit(iahash); if(nattrs != 0) { if(nattrs == 1) { STRLEN klen; U8 *key; HE *ent = hv_iternext(iahash); key = (U8*)HePV(ent, klen); if(!HeKUTF8(ent)) upgrade_latin1_pvn(&key, &klen); if(!is_name(key, klen)) throw_data_error("illegal attribute name"); tgt = userchardata_chardata(HeVAL(ent)); if(!hv_store(oahash, (char *)key, -klen, SvREFCNT_inc(tgt), 0)) SvREFCNT_dec(tgt); } else { U32 i; AV *keys = newAV(); sv_2mortal((SV*)keys); av_extend(keys, nattrs-1); for(i = nattrs; i--; ) { SV *keysv = upgrade_sv( hv_iterkeysv(hv_iternext(iahash))); SvREFCNT_inc(keysv); av_push(keys, keysv); } sortsv(AvARRAY(keys), nattrs, Perl_sv_cmp); for(i = 0; i != nattrs; i++) { SV *keysv; STRLEN klen; U8 *key; keysv = *av_fetch(keys, i, 0); key = (U8*)SvPV(keysv, klen); if(!is_name(key, klen)) throw_data_error("illegal attribute " "name"); tgt = *hv_fetch(iahash, (char*)key, -klen, 0); tgt = userchardata_chardata(tgt); if(!hv_store(oahash, (char *)key, -klen, SvREFCNT_inc(tgt), 0)) SvREFCNT_dec(tgt); } } } SvREADONLY_on((SV*)oahash); if(!SvROK(content)) throw_data_error("content data isn't a content chunk"); tgt = SvRV(content); if(!SvOBJECT(tgt) && SvTYPE(tgt) == SVt_PVAV) { content = twine_contentobject(usertwine_twine(content)); } else if(SvOBJECT(tgt) && SvSTASH(tgt) == stash_content) { content = sv_2mortal(newRV_inc(tgt)); SvREADONLY_on(content); } else { throw_data_error("content data isn't a content chunk"); } earr = newAV(); av_extend(earr, 2); av_push(earr, SvREFCNT_inc(type_name)); av_push(earr, SvREFCNT_inc(attrs)); av_push(earr, SvREFCNT_inc(content)); RETVAL = newRV_noinc((SV*)earr); sv_bless(RETVAL, stash_element); SvREADONLY_on(earr); SvREADONLY_on(RETVAL); OUTPUT: RETVAL SV * type_name(SV *eref) PREINIT: AV *earr; SV **item_ptr; CODE: earr = element_nodearray(eref); item_ptr = av_fetch(earr, 0, 0); if(!item_ptr) throw_data_error("element type name isn't a string"); RETVAL = 
SvREFCNT_inc(*item_ptr); OUTPUT: RETVAL SV * attributes(SV *eref) PREINIT: AV *earr; SV **item_ptr; CODE: earr = element_nodearray(eref); item_ptr = av_fetch(earr, 1, 0); if(!item_ptr) throw_data_error("attribute hash isn't a hash"); RETVAL = SvREFCNT_inc(*item_ptr); OUTPUT: RETVAL SV * attribute(SV *eref, SV *attrname_sv) PREINIT: U8 *attrname; STRLEN attrname_len; AV *earr; HV *ahash; SV **item_ptr, *attrs; CODE: if(!sv_is_string(attrname_sv)) throw_data_error("attribute name isn't a string"); attrname_sv = upgrade_sv(attrname_sv); attrname = (U8*)SvPV(attrname_sv, attrname_len); if(!is_name(attrname, attrname_len)) throw_data_error("illegal attribute name"); earr = element_nodearray(eref); item_ptr = av_fetch(earr, 1, 0); if(!item_ptr) throw_data_error("attribute hash isn't a hash"); attrs = *item_ptr; if(!SvROK(attrs)) throw_data_error("attribute hash isn't a hash"); ahash = (HV*)SvRV(attrs); if(SvTYPE((SV*)ahash) != SVt_PVHV || SvOBJECT((SV*)ahash)) throw_data_error("attribute hash isn't a hash"); if(hv_exists(ahash, (char *)attrname, -attrname_len)) { item_ptr = hv_fetch(ahash, (char *)attrname, -attrname_len, 0); RETVAL = item_ptr ? 
SvREFCNT_inc(*item_ptr) : &PL_sv_undef; } else { RETVAL = &PL_sv_undef; } OUTPUT: RETVAL SV * content_object(SV *eref) PREINIT: AV *earr; SV **item_ptr; CODE: /* content_object(eref): return slot 2 of the element node array (reported below as the content chunk) with an extra reference for the caller. */ earr = element_nodearray(eref); item_ptr = av_fetch(earr, 2, 0); if(!item_ptr) throw_data_error("content data isn't a content chunk"); RETVAL = SvREFCNT_inc(*item_ptr); OUTPUT: RETVAL SV * content_twine(SV *eref) PREINIT: AV *earr; SV **item_ptr; CODE: /* content_twine(eref): same slot as content_object, but passed through contentobject_twine() before being returned. That helper is not visible here; presumably it validates the content object and yields its twine array reference -- TODO confirm. */ earr = element_nodearray(eref); item_ptr = av_fetch(earr, 2, 0); if(!item_ptr) throw_data_error("content data isn't a content chunk"); RETVAL = contentobject_twine(*item_ptr); SvREFCNT_inc(RETVAL); OUTPUT: RETVAL MODULE = XML::Easy PACKAGE = XML::Easy::Text SV * xml10_read_content_object(SV *text_sv) PROTOTYPE: $ PREINIT: STRLEN text_len; U8 *p, *end; CODE: /* xml10_read_content_object(text_sv): reject non-strings, take the byte buffer of upgrade_sv(text_sv), and parse it as top-level content. p is advanced by the parser; unless it lands exactly on end, a syntax error is raised at p. upgrade_sv() is not visible here; presumably it yields a UTF-8 encoded form -- TODO confirm. */ if(!sv_is_string(text_sv)) throw_data_error("text isn't a string"); text_sv = upgrade_sv(text_sv); p = (U8*)SvPV(text_sv, text_len); end = p + text_len; RETVAL = parse_contentobject(&p, CONTENT_TOPLEVEL); if(p != end) throw_syntax_error(p); /* NOTE(review): the count is raised only after the full-consumption check; presumably the parser returns a mortal SV, so an early throw does not leak -- confirm against parse_contentobject */ SvREFCNT_inc(RETVAL); OUTPUT: RETVAL SV * xml10_read_content_twine(SV *text_sv) PROTOTYPE: $ PREINIT: STRLEN text_len; U8 *p, *end; CODE: /* xml10_read_content_twine(text_sv): same driver as xml10_read_content_object, but the result comes from parse_twine. */ if(!sv_is_string(text_sv)) throw_data_error("text isn't a string"); text_sv = upgrade_sv(text_sv); p = (U8*)SvPV(text_sv, text_len); end = p + text_len; RETVAL = parse_twine(&p, CONTENT_TOPLEVEL); if(p != end) throw_syntax_error(p); SvREFCNT_inc(RETVAL); OUTPUT: RETVAL SV * xml10_read_element(SV *text_sv) PROTOTYPE: $ PREINIT: STRLEN text_len; U8 *p, *end; CODE: /* xml10_read_element(text_sv): same driver again; the whole text must be consumed by a single parse_element call. */ if(!sv_is_string(text_sv)) throw_data_error("text isn't a string"); text_sv = upgrade_sv(text_sv); p = (U8*)SvPV(text_sv, text_len); end = p + text_len; RETVAL = parse_element(&p); if(p != end) throw_syntax_error(p); SvREFCNT_inc(RETVAL); OUTPUT: RETVAL SV * xml10_read_document(SV *text_sv) PROTOTYPE: $ PREINIT: STRLEN text_len; U8 *p, *end; CODE: if(!sv_is_string(text_sv)) throw_data_error("text isn't a string"); text_sv = upgrade_sv(text_sv); p = (U8*)SvPV(text_sv, text_len);
end = p + text_len; /* Document reader body: an optional XML declaration, then a misc sequence, the root element, and a trailing misc sequence; anything left before end is a syntax error. NOTE(review): the two flag arguments of parse_opt_xmldecl look like -parts allowed- and -parts required- -- confirm against its definition. */ p = parse_opt_xmldecl(p, XMLDECL_VERSION|XMLDECL_ENCODING|XMLDECL_STANDALONE, XMLDECL_VERSION); p = parse_misc_seq(p); RETVAL = parse_element(&p); p = parse_misc_seq(p); if(p != end) throw_syntax_error(p); SvREFCNT_inc(RETVAL); OUTPUT: RETVAL SV * xml10_read_extparsedent_object(SV *text_sv) PROTOTYPE: $ PREINIT: STRLEN text_len; U8 *p, *end; CODE: /* xml10_read_extparsedent_object(text_sv): reject non-strings, skip an optional declaration (called with VERSION|ENCODING and ENCODING), then parse the remainder as top-level content; the whole buffer must be consumed. */ if(!sv_is_string(text_sv)) throw_data_error("text isn't a string"); text_sv = upgrade_sv(text_sv); p = (U8*)SvPV(text_sv, text_len); end = p + text_len; p = parse_opt_xmldecl(p, XMLDECL_VERSION|XMLDECL_ENCODING, XMLDECL_ENCODING); RETVAL = parse_contentobject(&p, CONTENT_TOPLEVEL); if(p != end) throw_syntax_error(p); SvREFCNT_inc(RETVAL); OUTPUT: RETVAL SV * xml10_read_extparsedent_twine(SV *text_sv) PROTOTYPE: $ PREINIT: STRLEN text_len; U8 *p, *end; CODE: /* xml10_read_extparsedent_twine(text_sv): as xml10_read_extparsedent_object, but the result comes from parse_twine. */ if(!sv_is_string(text_sv)) throw_data_error("text isn't a string"); text_sv = upgrade_sv(text_sv); p = (U8*)SvPV(text_sv, text_len); end = p + text_len; p = parse_opt_xmldecl(p, XMLDECL_VERSION|XMLDECL_ENCODING, XMLDECL_ENCODING); RETVAL = parse_twine(&p, CONTENT_TOPLEVEL); if(p != end) throw_syntax_error(p); SvREFCNT_inc(RETVAL); OUTPUT: RETVAL SV * xml10_write_content(SV *cont) PROTOTYPE: $ CODE: /* xml10_write_content(cont): start from an empty mortal string flagged as UTF-8 and let serialise_eithercontent() (not visible here) append to it; the count is raised just before the value is returned. */ RETVAL = sv_2mortal(newSVpvs("")); SvUTF8_on(RETVAL); serialise_eithercontent(RETVAL, cont); SvREFCNT_inc(RETVAL); OUTPUT: RETVAL SV * xml10_write_element(SV *elem) PROTOTYPE: $ CODE: /* xml10_write_element(elem): same shape as xml10_write_content, using serialise_element(). */ RETVAL = sv_2mortal(newSVpvs("")); SvUTF8_on(RETVAL); serialise_element(RETVAL, elem); SvREFCNT_inc(RETVAL); OUTPUT: RETVAL SV * xml10_write_document(SV *elem, SV *enc = &PL_sv_undef) PROTOTYPE: $;$ CODE: /* NOTE(review): the next statement has unbalanced parentheses and is followed by an else with no visible if. Text between the start of the initial string literal and this point appears to have been lost from this copy of the file (most likely the XML declaration prefix and the encoding branch). Restore it from the upstream Easy.xs before building; do not guess. */ RETVAL = sv_2mortal(newSVpvs("\n"); } else { sv_catpvs_nomg(RETVAL, " standalone=\"yes\"?>\n"); } serialise_element(RETVAL, elem); sv_catpvs_nomg(RETVAL, "\n"); SvREFCNT_inc(RETVAL); OUTPUT: RETVAL SV * xml10_write_extparsedent(SV *cont, SV *enc = &PL_sv_undef) PROTOTYPE: $;$ CODE: /* An encoding argument counts as supplied when it is defined or is a glob (condition continues past this point). */ RETVAL = sv_2mortal(newSVpvs("")); SvUTF8_on(RETVAL); if(SvOK(enc) || SvTYPE(enc) ==
SVt_PVGV) { check_encname(enc); sv_catpvs_nomg(RETVAL, ""); } serialise_eithercontent(RETVAL, cont); SvREFCNT_inc(RETVAL); OUTPUT: RETVAL XML-Easy-0.009/lib/XML/Easy.pm000444001750001750 1135711652350543 15651 0ustar00zeframzefram000000000000=head1 NAME XML::Easy - XML processing with a clean interface =head1 SYNOPSIS use XML::Easy::NodeBasics qw(xml_element xml_e_attribute); use XML::Easy::Text qw(xml10_read_document xml10_write_document); $element = xml_element("a", { href => "there" }, "there"); $element = xml10_read_document('there'); $href = xml_e_attribute($element, "href"); $text = xml10_write_document($element); # see specific modules for many more functions =head1 DESCRIPTION L is a collection of modules relating to the processing, parsing, and serialisation of XML data. It is oriented towards the use of XML to represent data for interchange purposes, rather than the use of XML as markup of principally textual data. It does not perform any schema processing, and does not interpret DTDs or any other kind of schema. It adheres strictly to the XML specification, in all its awkward details, except for the aforementioned DTDs. L strictly separates the in-program manipulation of XML data from the processing of the textual form of XML. This shields the XML user from the inconvenient and obscure aspects of XML syntax. XML data nodes are mainly processed in a clean functional style, using the L module. In the (very likely) event that an application requires some more purpose-specific XML data processing facilities, they are readily built on top of L, retaining the abstraction from textual XML. When XML must be handled in textual form, for input and output, the L module supplies a parser and a serialiser. The interfaces here, too, are functional in nature. There are other modules for some ancillary aspects of XML processing. =head1 MODULES The modules in the L distribution are: =over =item L This document. 
For historical reasons, this can also be loaded as a module, and (though it is deprecated) some of the functions from L can be imported from here. =item L This module provides various type-testing functions, relating to data types used in the L ensemble. These are mainly intended to be used to enforce validity of data being processed by XML-related functions. =item L =item L These are classes used to represent XML data for general manipulation. Objects of these classes hold the meaningful content of the data, independent of textual representation. The data in these nodes cannot be modified: different data requires new nodes. =item L This module supplies functions concerned with the creation, examination, and other manipulation of XML data nodes (content chunks and elements). The nodes are dumb data objects, best manipulated using plain functions such as the ones in this module. =item L This module supplies Perl regular expressions describing the grammar of XML 1.0. This is intended to support doing irregular things with XML, rather than for normal parsing. =item L This module supplies functions that parse and serialise XML data as text according to the XML 1.0 specification. =back =head1 OTHER DISTRIBUTIONS Other CPAN distributions that work with L are: =over =item L A testing tool, providing L-style functions that check whether XML nodes are as expected. =item L Provides a way to construct XML data nodes by procedural code. Some programmers will find this more comfortable than the functional style offered by L. =item L Helps to parse things that are encoded in XML in common ways. =item C This namespace exists to contain modules that perform transformations on XML documents, or parts thereof, in the form of L and L nodes. 
=back =cut package XML::Easy; { use 5.008; } use warnings; use strict; our $VERSION = "0.009"; use parent "Exporter"; our @EXPORT_OK = qw( xml10_read_content xml10_read_element xml10_read_document xml10_read_extparsedent xml10_write_content xml10_write_element xml10_write_document xml10_write_extparsedent ); require XML::Easy::Text; XML::Easy::Text->VERSION($VERSION); XML::Easy::Text->import(@EXPORT_OK); =head1 SEE ALSO L, L, L, L, L =head1 AUTHOR Andrew Main (Zefram) =head1 COPYRIGHT Copyright (C) 2008, 2009 PhotoBox Ltd Copyright (C) 2009, 2010, 2011 Andrew Main (Zefram) =head1 LICENSE This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut 1; XML-Easy-0.009/lib/XML/Easy000755001750001750 011652350543 15127 5ustar00zeframzefram000000000000XML-Easy-0.009/lib/XML/Easy/Content.pm000444001750001750 741011652350543 17236 0ustar00zeframzefram000000000000=head1 NAME XML::Easy::Content - abstract form of XML content =head1 SYNOPSIS use XML::Easy::Content; $content = XML::Easy::Content->new([ "foo", $subelement, "bar", ]); $twine = $content->twine; =head1 DESCRIPTION An object of this class represents a chunk of XML content, the kind of matter that can be contained within an XML element. This is in an abstract form, intended for general manipulation. It is completely isolated from the textual representation of XML, and holds only the meaningful content of the chunk. The data in a content object cannot be modified: different data requires the creation of a new object. An XML content chunk consists of a sequence of zero or more characters and XML elements, interspersed in any fashion. Character content can use almost all Unicode characters, with only a few characters (such as most of the ASCII control characters) prohibited by the specification from being directly represented in XML. Each XML element in a content chunk itself recursively contains a chunk of content, in addition to having attached metadata. 
This class is not meant to be subclassed.  XML content is unextendable,
dumb data.  Content objects are better processed using the functions in
L<XML::Easy::NodeBasics> than using the methods of this class.

=cut

package XML::Easy::Content;

{ use 5.008; }
use warnings;
use strict;

our $VERSION = "0.009";

# Prefer the compiled implementation: try to load the XS code, unless a
# "new" sub already exists in this package.  Any __DIE__ handler is
# disabled for the attempt so that a failed load is reported only via $@.
eval {
    local $SIG{__DIE__};
    require XSLoader;
    XSLoader::load("XML::Easy", $VERSION) unless defined &new;
};

if($@ eq "") {
    # XS is in place; the pure-Perl source held in DATA is not needed.
    close(DATA);
} else {
    # Fall back to the pure-Perl implementation stored after __DATA__:
    # slurp it in one read and compile it with a string eval.
    # The tr deletes every character outside the printable ASCII range
    # (space through tilde) so the file name is safe inside the "#line"
    # directive.
    (my $filename = __FILE__) =~ tr# -~##cd;
    local $/ = undef;
    # NOTE(review): the number in the "#line" directive only affects the
    # line numbers perl reports for the eval'd code; it presumably has to
    # match the physical line on which the DATA text starts -- re-check it
    # whenever the layout of this file changes.
    my $pp_code = "#line 75 \"$filename\"\n".<DATA>;
    close(DATA);
    {
        local $SIG{__DIE__};
        eval $pp_code;
    }
    # A failure of the fallback itself is fatal.
    die $@ if $@ ne "";
}

# Deprecated method name, kept as an alias.
*content = \&twine;

1;

__DATA__

# Note perl bug: a bug in perl 5.8.{0..6} screws up __PACKAGE__ (used below)
# for the eval.  Explicit package declaration here fixes it.
package XML::Easy::Content;

use Params::Classify 0.000 qw(is_ref);
use XML::Easy::Classify qw(check_xml_content_twine);

# _set_readonly(REF) marks the referenced value read-only when the optional
# Internals module provides SetReadOnly; otherwise it is a no-op.
BEGIN {
    if(eval { local $SIG{__DIE__};
        require Internals;
        exists &Internals::SetReadOnly;
    }) {
        *_set_readonly = \&Internals::SetReadOnly;
    } else {
        *_set_readonly = sub { };
    }
}

# Throws the standard "invalid XML data" exception with the given detail.
sub _throw_data_error($) {
    my($msg) = @_;
    die "invalid XML data: $msg\n";
}

=head1 CONSTRUCTOR

=over

=item XML::Easy::Content->new(TWINE)

Constructs and returns a new content chunk object with the specified
content.  I<TWINE> must be a reference to an array listing the chunk's
content in twine form (see L<XML::Easy::NodeBasics/Twine>).
The content is checked for validity, against the XML 1.0 specification,
and the function C<die>s if it is invalid.

=cut

sub new {
    my($class, $twine) = @_;
    _throw_data_error("content array isn't an array")
        unless is_ref($twine, "ARRAY");
    # Work on a private shallow copy, frozen before it is validated, so
    # the caller cannot alter what was checked.
    $twine = [ @$twine ];
    _set_readonly(\$_) foreach @$twine;
    _set_readonly($twine);
    check_xml_content_twine($twine);
    # The object is always blessed into this package, whatever $class is.
    my $self = bless([ $twine ], __PACKAGE__);
    _set_readonly(\$_) foreach @$self;
    _set_readonly($self);
    return $self;
}

=back

=head1 METHODS

=over

=item $content->twine

Returns a reference to an array listing the chunk's content in twine form
(see L<XML::Easy::NodeBasics/Twine>).
The returned array must not be subsequently modified. If possible, it will be marked as read-only in order to prevent modification. =cut sub twine { $_[0]->[0] } =item $content->content Deprecated alias for the L</twine> method. =back =head1 SEE ALSO L, L =head1 AUTHOR Andrew Main (Zefram) =head1 COPYRIGHT Copyright (C) 2008, 2009 PhotoBox Ltd Copyright (C) 2009, 2010, 2011 Andrew Main (Zefram) =head1 LICENSE This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut 1; XML-Easy-0.009/lib/XML/Easy/Transform.pod000444001750001750 353511652350543 17751 0ustar00zeframzefram000000000000=head1 NAME XML::Easy::Transform - XML processing with a clean interface =head1 DESCRIPTION The C<XML::Easy::Transform> namespace exists to contain modules that perform transformations on XML documents, or parts thereof, in the form of L<XML::Easy::Content> and L<XML::Easy::Element> nodes. L<XML::Easy> is a collection of modules relating to the processing of XML data. It includes functions to parse and serialise the standard textual form of XML. When XML data is not in text form, L<XML::Easy> processes it in an abstract syntax-neutral form, as a collection of linked Perl objects. This in-program data format shields XML users from the infelicities of XML syntax. Modules under the C<XML::Easy::Transform> namespace operate on XML data in this abstract structured form, not on textual XML. A transformation on XML data should normally be presented in the form of a function, which takes an L<XML::Easy::Element> node as its main parameter, and returns an L<XML::Easy::Element> node (or C<die>s on error). The input node and output node each represent the root element of the XML document (or fragment thereof) being transformed. These nodes, of course, contain subordinate nodes, according to the structure of the XML data. A reference to the top node is all that is required to effectively pass the whole document. =head1 OTHER DISTRIBUTIONS CPAN distributions under this namespace are: =over =item L Manages XML Namespaces by hoisting all namespace declarations to the root of a document.
=back =head1 SEE ALSO L =head1 AUTHOR Andrew Main (Zefram) =head1 COPYRIGHT Copyright (C) 2009, 2010, 2011 Andrew Main (Zefram) =head1 LICENSE This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut 1; XML-Easy-0.009/lib/XML/Easy/NodeBasics.pm000444001750001750 3334311652350543 17662 0ustar00zeframzefram000000000000=head1 NAME XML::Easy::NodeBasics - basic manipulation of XML data nodes =head1 SYNOPSIS use XML::Easy::NodeBasics qw(xml_content_object xml_element); $content = xml_content_object("this", "&", "that"); $content = xml_content_object(@sublems); $element = xml_element("a", { href => "there" }, "there"); $element = xml_element("div", @subelems); use XML::Easy::NodeBasics qw(xml_c_content_object xml_c_content_twine); $content = xml_c_content_object($content); $twine = xml_c_content_twine($content); use XML::Easy::NodeBasics qw( xml_e_type_name xml_e_attributes xml_e_attribute xml_e_content_object ); $type_name = xml_e_type_name($element); $attributes = xml_e_attributes($element); $href = xml_e_attribute($element, "href"); $content = xml_e_content_object($element); use XML::Easy::NodeBasics qw( xml_c_equal xml_e_equal xml_c_unequal xml_e_unequal ); if(xml_c_equal($content0, $content1)) { ... if(xml_e_equal($element0, $element1)) { ... if(xml_c_unequal($content0, $content1)) { ... if(xml_e_unequal($element0, $element1)) { ... =head1 DESCRIPTION This module supplies functions concerned with the creation, examination, and other manipulation of XML data nodes (content chunks and elements). The nodes are dumb data objects, best manipulated using plain functions such as the ones in this module. The nodes are objects of the classes L and L. The data contained within an existing node cannot be modified. This means that references to nodes can be copied and passed around arbitrarily, without worrying about who might write to them, or deep versus shallow copying. 
As a result, tasks that you might think of as "modifying an XML node" actually involve creating a new node. The node classes do not have any interesting object-oriented behaviour, and their minimalistic methods are not meant to be called directly. Instead, node creation and examination should be performed using the functions of this module. =head2 Twine For the purposes of examining what is contained within a chunk of content, there is a standard representation of content known as "twine". (It's stronger than a string, and has an alternating structure as will be described.) A piece of twine is a reference to an array with an odd number of members. The first and last members, and all members in between with an even index, are strings giving the chunk's character data. Each member with an odd index is a reference to an L object, representing an XML element contained directly within the chunk. Any of the strings may be empty, if the chunk has no character data between subelements or at the start or end of the chunk. When not looking inside a content chunk, it is preferred to represent it in encapsulated form as an L object. 
=cut package XML::Easy::NodeBasics; { use 5.008; } use warnings; use strict; use Params::Classify 0.000 qw(is_string is_ref); use XML::Easy::Classify 0.001 qw( is_xml_name check_xml_chardata check_xml_attributes is_xml_content_object check_xml_content_object is_xml_element check_xml_element ); use XML::Easy::Content 0.007 (); use XML::Easy::Element 0.007 (); BEGIN { if(eval { local $SIG{__DIE__}; require Internals; exists &Internals::SetReadOnly; }) { *_set_readonly = \&Internals::SetReadOnly; } else { *_set_readonly = sub { }; } } our $VERSION = "0.009"; use parent "Exporter"; our @EXPORT_OK = qw( xml_content_object xc xml_content_twine xct xml_content xml_element xe xml_c_content_object xc_cont xml_c_content_twine xc_twine xml_c_content xml_e_type_name xe_type xml_e_attributes xe_attrs xml_e_attribute xe_attr xml_e_content_object xe_cont xml_e_content_twine xe_twine xml_e_content xml_c_equal xc_eq xml_e_equal xe_eq xml_c_unequal xc_ne xml_e_unequal xe_ne ); sub _throw_data_error($) { my($msg) = @_; die "invalid XML data: $msg\n"; } =head1 FUNCTIONS Each function has two names. There is a longer descriptive name, and a shorter name to spare screen space and the programmer's fingers. =head2 Construction The construction functions each accept any number of items of XML content. These items may be supplied in any of several forms. Content item types may be mixed arbitrarily, in any sequence. The permitted forms of content item are: =over =item character data A plain string of characters that are acceptable to XML. =item element A reference to an L object representing an XML element. =item content object A reference to an L object representing a chunk of XML content. =item twine array A reference to a L array listing a chunk of XML content. =back The construction functions are: =over =item xml_content_object(ITEM ...) =item xc(ITEM ...) Constructs and returns a XML content object based on a list of constituents. 
Any number of I<ITEM>s (zero or more) may be supplied; each one must be a content item of a permitted type. All the constituents are checked for validity, against the XML 1.0 specification, and the function C<die>s if any are invalid. All the supplied content items are concatenated to form a single chunk. The function returns a reference to an L<XML::Easy::Content> object. =cut sub xml_content_twine(@); sub xml_content_object(@) { XML::Easy::Content->new(&xml_content_twine) } *xc = \&xml_content_object; =item xml_content_twine(ITEM ...) =item xct(ITEM ...) Performs the same construction job as L</xml_content_object>, but returns the resulting content chunk in the form of L<twine|/Twine> rather than a content object. The returned array must not be subsequently modified. If possible, it will be marked as read-only in order to prevent modification. =cut sub xml_content_twine(@) { my @content = (""); foreach(@_) { if(is_string($_)) { check_xml_chardata($_); $content[-1] .= $_; } elsif(is_xml_element($_)) { push @content, $_, ""; } elsif(is_xml_content_object($_)) { my $twine = $_->twine; $content[-1] .= $twine->[0]; push @content, @{$twine}[1 .. $#$twine]; } elsif(is_ref($_, "ARRAY")) { my $twine = XML::Easy::Content->new($_)->twine; $content[-1] .= $twine->[0]; push @content, @{$twine}[1 .. $#$twine]; } else { _throw_data_error("invalid content item"); } } _set_readonly(\$_) foreach @content; _set_readonly(\@content); return \@content; } *xct = \&xml_content_twine; =item xml_content(ITEM ...) Deprecated alias for L</xml_content_twine>. =cut *xml_content = \&xml_content_twine; =item xml_element(TYPE_NAME, ITEM ...) =item xe(TYPE_NAME, ITEM ...) Constructs and returns an L<XML::Easy::Element> object, representing an XML element, based on a list of constituents. I<TYPE_NAME> must be a string, and gives the name of the element's type. Any number of I<ITEM>s (zero or more) may be supplied; each one must be either a content item of a permitted type or a reference to a hash of attributes.
All the constituents are checked for validity, against the XML 1.0
specification, and the function C<die>s if any are invalid.  All the
attributes supplied are gathered together to form the element's attribute
set.  It is an error if an attribute name has been used more than once
(even if the same value was given each time).  All the supplied content
items are concatenated to form the element's content.
The function returns a reference to an L<XML::Easy::Element> object.

=cut

sub xml_element($@) {
    my $type_name = shift(@_);
    # For a type name that fails is_xml_name, call the constructor purely
    # for its exception, so the error text comes from one place.
    XML::Easy::Element->new($type_name, {}, [""])
        unless is_xml_name($type_name);
    my %attrs;
    # Pull every hash reference out of the argument list, merging it into
    # %attrs; what remains in @_ afterwards is the list of content items.
    for(my $i = 0; $i != @_; ) {
        my $item = $_[$i];
        if(is_ref($item, "HASH")) {
            # Iterate with keys(), not each(): each() advances the
            # iterator stored in the caller's hash, and the die below
            # would leave it part-way through, so a later call passing
            # the same hash would silently skip attributes.  keys()
            # always starts from a reset iterator.
            foreach my $k (keys %$item) {
                _throw_data_error("duplicate attribute name")
                    if exists $attrs{$k};
                $attrs{$k} = $item->{$k};
            }
            # Remove the hash from @_; do not advance, the next item has
            # moved into position $i.
            splice @_, $i, 1, ();
        } else {
            $i++;
        }
    }
    check_xml_attributes(\%attrs);
    # &xml_content_object (no parentheses) is deliberate: it receives the
    # current @_, i.e. the content items left after the splices above.
    return XML::Easy::Element->new($type_name, \%attrs,
        &xml_content_object);
}

*xe = \&xml_element;

=back

=head2 Examination of content chunks

=over

=item xml_c_content_object(CONTENT)

=item xc_cont(CONTENT)

I<CONTENT> must be a reference to either an L<XML::Easy::Content>
object or a L<twine|/Twine> array.
Returns a reference to an L<XML::Easy::Content> object encapsulating
the content.

=cut

sub xml_c_content_object($) {
    if(is_ref($_[0], "ARRAY")) {
        # Twine array: the constructor validates and wraps it.
        return XML::Easy::Content->new($_[0]);
    } else {
        # Anything else must already be a content object; the checker is
        # called with the current @_ and throws otherwise.
        &check_xml_content_object;
        return $_[0];
    }
}

*xc_cont = \&xml_c_content_object;

=item xml_c_content_twine(CONTENT)

=item xc_twine(CONTENT)

I<CONTENT> must be a reference to either an L<XML::Easy::Content>
object or a L<twine|/Twine> array.
Returns a reference to a L<twine|/Twine> array listing the content.

The returned array must not be subsequently modified.  If possible, it
will be marked as read-only in order to prevent modification.

=cut

sub xml_c_content_twine($) { xml_c_content_object($_[0])->twine }

*xc_twine = \&xml_c_content_twine;

=item xml_c_content(CONTENT)

Deprecated alias for L</xml_c_content_twine>.
=cut *xml_c_content = \&xml_c_content_twine; =back =head2 Examination of elements =over =item xml_e_type_name(ELEMENT) =item xe_type(ELEMENT) I must be a reference to an L object. Returns the element's type's name, as a string. =cut sub xml_e_type_name($) { &check_xml_element; return $_[0]->type_name; } *xe_type = \&xml_e_type_name; =item xml_e_attributes(ELEMENT) =item xe_attrs(ELEMENT) I must be a reference to an L object. Returns a reference to a hash encapsulating the element's attributes. In the hash, each key is an attribute name, and the corresponding value is the attribute's value as a string. The returned hash must not be subsequently modified. If possible, it will be marked as read-only in order to prevent modification. As a side effect, the read-only-ness may make lookup of any non-existent attribute generate an exception rather than returning C. =cut sub xml_e_attributes($) { &check_xml_element; return $_[0]->attributes; } *xe_attrs = \&xml_e_attributes; =item xml_e_attribute(ELEMENT, NAME) =item xe_attr(ELEMENT, NAME) I must be a reference to an L object. Looks up a specific attribute of the element, by a name supplied as a string. If there is an attribute by that name then its value is returned, as a string. If there is no such attribute then C is returned. =cut sub xml_e_attribute($$) { check_xml_element($_[0]); return $_[0]->attribute($_[1]); } *xe_attr = \&xml_e_attribute; =item xml_e_content_object(ELEMENT) =item xe_cont(ELEMENT) I must be a reference to an L object. Returns a reference to an L object encapsulating the element's content. =cut sub xml_e_content_object($) { &check_xml_element; return $_[0]->content_object; } *xe_cont = \&xml_e_content_object; =item xml_e_content_twine(ELEMENT) =item xe_twine(ELEMENT) I must be a reference to an L object. Returns a reference to a L array listing the element's content. The returned array must not be subsequently modified. If possible, it will be marked as read-only in order to prevent modification. 
=cut sub xml_e_content_twine($) { &check_xml_element; return $_[0]->content_twine; } *xe_twine = \&xml_e_content_twine; =item xml_e_content(ELEMENT) Deprecated alias for L. =cut *xml_e_content = \&xml_e_content_twine; =back =head2 Comparison =over =item xml_c_equal(A, B) =item xc_eq(A, B) I and I must each be a reference to either an L object or a L array. Returns true if they represent exactly the same content, and false if they do not. =cut sub _xe_eq($$); sub _xct_eq($$) { my($a, $b) = @_; return !!1 if $a == $b; return !!0 unless @$a == @$b; for(my $i = $#$a; $i >= 0; $i -= 2) { return !!0 unless $a->[$i] eq $b->[$i]; } for(my $i = $#$a-1; $i >= 0; $i -= 2) { return !!0 unless _xe_eq($a->[$i], $b->[$i]); } return !!1; } sub xml_c_equal($$) { return _xct_eq(xml_c_content_twine($_[0]), xml_c_content_twine($_[1])); } *xc_eq = \&xml_c_equal; =item xml_e_equal(A, B) =item xe_eq(A, B) I and I must each be a reference to an L object. Returns true if they represent exactly the same element, and false if they do not. =cut sub _xe_eq($$) { my($a, $b) = @_; return !!1 if $a == $b; return !!0 unless $a->type_name eq $b->type_name; my $aattr = $a->attributes; my $battr = $b->attributes; foreach(keys %$aattr) { return !!0 unless exists($battr->{$_}) && $aattr->{$_} eq $battr->{$_}; } foreach(keys %$battr) { return !!0 unless exists $aattr->{$_}; } return _xct_eq($a->content_twine, $b->content_twine); } sub xml_e_equal($$) { check_xml_element($_[0]); check_xml_element($_[1]); return &_xe_eq; } *xe_eq = \&xml_e_equal; =item xml_c_unequal(A, B) =item xc_ne(A, B) I and I must each be a reference to either an L object or a L array. Returns true if they do not represent exactly the same content, and false if they do. =cut sub xml_c_unequal($$) { !&xml_c_equal } *xc_ne = \&xml_c_unequal; =item xml_e_unequal(A, B) =item xe_ne(A, B) I and I must each be a reference to an L object. Returns true if they do not represent exactly the same element, and false if they do. 
=cut sub xml_e_unequal($$) { !&xml_e_equal } *xe_ne = \&xml_e_unequal; =back =head1 SEE ALSO L, L, L, L, L, L =head1 AUTHOR Andrew Main (Zefram) =head1 COPYRIGHT Copyright (C) 2009, 2010, 2011 Andrew Main (Zefram) =head1 LICENSE This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut 1; XML-Easy-0.009/lib/XML/Easy/Element.pm000444001750001750 1456111652350543 17242 0ustar00zeframzefram000000000000=head1 NAME XML::Easy::Element - abstract form of XML element =head1 SYNOPSIS use XML::Easy::Element; $element = XML::Easy::Element->new("a", { href => "#there" }, $content); $type_name = $element->type_name; $attributes = $element->attributes; $href = $element->attribute("href"); $content = $element->content_object; =head1 DESCRIPTION An object of this class represents an XML element, a node in the tree making up an XML document. This is in an abstract form, intended for general manipulation. It is completely isolated from the textual representation of XML, and holds only the meaningful content of the element. The data in an element object cannot be modified: different data requires the creation of a new object. The properties of an XML element are of three kinds. Firstly, the element has exactly one type, which is referred to by a name. Secondly, the element has a set of zero or more attributes. Each attribute consists of a name, which is unique among the attributes of the element, and a value, which is a string of characters. Finally, the element has content, which is a sequence of zero or more characters and (recursively) elements, interspersed in any fashion. The element type name and attribute names all follow the XML syntax for names. This allows the use of a wide set of Unicode characters, with some restrictions. 
Attribute values and character content can use almost all Unicode
characters, with only a few characters (such as most of the ASCII control
characters) prohibited by the specification from being directly
represented in XML.

This class is not meant to be subclassed.  XML elements are unextendable,
dumb data.  Element objects are better processed using the functions in
L<XML::Easy::NodeBasics> than using the methods of this class.

=cut

package XML::Easy::Element;

{ use 5.008; }
use warnings;
use strict;

use XML::Easy::Content 0.007 ();

our $VERSION = "0.009";

# Prefer the compiled implementation: try to load the XS code, unless a
# "new" sub already exists in this package.  Any __DIE__ handler is
# disabled for the attempt so that a failed load is reported only via $@.
eval {
    local $SIG{__DIE__};
    require XSLoader;
    XSLoader::load("XML::Easy", $VERSION) unless defined &new;
};

if($@ eq "") {
    # XS is in place; the pure-Perl source held in DATA is not needed.
    close(DATA);
} else {
    # Fall back to the pure-Perl implementation stored after __DATA__:
    # slurp it in one read and compile it with a string eval.
    # The tr deletes every character outside the printable ASCII range
    # (space through tilde) so the file name is safe inside the "#line"
    # directive.
    (my $filename = __FILE__) =~ tr# -~##cd;
    local $/ = undef;
    # NOTE(review): the number in the "#line" directive only affects the
    # line numbers perl reports for the eval'd code; it presumably has to
    # match the physical line on which the DATA text starts -- re-check it
    # whenever the layout of this file changes.
    my $pp_code = "#line 83 \"$filename\"\n".<DATA>;
    close(DATA);
    {
        local $SIG{__DIE__};
        eval $pp_code;
    }
    # A failure of the fallback itself is fatal.
    die $@ if $@ ne "";
}

# Deprecated method name, kept as an alias.
*content = \&content_twine;

1;

__DATA__

# Note perl bug: a bug in perl 5.8.{0..6} screws up __PACKAGE__ (used below)
# for the eval.  Explicit package declaration here fixes it.
package XML::Easy::Element;

use Params::Classify 0.000 qw(is_string is_ref);
use XML::Easy::Classify 0.001
    qw(check_xml_attributes check_xml_content_object);
use XML::Easy::Syntax 0.000 qw($xml10_name_rx);

# _set_readonly(REF) marks the referenced value read-only when the optional
# Internals module provides SetReadOnly; otherwise it is a no-op.
BEGIN {
    if(eval { local $SIG{__DIE__};
        require Internals;
        exists &Internals::SetReadOnly;
    }) {
        *_set_readonly = \&Internals::SetReadOnly;
    } else {
        *_set_readonly = sub { };
    }
}

# Throws the standard "invalid XML data" exception with the given detail.
sub _throw_data_error($) {
    my($msg) = @_;
    die "invalid XML data: $msg\n";
}

=head1 CONSTRUCTOR

=over

=item XML::Easy::Element->new(TYPE_NAME, ATTRIBUTES, CONTENT)

Constructs and returns a new element object with the specified properties.
I<TYPE_NAME> must be a string.  I<ATTRIBUTES> must be a reference to
a hash in the same form that is returned by the accessor method
C<attributes> (below).  I<CONTENT> must be a reference to either an
L<XML::Easy::Content> object or a twine array (see
L<XML::Easy::NodeBasics/Twine>).
All are checked for validity, against the XML 1.0 specification, and the
function C<die>s if any are invalid.
=cut sub new { my($class, $type_name, $attrs, $content) = @_; _throw_data_error("element type name isn't a string") unless is_string($type_name); { no warnings "utf8"; _throw_data_error("illegal element type name") unless $type_name =~ /\A$xml10_name_rx\z/o; } _throw_data_error("attribute hash isn't a hash") unless is_ref($attrs, "HASH"); $attrs = { %$attrs }; _set_readonly(\$_) foreach values %$attrs; _set_readonly($attrs); check_xml_attributes($attrs); if(is_ref($content, "ARRAY")) { $content = XML::Easy::Content->new($content); } else { check_xml_content_object($content); } my $self = bless([ $type_name, $attrs, $content ], __PACKAGE__); _set_readonly(\$_) foreach @$self; _set_readonly($self); return $self; } =back =head1 METHODS =over =item $element->type_name Returns the element type name, as a string. =cut sub type_name { $_[0]->[0] } =item $element->attributes Returns a reference to a hash encapsulating the element's attributes. In the hash, each key is an attribute name, and the corresponding value is the attribute's value as a string. The returned hash must not be subsequently modified. If possible, it will be marked as read-only in order to prevent modification. As a side effect, the read-only-ness may make lookup of any non-existent attribute generate an exception rather than returning C. =cut sub attributes { $_[0]->[1] } =item $element->attribute(NAME) Looks up a specific attribute of the element. The supplied I must be a string containing a valid attribute name. If there is an attribute by that name then its value is returned, as a string. If there is no such attribute then C is returned. =cut sub attribute { _throw_data_error("attribute name isn't a string") unless is_string($_[1]); { no warnings "utf8"; _throw_data_error("illegal attribute name") unless $_[1] =~ /\A$xml10_name_rx\z/o; } return exists($_[0]->[1]->{$_[1]}) ? 
$_[0]->[1]->{$_[1]} : undef; } =item $element->content_object Returns a reference to an L object encapsulating the element's content. =cut sub content_object { $_[0]->[2] } =item $element->content_twine Returns a reference to a twine array (see L) listing the element's content. The returned array must not be subsequently modified. If possible, it will be marked as read-only in order to prevent modification. =cut sub content_twine { my $content = $_[0]->[2]; check_xml_content_object($content); return $content->twine; } =item $element->content Deprecated alias for the L method. =back =head1 SEE ALSO L, L =head1 AUTHOR Andrew Main (Zefram) =head1 COPYRIGHT Copyright (C) 2008, 2009 PhotoBox Ltd Copyright (C) 2009, 2010, 2011 Andrew Main (Zefram) =head1 LICENSE This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut 1; XML-Easy-0.009/lib/XML/Easy/Text.pm000444001750001750 5157711652350543 16605 0ustar00zeframzefram000000000000=head1 NAME XML::Easy::Text - XML parsing and serialisation =head1 SYNOPSIS use XML::Easy::Text qw( xml10_read_content_object xml10_read_element xml10_read_document xml10_read_extparsedent_object ); $content = xml10_read_content_object($text); $element = xml10_read_element($text); $element = xml10_read_document($text); $content = xml10_read_extparsedent_object($text); use XML::Easy::Text qw( xml10_write_content xml10_write_element xml10_write_document xml10_write_extparsedent ); $text = xml10_write_content($content); $text = xml10_write_element($element); $text = xml10_write_document($element, "UTF-8"); $text = xml10_write_extparsedent($content, "UTF-8"); =head1 DESCRIPTION This module supplies functions that parse and serialise XML data according to the XML 1.0 specification. This module is oriented towards the use of XML to represent data for interchange purposes, rather than the use of XML as markup of principally textual data. 
It does not perform any schema processing, and does not interpret DTDs or any other kind of schema. It adheres strictly to the XML specification, in all its awkward details, except for the aforementioned DTDs. XML data in memory is represented using a tree of L and L objects. Such a tree encapsulates all the structure and data content of an XML element or document, without any irrelevant detail resulting from the textual syntax. These node trees are readily manipulated by the functions in L. The functions of this module are implemented in C for performance, with a pure Perl backup version (which has good performance compared to other pure Perl parsers) for systems that can't handle XS modules. =cut package XML::Easy::Text; { use 5.008; } use warnings; use strict; use XML::Easy::Content 0.007 (); use XML::Easy::Element 0.007 (); our $VERSION = "0.009"; use parent "Exporter"; our @EXPORT_OK = qw( xml10_read_content_object xml10_read_content_twine xml10_read_content xml10_read_element xml10_read_document xml10_read_extparsedent_object xml10_read_extparsedent_twine xml10_read_extparsedent xml10_write_content xml10_write_element xml10_write_document xml10_write_extparsedent ); eval { local $SIG{__DIE__}; require XSLoader; XSLoader::load("XML::Easy", $VERSION) unless defined &xml10_write_document; }; if($@ eq "") { close(DATA); } else { (my $filename = __FILE__) =~ tr# -~##cd; local $/ = undef; my $pp_code = "#line 98 \"$filename\"\n".; close(DATA); { local $SIG{__DIE__}; eval $pp_code; } die $@ if $@ ne ""; } *xml10_read_content = \&xml10_read_content_twine; *xml10_read_extparsedent = \&xml10_read_extparsedent_twine; 1; __DATA__ use Params::Classify 0.000 qw(is_string is_ref is_strictly_blessed); use XML::Easy::Syntax 0.001 qw( $xml10_char_rx $xml10_chardata_rx $xml10_comment_rx $xml10_encname_rx $xml10_eq_rx $xml10_miscseq_rx $xml10_name_rx $xml10_pi_rx $xml10_prolog_xdtd_rx $xml10_s_rx $xml10_textdecl_rx ); BEGIN { if(eval { local $SIG{__DIE__}; require Internals; 
exists &Internals::SetReadOnly; }) { *_set_readonly = \&Internals::SetReadOnly; } else { *_set_readonly = sub { }; } } sub _throw_syntax_error($) { my($rtext) = @_; die "XML syntax error\n"; } sub _throw_wfc_error($) { my($msg) = @_; die "XML constraint error: $msg\n"; } sub _throw_data_error($) { my($msg) = @_; die "invalid XML data: $msg\n"; } =head1 FUNCTIONS All functions C on error. =head2 Parsing These function take textual XML and extract the abstract XML content. In the terminology of the XML specification, they constitute a non-validating processor: they check for well-formedness of the XML, but not for adherence of the content to any schema. The inputs (to be parsed) for these functions are always character strings. XML text is frequently encoded using UTF-8, or some other Unicode encoding, so that it can contain characters from the full Unicode repertoire. In that case, something must perform UTF-8 decoding (or decoding of some other character encoding) to convert the octets of a file to the characters on which these functions operate. A Perl I/O layer can do the job (see L), or it can be performed explicitly using the C function in the L module. 
=cut

# The entities predeclared by XML 1.0, mapped to their replacement
# characters.  _parse_reference() rejects any other entity name as
# undeclared.
my %predecl_entity = (
    lt => "<",
    gt => ">",
    amp => "&",
    quot => '"',
    apos => "'",
);

# _parse_reference(RTEXT)
#
# RTEXT is a reference to the text being parsed; parsing resumes at its
# current pos().  Consumes one hexadecimal character reference, decimal
# character reference, or entity reference, and returns the character
# that it stands for.  Throws a constraint error for a character
# reference that names an unacceptable character or for an entity other
# than the predeclared ones, and a syntax error if no reference can be
# matched at this position.
sub _parse_reference($) {
    my($rtext) = @_;
    if($$rtext =~ /\G&#x([0-9A-Fa-f]+);/gc) {
        my $v = $1;
        # At most six significant hex digits (ignoring leading
        # zeroes) are accepted before the conversion is attempted.
        _throw_wfc_error("invalid character in character reference")
            unless $v =~ /\A0*(.{1,6})\z/s;
        no warnings "utf8";
        my $c = chr(hex($v));
        _throw_wfc_error("invalid character in character reference")
            unless $c =~ /\A$xml10_char_rx\z/o;
        return $c;
    } elsif($$rtext =~ /\G&#([0-9]+);/gc) {
        my $v = $1;
        # At most seven significant decimal digits are accepted.
        _throw_wfc_error("invalid character in character reference")
            unless $v =~ /\A0*(.{1,7})\z/s;
        no warnings "utf8";
        my $c = chr($v);
        _throw_wfc_error("invalid character in character reference")
            unless $c =~ /\A$xml10_char_rx\z/o;
        return $c;
    } elsif($$rtext =~ /\G&($xml10_name_rx);/ogc) {
        my $c = $predecl_entity{$1};
        _throw_wfc_error("reference to undeclared entity")
            unless defined $c;
        return $c;
    } else {
        _throw_syntax_error($rtext)
    }
}

# _parse_attvalue(RTEXT)
#
# Consumes a quoted attribute value at the current pos() of $$RTEXT and
# returns the value with the quotes removed.  Each literal tab,
# linefeed, carriage return, or CR-LF pair becomes a single space, and
# references are expanded via _parse_reference().  Throws a syntax error
# if no well-formed quoted value can be matched.
sub _parse_attvalue($) {
    my($rtext) = @_;
    $$rtext =~ /\G(["'])/gc or _throw_syntax_error($rtext);
    my $q = $1;
    my $value = "";
    while(1) {
        if($$rtext =~ /\G$q/gc) {
            # Closing quote: the value is complete.
            last;
        } elsif($$rtext =~ /\G(?:\x{d}\x{a}?|[\x{9}\x{a}])/gc) {
            $value .= " ";
        } elsif($$rtext =~ /\G(["'] |(?:(?![<&"'\x{9}\x{a}\x{d}]) $xml10_char_rx)+)/xogc) {
            # Either the quote character that is not the delimiter
            # (the delimiter was handled above), or a run of
            # ordinary characters.
            $value .= $1;
        } elsif($$rtext =~ /\G(?=&)/gc) {
            $value .= _parse_reference($rtext);
        } else {
            _throw_syntax_error($rtext)
        }
    }
    return $value;
}

sub _parse_element($);

# _parse_twine(RTEXT)
#
# Consumes as much element content as possible from the current pos()
# of $$RTEXT and returns it as a reference to a twine array: character
# data strings alternating with the objects returned by
# _parse_element(), beginning and ending with a string.  Line endings in
# character data and CDATA sections are canonicalised to "\x{a}".
# Processing instructions and comments are consumed and contribute
# nothing.  Parsing stops, without error, at the first position where
# none of these can be matched; the caller checks what follows.
sub _parse_twine($) {
    my($rtext) = @_;
    my @twine = ("");
    while(1) {
        # Note perl bug: in some versions of perl, including 5.8.8
        # and 5.10.0, the "+" in the character-data regexp acts
        # as "{1,32767}", and so won't match longer sequences
        # of characters.  (In some perl versions this behaviour
        # varies according to the encoding of the input string.)
        # Therefore, immediately after matching character data,
        # it is *not* guaranteed that the next thing cannot
        # be more valid character data.  For this reason it is
        # vitally important that the control flow in that case
        # try the same regexp again.
        if($$rtext =~ /\G((?:(?![<&])$xml10_char_rx)+)/ogc) {
            my $value = $1;
            # Due to the perl bug noted above, it is
            # necessary to backtrack a bit in some cases,
            # where significant subsequences of characters
            # might be split across the end of a match.
            pos($$rtext) -= length($1)
                if $value =~ s/(?!\A)(\x{d}|\]\]?)\z//;
            _throw_syntax_error($rtext) if $value =~ /\]\]>/;
            $value =~ s/\x{d}\x{a}?/\x{a}/g;
            $twine[-1] .= $value;
        } elsif($$rtext =~ m#\G(?=<[^/?!])#gc) {
            # "<" not followed by "/", "?" or "!" starts a
            # subelement.
            push @twine, _parse_element($rtext), "";
        } elsif($$rtext =~ /\G(?=&)/gc) {
            $twine[-1] .= _parse_reference($rtext);
        } elsif($$rtext =~ /\G(?><!\[CDATA\[($xml10_char_rx*?)\]\]>)/ogc) {
            # CDATA section.  The pattern here had been reduced
            # to a bare /\G/, which always matches, captures
            # nothing, and so spins forever.  It is restored from
            # the CDSect production: the non-greedy quantifier
            # inside (?>...) stops at the first "]]>".
            my $value = $1;
            $value =~ s/\x{d}\x{a}?/\x{a}/g;
            $twine[-1] .= $value;
        } elsif($$rtext =~ /\G(?:$xml10_pi_rx|$xml10_comment_rx)/ogc) {
            # no content
        } else {
            return \@twine;
        }
    }
}

# _parse_contentobject(RTEXT)
#
# As _parse_twine(), but wraps the result in an XML::Easy::Content
# object.
sub _parse_contentobject($) {
    return XML::Easy::Content->new(_parse_twine($_[0]));
}

# Shared content object used for every element written with an
# empty-element tag.
my $empty_contentobject = XML::Easy::Content->new([""]);

# _parse_element(RTEXT)
#
# Consumes one complete element (empty-element tag, or start-tag,
# content and end-tag) at the current pos() of $$RTEXT and returns it as
# an XML::Easy::Element object.  Throws a syntax error on malformed
# tags, and a constraint error for a duplicated attribute name or an
# end-tag whose name differs from the start-tag.
sub _parse_element($) {
    my($rtext) = @_;
    $$rtext =~ /\G<($xml10_name_rx)/ogc or _throw_syntax_error($rtext);
    my $ename = $1;
    my %attrs;
    while($$rtext =~ /\G$xml10_s_rx/ogc) {
        # Whitespace not followed by "name=" is just trailing
        # whitespace before the end of the tag.
        last unless $$rtext =~ /\G($xml10_name_rx)$xml10_eq_rx/ogc;
        # Copy the name now, rather than relying on $1 surviving
        # the call below.
        my $aname = $1;
        _throw_wfc_error("duplicate attribute")
            if exists $attrs{$aname};
        $attrs{$aname} = _parse_attvalue($rtext);
    }
    $$rtext =~ m#\G(/)?>#gc or _throw_syntax_error($rtext);
    my $content;
    if(defined $1) {
        $content = $empty_contentobject;
    } else {
        $content = _parse_contentobject($rtext);
        # End-tag: "</", name, optional whitespace, ">".  The
        # name must be captured for the comparison below.
        $$rtext =~ m#\G</($xml10_name_rx)$xml10_s_rx?>#ogc
            or _throw_syntax_error($rtext);
        _throw_wfc_error("mismatched tags") unless $1 eq $ename;
    }
    return XML::Easy::Element->new($ename, \%attrs, $content);
}

=over

=item xml10_read_content_object(TEXT)

I<TEXT> must be a character string.  It is parsed against the B<content>
production of the XML 1.0 grammar; i.e., as a sequence of the kind of
matter that can appear between the start-tag and end-tag of an element.
Returns a reference to an L<XML::Easy::Content> object.

Normally one would not want to use this function directly, but prefer
the higher-level C<xml10_read_document> function.  This function exists
for the construction of custom XML parsers in situations that don't
match the full XML grammar.

=cut

sub xml10_read_content_object($) {
    # The argument is checked in place, before it is copied.
    unless(is_string($_[0])) {
        _throw_data_error("text isn't a string");
    }
    my $input = $_[0];
    my $parsed = _parse_contentobject(\$input);
    # The parser stops at the first thing it cannot consume; anything
    # left over means the text as a whole is not valid content.
    _throw_syntax_error(\$input) unless $input =~ /\G\z/gc;
    return $parsed;
}

=item xml10_read_content_twine(TEXT)

Performs the same parsing job as L</xml10_read_content_object>, but
returns the resulting content chunk in the form of twine (see
L<XML::Easy::NodeBasics/Twine>) rather than a content object.

The returned array must not be subsequently modified.  If possible,
it will be marked as read-only in order to prevent modification.

=cut

sub xml10_read_content_twine($) {
    unless(is_string($_[0])) {
        _throw_data_error("text isn't a string");
    }
    my $input = $_[0];
    my $chunks = _parse_twine(\$input);
    _throw_syntax_error(\$input) unless $input =~ /\G\z/gc;
    # Mark each member read-only first, then the array itself.
    for my $member (@$chunks) {
        _set_readonly(\$member);
    }
    _set_readonly($chunks);
    return $chunks;
}

=item xml10_read_content(TEXT)

Deprecated alias for L</xml10_read_content_twine>.

=item xml10_read_element(TEXT)

I<TEXT> must be a character string.  It is parsed against the B<element>
production of the XML 1.0 grammar; i.e., as an item bracketed by tags
and containing content that may recursively include other elements.
Returns a reference to an L<XML::Easy::Element> object.

Normally one would not want to use this function directly, but prefer
the higher-level C<xml10_read_document> function.  This function exists
for the construction of custom XML parsers in situations that don't
match the full XML grammar.

=cut

sub xml10_read_element($) {
    unless(is_string($_[0])) {
        _throw_data_error("text isn't a string");
    }
    my $input = $_[0];
    my $root = _parse_element(\$input);
    # Nothing at all may follow the element.
    _throw_syntax_error(\$input) unless $input =~ /\G\z/gc;
    return $root;
}

=item xml10_read_document(TEXT)

I<TEXT> must be a character string.
It is parsed against the B<document> production of the XML 1.0 grammar;
i.e., as a root element (possibly containing subelements) optionally
preceded and followed by non-content matter, possibly headed by an XML
declaration.  (A document type declaration is I<not> accepted; this
module does not process schemata.)  Returns a reference to an
L<XML::Easy::Element> object which represents the root element.
Nothing is returned relating to the XML declaration or other
non-content matter.

This is the most likely function to use to process incoming XML data.
Beware that the encoding declaration in the XML declaration, if any,
does not affect the interpretation of the input as a sequence of
characters.

=cut

sub xml10_read_document($) {
    # The argument is checked in place, before it is copied.
    unless(is_string($_[0])) {
        _throw_data_error("text isn't a string");
    }
    my $input = $_[0];
    # Prolog first (anchored at the start of the text), then the root
    # element, then only miscellaneous matter up to the end.
    _throw_syntax_error(\$input)
        unless $input =~ /\A$xml10_prolog_xdtd_rx/ogc;
    my $root = _parse_element(\$input);
    _throw_syntax_error(\$input)
        unless $input =~ /\G$xml10_miscseq_rx\z/ogc;
    return $root;
}

=item xml10_read_extparsedent_object(TEXT)

I<TEXT> must be a character string.  It is parsed against the
B<extParsedEnt> production of the XML 1.0 grammar; i.e., as a sequence
of content (containing character data and subelements), possibly headed
by a text declaration (which is similar to, but not the same as, an
XML declaration).  Returns a reference to an L<XML::Easy::Content>
object.

This is a relatively obscure part of the XML grammar, used when a
subpart of a document is stored in a separate file.  You're more likely
to require the C<xml10_read_document> function.

=cut

sub xml10_read_extparsedent_object($) {
    unless(is_string($_[0])) {
        _throw_data_error("text isn't a string");
    }
    my $input = $_[0];
    # The text declaration is optional, so the result of this match is
    # deliberately ignored; when present it is simply skipped over.
    $input =~ /\A$xml10_textdecl_rx/gc;
    my $parsed = _parse_contentobject(\$input);
    _throw_syntax_error(\$input) unless $input =~ /\G\z/gc;
    return $parsed;
}

=item xml10_read_extparsedent_twine(TEXT)

Performs the same parsing job as L</xml10_read_extparsedent_object>,
but returns the resulting content chunk in the form of twine (see
L<XML::Easy::NodeBasics/Twine>) rather than a content object.

The returned array must not be subsequently modified.
If possible, it will be marked as read-only in order to prevent
modification.

=cut

sub xml10_read_extparsedent_twine($) {
    _throw_data_error("text isn't a string") unless is_string($_[0]);
    my($text) = @_;
    # The text declaration is optional; when present it is skipped.
    $text =~ /\A$xml10_textdecl_rx/gc;
    my $twine = _parse_twine(\$text);
    $text =~ /\G\z/gc or _throw_syntax_error(\$text);
    _set_readonly(\$_) foreach @$twine;
    _set_readonly($twine);
    return $twine;
}

=item xml10_read_extparsedent(TEXT)

Deprecated alias for L</xml10_read_extparsedent_twine>.

=back

=head2 Serialisation

These functions take abstract XML data and serialise it as textual XML.
They do not perform indentation, default attribute suppression, or any
other schema-dependent processing.

The outputs of these functions are always character strings.  XML text
is frequently encoded using UTF-8, or some other Unicode encoding,
so that it can contain characters from the full Unicode repertoire.
In that case, something must perform UTF-8 encoding (or encoding of some
other character encoding) to convert the characters generated by these
functions to the octets of a file.  A Perl I/O layer can do the job
(see L<perlio>), or it can be performed explicitly using the C<encode>
function in the L<Encode> module.

=cut

# _serialise_chardata(RTEXT, STR)
#
# Appends STR to $$RTEXT as XML character data.  Carriage return, "<"
# and "&" are always written as numeric character references; ">" is
# written as one only where it follows "]]".  Throws a data error if STR
# is not a string or contains a character not acceptable to XML 1.0.
sub _serialise_chardata($$) {
    my($rtext, $str) = @_;
    _throw_data_error("character data isn't a string")
        unless is_string($str);
    no warnings "utf8";
    while($str !~ /\G\z/gc) {
        # Note perl bug: in some versions of perl, including 5.8.8
        # and 5.10.0, the "+" in the plain-character regexp acts
        # as "{1,32767}", and so won't match longer sequences
        # of characters.  (In some perl versions this behaviour
        # varies according to the encoding of the input string.)
        # Therefore, immediately after matching character data,
        # it is *not* guaranteed that the next thing cannot
        # be more valid plain characters.  For this reason it is
        # vitally important that the control flow in that case
        # try the same regexp again.
        if($str =~ /\G((?:(?![\x{d}<&]|(?<=\]\])>)$xml10_char_rx)+) /xgc) {
            # Note perl bug: directly appending $1 to
            # $$rtext in this statement tickles a bug
            # in perl 5.8.0 that causes UTF-8 lossage.
            # The apparently-redundant stringification of
            # $1 works around it.
            $$rtext .= "$1";
        } elsif($str =~ /\G([\x{d}<&>])/gc) {
            $$rtext .= sprintf("&#x%02x;", ord($1));
        } else {
            _throw_data_error(
                "character data contains illegal character");
        }
    }
}

sub _serialise_element($$);

# _serialise_twine(RTEXT, TWINE)
#
# Appends the content described by the twine array TWINE to $$RTEXT:
# the strings at even indices as character data, the elements at odd
# indices via _serialise_element().  Throws a data error if TWINE is not
# an array reference of odd length.
sub _serialise_twine($$) {
    my($rtext, $twine) = @_;
    _throw_data_error("content array isn't an array")
        unless is_ref($twine, "ARRAY");
    _throw_data_error("content array has even length")
        unless @$twine % 2 == 1;
    _serialise_chardata($rtext, $twine->[0]);
    my $ncont = @$twine;
    # The length is odd, so stepping two at a time from index 1 lands
    # exactly on $ncont.
    for(my $i = 1; $i != $ncont; ) {
        _serialise_element($rtext, $twine->[$i++]);
        _serialise_chardata($rtext, $twine->[$i++]);
    }
}

# _serialise_contentobject(RTEXT, CONTENT)
#
# Appends the content held by the XML::Easy::Content object CONTENT to
# $$RTEXT.  Throws a data error if CONTENT is not blessed into exactly
# that class.
sub _serialise_contentobject($$) {
    my($rtext, $content) = @_;
    _throw_data_error("content data isn't a content chunk")
        unless is_strictly_blessed($content, "XML::Easy::Content");
    _serialise_twine($rtext, $content->twine);
}

# _serialise_eithercontent(RTEXT, CONTENT)
#
# Dispatches to _serialise_twine() for an array reference and to
# _serialise_contentobject() for anything else, passing the arguments
# through unchanged.
sub _serialise_eithercontent($$) {
    if(is_ref($_[1], "ARRAY")) {
        goto &_serialise_twine;
    } else {
        goto &_serialise_contentobject;
    }
}

# _serialise_attvalue(RTEXT, STR)
#
# Appends STR to $$RTEXT in the form required inside a double-quoted
# attribute value.  Tab, linefeed, carriage return, the double quote,
# "<" and "&" are written as numeric character references.  Throws a
# data error if STR is not a string or contains a character not
# acceptable to XML 1.0.
sub _serialise_attvalue($$) {
    my($rtext, $str) = @_;
    _throw_data_error("character data isn't a string")
        unless is_string($str);
    no warnings "utf8";
    while($str !~ /\G\z/gc) {
        # Note perl bug: in some versions of perl, including 5.8.8
        # and 5.10.0, the "+" in the plain-character regexp acts
        # as "{1,32767}", and so won't match longer sequences
        # of characters.  (In some perl versions this behaviour
        # varies according to the encoding of the input string.)
        # Therefore, immediately after matching character data,
        # it is *not* guaranteed that the next thing cannot
        # be more valid plain characters.  For this reason it is
        # vitally important that the control flow in that case
        # try the same regexp again.
        if($str =~ /\G((?:(?![\x{9}\x{a}\x{d}"<&])$xml10_char_rx)+) /xgc) {
            # Note perl bug: directly appending $1 to
            # $$rtext in this statement tickles a bug
            # in perl 5.8.0 that causes UTF-8 lossage.
            # The apparently-redundant stringification of
            # $1 works around it.
            $$rtext .= "$1";
        } elsif($str =~ /\G([\x{9}\x{a}\x{d}"<&])/gc) {
            $$rtext .= sprintf("&#x%02x;", ord($1));
        } else {
            _throw_data_error(
                "character data contains illegal character");
        }
    }
}

# _serialise_element(RTEXT, ELEM)
#
# Appends the textual form of the XML::Easy::Element object ELEM to
# $$RTEXT.  Attributes are written in sorted name order with
# double-quoted values.  An element whose content is the single empty
# string is written as an empty-element tag; any other element is
# written as start-tag, content and end-tag.  Throws a data error if
# ELEM is not blessed into exactly XML::Easy::Element or if any part of
# it fails validation.
sub _serialise_element($$) {
    my($rtext, $elem) = @_;
    _throw_data_error("element data isn't an element")
        unless is_strictly_blessed($elem, "XML::Easy::Element");
    my $type_name = $elem->type_name;
    _throw_data_error("element type name isn't a string")
        unless is_string($type_name);
    {
        no warnings "utf8";
        _throw_data_error("illegal element type name")
            unless $type_name =~ /\A$xml10_name_rx\z/o;
    }
    $$rtext .= "<".$type_name;
    my $attributes = $elem->attributes;
    _throw_data_error("attribute hash isn't a hash")
        unless is_ref($attributes, "HASH");
    foreach my $aname (sort keys %$attributes) {
        {
            no warnings "utf8";
            _throw_data_error("illegal attribute name")
                unless $aname =~ /\A$xml10_name_rx\z/o;
        }
        $$rtext .= " ".$aname."=\"";
        _serialise_attvalue($rtext, $attributes->{$aname});
        $$rtext .= "\"";
    }
    my $twine = $elem->content_twine;
    if(is_ref($twine, "ARRAY") && @$twine == 1 &&
            is_string($twine->[0]) && $twine->[0] eq "") {
        $$rtext .= "/>";
    } else {
        $$rtext .= ">";
        _serialise_twine($rtext, $twine);
        # Close the element.  An empty string was being appended
        # here, which left every non-empty element without its
        # end-tag.
        $$rtext .= "</".$type_name.">";
    }
}

=over

=item xml10_write_content(CONTENT)

I<CONTENT> must be a reference to either an L<XML::Easy::Content>
object or a twine array (see L<XML::Easy::NodeBasics/Twine>).
The XML 1.0 textual representation of that content is returned.

=cut

sub xml10_write_content($) {
    my($content) = @_;
    my $text = "";
    _serialise_eithercontent(\$text, $content);
    return $text;
}

=item xml10_write_element(ELEMENT)

I<ELEMENT> must be a reference to an L<XML::Easy::Element> object.
The XML 1.0 textual representation of that element is returned.

=cut

sub xml10_write_element($) {
    my($elem) = @_;
    my $text = "";
    _serialise_element(\$text, $elem);
    return $text;
}

=item xml10_write_document(ELEMENT[, ENCODING])

I<ELEMENT> must be a reference to an L<XML::Easy::Element> object.
The XML 1.0 textual form of a document with that element as the root
element is returned.  The document includes an XML declaration.
If I<ENCODING> is supplied, it must be a valid character encoding
name, and the XML declaration specifies it in an encoding declaration.
(The returned string consists of unencoded characters regardless of
the encoding specified.)

=cut

# _encoding_pseudoattr(ENCNAME)
#
# Validates ENCNAME and returns the encoding declaration, with a leading
# space, ready to be placed inside an XML declaration or text
# declaration.  Throws a data error if ENCNAME is not a string or is not
# a legal encoding name.
sub _encoding_pseudoattr($) {
    my($encname) = @_;
    _throw_data_error("encoding name isn't a string")
        unless is_string($encname);
    {
        no warnings "utf8";
        _throw_data_error("illegal encoding name")
            unless $encname =~ /\A$xml10_encname_rx\z/o;
    }
    return " encoding=\"".$encname."\"";
}

sub xml10_write_document($;$) {
    my($elem, $encname) = @_;
    # The XML declaration promised by the documentation was not being
    # written at all, and ENCODING was silently ignored.
    my $text = "<?xml version=\"1.0\"";
    $text .= _encoding_pseudoattr($encname) if defined $encname;
    # NOTE(review): the original declaration text is not recoverable
    # from this file.  standalone="yes" is declared because no document
    # type declaration is ever written; confirm against the
    # distribution's expected output.
    $text .= " standalone=\"yes\"?>\n";
    _serialise_element(\$text, $elem);
    $text .= "\n";
    return $text;
}

=item xml10_write_extparsedent(CONTENT[, ENCODING])

I<CONTENT> must be a reference to either an L<XML::Easy::Content>
object or a twine array (see L<XML::Easy::NodeBasics/Twine>).
The XML 1.0 textual form of an external parsed entity encapsulating
that content is returned.  If I<ENCODING> is supplied, it must be a
valid character encoding name, and the returned entity includes a text
declaration that specifies the encoding name in an encoding declaration.
(The returned string consists of unencoded characters regardless of
the encoding specified.)

=cut

sub xml10_write_extparsedent($;$) {
    my($content, $encname) = @_;
    my $text = "";
    if(defined $encname) {
        # The text declaration was being written as an empty string,
        # so a validated ENCODING never reached the output.  A text
        # declaration carries only the encoding declaration here; its
        # version information is optional in the grammar.
        $text .= "<?xml"._encoding_pseudoattr($encname)."?>";
    }
    _serialise_eithercontent(\$text, $content);
    return $text;
}

=back

=head1 SEE ALSO

L<XML::Easy::NodeBasics>,
L<XML::Easy::Syntax>,
L<http://www.w3.org/TR/REC-xml/>

=head1 AUTHOR

Andrew Main (Zefram)

=head1 COPYRIGHT

Copyright (C) 2008, 2009 PhotoBox Ltd

Copyright (C) 2009, 2010, 2011 Andrew Main (Zefram)

=head1 LICENSE

This module is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut 1; XML-Easy-0.009/lib/XML/Easy/Syntax.pm000444001750001750 6720211652350543 17137 0ustar00zeframzefram000000000000=head1 NAME XML::Easy::Syntax - excruciatingly correct XML syntax =head1 SYNOPSIS use XML::Easy::Syntax qw($xml10_name_rx); if($name =~ /\A$xml10_name_rx\z/o) { ... # and many other regular expressions =head1 DESCRIPTION This module supplies Perl regular expressions describing the grammar of XML 1.0. This is intended to support doing irregular things with XML, rather than for normal parsing. These regular expressions encompass the entire XML grammar except for document type declarations and DTDs. This document assumes general familiarity with XML. =cut package XML::Easy::Syntax; { use 5.008; } use warnings; use strict; our $VERSION = "0.009"; use parent "Exporter"; our @EXPORT_OK = qw( $xml10_char_rx $xml10_s_rx $xml10_eq_rx $xml10_namestartchar_rx $xml10_namechar_rx $xml10_name_rx $xml10_names_rx $xml10_nmtoken_rx $xml10_nmtokens_rx $xml10_charref_rx $xml10_entityref_rx $xml10_reference_rx $xml10_chardata_rx $xml10_cdata_rx $xml10_cdstart_rx $xml10_cdend_rx $xml10_cdsect_rx $xml10_attvalue_rx $xml10_attribute_rx $xml10_stag_rx $xml10_etag_rx $xml10_emptyelemtag_rx $xml10_comment_rx $xml10_pitarget_rx $xml10_pi_rx $xml10_content_rx $xml10_element_rx $xml10_versionnum_rx $xml10_versioninfo_rx $xml10_encname_rx $xml10_encodingdecl_rx $xml10_sddecl_rx $xml10_xmldecl_rx $xml10_textdecl_rx $xml10_misc_rx $xml10_miscseq_rx $xml10_prolog_xdtd_rx $xml10_document_xdtd_rx $xml10_extparsedent_rx ); sub _charclass_regexp($) { my($class) = @_; $class =~ tr/ \t\n//d; return eval("qr/[$class]/"); } =head1 REGULAR EXPRESSIONS Each of these regular expressions corresponds precisely to one of the productions in the EBNF grammar in the XML 1.0 specification. Well-formedness constraints that are not expressed in the EBNF are I checked by the regular expressions; these are noted in the documentation below. 
The regular expressions do not include any anchors, so to check whether
an entire string matches a production you must supply the anchors
yourself.

=head2 Syntax pieces

=over

=item $xml10_char_rx

Any single character that is acceptable to XML 1.0.  This includes most
Unicode characters (up to codepoint 0x10ffff).  The excluded codepoints
are the sentinels 0xfffe and 0xffff, the surrogate blocks, and most of
the C0 control characters (0x00 to 0x1f, except for 0x09 (tab), 0x0a
(linefeed/newline), and 0x0d (carriage return)).

It is a rule of XML that all characters making up an XML document
must be in this permitted set.  The grammar productions can only match
sequences of acceptable characters.  This rule is enforced by the regular
expressions in this module.

Furthermore, it is a rule that the character data in a document cannot
even I<represent> a character outside the permitted set.  This is
expressed as a well-formedness constraint on character references.

=cut

our $xml10_char_rx = _charclass_regexp(q( \x{9} \x{a} \x{d} \x{20}-\x{d7ff} \x{e000}-\x{fffd} \x{10000}-\x{10ffff} ));

=item $xml10_s_rx

Any sequence of one or more acceptable whitespace characters.  The
whitespace characters, for this purpose, are tab, linefeed/newline,
carriage return, and space.  Non-ASCII whitespace characters, and the
more exotic ASCII whitespace characters, do not qualify.

=cut

our $xml10_s_rx = qr/[\x{9}\x{a}\x{d}\x{20}]+/;

=item $xml10_eq_rx

Equals sign, surrounded by optional whitespace.

=cut

our $xml10_eq_rx = qr/$xml10_s_rx?=$xml10_s_rx?/o;

=back

=head2 Names

=over

=item $xml10_namestartchar_rx

Any single character that is permitted at the start of a name.
The permitted characters are "B<_>", "B<:>", and letters (categorised
according to Unicode 2.0).

This production is not named in the XML specification.
=cut our $xml10_namestartchar_rx = _charclass_regexp(q( \x{003a} \x{0041}-\x{005a} \x{005f} \x{0061}-\x{007a} \x{00c0}-\x{00d6} \x{00d8}-\x{00f6} \x{00f8}-\x{0131} \x{0134}-\x{013e} \x{0141}-\x{0148} \x{014a}-\x{017e} \x{0180}-\x{01c3} \x{01cd}-\x{01f0} \x{01f4}-\x{01f5} \x{01fa}-\x{0217} \x{0250}-\x{02a8} \x{02bb}-\x{02c1} \x{0386} \x{0388}-\x{038a} \x{038c} \x{038e}-\x{03a1} \x{03a3}-\x{03ce} \x{03d0}-\x{03d6} \x{03da} \x{03dc} \x{03de} \x{03e0} \x{03e2}-\x{03f3} \x{0401}-\x{040c} \x{040e}-\x{044f} \x{0451}-\x{045c} \x{045e}-\x{0481} \x{0490}-\x{04c4} \x{04c7}-\x{04c8} \x{04cb}-\x{04cc} \x{04d0}-\x{04eb} \x{04ee}-\x{04f5} \x{04f8}-\x{04f9} \x{0531}-\x{0556} \x{0559} \x{0561}-\x{0586} \x{05d0}-\x{05ea} \x{05f0}-\x{05f2} \x{0621}-\x{063a} \x{0641}-\x{064a} \x{0671}-\x{06b7} \x{06ba}-\x{06be} \x{06c0}-\x{06ce} \x{06d0}-\x{06d3} \x{06d5} \x{06e5}-\x{06e6} \x{0905}-\x{0939} \x{093d} \x{0958}-\x{0961} \x{0985}-\x{098c} \x{098f}-\x{0990} \x{0993}-\x{09a8} \x{09aa}-\x{09b0} \x{09b2} \x{09b6}-\x{09b9} \x{09dc}-\x{09dd} \x{09df}-\x{09e1} \x{09f0}-\x{09f1} \x{0a05}-\x{0a0a} \x{0a0f}-\x{0a10} \x{0a13}-\x{0a28} \x{0a2a}-\x{0a30} \x{0a32}-\x{0a33} \x{0a35}-\x{0a36} \x{0a38}-\x{0a39} \x{0a59}-\x{0a5c} \x{0a5e} \x{0a72}-\x{0a74} \x{0a85}-\x{0a8b} \x{0a8d} \x{0a8f}-\x{0a91} \x{0a93}-\x{0aa8} \x{0aaa}-\x{0ab0} \x{0ab2}-\x{0ab3} \x{0ab5}-\x{0ab9} \x{0abd} \x{0ae0} \x{0b05}-\x{0b0c} \x{0b0f}-\x{0b10} \x{0b13}-\x{0b28} \x{0b2a}-\x{0b30} \x{0b32}-\x{0b33} \x{0b36}-\x{0b39} \x{0b3d} \x{0b5c}-\x{0b5d} \x{0b5f}-\x{0b61} \x{0b85}-\x{0b8a} \x{0b8e}-\x{0b90} \x{0b92}-\x{0b95} \x{0b99}-\x{0b9a} \x{0b9c} \x{0b9e}-\x{0b9f} \x{0ba3}-\x{0ba4} \x{0ba8}-\x{0baa} \x{0bae}-\x{0bb5} \x{0bb7}-\x{0bb9} \x{0c05}-\x{0c0c} \x{0c0e}-\x{0c10} \x{0c12}-\x{0c28} \x{0c2a}-\x{0c33} \x{0c35}-\x{0c39} \x{0c60}-\x{0c61} \x{0c85}-\x{0c8c} \x{0c8e}-\x{0c90} \x{0c92}-\x{0ca8} \x{0caa}-\x{0cb3} \x{0cb5}-\x{0cb9} \x{0cde} \x{0ce0}-\x{0ce1} \x{0d05}-\x{0d0c} \x{0d0e}-\x{0d10} \x{0d12}-\x{0d28} \x{0d2a}-\x{0d39} 
\x{0d60}-\x{0d61} \x{0e01}-\x{0e2e} \x{0e30} \x{0e32}-\x{0e33} \x{0e40}-\x{0e45} \x{0e81}-\x{0e82} \x{0e84} \x{0e87}-\x{0e88} \x{0e8a} \x{0e8d} \x{0e94}-\x{0e97} \x{0e99}-\x{0e9f} \x{0ea1}-\x{0ea3} \x{0ea5} \x{0ea7} \x{0eaa}-\x{0eab} \x{0ead}-\x{0eae} \x{0eb0} \x{0eb2}-\x{0eb3} \x{0ebd} \x{0ec0}-\x{0ec4} \x{0f40}-\x{0f47} \x{0f49}-\x{0f69} \x{10a0}-\x{10c5} \x{10d0}-\x{10f6} \x{1100} \x{1102}-\x{1103} \x{1105}-\x{1107} \x{1109} \x{110b}-\x{110c} \x{110e}-\x{1112} \x{113c} \x{113e} \x{1140} \x{114c} \x{114e} \x{1150} \x{1154}-\x{1155} \x{1159} \x{115f}-\x{1161} \x{1163} \x{1165} \x{1167} \x{1169} \x{116d}-\x{116e} \x{1172}-\x{1173} \x{1175} \x{119e} \x{11a8} \x{11ab} \x{11ae}-\x{11af} \x{11b7}-\x{11b8} \x{11ba} \x{11bc}-\x{11c2} \x{11eb} \x{11f0} \x{11f9} \x{1e00}-\x{1e9b} \x{1ea0}-\x{1ef9} \x{1f00}-\x{1f15} \x{1f18}-\x{1f1d} \x{1f20}-\x{1f45} \x{1f48}-\x{1f4d} \x{1f50}-\x{1f57} \x{1f59} \x{1f5b} \x{1f5d} \x{1f5f}-\x{1f7d} \x{1f80}-\x{1fb4} \x{1fb6}-\x{1fbc} \x{1fbe} \x{1fc2}-\x{1fc4} \x{1fc6}-\x{1fcc} \x{1fd0}-\x{1fd3} \x{1fd6}-\x{1fdb} \x{1fe0}-\x{1fec} \x{1ff2}-\x{1ff4} \x{1ff6}-\x{1ffc} \x{2126} \x{212a}-\x{212b} \x{212e} \x{2180}-\x{2182} \x{3007} \x{3021}-\x{3029} \x{3041}-\x{3094} \x{30a1}-\x{30fa} \x{3105}-\x{312c} \x{4e00}-\x{9fa5} \x{ac00}-\x{d7a3} )); =item $xml10_namechar_rx Any single character that is permitted in a name other than at the start. The permitted characters are "B<.>", "B<->", "B<_>", "B<:>", and letters, digits, combining characters, and extenders (categorised according to Unicode 2.0). 
=cut our $xml10_namechar_rx = _charclass_regexp(q( \x{002d}-\x{002e} \x{0030}-\x{003a} \x{0041}-\x{005a} \x{005f} \x{0061}-\x{007a} \x{00b7} \x{00c0}-\x{00d6} \x{00d8}-\x{00f6} \x{00f8}-\x{0131} \x{0134}-\x{013e} \x{0141}-\x{0148} \x{014a}-\x{017e} \x{0180}-\x{01c3} \x{01cd}-\x{01f0} \x{01f4}-\x{01f5} \x{01fa}-\x{0217} \x{0250}-\x{02a8} \x{02bb}-\x{02c1} \x{02d0}-\x{02d1} \x{0300}-\x{0345} \x{0360}-\x{0361} \x{0387}-\x{038a} \x{038c} \x{038e}-\x{03a1} \x{03a3}-\x{03ce} \x{03d0}-\x{03d6} \x{03da} \x{03dc} \x{03de} \x{03e0} \x{03e2}-\x{03f3} \x{0401}-\x{040c} \x{040e}-\x{044f} \x{0451}-\x{045c} \x{045e}-\x{0481} \x{0483}-\x{0486} \x{0490}-\x{04c4} \x{04c7}-\x{04c8} \x{04cb}-\x{04cc} \x{04d0}-\x{04eb} \x{04ee}-\x{04f5} \x{04f8}-\x{04f9} \x{0531}-\x{0556} \x{0559} \x{0561}-\x{0586} \x{0591}-\x{05a1} \x{05a3}-\x{05b9} \x{05bb}-\x{05bd} \x{05bf} \x{05c1}-\x{05c2} \x{05c4} \x{05d0}-\x{05ea} \x{05f0}-\x{05f2} \x{0621}-\x{063a} \x{0641}-\x{0652} \x{0660}-\x{0669} \x{0670}-\x{06b7} \x{06ba}-\x{06be} \x{06c0}-\x{06ce} \x{06d0}-\x{06d3} \x{06e5}-\x{06e8} \x{06ea}-\x{06ed} \x{06f0}-\x{06f9} \x{0901}-\x{0903} \x{0905}-\x{0939} \x{093e}-\x{094d} \x{0951}-\x{0954} \x{0958}-\x{0963} \x{0966}-\x{096f} \x{0981}-\x{0983} \x{0985}-\x{098c} \x{098f}-\x{0990} \x{0993}-\x{09a8} \x{09aa}-\x{09b0} \x{09b2} \x{09b6}-\x{09b9} \x{09bc} \x{09bf}-\x{09c4} \x{09c7}-\x{09c8} \x{09cb}-\x{09cd} \x{09d7} \x{09dc}-\x{09dd} \x{09df}-\x{09e3} \x{09e6}-\x{09f1} \x{0a02} \x{0a05}-\x{0a0a} \x{0a0f}-\x{0a10} \x{0a13}-\x{0a28} \x{0a2a}-\x{0a30} \x{0a32}-\x{0a33} \x{0a35}-\x{0a36} \x{0a38}-\x{0a39} \x{0a3c} \x{0a3f}-\x{0a42} \x{0a47}-\x{0a48} \x{0a4b}-\x{0a4d} \x{0a59}-\x{0a5c} \x{0a5e} \x{0a70}-\x{0a74} \x{0a81}-\x{0a83} \x{0a85}-\x{0a8b} \x{0a8d} \x{0a8f}-\x{0a91} \x{0a93}-\x{0aa8} \x{0aaa}-\x{0ab0} \x{0ab2}-\x{0ab3} \x{0ab5}-\x{0ab9} \x{0abd}-\x{0ac5} \x{0ac7}-\x{0ac9} \x{0acb}-\x{0acd} \x{0ae0} \x{0ae6}-\x{0aef} \x{0b01}-\x{0b03} \x{0b05}-\x{0b0c} \x{0b0f}-\x{0b10} \x{0b13}-\x{0b28} \x{0b2a}-\x{0b30} 
\x{0b32}-\x{0b33} \x{0b36}-\x{0b39} \x{0b3d}-\x{0b43} \x{0b47}-\x{0b48} \x{0b4b}-\x{0b4d} \x{0b56}-\x{0b57} \x{0b5c}-\x{0b5d} \x{0b5f}-\x{0b61} \x{0b66}-\x{0b6f} \x{0b82}-\x{0b83} \x{0b85}-\x{0b8a} \x{0b8e}-\x{0b90} \x{0b92}-\x{0b95} \x{0b99}-\x{0b9a} \x{0b9c} \x{0b9e}-\x{0b9f} \x{0ba3}-\x{0ba4} \x{0ba8}-\x{0baa} \x{0bae}-\x{0bb5} \x{0bb7}-\x{0bb9} \x{0bbe}-\x{0bc2} \x{0bc6}-\x{0bc8} \x{0bca}-\x{0bcd} \x{0bd7} \x{0be7}-\x{0bef} \x{0c01}-\x{0c03} \x{0c05}-\x{0c0c} \x{0c0e}-\x{0c10} \x{0c12}-\x{0c28} \x{0c2a}-\x{0c33} \x{0c35}-\x{0c39} \x{0c3e}-\x{0c44} \x{0c46}-\x{0c48} \x{0c4a}-\x{0c4d} \x{0c55}-\x{0c56} \x{0c60}-\x{0c61} \x{0c66}-\x{0c6f} \x{0c82}-\x{0c83} \x{0c85}-\x{0c8c} \x{0c8e}-\x{0c90} \x{0c92}-\x{0ca8} \x{0caa}-\x{0cb3} \x{0cb5}-\x{0cb9} \x{0cbe}-\x{0cc4} \x{0cc6}-\x{0cc8} \x{0cca}-\x{0ccd} \x{0cd5}-\x{0cd6} \x{0cde} \x{0ce0}-\x{0ce1} \x{0ce6}-\x{0cef} \x{0d02}-\x{0d03} \x{0d05}-\x{0d0c} \x{0d0e}-\x{0d10} \x{0d12}-\x{0d28} \x{0d2a}-\x{0d39} \x{0d3e}-\x{0d43} \x{0d46}-\x{0d48} \x{0d4a}-\x{0d4d} \x{0d57} \x{0d60}-\x{0d61} \x{0d66}-\x{0d6f} \x{0e01}-\x{0e2e} \x{0e32}-\x{0e3a} \x{0e46}-\x{0e4e} \x{0e50}-\x{0e59} \x{0e81}-\x{0e82} \x{0e84} \x{0e87}-\x{0e88} \x{0e8a} \x{0e8d} \x{0e94}-\x{0e97} \x{0e99}-\x{0e9f} \x{0ea1}-\x{0ea3} \x{0ea5} \x{0ea7} \x{0eaa}-\x{0eab} \x{0ead}-\x{0eae} \x{0eb2}-\x{0eb9} \x{0ebb}-\x{0ebd} \x{0ec0}-\x{0ec4} \x{0ec6} \x{0ec8}-\x{0ecd} \x{0ed0}-\x{0ed9} \x{0f18}-\x{0f19} \x{0f20}-\x{0f29} \x{0f35} \x{0f37} \x{0f39} \x{0f3f}-\x{0f47} \x{0f49}-\x{0f69} \x{0f71}-\x{0f84} \x{0f86}-\x{0f8b} \x{0f90}-\x{0f95} \x{0f97} \x{0f99}-\x{0fad} \x{0fb1}-\x{0fb7} \x{0fb9} \x{10a0}-\x{10c5} \x{10d0}-\x{10f6} \x{1100} \x{1102}-\x{1103} \x{1105}-\x{1107} \x{1109} \x{110b}-\x{110c} \x{110e}-\x{1112} \x{113c} \x{113e} \x{1140} \x{114c} \x{114e} \x{1150} \x{1154}-\x{1155} \x{1159} \x{115f}-\x{1161} \x{1163} \x{1165} \x{1167} \x{1169} \x{116d}-\x{116e} \x{1172}-\x{1173} \x{1175} \x{119e} \x{11a8} \x{11ab} \x{11ae}-\x{11af} \x{11b7}-\x{11b8} \x{11ba} 
\x{11bc}-\x{11c2} \x{11eb} \x{11f0} \x{11f9} \x{1e00}-\x{1e9b} \x{1ea0}-\x{1ef9} \x{1f00}-\x{1f15} \x{1f18}-\x{1f1d} \x{1f20}-\x{1f45} \x{1f48}-\x{1f4d} \x{1f50}-\x{1f57} \x{1f59} \x{1f5b} \x{1f5d} \x{1f5f}-\x{1f7d} \x{1f80}-\x{1fb4} \x{1fb6}-\x{1fbc} \x{1fbe} \x{1fc2}-\x{1fc4} \x{1fc6}-\x{1fcc} \x{1fd0}-\x{1fd3} \x{1fd6}-\x{1fdb} \x{1fe0}-\x{1fec} \x{1ff2}-\x{1ff4} \x{1ff6}-\x{1ffc} \x{20d0}-\x{20dc} \x{20e1} \x{2126} \x{212a}-\x{212b} \x{212e} \x{2180}-\x{2182} \x{3005} \x{3007} \x{3021}-\x{302f} \x{3031}-\x{3035} \x{3041}-\x{3094} \x{3099}-\x{309a} \x{309d}-\x{309e} \x{30a1}-\x{30fa} \x{30fc}-\x{30fe} \x{3105}-\x{312c} \x{4e00}-\x{9fa5} \x{ac00}-\x{d7a3} )); =item $xml10_name_rx A name, of the type used to identify element types, attributes, entities, and other things in XML. =cut our $xml10_name_rx = qr/$xml10_namestartchar_rx$xml10_namechar_rx*/o; =item $xml10_names_rx A space-separated list of one or more names. =cut our $xml10_names_rx = qr/$xml10_name_rx(?:\x{20}$xml10_name_rx)*/o; =item $xml10_nmtoken_rx A name-like token, much like a name except that the first character is no more restricted than the remaining characters. These tokens play no part in basic XML syntax, and in the specification are only used as part of attribute typing. =cut our $xml10_nmtoken_rx = qr/$xml10_namechar_rx+/o; =item $xml10_nmtokens_rx A space-separated list of one or more name-like tokens. =cut our $xml10_nmtokens_rx = qr/$xml10_nmtoken_rx(?:\x{20}$xml10_nmtoken_rx)*/o; =back =head2 References =over =item $xml10_charref_rx A numeric character reference (beginning with "B<&#>" and ending with "B<;>"). There is a non-syntactic well-formedness constraint: the codepoint is required to be within the Unicode range and to refer to an acceptable character (as discussed at C<$xml10_char_rx>). =cut our $xml10_charref_rx = qr/&#(?:[0-9]+|x[0-9a-fA-F]+);/; =item $xml10_entityref_rx A general entity reference (beginning with "B<&>" and ending with "B<;>"). 
There are non-syntactic well-formedness constraints: the referenced
entity must be declared (possibly implicitly), must not be an unparsed
entity, must not contain a recursive reference to itself, and its
replacement text must itself be well-formed.

=cut

our $xml10_entityref_rx = qr/&$xml10_name_rx;/o;

=item $xml10_reference_rx

Either a character reference or an entity reference.  The well-formedness
constraints of both reference types (see above) apply.

=cut

our $xml10_reference_rx = qr/$xml10_entityref_rx|$xml10_charref_rx/o;

=back

=head2 Character data

=over

=item $xml10_chardata_rx

Ordinary literal character data.  This consists of zero or more acceptable
characters, other than the metacharacters "B<< < >>" and "B<&>", and
not including "B<< ]]> >>" as a subsequence.  Such data stands for
itself when it appears between the start and end tags of an element,
where it can be interspersed with references, CDATA sections, comments,
and processing instructions.

In the XML grammar, character data is parsed, and taken literally,
I<after> line endings have been canonicalised (to the newline character).
Pre-canonicalisation character data, with variable line endings, will
still match this production but should not be interpreted literally.

Beware that a string that does not match this production might parse as
two adjacent strings each of which matches.  This can happen because
of the prohibition on "B<< ]]> >>" being embedded in character data,
while the characters of that sequence are acceptable individually.
The XML grammar does not allow two instances of this production to abut.

=cut

# First alternative: any acceptable character other than "<", "&" or
# "]", optionally preceded by a single "]".  Second alternative: a run
# of two or more "]" followed by an acceptable character that is none of
# "<", "&", ">" or "]", which is what keeps "]]>" out.  A trailing run
# of "]" is always allowed.
our $xml10_chardata_rx = qr/(?: \]?(?![<&\]])$xml10_char_rx |\]{2,}(?![<&\>\]])$xml10_char_rx )*\]*/xo;

=item $xml10_cdata_rx

Literal character data in a CDATA section.  This consists of zero or
more acceptable characters, not including "B<< ]]> >>" as a subsequence.
Unlike ordinary literal character data, the characters "B<< < >>" and
"B<&>" are not metacharacters here.
Such data stands for itself when it appears within a CDATA section.
As with ordinary literal character data (see above), this data is meant
to be taken literally only after line endings have been canonicalised.
Also, as with ordinary literal character data, two instances of this
production should not abut.

=cut

our $xml10_cdata_rx = qr/(?:
	\]?(?!\])$xml10_char_rx
	|\]{2,}(?![\>\]])$xml10_char_rx
)*\]*/xo;

=item $xml10_cdstart_rx

=item $xml10_cdend_rx

The fixed strings "B<< <![CDATA[ >>" and "B<< ]]> >>" which begin and
finish a CDATA section.

=cut

# Both delimiters are fixed strings; the square brackets are escaped so
# that they are matched literally rather than opening a character class.
our $xml10_cdstart_rx = qr/<!\[CDATA\[/;
our $xml10_cdend_rx = qr/\]\]>/;

=item $xml10_cdsect_rx

A CDATA section.  This consists of "B<< <![CDATA[ >>", literal character
data with metacharacters disabled, and "B<< ]]> >>".

=cut

# Note: using the $xml10_cdata_rx regexp (from above) here would be much
# less efficient than this use of (?>...).  It would also run into the
# perl bug described in L</BUGS>.
#
# The non-greedy quantifier stops at the first "]]>", and the atomic
# group then prevents backtracking from extending the section past it.

our $xml10_cdsect_rx = qr/(?><!\[CDATA\[$xml10_char_rx*?\]\]>)/o;

=back

=head2 Tags

=over

=item $xml10_attvalue_rx

A quoted attribute value.  This consists of acceptable characters other
than "B<< < >>", "B<&>", and the quote character, interspersed with
references, surrounded by matching "B<">" or "B<'>" quotes.  The
well-formedness constraints of references apply, and additionally the
replacement text of any referenced entities must not contain any
"B<< < >>" characters, and it is not permitted to refer to external
entities.

=cut

our $xml10_attvalue_rx = qr/"(?:(?![<&"])$xml10_char_rx|$xml10_reference_rx)*"
			   |'(?:(?![<&'])$xml10_char_rx|$xml10_reference_rx)*'
			   /xo;

=item $xml10_attribute_rx

A complete attribute, consisting of name, equals sign, and quoted value.
The well-formedness constraints of attribute values (pertaining to
references) apply.

=cut

our $xml10_attribute_rx = qr/$xml10_name_rx$xml10_eq_rx$xml10_attvalue_rx/o;

=item $xml10_stag_rx

A start-tag, used to begin an element.  This consists of "B<< < >>",
the element type name, whitespace-separated list of attributes, and
"B<< > >>".
The well-formedness constraints of attribute values (pertaining to
references) apply.  There is also a well-formedness constraint that
attribute names must be unique within the tag.

=cut

our $xml10_stag_rx = qr#<$xml10_name_rx
			(?:$xml10_s_rx$xml10_attribute_rx)*
			$xml10_s_rx?>#xo;

=item $xml10_etag_rx

An end-tag, used to finish an element.  This consists of "B<< </ >>",
the element type name, and "B<< > >>".

=cut

# An end-tag carries no attributes: just the name, then optional
# whitespace before the closing ">".
our $xml10_etag_rx = qr#</$xml10_name_rx$xml10_s_rx?>#o;

=item $xml10_emptyelemtag_rx

An empty-element tag, used to represent an element with no content.
This consists of "B<< < >>", the element type name, whitespace-separated
list of attributes, and "B<< /> >>".  The well-formedness constraints
of attribute values (pertaining to references) apply.  There is also a
well-formedness constraint that attribute names must be unique within
the tag.  (These are the same constraints as for start-tags.)

=cut

our $xml10_emptyelemtag_rx = qr#<$xml10_name_rx
				(?:$xml10_s_rx$xml10_attribute_rx)*
				$xml10_s_rx?/>#xo;

=back

=head2 Non-data content

=over

=item $xml10_comment_rx

A comment.  This does not contribute to the data content of an XML
document.  It consists of "B<< <!-- >>", arbitrary text, and
"B<< --> >>".  It is not permitted for the content to include "B<-->"
as a subsequence, nor for it to end with "B<->".

=cut

# Note perl bug: the theoretically-cleaner way of expressing this syntax,
# /<!--(?:(?!--)$xml10_char_rx)*-->/, runs into a problem where the
# "*" acts as "{0,32767}", discussed in L</BUGS>, and so fails to match
# longer comments.  The way that is used here, with a sufficiently simple
# expression inside the "*", doesn't run into that problem, but instead
# relies on the (?>...) together with the non-greedy quantifier for
# proper parsing.  It is important for this regexp to not suffer from
# this bug, because it is used in the pure-Perl parser.

our $xml10_comment_rx = qr/ # XML syntax error ###e- # XML syntax error ###e- # XML syntax error ###e # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###e foobar # bless( [ "a", {}, bless( [ [ "foobar" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###e foo$()bar # bless( [ "a", {}, bless( [ [ "foobar" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###e $(foo)bar # bless( [ "a", {}, bless( [ [ ("foo"x40000)."bar" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d- # XML syntax error ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d foobar # bless( [ "a", {}, bless( [ [ "foobar" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###e- # XML syntax error ###e- # XML syntax error ###e- # XML syntax error ###e # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###e foobar # bless( [ "a", {}, bless( [ [ "foobar" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d- # XML syntax error ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d foobar # bless( [ "a", {}, bless( [ [ "foobar" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d- ]]> # XML syntax error ###d- ]]> # XML syntax error ###d- ]]> # XML syntax error ###d ]]> # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d foo]]>bar # bless( [ "a", {}, bless( [ [ "foobar" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d- foo]]>bar # XML syntax error ###d foo]>bar # bless( [ "a", {}, bless( [ [ 
"foo]>bar" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d foo]]bar # bless( [ "a", {}, bless( [ [ "foo]]bar" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d foo>bar # bless( [ "a", {}, bless( [ [ "foo>bar" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d foo]bar # bless( [ "a", {}, bless( [ [ "foo]bar" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d- ]]>bar # XML syntax error ###d ]>bar # bless( [ "a", {}, bless( [ [ "]>bar" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ]]bar # bless( [ "a", {}, bless( [ [ "]]bar" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d >bar # bless( [ "a", {}, bless( [ [ ">bar" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ]bar # bless( [ "a", {}, bless( [ [ "]bar" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d- foo]]> # XML syntax error ###d foo]> # bless( [ "a", {}, bless( [ [ "foo]>" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d foo]] # bless( [ "a", {}, bless( [ [ "foo]]" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d foo> # bless( [ "a", {}, bless( [ [ "foo>" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d foo] # bless( [ "a", {}, bless( [ [ "foo]" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d- ]]> # XML syntax error ###d ]> # bless( [ "a", {}, bless( [ [ "]>" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ]] # bless( [ "a", {}, bless( [ [ "]]" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d > # bless( [ "a", {}, bless( [ [ ">" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ] # bless( [ "a", {}, bless( [ [ "]" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d- ${ }]]> # XML syntax error ###d ${ }]> # bless( [ "a", {}, bless( [ [ (" "x32764)."]>" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ${ }]] # bless( [ "a", {}, bless( [ [ (" "x32764)."]]" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ${ }> # bless( [ "a", {}, bless( [ [ 
(" "x32764).">" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ${ }] # bless( [ "a", {}, bless( [ [ (" "x32764)."]" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d- ${ } ]]> # XML syntax error ###d ${ } ]> # bless( [ "a", {}, bless( [ [ (" "x32765)."]>" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ${ } ]] # bless( [ "a", {}, bless( [ [ (" "x32765)."]]" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ${ } > # bless( [ "a", {}, bless( [ [ (" "x32765).">" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ${ } ] # bless( [ "a", {}, bless( [ [ (" "x32765)."]" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d- ${ } ]]> # XML syntax error ###d ${ } ]> # bless( [ "a", {}, bless( [ [ (" "x32766)."]>" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ${ } ]] # bless( [ "a", {}, bless( [ [ (" "x32766)."]]" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ${ } > # bless( [ "a", {}, bless( [ [ (" "x32766).">" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ${ } ] # bless( [ "a", {}, bless( [ [ (" "x32766)."]" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d- ${ } ]]> # XML syntax error ###d ${ } ]> # bless( [ "a", {}, bless( [ [ (" "x32767)."]>" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ${ } ]] # bless( [ "a", {}, bless( [ [ (" "x32767)."]]" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ${ } > # bless( [ "a", {}, bless( [ [ (" "x32767).">" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ${ } ] # bless( [ "a", {}, bless( [ [ (" "x32767)."]" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "x" => "foo]]>bar" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "x" => "]]>bar" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "x" => "foo]]>" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( 
[ "a", { "x" => "]]>" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "x" => (" "x32764)."]]>" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "x" => (" "x32765)."]]>" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "x" => (" "x32766)."]]>" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "x" => (" "x32767)."]]>" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d- # XML syntax error ###d- # XML syntax error ###d- # XML syntax error ###d- # XML syntax error ###d # bless( [ "x\x{b7}y", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "x\x{b7}y" => "z" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\x{b7}y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x·y # bless( [ "a", {}, bless( [ [ "x\x{b7}y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "x\x{b7}y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "x\x{137}y", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "x\x{137}y" => "z" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\x{137}y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d xķy # bless( [ "a", {}, bless( [ [ "x\x{137}y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "x\x{137}y" ] ], 'XML::Easy::Content' ) ], 
'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "x".("a"x40000)."y", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "x".("a"x40000)."y" => "z" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x".("a"x40000)."y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x$(a)y # bless( [ "a", {}, bless( [ [ "x".("a"x40000)."y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "x".("a"x40000)."y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "x".("\x{b7}"x40000)."y", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "x".("\x{b7}"x40000)."y" => "z" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x".("\x{b7}"x40000)."y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x$(·)y # bless( [ "a", {}, bless( [ [ "x".("\x{b7}"x40000)."y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "x".("\x{b7}"x40000)."y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 
'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "x".("\x{137}"x40000)."y", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "x".("\x{137}"x40000)."y" => "z" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x".("\x{137}"x40000)."y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x$(ķ)y # bless( [ "a", {}, bless( [ [ "x".("\x{137}"x40000)."y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "x".("\x{137}"x40000)."y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x".(" "x40000)."y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x$(~ )y # bless( [ "a", {}, bless( [ [ "x".("\n"x40000)."y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "x".("\n"x40000)."y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "xX".(" "x40000)."y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d xX$(~ )y # bless( [ "a", {}, bless( [ [ "xX".("\n"x40000)."y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "xX".("\n"x40000)."y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 
'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d- # XML syntax error ###d- # XML syntax error ###d- # XML syntax error ###d- xy # XML syntax error ###d- # XML syntax error ###d- # XML syntax error ###d- # XML syntax error ###d- # XML syntax error ###d- # XML syntax error ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d- < a/> # XML syntax error ###d- # XML syntax error ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d- < a/> # XML syntax error ###d- # XML syntax error ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d- <~a/> # XML syntax error ###d- # XML syntax error ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d- <~ a/> # XML syntax error ###d foo bar # bless( [ "a", {}, bless( [ [ "foo bar" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d foo bar # bless( [ "a", {}, bless( [ [ "foo\tbar" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d foo bar # bless( [ "a", {}, bless( [ [ "foo\nbar" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d foo~bar # bless( [ "a", {}, bless( [ [ "foo\nbar" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d foo~ bar # bless( [ "a", {}, bless( [ [ "foo\nbar" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ " " ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "\t" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "\n" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ~ # bless( [ "a", {}, bless( [ [ "\n" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ~ # bless( [ "a", {}, bless( [ [ "\n" ] ], 
'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "foo bar" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "foo\tbar" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "foo\nbar" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "foo\nbar" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "foo\nbar" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ " " ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "\t" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "\n" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "\n" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "\n" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "x" => "foo bar" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "x" => "foo bar" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "x" => "foo bar" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "x" => "foo bar" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "x" => "foo bar" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "x" => " " }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "x" => " " }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "x" => " " }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "x" => " " }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' 
) ###d # bless( [ "a", { "x" => " " }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d- x&y # XML syntax error ###d x<y # bless( [ "a", {}, bless( [ [ "xx>y # bless( [ "a", {}, bless( [ [ "x>y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x&y # bless( [ "a", {}, bless( [ [ "x&y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x"y # bless( [ "a", {}, bless( [ [ "x\"y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x'y # bless( [ "a", {}, bless( [ [ "x'y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x$(')y # bless( [ "a", {}, bless( [ [ "x".("'"x40000)."y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x&foo;y # XML constraint error: reference to undeclared entity ###d x&x$(a)y;y # XML constraint error: reference to undeclared entity ###d x&x$(·)y;y # XML constraint error: reference to undeclared entity ###d x&x$(ķ)y;y # XML constraint error: reference to undeclared entity ###d x�y # XML constraint error: invalid character in character reference ###d x�y # XML constraint error: invalid character in character reference ###d xy # XML constraint error: invalid character in character reference ###d xy # XML constraint error: invalid character in character reference ###d xy # XML constraint error: invalid character in character reference ###d xy # XML constraint error: invalid character in character reference ###d x y # bless( [ "a", {}, bless( [ [ "x\ty" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x y # bless( [ "a", {}, bless( [ [ "x\ty" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x y # bless( [ "a", {}, bless( [ [ "x\ny" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x y # bless( [ "a", {}, bless( [ [ "x\ny" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x y # XML constraint error: invalid character in character reference ###d x y # XML constraint error: invalid character in character reference ###d x y # XML constraint error: 
invalid character in character reference ###d x y # XML constraint error: invalid character in character reference ###d x y # bless( [ "a", {}, bless( [ [ "x\ry" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x y # bless( [ "a", {}, bless( [ [ "x\ry" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d xy # XML constraint error: invalid character in character reference ###d xy # XML constraint error: invalid character in character reference ###d xy # XML constraint error: invalid character in character reference ###d xy # XML constraint error: invalid character in character reference ###d x y # bless( [ "a", {}, bless( [ [ "x y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x y # bless( [ "a", {}, bless( [ [ "x y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x:y # bless( [ "a", {}, bless( [ [ "x:y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x:y # bless( [ "a", {}, bless( [ [ "x:y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x퟿y # bless( [ "a", {}, bless( [ [ "x\x{d7ff}y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x퟿y # bless( [ "a", {}, bless( [ [ "x\x{d7ff}y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x�y # XML constraint error: invalid character in character reference ###d x�y # XML constraint error: invalid character in character reference ###d x�y # XML constraint error: invalid character in character reference ###d x�y # XML constraint error: invalid character in character reference ###d xy # bless( [ "a", {}, bless( [ [ "x\x{e000}y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d xy # bless( [ "a", {}, bless( [ [ "x\x{e000}y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x﷐y # bless( [ "a", {}, bless( [ [ "x\x{fdd0}y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x﷐y # bless( [ "a", {}, bless( [ [ "x\x{fdd0}y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x�y # bless( [ "a", {}, bless( [ [ "x\x{fffd}y" 
] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x�y # bless( [ "a", {}, bless( [ [ "x\x{fffd}y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x￾y # XML constraint error: invalid character in character reference ###d x￾y # XML constraint error: invalid character in character reference ###d x￿y # XML constraint error: invalid character in character reference ###d x￿y # XML constraint error: invalid character in character reference ###d x𐀀y # bless( [ "a", {}, bless( [ [ "x\x{10000}y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x𐀀y # bless( [ "a", {}, bless( [ [ "x\x{10000}y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x🿽y # bless( [ "a", {}, bless( [ [ "x\x{1fffd}y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x🿽y # bless( [ "a", {}, bless( [ [ "x\x{1fffd}y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x🿾y # bless( [ "a", {}, bless( [ [ "x\x{1fffe}y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x🿾y # bless( [ "a", {}, bless( [ [ "x\x{1fffe}y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x🿿y # bless( [ "a", {}, bless( [ [ "x\x{1ffff}y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x🿿y # bless( [ "a", {}, bless( [ [ "x\x{1ffff}y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x𠀀y # bless( [ "a", {}, bless( [ [ "x\x{20000}y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x𠀀y # bless( [ "a", {}, bless( [ [ "x\x{20000}y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x􏿽y # bless( [ "a", {}, bless( [ [ "x\x{10fffd}y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x􏿽y # bless( [ "a", {}, bless( [ [ "x\x{10fffd}y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x􏿾y # bless( [ "a", {}, bless( [ [ "x\x{10fffe}y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x􏿾y # bless( [ "a", {}, bless( [ [ "x\x{10fffe}y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x􏿿y # bless( [ "a", 
{}, bless( [ [ "x\x{10ffff}y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x􏿿y # bless( [ "a", {}, bless( [ [ "x\x{10ffff}y" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x�y # XML constraint error: invalid character in character reference ###d x�y # XML constraint error: invalid character in character reference ###d x&#x$(a);y # XML constraint error: invalid character in character reference ###d x&#$(1);y # XML constraint error: invalid character in character reference ###d- # XML syntax error ###d # bless( [ "a", { "z" => "x # bless( [ "a", { "z" => "x>y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x&y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\"y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x'y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x".("'"x40000)."y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # XML constraint error: reference to undeclared entity ###d # XML constraint error: reference to undeclared entity ###d # XML constraint error: reference to undeclared entity ###d # XML constraint error: reference to undeclared entity ###d # XML constraint error: invalid character in character reference ###d # bless( [ "a", { "z" => "x\ty" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\ny" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\ry" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # XML constraint error: invalid character in character reference ###d # XML constraint error: invalid character in character reference ###d # XML 
constraint error: invalid character in character reference ###d # XML constraint error: invalid character in character reference ###d # XML constraint error: invalid character in character reference ###d # XML constraint error: invalid character in character reference ###d # bless( [ "a", { "z" => "x\ty" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\ty" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\ny" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\ny" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # XML constraint error: invalid character in character reference ###d # XML constraint error: invalid character in character reference ###d # XML constraint error: invalid character in character reference ###d # XML constraint error: invalid character in character reference ###d # bless( [ "a", { "z" => "x\ry" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\ry" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # XML constraint error: invalid character in character reference ###d # XML constraint error: invalid character in character reference ###d # XML constraint error: invalid character in character reference ###d # XML constraint error: invalid character in character reference ###d # bless( [ "a", { "z" => "x y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x:y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x:y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\x{d7ff}y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 
'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\x{d7ff}y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # XML constraint error: invalid character in character reference ###d # XML constraint error: invalid character in character reference ###d # XML constraint error: invalid character in character reference ###d # XML constraint error: invalid character in character reference ###d # bless( [ "a", { "z" => "x\x{e000}y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\x{e000}y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\x{fdd0}y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\x{fdd0}y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\x{fffd}y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\x{fffd}y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # XML constraint error: invalid character in character reference ###d # XML constraint error: invalid character in character reference ###d # XML constraint error: invalid character in character reference ###d # XML constraint error: invalid character in character reference ###d # bless( [ "a", { "z" => "x\x{10000}y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\x{10000}y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\x{1fffd}y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\x{1fffd}y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\x{1fffe}y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\x{1fffe}y" 
}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\x{1ffff}y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\x{1ffff}y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\x{20000}y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\x{20000}y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\x{10fffd}y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\x{10fffd}y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\x{10fffe}y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\x{10fffe}y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\x{10ffff}y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", { "z" => "x\x{10ffff}y" }, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # XML constraint error: invalid character in character reference ###d # XML constraint error: invalid character in character reference ###d # XML constraint error: invalid character in character reference ###d # XML constraint error: invalid character in character reference ###d foo'bar # bless( [ "a", {}, bless( [ [ "foo'bar" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d foo"bar # bless( [ "a", {}, bless( [ [ "foo\"bar" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d- # XML syntax error ###d- # XML syntax error ###e- # XML syntax error ###e- # XML syntax error ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 
'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d- # XML syntax error ###d- # XML syntax error ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d- # XML syntax error ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] 
], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d # bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d- # XML syntax error ###d Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. # bless( [ "para", {}, bless( [ [ "Lorem ipsum dolor sit amet, consectetur adipisicing elit,\nsed do eiusmod tempor incididunt ut labore et dolore magna aliqua.\n", bless( [ "b", { "tag" => "thing" }, bless( [ [ "Ut enim ad minim veniam, quis nostrud exercitation\nullamco laboris nisi ut aliquip ex ea commodo consequat." ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ), " Duis aute\nirure dolor in reprehenderit in voluptate velit esse cillum dolore eu\nfugiat nulla pariatur. ", bless( [ "word", {}, bless( [ [ "Excepteur" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ), " sint occaecat cupidatat\nnon proident, sunt in culpa qui officia deserunt mollit anim id est\nlaborum." 
] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ķ # bless( [ "a", {}, bless( [ [ "\x{137}" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d- xy # XML syntax error ###d- xy # XML syntax error ###d xy # bless( [ "a", {}, bless( [ [ "xy" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d- xy # XML syntax error ###d xy # bless( [ "a", {}, bless( [ [ "xy" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d- xy # XML syntax error ###c # bless( [ [ "", bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ), "\n" ] ], 'XML::Easy::Content' ) ###c # bless( [ [ "\n", bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ), "" ] ], 'XML::Easy::Content' ) ###c a # bless( [ [ "a", bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ), "" ] ], 'XML::Easy::Content' ) ###c a # bless( [ [ "", bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ), "a" ] ], 'XML::Easy::Content' ) ###c a # bless( [ [ "a" ] ], 'XML::Easy::Content' ) ###c # bless( [ [ "", bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ), "" ] ], 'XML::Easy::Content' ) ###c- # XML syntax error ###c- # XML syntax error ###c # bless( [ [ "" ] ], 'XML::Easy::Content' ) ###c # bless( [ [ "", bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ), "" ] ], 'XML::Easy::Content' ) ###c foobar # bless( [ [ "foo", bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ), "bar", bless( [ "b", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ), "" ] ], 'XML::Easy::Content' ) ###x # bless( [ [ "", bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ), "\n" ] ], 'XML::Easy::Content' ) ###x # bless( [ [ "\n", bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ), "" ] ], 'XML::Easy::Content' ) ###x a 
# bless( [ [ "a", bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ), "" ] ], 'XML::Easy::Content' ) ###x a # bless( [ [ "", bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ), "a" ] ], 'XML::Easy::Content' ) ###x a # bless( [ [ "a" ] ], 'XML::Easy::Content' ) ###x # bless( [ [ "", bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ), "" ] ], 'XML::Easy::Content' ) ###x- # XML syntax error ###x # bless( [ [ "", bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ), "" ] ], 'XML::Easy::Content' ) ###x # bless( [ [ "" ] ], 'XML::Easy::Content' ) ###x # bless( [ [ "", bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ), "" ] ], 'XML::Easy::Content' ) ###x foobar # bless( [ [ "foo", bless( [ "a", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ), "bar", bless( [ "b", {}, bless( [ [ "" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ), "" ] ], 'XML::Easy::Content' ) ###d- # XML syntax error ###e- # XML syntax error ###c # bless( [ [ "" ] ], 'XML::Easy::Content' ) ###x # bless( [ [ "" ] ], 'XML::Easy::Content' ) ###d z # bless( [ "a", {}, bless( [ [ "z" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ~ # bless( [ "a", {}, bless( [ [ "\n" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d < # bless( [ "a", {}, bless( [ [ "<" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d yz # bless( [ "a", {}, bless( [ [ "yz" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d y~ # bless( [ "a", {}, bless( [ [ "y\n" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d y< # bless( [ "a", {}, bless( [ [ "y<" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ~z # bless( [ "a", {}, bless( [ [ "\nz" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ~~ # bless( [ "a", {}, bless( [ [ "\n\n" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) 
###d ~< # bless( [ "a", {}, bless( [ [ "\n<" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d >z # bless( [ "a", {}, bless( [ [ ">z" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d >~ # bless( [ "a", {}, bless( [ [ ">\n" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d >< # bless( [ "a", {}, bless( [ [ "><" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d xyz # bless( [ "a", {}, bless( [ [ "xyz" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d xy~ # bless( [ "a", {}, bless( [ [ "xy\n" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d xy< # bless( [ "a", {}, bless( [ [ "xy<" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x~z # bless( [ "a", {}, bless( [ [ "x\nz" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x~~ # bless( [ "a", {}, bless( [ [ "x\n\n" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x~< # bless( [ "a", {}, bless( [ [ "x\n<" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x>z # bless( [ "a", {}, bless( [ [ "x>z" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x>~ # bless( [ "a", {}, bless( [ [ "x>\n" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d x>< # bless( [ "a", {}, bless( [ [ "x><" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ~yz # bless( [ "a", {}, bless( [ [ "\nyz" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ~y~ # bless( [ "a", {}, bless( [ [ "\ny\n" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ~y< # bless( [ "a", {}, bless( [ [ "\ny<" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ~~z # bless( [ "a", {}, bless( [ [ "\n\nz" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ~~~ # bless( [ "a", {}, bless( [ [ "\n\n\n" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ~~< # bless( [ "a", {}, bless( [ [ "\n\n<" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ~>z # bless( [ "a", {}, bless( [ [ "\n>z" ] ], 'XML::Easy::Content' ) ], 
'XML::Easy::Element' ) ###d ~>~ # bless( [ "a", {}, bless( [ [ "\n>\n" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d ~>< # bless( [ "a", {}, bless( [ [ "\n><" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d &yz # bless( [ "a", {}, bless( [ [ "&yz" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d &y~ # bless( [ "a", {}, bless( [ [ "&y\n" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d &y< # bless( [ "a", {}, bless( [ [ "&y<" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d &~z # bless( [ "a", {}, bless( [ [ "&\nz" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d &~~ # bless( [ "a", {}, bless( [ [ "&\n\n" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d &~< # bless( [ "a", {}, bless( [ [ "&\n<" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d &>z # bless( [ "a", {}, bless( [ [ "&>z" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d &>~ # bless( [ "a", {}, bless( [ [ "&>\n" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ###d &>< # bless( [ "a", {}, bless( [ [ "&><" ] ], 'XML::Easy::Content' ) ], 'XML::Easy::Element' ) ### XML-Easy-0.009/t/node_basics_pp.t000444001750001750 15111652350543 16532 0ustar00zeframzefram000000000000use warnings; use strict; do "t/setup_pp.pl" or die $@ || $!; do "t/node_basics.t" or die $@ || $!; 1; XML-Easy-0.009/t/node_object_pp.t000444001750001750 15111652350543 16534 0ustar00zeframzefram000000000000use warnings; use strict; do "t/setup_pp.pl" or die $@ || $!; do "t/node_object.t" or die $@ || $!; 1; XML-Easy-0.009/t/easy_module.t000444001750001750 73111652350543 16074 0ustar00zeframzefram000000000000use warnings; use strict; use Test::More tests => 18; BEGIN { $SIG{__WARN__} = sub { die "WARNING: $_[0]" }; } my @funcs = qw( xml10_read_content xml10_read_element xml10_read_document xml10_read_extparsedent xml10_write_content xml10_write_element xml10_write_document xml10_write_extparsedent ); use_ok "XML::Easy"; ok 
defined(&{"XML::Easy::$_"}) foreach @funcs; ok \&{"XML::Easy::$_"} == \&{"XML::Easy::Text::$_"} foreach @funcs; use_ok "XML::Easy", @funcs; 1; XML-Easy-0.009/t/read.t000444001750001750 704611652350543 14527 0ustar00zeframzefram000000000000use warnings; use strict; use Encode qw(decode); use IO::File (); use Params::Classify qw(scalar_class); use Scalar::Util qw(blessed reftype); use t::ErrorCases qw(COUNT_error_text test_error_text); use utf8 (); use Test::More tests => 1 + 2*504 + 2 + COUNT_error_text*6 + 5; BEGIN { $SIG{__WARN__} = sub { die "WARNING: $_[0]" }; } BEGIN { use_ok "XML::Easy::Text", qw( xml10_read_content_object xml10_read_content_twine xml10_read_element xml10_read_document xml10_read_extparsedent_object xml10_read_extparsedent_twine ); } sub deep_match($$); sub deep_match($$) { my($a, $b) = @_; my $ac = scalar_class($a); my $bc = scalar_class($b); return 0 unless $ac eq $bc; if($ac eq "STRING") { return $a eq $b; } elsif($ac eq "BLESSED" || $ac eq "REF") { return 0 if $ac eq "BLESSED" && blessed($a) ne blessed($b); my $at = reftype($a); my $bt = reftype($b); return 0 unless $at eq $bt; if($at =~ /\A(?:REF|SCALAR|LVALUE|GLOB)\z/) { return deep_match($$a, $$b); } elsif($at eq "ARRAY") { return 0 unless @$a == @$b; foreach(my $i = @$a; $i--; ) { return 0 unless deep_match($a->[$i], $b->[$i]); } return 1; } elsif($at eq "HASH") { my @keys = keys %$a; foreach(@keys) { return 0 unless exists $b->{$_}; return 0 unless deep_match($a->{$_}, $b->{$_}); } foreach(keys %$b) { return 0 unless exists $a->{$_}; } return 1; } else { return 1; } } else { return 1; } } sub upgraded($) { my($str) = @_; utf8::upgrade($str); return $str; } sub downgraded($) { my($str) = @_; utf8::downgrade($str, 1); return $str; } my %reader = ( c => \&xml10_read_content_object, e => \&xml10_read_element, d => \&xml10_read_document, x => \&xml10_read_extparsedent_object, ); sub try_read($$) { my $result = eval { $reader{$_[0]}->($_[1]) }; return $@ ne "" ? 
[ "error", $@ ] : [ "ok", $result ]; } my $data_in = IO::File->new("t/read.data", "r") or die; my $line = $data_in->getline; while(1) { $line =~ /\A###([a-z])?-?\n\z/ or die; last unless defined $1; my $prod = $1; $line = $data_in->getline; last unless defined $line; my $input = ""; while($line ne "#\n") { die if $line =~ /\A###/; $input .= $line; $line = $data_in->getline; die unless defined $line; } die if $input eq ""; chomp($input); $input =~ tr/~/\r/; $input =~ s/\$\((.*?)\)/$1 x 40000/seg; $input =~ s/\$\{(.*?)\}/$1 x 32764/seg; $input = decode("UTF-8", $input); my $correct = ""; while(1) { $line = $data_in->getline; die unless defined $line; last if $line =~ /\A###/; $correct .= $line; } chomp $correct; $correct = $correct =~ /\A[:'A-Za-z ]+\z/ ? [ "error", "$correct\n" ] : [ "ok", do { no warnings "utf8"; eval($correct) } ]; ok deep_match(try_read($prod, upgraded($input)), $correct); ok deep_match(try_read($prod, downgraded($input)), $correct); } is_deeply xml10_read_content_object("foobarbaz")->twine, xml10_read_content_twine("foobarbaz"); is_deeply xml10_read_extparsedent_object("foobarbaz")->twine, xml10_read_extparsedent_twine("foobarbaz"); foreach my $func ( (values %reader), \&xml10_read_content_twine, \&xml10_read_extparsedent_twine, ) { test_error_text($func); } ok defined(&{"XML::Easy::Text::xml10_read_content"}); ok \&{"XML::Easy::Text::xml10_read_content"} == \&{"XML::Easy::Text::xml10_read_content_twine"}; ok defined(&{"XML::Easy::Text::xml10_read_extparsedent"}); ok \&{"XML::Easy::Text::xml10_read_extparsedent"} == \&{"XML::Easy::Text::xml10_read_extparsedent_twine"}; use_ok "XML::Easy::Text", qw(xml10_read_content xml10_read_extparsedent); 1; XML-Easy-0.009/t/pod_syn.t000444001750001750 23611652350543 15241 0ustar00zeframzefram000000000000use warnings; use strict; use Test::More; plan skip_all => "Test::Pod not available" unless eval "use Test::Pod 1.00; 1"; Test::Pod::all_pod_files_ok(); 1;